bindec.sa revision 1.2 1 * MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
2 * M68000 Hi-Performance Microprocessor Division
3 * M68040 Software Package
4 *
5 * M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
6 * All rights reserved.
7 *
8 * THE SOFTWARE is provided on an "AS IS" basis and without warranty.
9 * To the maximum extent permitted by applicable law,
10 * MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
11 * INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
12 * PARTICULAR PURPOSE and any warranty against infringement with
13 * regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
14 * and any accompanying written materials.
15 *
16 * To the maximum extent permitted by applicable law,
17 * IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
18 * (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
19 * PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
20 * OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
21 * SOFTWARE. Motorola assumes no responsibility for the maintenance
22 * and support of the SOFTWARE.
23 *
24 * You are hereby granted a copyright license to use, modify, and
25 * distribute the SOFTWARE so long as this entire notice is retained
26 * without alteration in any modified and/or redistributed versions,
27 * and that such modified versions are clearly identified as such.
28 * No licenses are granted by implication, estoppel or otherwise
29 * under any patents or trademarks of Motorola, Inc.
30
31 *
32 * bindec.sa 3.4 1/3/91
33 *
34 * bindec
35 *
36 * Description:
37 * Converts an input in extended precision format
38 * to bcd format.
39 *
40 * Input:
41 * a0 points to the input extended precision value
42 * value in memory; d0 contains the k-factor sign-extended
43 * to 32-bits. The input may be either normalized,
44 * unnormalized, or denormalized.
45 *
46 * Output: result in the FP_SCR1 space on the stack.
47 *
48 * Saves and Modifies: D2-D7,A2,FP2
49 *
50 * Algorithm:
51 *
52 * A1. Set RM and size ext; Set SIGMA = sign of input.
53 * The k-factor is saved for use in d7. Clear the
54 * BINDEC_FLG for separating normalized/denormalized
55 * input. If input is unnormalized or denormalized,
56 * normalize it.
57 *
58 * A2. Set X = abs(input).
59 *
60 * A3. Compute ILOG.
61 * ILOG is the log base 10 of the input value. It is
62 * approximated by adding e + 0.f when the original
63 * value is viewed as 2^^e * 1.f in extended precision.
64 * This value is stored in d6.
65 *
66 * A4. Clr INEX bit.
67 * The operation in A3 above may have set INEX2.
68 *
69 * A5. Set ICTR = 0;
70 * ICTR is a flag used in A13. It must be set before the
71 * loop entry A6.
72 *
73 * A6. Calculate LEN.
74 * LEN is the number of digits to be displayed. The
75 * k-factor can dictate either the total number of digits,
76 * if it is a positive number, or the number of digits
77 * after the decimal point which are to be included as
78 * significant. See the 68882 manual for examples.
79 * If LEN is computed to be greater than 17, set OPERR in
80 * USER_FPSR. LEN is stored in d4.
81 *
82 * A7. Calculate SCALE.
83 * SCALE is equal to 10^ISCALE, where ISCALE is the number
84 * of decimal places needed to insure LEN integer digits
85 * in the output before conversion to bcd. LAMBDA is the
86 * sign of ISCALE, used in A9. Fp1 contains
87 * 10^^(abs(ISCALE)) using a rounding mode which is a
88 * function of the original rounding mode and the signs
89 * of ISCALE and X. A table is given in the code.
90 *
91 * A8. Clr INEX; Force RZ.
92 * The operation in A3 above may have set INEX2.
93 * RZ mode is forced for the scaling operation to insure
94 * only one rounding error. The grs bits are collected in
95 * the INEX flag for use in A10.
96 *
97 * A9. Scale X -> Y.
98 * The mantissa is scaled to the desired number of
99 * significant digits. The excess digits are collected
100 * in INEX2.
101 *
102 * A10. Or in INEX.
103 * If INEX is set, round error occured. This is
104 * compensated for by 'or-ing' in the INEX2 flag to
105 * the lsb of Y.
106 *
107 * A11. Restore original FPCR; set size ext.
108 * Perform FINT operation in the user's rounding mode.
109 * Keep the size to extended.
110 *
111 * A12. Calculate YINT = FINT(Y) according to user's rounding
112 * mode. The FPSP routine sintd0 is used. The output
113 * is in fp0.
114 *
115 * A13. Check for LEN digits.
116 * If the int operation results in more than LEN digits,
117 * or less than LEN -1 digits, adjust ILOG and repeat from
118 * A6. This test occurs only on the first pass. If the
119 * result is exactly 10^LEN, decrement ILOG and divide
120 * the mantissa by 10.
121 *
122 * A14. Convert the mantissa to bcd.
123 * The binstr routine is used to convert the LEN digit
124 * mantissa to bcd in memory. The input to binstr is
125 * to be a fraction; i.e. (mantissa)/10^LEN and adjusted
126 * such that the decimal point is to the left of bit 63.
127 * The bcd digits are stored in the correct position in
128 * the final string area in memory.
129 *
130 * A15. Convert the exponent to bcd.
131 * As in A14 above, the exp is converted to bcd and the
132 * digits are stored in the final string.
133 * Test the length of the final exponent string. If the
134 * length is 4, set operr.
135 *
136 * A16. Write sign bits to final string.
137 *
138 * Implementation Notes:
139 *
140 * The registers are used as follows:
141 *
142 * d0: scratch; LEN input to binstr
143 * d1: scratch
144 * d2: upper 32-bits of mantissa for binstr
145 * d3: scratch;lower 32-bits of mantissa for binstr
146 * d4: LEN
147 * d5: LAMBDA/ICTR
148 * d6: ILOG
149 * d7: k-factor
150 * a0: ptr for original operand/final result
151 * a1: scratch pointer
152 * a2: pointer to FP_X; abs(original value) in ext
153 * fp0: scratch
154 * fp1: scratch
155 * fp2: scratch
156 * F_SCR1:
157 * F_SCR2:
158 * L_SCR1:
159 * L_SCR2:
160 *
161
162 BINDEC IDNT 2,1 Motorola 040 Floating Point Software Package
163
164 include fpsp.h
165
166 section 8
167
168 * Constants in extended precision
169 LOG2 dc.l $3FFD0000,$9A209A84,$FBCFF798,$00000000
170 LOG2UP1 dc.l $3FFD0000,$9A209A84,$FBCFF799,$00000000
171
172 * Constants in single precision
173 FONE dc.l $3F800000,$00000000,$00000000,$00000000
174 FTWO dc.l $40000000,$00000000,$00000000,$00000000
175 FTEN dc.l $41200000,$00000000,$00000000,$00000000
176 F4933 dc.l $459A2800,$00000000,$00000000,$00000000
177
178 RBDTBL dc.b 0,0,0,0
179 dc.b 3,3,2,2
180 dc.b 3,2,2,3
181 dc.b 2,3,3,2
182
183 xref binstr
184 xref sintdo
185 xref ptenrn,ptenrm,ptenrp
186
187 xdef bindec
188 xdef sc_mul
189 bindec:
190 movem.l d2-d7/a2,-(a7)
191 fmovem.x fp0-fp2,-(a7)
192
193 * A1. Set RM and size ext. Set SIGMA = sign input;
194 * The k-factor is saved for use in d7. Clear BINDEC_FLG for
195 * separating normalized/denormalized input. If the input
196 * is a denormalized number, set the BINDEC_FLG memory word
197 * to signal denorm. If the input is unnormalized, normalize
198 * the input and test for denormalized result.
199 *
200 fmove.l #rm_mode,FPCR ;set RM and ext
201 move.l (a0),L_SCR2(a6) ;save exponent for sign check
202 move.l d0,d7 ;move k-factor to d7
203 clr.b BINDEC_FLG(a6) ;clr norm/denorm flag
204 move.w STAG(a6),d0 ;get stag
205 andi.w #$e000,d0 ;isolate stag bits
206 beq A2_str ;if zero, input is norm
207 *
208 * Normalize the denorm
209 *
210 un_de_norm:
211 move.w (a0),d0
212 andi.w #$7fff,d0 ;strip sign of normalized exp
213 move.l 4(a0),d1
214 move.l 8(a0),d2
215 norm_loop:
216 sub.w #1,d0
217 add.l d2,d2
218 addx.l d1,d1
219 tst.l d1
220 bge.b norm_loop
221 *
222 * Test if the normalized input is denormalized
223 *
224 tst.w d0
225 bgt.b pos_exp ;if greater than zero, it is a norm
226 st BINDEC_FLG(a6) ;set flag for denorm
227 pos_exp:
228 andi.w #$7fff,d0 ;strip sign of normalized exp
229 move.w d0,(a0)
230 move.l d1,4(a0)
231 move.l d2,8(a0)
232
233 * A2. Set X = abs(input).
234 *
235 A2_str:
236 move.l (a0),FP_SCR2(a6) ; move input to work space
237 move.l 4(a0),FP_SCR2+4(a6) ; move input to work space
238 move.l 8(a0),FP_SCR2+8(a6) ; move input to work space
239 andi.l #$7fffffff,FP_SCR2(a6) ;create abs(X)
240
241 * A3. Compute ILOG.
242 * ILOG is the log base 10 of the input value. It is approx-
243 * imated by adding e + 0.f when the original value is viewed
244 * as 2^^e * 1.f in extended precision. This value is stored
245 * in d6.
246 *
247 * Register usage:
248 * Input/Output
249 * d0: k-factor/exponent
250 * d2: x/x
251 * d3: x/x
252 * d4: x/x
253 * d5: x/x
254 * d6: x/ILOG
255 * d7: k-factor/Unchanged
256 * a0: ptr for original operand/final result
257 * a1: x/x
258 * a2: x/x
259 * fp0: x/float(ILOG)
260 * fp1: x/x
261 * fp2: x/x
262 * F_SCR1:x/x
263 * F_SCR2:Abs(X)/Abs(X) with $3fff exponent
264 * L_SCR1:x/x
265 * L_SCR2:first word of X packed/Unchanged
266
267 tst.b BINDEC_FLG(a6) ;check for denorm
268 beq.b A3_cont ;if clr, continue with norm
269 move.l #-4933,d6 ;force ILOG = -4933
270 bra.b A4_str
271 A3_cont:
272 move.w FP_SCR2(a6),d0 ;move exp to d0
273 move.w #$3fff,FP_SCR2(a6) ;replace exponent with 0x3fff
274 fmove.x FP_SCR2(a6),fp0 ;now fp0 has 1.f
275 sub.w #$3fff,d0 ;strip off bias
276 fadd.w d0,fp0 ;add in exp
277 fsub.s FONE,fp0 ;subtract off 1.0
278 fbge.w pos_res ;if pos, branch
279 fmul.x LOG2UP1,fp0 ;if neg, mul by LOG2UP1
280 fmove.l fp0,d6 ;put ILOG in d6 as a lword
281 bra.b A4_str ;go move out ILOG
282 pos_res:
283 fmul.x LOG2,fp0 ;if pos, mul by LOG2
284 fmove.l fp0,d6 ;put ILOG in d6 as a lword
285
286
287 * A4. Clr INEX bit.
288 * The operation in A3 above may have set INEX2.
289
290 A4_str:
291 fmove.l #0,FPSR ;zero all of fpsr - nothing needed
292
293
294 * A5. Set ICTR = 0;
295 * ICTR is a flag used in A13. It must be set before the
296 * loop entry A6. The lower word of d5 is used for ICTR.
297
298 clr.w d5 ;clear ICTR
299
300
301 * A6. Calculate LEN.
302 * LEN is the number of digits to be displayed. The k-factor
303 * can dictate either the total number of digits, if it is
304 * a positive number, or the number of digits after the
305 * original decimal point which are to be included as
306 * significant. See the 68882 manual for examples.
307 * If LEN is computed to be greater than 17, set OPERR in
308 * USER_FPSR. LEN is stored in d4.
309 *
310 * Register usage:
311 * Input/Output
312 * d0: exponent/Unchanged
313 * d2: x/x/scratch
314 * d3: x/x
315 * d4: exc picture/LEN
316 * d5: ICTR/Unchanged
317 * d6: ILOG/Unchanged
318 * d7: k-factor/Unchanged
319 * a0: ptr for original operand/final result
320 * a1: x/x
321 * a2: x/x
322 * fp0: float(ILOG)/Unchanged
323 * fp1: x/x
324 * fp2: x/x
325 * F_SCR1:x/x
326 * F_SCR2:Abs(X) with $3fff exponent/Unchanged
327 * L_SCR1:x/x
328 * L_SCR2:first word of X packed/Unchanged
329
330 A6_str:
331 tst.l d7 ;branch on sign of k
332 ble.b k_neg ;if k <= 0, LEN = ILOG + 1 - k
333 move.l d7,d4 ;if k > 0, LEN = k
334 bra.b len_ck ;skip to LEN check
335 k_neg:
336 move.l d6,d4 ;first load ILOG to d4
337 sub.l d7,d4 ;subtract off k
338 addq.l #1,d4 ;add in the 1
339 len_ck:
340 tst.l d4 ;LEN check: branch on sign of LEN
341 ble.b LEN_ng ;if neg, set LEN = 1
342 cmp.l #17,d4 ;test if LEN > 17
343 ble.b A7_str ;if not, forget it
344 move.l #17,d4 ;set max LEN = 17
345 tst.l d7 ;if negative, never set OPERR
346 ble.b A7_str ;if positive, continue
347 or.l #opaop_mask,USER_FPSR(a6) ;set OPERR & AIOP in USER_FPSR
348 bra.b A7_str ;finished here
349 LEN_ng:
350 moveq.l #1,d4 ;min LEN is 1
351
352
353 * A7. Calculate SCALE.
354 * SCALE is equal to 10^ISCALE, where ISCALE is the number
355 * of decimal places needed to insure LEN integer digits
356 * in the output before conversion to bcd. LAMBDA is the sign
357 * of ISCALE, used in A9. Fp1 contains 10^^(abs(ISCALE)) using
358 * the rounding mode as given in the following table (see
359 * Coonen, p. 7.23 as ref.; however, the SCALE variable is
360 * of opposite sign in bindec.sa from Coonen).
361 *
362 * Initial USE
363 * FPCR[6:5] LAMBDA SIGN(X) FPCR[6:5]
364 * ----------------------------------------------
365 * RN 00 0 0 00/0 RN
366 * RN 00 0 1 00/0 RN
367 * RN 00 1 0 00/0 RN
368 * RN 00 1 1 00/0 RN
369 * RZ 01 0 0 11/3 RP
370 * RZ 01 0 1 11/3 RP
371 * RZ 01 1 0 10/2 RM
372 * RZ 01 1 1 10/2 RM
373 * RM 10 0 0 11/3 RP
374 * RM 10 0 1 10/2 RM
375 * RM 10 1 0 10/2 RM
376 * RM 10 1 1 11/3 RP
377 * RP 11 0 0 10/2 RM
378 * RP 11 0 1 11/3 RP
379 * RP 11 1 0 11/3 RP
380 * RP 11 1 1 10/2 RM
381 *
382 * Register usage:
383 * Input/Output
384 * d0: exponent/scratch - final is 0
385 * d2: x/0 or 24 for A9
386 * d3: x/scratch - offset ptr into PTENRM array
387 * d4: LEN/Unchanged
388 * d5: 0/ICTR:LAMBDA
389 * d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
390 * d7: k-factor/Unchanged
391 * a0: ptr for original operand/final result
392 * a1: x/ptr to PTENRM array
393 * a2: x/x
394 * fp0: float(ILOG)/Unchanged
395 * fp1: x/10^ISCALE
396 * fp2: x/x
397 * F_SCR1:x/x
398 * F_SCR2:Abs(X) with $3fff exponent/Unchanged
399 * L_SCR1:x/x
400 * L_SCR2:first word of X packed/Unchanged
401
402 A7_str:
403 tst.l d7 ;test sign of k
404 bgt.b k_pos ;if pos and > 0, skip this
405 cmp.l d6,d7 ;test k - ILOG
406 blt.b k_pos ;if ILOG >= k, skip this
407 move.l d7,d6 ;if ((k<0) & (ILOG < k)) ILOG = k
408 k_pos:
409 move.l d6,d0 ;calc ILOG + 1 - LEN in d0
410 addq.l #1,d0 ;add the 1
411 sub.l d4,d0 ;sub off LEN
412 swap d5 ;use upper word of d5 for LAMBDA
413 clr.w d5 ;set it zero initially
414 clr.w d2 ;set up d2 for very small case
415 tst.l d0 ;test sign of ISCALE
416 bge.b iscale ;if pos, skip next inst
417 addq.w #1,d5 ;if neg, set LAMBDA true
418 cmp.l #$ffffecd4,d0 ;test iscale <= -4908
419 bgt.b no_inf ;if false, skip rest
420 addi.l #24,d0 ;add in 24 to iscale
421 move.l #24,d2 ;put 24 in d2 for A9
422 no_inf:
423 neg.l d0 ;and take abs of ISCALE
424 iscale:
425 fmove.s FONE,fp1 ;init fp1 to 1
426 bfextu USER_FPCR(a6){26:2},d1 ;get initial rmode bits
427 add.w d1,d1 ;put them in bits 2:1
428 add.w d5,d1 ;add in LAMBDA
429 add.w d1,d1 ;put them in bits 3:1
430 tst.l L_SCR2(a6) ;test sign of original x
431 bge.b x_pos ;if pos, don't set bit 0
432 addq.l #1,d1 ;if neg, set bit 0
433 x_pos:
434 lea.l RBDTBL,a2 ;load rbdtbl base
435 move.b (a2,d1),d3 ;load d3 with new rmode
436 lsl.l #4,d3 ;put bits in proper position
437 fmove.l d3,fpcr ;load bits into fpu
438 lsr.l #4,d3 ;put bits in proper position
439 tst.b d3 ;decode new rmode for pten table
440 bne.b not_rn ;if zero, it is RN
441 lea.l PTENRN,a1 ;load a1 with RN table base
442 bra.b rmode ;exit decode
443 not_rn:
444 lsr.b #1,d3 ;get lsb in carry
445 bcc.b not_rp ;if carry clear, it is RM
446 lea.l PTENRP,a1 ;load a1 with RP table base
447 bra.b rmode ;exit decode
448 not_rp:
449 lea.l PTENRM,a1 ;load a1 with RM table base
450 rmode:
451 clr.l d3 ;clr table index
452 e_loop:
453 lsr.l #1,d0 ;shift next bit into carry
454 bcc.b e_next ;if zero, skip the mul
455 fmul.x (a1,d3),fp1 ;mul by 10**(d3_bit_no)
456 e_next:
457 add.l #12,d3 ;inc d3 to next pwrten table entry
458 tst.l d0 ;test if ISCALE is zero
459 bne.b e_loop ;if not, loop
460
461
462 * A8. Clr INEX; Force RZ.
463 * The operation in A3 above may have set INEX2.
464 * RZ mode is forced for the scaling operation to insure
465 * only one rounding error. The grs bits are collected in
466 * the INEX flag for use in A10.
467 *
468 * Register usage:
469 * Input/Output
470
471 fmove.l #0,FPSR ;clr INEX
472 fmove.l #rz_mode,FPCR ;set RZ rounding mode
473
474
475 * A9. Scale X -> Y.
476 * The mantissa is scaled to the desired number of significant
477 * digits. The excess digits are collected in INEX2. If mul,
478 * Check d2 for excess 10 exponential value. If not zero,
479 * the iscale value would have caused the pwrten calculation
480 * to overflow. Only a negative iscale can cause this, so
481 * multiply by 10^(d2), which is now only allowed to be 24,
482 * with a multiply by 10^8 and 10^16, which is exact since
483 * 10^24 is exact. If the input was denormalized, we must
484 * create a busy stack frame with the mul command and the
485 * two operands, and allow the fpu to complete the multiply.
486 *
487 * Register usage:
488 * Input/Output
489 * d0: FPCR with RZ mode/Unchanged
490 * d2: 0 or 24/unchanged
491 * d3: x/x
492 * d4: LEN/Unchanged
493 * d5: ICTR:LAMBDA
494 * d6: ILOG/Unchanged
495 * d7: k-factor/Unchanged
496 * a0: ptr for original operand/final result
497 * a1: ptr to PTENRM array/Unchanged
498 * a2: x/x
499 * fp0: float(ILOG)/X adjusted for SCALE (Y)
500 * fp1: 10^ISCALE/Unchanged
501 * fp2: x/x
502 * F_SCR1:x/x
503 * F_SCR2:Abs(X) with $3fff exponent/Unchanged
504 * L_SCR1:x/x
505 * L_SCR2:first word of X packed/Unchanged
506
507 A9_str:
508 fmove.x (a0),fp0 ;load X from memory
509 fabs.x fp0 ;use abs(X)
510 tst.w d5 ;LAMBDA is in lower word of d5
511 bne.b sc_mul ;if neg (LAMBDA = 1), scale by mul
512 fdiv.x fp1,fp0 ;calculate X / SCALE -> Y to fp0
513 bra.b A10_st ;branch to A10
514
515 sc_mul:
516 tst.b BINDEC_FLG(a6) ;check for denorm
517 beq.b A9_norm ;if norm, continue with mul
518 fmovem.x fp1,-(a7) ;load ETEMP with 10^ISCALE
519 move.l 8(a0),-(a7) ;load FPTEMP with input arg
520 move.l 4(a0),-(a7)
521 move.l (a0),-(a7)
522 move.l #18,d3 ;load count for busy stack
523 A9_loop:
524 clr.l -(a7) ;clear lword on stack
525 dbf.w d3,A9_loop
526 move.b VER_TMP(a6),(a7) ;write current version number
527 move.b #BUSY_SIZE-4,1(a7) ;write current busy size
528 move.b #$10,$44(a7) ;set fcefpte[15] bit
529 move.w #$0023,$40(a7) ;load cmdreg1b with mul command
530 move.b #$fe,$8(a7) ;load all 1s to cu savepc
531 frestore (a7)+ ;restore frame to fpu for completion
532 fmul.x 36(a1),fp0 ;multiply fp0 by 10^8
533 fmul.x 48(a1),fp0 ;multiply fp0 by 10^16
534 bra.b A10_st
535 A9_norm:
536 tst.w d2 ;test for small exp case
537 beq.b A9_con ;if zero, continue as normal
538 fmul.x 36(a1),fp0 ;multiply fp0 by 10^8
539 fmul.x 48(a1),fp0 ;multiply fp0 by 10^16
540 A9_con:
541 fmul.x fp1,fp0 ;calculate X * SCALE -> Y to fp0
542
543
544 * A10. Or in INEX.
545 * If INEX is set, round error occured. This is compensated
546 * for by 'or-ing' in the INEX2 flag to the lsb of Y.
547 *
548 * Register usage:
549 * Input/Output
550 * d0: FPCR with RZ mode/FPSR with INEX2 isolated
551 * d2: x/x
552 * d3: x/x
553 * d4: LEN/Unchanged
554 * d5: ICTR:LAMBDA
555 * d6: ILOG/Unchanged
556 * d7: k-factor/Unchanged
557 * a0: ptr for original operand/final result
558 * a1: ptr to PTENxx array/Unchanged
559 * a2: x/ptr to FP_SCR2(a6)
560 * fp0: Y/Y with lsb adjusted
561 * fp1: 10^ISCALE/Unchanged
562 * fp2: x/x
563
564 A10_st:
565 fmove.l FPSR,d0 ;get FPSR
566 fmove.x fp0,FP_SCR2(a6) ;move Y to memory
567 lea.l FP_SCR2(a6),a2 ;load a2 with ptr to FP_SCR2
568 btst.l #9,d0 ;check if INEX2 set
569 beq.b A11_st ;if clear, skip rest
570 ori.l #1,8(a2) ;or in 1 to lsb of mantissa
571 fmove.x FP_SCR2(a6),fp0 ;write adjusted Y back to fpu
572
573
574 * A11. Restore original FPCR; set size ext.
575 * Perform FINT operation in the user's rounding mode. Keep
576 * the size to extended. The sintdo entry point in the sint
577 * routine expects the FPCR value to be in USER_FPCR for
578 * mode and precision. The original FPCR is saved in L_SCR1.
579
580 A11_st:
581 move.l USER_FPCR(a6),L_SCR1(a6) ;save it for later
582 andi.l #$00000030,USER_FPCR(a6) ;set size to ext,
583 * ;block exceptions
584
585
586 * A12. Calculate YINT = FINT(Y) according to user's rounding mode.
587 * The FPSP routine sintd0 is used. The output is in fp0.
588 *
589 * Register usage:
590 * Input/Output
591 * d0: FPSR with AINEX cleared/FPCR with size set to ext
592 * d2: x/x/scratch
593 * d3: x/x
594 * d4: LEN/Unchanged
595 * d5: ICTR:LAMBDA/Unchanged
596 * d6: ILOG/Unchanged
597 * d7: k-factor/Unchanged
598 * a0: ptr for original operand/src ptr for sintdo
599 * a1: ptr to PTENxx array/Unchanged
600 * a2: ptr to FP_SCR2(a6)/Unchanged
601 * a6: temp pointer to FP_SCR2(a6) - orig value saved and restored
602 * fp0: Y/YINT
603 * fp1: 10^ISCALE/Unchanged
604 * fp2: x/x
605 * F_SCR1:x/x
606 * F_SCR2:Y adjusted for inex/Y with original exponent
607 * L_SCR1:x/original USER_FPCR
608 * L_SCR2:first word of X packed/Unchanged
609
610 A12_st:
611 movem.l d0-d1/a0-a1,-(a7) ;save regs used by sintd0
612 move.l L_SCR1(a6),-(a7)
613 move.l L_SCR2(a6),-(a7)
614 lea.l FP_SCR2(a6),a0 ;a0 is ptr to F_SCR2(a6)
615 fmove.x fp0,(a0) ;move Y to memory at FP_SCR2(a6)
616 tst.l L_SCR2(a6) ;test sign of original operand
617 bge.b do_fint ;if pos, use Y
618 or.l #$80000000,(a0) ;if neg, use -Y
619 do_fint:
620 move.l USER_FPSR(a6),-(a7)
621 bsr sintdo ;sint routine returns int in fp0
622 move.b (a7),USER_FPSR(a6)
623 add.l #4,a7
624 move.l (a7)+,L_SCR2(a6)
625 move.l (a7)+,L_SCR1(a6)
626 movem.l (a7)+,d0-d1/a0-a1 ;restore regs used by sint
627 move.l L_SCR2(a6),FP_SCR2(a6) ;restore original exponent
628 move.l L_SCR1(a6),USER_FPCR(a6) ;restore user's FPCR
629
630
631 * A13. Check for LEN digits.
632 * If the int operation results in more than LEN digits,
633 * or less than LEN -1 digits, adjust ILOG and repeat from
634 * A6. This test occurs only on the first pass. If the
635 * result is exactly 10^LEN, decrement ILOG and divide
636 * the mantissa by 10. The calculation of 10^LEN cannot
637 * be inexact, since all powers of ten upto 10^27 are exact
638 * in extended precision, so the use of a previous power-of-ten
639 * table will introduce no error.
640 *
641 *
642 * Register usage:
643 * Input/Output
644 * d0: FPCR with size set to ext/scratch final = 0
645 * d2: x/x
646 * d3: x/scratch final = x
647 * d4: LEN/LEN adjusted
648 * d5: ICTR:LAMBDA/LAMBDA:ICTR
649 * d6: ILOG/ILOG adjusted
650 * d7: k-factor/Unchanged
651 * a0: pointer into memory for packed bcd string formation
652 * a1: ptr to PTENxx array/Unchanged
653 * a2: ptr to FP_SCR2(a6)/Unchanged
654 * fp0: int portion of Y/abs(YINT) adjusted
655 * fp1: 10^ISCALE/Unchanged
656 * fp2: x/10^LEN
657 * F_SCR1:x/x
658 * F_SCR2:Y with original exponent/Unchanged
659 * L_SCR1:original USER_FPCR/Unchanged
660 * L_SCR2:first word of X packed/Unchanged
661
662 A13_st:
663 swap d5 ;put ICTR in lower word of d5
664 tst.w d5 ;check if ICTR = 0
665 bne not_zr ;if non-zero, go to second test
666 *
667 * Compute 10^(LEN-1)
668 *
669 fmove.s FONE,fp2 ;init fp2 to 1.0
670 move.l d4,d0 ;put LEN in d0
671 subq.l #1,d0 ;d0 = LEN -1
672 clr.l d3 ;clr table index
673 l_loop:
674 lsr.l #1,d0 ;shift next bit into carry
675 bcc.b l_next ;if zero, skip the mul
676 fmul.x (a1,d3),fp2 ;mul by 10**(d3_bit_no)
677 l_next:
678 add.l #12,d3 ;inc d3 to next pwrten table entry
679 tst.l d0 ;test if LEN is zero
680 bne.b l_loop ;if not, loop
681 *
682 * 10^LEN-1 is computed for this test and A14. If the input was
683 * denormalized, check only the case in which YINT > 10^LEN.
684 *
685 tst.b BINDEC_FLG(a6) ;check if input was norm
686 beq.b A13_con ;if norm, continue with checking
687 fabs.x fp0 ;take abs of YINT
688 bra test_2
689 *
690 * Compare abs(YINT) to 10^(LEN-1) and 10^LEN
691 *
692 A13_con:
693 fabs.x fp0 ;take abs of YINT
694 fcmp.x fp2,fp0 ;compare abs(YINT) with 10^(LEN-1)
695 fbge.w test_2 ;if greater, do next test
696 subq.l #1,d6 ;subtract 1 from ILOG
697 move.w #1,d5 ;set ICTR
698 fmove.l #rm_mode,FPCR ;set rmode to RM
699 fmul.s FTEN,fp2 ;compute 10^LEN
700 bra.w A6_str ;return to A6 and recompute YINT
701 test_2:
702 fmul.s FTEN,fp2 ;compute 10^LEN
703 fcmp.x fp2,fp0 ;compare abs(YINT) with 10^LEN
704 fblt.w A14_st ;if less, all is ok, go to A14
705 fbgt.w fix_ex ;if greater, fix and redo
706 fdiv.s FTEN,fp0 ;if equal, divide by 10
707 addq.l #1,d6 ; and inc ILOG
708 bra.b A14_st ; and continue elsewhere
709 fix_ex:
710 addq.l #1,d6 ;increment ILOG by 1
711 move.w #1,d5 ;set ICTR
712 fmove.l #rm_mode,FPCR ;set rmode to RM
713 bra.w A6_str ;return to A6 and recompute YINT
714 *
715 * Since ICTR <> 0, we have already been through one adjustment,
716 * and shouldn't have another; this is to check if abs(YINT) = 10^LEN
717 * 10^LEN is again computed using whatever table is in a1 since the
718 * value calculated cannot be inexact.
719 *
720 not_zr:
721 fmove.s FONE,fp2 ;init fp2 to 1.0
722 move.l d4,d0 ;put LEN in d0
723 clr.l d3 ;clr table index
724 z_loop:
725 lsr.l #1,d0 ;shift next bit into carry
726 bcc.b z_next ;if zero, skip the mul
727 fmul.x (a1,d3),fp2 ;mul by 10**(d3_bit_no)
728 z_next:
729 add.l #12,d3 ;inc d3 to next pwrten table entry
730 tst.l d0 ;test if LEN is zero
731 bne.b z_loop ;if not, loop
732 fabs.x fp0 ;get abs(YINT)
733 fcmp.x fp2,fp0 ;check if abs(YINT) = 10^LEN
734 fbne.w A14_st ;if not, skip this
735 fdiv.s FTEN,fp0 ;divide abs(YINT) by 10
736 addq.l #1,d6 ;and inc ILOG by 1
737 addq.l #1,d4 ; and inc LEN
738 fmul.s FTEN,fp2 ; if LEN++, the get 10^^LEN
739
740
741 * A14. Convert the mantissa to bcd.
742 * The binstr routine is used to convert the LEN digit
743 * mantissa to bcd in memory. The input to binstr is
744 * to be a fraction; i.e. (mantissa)/10^LEN and adjusted
745 * such that the decimal point is to the left of bit 63.
746 * The bcd digits are stored in the correct position in
747 * the final string area in memory.
748 *
749 *
750 * Register usage:
751 * Input/Output
752 * d0: x/LEN call to binstr - final is 0
753 * d1: x/0
754 * d2: x/ms 32-bits of mant of abs(YINT)
755 * d3: x/ls 32-bits of mant of abs(YINT)
756 * d4: LEN/Unchanged
757 * d5: ICTR:LAMBDA/LAMBDA:ICTR
758 * d6: ILOG
759 * d7: k-factor/Unchanged
760 * a0: pointer into memory for packed bcd string formation
761 * /ptr to first mantissa byte in result string
762 * a1: ptr to PTENxx array/Unchanged
763 * a2: ptr to FP_SCR2(a6)/Unchanged
764 * fp0: int portion of Y/abs(YINT) adjusted
765 * fp1: 10^ISCALE/Unchanged
766 * fp2: 10^LEN/Unchanged
767 * F_SCR1:x/Work area for final result
768 * F_SCR2:Y with original exponent/Unchanged
769 * L_SCR1:original USER_FPCR/Unchanged
770 * L_SCR2:first word of X packed/Unchanged
771
772 A14_st:
773 fmove.l #rz_mode,FPCR ;force rz for conversion
774 fdiv.x fp2,fp0 ;divide abs(YINT) by 10^LEN
775 lea.l FP_SCR1(a6),a0
776 fmove.x fp0,(a0) ;move abs(YINT)/10^LEN to memory
777 move.l 4(a0),d2 ;move 2nd word of FP_RES to d2
778 move.l 8(a0),d3 ;move 3rd word of FP_RES to d3
779 clr.l 4(a0) ;zero word 2 of FP_RES
780 clr.l 8(a0) ;zero word 3 of FP_RES
781 move.l (a0),d0 ;move exponent to d0
782 swap d0 ;put exponent in lower word
783 beq.b no_sft ;if zero, don't shift
784 subi.l #$3ffd,d0 ;sub bias less 2 to make fract
785 tst.l d0 ;check if > 1
786 bgt.b no_sft ;if so, don't shift
787 neg.l d0 ;make exp positive
788 m_loop:
789 lsr.l #1,d2 ;shift d2:d3 right, add 0s
790 roxr.l #1,d3 ;the number of places
791 dbf.w d0,m_loop ;given in d0
792 no_sft:
793 tst.l d2 ;check for mantissa of zero
794 bne.b no_zr ;if not, go on
795 tst.l d3 ;continue zero check
796 beq.b zer_m ;if zero, go directly to binstr
797 no_zr:
798 clr.l d1 ;put zero in d1 for addx
799 addi.l #$00000080,d3 ;inc at bit 7
800 addx.l d1,d2 ;continue inc
801 andi.l #$ffffff80,d3 ;strip off lsb not used by 882
802 zer_m:
803 move.l d4,d0 ;put LEN in d0 for binstr call
804 addq.l #3,a0 ;a0 points to M16 byte in result
805 bsr binstr ;call binstr to convert mant
806
807
808 * A15. Convert the exponent to bcd.
809 * As in A14 above, the exp is converted to bcd and the
810 * digits are stored in the final string.
811 *
812 * Digits are stored in L_SCR1(a6) on return from BINDEC as:
813 *
814 * 32 16 15 0
815 * -----------------------------------------
816 * | 0 | e3 | e2 | e1 | e4 | X | X | X |
817 * -----------------------------------------
818 *
819 * And are moved into their proper places in FP_SCR1. If digit e4
820 * is non-zero, OPERR is signaled. In all cases, all 4 digits are
821 * written as specified in the 881/882 manual for packed decimal.
822 *
823 * Register usage:
824 * Input/Output
825 * d0: x/LEN call to binstr - final is 0
826 * d1: x/scratch (0);shift count for final exponent packing
827 * d2: x/ms 32-bits of exp fraction/scratch
828 * d3: x/ls 32-bits of exp fraction
829 * d4: LEN/Unchanged
830 * d5: ICTR:LAMBDA/LAMBDA:ICTR
831 * d6: ILOG
832 * d7: k-factor/Unchanged
833 * a0: ptr to result string/ptr to L_SCR1(a6)
834 * a1: ptr to PTENxx array/Unchanged
835 * a2: ptr to FP_SCR2(a6)/Unchanged
836 * fp0: abs(YINT) adjusted/float(ILOG)
837 * fp1: 10^ISCALE/Unchanged
838 * fp2: 10^LEN/Unchanged
839 * F_SCR1:Work area for final result/BCD result
840 * F_SCR2:Y with original exponent/ILOG/10^4
841 * L_SCR1:original USER_FPCR/Exponent digits on return from binstr
842 * L_SCR2:first word of X packed/Unchanged
843
844 A15_st:
845 tst.b BINDEC_FLG(a6) ;check for denorm
846 beq.b not_denorm
847 ftst.x fp0 ;test for zero
848 fbeq.w den_zero ;if zero, use k-factor or 4933
849 fmove.l d6,fp0 ;float ILOG
850 fabs.x fp0 ;get abs of ILOG
851 bra.b convrt
852 den_zero:
853 tst.l d7 ;check sign of the k-factor
854 blt.b use_ilog ;if negative, use ILOG
855 fmove.s F4933,fp0 ;force exponent to 4933
856 bra.b convrt ;do it
857 use_ilog:
858 fmove.l d6,fp0 ;float ILOG
859 fabs.x fp0 ;get abs of ILOG
860 bra.b convrt
861 not_denorm:
862 ftst.x fp0 ;test for zero
863 fbne.w not_zero ;if zero, force exponent
864 fmove.s FONE,fp0 ;force exponent to 1
865 bra.b convrt ;do it
866 not_zero:
867 fmove.l d6,fp0 ;float ILOG
868 fabs.x fp0 ;get abs of ILOG
869 convrt:
870 fdiv.x 24(a1),fp0 ;compute ILOG/10^4
871 fmove.x fp0,FP_SCR2(a6) ;store fp0 in memory
872 move.l 4(a2),d2 ;move word 2 to d2
873 move.l 8(a2),d3 ;move word 3 to d3
874 move.w (a2),d0 ;move exp to d0
875 beq.b x_loop_fin ;if zero, skip the shift
876 subi.w #$3ffd,d0 ;subtract off bias
877 neg.w d0 ;make exp positive
878 x_loop:
879 lsr.l #1,d2 ;shift d2:d3 right
880 roxr.l #1,d3 ;the number of places
881 dbf.w d0,x_loop ;given in d0
882 x_loop_fin:
883 clr.l d1 ;put zero in d1 for addx
884 addi.l #$00000080,d3 ;inc at bit 6
885 addx.l d1,d2 ;continue inc
886 andi.l #$ffffff80,d3 ;strip off lsb not used by 882
887 move.l #4,d0 ;put 4 in d0 for binstr call
888 lea.l L_SCR1(a6),a0 ;a0 is ptr to L_SCR1 for exp digits
889 bsr binstr ;call binstr to convert exp
890 move.l L_SCR1(a6),d0 ;load L_SCR1 lword to d0
891 move.l #12,d1 ;use d1 for shift count
892 lsr.l d1,d0 ;shift d0 right by 12
893 bfins d0,FP_SCR1(a6){4:12} ;put e3:e2:e1 in FP_SCR1
894 lsr.l d1,d0 ;shift d0 right by 12
895 bfins d0,FP_SCR1(a6){16:4} ;put e4 in FP_SCR1
896 tst.b d0 ;check if e4 is zero
897 beq.b A16_st ;if zero, skip rest
898 or.l #opaop_mask,USER_FPSR(a6) ;set OPERR & AIOP in USER_FPSR
899
900
901 * A16. Write sign bits to final string.
902 * Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
903 *
904 * Register usage:
905 * Input/Output
906 * d0: x/scratch - final is x
907 * d2: x/x
908 * d3: x/x
909 * d4: LEN/Unchanged
910 * d5: ICTR:LAMBDA/LAMBDA:ICTR
911 * d6: ILOG/ILOG adjusted
912 * d7: k-factor/Unchanged
913 * a0: ptr to L_SCR1(a6)/Unchanged
914 * a1: ptr to PTENxx array/Unchanged
915 * a2: ptr to FP_SCR2(a6)/Unchanged
916 * fp0: float(ILOG)/Unchanged
917 * fp1: 10^ISCALE/Unchanged
918 * fp2: 10^LEN/Unchanged
919 * F_SCR1:BCD result with correct signs
920 * F_SCR2:ILOG/10^4
921 * L_SCR1:Exponent digits on return from binstr
922 * L_SCR2:first word of X packed/Unchanged
923
924 A16_st:
925 clr.l d0 ;clr d0 for collection of signs
926 andi.b #$0f,FP_SCR1(a6) ;clear first nibble of FP_SCR1
927 tst.l L_SCR2(a6) ;check sign of original mantissa
928 bge.b mant_p ;if pos, don't set SM
929 moveq.l #2,d0 ;move 2 in to d0 for SM
930 mant_p:
931 tst.l d6 ;check sign of ILOG
932 bge.b wr_sgn ;if pos, don't set SE
933 addq.l #1,d0 ;set bit 0 in d0 for SE
934 wr_sgn:
935 bfins d0,FP_SCR1(a6){0:2} ;insert SM and SE into FP_SCR1
936
937 * Clean up and restore all registers used.
938
939 fmove.l #0,FPSR ;clear possible inex2/ainex bits
940 fmovem.x (a7)+,fp0-fp2
941 movem.l (a7)+,d2-d7/a2
942 rts
943
944 end
945