bindec.sa revision 1.4.4.2 1 * $NetBSD: bindec.sa,v 1.4.4.2 2002/01/08 00:25:52 nathanw Exp $
2
3 * MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
4 * M68000 Hi-Performance Microprocessor Division
5 * M68040 Software Package
6 *
7 * M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
8 * All rights reserved.
9 *
10 * THE SOFTWARE is provided on an "AS IS" basis and without warranty.
11 * To the maximum extent permitted by applicable law,
12 * MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
13 * INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
14 * PARTICULAR PURPOSE and any warranty against infringement with
15 * regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
16 * and any accompanying written materials.
17 *
18 * To the maximum extent permitted by applicable law,
19 * IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
20 * (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
21 * PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
22 * OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
23 * SOFTWARE. Motorola assumes no responsibility for the maintenance
24 * and support of the SOFTWARE.
25 *
26 * You are hereby granted a copyright license to use, modify, and
27 * distribute the SOFTWARE so long as this entire notice is retained
28 * without alteration in any modified and/or redistributed versions,
29 * and that such modified versions are clearly identified as such.
30 * No licenses are granted by implication, estoppel or otherwise
31 * under any patents or trademarks of Motorola, Inc.
32
33 *
34 * bindec.sa 3.4 1/3/91
35 *
36 * bindec
37 *
38 * Description:
39 * Converts an input in extended precision format
40 * to bcd format.
41 *
42 * Input:
43 * a0 points to the input extended precision value
44 * value in memory; d0 contains the k-factor sign-extended
45 * to 32-bits. The input may be either normalized,
46 * unnormalized, or denormalized.
47 *
48 * Output: result in the FP_SCR1 space on the stack.
49 *
50 * Saves and Modifies: D2-D7,A2,FP2
51 *
52 * Algorithm:
53 *
54 * A1. Set RM and size ext; Set SIGMA = sign of input.
55 * The k-factor is saved for use in d7. Clear the
56 * BINDEC_FLG for separating normalized/denormalized
57 * input. If input is unnormalized or denormalized,
58 * normalize it.
59 *
60 * A2. Set X = abs(input).
61 *
62 * A3. Compute ILOG.
63 * ILOG is the log base 10 of the input value. It is
64 * approximated by adding e + 0.f when the original
65 * value is viewed as 2^^e * 1.f in extended precision.
66 * This value is stored in d6.
67 *
68 * A4. Clr INEX bit.
69 * The operation in A3 above may have set INEX2.
70 *
71 * A5. Set ICTR = 0;
72 * ICTR is a flag used in A13. It must be set before the
73 * loop entry A6.
74 *
75 * A6. Calculate LEN.
76 * LEN is the number of digits to be displayed. The
77 * k-factor can dictate either the total number of digits,
78 * if it is a positive number, or the number of digits
79 * after the decimal point which are to be included as
80 * significant. See the 68882 manual for examples.
81 * If LEN is computed to be greater than 17, set OPERR in
82 * USER_FPSR. LEN is stored in d4.
83 *
84 * A7. Calculate SCALE.
85 * SCALE is equal to 10^ISCALE, where ISCALE is the number
86 * of decimal places needed to insure LEN integer digits
87 * in the output before conversion to bcd. LAMBDA is the
88 * sign of ISCALE, used in A9. Fp1 contains
89 * 10^^(abs(ISCALE)) using a rounding mode which is a
90 * function of the original rounding mode and the signs
91 * of ISCALE and X. A table is given in the code.
92 *
93 * A8. Clr INEX; Force RZ.
94 * The operation in A3 above may have set INEX2.
95 * RZ mode is forced for the scaling operation to insure
96 * only one rounding error. The grs bits are collected in
97 * the INEX flag for use in A10.
98 *
99 * A9. Scale X -> Y.
100 * The mantissa is scaled to the desired number of
101 * significant digits. The excess digits are collected
102 * in INEX2.
103 *
104 * A10. Or in INEX.
105 * If INEX is set, round error occurred. This is
106 * compensated for by 'or-ing' in the INEX2 flag to
107 * the lsb of Y.
108 *
109 * A11. Restore original FPCR; set size ext.
110 * Perform FINT operation in the user's rounding mode.
111 * Keep the size to extended.
112 *
113 * A12. Calculate YINT = FINT(Y) according to user's rounding
114 * mode. The FPSP routine sintd0 is used. The output
115 * is in fp0.
116 *
117 * A13. Check for LEN digits.
118 * If the int operation results in more than LEN digits,
119 * or less than LEN -1 digits, adjust ILOG and repeat from
120 * A6. This test occurs only on the first pass. If the
121 * result is exactly 10^LEN, decrement ILOG and divide
122 * the mantissa by 10.
123 *
124 * A14. Convert the mantissa to bcd.
125 * The binstr routine is used to convert the LEN digit
126 * mantissa to bcd in memory. The input to binstr is
127 * to be a fraction; i.e. (mantissa)/10^LEN and adjusted
128 * such that the decimal point is to the left of bit 63.
129 * The bcd digits are stored in the correct position in
130 * the final string area in memory.
131 *
132 * A15. Convert the exponent to bcd.
133 * As in A14 above, the exp is converted to bcd and the
134 * digits are stored in the final string.
135 * Test the length of the final exponent string. If the
136 * length is 4, set operr.
137 *
138 * A16. Write sign bits to final string.
139 *
140 * Implementation Notes:
141 *
142 * The registers are used as follows:
143 *
144 * d0: scratch; LEN input to binstr
145 * d1: scratch
146 * d2: upper 32-bits of mantissa for binstr
147 * d3: scratch;lower 32-bits of mantissa for binstr
148 * d4: LEN
149 * d5: LAMBDA/ICTR
150 * d6: ILOG
151 * d7: k-factor
152 * a0: ptr for original operand/final result
153 * a1: scratch pointer
154 * a2: pointer to FP_X; abs(original value) in ext
155 * fp0: scratch
156 * fp1: scratch
157 * fp2: scratch
158 * F_SCR1:
159 * F_SCR2:
160 * L_SCR1:
161 * L_SCR2:
162 *
163
164 BINDEC IDNT 2,1 Motorola 040 Floating Point Software Package
165
166 include fpsp.h
167
168 section 8
169
170 * Constants in extended precision
171 LOG2 dc.l $3FFD0000,$9A209A84,$FBCFF798,$00000000
172 LOG2UP1 dc.l $3FFD0000,$9A209A84,$FBCFF799,$00000000
173
174 * Constants in single precision
175 FONE dc.l $3F800000,$00000000,$00000000,$00000000
176 FTWO dc.l $40000000,$00000000,$00000000,$00000000
177 FTEN dc.l $41200000,$00000000,$00000000,$00000000
178 F4933 dc.l $459A2800,$00000000,$00000000,$00000000
179
180 RBDTBL dc.b 0,0,0,0
181 dc.b 3,3,2,2
182 dc.b 3,2,2,3
183 dc.b 2,3,3,2
184
185 xref binstr
186 xref sintdo
187 xref ptenrn,ptenrm,ptenrp
188
189 xdef bindec
190 xdef sc_mul
191 bindec:
192 movem.l d2-d7/a2,-(a7)
193 fmovem.x fp0-fp2,-(a7)
194
195 * A1. Set RM and size ext. Set SIGMA = sign input;
196 * The k-factor is saved for use in d7. Clear BINDEC_FLG for
197 * separating normalized/denormalized input. If the input
198 * is a denormalized number, set the BINDEC_FLG memory word
199 * to signal denorm. If the input is unnormalized, normalize
200 * the input and test for denormalized result.
201 *
202 fmove.l #rm_mode,FPCR ;set RM and ext
203 move.l (a0),L_SCR2(a6) ;save exponent for sign check
204 move.l d0,d7 ;move k-factor to d7
205 clr.b BINDEC_FLG(a6) ;clr norm/denorm flag
206 move.w STAG(a6),d0 ;get stag
207 andi.w #$e000,d0 ;isolate stag bits
208 beq A2_str ;if zero, input is norm
209 *
210 * Normalize the denorm
211 *
212 un_de_norm:
213 move.w (a0),d0
214 andi.w #$7fff,d0 ;strip sign of normalized exp
215 move.l 4(a0),d1
216 move.l 8(a0),d2
217 norm_loop:
218 sub.w #1,d0
219 add.l d2,d2
220 addx.l d1,d1
221 tst.l d1
222 bge.b norm_loop
223 *
224 * Test if the normalized input is denormalized
225 *
226 tst.w d0
227 bgt.b pos_exp ;if greater than zero, it is a norm
228 st BINDEC_FLG(a6) ;set flag for denorm
229 pos_exp:
230 andi.w #$7fff,d0 ;strip sign of normalized exp
231 move.w d0,(a0)
232 move.l d1,4(a0)
233 move.l d2,8(a0)
234
235 * A2. Set X = abs(input).
236 *
237 A2_str:
238 move.l (a0),FP_SCR2(a6) ; move input to work space
239 move.l 4(a0),FP_SCR2+4(a6) ; move input to work space
240 move.l 8(a0),FP_SCR2+8(a6) ; move input to work space
241 andi.l #$7fffffff,FP_SCR2(a6) ;create abs(X)
242
243 * A3. Compute ILOG.
244 * ILOG is the log base 10 of the input value. It is approx-
245 * imated by adding e + 0.f when the original value is viewed
246 * as 2^^e * 1.f in extended precision. This value is stored
247 * in d6.
248 *
249 * Register usage:
250 * Input/Output
251 * d0: k-factor/exponent
252 * d2: x/x
253 * d3: x/x
254 * d4: x/x
255 * d5: x/x
256 * d6: x/ILOG
257 * d7: k-factor/Unchanged
258 * a0: ptr for original operand/final result
259 * a1: x/x
260 * a2: x/x
261 * fp0: x/float(ILOG)
262 * fp1: x/x
263 * fp2: x/x
264 * F_SCR1:x/x
265 * F_SCR2:Abs(X)/Abs(X) with $3fff exponent
266 * L_SCR1:x/x
267 * L_SCR2:first word of X packed/Unchanged
268
269 tst.b BINDEC_FLG(a6) ;check for denorm
270 beq.b A3_cont ;if clr, continue with norm
271 move.l #-4933,d6 ;force ILOG = -4933
272 bra.b A4_str
273 A3_cont:
274 move.w FP_SCR2(a6),d0 ;move exp to d0
275 move.w #$3fff,FP_SCR2(a6) ;replace exponent with 0x3fff
276 fmove.x FP_SCR2(a6),fp0 ;now fp0 has 1.f
277 sub.w #$3fff,d0 ;strip off bias
278 fadd.w d0,fp0 ;add in exp
279 fsub.s FONE,fp0 ;subtract off 1.0
280 fbge.w pos_res ;if pos, branch
281 fmul.x LOG2UP1,fp0 ;if neg, mul by LOG2UP1
282 fmove.l fp0,d6 ;put ILOG in d6 as a lword
283 bra.b A4_str ;go move out ILOG
284 pos_res:
285 fmul.x LOG2,fp0 ;if pos, mul by LOG2
286 fmove.l fp0,d6 ;put ILOG in d6 as a lword
287
288
289 * A4. Clr INEX bit.
290 * The operation in A3 above may have set INEX2.
291
292 A4_str:
293 fmove.l #0,FPSR ;zero all of fpsr - nothing needed
294
295
296 * A5. Set ICTR = 0;
297 * ICTR is a flag used in A13. It must be set before the
298 * loop entry A6. The lower word of d5 is used for ICTR.
299
300 clr.w d5 ;clear ICTR
301
302
303 * A6. Calculate LEN.
304 * LEN is the number of digits to be displayed. The k-factor
305 * can dictate either the total number of digits, if it is
306 * a positive number, or the number of digits after the
307 * original decimal point which are to be included as
308 * significant. See the 68882 manual for examples.
309 * If LEN is computed to be greater than 17, set OPERR in
310 * USER_FPSR. LEN is stored in d4.
311 *
312 * Register usage:
313 * Input/Output
314 * d0: exponent/Unchanged
315 * d2: x/x/scratch
316 * d3: x/x
317 * d4: exc picture/LEN
318 * d5: ICTR/Unchanged
319 * d6: ILOG/Unchanged
320 * d7: k-factor/Unchanged
321 * a0: ptr for original operand/final result
322 * a1: x/x
323 * a2: x/x
324 * fp0: float(ILOG)/Unchanged
325 * fp1: x/x
326 * fp2: x/x
327 * F_SCR1:x/x
328 * F_SCR2:Abs(X) with $3fff exponent/Unchanged
329 * L_SCR1:x/x
330 * L_SCR2:first word of X packed/Unchanged
331
332 A6_str:
333 tst.l d7 ;branch on sign of k
334 ble.b k_neg ;if k <= 0, LEN = ILOG + 1 - k
335 move.l d7,d4 ;if k > 0, LEN = k
336 bra.b len_ck ;skip to LEN check
337 k_neg:
338 move.l d6,d4 ;first load ILOG to d4
339 sub.l d7,d4 ;subtract off k
340 addq.l #1,d4 ;add in the 1
341 len_ck:
342 tst.l d4 ;LEN check: branch on sign of LEN
343 ble.b LEN_ng ;if neg, set LEN = 1
344 cmp.l #17,d4 ;test if LEN > 17
345 ble.b A7_str ;if not, forget it
346 move.l #17,d4 ;set max LEN = 17
347 tst.l d7 ;if negative, never set OPERR
348 ble.b A7_str ;if positive, continue
349 or.l #opaop_mask,USER_FPSR(a6) ;set OPERR & AIOP in USER_FPSR
350 bra.b A7_str ;finished here
351 LEN_ng:
352 moveq.l #1,d4 ;min LEN is 1
353
354
355 * A7. Calculate SCALE.
356 * SCALE is equal to 10^ISCALE, where ISCALE is the number
357 * of decimal places needed to insure LEN integer digits
358 * in the output before conversion to bcd. LAMBDA is the sign
359 * of ISCALE, used in A9. Fp1 contains 10^^(abs(ISCALE)) using
360 * the rounding mode as given in the following table (see
361 * Coonen, p. 7.23 as ref.; however, the SCALE variable is
362 * of opposite sign in bindec.sa from Coonen).
363 *
364 * Initial USE
365 * FPCR[6:5] LAMBDA SIGN(X) FPCR[6:5]
366 * ----------------------------------------------
367 * RN 00 0 0 00/0 RN
368 * RN 00 0 1 00/0 RN
369 * RN 00 1 0 00/0 RN
370 * RN 00 1 1 00/0 RN
371 * RZ 01 0 0 11/3 RP
372 * RZ 01 0 1 11/3 RP
373 * RZ 01 1 0 10/2 RM
374 * RZ 01 1 1 10/2 RM
375 * RM 10 0 0 11/3 RP
376 * RM 10 0 1 10/2 RM
377 * RM 10 1 0 10/2 RM
378 * RM 10 1 1 11/3 RP
379 * RP 11 0 0 10/2 RM
380 * RP 11 0 1 11/3 RP
381 * RP 11 1 0 11/3 RP
382 * RP 11 1 1 10/2 RM
383 *
384 * Register usage:
385 * Input/Output
386 * d0: exponent/scratch - final is 0
387 * d2: x/0 or 24 for A9
388 * d3: x/scratch - offset ptr into PTENRM array
389 * d4: LEN/Unchanged
390 * d5: 0/ICTR:LAMBDA
391 * d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
392 * d7: k-factor/Unchanged
393 * a0: ptr for original operand/final result
394 * a1: x/ptr to PTENRM array
395 * a2: x/x
396 * fp0: float(ILOG)/Unchanged
397 * fp1: x/10^ISCALE
398 * fp2: x/x
399 * F_SCR1:x/x
400 * F_SCR2:Abs(X) with $3fff exponent/Unchanged
401 * L_SCR1:x/x
402 * L_SCR2:first word of X packed/Unchanged
403
404 A7_str:
405 tst.l d7 ;test sign of k
406 bgt.b k_pos ;if pos and > 0, skip this
407 cmp.l d6,d7 ;test k - ILOG
408 blt.b k_pos ;if ILOG >= k, skip this
409 move.l d7,d6 ;if ((k<0) & (ILOG < k)) ILOG = k
410 k_pos:
411 move.l d6,d0 ;calc ILOG + 1 - LEN in d0
412 addq.l #1,d0 ;add the 1
413 sub.l d4,d0 ;sub off LEN
414 swap d5 ;use upper word of d5 for LAMBDA
415 clr.w d5 ;set it zero initially
416 clr.w d2 ;set up d2 for very small case
417 tst.l d0 ;test sign of ISCALE
418 bge.b iscale ;if pos, skip next inst
419 addq.w #1,d5 ;if neg, set LAMBDA true
420 cmp.l #$ffffecd4,d0 ;test iscale <= -4908
421 bgt.b no_inf ;if false, skip rest
422 addi.l #24,d0 ;add in 24 to iscale
423 move.l #24,d2 ;put 24 in d2 for A9
424 no_inf:
425 neg.l d0 ;and take abs of ISCALE
426 iscale:
427 fmove.s FONE,fp1 ;init fp1 to 1
428 bfextu USER_FPCR(a6){26:2},d1 ;get initial rmode bits
429 add.w d1,d1 ;put them in bits 2:1
430 add.w d5,d1 ;add in LAMBDA
431 add.w d1,d1 ;put them in bits 3:1
432 tst.l L_SCR2(a6) ;test sign of original x
433 bge.b x_pos ;if pos, don't set bit 0
434 addq.l #1,d1 ;if neg, set bit 0
435 x_pos:
436 lea.l RBDTBL,a2 ;load rbdtbl base
437 move.b (a2,d1),d3 ;load d3 with new rmode
438 lsl.l #4,d3 ;put bits in proper position
439 fmove.l d3,fpcr ;load bits into fpu
440 lsr.l #4,d3 ;put bits in proper position
441 tst.b d3 ;decode new rmode for pten table
442 bne.b not_rn ;if zero, it is RN
443 lea.l PTENRN,a1 ;load a1 with RN table base
444 bra.b rmode ;exit decode
445 not_rn:
446 lsr.b #1,d3 ;get lsb in carry
447 bcc.b not_rp ;if carry clear, it is RM
448 lea.l PTENRP,a1 ;load a1 with RP table base
449 bra.b rmode ;exit decode
450 not_rp:
451 lea.l PTENRM,a1 ;load a1 with RM table base
452 rmode:
453 clr.l d3 ;clr table index
454 e_loop:
455 lsr.l #1,d0 ;shift next bit into carry
456 bcc.b e_next ;if zero, skip the mul
457 fmul.x (a1,d3),fp1 ;mul by 10**(d3_bit_no)
458 e_next:
459 add.l #12,d3 ;inc d3 to next pwrten table entry
460 tst.l d0 ;test if ISCALE is zero
461 bne.b e_loop ;if not, loop
462
463
464 * A8. Clr INEX; Force RZ.
465 * The operation in A3 above may have set INEX2.
466 * RZ mode is forced for the scaling operation to insure
467 * only one rounding error. The grs bits are collected in
468 * the INEX flag for use in A10.
469 *
470 * Register usage:
471 * Input/Output
472
473 fmove.l #0,FPSR ;clr INEX
474 fmove.l #rz_mode,FPCR ;set RZ rounding mode
475
476
477 * A9. Scale X -> Y.
478 * The mantissa is scaled to the desired number of significant
479 * digits. The excess digits are collected in INEX2. If mul,
480 * Check d2 for excess 10 exponential value. If not zero,
481 * the iscale value would have caused the pwrten calculation
482 * to overflow. Only a negative iscale can cause this, so
483 * multiply by 10^(d2), which is now only allowed to be 24,
484 * with a multiply by 10^8 and 10^16, which is exact since
485 * 10^24 is exact. If the input was denormalized, we must
486 * create a busy stack frame with the mul command and the
487 * two operands, and allow the fpu to complete the multiply.
488 *
489 * Register usage:
490 * Input/Output
491 * d0: FPCR with RZ mode/Unchanged
492 * d2: 0 or 24/unchanged
493 * d3: x/x
494 * d4: LEN/Unchanged
495 * d5: ICTR:LAMBDA
496 * d6: ILOG/Unchanged
497 * d7: k-factor/Unchanged
498 * a0: ptr for original operand/final result
499 * a1: ptr to PTENRM array/Unchanged
500 * a2: x/x
501 * fp0: float(ILOG)/X adjusted for SCALE (Y)
502 * fp1: 10^ISCALE/Unchanged
503 * fp2: x/x
504 * F_SCR1:x/x
505 * F_SCR2:Abs(X) with $3fff exponent/Unchanged
506 * L_SCR1:x/x
507 * L_SCR2:first word of X packed/Unchanged
508
509 A9_str:
510 fmove.x (a0),fp0 ;load X from memory
511 fabs.x fp0 ;use abs(X)
512 tst.w d5 ;LAMBDA is in lower word of d5
513 bne.b short_sc_mul ;if neg (LAMBDA = 1), scale by mul
514 fdiv.x fp1,fp0 ;calculate X / SCALE -> Y to fp0
515 bra.b A10_st ;branch to A10
516
517 sc_mul:
518 short_sc_mul:
519 tst.b BINDEC_FLG(a6) ;check for denorm
520 beq.b A9_norm ;if norm, continue with mul
521 fmovem.x fp1,-(a7) ;load ETEMP with 10^ISCALE
522 move.l 8(a0),-(a7) ;load FPTEMP with input arg
523 move.l 4(a0),-(a7)
524 move.l (a0),-(a7)
525 move.l #18,d3 ;load count for busy stack
526 A9_loop:
527 clr.l -(a7) ;clear lword on stack
528 dbf.w d3,A9_loop
529 move.b VER_TMP(a6),(a7) ;write current version number
530 move.b #BUSY_SIZE-4,1(a7) ;write current busy size
531 move.b #$10,$44(a7) ;set fcefpte[15] bit
532 move.w #$0023,$40(a7) ;load cmdreg1b with mul command
533 move.b #$fe,$8(a7) ;load all 1s to cu savepc
534 frestore (a7)+ ;restore frame to fpu for completion
535 fmul.x 36(a1),fp0 ;multiply fp0 by 10^8
536 fmul.x 48(a1),fp0 ;multiply fp0 by 10^16
537 bra.b A10_st
538 A9_norm:
539 tst.w d2 ;test for small exp case
540 beq.b A9_con ;if zero, continue as normal
541 fmul.x 36(a1),fp0 ;multiply fp0 by 10^8
542 fmul.x 48(a1),fp0 ;multiply fp0 by 10^16
543 A9_con:
544 fmul.x fp1,fp0 ;calculate X * SCALE -> Y to fp0
545
546
547 * A10. Or in INEX.
548 * If INEX is set, round error occurred. This is compensated
549 * for by 'or-ing' in the INEX2 flag to the lsb of Y.
550 *
551 * Register usage:
552 * Input/Output
553 * d0: FPCR with RZ mode/FPSR with INEX2 isolated
554 * d2: x/x
555 * d3: x/x
556 * d4: LEN/Unchanged
557 * d5: ICTR:LAMBDA
558 * d6: ILOG/Unchanged
559 * d7: k-factor/Unchanged
560 * a0: ptr for original operand/final result
561 * a1: ptr to PTENxx array/Unchanged
562 * a2: x/ptr to FP_SCR2(a6)
563 * fp0: Y/Y with lsb adjusted
564 * fp1: 10^ISCALE/Unchanged
565 * fp2: x/x
566
567 A10_st:
568 fmove.l FPSR,d0 ;get FPSR
569 fmove.x fp0,FP_SCR2(a6) ;move Y to memory
570 lea.l FP_SCR2(a6),a2 ;load a2 with ptr to FP_SCR2
571 btst.l #9,d0 ;check if INEX2 set
572 beq.b A11_st ;if clear, skip rest
573 ori.l #1,8(a2) ;or in 1 to lsb of mantissa
574 fmove.x FP_SCR2(a6),fp0 ;write adjusted Y back to fpu
575
576
577 * A11. Restore original FPCR; set size ext.
578 * Perform FINT operation in the user's rounding mode. Keep
579 * the size to extended. The sintdo entry point in the sint
580 * routine expects the FPCR value to be in USER_FPCR for
581 * mode and precision. The original FPCR is saved in L_SCR1.
582
583 A11_st:
584 move.l USER_FPCR(a6),L_SCR1(a6) ;save it for later
585 andi.l #$00000030,USER_FPCR(a6) ;set size to ext,
586 * ;block exceptions
587
588
589 * A12. Calculate YINT = FINT(Y) according to user's rounding mode.
590 * The FPSP routine sintd0 is used. The output is in fp0.
591 *
592 * Register usage:
593 * Input/Output
594 * d0: FPSR with AINEX cleared/FPCR with size set to ext
595 * d2: x/x/scratch
596 * d3: x/x
597 * d4: LEN/Unchanged
598 * d5: ICTR:LAMBDA/Unchanged
599 * d6: ILOG/Unchanged
600 * d7: k-factor/Unchanged
601 * a0: ptr for original operand/src ptr for sintdo
602 * a1: ptr to PTENxx array/Unchanged
603 * a2: ptr to FP_SCR2(a6)/Unchanged
604 * a6: temp pointer to FP_SCR2(a6) - orig value saved and restored
605 * fp0: Y/YINT
606 * fp1: 10^ISCALE/Unchanged
607 * fp2: x/x
608 * F_SCR1:x/x
609 * F_SCR2:Y adjusted for inex/Y with original exponent
610 * L_SCR1:x/original USER_FPCR
611 * L_SCR2:first word of X packed/Unchanged
612
613 A12_st:
614 movem.l d0-d1/a0-a1,-(a7) ;save regs used by sintd0
615 move.l L_SCR1(a6),-(a7)
616 move.l L_SCR2(a6),-(a7)
617 lea.l FP_SCR2(a6),a0 ;a0 is ptr to F_SCR2(a6)
618 fmove.x fp0,(a0) ;move Y to memory at FP_SCR2(a6)
619 tst.l L_SCR2(a6) ;test sign of original operand
620 bge.b do_fint ;if pos, use Y
621 or.l #$80000000,(a0) ;if neg, use -Y
622 do_fint:
623 move.l USER_FPSR(a6),-(a7)
624 bsr sintdo ;sint routine returns int in fp0
625 move.b (a7),USER_FPSR(a6)
626 add.l #4,a7
627 move.l (a7)+,L_SCR2(a6)
628 move.l (a7)+,L_SCR1(a6)
629 movem.l (a7)+,d0-d1/a0-a1 ;restore regs used by sint
630 move.l L_SCR2(a6),FP_SCR2(a6) ;restore original exponent
631 move.l L_SCR1(a6),USER_FPCR(a6) ;restore user's FPCR
632
633
634 * A13. Check for LEN digits.
635 * If the int operation results in more than LEN digits,
636 * or less than LEN -1 digits, adjust ILOG and repeat from
637 * A6. This test occurs only on the first pass. If the
638 * result is exactly 10^LEN, decrement ILOG and divide
639 * the mantissa by 10. The calculation of 10^LEN cannot
640 * be inexact, since all powers of ten upto 10^27 are exact
641 * in extended precision, so the use of a previous power-of-ten
642 * table will introduce no error.
643 *
644 *
645 * Register usage:
646 * Input/Output
647 * d0: FPCR with size set to ext/scratch final = 0
648 * d2: x/x
649 * d3: x/scratch final = x
650 * d4: LEN/LEN adjusted
651 * d5: ICTR:LAMBDA/LAMBDA:ICTR
652 * d6: ILOG/ILOG adjusted
653 * d7: k-factor/Unchanged
654 * a0: pointer into memory for packed bcd string formation
655 * a1: ptr to PTENxx array/Unchanged
656 * a2: ptr to FP_SCR2(a6)/Unchanged
657 * fp0: int portion of Y/abs(YINT) adjusted
658 * fp1: 10^ISCALE/Unchanged
659 * fp2: x/10^LEN
660 * F_SCR1:x/x
661 * F_SCR2:Y with original exponent/Unchanged
662 * L_SCR1:original USER_FPCR/Unchanged
663 * L_SCR2:first word of X packed/Unchanged
664
665 A13_st:
666 swap d5 ;put ICTR in lower word of d5
667 tst.w d5 ;check if ICTR = 0
668 bne not_zr ;if non-zero, go to second test
669 *
670 * Compute 10^(LEN-1)
671 *
672 fmove.s FONE,fp2 ;init fp2 to 1.0
673 move.l d4,d0 ;put LEN in d0
674 subq.l #1,d0 ;d0 = LEN -1
675 clr.l d3 ;clr table index
676 l_loop:
677 lsr.l #1,d0 ;shift next bit into carry
678 bcc.b l_next ;if zero, skip the mul
679 fmul.x (a1,d3),fp2 ;mul by 10**(d3_bit_no)
680 l_next:
681 add.l #12,d3 ;inc d3 to next pwrten table entry
682 tst.l d0 ;test if LEN is zero
683 bne.b l_loop ;if not, loop
684 *
685 * 10^LEN-1 is computed for this test and A14. If the input was
686 * denormalized, check only the case in which YINT > 10^LEN.
687 *
688 tst.b BINDEC_FLG(a6) ;check if input was norm
689 beq.b A13_con ;if norm, continue with checking
690 fabs.x fp0 ;take abs of YINT
691 bra test_2
692 *
693 * Compare abs(YINT) to 10^(LEN-1) and 10^LEN
694 *
695 A13_con:
696 fabs.x fp0 ;take abs of YINT
697 fcmp.x fp2,fp0 ;compare abs(YINT) with 10^(LEN-1)
698 fbge.w test_2 ;if greater, do next test
699 subq.l #1,d6 ;subtract 1 from ILOG
700 move.w #1,d5 ;set ICTR
701 fmove.l #rm_mode,FPCR ;set rmode to RM
702 fmul.s FTEN,fp2 ;compute 10^LEN
703 bra.w A6_str ;return to A6 and recompute YINT
704 test_2:
705 fmul.s FTEN,fp2 ;compute 10^LEN
706 fcmp.x fp2,fp0 ;compare abs(YINT) with 10^LEN
707 fblt.w A14_st ;if less, all is ok, go to A14
708 fbgt.w fix_ex ;if greater, fix and redo
709 fdiv.s FTEN,fp0 ;if equal, divide by 10
710 addq.l #1,d6 ; and inc ILOG
711 bra.b A14_st ; and continue elsewhere
712 fix_ex:
713 addq.l #1,d6 ;increment ILOG by 1
714 move.w #1,d5 ;set ICTR
715 fmove.l #rm_mode,FPCR ;set rmode to RM
716 bra.w A6_str ;return to A6 and recompute YINT
717 *
718 * Since ICTR <> 0, we have already been through one adjustment,
719 * and shouldn't have another; this is to check if abs(YINT) = 10^LEN
720 * 10^LEN is again computed using whatever table is in a1 since the
721 * value calculated cannot be inexact.
722 *
723 not_zr:
724 fmove.s FONE,fp2 ;init fp2 to 1.0
725 move.l d4,d0 ;put LEN in d0
726 clr.l d3 ;clr table index
727 z_loop:
728 lsr.l #1,d0 ;shift next bit into carry
729 bcc.b z_next ;if zero, skip the mul
730 fmul.x (a1,d3),fp2 ;mul by 10**(d3_bit_no)
731 z_next:
732 add.l #12,d3 ;inc d3 to next pwrten table entry
733 tst.l d0 ;test if LEN is zero
734 bne.b z_loop ;if not, loop
735 fabs.x fp0 ;get abs(YINT)
736 fcmp.x fp2,fp0 ;check if abs(YINT) = 10^LEN
737 fbne.w A14_st ;if not, skip this
738 fdiv.s FTEN,fp0 ;divide abs(YINT) by 10
739 addq.l #1,d6 ;and inc ILOG by 1
740 addq.l #1,d4 ; and inc LEN
741 fmul.s FTEN,fp2 ; if LEN++, the get 10^^LEN
742
743
744 * A14. Convert the mantissa to bcd.
745 * The binstr routine is used to convert the LEN digit
746 * mantissa to bcd in memory. The input to binstr is
747 * to be a fraction; i.e. (mantissa)/10^LEN and adjusted
748 * such that the decimal point is to the left of bit 63.
749 * The bcd digits are stored in the correct position in
750 * the final string area in memory.
751 *
752 *
753 * Register usage:
754 * Input/Output
755 * d0: x/LEN call to binstr - final is 0
756 * d1: x/0
757 * d2: x/ms 32-bits of mant of abs(YINT)
758 * d3: x/ls 32-bits of mant of abs(YINT)
759 * d4: LEN/Unchanged
760 * d5: ICTR:LAMBDA/LAMBDA:ICTR
761 * d6: ILOG
762 * d7: k-factor/Unchanged
763 * a0: pointer into memory for packed bcd string formation
764 * /ptr to first mantissa byte in result string
765 * a1: ptr to PTENxx array/Unchanged
766 * a2: ptr to FP_SCR2(a6)/Unchanged
767 * fp0: int portion of Y/abs(YINT) adjusted
768 * fp1: 10^ISCALE/Unchanged
769 * fp2: 10^LEN/Unchanged
770 * F_SCR1:x/Work area for final result
771 * F_SCR2:Y with original exponent/Unchanged
772 * L_SCR1:original USER_FPCR/Unchanged
773 * L_SCR2:first word of X packed/Unchanged
774
775 A14_st:
776 fmove.l #rz_mode,FPCR ;force rz for conversion
777 fdiv.x fp2,fp0 ;divide abs(YINT) by 10^LEN
778 lea.l FP_SCR1(a6),a0
779 fmove.x fp0,(a0) ;move abs(YINT)/10^LEN to memory
780 move.l 4(a0),d2 ;move 2nd word of FP_RES to d2
781 move.l 8(a0),d3 ;move 3rd word of FP_RES to d3
782 clr.l 4(a0) ;zero word 2 of FP_RES
783 clr.l 8(a0) ;zero word 3 of FP_RES
784 move.l (a0),d0 ;move exponent to d0
785 swap d0 ;put exponent in lower word
786 beq.b no_sft ;if zero, don't shift
787 subi.l #$3ffd,d0 ;sub bias less 2 to make fract
788 tst.l d0 ;check if > 1
789 bgt.b no_sft ;if so, don't shift
790 neg.l d0 ;make exp positive
791 m_loop:
792 lsr.l #1,d2 ;shift d2:d3 right, add 0s
793 roxr.l #1,d3 ;the number of places
794 dbf.w d0,m_loop ;given in d0
795 no_sft:
796 tst.l d2 ;check for mantissa of zero
797 bne.b no_zr ;if not, go on
798 tst.l d3 ;continue zero check
799 beq.b zer_m ;if zero, go directly to binstr
800 no_zr:
801 clr.l d1 ;put zero in d1 for addx
802 addi.l #$00000080,d3 ;inc at bit 7
803 addx.l d1,d2 ;continue inc
804 andi.l #$ffffff80,d3 ;strip off lsb not used by 882
805 zer_m:
806 move.l d4,d0 ;put LEN in d0 for binstr call
807 addq.l #3,a0 ;a0 points to M16 byte in result
808 bsr binstr ;call binstr to convert mant
809
810
811 * A15. Convert the exponent to bcd.
812 * As in A14 above, the exp is converted to bcd and the
813 * digits are stored in the final string.
814 *
815 * Digits are stored in L_SCR1(a6) on return from BINDEC as:
816 *
817 * 32 16 15 0
818 * -----------------------------------------
819 * | 0 | e3 | e2 | e1 | e4 | X | X | X |
820 * -----------------------------------------
821 *
822 * And are moved into their proper places in FP_SCR1. If digit e4
823 * is non-zero, OPERR is signaled. In all cases, all 4 digits are
824 * written as specified in the 881/882 manual for packed decimal.
825 *
826 * Register usage:
827 * Input/Output
828 * d0: x/LEN call to binstr - final is 0
829 * d1: x/scratch (0);shift count for final exponent packing
830 * d2: x/ms 32-bits of exp fraction/scratch
831 * d3: x/ls 32-bits of exp fraction
832 * d4: LEN/Unchanged
833 * d5: ICTR:LAMBDA/LAMBDA:ICTR
834 * d6: ILOG
835 * d7: k-factor/Unchanged
836 * a0: ptr to result string/ptr to L_SCR1(a6)
837 * a1: ptr to PTENxx array/Unchanged
838 * a2: ptr to FP_SCR2(a6)/Unchanged
839 * fp0: abs(YINT) adjusted/float(ILOG)
840 * fp1: 10^ISCALE/Unchanged
841 * fp2: 10^LEN/Unchanged
842 * F_SCR1:Work area for final result/BCD result
843 * F_SCR2:Y with original exponent/ILOG/10^4
844 * L_SCR1:original USER_FPCR/Exponent digits on return from binstr
845 * L_SCR2:first word of X packed/Unchanged
846
847 A15_st:
848 tst.b BINDEC_FLG(a6) ;check for denorm
849 beq.b not_denorm
850 ftst.x fp0 ;test for zero
851 fbeq.w den_zero ;if zero, use k-factor or 4933
852 fmove.l d6,fp0 ;float ILOG
853 fabs.x fp0 ;get abs of ILOG
854 bra.b convrt
855 den_zero:
856 tst.l d7 ;check sign of the k-factor
857 blt.b use_ilog ;if negative, use ILOG
858 fmove.s F4933,fp0 ;force exponent to 4933
859 bra.b convrt ;do it
860 use_ilog:
861 fmove.l d6,fp0 ;float ILOG
862 fabs.x fp0 ;get abs of ILOG
863 bra.b convrt
864 not_denorm:
865 ftst.x fp0 ;test for zero
866 fbne.w not_zero ;if zero, force exponent
867 fmove.s FONE,fp0 ;force exponent to 1
868 bra.b convrt ;do it
869 not_zero:
870 fmove.l d6,fp0 ;float ILOG
871 fabs.x fp0 ;get abs of ILOG
872 convrt:
873 fdiv.x 24(a1),fp0 ;compute ILOG/10^4
874 fmove.x fp0,FP_SCR2(a6) ;store fp0 in memory
875 move.l 4(a2),d2 ;move word 2 to d2
876 move.l 8(a2),d3 ;move word 3 to d3
877 move.w (a2),d0 ;move exp to d0
878 beq.b x_loop_fin ;if zero, skip the shift
879 subi.w #$3ffd,d0 ;subtract off bias
880 neg.w d0 ;make exp positive
881 x_loop:
882 lsr.l #1,d2 ;shift d2:d3 right
883 roxr.l #1,d3 ;the number of places
884 dbf.w d0,x_loop ;given in d0
885 x_loop_fin:
886 clr.l d1 ;put zero in d1 for addx
887 addi.l #$00000080,d3 ;inc at bit 6
888 addx.l d1,d2 ;continue inc
889 andi.l #$ffffff80,d3 ;strip off lsb not used by 882
890 move.l #4,d0 ;put 4 in d0 for binstr call
891 lea.l L_SCR1(a6),a0 ;a0 is ptr to L_SCR1 for exp digits
892 bsr binstr ;call binstr to convert exp
893 move.l L_SCR1(a6),d0 ;load L_SCR1 lword to d0
894 move.l #12,d1 ;use d1 for shift count
895 lsr.l d1,d0 ;shift d0 right by 12
896 bfins d0,FP_SCR1(a6){4:12} ;put e3:e2:e1 in FP_SCR1
897 lsr.l d1,d0 ;shift d0 right by 12
898 bfins d0,FP_SCR1(a6){16:4} ;put e4 in FP_SCR1
899 tst.b d0 ;check if e4 is zero
900 beq.b A16_st ;if zero, skip rest
901 or.l #opaop_mask,USER_FPSR(a6) ;set OPERR & AIOP in USER_FPSR
902
903
904 * A16. Write sign bits to final string.
905 * Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
906 *
907 * Register usage:
908 * Input/Output
909 * d0: x/scratch - final is x
910 * d2: x/x
911 * d3: x/x
912 * d4: LEN/Unchanged
913 * d5: ICTR:LAMBDA/LAMBDA:ICTR
914 * d6: ILOG/ILOG adjusted
915 * d7: k-factor/Unchanged
916 * a0: ptr to L_SCR1(a6)/Unchanged
917 * a1: ptr to PTENxx array/Unchanged
918 * a2: ptr to FP_SCR2(a6)/Unchanged
919 * fp0: float(ILOG)/Unchanged
920 * fp1: 10^ISCALE/Unchanged
921 * fp2: 10^LEN/Unchanged
922 * F_SCR1:BCD result with correct signs
923 * F_SCR2:ILOG/10^4
924 * L_SCR1:Exponent digits on return from binstr
925 * L_SCR2:first word of X packed/Unchanged
926
927 A16_st:
928 clr.l d0 ;clr d0 for collection of signs
929 andi.b #$0f,FP_SCR1(a6) ;clear first nibble of FP_SCR1
930 tst.l L_SCR2(a6) ;check sign of original mantissa
931 bge.b mant_p ;if pos, don't set SM
932 moveq.l #2,d0 ;move 2 in to d0 for SM
933 mant_p:
934 tst.l d6 ;check sign of ILOG
935 bge.b wr_sgn ;if pos, don't set SE
936 addq.l #1,d0 ;set bit 0 in d0 for SE
937 wr_sgn:
938 bfins d0,FP_SCR1(a6){0:2} ;insert SM and SE into FP_SCR1
939
940 * Clean up and restore all registers used.
941
942 fmove.l #0,FPSR ;clear possible inex2/ainex bits
943 fmovem.x (a7)+,fp0-fp2
944 movem.l (a7)+,d2-d7/a2
945 rts
946
947 end
948