bindec.sa revision 1.4 1 * $NetBSD: bindec.sa,v 1.4 2001/09/16 16:34:32 wiz Exp $
2
3 * MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
4 * M68000 Hi-Performance Microprocessor Division
5 * M68040 Software Package
6 *
7 * M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
8 * All rights reserved.
9 *
10 * THE SOFTWARE is provided on an "AS IS" basis and without warranty.
11 * To the maximum extent permitted by applicable law,
12 * MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
13 * INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
14 * PARTICULAR PURPOSE and any warranty against infringement with
15 * regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
16 * and any accompanying written materials.
17 *
18 * To the maximum extent permitted by applicable law,
19 * IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
20 * (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
21 * PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
22 * OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
23 * SOFTWARE. Motorola assumes no responsibility for the maintenance
24 * and support of the SOFTWARE.
25 *
26 * You are hereby granted a copyright license to use, modify, and
27 * distribute the SOFTWARE so long as this entire notice is retained
28 * without alteration in any modified and/or redistributed versions,
29 * and that such modified versions are clearly identified as such.
30 * No licenses are granted by implication, estoppel or otherwise
31 * under any patents or trademarks of Motorola, Inc.
32
33 *
34 * bindec.sa 3.4 1/3/91
35 *
36 * bindec
37 *
38 * Description:
39 * Converts an input in extended precision format
40 * to bcd format.
41 *
42 * Input:
43 * a0 points to the input extended precision value
44 * value in memory; d0 contains the k-factor sign-extended
45 * to 32-bits. The input may be either normalized,
46 * unnormalized, or denormalized.
47 *
48 * Output: result in the FP_SCR1 space on the stack.
49 *
50 * Saves and Modifies: D2-D7,A2,FP2
51 *
52 * Algorithm:
53 *
54 * A1. Set RM and size ext; Set SIGMA = sign of input.
55 * The k-factor is saved for use in d7. Clear the
56 * BINDEC_FLG for separating normalized/denormalized
57 * input. If input is unnormalized or denormalized,
58 * normalize it.
59 *
60 * A2. Set X = abs(input).
61 *
62 * A3. Compute ILOG.
63 * ILOG is the log base 10 of the input value. It is
64 * approximated by adding e + 0.f when the original
65 * value is viewed as 2^^e * 1.f in extended precision.
66 * This value is stored in d6.
67 *
68 * A4. Clr INEX bit.
69 * The operation in A3 above may have set INEX2.
70 *
71 * A5. Set ICTR = 0;
72 * ICTR is a flag used in A13. It must be set before the
73 * loop entry A6.
74 *
75 * A6. Calculate LEN.
76 * LEN is the number of digits to be displayed. The
77 * k-factor can dictate either the total number of digits,
78 * if it is a positive number, or the number of digits
79 * after the decimal point which are to be included as
80 * significant. See the 68882 manual for examples.
81 * If LEN is computed to be greater than 17, set OPERR in
82 * USER_FPSR. LEN is stored in d4.
83 *
84 * A7. Calculate SCALE.
85 * SCALE is equal to 10^ISCALE, where ISCALE is the number
86 * of decimal places needed to insure LEN integer digits
87 * in the output before conversion to bcd. LAMBDA is the
88 * sign of ISCALE, used in A9. Fp1 contains
89 * 10^^(abs(ISCALE)) using a rounding mode which is a
90 * function of the original rounding mode and the signs
91 * of ISCALE and X. A table is given in the code.
92 *
93 * A8. Clr INEX; Force RZ.
94 * The operation in A3 above may have set INEX2.
95 * RZ mode is forced for the scaling operation to insure
96 * only one rounding error. The grs bits are collected in
97 * the INEX flag for use in A10.
98 *
99 * A9. Scale X -> Y.
100 * The mantissa is scaled to the desired number of
101 * significant digits. The excess digits are collected
102 * in INEX2.
103 *
104 * A10. Or in INEX.
105 * If INEX is set, round error occurred. This is
106 * compensated for by 'or-ing' in the INEX2 flag to
107 * the lsb of Y.
108 *
109 * A11. Restore original FPCR; set size ext.
110 * Perform FINT operation in the user's rounding mode.
111 * Keep the size to extended.
112 *
113 * A12. Calculate YINT = FINT(Y) according to user's rounding
114 * mode. The FPSP routine sintd0 is used. The output
115 * is in fp0.
116 *
117 * A13. Check for LEN digits.
118 * If the int operation results in more than LEN digits,
119 * or less than LEN -1 digits, adjust ILOG and repeat from
120 * A6. This test occurs only on the first pass. If the
121 * result is exactly 10^LEN, decrement ILOG and divide
122 * the mantissa by 10.
123 *
124 * A14. Convert the mantissa to bcd.
125 * The binstr routine is used to convert the LEN digit
126 * mantissa to bcd in memory. The input to binstr is
127 * to be a fraction; i.e. (mantissa)/10^LEN and adjusted
128 * such that the decimal point is to the left of bit 63.
129 * The bcd digits are stored in the correct position in
130 * the final string area in memory.
131 *
132 * A15. Convert the exponent to bcd.
133 * As in A14 above, the exp is converted to bcd and the
134 * digits are stored in the final string.
135 * Test the length of the final exponent string. If the
136 * length is 4, set operr.
137 *
138 * A16. Write sign bits to final string.
139 *
140 * Implementation Notes:
141 *
142 * The registers are used as follows:
143 *
144 * d0: scratch; LEN input to binstr
145 * d1: scratch
146 * d2: upper 32-bits of mantissa for binstr
147 * d3: scratch;lower 32-bits of mantissa for binstr
148 * d4: LEN
149 * d5: LAMBDA/ICTR
150 * d6: ILOG
151 * d7: k-factor
152 * a0: ptr for original operand/final result
153 * a1: scratch pointer
154 * a2: pointer to FP_X; abs(original value) in ext
155 * fp0: scratch
156 * fp1: scratch
157 * fp2: scratch
158 * F_SCR1:
159 * F_SCR2:
160 * L_SCR1:
161 * L_SCR2:
162 *
163
164 BINDEC IDNT 2,1 Motorola 040 Floating Point Software Package
165
166 include fpsp.h
167
168 section 8
169
170 * Constants in extended precision
171 LOG2 dc.l $3FFD0000,$9A209A84,$FBCFF798,$00000000
172 LOG2UP1 dc.l $3FFD0000,$9A209A84,$FBCFF799,$00000000
173
174 * Constants in single precision
175 FONE dc.l $3F800000,$00000000,$00000000,$00000000
176 FTWO dc.l $40000000,$00000000,$00000000,$00000000
177 FTEN dc.l $41200000,$00000000,$00000000,$00000000
178 F4933 dc.l $459A2800,$00000000,$00000000,$00000000
179
180 RBDTBL dc.b 0,0,0,0
181 dc.b 3,3,2,2
182 dc.b 3,2,2,3
183 dc.b 2,3,3,2
184
185 xref binstr
186 xref sintdo
187 xref ptenrn,ptenrm,ptenrp
188
189 xdef bindec
190 xdef sc_mul
191 bindec:
192 movem.l d2-d7/a2,-(a7)
193 fmovem.x fp0-fp2,-(a7)
194
195 * A1. Set RM and size ext. Set SIGMA = sign input;
196 * The k-factor is saved for use in d7. Clear BINDEC_FLG for
197 * separating normalized/denormalized input. If the input
198 * is a denormalized number, set the BINDEC_FLG memory word
199 * to signal denorm. If the input is unnormalized, normalize
200 * the input and test for denormalized result.
201 *
202 fmove.l #rm_mode,FPCR ;set RM and ext
203 move.l (a0),L_SCR2(a6) ;save exponent for sign check
204 move.l d0,d7 ;move k-factor to d7
205 clr.b BINDEC_FLG(a6) ;clr norm/denorm flag
206 move.w STAG(a6),d0 ;get stag
207 andi.w #$e000,d0 ;isolate stag bits
208 beq A2_str ;if zero, input is norm
209 *
210 * Normalize the denorm
211 *
212 un_de_norm:
213 move.w (a0),d0
214 andi.w #$7fff,d0 ;strip sign of normalized exp
215 move.l 4(a0),d1
216 move.l 8(a0),d2
217 norm_loop:
218 sub.w #1,d0
219 add.l d2,d2
220 addx.l d1,d1
221 tst.l d1
222 bge.b norm_loop
223 *
224 * Test if the normalized input is denormalized
225 *
226 tst.w d0
227 bgt.b pos_exp ;if greater than zero, it is a norm
228 st BINDEC_FLG(a6) ;set flag for denorm
229 pos_exp:
230 andi.w #$7fff,d0 ;strip sign of normalized exp
231 move.w d0,(a0)
232 move.l d1,4(a0)
233 move.l d2,8(a0)
234
235 * A2. Set X = abs(input).
236 *
237 A2_str:
238 move.l (a0),FP_SCR2(a6) ; move input to work space
239 move.l 4(a0),FP_SCR2+4(a6) ; move input to work space
240 move.l 8(a0),FP_SCR2+8(a6) ; move input to work space
241 andi.l #$7fffffff,FP_SCR2(a6) ;create abs(X)
242
243 * A3. Compute ILOG.
244 * ILOG is the log base 10 of the input value. It is approx-
245 * imated by adding e + 0.f when the original value is viewed
246 * as 2^^e * 1.f in extended precision. This value is stored
247 * in d6.
248 *
249 * Register usage:
250 * Input/Output
251 * d0: k-factor/exponent
252 * d2: x/x
253 * d3: x/x
254 * d4: x/x
255 * d5: x/x
256 * d6: x/ILOG
257 * d7: k-factor/Unchanged
258 * a0: ptr for original operand/final result
259 * a1: x/x
260 * a2: x/x
261 * fp0: x/float(ILOG)
262 * fp1: x/x
263 * fp2: x/x
264 * F_SCR1:x/x
265 * F_SCR2:Abs(X)/Abs(X) with $3fff exponent
266 * L_SCR1:x/x
267 * L_SCR2:first word of X packed/Unchanged
268
269 tst.b BINDEC_FLG(a6) ;check for denorm
270 beq.b A3_cont ;if clr, continue with norm
271 move.l #-4933,d6 ;force ILOG = -4933
272 bra.b A4_str
273 A3_cont:
274 move.w FP_SCR2(a6),d0 ;move exp to d0
275 move.w #$3fff,FP_SCR2(a6) ;replace exponent with 0x3fff
276 fmove.x FP_SCR2(a6),fp0 ;now fp0 has 1.f
277 sub.w #$3fff,d0 ;strip off bias
278 fadd.w d0,fp0 ;add in exp
279 fsub.s FONE,fp0 ;subtract off 1.0
280 fbge.w pos_res ;if pos, branch
281 fmul.x LOG2UP1,fp0 ;if neg, mul by LOG2UP1
282 fmove.l fp0,d6 ;put ILOG in d6 as a lword
283 bra.b A4_str ;go move out ILOG
284 pos_res:
285 fmul.x LOG2,fp0 ;if pos, mul by LOG2
286 fmove.l fp0,d6 ;put ILOG in d6 as a lword
287
288
289 * A4. Clr INEX bit.
290 * The operation in A3 above may have set INEX2.
291
292 A4_str:
293 fmove.l #0,FPSR ;zero all of fpsr - nothing needed
294
295
296 * A5. Set ICTR = 0;
297 * ICTR is a flag used in A13. It must be set before the
298 * loop entry A6. The lower word of d5 is used for ICTR.
299
300 clr.w d5 ;clear ICTR
301
302
303 * A6. Calculate LEN.
304 * LEN is the number of digits to be displayed. The k-factor
305 * can dictate either the total number of digits, if it is
306 * a positive number, or the number of digits after the
307 * original decimal point which are to be included as
308 * significant. See the 68882 manual for examples.
309 * If LEN is computed to be greater than 17, set OPERR in
310 * USER_FPSR. LEN is stored in d4.
311 *
312 * Register usage:
313 * Input/Output
314 * d0: exponent/Unchanged
315 * d2: x/x/scratch
316 * d3: x/x
317 * d4: exc picture/LEN
318 * d5: ICTR/Unchanged
319 * d6: ILOG/Unchanged
320 * d7: k-factor/Unchanged
321 * a0: ptr for original operand/final result
322 * a1: x/x
323 * a2: x/x
324 * fp0: float(ILOG)/Unchanged
325 * fp1: x/x
326 * fp2: x/x
327 * F_SCR1:x/x
328 * F_SCR2:Abs(X) with $3fff exponent/Unchanged
329 * L_SCR1:x/x
330 * L_SCR2:first word of X packed/Unchanged
331
332 A6_str:
333 tst.l d7 ;branch on sign of k
334 ble.b k_neg ;if k <= 0, LEN = ILOG + 1 - k
335 move.l d7,d4 ;if k > 0, LEN = k
336 bra.b len_ck ;skip to LEN check
337 k_neg:
338 move.l d6,d4 ;first load ILOG to d4
339 sub.l d7,d4 ;subtract off k
340 addq.l #1,d4 ;add in the 1
341 len_ck:
342 tst.l d4 ;LEN check: branch on sign of LEN
343 ble.b LEN_ng ;if neg, set LEN = 1
344 cmp.l #17,d4 ;test if LEN > 17
345 ble.b A7_str ;if not, forget it
346 move.l #17,d4 ;set max LEN = 17
347 tst.l d7 ;if negative, never set OPERR
348 ble.b A7_str ;if positive, continue
349 or.l #opaop_mask,USER_FPSR(a6) ;set OPERR & AIOP in USER_FPSR
350 bra.b A7_str ;finished here
351 LEN_ng:
352 moveq.l #1,d4 ;min LEN is 1
353
354
355 * A7. Calculate SCALE.
356 * SCALE is equal to 10^ISCALE, where ISCALE is the number
357 * of decimal places needed to insure LEN integer digits
358 * in the output before conversion to bcd. LAMBDA is the sign
359 * of ISCALE, used in A9. Fp1 contains 10^^(abs(ISCALE)) using
360 * the rounding mode as given in the following table (see
361 * Coonen, p. 7.23 as ref.; however, the SCALE variable is
362 * of opposite sign in bindec.sa from Coonen).
363 *
364 * Initial USE
365 * FPCR[6:5] LAMBDA SIGN(X) FPCR[6:5]
366 * ----------------------------------------------
367 * RN 00 0 0 00/0 RN
368 * RN 00 0 1 00/0 RN
369 * RN 00 1 0 00/0 RN
370 * RN 00 1 1 00/0 RN
371 * RZ 01 0 0 11/3 RP
372 * RZ 01 0 1 11/3 RP
373 * RZ 01 1 0 10/2 RM
374 * RZ 01 1 1 10/2 RM
375 * RM 10 0 0 11/3 RP
376 * RM 10 0 1 10/2 RM
377 * RM 10 1 0 10/2 RM
378 * RM 10 1 1 11/3 RP
379 * RP 11 0 0 10/2 RM
380 * RP 11 0 1 11/3 RP
381 * RP 11 1 0 11/3 RP
382 * RP 11 1 1 10/2 RM
383 *
384 * Register usage:
385 * Input/Output
386 * d0: exponent/scratch - final is 0
387 * d2: x/0 or 24 for A9
388 * d3: x/scratch - offset ptr into PTENRM array
389 * d4: LEN/Unchanged
390 * d5: 0/ICTR:LAMBDA
391 * d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
392 * d7: k-factor/Unchanged
393 * a0: ptr for original operand/final result
394 * a1: x/ptr to PTENRM array
395 * a2: x/x
396 * fp0: float(ILOG)/Unchanged
397 * fp1: x/10^ISCALE
398 * fp2: x/x
399 * F_SCR1:x/x
400 * F_SCR2:Abs(X) with $3fff exponent/Unchanged
401 * L_SCR1:x/x
402 * L_SCR2:first word of X packed/Unchanged
403
404 A7_str:
405 tst.l d7 ;test sign of k
406 bgt.b k_pos ;if pos and > 0, skip this
407 cmp.l d6,d7 ;test k - ILOG
408 blt.b k_pos ;if ILOG >= k, skip this
409 move.l d7,d6 ;if ((k<0) & (ILOG < k)) ILOG = k
410 k_pos:
411 move.l d6,d0 ;calc ILOG + 1 - LEN in d0
412 addq.l #1,d0 ;add the 1
413 sub.l d4,d0 ;sub off LEN
414 swap d5 ;use upper word of d5 for LAMBDA
415 clr.w d5 ;set it zero initially
416 clr.w d2 ;set up d2 for very small case
417 tst.l d0 ;test sign of ISCALE
418 bge.b iscale ;if pos, skip next inst
419 addq.w #1,d5 ;if neg, set LAMBDA true
420 cmp.l #$ffffecd4,d0 ;test iscale <= -4908
421 bgt.b no_inf ;if false, skip rest
422 addi.l #24,d0 ;add in 24 to iscale
423 move.l #24,d2 ;put 24 in d2 for A9
424 no_inf:
425 neg.l d0 ;and take abs of ISCALE
426 iscale:
427 fmove.s FONE,fp1 ;init fp1 to 1
428 bfextu USER_FPCR(a6){26:2},d1 ;get initial rmode bits
429 add.w d1,d1 ;put them in bits 2:1
430 add.w d5,d1 ;add in LAMBDA
431 add.w d1,d1 ;put them in bits 3:1
432 tst.l L_SCR2(a6) ;test sign of original x
433 bge.b x_pos ;if pos, don't set bit 0
434 addq.l #1,d1 ;if neg, set bit 0
435 x_pos:
436 lea.l RBDTBL,a2 ;load rbdtbl base
437 move.b (a2,d1),d3 ;load d3 with new rmode
438 lsl.l #4,d3 ;put bits in proper position
439 fmove.l d3,fpcr ;load bits into fpu
440 lsr.l #4,d3 ;put bits in proper position
441 tst.b d3 ;decode new rmode for pten table
442 bne.b not_rn ;if zero, it is RN
443 lea.l PTENRN,a1 ;load a1 with RN table base
444 bra.b rmode ;exit decode
445 not_rn:
446 lsr.b #1,d3 ;get lsb in carry
447 bcc.b not_rp ;if carry clear, it is RM
448 lea.l PTENRP,a1 ;load a1 with RP table base
449 bra.b rmode ;exit decode
450 not_rp:
451 lea.l PTENRM,a1 ;load a1 with RM table base
452 rmode:
453 clr.l d3 ;clr table index
454 e_loop:
455 lsr.l #1,d0 ;shift next bit into carry
456 bcc.b e_next ;if zero, skip the mul
457 fmul.x (a1,d3),fp1 ;mul by 10**(d3_bit_no)
458 e_next:
459 add.l #12,d3 ;inc d3 to next pwrten table entry
460 tst.l d0 ;test if ISCALE is zero
461 bne.b e_loop ;if not, loop
462
463
464 * A8. Clr INEX; Force RZ.
465 * The operation in A3 above may have set INEX2.
466 * RZ mode is forced for the scaling operation to insure
467 * only one rounding error. The grs bits are collected in
468 * the INEX flag for use in A10.
469 *
470 * Register usage:
471 * Input/Output
472
473 fmove.l #0,FPSR ;clr INEX
474 fmove.l #rz_mode,FPCR ;set RZ rounding mode
475
476
477 * A9. Scale X -> Y.
478 * The mantissa is scaled to the desired number of significant
479 * digits. The excess digits are collected in INEX2. If mul,
480 * Check d2 for excess 10 exponential value. If not zero,
481 * the iscale value would have caused the pwrten calculation
482 * to overflow. Only a negative iscale can cause this, so
483 * multiply by 10^(d2), which is now only allowed to be 24,
484 * with a multiply by 10^8 and 10^16, which is exact since
485 * 10^24 is exact. If the input was denormalized, we must
486 * create a busy stack frame with the mul command and the
487 * two operands, and allow the fpu to complete the multiply.
488 *
489 * Register usage:
490 * Input/Output
491 * d0: FPCR with RZ mode/Unchanged
492 * d2: 0 or 24/unchanged
493 * d3: x/x
494 * d4: LEN/Unchanged
495 * d5: ICTR:LAMBDA
496 * d6: ILOG/Unchanged
497 * d7: k-factor/Unchanged
498 * a0: ptr for original operand/final result
499 * a1: ptr to PTENRM array/Unchanged
500 * a2: x/x
501 * fp0: float(ILOG)/X adjusted for SCALE (Y)
502 * fp1: 10^ISCALE/Unchanged
503 * fp2: x/x
504 * F_SCR1:x/x
505 * F_SCR2:Abs(X) with $3fff exponent/Unchanged
506 * L_SCR1:x/x
507 * L_SCR2:first word of X packed/Unchanged
508
509 A9_str:
510 fmove.x (a0),fp0 ;load X from memory
511 fabs.x fp0 ;use abs(X)
512 tst.w d5 ;LAMBDA is in lower word of d5
513 bne.b sc_mul ;if neg (LAMBDA = 1), scale by mul
514 fdiv.x fp1,fp0 ;calculate X / SCALE -> Y to fp0
515 bra.b A10_st ;branch to A10
516
517 sc_mul:
518 tst.b BINDEC_FLG(a6) ;check for denorm
519 beq.b A9_norm ;if norm, continue with mul
520 fmovem.x fp1,-(a7) ;load ETEMP with 10^ISCALE
521 move.l 8(a0),-(a7) ;load FPTEMP with input arg
522 move.l 4(a0),-(a7)
523 move.l (a0),-(a7)
524 move.l #18,d3 ;load count for busy stack
525 A9_loop:
526 clr.l -(a7) ;clear lword on stack
527 dbf.w d3,A9_loop
528 move.b VER_TMP(a6),(a7) ;write current version number
529 move.b #BUSY_SIZE-4,1(a7) ;write current busy size
530 move.b #$10,$44(a7) ;set fcefpte[15] bit
531 move.w #$0023,$40(a7) ;load cmdreg1b with mul command
532 move.b #$fe,$8(a7) ;load all 1s to cu savepc
533 frestore (a7)+ ;restore frame to fpu for completion
534 fmul.x 36(a1),fp0 ;multiply fp0 by 10^8
535 fmul.x 48(a1),fp0 ;multiply fp0 by 10^16
536 bra.b A10_st
537 A9_norm:
538 tst.w d2 ;test for small exp case
539 beq.b A9_con ;if zero, continue as normal
540 fmul.x 36(a1),fp0 ;multiply fp0 by 10^8
541 fmul.x 48(a1),fp0 ;multiply fp0 by 10^16
542 A9_con:
543 fmul.x fp1,fp0 ;calculate X * SCALE -> Y to fp0
544
545
546 * A10. Or in INEX.
547 * If INEX is set, round error occurred. This is compensated
548 * for by 'or-ing' in the INEX2 flag to the lsb of Y.
549 *
550 * Register usage:
551 * Input/Output
552 * d0: FPCR with RZ mode/FPSR with INEX2 isolated
553 * d2: x/x
554 * d3: x/x
555 * d4: LEN/Unchanged
556 * d5: ICTR:LAMBDA
557 * d6: ILOG/Unchanged
558 * d7: k-factor/Unchanged
559 * a0: ptr for original operand/final result
560 * a1: ptr to PTENxx array/Unchanged
561 * a2: x/ptr to FP_SCR2(a6)
562 * fp0: Y/Y with lsb adjusted
563 * fp1: 10^ISCALE/Unchanged
564 * fp2: x/x
565
566 A10_st:
567 fmove.l FPSR,d0 ;get FPSR
568 fmove.x fp0,FP_SCR2(a6) ;move Y to memory
569 lea.l FP_SCR2(a6),a2 ;load a2 with ptr to FP_SCR2
570 btst.l #9,d0 ;check if INEX2 set
571 beq.b A11_st ;if clear, skip rest
572 ori.l #1,8(a2) ;or in 1 to lsb of mantissa
573 fmove.x FP_SCR2(a6),fp0 ;write adjusted Y back to fpu
574
575
576 * A11. Restore original FPCR; set size ext.
577 * Perform FINT operation in the user's rounding mode. Keep
578 * the size to extended. The sintdo entry point in the sint
579 * routine expects the FPCR value to be in USER_FPCR for
580 * mode and precision. The original FPCR is saved in L_SCR1.
581
582 A11_st:
583 move.l USER_FPCR(a6),L_SCR1(a6) ;save it for later
584 andi.l #$00000030,USER_FPCR(a6) ;set size to ext,
585 * ;block exceptions
586
587
588 * A12. Calculate YINT = FINT(Y) according to user's rounding mode.
589 * The FPSP routine sintd0 is used. The output is in fp0.
590 *
591 * Register usage:
592 * Input/Output
593 * d0: FPSR with AINEX cleared/FPCR with size set to ext
594 * d2: x/x/scratch
595 * d3: x/x
596 * d4: LEN/Unchanged
597 * d5: ICTR:LAMBDA/Unchanged
598 * d6: ILOG/Unchanged
599 * d7: k-factor/Unchanged
600 * a0: ptr for original operand/src ptr for sintdo
601 * a1: ptr to PTENxx array/Unchanged
602 * a2: ptr to FP_SCR2(a6)/Unchanged
603 * a6: temp pointer to FP_SCR2(a6) - orig value saved and restored
604 * fp0: Y/YINT
605 * fp1: 10^ISCALE/Unchanged
606 * fp2: x/x
607 * F_SCR1:x/x
608 * F_SCR2:Y adjusted for inex/Y with original exponent
609 * L_SCR1:x/original USER_FPCR
610 * L_SCR2:first word of X packed/Unchanged
611
612 A12_st:
613 movem.l d0-d1/a0-a1,-(a7) ;save regs used by sintd0
614 move.l L_SCR1(a6),-(a7)
615 move.l L_SCR2(a6),-(a7)
616 lea.l FP_SCR2(a6),a0 ;a0 is ptr to F_SCR2(a6)
617 fmove.x fp0,(a0) ;move Y to memory at FP_SCR2(a6)
618 tst.l L_SCR2(a6) ;test sign of original operand
619 bge.b do_fint ;if pos, use Y
620 or.l #$80000000,(a0) ;if neg, use -Y
621 do_fint:
622 move.l USER_FPSR(a6),-(a7)
623 bsr sintdo ;sint routine returns int in fp0
624 move.b (a7),USER_FPSR(a6)
625 add.l #4,a7
626 move.l (a7)+,L_SCR2(a6)
627 move.l (a7)+,L_SCR1(a6)
628 movem.l (a7)+,d0-d1/a0-a1 ;restore regs used by sint
629 move.l L_SCR2(a6),FP_SCR2(a6) ;restore original exponent
630 move.l L_SCR1(a6),USER_FPCR(a6) ;restore user's FPCR
631
632
633 * A13. Check for LEN digits.
634 * If the int operation results in more than LEN digits,
635 * or less than LEN -1 digits, adjust ILOG and repeat from
636 * A6. This test occurs only on the first pass. If the
637 * result is exactly 10^LEN, decrement ILOG and divide
638 * the mantissa by 10. The calculation of 10^LEN cannot
639 * be inexact, since all powers of ten upto 10^27 are exact
640 * in extended precision, so the use of a previous power-of-ten
641 * table will introduce no error.
642 *
643 *
644 * Register usage:
645 * Input/Output
646 * d0: FPCR with size set to ext/scratch final = 0
647 * d2: x/x
648 * d3: x/scratch final = x
649 * d4: LEN/LEN adjusted
650 * d5: ICTR:LAMBDA/LAMBDA:ICTR
651 * d6: ILOG/ILOG adjusted
652 * d7: k-factor/Unchanged
653 * a0: pointer into memory for packed bcd string formation
654 * a1: ptr to PTENxx array/Unchanged
655 * a2: ptr to FP_SCR2(a6)/Unchanged
656 * fp0: int portion of Y/abs(YINT) adjusted
657 * fp1: 10^ISCALE/Unchanged
658 * fp2: x/10^LEN
659 * F_SCR1:x/x
660 * F_SCR2:Y with original exponent/Unchanged
661 * L_SCR1:original USER_FPCR/Unchanged
662 * L_SCR2:first word of X packed/Unchanged
663
664 A13_st:
665 swap d5 ;put ICTR in lower word of d5
666 tst.w d5 ;check if ICTR = 0
667 bne not_zr ;if non-zero, go to second test
668 *
669 * Compute 10^(LEN-1)
670 *
671 fmove.s FONE,fp2 ;init fp2 to 1.0
672 move.l d4,d0 ;put LEN in d0
673 subq.l #1,d0 ;d0 = LEN -1
674 clr.l d3 ;clr table index
675 l_loop:
676 lsr.l #1,d0 ;shift next bit into carry
677 bcc.b l_next ;if zero, skip the mul
678 fmul.x (a1,d3),fp2 ;mul by 10**(d3_bit_no)
679 l_next:
680 add.l #12,d3 ;inc d3 to next pwrten table entry
681 tst.l d0 ;test if LEN is zero
682 bne.b l_loop ;if not, loop
683 *
684 * 10^LEN-1 is computed for this test and A14. If the input was
685 * denormalized, check only the case in which YINT > 10^LEN.
686 *
687 tst.b BINDEC_FLG(a6) ;check if input was norm
688 beq.b A13_con ;if norm, continue with checking
689 fabs.x fp0 ;take abs of YINT
690 bra test_2
691 *
692 * Compare abs(YINT) to 10^(LEN-1) and 10^LEN
693 *
694 A13_con:
695 fabs.x fp0 ;take abs of YINT
696 fcmp.x fp2,fp0 ;compare abs(YINT) with 10^(LEN-1)
697 fbge.w test_2 ;if greater, do next test
698 subq.l #1,d6 ;subtract 1 from ILOG
699 move.w #1,d5 ;set ICTR
700 fmove.l #rm_mode,FPCR ;set rmode to RM
701 fmul.s FTEN,fp2 ;compute 10^LEN
702 bra.w A6_str ;return to A6 and recompute YINT
703 test_2:
704 fmul.s FTEN,fp2 ;compute 10^LEN
705 fcmp.x fp2,fp0 ;compare abs(YINT) with 10^LEN
706 fblt.w A14_st ;if less, all is ok, go to A14
707 fbgt.w fix_ex ;if greater, fix and redo
708 fdiv.s FTEN,fp0 ;if equal, divide by 10
709 addq.l #1,d6 ; and inc ILOG
710 bra.b A14_st ; and continue elsewhere
711 fix_ex:
712 addq.l #1,d6 ;increment ILOG by 1
713 move.w #1,d5 ;set ICTR
714 fmove.l #rm_mode,FPCR ;set rmode to RM
715 bra.w A6_str ;return to A6 and recompute YINT
716 *
717 * Since ICTR <> 0, we have already been through one adjustment,
718 * and shouldn't have another; this is to check if abs(YINT) = 10^LEN
719 * 10^LEN is again computed using whatever table is in a1 since the
720 * value calculated cannot be inexact.
721 *
722 not_zr:
723 fmove.s FONE,fp2 ;init fp2 to 1.0
724 move.l d4,d0 ;put LEN in d0
725 clr.l d3 ;clr table index
726 z_loop:
727 lsr.l #1,d0 ;shift next bit into carry
728 bcc.b z_next ;if zero, skip the mul
729 fmul.x (a1,d3),fp2 ;mul by 10**(d3_bit_no)
730 z_next:
731 add.l #12,d3 ;inc d3 to next pwrten table entry
732 tst.l d0 ;test if LEN is zero
733 bne.b z_loop ;if not, loop
734 fabs.x fp0 ;get abs(YINT)
735 fcmp.x fp2,fp0 ;check if abs(YINT) = 10^LEN
736 fbne.w A14_st ;if not, skip this
737 fdiv.s FTEN,fp0 ;divide abs(YINT) by 10
738 addq.l #1,d6 ;and inc ILOG by 1
739 addq.l #1,d4 ; and inc LEN
740 fmul.s FTEN,fp2 ; if LEN++, the get 10^^LEN
741
742
743 * A14. Convert the mantissa to bcd.
744 * The binstr routine is used to convert the LEN digit
745 * mantissa to bcd in memory. The input to binstr is
746 * to be a fraction; i.e. (mantissa)/10^LEN and adjusted
747 * such that the decimal point is to the left of bit 63.
748 * The bcd digits are stored in the correct position in
749 * the final string area in memory.
750 *
751 *
752 * Register usage:
753 * Input/Output
754 * d0: x/LEN call to binstr - final is 0
755 * d1: x/0
756 * d2: x/ms 32-bits of mant of abs(YINT)
757 * d3: x/ls 32-bits of mant of abs(YINT)
758 * d4: LEN/Unchanged
759 * d5: ICTR:LAMBDA/LAMBDA:ICTR
760 * d6: ILOG
761 * d7: k-factor/Unchanged
762 * a0: pointer into memory for packed bcd string formation
763 * /ptr to first mantissa byte in result string
764 * a1: ptr to PTENxx array/Unchanged
765 * a2: ptr to FP_SCR2(a6)/Unchanged
766 * fp0: int portion of Y/abs(YINT) adjusted
767 * fp1: 10^ISCALE/Unchanged
768 * fp2: 10^LEN/Unchanged
769 * F_SCR1:x/Work area for final result
770 * F_SCR2:Y with original exponent/Unchanged
771 * L_SCR1:original USER_FPCR/Unchanged
772 * L_SCR2:first word of X packed/Unchanged
773
774 A14_st:
775 fmove.l #rz_mode,FPCR ;force rz for conversion
776 fdiv.x fp2,fp0 ;divide abs(YINT) by 10^LEN
777 lea.l FP_SCR1(a6),a0
778 fmove.x fp0,(a0) ;move abs(YINT)/10^LEN to memory
779 move.l 4(a0),d2 ;move 2nd word of FP_RES to d2
780 move.l 8(a0),d3 ;move 3rd word of FP_RES to d3
781 clr.l 4(a0) ;zero word 2 of FP_RES
782 clr.l 8(a0) ;zero word 3 of FP_RES
783 move.l (a0),d0 ;move exponent to d0
784 swap d0 ;put exponent in lower word
785 beq.b no_sft ;if zero, don't shift
786 subi.l #$3ffd,d0 ;sub bias less 2 to make fract
787 tst.l d0 ;check if > 1
788 bgt.b no_sft ;if so, don't shift
789 neg.l d0 ;make exp positive
790 m_loop:
791 lsr.l #1,d2 ;shift d2:d3 right, add 0s
792 roxr.l #1,d3 ;the number of places
793 dbf.w d0,m_loop ;given in d0
794 no_sft:
795 tst.l d2 ;check for mantissa of zero
796 bne.b no_zr ;if not, go on
797 tst.l d3 ;continue zero check
798 beq.b zer_m ;if zero, go directly to binstr
799 no_zr:
800 clr.l d1 ;put zero in d1 for addx
801 addi.l #$00000080,d3 ;inc at bit 7
802 addx.l d1,d2 ;continue inc
803 andi.l #$ffffff80,d3 ;strip off lsb not used by 882
804 zer_m:
805 move.l d4,d0 ;put LEN in d0 for binstr call
806 addq.l #3,a0 ;a0 points to M16 byte in result
807 bsr binstr ;call binstr to convert mant
808
809
810 * A15. Convert the exponent to bcd.
811 * As in A14 above, the exp is converted to bcd and the
812 * digits are stored in the final string.
813 *
814 * Digits are stored in L_SCR1(a6) on return from BINDEC as:
815 *
816 * 32 16 15 0
817 * -----------------------------------------
818 * | 0 | e3 | e2 | e1 | e4 | X | X | X |
819 * -----------------------------------------
820 *
821 * And are moved into their proper places in FP_SCR1. If digit e4
822 * is non-zero, OPERR is signaled. In all cases, all 4 digits are
823 * written as specified in the 881/882 manual for packed decimal.
824 *
825 * Register usage:
826 * Input/Output
827 * d0: x/LEN call to binstr - final is 0
828 * d1: x/scratch (0);shift count for final exponent packing
829 * d2: x/ms 32-bits of exp fraction/scratch
830 * d3: x/ls 32-bits of exp fraction
831 * d4: LEN/Unchanged
832 * d5: ICTR:LAMBDA/LAMBDA:ICTR
833 * d6: ILOG
834 * d7: k-factor/Unchanged
835 * a0: ptr to result string/ptr to L_SCR1(a6)
836 * a1: ptr to PTENxx array/Unchanged
837 * a2: ptr to FP_SCR2(a6)/Unchanged
838 * fp0: abs(YINT) adjusted/float(ILOG)
839 * fp1: 10^ISCALE/Unchanged
840 * fp2: 10^LEN/Unchanged
841 * F_SCR1:Work area for final result/BCD result
842 * F_SCR2:Y with original exponent/ILOG/10^4
843 * L_SCR1:original USER_FPCR/Exponent digits on return from binstr
844 * L_SCR2:first word of X packed/Unchanged
845
846 A15_st:
847 tst.b BINDEC_FLG(a6) ;check for denorm
848 beq.b not_denorm
849 ftst.x fp0 ;test for zero
850 fbeq.w den_zero ;if zero, use k-factor or 4933
851 fmove.l d6,fp0 ;float ILOG
852 fabs.x fp0 ;get abs of ILOG
853 bra.b convrt
854 den_zero:
855 tst.l d7 ;check sign of the k-factor
856 blt.b use_ilog ;if negative, use ILOG
857 fmove.s F4933,fp0 ;force exponent to 4933
858 bra.b convrt ;do it
859 use_ilog:
860 fmove.l d6,fp0 ;float ILOG
861 fabs.x fp0 ;get abs of ILOG
862 bra.b convrt
863 not_denorm:
864 ftst.x fp0 ;test for zero
865 fbne.w not_zero ;if zero, force exponent
866 fmove.s FONE,fp0 ;force exponent to 1
867 bra.b convrt ;do it
868 not_zero:
869 fmove.l d6,fp0 ;float ILOG
870 fabs.x fp0 ;get abs of ILOG
871 convrt:
872 fdiv.x 24(a1),fp0 ;compute ILOG/10^4
873 fmove.x fp0,FP_SCR2(a6) ;store fp0 in memory
874 move.l 4(a2),d2 ;move word 2 to d2
875 move.l 8(a2),d3 ;move word 3 to d3
876 move.w (a2),d0 ;move exp to d0
877 beq.b x_loop_fin ;if zero, skip the shift
878 subi.w #$3ffd,d0 ;subtract off bias
879 neg.w d0 ;make exp positive
880 x_loop:
881 lsr.l #1,d2 ;shift d2:d3 right
882 roxr.l #1,d3 ;the number of places
883 dbf.w d0,x_loop ;given in d0
884 x_loop_fin:
885 clr.l d1 ;put zero in d1 for addx
886 addi.l #$00000080,d3 ;inc at bit 6
887 addx.l d1,d2 ;continue inc
888 andi.l #$ffffff80,d3 ;strip off lsb not used by 882
889 move.l #4,d0 ;put 4 in d0 for binstr call
890 lea.l L_SCR1(a6),a0 ;a0 is ptr to L_SCR1 for exp digits
891 bsr binstr ;call binstr to convert exp
892 move.l L_SCR1(a6),d0 ;load L_SCR1 lword to d0
893 move.l #12,d1 ;use d1 for shift count
894 lsr.l d1,d0 ;shift d0 right by 12
895 bfins d0,FP_SCR1(a6){4:12} ;put e3:e2:e1 in FP_SCR1
896 lsr.l d1,d0 ;shift d0 right by 12
897 bfins d0,FP_SCR1(a6){16:4} ;put e4 in FP_SCR1
898 tst.b d0 ;check if e4 is zero
899 beq.b A16_st ;if zero, skip rest
900 or.l #opaop_mask,USER_FPSR(a6) ;set OPERR & AIOP in USER_FPSR
901
902
903 * A16. Write sign bits to final string.
904 * Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
905 *
906 * Register usage:
907 * Input/Output
908 * d0: x/scratch - final is x
909 * d2: x/x
910 * d3: x/x
911 * d4: LEN/Unchanged
912 * d5: ICTR:LAMBDA/LAMBDA:ICTR
913 * d6: ILOG/ILOG adjusted
914 * d7: k-factor/Unchanged
915 * a0: ptr to L_SCR1(a6)/Unchanged
916 * a1: ptr to PTENxx array/Unchanged
917 * a2: ptr to FP_SCR2(a6)/Unchanged
918 * fp0: float(ILOG)/Unchanged
919 * fp1: 10^ISCALE/Unchanged
920 * fp2: 10^LEN/Unchanged
921 * F_SCR1:BCD result with correct signs
922 * F_SCR2:ILOG/10^4
923 * L_SCR1:Exponent digits on return from binstr
924 * L_SCR2:first word of X packed/Unchanged
925
926 A16_st:
927 clr.l d0 ;clr d0 for collection of signs
928 andi.b #$0f,FP_SCR1(a6) ;clear first nibble of FP_SCR1
929 tst.l L_SCR2(a6) ;check sign of original mantissa
930 bge.b mant_p ;if pos, don't set SM
931 moveq.l #2,d0 ;move 2 in to d0 for SM
932 mant_p:
933 tst.l d6 ;check sign of ILOG
934 bge.b wr_sgn ;if pos, don't set SE
935 addq.l #1,d0 ;set bit 0 in d0 for SE
936 wr_sgn:
937 bfins d0,FP_SCR1(a6){0:2} ;insert SM and SE into FP_SCR1
938
939 * Clean up and restore all registers used.
940
941 fmove.l #0,FPSR ;clear possible inex2/ainex bits
942 fmovem.x (a7)+,fp0-fp2
943 movem.l (a7)+,d2-d7/a2
944 rts
945
946 end
947