Home | History | Annotate | Line # | Download | only in fpsp
bindec.sa revision 1.4
      1 *	$NetBSD: bindec.sa,v 1.4 2001/09/16 16:34:32 wiz Exp $
      2 
      3 *	MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
      4 *	M68000 Hi-Performance Microprocessor Division
      5 *	M68040 Software Package 
      6 *
      7 *	M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
      8 *	All rights reserved.
      9 *
     10 *	THE SOFTWARE is provided on an "AS IS" basis and without warranty.
     11 *	To the maximum extent permitted by applicable law,
     12 *	MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
     13 *	INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
     14 *	PARTICULAR PURPOSE and any warranty against infringement with
     15 *	regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
     16 *	and any accompanying written materials. 
     17 *
     18 *	To the maximum extent permitted by applicable law,
     19 *	IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
     20 *	(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
     21 *	PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
     22 *	OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
     23 *	SOFTWARE.  Motorola assumes no responsibility for the maintenance
     24 *	and support of the SOFTWARE.  
     25 *
     26 *	You are hereby granted a copyright license to use, modify, and
     27 *	distribute the SOFTWARE so long as this entire notice is retained
     28 *	without alteration in any modified and/or redistributed versions,
     29 *	and that such modified versions are clearly identified as such.
     30 *	No licenses are granted by implication, estoppel or otherwise
     31 *	under any patents or trademarks of Motorola, Inc.
     32 
     33 *
     34 *	bindec.sa 3.4 1/3/91
     35 *
     36 *	bindec
     37 *
     38 *	Description:
     39 *		Converts an input in extended precision format
     40 *		to bcd format.
     41 *
     42 *	Input:
     43 *		a0 points to the input extended precision value
     44 *		value in memory; d0 contains the k-factor sign-extended
     45 *		to 32-bits.  The input may be either normalized,
     46 *		unnormalized, or denormalized.
     47 *
     48 *	Output:	result in the FP_SCR1 space on the stack.
     49 *
     50 *	Saves and Modifies: D2-D7,A2,FP2
     51 *
     52 *	Algorithm:
     53 *
     54 *	A1.	Set RM and size ext;  Set SIGMA = sign of input.  
     55 *		The k-factor is saved for use in d7. Clear the
     56 *		BINDEC_FLG for separating normalized/denormalized
     57 *		input.  If input is unnormalized or denormalized,
     58 *		normalize it.
     59 *
     60 *	A2.	Set X = abs(input).
     61 *
     62 *	A3.	Compute ILOG.
     63 *		ILOG is the log base 10 of the input value.  It is
     64 *		approximated by adding e + 0.f when the original 
     65 *		value is viewed as 2^^e * 1.f in extended precision.  
     66 *		This value is stored in d6.
     67 *
     68 *	A4.	Clr INEX bit.
     69 *		The operation in A3 above may have set INEX2.  
     70 *
     71 *	A5.	Set ICTR = 0;
     72 *		ICTR is a flag used in A13.  It must be set before the 
     73 *		loop entry A6.
     74 *
     75 *	A6.	Calculate LEN.
     76 *		LEN is the number of digits to be displayed.  The
     77 *		k-factor can dictate either the total number of digits,
     78 *		if it is a positive number, or the number of digits
     79 *		after the decimal point which are to be included as
     80 *		significant.  See the 68882 manual for examples.
     81 *		If LEN is computed to be greater than 17, set OPERR in
     82 *		USER_FPSR.  LEN is stored in d4.
     83 *
     84 *	A7.	Calculate SCALE.
     85 *		SCALE is equal to 10^ISCALE, where ISCALE is the number
     86 *		of decimal places needed to insure LEN integer digits
     87 *		in the output before conversion to bcd. LAMBDA is the
     88 *		sign of ISCALE, used in A9. Fp1 contains
     89 *		10^^(abs(ISCALE)) using a rounding mode which is a
     90 *		function of the original rounding mode and the signs
     91 *		of ISCALE and X.  A table is given in the code.
     92 *
     93 *	A8.	Clr INEX; Force RZ.
     94 *		The operation in A3 above may have set INEX2.  
     95 *		RZ mode is forced for the scaling operation to insure
     96 *		only one rounding error.  The grs bits are collected in 
     97 *		the INEX flag for use in A10.
     98 *
     99 *	A9.	Scale X -> Y.
    100 *		The mantissa is scaled to the desired number of
    101 *		significant digits.  The excess digits are collected
    102 *		in INEX2.
    103 *
    104 *	A10.	Or in INEX.
    105 *		If INEX is set, round error occurred.  This is
    106 *		compensated for by 'or-ing' in the INEX2 flag to
    107 *		the lsb of Y.
    108 *
    109 *	A11.	Restore original FPCR; set size ext.
    110 *		Perform FINT operation in the user's rounding mode.
    111 *		Keep the size to extended.
    112 *
    113 *	A12.	Calculate YINT = FINT(Y) according to user's rounding
    114 *		mode.  The FPSP routine sintd0 is used.  The output
    115 *		is in fp0.
    116 *
    117 *	A13.	Check for LEN digits.
    118 *		If the int operation results in more than LEN digits,
    119 *		or less than LEN -1 digits, adjust ILOG and repeat from
    120 *		A6.  This test occurs only on the first pass.  If the
    121 *		result is exactly 10^LEN, decrement ILOG and divide
    122 *		the mantissa by 10.
    123 *
    124 *	A14.	Convert the mantissa to bcd.
    125 *		The binstr routine is used to convert the LEN digit 
    126 *		mantissa to bcd in memory.  The input to binstr is
    127 *		to be a fraction; i.e. (mantissa)/10^LEN and adjusted
    128 *		such that the decimal point is to the left of bit 63.
    129 *		The bcd digits are stored in the correct position in 
    130 *		the final string area in memory.
    131 *
    132 *	A15.	Convert the exponent to bcd.
    133 *		As in A14 above, the exp is converted to bcd and the
    134 *		digits are stored in the final string.
    135 *		Test the length of the final exponent string.  If the
    136 *		length is 4, set operr.
    137 *
    138 *	A16.	Write sign bits to final string.
    139 *
    140 *	Implementation Notes:
    141 *
    142 *	The registers are used as follows:
    143 *
    144 *		d0: scratch; LEN input to binstr
    145 *		d1: scratch
    146 *		d2: upper 32-bits of mantissa for binstr
    147 *		d3: scratch;lower 32-bits of mantissa for binstr
    148 *		d4: LEN
    149 *      		d5: LAMBDA/ICTR
    150 *		d6: ILOG
    151 *		d7: k-factor
    152 *		a0: ptr for original operand/final result
    153 *		a1: scratch pointer
    154 *		a2: pointer to FP_X; abs(original value) in ext
    155 *		fp0: scratch
    156 *		fp1: scratch
    157 *		fp2: scratch
    158 *		F_SCR1:
    159 *		F_SCR2:
    160 *		L_SCR1:
    161 *		L_SCR2:
    162 *
    163 
    164 BINDEC    IDNT    2,1 Motorola 040 Floating Point Software Package
    165 
    166 	include	fpsp.h
    167 
    168 	section	8
    169 
    170 * Constants in extended precision
    171 LOG2 	dc.l	$3FFD0000,$9A209A84,$FBCFF798,$00000000
    172 LOG2UP1	dc.l	$3FFD0000,$9A209A84,$FBCFF799,$00000000
    173 
    174 * Constants in single precision
    175 FONE 	dc.l	$3F800000,$00000000,$00000000,$00000000
    176 FTWO	dc.l	$40000000,$00000000,$00000000,$00000000
    177 FTEN 	dc.l	$41200000,$00000000,$00000000,$00000000
    178 F4933	dc.l	$459A2800,$00000000,$00000000,$00000000
    179 
    180 RBDTBL 	dc.b	0,0,0,0
    181 	dc.b	3,3,2,2
    182 	dc.b	3,2,2,3
    183 	dc.b	2,3,3,2
    184 
    185 	xref	binstr
    186 	xref	sintdo
    187 	xref	ptenrn,ptenrm,ptenrp
    188 
    189 	xdef	bindec
    190 	xdef	sc_mul
    191 bindec:
    192 	movem.l	d2-d7/a2,-(a7)
    193 	fmovem.x fp0-fp2,-(a7)
    194 
    195 * A1. Set RM and size ext. Set SIGMA = sign input;
    196 *     The k-factor is saved for use in d7.  Clear BINDEC_FLG for
    197 *     separating  normalized/denormalized input.  If the input
    198 *     is a denormalized number, set the BINDEC_FLG memory word
    199 *     to signal denorm.  If the input is unnormalized, normalize
    200 *     the input and test for denormalized result.  
    201 *
    202 	fmove.l	#rm_mode,FPCR	;set RM and ext
    203 	move.l	(a0),L_SCR2(a6)	;save exponent for sign check
    204 	move.l	d0,d7		;move k-factor to d7
    205 	clr.b	BINDEC_FLG(a6)	;clr norm/denorm flag
    206 	move.w	STAG(a6),d0	;get stag
    207 	andi.w	#$e000,d0	;isolate stag bits
    208 	beq	A2_str		;if zero, input is norm
    209 *
    210 * Normalize the denorm
    211 *
    212 un_de_norm:
    213 	move.w	(a0),d0
    214 	andi.w	#$7fff,d0	;strip sign of normalized exp
    215 	move.l	4(a0),d1
    216 	move.l	8(a0),d2
    217 norm_loop:
    218 	sub.w	#1,d0
    219 	add.l	d2,d2
    220 	addx.l	d1,d1
    221 	tst.l	d1
    222 	bge.b	norm_loop
    223 *
    224 * Test if the normalized input is denormalized
    225 *
    226 	tst.w	d0
    227 	bgt.b	pos_exp		;if greater than zero, it is a norm
    228 	st	BINDEC_FLG(a6)	;set flag for denorm
    229 pos_exp:
    230 	andi.w	#$7fff,d0	;strip sign of normalized exp
    231 	move.w	d0,(a0)
    232 	move.l	d1,4(a0)
    233 	move.l	d2,8(a0)
    234 
    235 * A2. Set X = abs(input).
    236 *
    237 A2_str:
    238 	move.l	(a0),FP_SCR2(a6) ; move input to work space
    239 	move.l	4(a0),FP_SCR2+4(a6) ; move input to work space
    240 	move.l	8(a0),FP_SCR2+8(a6) ; move input to work space
    241 	andi.l	#$7fffffff,FP_SCR2(a6) ;create abs(X)
    242 
    243 * A3. Compute ILOG.
    244 *     ILOG is the log base 10 of the input value.  It is approx-
    245 *     imated by adding e + 0.f when the original value is viewed
    246 *     as 2^^e * 1.f in extended precision.  This value is stored
    247 *     in d6.
    248 *
    249 * Register usage:
    250 *	Input/Output
    251 *	d0: k-factor/exponent
    252 *	d2: x/x
    253 *	d3: x/x
    254 *	d4: x/x
    255 *	d5: x/x
    256 *	d6: x/ILOG
    257 *	d7: k-factor/Unchanged
    258 *	a0: ptr for original operand/final result
    259 *	a1: x/x
    260 *	a2: x/x
    261 *	fp0: x/float(ILOG)
    262 *	fp1: x/x
    263 *	fp2: x/x
    264 *	F_SCR1:x/x
    265 *	F_SCR2:Abs(X)/Abs(X) with $3fff exponent
    266 *	L_SCR1:x/x
    267 *	L_SCR2:first word of X packed/Unchanged
    268 
    269 	tst.b	BINDEC_FLG(a6)	;check for denorm
    270 	beq.b	A3_cont		;if clr, continue with norm
    271 	move.l	#-4933,d6	;force ILOG = -4933
    272 	bra.b	A4_str
    273 A3_cont:
    274 	move.w	FP_SCR2(a6),d0	;move exp to d0
    275 	move.w	#$3fff,FP_SCR2(a6) ;replace exponent with 0x3fff
    276 	fmove.x	FP_SCR2(a6),fp0	;now fp0 has 1.f
    277 	sub.w	#$3fff,d0	;strip off bias
    278 	fadd.w	d0,fp0		;add in exp
    279 	fsub.s	FONE,fp0	;subtract off 1.0
    280 	fbge.w	pos_res		;if pos, branch 
    281 	fmul.x	LOG2UP1,fp0	;if neg, mul by LOG2UP1
    282 	fmove.l	fp0,d6		;put ILOG in d6 as a lword
    283 	bra.b	A4_str		;go move out ILOG
    284 pos_res:
    285 	fmul.x	LOG2,fp0	;if pos, mul by LOG2
    286 	fmove.l	fp0,d6		;put ILOG in d6 as a lword
    287 
    288 
    289 * A4. Clr INEX bit.
    290 *     The operation in A3 above may have set INEX2.  
    291 
    292 A4_str:	
    293 	fmove.l	#0,FPSR		;zero all of fpsr - nothing needed
    294 
    295 
    296 * A5. Set ICTR = 0;
    297 *     ICTR is a flag used in A13.  It must be set before the 
    298 *     loop entry A6. The lower word of d5 is used for ICTR.
    299 
    300 	clr.w	d5		;clear ICTR
    301 
    302 
    303 * A6. Calculate LEN.
    304 *     LEN is the number of digits to be displayed.  The k-factor
    305 *     can dictate either the total number of digits, if it is
    306 *     a positive number, or the number of digits after the
    307 *     original decimal point which are to be included as
    308 *     significant.  See the 68882 manual for examples.
    309 *     If LEN is computed to be greater than 17, set OPERR in
    310 *     USER_FPSR.  LEN is stored in d4.
    311 *
    312 * Register usage:
    313 *	Input/Output
    314 *	d0: exponent/Unchanged
    315 *	d2: x/x/scratch
    316 *	d3: x/x
    317 *	d4: exc picture/LEN
    318 *	d5: ICTR/Unchanged
    319 *	d6: ILOG/Unchanged
    320 *	d7: k-factor/Unchanged
    321 *	a0: ptr for original operand/final result
    322 *	a1: x/x
    323 *	a2: x/x
    324 *	fp0: float(ILOG)/Unchanged
    325 *	fp1: x/x
    326 *	fp2: x/x
    327 *	F_SCR1:x/x
    328 *	F_SCR2:Abs(X) with $3fff exponent/Unchanged
    329 *	L_SCR1:x/x
    330 *	L_SCR2:first word of X packed/Unchanged
    331 
    332 A6_str:	
    333 	tst.l	d7		;branch on sign of k
    334 	ble.b	k_neg		;if k <= 0, LEN = ILOG + 1 - k
    335 	move.l	d7,d4		;if k > 0, LEN = k
    336 	bra.b	len_ck		;skip to LEN check
    337 k_neg:
    338 	move.l	d6,d4		;first load ILOG to d4
    339 	sub.l	d7,d4		;subtract off k
    340 	addq.l	#1,d4		;add in the 1
    341 len_ck:
    342 	tst.l	d4		;LEN check: branch on sign of LEN
    343 	ble.b	LEN_ng		;if neg, set LEN = 1
    344 	cmp.l	#17,d4		;test if LEN > 17
    345 	ble.b	A7_str		;if not, forget it
    346 	move.l	#17,d4		;set max LEN = 17
    347 	tst.l	d7		;if negative, never set OPERR
    348 	ble.b	A7_str		;if positive, continue
    349 	or.l	#opaop_mask,USER_FPSR(a6) ;set OPERR & AIOP in USER_FPSR
    350 	bra.b	A7_str		;finished here
    351 LEN_ng:
    352 	moveq.l	#1,d4		;min LEN is 1
    353 
    354 
    355 * A7. Calculate SCALE.
    356 *     SCALE is equal to 10^ISCALE, where ISCALE is the number
    357 *     of decimal places needed to insure LEN integer digits
    358 *     in the output before conversion to bcd. LAMBDA is the sign
    359 *     of ISCALE, used in A9.  Fp1 contains 10^^(abs(ISCALE)) using
    360 *     the rounding mode as given in the following table (see
    361 *     Coonen, p. 7.23 as ref.; however, the SCALE variable is
    362 *     of opposite sign in bindec.sa from Coonen).
    363 *
    364 *	Initial					USE
    365 *	FPCR[6:5]	LAMBDA	SIGN(X)		FPCR[6:5]
    366 *	----------------------------------------------
    367 *	 RN	00	   0	   0		00/0	RN
    368 *	 RN	00	   0	   1		00/0	RN
    369 *	 RN	00	   1	   0		00/0	RN
    370 *	 RN	00	   1	   1		00/0	RN
    371 *	 RZ	01	   0	   0		11/3	RP
    372 *	 RZ	01	   0	   1		11/3	RP
    373 *	 RZ	01	   1	   0		10/2	RM
    374 *	 RZ	01	   1	   1		10/2	RM
    375 *	 RM	10	   0	   0		11/3	RP
    376 *	 RM	10	   0	   1		10/2	RM
    377 *	 RM	10	   1	   0		10/2	RM
    378 *	 RM	10	   1	   1		11/3	RP
    379 *	 RP	11	   0	   0		10/2	RM
    380 *	 RP	11	   0	   1		11/3	RP
    381 *	 RP	11	   1	   0		11/3	RP
    382 *	 RP	11	   1	   1		10/2	RM
    383 *
    384 * Register usage:
    385 *	Input/Output
    386 *	d0: exponent/scratch - final is 0
    387 *	d2: x/0 or 24 for A9
    388 *	d3: x/scratch - offset ptr into PTENRM array
    389 *	d4: LEN/Unchanged
    390 *	d5: 0/ICTR:LAMBDA
    391 *	d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
    392 *	d7: k-factor/Unchanged
    393 *	a0: ptr for original operand/final result
    394 *	a1: x/ptr to PTENRM array
    395 *	a2: x/x
    396 *	fp0: float(ILOG)/Unchanged
    397 *	fp1: x/10^ISCALE
    398 *	fp2: x/x
    399 *	F_SCR1:x/x
    400 *	F_SCR2:Abs(X) with $3fff exponent/Unchanged
    401 *	L_SCR1:x/x
    402 *	L_SCR2:first word of X packed/Unchanged
    403 
    404 A7_str:	
    405 	tst.l	d7		;test sign of k
    406 	bgt.b	k_pos		;if pos and > 0, skip this
    407 	cmp.l	d6,d7		;test k - ILOG
    408 	blt.b	k_pos		;if ILOG >= k, skip this
    409 	move.l	d7,d6		;if ((k<0) & (ILOG < k)) ILOG = k
    410 k_pos:	
    411 	move.l	d6,d0		;calc ILOG + 1 - LEN in d0
    412 	addq.l	#1,d0		;add the 1
    413 	sub.l	d4,d0		;sub off LEN
    414 	swap	d5		;use upper word of d5 for LAMBDA
    415 	clr.w	d5		;set it zero initially
    416 	clr.w	d2		;set up d2 for very small case
    417 	tst.l	d0		;test sign of ISCALE
    418 	bge.b	iscale		;if pos, skip next inst
    419 	addq.w	#1,d5		;if neg, set LAMBDA true
    420 	cmp.l	#$ffffecd4,d0	;test iscale <= -4908
    421 	bgt.b	no_inf		;if false, skip rest
    422 	addi.l	#24,d0		;add in 24 to iscale
    423 	move.l	#24,d2		;put 24 in d2 for A9
    424 no_inf:	
    425 	neg.l	d0		;and take abs of ISCALE
    426 iscale:	
    427 	fmove.s	FONE,fp1	;init fp1 to 1
    428 	bfextu	USER_FPCR(a6){26:2},d1 ;get initial rmode bits
    429 	add.w	d1,d1		;put them in bits 2:1
    430 	add.w	d5,d1		;add in LAMBDA
    431 	add.w	d1,d1		;put them in bits 3:1
    432 	tst.l	L_SCR2(a6)	;test sign of original x
    433 	bge.b	x_pos		;if pos, don't set bit 0
    434 	addq.l	#1,d1		;if neg, set bit 0
    435 x_pos:
    436 	lea.l	RBDTBL,a2	;load rbdtbl base
    437 	move.b	(a2,d1),d3	;load d3 with new rmode
    438 	lsl.l	#4,d3		;put bits in proper position
    439 	fmove.l	d3,fpcr		;load bits into fpu
    440 	lsr.l	#4,d3		;put bits in proper position
    441 	tst.b	d3		;decode new rmode for pten table
    442 	bne.b	not_rn		;if zero, it is RN
    443 	lea.l	PTENRN,a1	;load a1 with RN table base
    444 	bra.b	rmode		;exit decode
    445 not_rn:
    446 	lsr.b	#1,d3		;get lsb in carry
    447 	bcc.b	not_rp		;if carry clear, it is RM
    448 	lea.l	PTENRP,a1	;load a1 with RP table base
    449 	bra.b	rmode		;exit decode
    450 not_rp:
    451 	lea.l	PTENRM,a1	;load a1 with RM table base
    452 rmode:
    453 	clr.l	d3		;clr table index
    454 e_loop:	
    455 	lsr.l	#1,d0		;shift next bit into carry
    456 	bcc.b	e_next		;if zero, skip the mul
    457 	fmul.x	(a1,d3),fp1	;mul by 10**(d3_bit_no)
    458 e_next:	
    459 	add.l	#12,d3		;inc d3 to next pwrten table entry
    460 	tst.l	d0		;test if ISCALE is zero
    461 	bne.b	e_loop		;if not, loop
    462 
    463 
    464 * A8. Clr INEX; Force RZ.
    465 *     The operation in A3 above may have set INEX2.  
    466 *     RZ mode is forced for the scaling operation to insure
    467 *     only one rounding error.  The grs bits are collected in 
    468 *     the INEX flag for use in A10.
    469 *
    470 * Register usage:
    471 *	Input/Output
    472 
    473 	fmove.l	#0,FPSR		;clr INEX 
    474 	fmove.l	#rz_mode,FPCR	;set RZ rounding mode
    475 
    476 
    477 * A9. Scale X -> Y.
    478 *     The mantissa is scaled to the desired number of significant
    479 *     digits.  The excess digits are collected in INEX2. If mul,
    480 *     Check d2 for excess 10 exponential value.  If not zero, 
    481 *     the iscale value would have caused the pwrten calculation
    482 *     to overflow.  Only a negative iscale can cause this, so
    483 *     multiply by 10^(d2), which is now only allowed to be 24,
    484 *     with a multiply by 10^8 and 10^16, which is exact since
    485 *     10^24 is exact.  If the input was denormalized, we must
    486 *     create a busy stack frame with the mul command and the
    487 *     two operands, and allow the fpu to complete the multiply.
    488 *
    489 * Register usage:
    490 *	Input/Output
    491 *	d0: FPCR with RZ mode/Unchanged
    492 *	d2: 0 or 24/unchanged
    493 *	d3: x/x
    494 *	d4: LEN/Unchanged
    495 *	d5: ICTR:LAMBDA
    496 *	d6: ILOG/Unchanged
    497 *	d7: k-factor/Unchanged
    498 *	a0: ptr for original operand/final result
    499 *	a1: ptr to PTENRM array/Unchanged
    500 *	a2: x/x
    501 *	fp0: float(ILOG)/X adjusted for SCALE (Y)
    502 *	fp1: 10^ISCALE/Unchanged
    503 *	fp2: x/x
    504 *	F_SCR1:x/x
    505 *	F_SCR2:Abs(X) with $3fff exponent/Unchanged
    506 *	L_SCR1:x/x
    507 *	L_SCR2:first word of X packed/Unchanged
    508 
    509 A9_str:	
    510 	fmove.x	(a0),fp0	;load X from memory
    511 	fabs.x	fp0		;use abs(X)
    512 	tst.w	d5		;LAMBDA is in lower word of d5
    513 	bne.b	sc_mul		;if neg (LAMBDA = 1), scale by mul
    514 	fdiv.x	fp1,fp0		;calculate X / SCALE -> Y to fp0
    515 	bra.b	A10_st		;branch to A10
    516 
    517 sc_mul:
    518 	tst.b	BINDEC_FLG(a6)	;check for denorm
    519 	beq.b	A9_norm		;if norm, continue with mul
    520 	fmovem.x fp1,-(a7)	;load ETEMP with 10^ISCALE
    521 	move.l	8(a0),-(a7)	;load FPTEMP with input arg
    522 	move.l	4(a0),-(a7)
    523 	move.l	(a0),-(a7)
    524 	move.l	#18,d3		;load count for busy stack
    525 A9_loop:
    526 	clr.l	-(a7)		;clear lword on stack
    527 	dbf.w	d3,A9_loop	
    528 	move.b	VER_TMP(a6),(a7) ;write current version number
    529 	move.b	#BUSY_SIZE-4,1(a7) ;write current busy size 
    530 	move.b	#$10,$44(a7)	;set fcefpte[15] bit
    531 	move.w	#$0023,$40(a7)	;load cmdreg1b with mul command
    532 	move.b	#$fe,$8(a7)	;load all 1s to cu savepc
    533 	frestore (a7)+		;restore frame to fpu for completion
    534 	fmul.x	36(a1),fp0	;multiply fp0 by 10^8
    535 	fmul.x	48(a1),fp0	;multiply fp0 by 10^16
    536 	bra.b	A10_st
    537 A9_norm:
    538 	tst.w	d2		;test for small exp case
    539 	beq.b	A9_con		;if zero, continue as normal
    540 	fmul.x	36(a1),fp0	;multiply fp0 by 10^8
    541 	fmul.x	48(a1),fp0	;multiply fp0 by 10^16
    542 A9_con:
    543 	fmul.x	fp1,fp0		;calculate X * SCALE -> Y to fp0
    544 
    545 
    546 * A10. Or in INEX.
    547 *      If INEX is set, round error occurred.  This is compensated
    548 *      for by 'or-ing' in the INEX2 flag to the lsb of Y.
    549 *
    550 * Register usage:
    551 *	Input/Output
    552 *	d0: FPCR with RZ mode/FPSR with INEX2 isolated
    553 *	d2: x/x
    554 *	d3: x/x
    555 *	d4: LEN/Unchanged
    556 *	d5: ICTR:LAMBDA
    557 *	d6: ILOG/Unchanged
    558 *	d7: k-factor/Unchanged
    559 *	a0: ptr for original operand/final result
    560 *	a1: ptr to PTENxx array/Unchanged
    561 *	a2: x/ptr to FP_SCR2(a6)
    562 *	fp0: Y/Y with lsb adjusted
    563 *	fp1: 10^ISCALE/Unchanged
    564 *	fp2: x/x
    565 
    566 A10_st:	
    567 	fmove.l	FPSR,d0		;get FPSR
    568 	fmove.x	fp0,FP_SCR2(a6)	;move Y to memory
    569 	lea.l	FP_SCR2(a6),a2	;load a2 with ptr to FP_SCR2
    570 	btst.l	#9,d0		;check if INEX2 set
    571 	beq.b	A11_st		;if clear, skip rest
    572 	ori.l	#1,8(a2)	;or in 1 to lsb of mantissa
    573 	fmove.x	FP_SCR2(a6),fp0	;write adjusted Y back to fpu
    574 
    575 
    576 * A11. Restore original FPCR; set size ext.
    577 *      Perform FINT operation in the user's rounding mode.  Keep
    578 *      the size to extended.  The sintdo entry point in the sint
    579 *      routine expects the FPCR value to be in USER_FPCR for
    580 *      mode and precision.  The original FPCR is saved in L_SCR1.
    581 
    582 A11_st:	
    583 	move.l	USER_FPCR(a6),L_SCR1(a6) ;save it for later
    584 	andi.l	#$00000030,USER_FPCR(a6) ;set size to ext, 
    585 *					;block exceptions
    586 
    587 
    588 * A12. Calculate YINT = FINT(Y) according to user's rounding mode.
    589 *      The FPSP routine sintd0 is used.  The output is in fp0.
    590 *
    591 * Register usage:
    592 *	Input/Output
    593 *	d0: FPSR with AINEX cleared/FPCR with size set to ext
    594 *	d2: x/x/scratch
    595 *	d3: x/x
    596 *	d4: LEN/Unchanged
    597 *	d5: ICTR:LAMBDA/Unchanged
    598 *	d6: ILOG/Unchanged
    599 *	d7: k-factor/Unchanged
    600 *	a0: ptr for original operand/src ptr for sintdo
    601 *	a1: ptr to PTENxx array/Unchanged
    602 *	a2: ptr to FP_SCR2(a6)/Unchanged
    603 *	a6: temp pointer to FP_SCR2(a6) - orig value saved and restored
    604 *	fp0: Y/YINT
    605 *	fp1: 10^ISCALE/Unchanged
    606 *	fp2: x/x
    607 *	F_SCR1:x/x
    608 *	F_SCR2:Y adjusted for inex/Y with original exponent
    609 *	L_SCR1:x/original USER_FPCR
    610 *	L_SCR2:first word of X packed/Unchanged
    611 
    612 A12_st:
    613 	movem.l	d0-d1/a0-a1,-(a7)	;save regs used by sintd0	
    614 	move.l	L_SCR1(a6),-(a7)
    615 	move.l	L_SCR2(a6),-(a7)
    616 	lea.l	FP_SCR2(a6),a0		;a0 is ptr to F_SCR2(a6)
    617 	fmove.x	fp0,(a0)		;move Y to memory at FP_SCR2(a6)
    618 	tst.l	L_SCR2(a6)		;test sign of original operand
    619 	bge.b	do_fint			;if pos, use Y 
    620 	or.l	#$80000000,(a0)		;if neg, use -Y
    621 do_fint:
    622 	move.l	USER_FPSR(a6),-(a7)
    623 	bsr	sintdo			;sint routine returns int in fp0
    624 	move.b	(a7),USER_FPSR(a6)
    625 	add.l	#4,a7
    626 	move.l	(a7)+,L_SCR2(a6)
    627 	move.l	(a7)+,L_SCR1(a6)
    628 	movem.l	(a7)+,d0-d1/a0-a1	;restore regs used by sint	
    629 	move.l	L_SCR2(a6),FP_SCR2(a6)	;restore original exponent
    630 	move.l	L_SCR1(a6),USER_FPCR(a6) ;restore user's FPCR
    631 
    632 
    633 * A13. Check for LEN digits.
    634 *      If the int operation results in more than LEN digits,
    635 *      or less than LEN -1 digits, adjust ILOG and repeat from
    636 *      A6.  This test occurs only on the first pass.  If the
    637 *      result is exactly 10^LEN, decrement ILOG and divide
    638 *      the mantissa by 10.  The calculation of 10^LEN cannot
    639 *      be inexact, since all powers of ten upto 10^27 are exact
    640 *      in extended precision, so the use of a previous power-of-ten
    641 *      table will introduce no error.
    642 *
    643 *
    644 * Register usage:
    645 *	Input/Output
    646 *	d0: FPCR with size set to ext/scratch final = 0
    647 *	d2: x/x
    648 *	d3: x/scratch final = x
    649 *	d4: LEN/LEN adjusted
    650 *	d5: ICTR:LAMBDA/LAMBDA:ICTR
    651 *	d6: ILOG/ILOG adjusted
    652 *	d7: k-factor/Unchanged
    653 *	a0: pointer into memory for packed bcd string formation
    654 *	a1: ptr to PTENxx array/Unchanged
    655 *	a2: ptr to FP_SCR2(a6)/Unchanged
    656 *	fp0: int portion of Y/abs(YINT) adjusted
    657 *	fp1: 10^ISCALE/Unchanged
    658 *	fp2: x/10^LEN
    659 *	F_SCR1:x/x
    660 *	F_SCR2:Y with original exponent/Unchanged
    661 *	L_SCR1:original USER_FPCR/Unchanged
    662 *	L_SCR2:first word of X packed/Unchanged
    663 
    664 A13_st:	
    665 	swap	d5		;put ICTR in lower word of d5
    666 	tst.w	d5		;check if ICTR = 0
    667 	bne	not_zr		;if non-zero, go to second test
    668 *
    669 * Compute 10^(LEN-1)
    670 *
    671 	fmove.s	FONE,fp2	;init fp2 to 1.0
    672 	move.l	d4,d0		;put LEN in d0
    673 	subq.l	#1,d0		;d0 = LEN -1
    674 	clr.l	d3		;clr table index
    675 l_loop:	
    676 	lsr.l	#1,d0		;shift next bit into carry
    677 	bcc.b	l_next		;if zero, skip the mul
    678 	fmul.x	(a1,d3),fp2	;mul by 10**(d3_bit_no)
    679 l_next:
    680 	add.l	#12,d3		;inc d3 to next pwrten table entry
    681 	tst.l	d0		;test if LEN is zero
    682 	bne.b	l_loop		;if not, loop
    683 *
    684 * 10^LEN-1 is computed for this test and A14.  If the input was
    685 * denormalized, check only the case in which YINT > 10^LEN.
    686 *
    687 	tst.b	BINDEC_FLG(a6)	;check if input was norm
    688 	beq.b	A13_con		;if norm, continue with checking
    689 	fabs.x	fp0		;take abs of YINT
    690 	bra	test_2
    691 *
    692 * Compare abs(YINT) to 10^(LEN-1) and 10^LEN
    693 *
    694 A13_con:
    695 	fabs.x	fp0		;take abs of YINT
    696 	fcmp.x	fp2,fp0		;compare abs(YINT) with 10^(LEN-1)
    697 	fbge.w	test_2		;if greater, do next test
    698 	subq.l	#1,d6		;subtract 1 from ILOG
    699 	move.w	#1,d5		;set ICTR
    700 	fmove.l	#rm_mode,FPCR	;set rmode to RM
    701 	fmul.s	FTEN,fp2	;compute 10^LEN 
    702 	bra.w	A6_str		;return to A6 and recompute YINT
    703 test_2:
    704 	fmul.s	FTEN,fp2	;compute 10^LEN
    705 	fcmp.x	fp2,fp0		;compare abs(YINT) with 10^LEN
    706 	fblt.w	A14_st		;if less, all is ok, go to A14
    707 	fbgt.w	fix_ex		;if greater, fix and redo
    708 	fdiv.s	FTEN,fp0	;if equal, divide by 10
    709 	addq.l	#1,d6		; and inc ILOG
    710 	bra.b	A14_st		; and continue elsewhere
    711 fix_ex:
    712 	addq.l	#1,d6		;increment ILOG by 1
    713 	move.w	#1,d5		;set ICTR
    714 	fmove.l	#rm_mode,FPCR	;set rmode to RM
    715 	bra.w	A6_str		;return to A6 and recompute YINT
    716 *
    717 * Since ICTR <> 0, we have already been through one adjustment, 
    718 * and shouldn't have another; this is to check if abs(YINT) = 10^LEN
    719 * 10^LEN is again computed using whatever table is in a1 since the
    720 * value calculated cannot be inexact.
    721 *
    722 not_zr:
    723 	fmove.s	FONE,fp2	;init fp2 to 1.0
    724 	move.l	d4,d0		;put LEN in d0
    725 	clr.l	d3		;clr table index
    726 z_loop:
    727 	lsr.l	#1,d0		;shift next bit into carry
    728 	bcc.b	z_next		;if zero, skip the mul
    729 	fmul.x	(a1,d3),fp2	;mul by 10**(d3_bit_no)
    730 z_next:
    731 	add.l	#12,d3		;inc d3 to next pwrten table entry
    732 	tst.l	d0		;test if LEN is zero
    733 	bne.b	z_loop		;if not, loop
    734 	fabs.x	fp0		;get abs(YINT)
    735 	fcmp.x	fp2,fp0		;check if abs(YINT) = 10^LEN
    736 	fbne.w	A14_st		;if not, skip this
    737 	fdiv.s	FTEN,fp0	;divide abs(YINT) by 10
    738 	addq.l	#1,d6		;and inc ILOG by 1
    739 	addq.l	#1,d4		; and inc LEN
    740 	fmul.s	FTEN,fp2	; if LEN++, the get 10^^LEN
    741 
    742 
    743 * A14. Convert the mantissa to bcd.
    744 *      The binstr routine is used to convert the LEN digit 
    745 *      mantissa to bcd in memory.  The input to binstr is
    746 *      to be a fraction; i.e. (mantissa)/10^LEN and adjusted
    747 *      such that the decimal point is to the left of bit 63.
    748 *      The bcd digits are stored in the correct position in 
    749 *      the final string area in memory.
    750 *
    751 *
    752 * Register usage:
    753 *	Input/Output
    754 *	d0: x/LEN call to binstr - final is 0
    755 *	d1: x/0
    756 *	d2: x/ms 32-bits of mant of abs(YINT)
    757 *	d3: x/ls 32-bits of mant of abs(YINT)
    758 *	d4: LEN/Unchanged
    759 *	d5: ICTR:LAMBDA/LAMBDA:ICTR
    760 *	d6: ILOG
    761 *	d7: k-factor/Unchanged
    762 *	a0: pointer into memory for packed bcd string formation
    763 *	    /ptr to first mantissa byte in result string
    764 *	a1: ptr to PTENxx array/Unchanged
    765 *	a2: ptr to FP_SCR2(a6)/Unchanged
    766 *	fp0: int portion of Y/abs(YINT) adjusted
    767 *	fp1: 10^ISCALE/Unchanged
    768 *	fp2: 10^LEN/Unchanged
    769 *	F_SCR1:x/Work area for final result
    770 *	F_SCR2:Y with original exponent/Unchanged
    771 *	L_SCR1:original USER_FPCR/Unchanged
    772 *	L_SCR2:first word of X packed/Unchanged
    773 
    774 A14_st:	
    775 	fmove.l	#rz_mode,FPCR	;force rz for conversion
    776 	fdiv.x	fp2,fp0		;divide abs(YINT) by 10^LEN
    777 	lea.l	FP_SCR1(a6),a0
    778 	fmove.x	fp0,(a0)	;move abs(YINT)/10^LEN to memory
    779 	move.l	4(a0),d2	;move 2nd word of FP_RES to d2
    780 	move.l	8(a0),d3	;move 3rd word of FP_RES to d3
    781 	clr.l	4(a0)		;zero word 2 of FP_RES
    782 	clr.l	8(a0)		;zero word 3 of FP_RES
    783 	move.l	(a0),d0		;move exponent to d0
    784 	swap	d0		;put exponent in lower word
    785 	beq.b	no_sft		;if zero, don't shift
    786 	subi.l	#$3ffd,d0	;sub bias less 2 to make fract
    787 	tst.l	d0		;check if > 1
    788 	bgt.b	no_sft		;if so, don't shift
    789 	neg.l	d0		;make exp positive
    790 m_loop:
    791 	lsr.l	#1,d2		;shift d2:d3 right, add 0s 
    792 	roxr.l	#1,d3		;the number of places
    793 	dbf.w	d0,m_loop	;given in d0
    794 no_sft:
    795 	tst.l	d2		;check for mantissa of zero
    796 	bne.b	no_zr		;if not, go on
    797 	tst.l	d3		;continue zero check
    798 	beq.b	zer_m		;if zero, go directly to binstr
    799 no_zr:
    800 	clr.l	d1		;put zero in d1 for addx
    801 	addi.l	#$00000080,d3	;inc at bit 7
    802 	addx.l	d1,d2		;continue inc
    803 	andi.l	#$ffffff80,d3	;strip off lsb not used by 882
    804 zer_m:
    805 	move.l	d4,d0		;put LEN in d0 for binstr call
    806 	addq.l	#3,a0		;a0 points to M16 byte in result
    807 	bsr	binstr		;call binstr to convert mant
    808 
    809 
    810 * A15. Convert the exponent to bcd.
    811 *      As in A14 above, the exp is converted to bcd and the
    812 *      digits are stored in the final string.
    813 *
    814 *      Digits are stored in L_SCR1(a6) on return from BINDEC as:
    815 *
    816 *  	 32               16 15                0
    817 *	-----------------------------------------
    818 *  	|  0 | e3 | e2 | e1 | e4 |  X |  X |  X |
    819 *	-----------------------------------------
    820 *
    821 * And are moved into their proper places in FP_SCR1.  If digit e4
    822 * is non-zero, OPERR is signaled.  In all cases, all 4 digits are
    823 * written as specified in the 881/882 manual for packed decimal.
    824 *
    825 * Register usage:
    826 *	Input/Output
    827 *	d0: x/LEN call to binstr - final is 0
    828 *	d1: x/scratch (0);shift count for final exponent packing
    829 *	d2: x/ms 32-bits of exp fraction/scratch
    830 *	d3: x/ls 32-bits of exp fraction
    831 *	d4: LEN/Unchanged
    832 *	d5: ICTR:LAMBDA/LAMBDA:ICTR
    833 *	d6: ILOG
    834 *	d7: k-factor/Unchanged
    835 *	a0: ptr to result string/ptr to L_SCR1(a6)
    836 *	a1: ptr to PTENxx array/Unchanged
    837 *	a2: ptr to FP_SCR2(a6)/Unchanged
    838 *	fp0: abs(YINT) adjusted/float(ILOG)
    839 *	fp1: 10^ISCALE/Unchanged
    840 *	fp2: 10^LEN/Unchanged
    841 *	F_SCR1:Work area for final result/BCD result
    842 *	F_SCR2:Y with original exponent/ILOG/10^4
    843 *	L_SCR1:original USER_FPCR/Exponent digits on return from binstr
    844 *	L_SCR2:first word of X packed/Unchanged
    845 
    846 A15_st:	
    847 	tst.b	BINDEC_FLG(a6)	;check for denorm
    848 	beq.b	not_denorm
    849 	ftst.x	fp0		;test for zero
    850 	fbeq.w	den_zero	;if zero, use k-factor or 4933
    851 	fmove.l	d6,fp0		;float ILOG
    852 	fabs.x	fp0		;get abs of ILOG
    853 	bra.b	convrt
    854 den_zero:
    855 	tst.l	d7		;check sign of the k-factor
    856 	blt.b	use_ilog	;if negative, use ILOG
    857 	fmove.s	F4933,fp0	;force exponent to 4933
    858 	bra.b	convrt		;do it
    859 use_ilog:
    860 	fmove.l	d6,fp0		;float ILOG
    861 	fabs.x	fp0		;get abs of ILOG
    862 	bra.b	convrt
    863 not_denorm:
    864 	ftst.x	fp0		;test for zero
    865 	fbne.w	not_zero	;if zero, force exponent
    866 	fmove.s	FONE,fp0	;force exponent to 1
    867 	bra.b	convrt		;do it
    868 not_zero:	
    869 	fmove.l	d6,fp0		;float ILOG
    870 	fabs.x	fp0		;get abs of ILOG
    871 convrt:
    872 	fdiv.x	24(a1),fp0	;compute ILOG/10^4
    873 	fmove.x	fp0,FP_SCR2(a6)	;store fp0 in memory
    874 	move.l	4(a2),d2	;move word 2 to d2
    875 	move.l	8(a2),d3	;move word 3 to d3
    876 	move.w	(a2),d0		;move exp to d0
    877 	beq.b	x_loop_fin	;if zero, skip the shift
    878 	subi.w	#$3ffd,d0	;subtract off bias
    879 	neg.w	d0		;make exp positive
    880 x_loop:
    881 	lsr.l	#1,d2		;shift d2:d3 right 
    882 	roxr.l	#1,d3		;the number of places
    883 	dbf.w	d0,x_loop	;given in d0
    884 x_loop_fin:
    885 	clr.l	d1		;put zero in d1 for addx
    886 	addi.l	#$00000080,d3	;inc at bit 6
    887 	addx.l	d1,d2		;continue inc
    888 	andi.l	#$ffffff80,d3	;strip off lsb not used by 882
    889 	move.l	#4,d0		;put 4 in d0 for binstr call
    890 	lea.l	L_SCR1(a6),a0	;a0 is ptr to L_SCR1 for exp digits
    891 	bsr	binstr		;call binstr to convert exp
    892 	move.l	L_SCR1(a6),d0	;load L_SCR1 lword to d0 
    893 	move.l	#12,d1		;use d1 for shift count
    894 	lsr.l	d1,d0		;shift d0 right by 12
    895 	bfins	d0,FP_SCR1(a6){4:12} ;put e3:e2:e1 in FP_SCR1
    896 	lsr.l	d1,d0		;shift d0 right by 12
    897 	bfins	d0,FP_SCR1(a6){16:4} ;put e4 in FP_SCR1 
    898 	tst.b	d0		;check if e4 is zero
    899 	beq.b	A16_st		;if zero, skip rest
    900 	or.l	#opaop_mask,USER_FPSR(a6) ;set OPERR & AIOP in USER_FPSR
    901 
    902 
    903 * A16. Write sign bits to final string.
    904 *	   Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
    905 *
    906 * Register usage:
    907 *	Input/Output
    908 *	d0: x/scratch - final is x
    909 *	d2: x/x
    910 *	d3: x/x
    911 *	d4: LEN/Unchanged
    912 *	d5: ICTR:LAMBDA/LAMBDA:ICTR
    913 *	d6: ILOG/ILOG adjusted
    914 *	d7: k-factor/Unchanged
    915 *	a0: ptr to L_SCR1(a6)/Unchanged
    916 *	a1: ptr to PTENxx array/Unchanged
    917 *	a2: ptr to FP_SCR2(a6)/Unchanged
    918 *	fp0: float(ILOG)/Unchanged
    919 *	fp1: 10^ISCALE/Unchanged
    920 *	fp2: 10^LEN/Unchanged
    921 *	F_SCR1:BCD result with correct signs
    922 *	F_SCR2:ILOG/10^4
    923 *	L_SCR1:Exponent digits on return from binstr
    924 *	L_SCR2:first word of X packed/Unchanged
    925 
    926 A16_st:
    927 	clr.l	d0		;clr d0 for collection of signs
    928 	andi.b	#$0f,FP_SCR1(a6) ;clear first nibble of FP_SCR1 
    929 	tst.l	L_SCR2(a6)	;check sign of original mantissa
    930 	bge.b	mant_p		;if pos, don't set SM
    931 	moveq.l	#2,d0		;move 2 in to d0 for SM
    932 mant_p:
    933 	tst.l	d6		;check sign of ILOG
    934 	bge.b	wr_sgn		;if pos, don't set SE
    935 	addq.l	#1,d0		;set bit 0 in d0 for SE 
    936 wr_sgn:
    937 	bfins	d0,FP_SCR1(a6){0:2} ;insert SM and SE into FP_SCR1
    938 
    939 * Clean up and restore all registers used.
    940 
    941 	fmove.l	#0,FPSR		;clear possible inex2/ainex bits
    942 	fmovem.x (a7)+,fp0-fp2
    943 	movem.l	(a7)+,d2-d7/a2
    944 	rts
    945 
    946 	end
    947