Home | History | Annotate | Line # | Download | only in fpsp
stan.sa revision 1.1.1.1
      1 *	MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
      2 *	M68000 Hi-Performance Microprocessor Division
      3 *	M68040 Software Package 
      4 *
      5 *	M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
      6 *	All rights reserved.
      7 *
      8 *	THE SOFTWARE is provided on an "AS IS" basis and without warranty.
      9 *	To the maximum extent permitted by applicable law,
     10 *	MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
     11 *	INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
     12 *	PARTICULAR PURPOSE and any warranty against infringement with
     13 *	regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
     14 *	and any accompanying written materials. 
     15 *
     16 *	To the maximum extent permitted by applicable law,
     17 *	IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
     18 *	(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
     19 *	PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
     20 *	OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
     21 *	SOFTWARE.  Motorola assumes no responsibility for the maintenance
     22 *	and support of the SOFTWARE.  
     23 *
     24 *	You are hereby granted a copyright license to use, modify, and
     25 *	distribute the SOFTWARE so long as this entire notice is retained
     26 *	without alteration in any modified and/or redistributed versions,
     27 *	and that such modified versions are clearly identified as such.
     28 *	No licenses are granted by implication, estoppel or otherwise
     29 *	under any patents or trademarks of Motorola, Inc.
     30 
     31 *
     32 *	stan.sa 3.3 7/29/91
     33 *
     34 *	The entry point stan computes the tangent of
     35 *	an input argument;
     36 *	stand does the same except for denormalized input.
     37 *
     38 *	Input: Double-extended number X in location pointed to
     39 *		by address register a0.
     40 *
     41 *	Output: The value tan(X) returned in floating-point register Fp0.
     42 *
     43 *	Accuracy and Monotonicity: The returned result is within 3 ulp in
     44 *		64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
     45 *		result is subsequently rounded to double precision. The
     46 *		result is provably monotonic in double precision.
     47 *
     48 *	Speed: The program sTAN takes approximately 170 cycles for
     49 *		input argument X such that |X| < 15Pi, which is the the usual
     50 *		situation.
     51 *
     52 *	Algorithm:
     53 *
     54 *	1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.
     55 *
     56 *	2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
     57 *		k = N mod 2, so in particular, k = 0 or 1.
     58 *
     59 *	3. If k is odd, go to 5.
     60 *
     61 *	4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a
     62 *		rational function U/V where
     63 *		U = r + r*s*(P1 + s*(P2 + s*P3)), and
     64 *		V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))),  s = r*r.
     65 *		Exit.
     66 *
     67 *	4. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by a
     68 *		rational function U/V where
     69 *		U = r + r*s*(P1 + s*(P2 + s*P3)), and
     70 *		V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r,
     71 *		-Cot(r) = -V/U. Exit.
     72 *
     73 *	6. If |X| > 1, go to 8.
     74 *
     75 *	7. (|X|<2**(-40)) Tan(X) = X. Exit.
     76 *
     77 *	8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2.
     78 *
     79 
     80 STAN	IDNT	2,1 Motorola 040 Floating Point Software Package
     81 
     82 	section	8
     83 
     84 	include fpsp.h
     85 
     86 BOUNDS1	DC.L $3FD78000,$4004BC7E
     87 TWOBYPI	DC.L $3FE45F30,$6DC9C883
     88 
     89 TANQ4	DC.L $3EA0B759,$F50F8688
     90 TANP3	DC.L $BEF2BAA5,$A8924F04
     91 
     92 TANQ3	DC.L $BF346F59,$B39BA65F,$00000000,$00000000
     93 
     94 TANP2	DC.L $3FF60000,$E073D3FC,$199C4A00,$00000000
     95 
     96 TANQ2	DC.L $3FF90000,$D23CD684,$15D95FA1,$00000000
     97 
     98 TANP1	DC.L $BFFC0000,$8895A6C5,$FB423BCA,$00000000
     99 
    100 TANQ1	DC.L $BFFD0000,$EEF57E0D,$A84BC8CE,$00000000
    101 
    102 INVTWOPI DC.L $3FFC0000,$A2F9836E,$4E44152A,$00000000
    103 
    104 TWOPI1	DC.L $40010000,$C90FDAA2,$00000000,$00000000
    105 TWOPI2	DC.L $3FDF0000,$85A308D4,$00000000,$00000000
    106 
    107 *--N*PI/2, -32 <= N <= 32, IN A LEADING TERM IN EXT. AND TRAILING
    108 *--TERM IN SGL. NOTE THAT PI IS 64-BIT LONG, THUS N*PI/2 IS AT
    109 *--MOST 69 BITS LONG.
    110 	xdef	PITBL
    111 PITBL:
    112   DC.L  $C0040000,$C90FDAA2,$2168C235,$21800000
    113   DC.L  $C0040000,$C2C75BCD,$105D7C23,$A0D00000
    114   DC.L  $C0040000,$BC7EDCF7,$FF523611,$A1E80000
    115   DC.L  $C0040000,$B6365E22,$EE46F000,$21480000
    116   DC.L  $C0040000,$AFEDDF4D,$DD3BA9EE,$A1200000
    117   DC.L  $C0040000,$A9A56078,$CC3063DD,$21FC0000
    118   DC.L  $C0040000,$A35CE1A3,$BB251DCB,$21100000
    119   DC.L  $C0040000,$9D1462CE,$AA19D7B9,$A1580000
    120   DC.L  $C0040000,$96CBE3F9,$990E91A8,$21E00000
    121   DC.L  $C0040000,$90836524,$88034B96,$20B00000
    122   DC.L  $C0040000,$8A3AE64F,$76F80584,$A1880000
    123   DC.L  $C0040000,$83F2677A,$65ECBF73,$21C40000
    124   DC.L  $C0030000,$FB53D14A,$A9C2F2C2,$20000000
    125   DC.L  $C0030000,$EEC2D3A0,$87AC669F,$21380000
    126   DC.L  $C0030000,$E231D5F6,$6595DA7B,$A1300000
    127   DC.L  $C0030000,$D5A0D84C,$437F4E58,$9FC00000
    128   DC.L  $C0030000,$C90FDAA2,$2168C235,$21000000
    129   DC.L  $C0030000,$BC7EDCF7,$FF523611,$A1680000
    130   DC.L  $C0030000,$AFEDDF4D,$DD3BA9EE,$A0A00000
    131   DC.L  $C0030000,$A35CE1A3,$BB251DCB,$20900000
    132   DC.L  $C0030000,$96CBE3F9,$990E91A8,$21600000
    133   DC.L  $C0030000,$8A3AE64F,$76F80584,$A1080000
    134   DC.L  $C0020000,$FB53D14A,$A9C2F2C2,$1F800000
    135   DC.L  $C0020000,$E231D5F6,$6595DA7B,$A0B00000
    136   DC.L  $C0020000,$C90FDAA2,$2168C235,$20800000
    137   DC.L  $C0020000,$AFEDDF4D,$DD3BA9EE,$A0200000
    138   DC.L  $C0020000,$96CBE3F9,$990E91A8,$20E00000
    139   DC.L  $C0010000,$FB53D14A,$A9C2F2C2,$1F000000
    140   DC.L  $C0010000,$C90FDAA2,$2168C235,$20000000
    141   DC.L  $C0010000,$96CBE3F9,$990E91A8,$20600000
    142   DC.L  $C0000000,$C90FDAA2,$2168C235,$1F800000
    143   DC.L  $BFFF0000,$C90FDAA2,$2168C235,$1F000000
    144   DC.L  $00000000,$00000000,$00000000,$00000000
    145   DC.L  $3FFF0000,$C90FDAA2,$2168C235,$9F000000
    146   DC.L  $40000000,$C90FDAA2,$2168C235,$9F800000
    147   DC.L  $40010000,$96CBE3F9,$990E91A8,$A0600000
    148   DC.L  $40010000,$C90FDAA2,$2168C235,$A0000000
    149   DC.L  $40010000,$FB53D14A,$A9C2F2C2,$9F000000
    150   DC.L  $40020000,$96CBE3F9,$990E91A8,$A0E00000
    151   DC.L  $40020000,$AFEDDF4D,$DD3BA9EE,$20200000
    152   DC.L  $40020000,$C90FDAA2,$2168C235,$A0800000
    153   DC.L  $40020000,$E231D5F6,$6595DA7B,$20B00000
    154   DC.L  $40020000,$FB53D14A,$A9C2F2C2,$9F800000
    155   DC.L  $40030000,$8A3AE64F,$76F80584,$21080000
    156   DC.L  $40030000,$96CBE3F9,$990E91A8,$A1600000
    157   DC.L  $40030000,$A35CE1A3,$BB251DCB,$A0900000
    158   DC.L  $40030000,$AFEDDF4D,$DD3BA9EE,$20A00000
    159   DC.L  $40030000,$BC7EDCF7,$FF523611,$21680000
    160   DC.L  $40030000,$C90FDAA2,$2168C235,$A1000000
    161   DC.L  $40030000,$D5A0D84C,$437F4E58,$1FC00000
    162   DC.L  $40030000,$E231D5F6,$6595DA7B,$21300000
    163   DC.L  $40030000,$EEC2D3A0,$87AC669F,$A1380000
    164   DC.L  $40030000,$FB53D14A,$A9C2F2C2,$A0000000
    165   DC.L  $40040000,$83F2677A,$65ECBF73,$A1C40000
    166   DC.L  $40040000,$8A3AE64F,$76F80584,$21880000
    167   DC.L  $40040000,$90836524,$88034B96,$A0B00000
    168   DC.L  $40040000,$96CBE3F9,$990E91A8,$A1E00000
    169   DC.L  $40040000,$9D1462CE,$AA19D7B9,$21580000
    170   DC.L  $40040000,$A35CE1A3,$BB251DCB,$A1100000
    171   DC.L  $40040000,$A9A56078,$CC3063DD,$A1FC0000
    172   DC.L  $40040000,$AFEDDF4D,$DD3BA9EE,$21200000
    173   DC.L  $40040000,$B6365E22,$EE46F000,$A1480000
    174   DC.L  $40040000,$BC7EDCF7,$FF523611,$21E80000
    175   DC.L  $40040000,$C2C75BCD,$105D7C23,$20D00000
    176   DC.L  $40040000,$C90FDAA2,$2168C235,$A1800000
    177 
    178 INARG	equ	FP_SCR4
    179 
    180 TWOTO63 equ     L_SCR1
    181 ENDFLAG	equ	L_SCR2
    182 N       equ     L_SCR3
    183 
    184 	xref	t_frcinx
    185 	xref	t_extdnrm
    186 
    187 	xdef	stand
    188 stand:
    189 *--TAN(X) = X FOR DENORMALIZED X
    190 
    191 	bra		t_extdnrm
    192 
    193 	xdef	stan
    194 stan:
    195 	FMOVE.X		(a0),FP0	...LOAD INPUT
    196 
    197 	MOVE.L		(A0),D0
    198 	MOVE.W		4(A0),D0
    199 	ANDI.L		#$7FFFFFFF,D0
    200 
    201 	CMPI.L		#$3FD78000,D0		...|X| >= 2**(-40)?
    202 	BGE.B		TANOK1
    203 	BRA.W		TANSM
    204 TANOK1:
    205 	CMPI.L		#$4004BC7E,D0		...|X| < 15 PI?
    206 	BLT.B		TANMAIN
    207 	BRA.W		REDUCEX
    208 
    209 
    210 TANMAIN:
    211 *--THIS IS THE USUAL CASE, |X| <= 15 PI.
    212 *--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
    213 	FMOVE.X		FP0,FP1
    214 	FMUL.D		TWOBYPI,FP1	...X*2/PI
    215 
    216 *--HIDE THE NEXT TWO INSTRUCTIONS
    217 	lea.l		PITBL+$200,a1 ...TABLE OF N*PI/2, N = -32,...,32
    218 
    219 *--FP1 IS NOW READY
    220 	FMOVE.L		FP1,D0		...CONVERT TO INTEGER
    221 
    222 	ASL.L		#4,D0
    223 	ADDA.L		D0,a1		...ADDRESS N*PIBY2 IN Y1, Y2
    224 
    225 	FSUB.X		(a1)+,FP0	...X-Y1
    226 *--HIDE THE NEXT ONE
    227 
    228 	FSUB.S		(a1),FP0	...FP0 IS R = (X-Y1)-Y2
    229 
    230 	ROR.L		#5,D0
    231 	ANDI.L		#$80000000,D0	...D0 WAS ODD IFF D0 < 0
    232 
    233 TANCONT:
    234 
    235 	CMPI.L		#0,D0
    236 	BLT.W		NODD
    237 
    238 	FMOVE.X		FP0,FP1
    239 	FMUL.X		FP1,FP1	 	...S = R*R
    240 
    241 	FMOVE.D		TANQ4,FP3
    242 	FMOVE.D		TANP3,FP2
    243 
    244 	FMUL.X		FP1,FP3	 	...SQ4
    245 	FMUL.X		FP1,FP2	 	...SP3
    246 
    247 	FADD.D		TANQ3,FP3	...Q3+SQ4
    248 	FADD.X		TANP2,FP2	...P2+SP3
    249 
    250 	FMUL.X		FP1,FP3	 	...S(Q3+SQ4)
    251 	FMUL.X		FP1,FP2	 	...S(P2+SP3)
    252 
    253 	FADD.X		TANQ2,FP3	...Q2+S(Q3+SQ4)
    254 	FADD.X		TANP1,FP2	...P1+S(P2+SP3)
    255 
    256 	FMUL.X		FP1,FP3	 	...S(Q2+S(Q3+SQ4))
    257 	FMUL.X		FP1,FP2	 	...S(P1+S(P2+SP3))
    258 
    259 	FADD.X		TANQ1,FP3	...Q1+S(Q2+S(Q3+SQ4))
    260 	FMUL.X		FP0,FP2	 	...RS(P1+S(P2+SP3))
    261 
    262 	FMUL.X		FP3,FP1	 	...S(Q1+S(Q2+S(Q3+SQ4)))
    263 	
    264 
    265 	FADD.X		FP2,FP0	 	...R+RS(P1+S(P2+SP3))
    266 	
    267 
    268 	FADD.S		#:3F800000,FP1	...1+S(Q1+...)
    269 
    270 	FMOVE.L		d1,fpcr		;restore users exceptions
    271 	FDIV.X		FP1,FP0		;last inst - possible exception set
    272 
    273 	bra		t_frcinx
    274 
    275 NODD:
    276 	FMOVE.X		FP0,FP1
    277 	FMUL.X		FP0,FP0	 	...S = R*R
    278 
    279 	FMOVE.D		TANQ4,FP3
    280 	FMOVE.D		TANP3,FP2
    281 
    282 	FMUL.X		FP0,FP3	 	...SQ4
    283 	FMUL.X		FP0,FP2	 	...SP3
    284 
    285 	FADD.D		TANQ3,FP3	...Q3+SQ4
    286 	FADD.X		TANP2,FP2	...P2+SP3
    287 
    288 	FMUL.X		FP0,FP3	 	...S(Q3+SQ4)
    289 	FMUL.X		FP0,FP2	 	...S(P2+SP3)
    290 
    291 	FADD.X		TANQ2,FP3	...Q2+S(Q3+SQ4)
    292 	FADD.X		TANP1,FP2	...P1+S(P2+SP3)
    293 
    294 	FMUL.X		FP0,FP3	 	...S(Q2+S(Q3+SQ4))
    295 	FMUL.X		FP0,FP2	 	...S(P1+S(P2+SP3))
    296 
    297 	FADD.X		TANQ1,FP3	...Q1+S(Q2+S(Q3+SQ4))
    298 	FMUL.X		FP1,FP2	 	...RS(P1+S(P2+SP3))
    299 
    300 	FMUL.X		FP3,FP0	 	...S(Q1+S(Q2+S(Q3+SQ4)))
    301 	
    302 
    303 	FADD.X		FP2,FP1	 	...R+RS(P1+S(P2+SP3))
    304 	FADD.S		#:3F800000,FP0	...1+S(Q1+...)
    305 	
    306 
    307 	FMOVE.X		FP1,-(sp)
    308 	EORI.L		#$80000000,(sp)
    309 
    310 	FMOVE.L		d1,fpcr	 	;restore users exceptions
    311 	FDIV.X		(sp)+,FP0	;last inst - possible exception set
    312 
    313 	bra		t_frcinx
    314 
    315 TANBORS:
    316 *--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
    317 *--IF |X| < 2**(-40), RETURN X OR 1.
    318 	CMPI.L		#$3FFF8000,D0
    319 	BGT.B		REDUCEX
    320 
    321 TANSM:
    322 
    323 	FMOVE.X		FP0,-(sp)
    324 	FMOVE.L		d1,fpcr		 ;restore users exceptions
    325 	FMOVE.X		(sp)+,FP0	;last inst - posibble exception set
    326 
    327 	bra		t_frcinx
    328 
    329 
    330 REDUCEX:
    331 *--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
    332 *--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
    333 *--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
    334 
    335 	FMOVEM.X	FP2-FP5,-(A7)	...save FP2 through FP5
    336 	MOVE.L		D2,-(A7)
    337         FMOVE.S         #:00000000,FP1
    338 
    339 *--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
    340 *--there is a danger of unwanted overflow in first LOOP iteration.  In this
    341 *--case, reduce argument by one remainder step to make subsequent reduction
    342 *--safe.
    343 	cmpi.l	#$7ffeffff,d0		;is argument dangerously large?
    344 	bne.b	LOOP
    345 	move.l	#$7ffe0000,FP_SCR2(a6)	;yes
    346 *					;create 2**16383*PI/2
    347 	move.l	#$c90fdaa2,FP_SCR2+4(a6)
    348 	clr.l	FP_SCR2+8(a6)
    349 	ftst.x	fp0			;test sign of argument
    350 	move.l	#$7fdc0000,FP_SCR3(a6)	;create low half of 2**16383*
    351 *					;PI/2 at FP_SCR3
    352 	move.l	#$85a308d3,FP_SCR3+4(a6)
    353 	clr.l   FP_SCR3+8(a6)
    354 	fblt.w	red_neg
    355 	or.w	#$8000,FP_SCR2(a6)	;positive arg
    356 	or.w	#$8000,FP_SCR3(a6)
    357 red_neg:
    358 	fadd.x  FP_SCR2(a6),fp0		;high part of reduction is exact
    359 	fmove.x  fp0,fp1		;save high result in fp1
    360 	fadd.x  FP_SCR3(a6),fp0		;low part of reduction
    361 	fsub.x  fp0,fp1			;determine low component of result
    362 	fadd.x  FP_SCR3(a6),fp1		;fp0/fp1 are reduced argument.
    363 
    364 *--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
    365 *--integer quotient will be stored in N
    366 *--Intermeditate remainder is 66-bit long; (R,r) in (FP0,FP1)
    367 
    368 LOOP:
    369 	FMOVE.X		FP0,INARG(a6)	...+-2**K * F, 1 <= F < 2
    370 	MOVE.W		INARG(a6),D0
    371         MOVE.L          D0,A1		...save a copy of D0
    372 	ANDI.L		#$00007FFF,D0
    373 	SUBI.L		#$00003FFF,D0	...D0 IS K
    374 	CMPI.L		#28,D0
    375 	BLE.B		LASTLOOP
    376 CONTLOOP:
    377 	SUBI.L		#27,D0	 ...D0 IS L := K-27
    378 	MOVE.L		#0,ENDFLAG(a6)
    379 	BRA.B		WORK
    380 LASTLOOP:
    381 	CLR.L		D0		...D0 IS L := 0
    382 	MOVE.L		#1,ENDFLAG(a6)
    383 
    384 WORK:
    385 *--FIND THE REMAINDER OF (R,r) W.R.T.	2**L * (PI/2). L IS SO CHOSEN
    386 *--THAT	INT( X * (2/PI) / 2**(L) ) < 2**29.
    387 
    388 *--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
    389 *--2**L * (PIby2_1), 2**L * (PIby2_2)
    390 
    391 	MOVE.L		#$00003FFE,D2	...BIASED EXPO OF 2/PI
    392 	SUB.L		D0,D2		...BIASED EXPO OF 2**(-L)*(2/PI)
    393 
    394 	MOVE.L		#$A2F9836E,FP_SCR1+4(a6)
    395 	MOVE.L		#$4E44152A,FP_SCR1+8(a6)
    396 	MOVE.W		D2,FP_SCR1(a6)	...FP_SCR1 is 2**(-L)*(2/PI)
    397 
    398 	FMOVE.X		FP0,FP2
    399 	FMUL.X		FP_SCR1(a6),FP2
    400 *--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
    401 *--FLOATING POINT FORMAT, THE TWO FMOVE'S	FMOVE.L FP <--> N
    402 *--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
    403 *--(SIGN(INARG)*2**63	+	FP2) - SIGN(INARG)*2**63 WILL GIVE
    404 *--US THE DESIRED VALUE IN FLOATING POINT.
    405 
    406 *--HIDE SIX CYCLES OF INSTRUCTION
    407         MOVE.L		A1,D2
    408         SWAP		D2
    409 	ANDI.L		#$80000000,D2
    410 	ORI.L		#$5F000000,D2	...D2 IS SIGN(INARG)*2**63 IN SGL
    411 	MOVE.L		D2,TWOTO63(a6)
    412 
    413 	MOVE.L		D0,D2
    414 	ADDI.L		#$00003FFF,D2	...BIASED EXPO OF 2**L * (PI/2)
    415 
    416 *--FP2 IS READY
    417 	FADD.S		TWOTO63(a6),FP2	...THE FRACTIONAL PART OF FP1 IS ROUNDED
    418 
    419 *--HIDE 4 CYCLES OF INSTRUCTION; creating 2**(L)*Piby2_1  and  2**(L)*Piby2_2
    420         MOVE.W		D2,FP_SCR2(a6)
    421 	CLR.W           FP_SCR2+2(a6)
    422 	MOVE.L		#$C90FDAA2,FP_SCR2+4(a6)
    423 	CLR.L		FP_SCR2+8(a6)		...FP_SCR2 is  2**(L) * Piby2_1	
    424 
    425 *--FP2 IS READY
    426 	FSUB.S		TWOTO63(a6),FP2		...FP2 is N
    427 
    428 	ADDI.L		#$00003FDD,D0
    429         MOVE.W		D0,FP_SCR3(a6)
    430 	CLR.W           FP_SCR3+2(a6)
    431 	MOVE.L		#$85A308D3,FP_SCR3+4(a6)
    432 	CLR.L		FP_SCR3+8(a6)		...FP_SCR3 is 2**(L) * Piby2_2
    433 
    434 	MOVE.L		ENDFLAG(a6),D0
    435 
    436 *--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
    437 *--P2 = 2**(L) * Piby2_2
    438 	FMOVE.X		FP2,FP4
    439 	FMul.X		FP_SCR2(a6),FP4		...W = N*P1
    440 	FMove.X		FP2,FP5
    441 	FMul.X		FP_SCR3(a6),FP5		...w = N*P2
    442 	FMove.X		FP4,FP3
    443 *--we want P+p = W+w  but  |p| <= half ulp of P
    444 *--Then, we need to compute  A := R-P   and  a := r-p
    445 	FAdd.X		FP5,FP3			...FP3 is P
    446 	FSub.X		FP3,FP4			...W-P
    447 
    448 	FSub.X		FP3,FP0			...FP0 is A := R - P
    449         FAdd.X		FP5,FP4			...FP4 is p = (W-P)+w
    450 
    451 	FMove.X		FP0,FP3			...FP3 A
    452 	FSub.X		FP4,FP1			...FP1 is a := r - p
    453 
    454 *--Now we need to normalize (A,a) to  "new (R,r)" where R+r = A+a but
    455 *--|r| <= half ulp of R.
    456 	FAdd.X		FP1,FP0			...FP0 is R := A+a
    457 *--No need to calculate r if this is the last loop
    458 	CMPI.L		#0,D0
    459 	BGT.W		RESTORE
    460 
    461 *--Need to calculate r
    462 	FSub.X		FP0,FP3			...A-R
    463 	FAdd.X		FP3,FP1			...FP1 is r := (A-R)+a
    464 	BRA.W		LOOP
    465 
    466 RESTORE:
    467         FMOVE.L		FP2,N(a6)
    468 	MOVE.L		(A7)+,D2
    469 	FMOVEM.X	(A7)+,FP2-FP5
    470 
    471 	
    472 	MOVE.L		N(a6),D0
    473         ROR.L		#1,D0
    474 
    475 
    476 	BRA.W		TANCONT
    477 
    478 	end
    479