Home | History | Annotate | Line # | Download | only in fpsp
      1 *	$NetBSD: stan.sa,v 1.4 2000/03/13 23:52:32 soren Exp $
      2 
      3 *	MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
      4 *	M68000 Hi-Performance Microprocessor Division
      5 *	M68040 Software Package 
      6 *
      7 *	M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
      8 *	All rights reserved.
      9 *
     10 *	THE SOFTWARE is provided on an "AS IS" basis and without warranty.
     11 *	To the maximum extent permitted by applicable law,
     12 *	MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
     13 *	INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
     14 *	PARTICULAR PURPOSE and any warranty against infringement with
     15 *	regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
     16 *	and any accompanying written materials. 
     17 *
     18 *	To the maximum extent permitted by applicable law,
     19 *	IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
     20 *	(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
     21 *	PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
     22 *	OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
     23 *	SOFTWARE.  Motorola assumes no responsibility for the maintenance
     24 *	and support of the SOFTWARE.  
     25 *
     26 *	You are hereby granted a copyright license to use, modify, and
     27 *	distribute the SOFTWARE so long as this entire notice is retained
     28 *	without alteration in any modified and/or redistributed versions,
     29 *	and that such modified versions are clearly identified as such.
     30 *	No licenses are granted by implication, estoppel or otherwise
     31 *	under any patents or trademarks of Motorola, Inc.
     32 
     33 *
     34 *	stan.sa 3.3 7/29/91
     35 *
     36 *	The entry point stan computes the tangent of
     37 *	an input argument;
     38 *	stand does the same except for denormalized input.
     39 *
     40 *	Input: Double-extended number X in location pointed to
     41 *		by address register a0.
     42 *
     43 *	Output: The value tan(X) returned in floating-point register Fp0.
     44 *
     45 *	Accuracy and Monotonicity: The returned result is within 3 ulp in
     46 *		64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
     47 *		result is subsequently rounded to double precision. The
     48 *		result is provably monotonic in double precision.
     49 *
     50 *	Speed: The program sTAN takes approximately 170 cycles for
     51 *		input argument X such that |X| < 15Pi, which is the usual
     52 *		situation.
     53 *
     54 *	Algorithm:
     55 *
     56 *	1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.
     57 *
     58 *	2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
     59 *		k = N mod 2, so in particular, k = 0 or 1.
     60 *
     61 *	3. If k is odd, go to 5.
     62 *
     63 *	4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a
     64 *		rational function U/V where
     65 *		U = r + r*s*(P1 + s*(P2 + s*P3)), and
     66 *		V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))),  s = r*r.
     67 *		Exit.
     68 *
     69 *	4. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by a
     70 *		rational function U/V where
     71 *		U = r + r*s*(P1 + s*(P2 + s*P3)), and
     72 *		V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r,
     73 *		-Cot(r) = -V/U. Exit.
     74 *
     75 *	6. If |X| > 1, go to 8.
     76 *
     77 *	7. (|X|<2**(-40)) Tan(X) = X. Exit.
     78 *
     79 *	8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2.
     80 *
     81 
     82 STAN	IDNT	2,1 Motorola 040 Floating Point Software Package
     83 
     84 	section	8
     85 
     86 	include fpsp.h
     87 
     88 BOUNDS1	DC.L $3FD78000,$4004BC7E
     89 TWOBYPI	DC.L $3FE45F30,$6DC9C883
     90 
     91 TANQ4	DC.L $3EA0B759,$F50F8688
     92 TANP3	DC.L $BEF2BAA5,$A8924F04
     93 
     94 TANQ3	DC.L $BF346F59,$B39BA65F,$00000000,$00000000
     95 
     96 TANP2	DC.L $3FF60000,$E073D3FC,$199C4A00,$00000000
     97 
     98 TANQ2	DC.L $3FF90000,$D23CD684,$15D95FA1,$00000000
     99 
    100 TANP1	DC.L $BFFC0000,$8895A6C5,$FB423BCA,$00000000
    101 
    102 TANQ1	DC.L $BFFD0000,$EEF57E0D,$A84BC8CE,$00000000
    103 
    104 INVTWOPI DC.L $3FFC0000,$A2F9836E,$4E44152A,$00000000
    105 
    106 TWOPI1	DC.L $40010000,$C90FDAA2,$00000000,$00000000
    107 TWOPI2	DC.L $3FDF0000,$85A308D4,$00000000,$00000000
    108 
    109 *--N*PI/2, -32 <= N <= 32, IN A LEADING TERM IN EXT. AND TRAILING
    110 *--TERM IN SGL. NOTE THAT PI IS 64-BIT LONG, THUS N*PI/2 IS AT
    111 *--MOST 69 BITS LONG.
    112 	xdef	PITBL
    113 PITBL:
    114   DC.L  $C0040000,$C90FDAA2,$2168C235,$21800000
    115   DC.L  $C0040000,$C2C75BCD,$105D7C23,$A0D00000
    116   DC.L  $C0040000,$BC7EDCF7,$FF523611,$A1E80000
    117   DC.L  $C0040000,$B6365E22,$EE46F000,$21480000
    118   DC.L  $C0040000,$AFEDDF4D,$DD3BA9EE,$A1200000
    119   DC.L  $C0040000,$A9A56078,$CC3063DD,$21FC0000
    120   DC.L  $C0040000,$A35CE1A3,$BB251DCB,$21100000
    121   DC.L  $C0040000,$9D1462CE,$AA19D7B9,$A1580000
    122   DC.L  $C0040000,$96CBE3F9,$990E91A8,$21E00000
    123   DC.L  $C0040000,$90836524,$88034B96,$20B00000
    124   DC.L  $C0040000,$8A3AE64F,$76F80584,$A1880000
    125   DC.L  $C0040000,$83F2677A,$65ECBF73,$21C40000
    126   DC.L  $C0030000,$FB53D14A,$A9C2F2C2,$20000000
    127   DC.L  $C0030000,$EEC2D3A0,$87AC669F,$21380000
    128   DC.L  $C0030000,$E231D5F6,$6595DA7B,$A1300000
    129   DC.L  $C0030000,$D5A0D84C,$437F4E58,$9FC00000
    130   DC.L  $C0030000,$C90FDAA2,$2168C235,$21000000
    131   DC.L  $C0030000,$BC7EDCF7,$FF523611,$A1680000
    132   DC.L  $C0030000,$AFEDDF4D,$DD3BA9EE,$A0A00000
    133   DC.L  $C0030000,$A35CE1A3,$BB251DCB,$20900000
    134   DC.L  $C0030000,$96CBE3F9,$990E91A8,$21600000
    135   DC.L  $C0030000,$8A3AE64F,$76F80584,$A1080000
    136   DC.L  $C0020000,$FB53D14A,$A9C2F2C2,$1F800000
    137   DC.L  $C0020000,$E231D5F6,$6595DA7B,$A0B00000
    138   DC.L  $C0020000,$C90FDAA2,$2168C235,$20800000
    139   DC.L  $C0020000,$AFEDDF4D,$DD3BA9EE,$A0200000
    140   DC.L  $C0020000,$96CBE3F9,$990E91A8,$20E00000
    141   DC.L  $C0010000,$FB53D14A,$A9C2F2C2,$1F000000
    142   DC.L  $C0010000,$C90FDAA2,$2168C235,$20000000
    143   DC.L  $C0010000,$96CBE3F9,$990E91A8,$20600000
    144   DC.L  $C0000000,$C90FDAA2,$2168C235,$1F800000
    145   DC.L  $BFFF0000,$C90FDAA2,$2168C235,$1F000000
    146   DC.L  $00000000,$00000000,$00000000,$00000000
    147   DC.L  $3FFF0000,$C90FDAA2,$2168C235,$9F000000
    148   DC.L  $40000000,$C90FDAA2,$2168C235,$9F800000
    149   DC.L  $40010000,$96CBE3F9,$990E91A8,$A0600000
    150   DC.L  $40010000,$C90FDAA2,$2168C235,$A0000000
    151   DC.L  $40010000,$FB53D14A,$A9C2F2C2,$9F000000
    152   DC.L  $40020000,$96CBE3F9,$990E91A8,$A0E00000
    153   DC.L  $40020000,$AFEDDF4D,$DD3BA9EE,$20200000
    154   DC.L  $40020000,$C90FDAA2,$2168C235,$A0800000
    155   DC.L  $40020000,$E231D5F6,$6595DA7B,$20B00000
    156   DC.L  $40020000,$FB53D14A,$A9C2F2C2,$9F800000
    157   DC.L  $40030000,$8A3AE64F,$76F80584,$21080000
    158   DC.L  $40030000,$96CBE3F9,$990E91A8,$A1600000
    159   DC.L  $40030000,$A35CE1A3,$BB251DCB,$A0900000
    160   DC.L  $40030000,$AFEDDF4D,$DD3BA9EE,$20A00000
    161   DC.L  $40030000,$BC7EDCF7,$FF523611,$21680000
    162   DC.L  $40030000,$C90FDAA2,$2168C235,$A1000000
    163   DC.L  $40030000,$D5A0D84C,$437F4E58,$1FC00000
    164   DC.L  $40030000,$E231D5F6,$6595DA7B,$21300000
    165   DC.L  $40030000,$EEC2D3A0,$87AC669F,$A1380000
    166   DC.L  $40030000,$FB53D14A,$A9C2F2C2,$A0000000
    167   DC.L  $40040000,$83F2677A,$65ECBF73,$A1C40000
    168   DC.L  $40040000,$8A3AE64F,$76F80584,$21880000
    169   DC.L  $40040000,$90836524,$88034B96,$A0B00000
    170   DC.L  $40040000,$96CBE3F9,$990E91A8,$A1E00000
    171   DC.L  $40040000,$9D1462CE,$AA19D7B9,$21580000
    172   DC.L  $40040000,$A35CE1A3,$BB251DCB,$A1100000
    173   DC.L  $40040000,$A9A56078,$CC3063DD,$A1FC0000
    174   DC.L  $40040000,$AFEDDF4D,$DD3BA9EE,$21200000
    175   DC.L  $40040000,$B6365E22,$EE46F000,$A1480000
    176   DC.L  $40040000,$BC7EDCF7,$FF523611,$21E80000
    177   DC.L  $40040000,$C2C75BCD,$105D7C23,$20D00000
    178   DC.L  $40040000,$C90FDAA2,$2168C235,$A1800000
    179 
    180 INARG	equ	FP_SCR4
    181 
    182 TWOTO63 equ     L_SCR1
    183 ENDFLAG	equ	L_SCR2
    184 N       equ     L_SCR3
    185 
    186 	xref	t_frcinx
    187 	xref	t_extdnrm
    188 
    189 	xdef	stand
    190 stand:
    191 *--TAN(X) = X FOR DENORMALIZED X
    192 
    193 	bra		t_extdnrm
    194 
    195 	xdef	stan
    196 stan:
    197 	FMOVE.X		(a0),FP0	...LOAD INPUT
    198 
    199 	MOVE.L		(A0),D0
    200 	MOVE.W		4(A0),D0
    201 	ANDI.L		#$7FFFFFFF,D0
    202 
    203 	CMPI.L		#$3FD78000,D0		...|X| >= 2**(-40)?
    204 	BGE.B		TANOK1
    205 	BRA.W		TANSM
    206 TANOK1:
    207 	CMPI.L		#$4004BC7E,D0		...|X| < 15 PI?
    208 	BLT.B		TANMAIN
    209 	BRA.W		REDUCEX
    210 
    211 
    212 TANMAIN:
    213 *--THIS IS THE USUAL CASE, |X| <= 15 PI.
    214 *--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
    215 	FMOVE.X		FP0,FP1
    216 	FMUL.D		TWOBYPI,FP1	...X*2/PI
    217 
    218 *--HIDE THE NEXT TWO INSTRUCTIONS
    219 	lea.l		PITBL+$200,a1 ...TABLE OF N*PI/2, N = -32,...,32
    220 
    221 *--FP1 IS NOW READY
    222 	FMOVE.L		FP1,D0		...CONVERT TO INTEGER
    223 
    224 	ASL.L		#4,D0
    225 	ADDA.L		D0,a1		...ADDRESS N*PIBY2 IN Y1, Y2
    226 
    227 	FSUB.X		(a1)+,FP0	...X-Y1
    228 *--HIDE THE NEXT ONE
    229 
    230 	FSUB.S		(a1),FP0	...FP0 IS R = (X-Y1)-Y2
    231 
    232 	ROR.L		#5,D0
    233 	ANDI.L		#$80000000,D0	...D0 WAS ODD IFF D0 < 0
    234 
    235 TANCONT:
    236 
    237 	TST.L		D0
    238 	BLT.W		NODD
    239 
    240 	FMOVE.X		FP0,FP1
    241 	FMUL.X		FP1,FP1	 	...S = R*R
    242 
    243 	FMOVE.D		TANQ4,FP3
    244 	FMOVE.D		TANP3,FP2
    245 
    246 	FMUL.X		FP1,FP3	 	...SQ4
    247 	FMUL.X		FP1,FP2	 	...SP3
    248 
    249 	FADD.D		TANQ3,FP3	...Q3+SQ4
    250 	FADD.X		TANP2,FP2	...P2+SP3
    251 
    252 	FMUL.X		FP1,FP3	 	...S(Q3+SQ4)
    253 	FMUL.X		FP1,FP2	 	...S(P2+SP3)
    254 
    255 	FADD.X		TANQ2,FP3	...Q2+S(Q3+SQ4)
    256 	FADD.X		TANP1,FP2	...P1+S(P2+SP3)
    257 
    258 	FMUL.X		FP1,FP3	 	...S(Q2+S(Q3+SQ4))
    259 	FMUL.X		FP1,FP2	 	...S(P1+S(P2+SP3))
    260 
    261 	FADD.X		TANQ1,FP3	...Q1+S(Q2+S(Q3+SQ4))
    262 	FMUL.X		FP0,FP2	 	...RS(P1+S(P2+SP3))
    263 
    264 	FMUL.X		FP3,FP1	 	...S(Q1+S(Q2+S(Q3+SQ4)))
    265 	
    266 
    267 	FADD.X		FP2,FP0	 	...R+RS(P1+S(P2+SP3))
    268 	
    269 
    270 	FADD.S		#:3F800000,FP1	...1+S(Q1+...)
    271 
    272 	FMOVE.L		d1,fpcr		;restore users exceptions
    273 	FDIV.X		FP1,FP0		;last inst - possible exception set
    274 
    275 	bra		t_frcinx
    276 
    277 NODD:
    278 	FMOVE.X		FP0,FP1
    279 	FMUL.X		FP0,FP0	 	...S = R*R
    280 
    281 	FMOVE.D		TANQ4,FP3
    282 	FMOVE.D		TANP3,FP2
    283 
    284 	FMUL.X		FP0,FP3	 	...SQ4
    285 	FMUL.X		FP0,FP2	 	...SP3
    286 
    287 	FADD.D		TANQ3,FP3	...Q3+SQ4
    288 	FADD.X		TANP2,FP2	...P2+SP3
    289 
    290 	FMUL.X		FP0,FP3	 	...S(Q3+SQ4)
    291 	FMUL.X		FP0,FP2	 	...S(P2+SP3)
    292 
    293 	FADD.X		TANQ2,FP3	...Q2+S(Q3+SQ4)
    294 	FADD.X		TANP1,FP2	...P1+S(P2+SP3)
    295 
    296 	FMUL.X		FP0,FP3	 	...S(Q2+S(Q3+SQ4))
    297 	FMUL.X		FP0,FP2	 	...S(P1+S(P2+SP3))
    298 
    299 	FADD.X		TANQ1,FP3	...Q1+S(Q2+S(Q3+SQ4))
    300 	FMUL.X		FP1,FP2	 	...RS(P1+S(P2+SP3))
    301 
    302 	FMUL.X		FP3,FP0	 	...S(Q1+S(Q2+S(Q3+SQ4)))
    303 	
    304 
    305 	FADD.X		FP2,FP1	 	...R+RS(P1+S(P2+SP3))
    306 	FADD.S		#:3F800000,FP0	...1+S(Q1+...)
    307 	
    308 
    309 	FMOVE.X		FP1,-(sp)
    310 	EORI.L		#$80000000,(sp)
    311 
    312 	FMOVE.L		d1,fpcr	 	;restore users exceptions
    313 	FDIV.X		(sp)+,FP0	;last inst - possible exception set
    314 
    315 	bra		t_frcinx
    316 
    317 TANBORS:
    318 *--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
    319 *--IF |X| < 2**(-40), RETURN X OR 1.
    320 	CMPI.L		#$3FFF8000,D0
    321 	BGT.B		REDUCEX
    322 
    323 TANSM:
    324 
    325 	FMOVE.X		FP0,-(sp)
    326 	FMOVE.L		d1,fpcr		 ;restore users exceptions
    327 	FMOVE.X		(sp)+,FP0	;last inst - posibble exception set
    328 
    329 	bra		t_frcinx
    330 
    331 
    332 REDUCEX:
    333 *--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
    334 *--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
    335 *--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
    336 
    337 	FMOVEM.X	FP2-FP5,-(A7)	...save FP2 through FP5
    338 	MOVE.L		D2,-(A7)
    339         FMOVE.S         #:00000000,FP1
    340 
    341 *--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
    342 *--there is a danger of unwanted overflow in first LOOP iteration.  In this
    343 *--case, reduce argument by one remainder step to make subsequent reduction
    344 *--safe.
    345 	cmpi.l	#$7ffeffff,d0		;is argument dangerously large?
    346 	bne.b	LOOP
    347 	move.l	#$7ffe0000,FP_SCR2(a6)	;yes
    348 *					;create 2**16383*PI/2
    349 	move.l	#$c90fdaa2,FP_SCR2+4(a6)
    350 	clr.l	FP_SCR2+8(a6)
    351 	ftst.x	fp0			;test sign of argument
    352 	move.l	#$7fdc0000,FP_SCR3(a6)	;create low half of 2**16383*
    353 *					;PI/2 at FP_SCR3
    354 	move.l	#$85a308d3,FP_SCR3+4(a6)
    355 	clr.l   FP_SCR3+8(a6)
    356 	fblt.w	red_neg
    357 	or.w	#$8000,FP_SCR2(a6)	;positive arg
    358 	or.w	#$8000,FP_SCR3(a6)
    359 red_neg:
    360 	fadd.x  FP_SCR2(a6),fp0		;high part of reduction is exact
    361 	fmove.x  fp0,fp1		;save high result in fp1
    362 	fadd.x  FP_SCR3(a6),fp0		;low part of reduction
    363 	fsub.x  fp0,fp1			;determine low component of result
    364 	fadd.x  FP_SCR3(a6),fp1		;fp0/fp1 are reduced argument.
    365 
    366 *--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
    367 *--integer quotient will be stored in N
    368 *--Intermeditate remainder is 66-bit long; (R,r) in (FP0,FP1)
    369 
    370 LOOP:
    371 	FMOVE.X		FP0,INARG(a6)	...+-2**K * F, 1 <= F < 2
    372 	MOVE.W		INARG(a6),D0
    373         MOVE.L          D0,A1		...save a copy of D0
    374 	ANDI.L		#$00007FFF,D0
    375 	SUBI.L		#$00003FFF,D0	...D0 IS K
    376 	CMPI.L		#28,D0
    377 	BLE.B		LASTLOOP
    378 CONTLOOP:
    379 	SUBI.L		#27,D0	 ...D0 IS L := K-27
    380 	CLR.L		ENDFLAG(a6)
    381 	BRA.B		WORK
    382 LASTLOOP:
    383 	CLR.L		D0		...D0 IS L := 0
    384 	MOVE.L		#1,ENDFLAG(a6)
    385 
    386 WORK:
    387 *--FIND THE REMAINDER OF (R,r) W.R.T.	2**L * (PI/2). L IS SO CHOSEN
    388 *--THAT	INT( X * (2/PI) / 2**(L) ) < 2**29.
    389 
    390 *--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
    391 *--2**L * (PIby2_1), 2**L * (PIby2_2)
    392 
    393 	MOVE.L		#$00003FFE,D2	...BIASED EXPO OF 2/PI
    394 	SUB.L		D0,D2		...BIASED EXPO OF 2**(-L)*(2/PI)
    395 
    396 	MOVE.L		#$A2F9836E,FP_SCR1+4(a6)
    397 	MOVE.L		#$4E44152A,FP_SCR1+8(a6)
    398 	MOVE.W		D2,FP_SCR1(a6)	...FP_SCR1 is 2**(-L)*(2/PI)
    399 
    400 	FMOVE.X		FP0,FP2
    401 	FMUL.X		FP_SCR1(a6),FP2
    402 *--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
    403 *--FLOATING POINT FORMAT, THE TWO FMOVE'S	FMOVE.L FP <--> N
    404 *--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
    405 *--(SIGN(INARG)*2**63	+	FP2) - SIGN(INARG)*2**63 WILL GIVE
    406 *--US THE DESIRED VALUE IN FLOATING POINT.
    407 
    408 *--HIDE SIX CYCLES OF INSTRUCTION
    409         MOVE.L		A1,D2
    410         SWAP		D2
    411 	ANDI.L		#$80000000,D2
    412 	ORI.L		#$5F000000,D2	...D2 IS SIGN(INARG)*2**63 IN SGL
    413 	MOVE.L		D2,TWOTO63(a6)
    414 
    415 	MOVE.L		D0,D2
    416 	ADDI.L		#$00003FFF,D2	...BIASED EXPO OF 2**L * (PI/2)
    417 
    418 *--FP2 IS READY
    419 	FADD.S		TWOTO63(a6),FP2	...THE FRACTIONAL PART OF FP1 IS ROUNDED
    420 
    421 *--HIDE 4 CYCLES OF INSTRUCTION; creating 2**(L)*Piby2_1  and  2**(L)*Piby2_2
    422         MOVE.W		D2,FP_SCR2(a6)
    423 	CLR.W           FP_SCR2+2(a6)
    424 	MOVE.L		#$C90FDAA2,FP_SCR2+4(a6)
    425 	CLR.L		FP_SCR2+8(a6)		...FP_SCR2 is  2**(L) * Piby2_1	
    426 
    427 *--FP2 IS READY
    428 	FSUB.S		TWOTO63(a6),FP2		...FP2 is N
    429 
    430 	ADDI.L		#$00003FDD,D0
    431         MOVE.W		D0,FP_SCR3(a6)
    432 	CLR.W           FP_SCR3+2(a6)
    433 	MOVE.L		#$85A308D3,FP_SCR3+4(a6)
    434 	CLR.L		FP_SCR3+8(a6)		...FP_SCR3 is 2**(L) * Piby2_2
    435 
    436 	MOVE.L		ENDFLAG(a6),D0
    437 
    438 *--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
    439 *--P2 = 2**(L) * Piby2_2
    440 	FMOVE.X		FP2,FP4
    441 	FMul.X		FP_SCR2(a6),FP4		...W = N*P1
    442 	FMove.X		FP2,FP5
    443 	FMul.X		FP_SCR3(a6),FP5		...w = N*P2
    444 	FMove.X		FP4,FP3
    445 *--we want P+p = W+w  but  |p| <= half ulp of P
    446 *--Then, we need to compute  A := R-P   and  a := r-p
    447 	FAdd.X		FP5,FP3			...FP3 is P
    448 	FSub.X		FP3,FP4			...W-P
    449 
    450 	FSub.X		FP3,FP0			...FP0 is A := R - P
    451         FAdd.X		FP5,FP4			...FP4 is p = (W-P)+w
    452 
    453 	FMove.X		FP0,FP3			...FP3 A
    454 	FSub.X		FP4,FP1			...FP1 is a := r - p
    455 
    456 *--Now we need to normalize (A,a) to  "new (R,r)" where R+r = A+a but
    457 *--|r| <= half ulp of R.
    458 	FAdd.X		FP1,FP0			...FP0 is R := A+a
    459 *--No need to calculate r if this is the last loop
    460 	TST.L		D0
    461 	BGT.W		RESTORE
    462 
    463 *--Need to calculate r
    464 	FSub.X		FP0,FP3			...A-R
    465 	FAdd.X		FP3,FP1			...FP1 is r := (A-R)+a
    466 	BRA.W		LOOP
    467 
    468 RESTORE:
    469         FMOVE.L		FP2,N(a6)
    470 	MOVE.L		(A7)+,D2
    471 	FMOVEM.X	(A7)+,FP2-FP5
    472 
    473 	
    474 	MOVE.L		N(a6),D0
    475         ROR.L		#1,D0
    476 
    477 
    478 	BRA.W		TANCONT
    479 
    480 	end
    481