ssin.sa revision 1.1
1*	MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
2*	M68000 Hi-Performance Microprocessor Division
3*	M68040 Software Package 
4*
5*	M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
6*	All rights reserved.
7*
8*	THE SOFTWARE is provided on an "AS IS" basis and without warranty.
9*	To the maximum extent permitted by applicable law,
10*	MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
11*	INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
12*	PARTICULAR PURPOSE and any warranty against infringement with
13*	regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
14*	and any accompanying written materials. 
15*
16*	To the maximum extent permitted by applicable law,
17*	IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
18*	(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
19*	PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
20*	OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
21*	SOFTWARE.  Motorola assumes no responsibility for the maintenance
22*	and support of the SOFTWARE.  
23*
24*	You are hereby granted a copyright license to use, modify, and
25*	distribute the SOFTWARE so long as this entire notice is retained
26*	without alteration in any modified and/or redistributed versions,
27*	and that such modified versions are clearly identified as such.
28*	No licenses are granted by implication, estoppel or otherwise
29*	under any patents or trademarks of Motorola, Inc.
30
31*
32*	ssin.sa 3.3 7/29/91
33*
34*	The entry point sSIN computes the sine of an input argument
35*	sCOS computes the cosine, and sSINCOS computes both. The
36*	corresponding entry points with a "d" computes the same
37*	corresponding function values for denormalized inputs.
38*
39*	Input: Double-extended number X in location pointed to
40*		by address register a0.
41*
42*	Output: The funtion value sin(X) or cos(X) returned in Fp0 if SIN or
43*		COS is requested. Otherwise, for SINCOS, sin(X) is returned
44*		in Fp0, and cos(X) is returned in Fp1.
45*
46*	Modifies: Fp0 for SIN or COS; both Fp0 and Fp1 for SINCOS.
47*
48*	Accuracy and Monotonicity: The returned result is within 1 ulp in
49*		64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
50*		result is subsequently rounded to double precision. The
51*		result is provably monotonic in double precision.
52*
53*	Speed: The programs sSIN and sCOS take approximately 150 cycles for
54*		input argument X such that |X| < 15Pi, which is the the usual
55*		situation. The speed for sSINCOS is approximately 190 cycles.
56*
57*	Algorithm:
58*
59*	SIN and COS:
60*	1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1.
61*
62*	2. If |X| >= 15Pi or |X| < 2**(-40), go to 7.
63*
64*	3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
65*		k = N mod 4, so in particular, k = 0,1,2,or 3. Overwirte
66*		k by k := k + AdjN.
67*
68*	4. If k is even, go to 6.
69*
70*	5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j. Return sgn*cos(r)
71*		where cos(r) is approximated by an even polynomial in r,
72*		1 + r*r*(B1+s*(B2+ ... + s*B8)),	s = r*r.
73*		Exit.
74*
75*	6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r)
76*		where sin(r) is approximated by an odd polynomial in r
77*		r + r*s*(A1+s*(A2+ ... + s*A7)),	s = r*r.
78*		Exit.
79*
80*	7. If |X| > 1, go to 9.
81*
82*	8. (|X|<2**(-40)) If SIN is invoked, return X; otherwise return 1.
83*
84*	9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 3.
85*
86*	SINCOS:
87*	1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.
88*
89*	2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
90*		k = N mod 4, so in particular, k = 0,1,2,or 3.
91*
92*	3. If k is even, go to 5.
93*
94*	4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), i.e.
95*		j1 exclusive or with the l.s.b. of k.
96*		sgn1 := (-1)**j1, sgn2 := (-1)**j2.
97*		SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where
98*		sin(r) and cos(r) are computed as odd and even polynomials
99*		in r, respectively. Exit
100*
101*	5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1.
102*		SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where
103*		sin(r) and cos(r) are computed as odd and even polynomials
104*		in r, respectively. Exit
105*
106*	6. If |X| > 1, go to 8.
107*
108*	7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit.
109*
110*	8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2.
111*
112
113SSIN	IDNT	2,1 Motorola 040 Floating Point Software Package
114
115	section	8
116
117	include	fpsp.h
118
119BOUNDS1	DC.L $3FD78000,$4004BC7E
120TWOBYPI	DC.L $3FE45F30,$6DC9C883
121
122SINA7	DC.L $BD6AAA77,$CCC994F5
123SINA6	DC.L $3DE61209,$7AAE8DA1
124
125SINA5	DC.L $BE5AE645,$2A118AE4
126SINA4	DC.L $3EC71DE3,$A5341531
127
128SINA3	DC.L $BF2A01A0,$1A018B59,$00000000,$00000000
129
130SINA2	DC.L $3FF80000,$88888888,$888859AF,$00000000
131
132SINA1	DC.L $BFFC0000,$AAAAAAAA,$AAAAAA99,$00000000
133
134COSB8	DC.L $3D2AC4D0,$D6011EE3
135COSB7	DC.L $BDA9396F,$9F45AC19
136
137COSB6	DC.L $3E21EED9,$0612C972
138COSB5	DC.L $BE927E4F,$B79D9FCF
139
140COSB4	DC.L $3EFA01A0,$1A01D423,$00000000,$00000000
141
142COSB3	DC.L $BFF50000,$B60B60B6,$0B61D438,$00000000
143
144COSB2	DC.L $3FFA0000,$AAAAAAAA,$AAAAAB5E
145COSB1	DC.L $BF000000
146
147INVTWOPI DC.L $3FFC0000,$A2F9836E,$4E44152A
148
149TWOPI1	DC.L $40010000,$C90FDAA2,$00000000,$00000000
150TWOPI2	DC.L $3FDF0000,$85A308D4,$00000000,$00000000
151
152	xref	PITBL
153
154INARG	equ	FP_SCR4
155
156X	equ	FP_SCR5
157XDCARE	equ	X+2
158XFRAC	equ	X+4
159
160RPRIME	equ	FP_SCR1
161SPRIME	equ	FP_SCR2
162
163POSNEG1	equ	L_SCR1
164TWOTO63	equ	L_SCR1
165
166ENDFLAG	equ	L_SCR2
167N	equ	L_SCR2
168
169ADJN	equ	L_SCR3
170
171	xref	t_frcinx
172	xref	t_extdnrm
173	xref	sto_cos
174
175	xdef	ssind
176ssind:
177*--SIN(X) = X FOR DENORMALIZED X
178	bra		t_extdnrm
179
180	xdef	scosd
181scosd:
182*--COS(X) = 1 FOR DENORMALIZED X
183
184	FMOVE.S		#:3F800000,FP0
185*
186*	9D25B Fix: Sometimes the previous fmove.s sets fpsr bits
187*
188	fmove.l		#0,fpsr
189*
190	bra		t_frcinx
191
192	xdef	ssin
193ssin:
194*--SET ADJN TO 0
195	MOVE.L		#0,ADJN(a6)
196	BRA.B		SINBGN
197
198	xdef	scos
199scos:
200*--SET ADJN TO 1
201	MOVE.L		#1,ADJN(a6)
202
203SINBGN:
204*--SAVE FPCR, FP1. CHECK IF |X| IS TOO SMALL OR LARGE
205
206	FMOVE.X		(a0),FP0	...LOAD INPUT
207
208	MOVE.L		(A0),D0
209	MOVE.W		4(A0),D0
210	FMOVE.X		FP0,X(a6)
211	ANDI.L		#$7FFFFFFF,D0		...COMPACTIFY X
212
213	CMPI.L		#$3FD78000,D0		...|X| >= 2**(-40)?
214	BGE.B		SOK1
215	BRA.W		SINSM
216
217SOK1:
218	CMPI.L		#$4004BC7E,D0		...|X| < 15 PI?
219	BLT.B		SINMAIN
220	BRA.W		REDUCEX
221
222SINMAIN:
223*--THIS IS THE USUAL CASE, |X| <= 15 PI.
224*--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
225	FMOVE.X		FP0,FP1
226	FMUL.D		TWOBYPI,FP1	...X*2/PI
227
228*--HIDE THE NEXT THREE INSTRUCTIONS
229	LEA		PITBL+$200,A1 ...TABLE OF N*PI/2, N = -32,...,32
230	
231
232*--FP1 IS NOW READY
233	FMOVE.L		FP1,N(a6)		...CONVERT TO INTEGER
234
235	MOVE.L		N(a6),D0
236	ASL.L		#4,D0
237	ADDA.L		D0,A1	...A1 IS THE ADDRESS OF N*PIBY2
238*				...WHICH IS IN TWO PIECES Y1 & Y2
239
240	FSUB.X		(A1)+,FP0	...X-Y1
241*--HIDE THE NEXT ONE
242	FSUB.S		(A1),FP0	...FP0 IS R = (X-Y1)-Y2
243
244SINCONT:
245*--continuation from REDUCEX
246
247*--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED
248	MOVE.L		N(a6),D0
249	ADD.L		ADJN(a6),D0	...SEE IF D0 IS ODD OR EVEN
250	ROR.L		#1,D0	...D0 WAS ODD IFF D0 IS NEGATIVE
251	CMPI.L		#0,D0
252	BLT.W		COSPOLY
253
254SINPOLY:
255*--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
256*--THEN WE RETURN	SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY
257*--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE
258*--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS
259*--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))])
260*--WHERE T=S*S.
261*--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION
262*--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT.
263	FMOVE.X		FP0,X(a6)	...X IS R
264	FMUL.X		FP0,FP0	...FP0 IS S
265*---HIDE THE NEXT TWO WHILE WAITING FOR FP0
266	FMOVE.D		SINA7,FP3
267	FMOVE.D		SINA6,FP2
268*--FP0 IS NOW READY
269	FMOVE.X		FP0,FP1
270	FMUL.X		FP1,FP1	...FP1 IS T
271*--HIDE THE NEXT TWO WHILE WAITING FOR FP1
272
273	ROR.L		#1,D0
274	ANDI.L		#$80000000,D0
275*				...LEAST SIG. BIT OF D0 IN SIGN POSITION
276	EOR.L		D0,X(a6)	...X IS NOW R'= SGN*R
277
278	FMUL.X		FP1,FP3	...TA7
279	FMUL.X		FP1,FP2	...TA6
280
281	FADD.D		SINA5,FP3 ...A5+TA7
282	FADD.D		SINA4,FP2 ...A4+TA6
283
284	FMUL.X		FP1,FP3	...T(A5+TA7)
285	FMUL.X		FP1,FP2	...T(A4+TA6)
286
287	FADD.D		SINA3,FP3 ...A3+T(A5+TA7)
288	FADD.X		SINA2,FP2 ...A2+T(A4+TA6)
289
290	FMUL.X		FP3,FP1	...T(A3+T(A5+TA7))
291
292	FMUL.X		FP0,FP2	...S(A2+T(A4+TA6))
293	FADD.X		SINA1,FP1 ...A1+T(A3+T(A5+TA7))
294	FMUL.X		X(a6),FP0	...R'*S
295
296	FADD.X		FP2,FP1	...[A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))]
297*--FP3 RELEASED, RESTORE NOW AND TAKE SOME ADVANTAGE OF HIDING
298*--FP2 RELEASED, RESTORE NOW AND TAKE FULL ADVANTAGE OF HIDING
299	
300
301	FMUL.X		FP1,FP0		...SIN(R')-R'
302*--FP1 RELEASED.
303
304	FMOVE.L		d1,FPCR		;restore users exceptions
305	FADD.X		X(a6),FP0		;last inst - possible exception set
306	bra		t_frcinx
307
308
309COSPOLY:
310*--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
311*--THEN WE RETURN	SGN*COS(R). SGN*COS(R) IS COMPUTED BY
312*--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE
313*--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS
314*--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))])
315*--WHERE T=S*S.
316*--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION
317*--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2
318*--AND IS THEREFORE STORED AS SINGLE PRECISION.
319
320	FMUL.X		FP0,FP0	...FP0 IS S
321*---HIDE THE NEXT TWO WHILE WAITING FOR FP0
322	FMOVE.D		COSB8,FP2
323	FMOVE.D		COSB7,FP3
324*--FP0 IS NOW READY
325	FMOVE.X		FP0,FP1
326	FMUL.X		FP1,FP1	...FP1 IS T
327*--HIDE THE NEXT TWO WHILE WAITING FOR FP1
328	FMOVE.X		FP0,X(a6)	...X IS S
329	ROR.L		#1,D0
330	ANDI.L		#$80000000,D0
331*			...LEAST SIG. BIT OF D0 IN SIGN POSITION
332
333	FMUL.X		FP1,FP2	...TB8
334*--HIDE THE NEXT TWO WHILE WAITING FOR THE XU
335	EOR.L		D0,X(a6)	...X IS NOW S'= SGN*S
336	ANDI.L		#$80000000,D0
337
338	FMUL.X		FP1,FP3	...TB7
339*--HIDE THE NEXT TWO WHILE WAITING FOR THE XU
340	ORI.L		#$3F800000,D0	...D0 IS SGN IN SINGLE
341	MOVE.L		D0,POSNEG1(a6)
342
343	FADD.D		COSB6,FP2 ...B6+TB8
344	FADD.D		COSB5,FP3 ...B5+TB7
345
346	FMUL.X		FP1,FP2	...T(B6+TB8)
347	FMUL.X		FP1,FP3	...T(B5+TB7)
348
349	FADD.D		COSB4,FP2 ...B4+T(B6+TB8)
350	FADD.X		COSB3,FP3 ...B3+T(B5+TB7)
351
352	FMUL.X		FP1,FP2	...T(B4+T(B6+TB8))
353	FMUL.X		FP3,FP1	...T(B3+T(B5+TB7))
354
355	FADD.X		COSB2,FP2 ...B2+T(B4+T(B6+TB8))
356	FADD.S		COSB1,FP1 ...B1+T(B3+T(B5+TB7))
357
358	FMUL.X		FP2,FP0	...S(B2+T(B4+T(B6+TB8)))
359*--FP3 RELEASED, RESTORE NOW AND TAKE SOME ADVANTAGE OF HIDING
360*--FP2 RELEASED.
361	
362
363	FADD.X		FP1,FP0
364*--FP1 RELEASED
365
366	FMUL.X		X(a6),FP0
367
368	FMOVE.L		d1,FPCR		;restore users exceptions
369	FADD.S		POSNEG1(a6),FP0	;last inst - possible exception set
370	bra		t_frcinx
371
372
373SINBORS:
374*--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
375*--IF |X| < 2**(-40), RETURN X OR 1.
376	CMPI.L		#$3FFF8000,D0
377	BGT.B		REDUCEX
378        
379
380SINSM:
381	MOVE.L		ADJN(a6),D0
382	CMPI.L		#0,D0
383	BGT.B		COSTINY
384
385SINTINY:
386	MOVE.W		#$0000,XDCARE(a6)	...JUST IN CASE
387	FMOVE.L		d1,FPCR		;restore users exceptions
388	FMOVE.X		X(a6),FP0		;last inst - possible exception set
389	bra		t_frcinx
390
391
392COSTINY:
393	FMOVE.S		#:3F800000,FP0
394
395	FMOVE.L		d1,FPCR		;restore users exceptions
396	FSUB.S		#:00800000,FP0	;last inst - possible exception set
397	bra		t_frcinx
398
399
400REDUCEX:
401*--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
402*--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
403*--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
404
405	FMOVEM.X	FP2-FP5,-(A7)	...save FP2 through FP5
406	MOVE.L		D2,-(A7)
407        FMOVE.S         #:00000000,FP1
408*--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
409*--there is a danger of unwanted overflow in first LOOP iteration.  In this
410*--case, reduce argument by one remainder step to make subsequent reduction
411*--safe.
412	cmpi.l	#$7ffeffff,d0		;is argument dangerously large?
413	bne.b	LOOP
414	move.l	#$7ffe0000,FP_SCR2(a6)	;yes
415*					;create 2**16383*PI/2
416	move.l	#$c90fdaa2,FP_SCR2+4(a6)
417	clr.l	FP_SCR2+8(a6)
418	ftst.x	fp0			;test sign of argument
419	move.l	#$7fdc0000,FP_SCR3(a6)	;create low half of 2**16383*
420*					;PI/2 at FP_SCR3
421	move.l	#$85a308d3,FP_SCR3+4(a6)
422	clr.l   FP_SCR3+8(a6)
423	fblt.w	red_neg
424	or.w	#$8000,FP_SCR2(a6)	;positive arg
425	or.w	#$8000,FP_SCR3(a6)
426red_neg:
427	fadd.x  FP_SCR2(a6),fp0		;high part of reduction is exact
428	fmove.x  fp0,fp1		;save high result in fp1
429	fadd.x  FP_SCR3(a6),fp0		;low part of reduction
430	fsub.x  fp0,fp1			;determine low component of result
431	fadd.x  FP_SCR3(a6),fp1		;fp0/fp1 are reduced argument.
432
433*--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
434*--integer quotient will be stored in N
435*--Intermeditate remainder is 66-bit long; (R,r) in (FP0,FP1)
436
437LOOP:
438	FMOVE.X		FP0,INARG(a6)	...+-2**K * F, 1 <= F < 2
439	MOVE.W		INARG(a6),D0
440        MOVE.L          D0,A1		...save a copy of D0
441	ANDI.L		#$00007FFF,D0
442	SUBI.L		#$00003FFF,D0	...D0 IS K
443	CMPI.L		#28,D0
444	BLE.B		LASTLOOP
445CONTLOOP:
446	SUBI.L		#27,D0	 ...D0 IS L := K-27
447	MOVE.L		#0,ENDFLAG(a6)
448	BRA.B		WORK
449LASTLOOP:
450	CLR.L		D0		...D0 IS L := 0
451	MOVE.L		#1,ENDFLAG(a6)
452
453WORK:
454*--FIND THE REMAINDER OF (R,r) W.R.T.	2**L * (PI/2). L IS SO CHOSEN
455*--THAT	INT( X * (2/PI) / 2**(L) ) < 2**29.
456
457*--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
458*--2**L * (PIby2_1), 2**L * (PIby2_2)
459
460	MOVE.L		#$00003FFE,D2	...BIASED EXPO OF 2/PI
461	SUB.L		D0,D2		...BIASED EXPO OF 2**(-L)*(2/PI)
462
463	MOVE.L		#$A2F9836E,FP_SCR1+4(a6)
464	MOVE.L		#$4E44152A,FP_SCR1+8(a6)
465	MOVE.W		D2,FP_SCR1(a6)	...FP_SCR1 is 2**(-L)*(2/PI)
466
467	FMOVE.X		FP0,FP2
468	FMUL.X		FP_SCR1(a6),FP2
469*--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
470*--FLOATING POINT FORMAT, THE TWO FMOVE'S	FMOVE.L FP <--> N
471*--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
472*--(SIGN(INARG)*2**63	+	FP2) - SIGN(INARG)*2**63 WILL GIVE
473*--US THE DESIRED VALUE IN FLOATING POINT.
474
475*--HIDE SIX CYCLES OF INSTRUCTION
476        MOVE.L		A1,D2
477        SWAP		D2
478	ANDI.L		#$80000000,D2
479	ORI.L		#$5F000000,D2	...D2 IS SIGN(INARG)*2**63 IN SGL
480	MOVE.L		D2,TWOTO63(a6)
481
482	MOVE.L		D0,D2
483	ADDI.L		#$00003FFF,D2	...BIASED EXPO OF 2**L * (PI/2)
484
485*--FP2 IS READY
486	FADD.S		TWOTO63(a6),FP2	...THE FRACTIONAL PART OF FP1 IS ROUNDED
487
488*--HIDE 4 CYCLES OF INSTRUCTION; creating 2**(L)*Piby2_1  and  2**(L)*Piby2_2
489        MOVE.W		D2,FP_SCR2(a6)
490	CLR.W           FP_SCR2+2(a6)
491	MOVE.L		#$C90FDAA2,FP_SCR2+4(a6)
492	CLR.L		FP_SCR2+8(a6)		...FP_SCR2 is  2**(L) * Piby2_1	
493
494*--FP2 IS READY
495	FSUB.S		TWOTO63(a6),FP2		...FP2 is N
496
497	ADDI.L		#$00003FDD,D0
498        MOVE.W		D0,FP_SCR3(a6)
499	CLR.W           FP_SCR3+2(a6)
500	MOVE.L		#$85A308D3,FP_SCR3+4(a6)
501	CLR.L		FP_SCR3+8(a6)		...FP_SCR3 is 2**(L) * Piby2_2
502
503	MOVE.L		ENDFLAG(a6),D0
504
505*--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
506*--P2 = 2**(L) * Piby2_2
507	FMOVE.X		FP2,FP4
508	FMul.X		FP_SCR2(a6),FP4		...W = N*P1
509	FMove.X		FP2,FP5
510	FMul.X		FP_SCR3(a6),FP5		...w = N*P2
511	FMove.X		FP4,FP3
512*--we want P+p = W+w  but  |p| <= half ulp of P
513*--Then, we need to compute  A := R-P   and  a := r-p
514	FAdd.X		FP5,FP3			...FP3 is P
515	FSub.X		FP3,FP4			...W-P
516
517	FSub.X		FP3,FP0			...FP0 is A := R - P
518        FAdd.X		FP5,FP4			...FP4 is p = (W-P)+w
519
520	FMove.X		FP0,FP3			...FP3 A
521	FSub.X		FP4,FP1			...FP1 is a := r - p
522
523*--Now we need to normalize (A,a) to  "new (R,r)" where R+r = A+a but
524*--|r| <= half ulp of R.
525	FAdd.X		FP1,FP0			...FP0 is R := A+a
526*--No need to calculate r if this is the last loop
527	CMPI.L		#0,D0
528	BGT.W		RESTORE
529
530*--Need to calculate r
531	FSub.X		FP0,FP3			...A-R
532	FAdd.X		FP3,FP1			...FP1 is r := (A-R)+a
533	BRA.W		LOOP
534
535RESTORE:
536        FMOVE.L		FP2,N(a6)
537	MOVE.L		(A7)+,D2
538	FMOVEM.X	(A7)+,FP2-FP5
539
540	
541	MOVE.L		ADJN(a6),D0
542	CMPI.L		#4,D0
543
544	BLT.W		SINCONT
545	BRA.B		SCCONT
546
547	xdef	ssincosd
548ssincosd:
549*--SIN AND COS OF X FOR DENORMALIZED X
550
551	FMOVE.S		#:3F800000,FP1
552	bsr		sto_cos		;store cosine result
553	bra		t_extdnrm
554
555	xdef	ssincos
556ssincos:
557*--SET ADJN TO 4
558	MOVE.L		#4,ADJN(a6)
559
560	FMOVE.X		(a0),FP0	...LOAD INPUT
561
562	MOVE.L		(A0),D0
563	MOVE.W		4(A0),D0
564	FMOVE.X		FP0,X(a6)
565	ANDI.L		#$7FFFFFFF,D0		...COMPACTIFY X
566
567	CMPI.L		#$3FD78000,D0		...|X| >= 2**(-40)?
568	BGE.B		SCOK1
569	BRA.W		SCSM
570
571SCOK1:
572	CMPI.L		#$4004BC7E,D0		...|X| < 15 PI?
573	BLT.B		SCMAIN
574	BRA.W		REDUCEX
575
576
577SCMAIN:
578*--THIS IS THE USUAL CASE, |X| <= 15 PI.
579*--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
580	FMOVE.X		FP0,FP1
581	FMUL.D		TWOBYPI,FP1	...X*2/PI
582
583*--HIDE THE NEXT THREE INSTRUCTIONS
584	LEA		PITBL+$200,A1 ...TABLE OF N*PI/2, N = -32,...,32
585	
586
587*--FP1 IS NOW READY
588	FMOVE.L		FP1,N(a6)		...CONVERT TO INTEGER
589
590	MOVE.L		N(a6),D0
591	ASL.L		#4,D0
592	ADDA.L		D0,A1		...ADDRESS OF N*PIBY2, IN Y1, Y2
593
594	FSUB.X		(A1)+,FP0	...X-Y1
595        FSUB.S		(A1),FP0	...FP0 IS R = (X-Y1)-Y2
596
597SCCONT:
598*--continuation point from REDUCEX
599
600*--HIDE THE NEXT TWO
601	MOVE.L		N(a6),D0
602	ROR.L		#1,D0
603	
604	CMPI.L		#0,D0		...D0 < 0 IFF N IS ODD
605	BGE.W		NEVEN
606
607NODD:
608*--REGISTERS SAVED SO FAR: D0, A0, FP2.
609
610	FMOVE.X		FP0,RPRIME(a6)
611	FMUL.X		FP0,FP0	 ...FP0 IS S = R*R
612	FMOVE.D		SINA7,FP1	...A7
613	FMOVE.D		COSB8,FP2	...B8
614	FMUL.X		FP0,FP1	 ...SA7
615	MOVE.L		d2,-(A7)
616	MOVE.L		D0,d2
617	FMUL.X		FP0,FP2	 ...SB8
618	ROR.L		#1,d2
619	ANDI.L		#$80000000,d2
620
621	FADD.D		SINA6,FP1	...A6+SA7
622	EOR.L		D0,d2
623	ANDI.L		#$80000000,d2
624	FADD.D		COSB7,FP2	...B7+SB8
625
626	FMUL.X		FP0,FP1	 ...S(A6+SA7)
627	EOR.L		d2,RPRIME(a6)
628	MOVE.L		(A7)+,d2
629	FMUL.X		FP0,FP2	 ...S(B7+SB8)
630	ROR.L		#1,D0
631	ANDI.L		#$80000000,D0
632
633	FADD.D		SINA5,FP1	...A5+S(A6+SA7)
634	MOVE.L		#$3F800000,POSNEG1(a6)
635	EOR.L		D0,POSNEG1(a6)
636	FADD.D		COSB6,FP2	...B6+S(B7+SB8)
637
638	FMUL.X		FP0,FP1	 ...S(A5+S(A6+SA7))
639	FMUL.X		FP0,FP2	 ...S(B6+S(B7+SB8))
640	FMOVE.X		FP0,SPRIME(a6)
641
642	FADD.D		SINA4,FP1	...A4+S(A5+S(A6+SA7))
643	EOR.L		D0,SPRIME(a6)
644	FADD.D		COSB5,FP2	...B5+S(B6+S(B7+SB8))
645
646	FMUL.X		FP0,FP1	 ...S(A4+...)
647	FMUL.X		FP0,FP2	 ...S(B5+...)
648
649	FADD.D		SINA3,FP1	...A3+S(A4+...)
650	FADD.D		COSB4,FP2	...B4+S(B5+...)
651
652	FMUL.X		FP0,FP1	 ...S(A3+...)
653	FMUL.X		FP0,FP2	 ...S(B4+...)
654
655	FADD.X		SINA2,FP1	...A2+S(A3+...)
656	FADD.X		COSB3,FP2	...B3+S(B4+...)
657
658	FMUL.X		FP0,FP1	 ...S(A2+...)
659	FMUL.X		FP0,FP2	 ...S(B3+...)
660
661	FADD.X		SINA1,FP1	...A1+S(A2+...)
662	FADD.X		COSB2,FP2	...B2+S(B3+...)
663
664	FMUL.X		FP0,FP1	 ...S(A1+...)
665	FMUL.X		FP2,FP0	 ...S(B2+...)
666
667	
668
669	FMUL.X		RPRIME(a6),FP1	...R'S(A1+...)
670	FADD.S		COSB1,FP0	...B1+S(B2...)
671	FMUL.X		SPRIME(a6),FP0	...S'(B1+S(B2+...))
672
673	move.l		d1,-(sp)	;restore users mode & precision
674	andi.l		#$ff,d1		;mask off all exceptions
675	fmove.l		d1,FPCR
676	FADD.X		RPRIME(a6),FP1	...COS(X)
677	bsr		sto_cos		;store cosine result
678	FMOVE.L		(sp)+,FPCR	;restore users exceptions
679	FADD.S		POSNEG1(a6),FP0	...SIN(X)
680
681	bra		t_frcinx
682
683
684NEVEN:
685*--REGISTERS SAVED SO FAR: FP2.
686
687	FMOVE.X		FP0,RPRIME(a6)
688	FMUL.X		FP0,FP0	 ...FP0 IS S = R*R
689	FMOVE.D		COSB8,FP1			...B8
690	FMOVE.D		SINA7,FP2			...A7
691	FMUL.X		FP0,FP1	 ...SB8
692	FMOVE.X		FP0,SPRIME(a6)
693	FMUL.X		FP0,FP2	 ...SA7
694	ROR.L		#1,D0
695	ANDI.L		#$80000000,D0
696	FADD.D		COSB7,FP1	...B7+SB8
697	FADD.D		SINA6,FP2	...A6+SA7
698	EOR.L		D0,RPRIME(a6)
699	EOR.L		D0,SPRIME(a6)
700	FMUL.X		FP0,FP1	 ...S(B7+SB8)
701	ORI.L		#$3F800000,D0
702	MOVE.L		D0,POSNEG1(a6)
703	FMUL.X		FP0,FP2	 ...S(A6+SA7)
704
705	FADD.D		COSB6,FP1	...B6+S(B7+SB8)
706	FADD.D		SINA5,FP2	...A5+S(A6+SA7)
707
708	FMUL.X		FP0,FP1	 ...S(B6+S(B7+SB8))
709	FMUL.X		FP0,FP2	 ...S(A5+S(A6+SA7))
710
711	FADD.D		COSB5,FP1	...B5+S(B6+S(B7+SB8))
712	FADD.D		SINA4,FP2	...A4+S(A5+S(A6+SA7))
713
714	FMUL.X		FP0,FP1	 ...S(B5+...)
715	FMUL.X		FP0,FP2	 ...S(A4+...)
716
717	FADD.D		COSB4,FP1	...B4+S(B5+...)
718	FADD.D		SINA3,FP2	...A3+S(A4+...)
719
720	FMUL.X		FP0,FP1	 ...S(B4+...)
721	FMUL.X		FP0,FP2	 ...S(A3+...)
722
723	FADD.X		COSB3,FP1	...B3+S(B4+...)
724	FADD.X		SINA2,FP2	...A2+S(A3+...)
725
726	FMUL.X		FP0,FP1	 ...S(B3+...)
727	FMUL.X		FP0,FP2	 ...S(A2+...)
728
729	FADD.X		COSB2,FP1	...B2+S(B3+...)
730	FADD.X		SINA1,FP2	...A1+S(A2+...)
731
732	FMUL.X		FP0,FP1	 ...S(B2+...)
733	fmul.x		fp2,fp0	 ...s(a1+...)
734
735	
736
737	FADD.S		COSB1,FP1	...B1+S(B2...)
738	FMUL.X		RPRIME(a6),FP0	...R'S(A1+...)
739	FMUL.X		SPRIME(a6),FP1	...S'(B1+S(B2+...))
740
741	move.l		d1,-(sp)	;save users mode & precision
742	andi.l		#$ff,d1		;mask off all exceptions
743	fmove.l		d1,FPCR
744	FADD.S		POSNEG1(a6),FP1	...COS(X)
745	bsr		sto_cos		;store cosine result
746	FMOVE.L		(sp)+,FPCR	;restore users exceptions
747	FADD.X		RPRIME(a6),FP0	...SIN(X)
748
749	bra		t_frcinx
750
751SCBORS:
752	CMPI.L		#$3FFF8000,D0
753	BGT.W		REDUCEX
754        
755
756SCSM:
757	MOVE.W		#$0000,XDCARE(a6)
758	FMOVE.S		#:3F800000,FP1
759
760	move.l		d1,-(sp)	;save users mode & precision
761	andi.l		#$ff,d1		;mask off all exceptions
762	fmove.l		d1,FPCR
763	FSUB.S		#:00800000,FP1
764	bsr		sto_cos		;store cosine result
765	FMOVE.L		(sp)+,FPCR	;restore users exceptions
766	FMOVE.X		X(a6),FP0
767	bra		t_frcinx
768
769	end
770