srem_mod.sa revision 1.1
1*	MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
2*	M68000 Hi-Performance Microprocessor Division
3*	M68040 Software Package 
4*
5*	M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
6*	All rights reserved.
7*
8*	THE SOFTWARE is provided on an "AS IS" basis and without warranty.
9*	To the maximum extent permitted by applicable law,
10*	MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
11*	INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
12*	PARTICULAR PURPOSE and any warranty against infringement with
13*	regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
14*	and any accompanying written materials. 
15*
16*	To the maximum extent permitted by applicable law,
17*	IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
18*	(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
19*	PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
20*	OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
21*	SOFTWARE.  Motorola assumes no responsibility for the maintenance
22*	and support of the SOFTWARE.  
23*
24*	You are hereby granted a copyright license to use, modify, and
25*	distribute the SOFTWARE so long as this entire notice is retained
26*	without alteration in any modified and/or redistributed versions,
27*	and that such modified versions are clearly identified as such.
28*	No licenses are granted by implication, estoppel or otherwise
29*	under any patents or trademarks of Motorola, Inc.
30
31*
32*	srem_mod.sa 3.1 12/10/90
33*
34*      The entry point sMOD computes the floating point MOD of the
35*      input values X and Y. The entry point sREM computes the floating
36*      point (IEEE) REM of the input values X and Y.
37*
38*      INPUT
39*      -----
40*      Double-extended value Y is pointed to by address in register
41*      A0. Double-extended value X is located in -12(A0). The values
42*      of X and Y are both nonzero and finite; although either or both
43*      of them can be denormalized. The special cases of zeros, NaNs,
44*      and infinities are handled elsewhere.
45*
46*      OUTPUT
47*      ------
48*      FREM(X,Y) or FMOD(X,Y), depending on entry point.
49*
50*       ALGORITHM
51*       ---------
52*
53*       Step 1.  Save and strip signs of X and Y: signX := sign(X),
54*                signY := sign(Y), X := |X|, Y := |Y|, 
55*                signQ := signX EOR signY. Record whether MOD or REM
56*                is requested.
57*
58*       Step 2.  Set L := expo(X)-expo(Y), k := 0, Q := 0.
59*                If (L < 0) then
60*                   R := X, go to Step 4.
61*                else
62*                   R := 2^(-L)X, j := L.
63*                endif
64*
65*       Step 3.  Perform MOD(X,Y)
66*            3.1 If R = Y, go to Step 9.
67*            3.2 If R > Y, then { R := R - Y, Q := Q + 1}
68*            3.3 If j = 0, go to Step 4.
69*            3.4 k := k + 1, j := j - 1, Q := 2Q, R := 2R. Go to
70*                Step 3.1.
71*
72*       Step 4.  At this point, R = X - QY = MOD(X,Y). Set
73*                Last_Subtract := false (used in Step 7 below). If
74*                MOD is requested, go to Step 6. 
75*
76*       Step 5.  R = MOD(X,Y), but REM(X,Y) is requested.
77*            5.1 If R < Y/2, then R = MOD(X,Y) = REM(X,Y). Go to
78*                Step 6.
79*            5.2 If R > Y/2, then { set Last_Subtract := true,
80*                Q := Q + 1, Y := signX*Y }. Go to Step 6.
81*            5.3 This is the tricky case of R = Y/2. If Q is odd,
82*                then { Q := Q + 1, signX := -signX }.
83*
84*       Step 6.  R := signX*R.
85*
86*       Step 7.  If Last_Subtract = true, R := R - Y.
87*
88*       Step 8.  Return signQ, last 7 bits of Q, and R as required.
89*
90*       Step 9.  At this point, R = 2^(-j)*X - Q Y = Y. Thus,
91*                X = 2^(j)*(Q+1)Y. set Q := 2^(j)*(Q+1),
92*                R := 0. Return signQ, last 7 bits of Q, and R.
93*                
94
95SREM_MOD    IDNT    2,1 Motorola 040 Floating Point Software Package
96
97	section    8
98
99	include	fpsp.h
100
*..Scratch storage used by smod/srem.  Every symbol below is used as a
*..displacement off a6.  L_SCR1..L_SCR3 and FP_SCR1..FP_SCR4 are not defined
*..in this file -- presumably they come from fpsp.h (confirm there).
*..
*..  Mod_Flag : longword, 0 = MOD requested (smod), 1 = REM requested (srem)
*..  SignY    : word, sign/exponent word of Y as read from (A0)
*..  SignX    : word, sign/exponent word of X as read from -12(A0); its
*..             bit 15 is toggled by Tie_Case
*..  SignQ    : word, $8000 if sign(X) differs from sign(Y), else 0
*..  Sc_Flag  : longword, nonzero if the result must be multiplied by Scale
*..             before returning (tested at Restore)
101Mod_Flag  equ	L_SCR3
102SignY     equ	FP_SCR3+4
103SignX     equ	FP_SCR3+8
104SignQ     equ	FP_SCR3+12
105Sc_Flag   equ	FP_SCR4
106
*..Y and R are in-memory extended-precision images (exponent word, a word
*..at +2, hi mantissa at +4, lo mantissa at +8) built at Get_Mod so that
*..the FPU can load R and subtract Y.
107Y         equ	FP_SCR1
108Y_Hi      equ	Y+4
109Y_Lo      equ	Y+8
110
111R         equ	FP_SCR2
112R_Hi      equ	R+4
113R_Lo      equ	R+8
114
115
*..Scale: extended-precision constant with exponent field $0001 and
*..mantissa $80000000 00000000.  Restore multiplies the result by it when
*..Sc_Flag is nonzero, undoing the extra $3FFE of exponent bias that
*..Do_Scale leaves in R and Y.  The fourth longword is not part of the
*..12-byte operand read by FMul.X.
116Scale     DC.L	$00010000,$80000000,$00000000,$00000000
117
*..t_avoid_unsupp is defined elsewhere; Restore branches to it after the
*..FMul.X by Scale.
118	xref	t_avoid_unsupp
119
*..smod -- entry point for MOD.  Per the file header, A0 points to Y and X
*..is at -12(A0); a6 addresses the scratch area.  Stores 0 in Mod_Flag(a6)
*..and branches to the code shared with srem at Mod_Rem.
120        xdef        smod
121smod:
122
123   Move.L               #0,Mod_Flag(a6)
124   BRA.B                Mod_Rem
125
*..srem -- entry point for (IEEE) REM.  Same inputs as smod.  Stores 1 in
*..Mod_Flag(a6) and falls through into Mod_Rem, which follows immediately.
126        xdef        srem
127srem:
128
129   Move.L               #1,Mod_Flag(a6)
130
*..Mod_Rem -- shared body of smod/srem.
*..Pushes D2-D7 (popped again at Restore), then unpacks Y from (A0) into
*..(D3,D4,D5) = (exponent, hi mantissa, lo mantissa) with the sign bit
*..removed.  The raw sign/exponent word is kept in SignY(a6).  If the
*..exponent field is zero (denormalized Y) the mantissa is shifted left
*..until its leading 1 reaches bit 31 of D4 and the exponent in D3 is
*..reduced by the shift count.  All three paths end at Chk_X.
131Mod_Rem:
132*..Save sign of X and Y
133   MoveM.L              D2-D7,-(A7)     ...save data registers
134   Move.W               (A0),D3
135   Move.W               D3,SignY(a6)
136   AndI.L               #$00007FFF,D3   ...Y := |Y|
137
138*
139   Move.L               4(A0),D4
140   Move.L               8(A0),D5        ...(D3,D4,D5) is |Y|
141
*..Nonzero exponent field: only the bias adjustment at Y_Normal is needed.
142   Tst.L                D3
143   BNE.B                Y_Normal
144
*..Exponent field is zero: start from $3FFE, the same value Y_Normal would
*..produce for a zero field, and subtract the normalizing shift below.
145   Move.L               #$00003FFE,D3	...$3FFD + 1
146   Tst.L                D4
147   BNE.B                HiY_not0
148
*..Hi mantissa longword is zero: move the lo longword up (a 32-bit shift,
*..charged to the exponent), then shift out the remaining leading zeros.
*..D6 receives the bit offset of the first 1 found by BFFFO.
149HiY_0:
150   Move.L               D5,D4
151   CLR.L                D5
152   SubI.L               #32,D3
153   CLR.L                D6
154   BFFFO                D4{0:32},D6
155   LSL.L                D6,D4
156   Sub.L                D6,D3           ...(D3,D4,D5) is normalized
157*                                       ...with bias $7FFD
158   BRA.B                Chk_X
159
*..Hi mantissa longword is nonzero: 64-bit left shift of (D4,D5) by D6
*..bits, built from 32-bit shifts.  D7 = old D5 >> (32 - D6) holds the bits
*..that cross from the lo longword into the hi longword.
160HiY_not0:
161   CLR.L                D6
162   BFFFO                D4{0:32},D6
163   Sub.L                D6,D3
164   LSL.L                D6,D4
165   Move.L               D5,D7           ...a copy of D5
166   LSL.L                D6,D5
167   Neg.L                D6
168   AddI.L               #32,D6
169   LSR.L                D6,D7
170   Or.L                 D7,D4           ...(D3,D4,D5) normalized
171*                                       ...with bias $7FFD
172   BRA.B                Chk_X
173
*..Nonzero exponent field: add $3FFE to the exponent and fall through to
*..Chk_X.  The mantissa is used as loaded.
174Y_Normal:
175   AddI.L               #$00003FFE,D3   ...(D3,D4,D5) normalized
176*                                       ...with bias $7FFD
177
*..Chk_X -- unpack X from -12(A0) into (D0,D1,D2) = (exponent, hi mantissa,
*..lo mantissa), exactly as was done for Y, and derive the quotient sign.
*..SignX(a6) keeps the raw sign/exponent word of X.  SignQ(a6) gets bit 15
*..of (sign/exponent word of X) EOR (SignY), i.e. $8000 when the signs
*..differ and 0 otherwise.  All three paths end at Init.
178Chk_X:
179   Move.W               -12(A0),D0
180   Move.W               D0,SignX(a6)
181   Move.W               SignY(a6),D1
182   EOr.L                D0,D1
183   AndI.L               #$00008000,D1
184   Move.W               D1,SignQ(a6)	...sign(Q) obtained
185   AndI.L               #$00007FFF,D0
186   Move.L               -8(A0),D1
187   Move.L               -4(A0),D2       ...(D0,D1,D2) is |X|
*..Nonzero exponent field: only the bias adjustment at X_Normal is needed.
188   Tst.L                D0
189   BNE.B                X_Normal
*..Exponent field is zero (denormalized X): start from $3FFE and subtract
*..the normalizing shift count below.
190   Move.L               #$00003FFE,D0
191   Tst.L                D1
192   BNE.B                HiX_not0
193
*..Hi mantissa longword is zero: move the lo longword up (32 charged to the
*..exponent), then shift out the remaining leading zeros counted by BFFFO.
194HiX_0:
195   Move.L               D2,D1
196   CLR.L                D2
197   SubI.L               #32,D0
198   CLR.L                D6
199   BFFFO                D1{0:32},D6
200   LSL.L                D6,D1
201   Sub.L                D6,D0           ...(D0,D1,D2) is normalized
202*                                       ...with bias $7FFD
203   BRA.B                Init
204
*..Hi mantissa longword is nonzero: 64-bit left shift of (D1,D2) by D6
*..bits.  D7 = old D2 >> (32 - D6) carries bits from the lo longword into
*..the hi longword.
205HiX_not0:
206   CLR.L                D6
207   BFFFO                D1{0:32},D6
208   Sub.L                D6,D0
209   LSL.L                D6,D1
210   Move.L               D2,D7           ...a copy of D2
211   LSL.L                D6,D2
212   Neg.L                D6
213   AddI.L               #32,D6
214   LSR.L                D6,D7
215   Or.L                 D7,D1           ...(D0,D1,D2) normalized
216*                                       ...with bias $7FFD
217   BRA.B                Init
218
*..Nonzero exponent field: add $3FFE to the exponent and fall through to
*..Init.  The mantissa is used as loaded.
219X_Normal:
220   AddI.L               #$00003FFE,D0   ...(D0,D1,D2) normalized
221*                                       ...with bias $7FFD
222
*..Init -- set up the shift-and-subtract loop (Step 2 of the algorithm).
*..  L_SCR1(a6) := exponent of Y (D3), reloaded later at PostLoop/Get_Mod
*..  L_SCR2(a6) := exponent of X (D0), reloaded below only when L < 0
*..  D0 := L = expo(X) - expo(Y); from here on D0 is the down-counter j
*..  D6 := 0  (the "carry": nonzero when a doubled R overflowed 64 bits)
*..  D3 := 0  (Q; D3 is free because expo(Y) was saved in L_SCR1)
*..  A1 := 0  (k; it is only incremented in this file, never read)
*..If L >= 0 the loop is entered with R = (D1,D2) = mantissa of X.
*..If L < 0 then |X| < |Y|, so |X| is already MOD(X,Y): D0 is restored to
*..expo(X) and control goes straight to Get_Mod with (D0,D1,D2) = |X|, Q = 0.
223Init:
224*
225   Move.L               D3,L_SCR1(a6)   ...save biased expo(Y)
226   move.l		d0,L_SCR2(a6)	;save d0
227   Sub.L                D3,D0           ...L := expo(X)-expo(Y)
228*   Move.L               D0,L            ...D0 is j
229   CLR.L                D6              ...D6 := carry <- 0
230   CLR.L                D3              ...D3 is Q
231   MoveA.L              #0,A1           ...A1 is k; j+k=L, Q=0
232
233*..(Carry,D1,D2) is R
234   Tst.L                D0
235   BGE.B                Mod_Loop
236
237*..expo(X) < expo(Y). Thus X = mod(X,Y)
238*
239   move.l		L_SCR2(a6),d0	;restore d0
240   BRA.W                Get_Mod
241
*..Mod_Loop -- Step 3 of the algorithm: one quotient bit per iteration.
*..Registers on entry to each iteration:
*..  (D6,D1,D2) = R, with D6 nonzero meaning a 65th (overflow) bit is set
*..  (D4,D5)    = mantissa of Y
*..  D3 = Q, D0 = j (iterations still to run), A1 = k
*..Each pass: if R = Y exit to Rem_is_0; if R > Y subtract Y and add 1 to
*..Q; then, unless j = 0 (exit to PostLoop), double both Q and R and
*..decrement j.
242*..At this point  R = 2^(-L)X; Q = 0; k = 0; and  k+j = L
243
244
245Mod_Loop:
*..D6 is either 0 or $000000FF (SCS below writes only its low byte and the
*..rest was cleared with CLR.L), so BGT is taken exactly when the previous
*..doubling overflowed.  In that case R > Y without comparing.
246   Tst.L                D6              ...test carry bit
247   BGT.B                R_GT_Y
248
249*..At this point carry = 0, R = (D1,D2), Y = (D4,D5)
250   Cmp.L                D4,D1           ...compare hi(R) and hi(Y)
251   BNE.B                R_NE_Y
252   Cmp.L                D5,D2           ...compare lo(R) and lo(Y)
253   BNE.B                R_NE_Y
254
255*..At this point, R = Y
256   BRA.W                Rem_is_0
257
*..Reached from either BNE above.  BNE leaves the condition codes alone,
*..so C still reflects the Cmp.L that found the first differing longword.
258R_NE_Y:
259*..use the borrow of the previous compare
260   BCS.B                R_LT_Y          ...borrow is set iff R < Y
261
262R_GT_Y:
263*..If Carry is set, then Y < (Carry,D1,D2) < 2Y. Otherwise, Carry = 0
264*..and Y < (D1,D2) < 2Y. Either way, perform R - Y
*..64-bit subtract: SubX.L takes the borrow out of the lo longword from X.
265   Sub.L                D5,D2           ...lo(R) - lo(Y)
266   SubX.L               D4,D1           ...hi(R) - hi(Y)
267   CLR.L                D6              ...clear carry
268   AddQ.L               #1,D3           ...Q := Q + 1
269
270R_LT_Y:
271*..At this point, Carry=0, R < Y. R = 2^(k-L)X - QY; k+j = L; j >= 0.
272   Tst.L                D0              ...see if j = 0.
273   BEQ.B                PostLoop
274
*..Double Q and R.  Add.L/RoXL.L form a 64-bit left shift by one through
*..the X bit; the bit shifted out of D1 is captured in D6 by SCS.  Q is
*..kept in 32 bits; only its low 7 bits are used at Get_Q.
275   Add.L                D3,D3           ...Q := 2Q
276   Add.L                D2,D2           ...lo(R) = 2lo(R)
277   RoXL.L               #1,D1           ...hi(R) = 2hi(R) + carry
278   SCS                  D6              ...set Carry if 2(R) overflows
279   AddQ.L               #1,A1           ...k := k+1
280   SubQ.L               #1,D0           ...j := j - 1
281*..At this point, R=(Carry,D1,D2) = 2^(k-L)X - QY, j+k=L, j >= 0, R < 2Y.
282
283   BRA.B                Mod_Loop
284
*..PostLoop -- the loop ended with j = 0; (D1,D2) holds R = |X| - Q|Y| on
*..the exponent scale of Y.  Reload that exponent from L_SCR1(a6) into D0
*..and renormalize (D0,D1,D2) so the leading 1 of R is in bit 31 of D1,
*..using the same technique as for X and Y.  Falls into Get_Mod.
285PostLoop:
286*..k = L, j = 0, Carry = 0, R = (D1,D2) = X - QY, R < Y.
287
288*..normalize R.
289   Move.L               L_SCR1(a6),D0           ...new biased expo of R
290   Tst.L                D1
291   BNE.B                HiR_not0
292
*..Hi longword of R is zero: move the lo longword up (32 charged to the
*..exponent), then shift out the remaining leading zeros counted by BFFFO.
293HiR_0:
294   Move.L               D2,D1
295   CLR.L                D2
296   SubI.L               #32,D0
297   CLR.L                D6
298   BFFFO                D1{0:32},D6
299   LSL.L                D6,D1
300   Sub.L                D6,D0           ...(D0,D1,D2) is normalized
301*                                       ...with bias $7FFD
302   BRA.B                Get_Mod
303
*..Hi longword of R is nonzero.  BMI is taken when BFFFO reports the top
*..bit of the field (bit 31 of D1) already set, so no shift is needed.
*..Otherwise do a 64-bit left shift of (D1,D2) by D6 bits, with D7 carrying
*..the bits that move from the lo longword into the hi longword.
304HiR_not0:
305   CLR.L                D6
306   BFFFO                D1{0:32},D6
307   BMI.B                Get_Mod         ...already normalized
308   Sub.L                D6,D0
309   LSL.L                D6,D1
310   Move.L               D2,D7           ...a copy of D2
311   LSL.L                D6,D2
312   Neg.L                D6
313   AddI.L               #32,D6
314   LSR.L                D6,D7
315   Or.L                 D7,D1           ...(D0,D1,D2) normalized
316
317*
*..Get_Mod -- build the memory images R(a6) and Y(a6) and load fp0 := R.
*..On entry (D0,D1,D2) is the normalized MOD result and (D4,D5) with the
*..exponent in L_SCR1(a6) is |Y|.  Both exponents still carry the extra
*..$3FFE added when X and Y were unpacked.
*..  D0 <  $41FE : Do_Scale -- store both exponents as they are, set
*..                Sc_Flag := 1 so Restore multiplies the result by Scale.
*..  D0 >= $41FE : No_Scale -- subtract $3FFE from both exponents first,
*..                set Sc_Flag := 0.
*..Either way D0 and L_SCR1(a6) are left on the same scale, which the
*..exponent comparison in ModOrRem relies on.
318Get_Mod:
319   CmpI.L		#$000041FE,D0
320   BGE.B		No_Scale
321Do_Scale:
*..Only the low word of each exponent is stored; the word at +2 is cleared.
322   Move.W		D0,R(a6)
323   clr.w		R+2(a6)
324   Move.L		D1,R_Hi(a6)
325   Move.L		D2,R_Lo(a6)
326   Move.L		L_SCR1(a6),D6
327   Move.W		D6,Y(a6)
328   clr.w		Y+2(a6)
329   Move.L		D4,Y_Hi(a6)
330   Move.L		D5,Y_Lo(a6)
331   FMove.X		R(a6),fp0		...no exception
332   Move.L		#1,Sc_Flag(a6)
333   BRA.B		ModOrRem
334No_Scale:
335   Move.L		D1,R_Hi(a6)
336   Move.L		D2,R_Lo(a6)
337   SubI.L		#$3FFE,D0
338   Move.W		D0,R(a6)
339   clr.w		R+2(a6)
*..The adjusted exponent of Y is written back to L_SCR1(a6) because
*..ModOrRem reloads it from there.
340   Move.L		L_SCR1(a6),D6
341   SubI.L		#$3FFE,D6
342   Move.L		D6,L_SCR1(a6)
343   FMove.X		R(a6),fp0
*..NOTE(review): unlike Do_Scale, this path does not clear Y+2(a6).
344   Move.W		D6,Y(a6)
345   Move.L		D4,Y_Hi(a6)
346   Move.L		D5,Y_Lo(a6)
347   Move.L		#0,Sc_Flag(a6)
348
349*
350
351
*..ModOrRem -- Steps 4/5 of the algorithm.  fp0 = R = MOD result (positive),
*..(D0,D1,D2) = R and (L_SCR1(a6),D4,D5) = |Y| in integer form, D3 = Q.
*..If Mod_Flag is 0 (MOD requested) nothing more is needed: go to Fix_Sign.
*..For REM, compare R with Y/2 using the integer copies:
*..  expo(R) < expo(Y)-1                      -> R < Y/2 : Fix_Sign
*..  expo(R) > expo(Y)-1                      -> R > Y/2 : Last_Sub
*..  exponents equal, mantissas equal         -> R = Y/2 : Tie_Case
*..  exponents equal, mant(R) < mant(Y)       -> R < Y/2 : Fix_Sign
*..  exponents equal, mant(R) > mant(Y)       -> R > Y/2 : Last_Sub
352ModOrRem:
353   Move.L               Mod_Flag(a6),D6
354   BEQ.B                Fix_Sign
355
356   Move.L               L_SCR1(a6),D6           ...new biased expo(Y)
357   SubQ.L               #1,D6           ...biased expo(Y/2)
358   Cmp.L                D6,D0
359   BLT.B                Fix_Sign
360   BGT.B                Last_Sub
361
362   Cmp.L                D4,D1
363   BNE.B                Not_EQ
364   Cmp.L                D5,D2
365   BNE.B                Not_EQ
366   BRA.W                Tie_Case
367
*..BNE does not change the condition codes, so C is the borrow from the
*..Cmp.L that found the first differing mantissa longword: set iff R < Y/2.
368Not_EQ:
369   BCS.B                Fix_Sign
370
*..R > Y/2: the nearest multiple of Y is one further on.  Subtract |Y| from
*..the still-positive R in fp0 (giving a negative value) and bump Q.  The
*..sign of X is applied afterwards at Fix_Sign.
371Last_Sub:
372*
373   FSub.X		Y(a6),fp0		...no exceptions
374   AddQ.L               #1,D3           ...Q := Q + 1
375
376*
377
*..Fix_Sign -- apply the sign of X to the result in fp0.  Move.W sets N from
*..bit 15 of SignX(a6); BGE skips the negation when that bit is clear.
*..(Tie_Case may have toggled that bit before branching here.)
378Fix_Sign:
379*..Get sign of X
380   Move.W               SignX(a6),D6
381   BGE.B		Get_Q
382   FNeg.X		fp0
383
384*..Get Q
385*
*..Get_Q -- assemble the quotient byte and merge it into FPSR.
*..  D6 := SignQ >> 8       -> $80 if the quotient is negative, else $00
*..  D3 := (Q AND $7F) OR D6 -> sign in bit 7, low 7 bits of Q in bits 6-0
*..  Swap moves that byte to bits 23-16, the field cleared in the FPSR copy
*..  by the mask $FF00FFFF; the other FPSR bits are written back unchanged.
386Get_Q:
387   clr.l		d6		
388   Move.W               SignQ(a6),D6        ...D6 is sign(Q)
389   Move.L               #8,D7
390   LSR.L                D7,D6           
391   AndI.L               #$0000007F,D3   ...7 bits of Q
392   Or.L                 D6,D3           ...sign and bits of Q
393   Swap                 D3
394   FMove.L              fpsr,D6
395   AndI.L               #$FF00FFFF,D6
396   Or.L                 D3,D6
397   FMove.L              D6,fpsr         ...put Q in fpsr
398
399*
*..Restore -- common exit.  Pops D2-D7 (pushed at Mod_Rem) and reloads FPCR
*..from USER_FPCR(a6) before the final operation on fp0.  D0 is used as a
*..scratch register here.
*..  Sc_Flag = 0 : go to Finish.
*..  Sc_Flag <> 0: multiply fp0 by Scale to remove the extra exponent bias
*..                left in by Do_Scale, then branch (not call) to the
*..                external routine t_avoid_unsupp; control does not come
*..                back to this file on that path.
400Restore:
401   MoveM.L              (A7)+,D2-D7
402   FMove.L              USER_FPCR(a6),fpcr
403   Move.L               Sc_Flag(a6),D0
404   BEQ.B                Finish
405   FMul.X		Scale(pc),fp0	...may cause underflow
406   bra			t_avoid_unsupp	;check for denorm as a
407*					;result of the scaling
408
*..Finish -- unscaled exit: pass fp0 through an FPU move under the FPCR
*..loaded above, then return to the caller with the result in fp0.
409Finish:
410	fmove.x		fp0,fp0		;capture exceptions & round
411	rts
412
*..Rem_is_0 -- Step 9 of the algorithm, entered from Mod_Loop when R = Y.
*..The remainder is exactly zero and the full quotient is 2^j * (Q+1), with
*..j still in D0.  Only the low 7 bits of the quotient are used (Get_Q):
*..  j <  8 : D3 := (Q+1) << j
*..  j >= 8 : the low 7 bits are all zero, so D3 := 0
*..The result is then a zero in fp0 with Sc_Flag cleared.  Control joins
*..the normal exit at Fix_Sign, so the zero takes the sign of X and the
*..ModOrRem stage is skipped.
413Rem_is_0:
414*..R = 2^(-j)X - Q Y = Y, thus R = 0 and quotient = 2^j (Q+1)
415   AddQ.L               #1,D3
416   CmpI.L               #8,D0           ...D0 is j 
417   BGE.B                Q_Big
418
419   LSL.L                D0,D3
420   BRA.B                Set_R_0
421
422Q_Big:
423   CLR.L                D3
424
425Set_R_0:
426   FMove.S		#:00000000,fp0
427   Move.L		#0,Sc_Flag(a6)
428   BRA.W                Fix_Sign
429
*..Tie_Case -- Step 5.3 of the algorithm, entered from ModOrRem when REM is
*..requested and R = Y/2 exactly.  The quotient is made even:
*..  Q even : keep R and Q as they are.
*..  Q odd  : Q := Q + 1 and toggle bit 15 of SignX(a6), so that Fix_Sign
*..           gives R the opposite sign to X.
*..Both cases continue at Fix_Sign.  D6 is scratch here.
430Tie_Case:
431*..Check parity of Q
432   Move.L               D3,D6
433   AndI.L               #$00000001,D6
434   Tst.L                D6
435   BEq.W                Fix_Sign	...Q is even
436
437*..Q is odd, Q := Q + 1, signX := -signX
438   AddQ.L               #1,D3
439   Move.W               SignX(a6),D6
440   EOrI.L               #$00008000,D6
441   Move.W               D6,SignX(a6)
442   BRA.W                Fix_Sign
443
444   End
445