ilsp.s revision 1.1.180.1 1 1.1 is #
2 1.1.180.1 jdolecek # $NetBSD: ilsp.s,v 1.1.180.1 2017/12/03 11:36:23 jdolecek Exp $
3 1.1 is #
4 1.1 is
5 1.1 is #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
6 1.1 is # MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
7 1.1 is # M68000 Hi-Performance Microprocessor Division
8 1.1 is # M68060 Software Package Production Release
9 1.1 is #
10 1.1 is # M68060 Software Package Copyright (C) 1993, 1994, 1995, 1996 Motorola Inc.
11 1.1 is # All rights reserved.
12 1.1 is #
13 1.1 is # THE SOFTWARE is provided on an "AS IS" basis and without warranty.
14 1.1 is # To the maximum extent permitted by applicable law,
15 1.1 is # MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
16 1.1 is # INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS
17 1.1 is # FOR A PARTICULAR PURPOSE and any warranty against infringement with
18 1.1 is # regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
19 1.1 is # and any accompanying written materials.
20 1.1 is #
21 1.1 is # To the maximum extent permitted by applicable law,
22 1.1 is # IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
23 1.1 is # (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
24 1.1 is # BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
25 1.1 is # ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
26 1.1 is #
27 1.1 is # Motorola assumes no responsibility for the maintenance and support
28 1.1 is # of the SOFTWARE.
29 1.1 is #
30 1.1 is # You are hereby granted a copyright license to use, modify, and distribute the
31 1.1 is # SOFTWARE so long as this entire notice is retained without alteration
32 1.1 is # in any modified and/or redistributed versions, and that such modified
33 1.1 is # versions are clearly identified as such.
34 1.1 is # No licenses are granted by implication, estoppel or otherwise under any
35 1.1 is # patents or trademarks of Motorola, Inc.
36 1.1 is #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
37 1.1 is
38 1.1 is #
39 1.1 is # litop.s:
40 1.1 is # This file is appended to the top of the 060FPLSP package
41 1.1 is # and contains the entry points into the package. The user, in
42 1.1 is # effect, branches to one of the branch table entries located here.
43 1.1 is #
44 1.1 is
45 1.1 is bra.l _060LSP__idivs64_
46 1.1 is short 0x0000
47 1.1 is bra.l _060LSP__idivu64_
48 1.1 is short 0x0000
49 1.1 is
50 1.1 is bra.l _060LSP__imuls64_
51 1.1 is short 0x0000
52 1.1 is bra.l _060LSP__imulu64_
53 1.1 is short 0x0000
54 1.1 is
55 1.1 is bra.l _060LSP__cmp2_Ab_
56 1.1 is short 0x0000
57 1.1 is bra.l _060LSP__cmp2_Aw_
58 1.1 is short 0x0000
59 1.1 is bra.l _060LSP__cmp2_Al_
60 1.1 is short 0x0000
61 1.1 is bra.l _060LSP__cmp2_Db_
62 1.1 is short 0x0000
63 1.1 is bra.l _060LSP__cmp2_Dw_
64 1.1 is short 0x0000
65 1.1 is bra.l _060LSP__cmp2_Dl_
66 1.1 is short 0x0000
67 1.1 is
68 1.1 is # leave room for future possible aditions.
69 1.1 is align 0x200
70 1.1 is
71 1.1 is #########################################################################
72 1.1 is # XDEF **************************************************************** #
73 1.1 is # _060LSP__idivu64_(): Emulate 64-bit unsigned div instruction. #
74 1.1 is # _060LSP__idivs64_(): Emulate 64-bit signed div instruction. #
75 1.1 is # #
76 1.1 is # This is the library version which is accessed as a subroutine #
77 1.1 is # and therefore does not work exactly like the 680X0 div{s,u}.l #
78 1.1 is # 64-bit divide instruction. #
79 1.1 is # #
80 1.1 is # XREF **************************************************************** #
81 1.1 is # None. #
82 1.1 is # #
83 1.1 is # INPUT *************************************************************** #
84 1.1 is # 0x4(sp) = divisor #
85 1.1 is # 0x8(sp) = hi(dividend) #
86 1.1 is # 0xc(sp) = lo(dividend) #
87 1.1 is # 0x10(sp) = pointer to location to place quotient/remainder #
88 1.1 is # #
89 1.1 is # OUTPUT ************************************************************** #
90 1.1 is # 0x10(sp) = points to location of remainder/quotient. #
91 1.1 is # remainder is in first longword, quotient is in 2nd. #
92 1.1 is # #
93 1.1 is # ALGORITHM *********************************************************** #
94 1.1 is # If the operands are signed, make them unsigned and save the #
95 1.1 is # sign info for later. Separate out special cases like divide-by-zero #
96 1.1 is # or 32-bit divides if possible. Else, use a special math algorithm #
97 1.1 is # to calculate the result. #
98 1.1 is # Restore sign info if signed instruction. Set the condition #
99 1.1 is # codes before performing the final "rts". If the divisor was equal to #
100 1.1 is # zero, then perform a divide-by-zero using a 16-bit implemented #
101 1.1 is # divide instruction. This way, the operating system can record that #
102 1.1 is # the event occurred even though it may not point to the correct place. #
103 1.1 is # #
104 1.1 is #########################################################################
105 1.1 is
106 1.1 is set POSNEG, -1
107 1.1 is set NDIVISOR, -2
108 1.1 is set NDIVIDEND, -3
109 1.1 is set DDSECOND, -4
110 1.1 is set DDNORMAL, -8
111 1.1 is set DDQUOTIENT, -12
112 1.1 is set DIV64_CC, -16
113 1.1 is
114 1.1 is ##########
115 1.1 is # divs.l #
116 1.1 is ##########
117 1.1 is global _060LSP__idivs64_
118 1.1 is _060LSP__idivs64_:
119 1.1 is # PROLOGUE BEGIN ########################################################
120 1.1 is link.w %a6,&-16
121 1.1 is movm.l &0x3f00,-(%sp) # save d2-d7
122 1.1 is # fmovm.l &0x0,-(%sp) # save no fpregs
123 1.1 is # PROLOGUE END ##########################################################
124 1.1 is
125 1.1 is mov.w %cc,DIV64_CC(%a6)
126 1.1 is st POSNEG(%a6) # signed operation
127 1.1 is bra.b ldiv64_cont
128 1.1 is
129 1.1 is ##########
130 1.1 is # divu.l #
131 1.1 is ##########
132 1.1 is global _060LSP__idivu64_
133 1.1 is _060LSP__idivu64_:
134 1.1 is # PROLOGUE BEGIN ########################################################
135 1.1 is link.w %a6,&-16
136 1.1 is movm.l &0x3f00,-(%sp) # save d2-d7
137 1.1 is # fmovm.l &0x0,-(%sp) # save no fpregs
138 1.1 is # PROLOGUE END ##########################################################
139 1.1 is
140 1.1 is mov.w %cc,DIV64_CC(%a6)
141 1.1 is sf POSNEG(%a6) # unsigned operation
142 1.1 is
143 1.1 is ldiv64_cont:
144 1.1 is mov.l 0x8(%a6),%d7 # fetch divisor
145 1.1 is
146 1.1 is beq.w ldiv64eq0 # divisor is = 0!!!
147 1.1 is
148 1.1 is mov.l 0xc(%a6), %d5 # get dividend hi
149 1.1 is mov.l 0x10(%a6), %d6 # get dividend lo
150 1.1 is
151 1.1 is # separate signed and unsigned divide
152 1.1 is tst.b POSNEG(%a6) # signed or unsigned?
153 1.1 is beq.b ldspecialcases # use positive divide
154 1.1 is
155 1.1 is # save the sign of the divisor
156 1.1 is # make divisor unsigned if it's negative
157 1.1 is tst.l %d7 # chk sign of divisor
158 1.1 is slt NDIVISOR(%a6) # save sign of divisor
159 1.1 is bpl.b ldsgndividend
160 1.1 is neg.l %d7 # complement negative divisor
161 1.1 is
162 1.1 is # save the sign of the dividend
163 1.1 is # make dividend unsigned if it's negative
164 1.1 is ldsgndividend:
165 1.1 is tst.l %d5 # chk sign of hi(dividend)
166 1.1 is slt NDIVIDEND(%a6) # save sign of dividend
167 1.1 is bpl.b ldspecialcases
168 1.1 is
169 1.1 is mov.w &0x0, %cc # clear 'X' cc bit
170 1.1 is negx.l %d6 # complement signed dividend
171 1.1 is negx.l %d5
172 1.1 is
173 1.1 is # extract some special cases:
174 1.1 is # - is (dividend == 0) ?
175 1.1 is # - is (hi(dividend) == 0 && (divisor <= lo(dividend))) ? (32-bit div)
176 1.1 is ldspecialcases:
177 1.1 is tst.l %d5 # is (hi(dividend) == 0)
178 1.1 is bne.b ldnormaldivide # no, so try it the long way
179 1.1 is
180 1.1 is tst.l %d6 # is (lo(dividend) == 0), too
181 1.1 is beq.w lddone # yes, so (dividend == 0)
182 1.1 is
183 1.1 is cmp.l %d7,%d6 # is (divisor <= lo(dividend))
184 1.1 is bls.b ld32bitdivide # yes, so use 32 bit divide
185 1.1 is
186 1.1 is exg %d5,%d6 # q = 0, r = dividend
187 1.1 is bra.w ldivfinish # can't divide, we're done.
188 1.1 is
189 1.1 is ld32bitdivide:
190 1.1 is tdivu.l %d7, %d5:%d6 # it's only a 32/32 bit div!
191 1.1 is
192 1.1 is bra.b ldivfinish
193 1.1 is
194 1.1 is ldnormaldivide:
195 1.1 is # last special case:
196 1.1 is # - is hi(dividend) >= divisor ? if yes, then overflow
197 1.1 is cmp.l %d7,%d5
198 1.1 is bls.b lddovf # answer won't fit in 32 bits
199 1.1 is
200 1.1 is # perform the divide algorithm:
201 1.1 is bsr.l ldclassical # do int divide
202 1.1 is
203 1.1 is # separate into signed and unsigned finishes.
204 1.1 is ldivfinish:
205 1.1 is tst.b POSNEG(%a6) # do divs, divu separately
206 1.1 is beq.b lddone # divu has no processing!!!
207 1.1 is
208 1.1 is # it was a divs.l, so ccode setting is a little more complicated...
209 1.1 is tst.b NDIVIDEND(%a6) # remainder has same sign
210 1.1 is beq.b ldcc # as dividend.
211 1.1 is neg.l %d5 # sgn(rem) = sgn(dividend)
212 1.1 is ldcc:
213 1.1 is mov.b NDIVISOR(%a6), %d0
214 1.1 is eor.b %d0, NDIVIDEND(%a6) # chk if quotient is negative
215 1.1 is beq.b ldqpos # branch to quot positive
216 1.1 is
217 1.1 is # 0x80000000 is the largest number representable as a 32-bit negative
218 1.1 is # number. the negative of 0x80000000 is 0x80000000.
219 1.1 is cmpi.l %d6, &0x80000000 # will (-quot) fit in 32 bits?
220 1.1 is bhi.b lddovf
221 1.1 is
222 1.1 is neg.l %d6 # make (-quot) 2's comp
223 1.1 is
224 1.1 is bra.b lddone
225 1.1 is
226 1.1 is ldqpos:
227 1.1 is btst &0x1f, %d6 # will (+quot) fit in 32 bits?
228 1.1 is bne.b lddovf
229 1.1 is
230 1.1 is lddone:
231 1.1 is # if the register numbers are the same, only the quotient gets saved.
232 1.1 is # so, if we always save the quotient second, we save ourselves a cmp&beq
233 1.1 is andi.w &0x10,DIV64_CC(%a6)
234 1.1 is mov.w DIV64_CC(%a6),%cc
235 1.1 is tst.l %d6 # may set 'N' ccode bit
236 1.1 is
237 1.1 is # here, the result is in d1 and d0. the current strategy is to save
238 1.1 is # the values at the location pointed to by a0.
239 1.1 is # use movm here to not disturb the condition codes.
240 1.1 is ldexit:
241 1.1 is movm.l &0x0060,([0x14,%a6]) # save result
242 1.1 is
243 1.1 is # EPILOGUE BEGIN ########################################################
244 1.1 is # fmovm.l (%sp)+,&0x0 # restore no fpregs
245 1.1 is movm.l (%sp)+,&0x00fc # restore d2-d7
246 1.1 is unlk %a6
247 1.1 is # EPILOGUE END ##########################################################
248 1.1 is
249 1.1 is rts
250 1.1 is
251 1.1 is # the result should be the unchanged dividend
252 1.1 is lddovf:
253 1.1 is mov.l 0xc(%a6), %d5 # get dividend hi
254 1.1 is mov.l 0x10(%a6), %d6 # get dividend lo
255 1.1 is
256 1.1 is andi.w &0x1c,DIV64_CC(%a6)
257 1.1 is ori.w &0x02,DIV64_CC(%a6) # set 'V' ccode bit
258 1.1 is mov.w DIV64_CC(%a6),%cc
259 1.1 is
260 1.1 is bra.b ldexit
261 1.1 is
262 1.1 is ldiv64eq0:
263 1.1 is mov.l 0xc(%a6),([0x14,%a6])
264 1.1 is mov.l 0x10(%a6),([0x14,%a6],0x4)
265 1.1 is
266 1.1 is mov.w DIV64_CC(%a6),%cc
267 1.1 is
268 1.1 is # EPILOGUE BEGIN ########################################################
269 1.1 is # fmovm.l (%sp)+,&0x0 # restore no fpregs
270 1.1 is movm.l (%sp)+,&0x00fc # restore d2-d7
271 1.1 is unlk %a6
272 1.1 is # EPILOGUE END ##########################################################
273 1.1 is
274 1.1 is divu.w &0x0,%d0 # force a divbyzero exception
275 1.1 is rts
276 1.1 is
277 1.1 is ###########################################################################
278 1.1 is #########################################################################
279 1.1 is # This routine uses the 'classical' Algorithm D from Donald Knuth's #
280 1.1 is # Art of Computer Programming, vol II, Seminumerical Algorithms. #
281 1.1 is # For this implementation b=2**16, and the target is U1U2U3U4/V1V2, #
282 1.1 is # where U,V are words of the quadword dividend and longword divisor, #
283 1.1 is # and U1, V1 are the most significant words. #
284 1.1 is # #
285 1.1 is # The most sig. longword of the 64 bit dividend must be in %d5, least #
286 1.1 is # in %d6. The divisor must be in the variable ddivisor, and the #
287 1.1 is # signed/unsigned flag ddusign must be set (0=unsigned,1=signed). #
288 1.1 is # The quotient is returned in %d6, remainder in %d5, unless the #
289 1.1 is # v (overflow) bit is set in the saved %ccr. If overflow, the dividend #
290 1.1 is # is unchanged. #
291 1.1 is #########################################################################
292 1.1 is ldclassical:
293 1.1 is # if the divisor msw is 0, use simpler algorithm then the full blown
294 1.1 is # one at ddknuth:
295 1.1 is
296 1.1 is cmpi.l %d7, &0xffff
297 1.1 is bhi.b lddknuth # go use D. Knuth algorithm
298 1.1 is
299 1.1 is # Since the divisor is only a word (and larger than the mslw of the dividend),
300 1.1 is # a simpler algorithm may be used :
301 1.1 is # In the general case, four quotient words would be created by
302 1.1 is # dividing the divisor word into each dividend word. In this case,
303 1.1 is # the first two quotient words must be zero, or overflow would occur.
304 1.1 is # Since we already checked this case above, we can treat the most significant
305 1.1 is # longword of the dividend as (0) remainder (see Knuth) and merely complete
306 1.1 is # the last two divisions to get a quotient longword and word remainder:
307 1.1 is
308 1.1 is clr.l %d1
309 1.1 is swap %d5 # same as r*b if previous step rqd
310 1.1 is swap %d6 # get u3 to lsw position
311 1.1 is mov.w %d6, %d5 # rb + u3
312 1.1 is
313 1.1 is divu.w %d7, %d5
314 1.1 is
315 1.1 is mov.w %d5, %d1 # first quotient word
316 1.1 is swap %d6 # get u4
317 1.1 is mov.w %d6, %d5 # rb + u4
318 1.1 is
319 1.1 is divu.w %d7, %d5
320 1.1 is
321 1.1 is swap %d1
322 1.1 is mov.w %d5, %d1 # 2nd quotient 'digit'
323 1.1 is clr.w %d5
324 1.1 is swap %d5 # now remainder
325 1.1 is mov.l %d1, %d6 # and quotient
326 1.1 is
327 1.1 is rts
328 1.1 is
329 1.1 is lddknuth:
330 1.1 is # In this algorithm, the divisor is treated as a 2 digit (word) number
331 1.1 is # which is divided into a 3 digit (word) dividend to get one quotient
332 1.1 is # digit (word). After subtraction, the dividend is shifted and the
333 1.1 is # process repeated. Before beginning, the divisor and quotient are
334 1.1 is # 'normalized' so that the process of estimating the quotient digit
335 1.1 is # will yield verifiably correct results..
336 1.1 is
337 1.1 is clr.l DDNORMAL(%a6) # count of shifts for normalization
338 1.1 is clr.b DDSECOND(%a6) # clear flag for quotient digits
339 1.1 is clr.l %d1 # %d1 will hold trial quotient
340 1.1 is lddnchk:
341 1.1 is btst &31, %d7 # must we normalize? first word of
342 1.1 is bne.b lddnormalized # divisor (V1) must be >= 65536/2
343 1.1 is addq.l &0x1, DDNORMAL(%a6) # count normalization shifts
344 1.1 is lsl.l &0x1, %d7 # shift the divisor
345 1.1 is lsl.l &0x1, %d6 # shift u4,u3 with overflow to u2
346 1.1 is roxl.l &0x1, %d5 # shift u1,u2
347 1.1 is bra.w lddnchk
348 1.1 is lddnormalized:
349 1.1 is
350 1.1 is # Now calculate an estimate of the quotient words (msw first, then lsw).
351 1.1 is # The comments use subscripts for the first quotient digit determination.
352 1.1 is mov.l %d7, %d3 # divisor
353 1.1 is mov.l %d5, %d2 # dividend mslw
354 1.1 is swap %d2
355 1.1 is swap %d3
356 1.1 is cmp.w %d2, %d3 # V1 = U1 ?
357 1.1 is bne.b lddqcalc1
358 1.1 is mov.w &0xffff, %d1 # use max trial quotient word
359 1.1 is bra.b lddadj0
360 1.1 is lddqcalc1:
361 1.1 is mov.l %d5, %d1
362 1.1 is
363 1.1 is divu.w %d3, %d1 # use quotient of mslw/msw
364 1.1 is
365 1.1 is andi.l &0x0000ffff, %d1 # zero any remainder
366 1.1 is lddadj0:
367 1.1 is
368 1.1 is # now test the trial quotient and adjust. This step plus the
369 1.1 is # normalization assures (according to Knuth) that the trial
370 1.1 is # quotient will be at worst 1 too large.
371 1.1 is mov.l %d6, -(%sp)
372 1.1 is clr.w %d6 # word u3 left
373 1.1 is swap %d6 # in lsw position
374 1.1 is lddadj1: mov.l %d7, %d3
375 1.1 is mov.l %d1, %d2
376 1.1 is mulu.w %d7, %d2 # V2q
377 1.1 is swap %d3
378 1.1 is mulu.w %d1, %d3 # V1q
379 1.1 is mov.l %d5, %d4 # U1U2
380 1.1 is sub.l %d3, %d4 # U1U2 - V1q
381 1.1 is
382 1.1 is swap %d4
383 1.1 is
384 1.1 is mov.w %d4,%d0
385 1.1 is mov.w %d6,%d4 # insert lower word (U3)
386 1.1 is
387 1.1 is tst.w %d0 # is upper word set?
388 1.1 is bne.w lddadjd1
389 1.1 is
390 1.1 is # add.l %d6, %d4 # (U1U2 - V1q) + U3
391 1.1 is
392 1.1 is cmp.l %d2, %d4
393 1.1 is bls.b lddadjd1 # is V2q > (U1U2-V1q) + U3 ?
394 1.1 is subq.l &0x1, %d1 # yes, decrement and recheck
395 1.1 is bra.b lddadj1
396 1.1 is lddadjd1:
397 1.1 is # now test the word by multiplying it by the divisor (V1V2) and comparing
398 1.1 is # the 3 digit (word) result with the current dividend words
399 1.1 is mov.l %d5, -(%sp) # save %d5 (%d6 already saved)
400 1.1 is mov.l %d1, %d6
401 1.1 is swap %d6 # shift answer to ms 3 words
402 1.1 is mov.l %d7, %d5
403 1.1 is bsr.l ldmm2
404 1.1 is mov.l %d5, %d2 # now %d2,%d3 are trial*divisor
405 1.1 is mov.l %d6, %d3
406 1.1 is mov.l (%sp)+, %d5 # restore dividend
407 1.1 is mov.l (%sp)+, %d6
408 1.1 is sub.l %d3, %d6
409 1.1 is subx.l %d2, %d5 # subtract double precision
410 1.1 is bcc ldd2nd # no carry, do next quotient digit
411 1.1 is subq.l &0x1, %d1 # q is one too large
412 1.1 is # need to add back divisor longword to current ms 3 digits of dividend
413 1.1 is # - according to Knuth, this is done only 2 out of 65536 times for random
414 1.1 is # divisor, dividend selection.
415 1.1 is clr.l %d2
416 1.1 is mov.l %d7, %d3
417 1.1 is swap %d3
418 1.1 is clr.w %d3 # %d3 now ls word of divisor
419 1.1 is add.l %d3, %d6 # aligned with 3rd word of dividend
420 1.1 is addx.l %d2, %d5
421 1.1 is mov.l %d7, %d3
422 1.1 is clr.w %d3 # %d3 now ms word of divisor
423 1.1 is swap %d3 # aligned with 2nd word of dividend
424 1.1 is add.l %d3, %d5
425 1.1 is ldd2nd:
426 1.1 is tst.b DDSECOND(%a6) # both q words done?
427 1.1 is bne.b lddremain
428 1.1 is # first quotient digit now correct. store digit and shift the
429 1.1 is # (subtracted) dividend
430 1.1 is mov.w %d1, DDQUOTIENT(%a6)
431 1.1 is clr.l %d1
432 1.1 is swap %d5
433 1.1 is swap %d6
434 1.1 is mov.w %d6, %d5
435 1.1 is clr.w %d6
436 1.1 is st DDSECOND(%a6) # second digit
437 1.1 is bra.w lddnormalized
438 1.1 is lddremain:
439 1.1 is # add 2nd word to quotient, get the remainder.
440 1.1 is mov.w %d1, DDQUOTIENT+2(%a6)
441 1.1 is # shift down one word/digit to renormalize remainder.
442 1.1 is mov.w %d5, %d6
443 1.1 is swap %d6
444 1.1 is swap %d5
445 1.1 is mov.l DDNORMAL(%a6), %d7 # get norm shift count
446 1.1 is beq.b lddrn
447 1.1 is subq.l &0x1, %d7 # set for loop count
448 1.1 is lddnlp:
449 1.1 is lsr.l &0x1, %d5 # shift into %d6
450 1.1 is roxr.l &0x1, %d6
451 1.1 is dbf %d7, lddnlp
452 1.1 is lddrn:
453 1.1 is mov.l %d6, %d5 # remainder
454 1.1 is mov.l DDQUOTIENT(%a6), %d6 # quotient
455 1.1 is
456 1.1 is rts
457 1.1 is ldmm2:
458 1.1 is # factors for the 32X32->64 multiplication are in %d5 and %d6.
459 1.1 is # returns 64 bit result in %d5 (hi) %d6(lo).
460 1.1 is # destroys %d2,%d3,%d4.
461 1.1 is
462 1.1 is # multiply hi,lo words of each factor to get 4 intermediate products
463 1.1 is mov.l %d6, %d2
464 1.1 is mov.l %d6, %d3
465 1.1 is mov.l %d5, %d4
466 1.1 is swap %d3
467 1.1 is swap %d4
468 1.1 is mulu.w %d5, %d6 # %d6 <- lsw*lsw
469 1.1 is mulu.w %d3, %d5 # %d5 <- msw-dest*lsw-source
470 1.1 is mulu.w %d4, %d2 # %d2 <- msw-source*lsw-dest
471 1.1 is mulu.w %d4, %d3 # %d3 <- msw*msw
472 1.1 is # now use swap and addx to consolidate to two longwords
473 1.1 is clr.l %d4
474 1.1 is swap %d6
475 1.1 is add.w %d5, %d6 # add msw of l*l to lsw of m*l product
476 1.1 is addx.w %d4, %d3 # add any carry to m*m product
477 1.1 is add.w %d2, %d6 # add in lsw of other m*l product
478 1.1 is addx.w %d4, %d3 # add any carry to m*m product
479 1.1 is swap %d6 # %d6 is low 32 bits of final product
480 1.1 is clr.w %d5
481 1.1 is clr.w %d2 # lsw of two mixed products used,
482 1.1 is swap %d5 # now use msws of longwords
483 1.1 is swap %d2
484 1.1 is add.l %d2, %d5
485 1.1 is add.l %d3, %d5 # %d5 now ms 32 bits of final product
486 1.1 is rts
487 1.1 is
488 1.1 is #########################################################################
489 1.1 is # XDEF **************************************************************** #
490 1.1 is # _060LSP__imulu64_(): Emulate 64-bit unsigned mul instruction #
491 1.1 is # _060LSP__imuls64_(): Emulate 64-bit signed mul instruction. #
492 1.1 is # #
493 1.1 is # This is the library version which is accessed as a subroutine #
494 1.1 is # and therefore does not work exactly like the 680X0 mul{s,u}.l #
495 1.1 is # 64-bit multiply instruction. #
496 1.1 is # #
497 1.1 is # XREF **************************************************************** #
498 1.1 is # None #
499 1.1 is # #
500 1.1 is # INPUT *************************************************************** #
501 1.1 is # 0x4(sp) = multiplier #
502 1.1 is # 0x8(sp) = multiplicand #
503 1.1 is # 0xc(sp) = pointer to location to place 64-bit result #
504 1.1 is # #
505 1.1 is # OUTPUT ************************************************************** #
506 1.1 is # 0xc(sp) = points to location of 64-bit result #
507 1.1 is # #
508 1.1 is # ALGORITHM *********************************************************** #
509 1.1 is # Perform the multiply in pieces using 16x16->32 unsigned #
510 1.1 is # multiplies and "add" instructions. #
511 1.1 is # Set the condition codes as appropriate before performing an #
512 1.1 is # "rts". #
513 1.1 is # #
514 1.1 is #########################################################################
515 1.1 is
516 1.1 is set MUL64_CC, -4
517 1.1 is
518 1.1 is global _060LSP__imulu64_
519 1.1 is _060LSP__imulu64_:
520 1.1 is
521 1.1 is # PROLOGUE BEGIN ########################################################
522 1.1 is link.w %a6,&-4
523 1.1 is movm.l &0x3800,-(%sp) # save d2-d4
524 1.1 is # fmovm.l &0x0,-(%sp) # save no fpregs
525 1.1 is # PROLOGUE END ##########################################################
526 1.1 is
527 1.1 is mov.w %cc,MUL64_CC(%a6) # save incomming ccodes
528 1.1 is
529 1.1 is mov.l 0x8(%a6),%d0 # store multiplier in d0
530 1.1 is beq.w mulu64_zero # handle zero separately
531 1.1 is
532 1.1 is mov.l 0xc(%a6),%d1 # get multiplicand in d1
533 1.1 is beq.w mulu64_zero # handle zero separately
534 1.1 is
535 1.1 is #########################################################################
536 1.1 is # 63 32 0 #
537 1.1 is # ---------------------------- #
538 1.1 is # | hi(mplier) * hi(mplicand)| #
539 1.1 is # ---------------------------- #
540 1.1 is # ----------------------------- #
541 1.1 is # | hi(mplier) * lo(mplicand) | #
542 1.1 is # ----------------------------- #
543 1.1 is # ----------------------------- #
544 1.1 is # | lo(mplier) * hi(mplicand) | #
545 1.1 is # ----------------------------- #
546 1.1 is # | ----------------------------- #
547 1.1 is # --|-- | lo(mplier) * lo(mplicand) | #
548 1.1 is # | ----------------------------- #
549 1.1 is # ======================================================== #
550 1.1 is # -------------------------------------------------------- #
551 1.1 is # | hi(result) | lo(result) | #
552 1.1 is # -------------------------------------------------------- #
553 1.1 is #########################################################################
554 1.1 is mulu64_alg:
555 1.1 is # load temp registers with operands
556 1.1 is mov.l %d0,%d2 # mr in d2
557 1.1 is mov.l %d0,%d3 # mr in d3
558 1.1 is mov.l %d1,%d4 # md in d4
559 1.1 is swap %d3 # hi(mr) in lo d3
560 1.1 is swap %d4 # hi(md) in lo d4
561 1.1 is
562 1.1 is # complete necessary multiplies:
563 1.1 is mulu.w %d1,%d0 # [1] lo(mr) * lo(md)
564 1.1 is mulu.w %d3,%d1 # [2] hi(mr) * lo(md)
565 1.1 is mulu.w %d4,%d2 # [3] lo(mr) * hi(md)
566 1.1 is mulu.w %d4,%d3 # [4] hi(mr) * hi(md)
567 1.1 is
568 1.1 is # add lo portions of [2],[3] to hi portion of [1].
569 1.1 is # add carries produced from these adds to [4].
570 1.1 is # lo([1]) is the final lo 16 bits of the result.
571 1.1 is clr.l %d4 # load d4 w/ zero value
572 1.1 is swap %d0 # hi([1]) <==> lo([1])
573 1.1 is add.w %d1,%d0 # hi([1]) + lo([2])
574 1.1 is addx.l %d4,%d3 # [4] + carry
575 1.1 is add.w %d2,%d0 # hi([1]) + lo([3])
576 1.1 is addx.l %d4,%d3 # [4] + carry
577 1.1 is swap %d0 # lo([1]) <==> hi([1])
578 1.1 is
579 1.1 is # lo portions of [2],[3] have been added in to final result.
580 1.1 is # now, clear lo, put hi in lo reg, and add to [4]
581 1.1 is clr.w %d1 # clear lo([2])
582 1.1 is clr.w %d2 # clear hi([3])
583 1.1 is swap %d1 # hi([2]) in lo d1
584 1.1 is swap %d2 # hi([3]) in lo d2
585 1.1 is add.l %d2,%d1 # [4] + hi([2])
586 1.1 is add.l %d3,%d1 # [4] + hi([3])
587 1.1 is
588 1.1 is # now, grab the condition codes. only one that can be set is 'N'.
589 1.1 is # 'N' CAN be set if the operation is unsigned if bit 63 is set.
590 1.1 is mov.w MUL64_CC(%a6),%d4
591 1.1 is andi.b &0x10,%d4 # keep old 'X' bit
592 1.1 is tst.l %d1 # may set 'N' bit
593 1.1 is bpl.b mulu64_ddone
594 1.1 is ori.b &0x8,%d4 # set 'N' bit
595 1.1 is mulu64_ddone:
596 1.1 is mov.w %d4,%cc
597 1.1 is
598 1.1 is # here, the result is in d1 and d0. the current strategy is to save
599 1.1 is # the values at the location pointed to by a0.
600 1.1 is # use movm here to not disturb the condition codes.
601 1.1 is mulu64_end:
602 1.1 is exg %d1,%d0
603 1.1 is movm.l &0x0003,([0x10,%a6]) # save result
604 1.1 is
605 1.1 is # EPILOGUE BEGIN ########################################################
606 1.1 is # fmovm.l (%sp)+,&0x0 # restore no fpregs
607 1.1 is movm.l (%sp)+,&0x001c # restore d2-d4
608 1.1 is unlk %a6
609 1.1 is # EPILOGUE END ##########################################################
610 1.1 is
611 1.1 is rts
612 1.1 is
613 1.1 is # one or both of the operands is zero so the result is also zero.
614 1.1 is # save the zero result to the register file and set the 'Z' ccode bit.
615 1.1 is mulu64_zero:
616 1.1 is clr.l %d0
617 1.1 is clr.l %d1
618 1.1 is
619 1.1 is mov.w MUL64_CC(%a6),%d4
620 1.1 is andi.b &0x10,%d4
621 1.1 is ori.b &0x4,%d4
622 1.1 is mov.w %d4,%cc # set 'Z' ccode bit
623 1.1 is
624 1.1 is bra.b mulu64_end
625 1.1 is
626 1.1 is ##########
627 1.1 is # muls.l #
628 1.1 is ##########
629 1.1 is global _060LSP__imuls64_
630 1.1 is _060LSP__imuls64_:
631 1.1 is
632 1.1 is # PROLOGUE BEGIN ########################################################
633 1.1 is link.w %a6,&-4
634 1.1 is movm.l &0x3c00,-(%sp) # save d2-d5
635 1.1 is # fmovm.l &0x0,-(%sp) # save no fpregs
636 1.1 is # PROLOGUE END ##########################################################
637 1.1 is
638 1.1 is mov.w %cc,MUL64_CC(%a6) # save incomming ccodes
639 1.1 is
640 1.1 is mov.l 0x8(%a6),%d0 # store multiplier in d0
641 1.1 is beq.b mulu64_zero # handle zero separately
642 1.1 is
643 1.1 is mov.l 0xc(%a6),%d1 # get multiplicand in d1
644 1.1 is beq.b mulu64_zero # handle zero separately
645 1.1 is
646 1.1 is clr.b %d5 # clear sign tag
647 1.1 is tst.l %d0 # is multiplier negative?
648 1.1 is bge.b muls64_chk_md_sgn # no
649 1.1 is neg.l %d0 # make multiplier positive
650 1.1 is
651 1.1 is ori.b &0x1,%d5 # save multiplier sgn
652 1.1 is
653 1.1 is # the result sign is the exclusive or of the operand sign bits.
654 1.1 is muls64_chk_md_sgn:
655 1.1 is tst.l %d1 # is multiplicand negative?
656 1.1 is bge.b muls64_alg # no
657 1.1 is neg.l %d1 # make multiplicand positive
658 1.1 is
659 1.1 is eori.b &0x1,%d5 # calculate correct sign
660 1.1 is
661 1.1 is #########################################################################
662 1.1 is # 63 32 0 #
663 1.1 is # ---------------------------- #
664 1.1 is # | hi(mplier) * hi(mplicand)| #
665 1.1 is # ---------------------------- #
666 1.1 is # ----------------------------- #
667 1.1 is # | hi(mplier) * lo(mplicand) | #
668 1.1 is # ----------------------------- #
669 1.1 is # ----------------------------- #
670 1.1 is # | lo(mplier) * hi(mplicand) | #
671 1.1 is # ----------------------------- #
672 1.1 is # | ----------------------------- #
673 1.1 is # --|-- | lo(mplier) * lo(mplicand) | #
674 1.1 is # | ----------------------------- #
675 1.1 is # ======================================================== #
676 1.1 is # -------------------------------------------------------- #
677 1.1 is # | hi(result) | lo(result) | #
678 1.1 is # -------------------------------------------------------- #
679 1.1 is #########################################################################
680 1.1 is muls64_alg:
681 1.1 is # load temp registers with operands
682 1.1 is mov.l %d0,%d2 # mr in d2
683 1.1 is mov.l %d0,%d3 # mr in d3
684 1.1 is mov.l %d1,%d4 # md in d4
685 1.1 is swap %d3 # hi(mr) in lo d3
686 1.1 is swap %d4 # hi(md) in lo d4
687 1.1 is
688 1.1 is # complete necessary multiplies:
689 1.1 is mulu.w %d1,%d0 # [1] lo(mr) * lo(md)
690 1.1 is mulu.w %d3,%d1 # [2] hi(mr) * lo(md)
691 1.1 is mulu.w %d4,%d2 # [3] lo(mr) * hi(md)
692 1.1 is mulu.w %d4,%d3 # [4] hi(mr) * hi(md)
693 1.1 is
694 1.1 is # add lo portions of [2],[3] to hi portion of [1].
695 1.1 is # add carries produced from these adds to [4].
696 1.1 is # lo([1]) is the final lo 16 bits of the result.
697 1.1 is clr.l %d4 # load d4 w/ zero value
698 1.1 is swap %d0 # hi([1]) <==> lo([1])
699 1.1 is add.w %d1,%d0 # hi([1]) + lo([2])
700 1.1 is addx.l %d4,%d3 # [4] + carry
701 1.1 is add.w %d2,%d0 # hi([1]) + lo([3])
702 1.1 is addx.l %d4,%d3 # [4] + carry
703 1.1 is swap %d0 # lo([1]) <==> hi([1])
704 1.1 is
705 1.1 is # lo portions of [2],[3] have been added in to final result.
706 1.1 is # now, clear lo, put hi in lo reg, and add to [4]
707 1.1 is clr.w %d1 # clear lo([2])
708 1.1 is clr.w %d2 # clear hi([3])
709 1.1 is swap %d1 # hi([2]) in lo d1
710 1.1 is swap %d2 # hi([3]) in lo d2
711 1.1 is add.l %d2,%d1 # [4] + hi([2])
712 1.1 is add.l %d3,%d1 # [4] + hi([3])
713 1.1 is
714 1.1 is tst.b %d5 # should result be signed?
715 1.1 is beq.b muls64_done # no
716 1.1 is
717 1.1 is # result should be a signed negative number.
718 1.1 is # compute 2's complement of the unsigned number:
719 1.1 is # -negate all bits and add 1
720 1.1 is muls64_neg:
721 1.1 is not.l %d0 # negate lo(result) bits
722 1.1 is not.l %d1 # negate hi(result) bits
723 1.1 is addq.l &1,%d0 # add 1 to lo(result)
724 1.1 is addx.l %d4,%d1 # add carry to hi(result)
725 1.1 is
726 1.1 is muls64_done:
727 1.1 is mov.w MUL64_CC(%a6),%d4
728 1.1 is andi.b &0x10,%d4 # keep old 'X' bit
729 1.1 is tst.l %d1 # may set 'N' bit
730 1.1 is bpl.b muls64_ddone
731 1.1 is ori.b &0x8,%d4 # set 'N' bit
732 1.1 is muls64_ddone:
733 1.1 is mov.w %d4,%cc
734 1.1 is
735 1.1 is # here, the result is in d1 and d0. the current strategy is to save
736 1.1 is # the values at the location pointed to by a0.
737 1.1 is # use movm here to not disturb the condition codes.
738 1.1 is muls64_end:
739 1.1 is exg %d1,%d0
740 1.1 is movm.l &0x0003,([0x10,%a6]) # save result at (a0)
741 1.1 is
742 1.1 is # EPILOGUE BEGIN ########################################################
743 1.1 is # fmovm.l (%sp)+,&0x0 # restore no fpregs
744 1.1 is movm.l (%sp)+,&0x003c # restore d2-d5
745 1.1 is unlk %a6
746 1.1 is # EPILOGUE END ##########################################################
747 1.1 is
748 1.1 is rts
749 1.1 is
750 1.1 is # one or both of the operands is zero so the result is also zero.
751 1.1 is # save the zero result to the register file and set the 'Z' ccode bit.
752 1.1 is muls64_zero:
753 1.1 is clr.l %d0
754 1.1 is clr.l %d1
755 1.1 is
756 1.1 is mov.w MUL64_CC(%a6),%d4
757 1.1 is andi.b &0x10,%d4
758 1.1 is ori.b &0x4,%d4
759 1.1 is mov.w %d4,%cc # set 'Z' ccode bit
760 1.1 is
761 1.1 is bra.b muls64_end
762 1.1 is
763 1.1 is #########################################################################
764 1.1 is # XDEF **************************************************************** #
765 1.1 is # _060LSP__cmp2_Ab_(): Emulate "cmp2.b An,<ea>". #
766 1.1 is # _060LSP__cmp2_Aw_(): Emulate "cmp2.w An,<ea>". #
767 1.1 is # _060LSP__cmp2_Al_(): Emulate "cmp2.l An,<ea>". #
768 1.1 is # _060LSP__cmp2_Db_(): Emulate "cmp2.b Dn,<ea>". #
769 1.1 is # _060LSP__cmp2_Dw_(): Emulate "cmp2.w Dn,<ea>". #
770 1.1 is # _060LSP__cmp2_Dl_(): Emulate "cmp2.l Dn,<ea>". #
771 1.1 is # #
772 1.1 is # This is the library version which is accessed as a subroutine #
773 1.1 is # and therefore does not work exactly like the 680X0 "cmp2" #
774 1.1 is # instruction. #
775 1.1 is # #
776 1.1 is # XREF **************************************************************** #
777 1.1 is # None #
778 1.1 is # #
779 1.1 is # INPUT *************************************************************** #
780 1.1 is # 0x4(sp) = Rn #
781 1.1 is # 0x8(sp) = pointer to boundary pair #
782 1.1 is # #
783 1.1 is # OUTPUT ************************************************************** #
784 1.1 is # cc = condition codes are set correctly #
785 1.1 is # #
786 1.1 is # ALGORITHM *********************************************************** #
787 1.1 is # In the interest of simplicity, all operands are converted to #
788 1.1 is # longword size whether the operation is byte, word, or long. The #
789 1.1.180.1 jdolecek # bounds are sign extended accordingly. If Rn is a data register, Rn is #
790 1.1 is # also sign extended. If Rn is an address register, it need not be sign #
791 1.1 is # extended since the full register is always used. #
792 1.1 is # The condition codes are set correctly before the final "rts". #
793 1.1 is # #
794 1.1 is #########################################################################
795 1.1 is
796 1.1 is set CMP2_CC, -4
797 1.1 is
798 1.1 is global _060LSP__cmp2_Ab_
799 1.1 is _060LSP__cmp2_Ab_:
800 1.1 is
801 1.1 is # PROLOGUE BEGIN ########################################################
802 1.1 is link.w %a6,&-4
803 1.1 is movm.l &0x3800,-(%sp) # save d2-d4
804 1.1 is # fmovm.l &0x0,-(%sp) # save no fpregs
805 1.1 is # PROLOGUE END ##########################################################
806 1.1 is
807 1.1 is mov.w %cc,CMP2_CC(%a6)
808 1.1 is mov.l 0x8(%a6), %d2 # get regval
809 1.1 is
810 1.1 is mov.b ([0xc,%a6],0x0),%d0
811 1.1 is mov.b ([0xc,%a6],0x1),%d1
812 1.1 is
813 1.1 is extb.l %d0 # sign extend lo bnd
814 1.1 is extb.l %d1 # sign extend hi bnd
815 1.1 is bra.w l_cmp2_cmp # go do the compare emulation
816 1.1 is
817 1.1 is global _060LSP__cmp2_Aw_
818 1.1 is _060LSP__cmp2_Aw_:
819 1.1 is
820 1.1 is # PROLOGUE BEGIN ########################################################
821 1.1 is link.w %a6,&-4
822 1.1 is movm.l &0x3800,-(%sp) # save d2-d4
823 1.1 is # fmovm.l &0x0,-(%sp) # save no fpregs
824 1.1 is # PROLOGUE END ##########################################################
825 1.1 is
826 1.1 is mov.w %cc,CMP2_CC(%a6)
827 1.1 is mov.l 0x8(%a6), %d2 # get regval
828 1.1 is
829 1.1 is mov.w ([0xc,%a6],0x0),%d0
830 1.1 is mov.w ([0xc,%a6],0x2),%d1
831 1.1 is
832 1.1 is ext.l %d0 # sign extend lo bnd
833 1.1 is ext.l %d1 # sign extend hi bnd
834 1.1 is bra.w l_cmp2_cmp # go do the compare emulation
835 1.1 is
836 1.1 is global _060LSP__cmp2_Al_
837 1.1 is _060LSP__cmp2_Al_:
838 1.1 is
839 1.1 is # PROLOGUE BEGIN ########################################################
840 1.1 is link.w %a6,&-4
841 1.1 is movm.l &0x3800,-(%sp) # save d2-d4
842 1.1 is # fmovm.l &0x0,-(%sp) # save no fpregs
843 1.1 is # PROLOGUE END ##########################################################
844 1.1 is
845 1.1 is mov.w %cc,CMP2_CC(%a6)
846 1.1 is mov.l 0x8(%a6), %d2 # get regval
847 1.1 is
848 1.1 is mov.l ([0xc,%a6],0x0),%d0
849 1.1 is mov.l ([0xc,%a6],0x4),%d1
850 1.1 is bra.w l_cmp2_cmp # go do the compare emulation
851 1.1 is
852 1.1 is global _060LSP__cmp2_Db_
853 1.1 is _060LSP__cmp2_Db_:
854 1.1 is
855 1.1 is # PROLOGUE BEGIN ########################################################
856 1.1 is link.w %a6,&-4
857 1.1 is movm.l &0x3800,-(%sp) # save d2-d4
858 1.1 is # fmovm.l &0x0,-(%sp) # save no fpregs
859 1.1 is # PROLOGUE END ##########################################################
860 1.1 is
861 1.1 is mov.w %cc,CMP2_CC(%a6)
862 1.1 is mov.l 0x8(%a6), %d2 # get regval
863 1.1 is
864 1.1 is mov.b ([0xc,%a6],0x0),%d0
865 1.1 is mov.b ([0xc,%a6],0x1),%d1
866 1.1 is
867 1.1 is extb.l %d0 # sign extend lo bnd
868 1.1 is extb.l %d1 # sign extend hi bnd
869 1.1 is
870 1.1 is # operation is a data register compare.
871 1.1 is # sign extend byte to long so we can do simple longword compares.
872 1.1 is extb.l %d2 # sign extend data byte
873 1.1 is bra.w l_cmp2_cmp # go do the compare emulation
874 1.1 is
875 1.1 is global _060LSP__cmp2_Dw_
876 1.1 is _060LSP__cmp2_Dw_:
877 1.1 is
878 1.1 is # PROLOGUE BEGIN ########################################################
879 1.1 is link.w %a6,&-4
880 1.1 is movm.l &0x3800,-(%sp) # save d2-d4
881 1.1 is # fmovm.l &0x0,-(%sp) # save no fpregs
882 1.1 is # PROLOGUE END ##########################################################
883 1.1 is
884 1.1 is mov.w %cc,CMP2_CC(%a6)
885 1.1 is mov.l 0x8(%a6), %d2 # get regval
886 1.1 is
887 1.1 is mov.w ([0xc,%a6],0x0),%d0
888 1.1 is mov.w ([0xc,%a6],0x2),%d1
889 1.1 is
890 1.1 is ext.l %d0 # sign extend lo bnd
891 1.1 is ext.l %d1 # sign extend hi bnd
892 1.1 is
893 1.1 is # operation is a data register compare.
894 1.1 is # sign extend word to long so we can do simple longword compares.
895 1.1 is ext.l %d2 # sign extend data word
896 1.1 is bra.w l_cmp2_cmp # go emulate compare
897 1.1 is
898 1.1 is global _060LSP__cmp2_Dl_
899 1.1 is _060LSP__cmp2_Dl_:
900 1.1 is
901 1.1 is # PROLOGUE BEGIN ########################################################
902 1.1 is link.w %a6,&-4
903 1.1 is movm.l &0x3800,-(%sp) # save d2-d4
904 1.1 is # fmovm.l &0x0,-(%sp) # save no fpregs
905 1.1 is # PROLOGUE END ##########################################################
906 1.1 is
907 1.1 is mov.w %cc,CMP2_CC(%a6)
908 1.1 is mov.l 0x8(%a6), %d2 # get regval
909 1.1 is
910 1.1 is mov.l ([0xc,%a6],0x0),%d0
911 1.1 is mov.l ([0xc,%a6],0x4),%d1
912 1.1 is
913 1.1 is #
914 1.1 is # To set the ccodes correctly:
915 1.1 is # (1) save 'Z' bit from (Rn - lo)
916 1.1 is # (2) save 'Z' and 'N' bits from ((hi - lo) - (Rn - hi))
917 1.1 is # (3) keep 'X', 'N', and 'V' from before instruction
918 1.1 is # (4) combine ccodes
919 1.1 is #
920 1.1 is l_cmp2_cmp:
921 1.1 is sub.l %d0, %d2 # (Rn - lo)
922 1.1 is mov.w %cc, %d3 # fetch resulting ccodes
923 1.1 is andi.b &0x4, %d3 # keep 'Z' bit
924 1.1 is sub.l %d0, %d1 # (hi - lo)
925 1.1 is cmp.l %d1,%d2 # ((hi - lo) - (Rn - hi))
926 1.1 is
927 1.1 is mov.w %cc, %d4 # fetch resulting ccodes
928 1.1 is or.b %d4, %d3 # combine w/ earlier ccodes
929 1.1 is andi.b &0x5, %d3 # keep 'Z' and 'N'
930 1.1 is
931 1.1 is mov.w CMP2_CC(%a6), %d4 # fetch old ccodes
932 1.1 is andi.b &0x1a, %d4 # keep 'X','N','V' bits
933 1.1 is or.b %d3, %d4 # insert new ccodes
934 1.1 is mov.w %d4,%cc # save new ccodes
935 1.1 is
936 1.1 is # EPILOGUE BEGIN ########################################################
937 1.1 is # fmovm.l (%sp)+,&0x0 # restore no fpregs
938 1.1 is movm.l (%sp)+,&0x001c # restore d2-d4
939 1.1 is unlk %a6
940 1.1 is # EPILOGUE END ##########################################################
941 1.1 is
942 1.1 is rts
943