Home | History | Annotate | Line # | Download | only in dist
ilsp.s revision 1.2
      1  1.1     is #
      2  1.2  kamil # $NetBSD: ilsp.s,v 1.2 2015/07/11 10:32:46 kamil Exp $
      3  1.1     is #
      4  1.1     is 
      5  1.1     is #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      6  1.1     is # MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
      7  1.1     is # M68000 Hi-Performance Microprocessor Division
      8  1.1     is # M68060 Software Package Production Release
      9  1.1     is #
     10  1.1     is # M68060 Software Package Copyright (C) 1993, 1994, 1995, 1996 Motorola Inc.
     11  1.1     is # All rights reserved.
     12  1.1     is #
     13  1.1     is # THE SOFTWARE is provided on an "AS IS" basis and without warranty.
     14  1.1     is # To the maximum extent permitted by applicable law,
     15  1.1     is # MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
     16  1.1     is # INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS
     17  1.1     is # FOR A PARTICULAR PURPOSE and any warranty against infringement with
     18  1.1     is # regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
     19  1.1     is # and any accompanying written materials.
     20  1.1     is #
     21  1.1     is # To the maximum extent permitted by applicable law,
     22  1.1     is # IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
     23  1.1     is # (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
     24  1.1     is # BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
     25  1.1     is # ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
     26  1.1     is #
     27  1.1     is # Motorola assumes no responsibility for the maintenance and support
     28  1.1     is # of the SOFTWARE.
     29  1.1     is #
     30  1.1     is # You are hereby granted a copyright license to use, modify, and distribute the
     31  1.1     is # SOFTWARE so long as this entire notice is retained without alteration
     32  1.1     is # in any modified and/or redistributed versions, and that such modified
     33  1.1     is # versions are clearly identified as such.
     34  1.1     is # No licenses are granted by implication, estoppel or otherwise under any
     35  1.1     is # patents or trademarks of Motorola, Inc.
     36  1.1     is #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     37  1.1     is 
     38  1.1     is #
     39  1.1     is # litop.s:
     40  1.1     is # 	This file is appended to the top of the 060FPLSP package
     41  1.1     is # and contains the entry points into the package. The user, in
     42  1.1     is # effect, branches to one of the branch table entries located here.
     43  1.1     is #
     44  1.1     is 
     45  1.1     is 	bra.l	_060LSP__idivs64_	# 64-bit signed divide
     46  1.1     is 	short	0x0000			# NOTE(review): presumably pads each entry to a fixed size - confirm
     47  1.1     is 	bra.l	_060LSP__idivu64_	# 64-bit unsigned divide
     48  1.1     is 	short	0x0000
     49  1.1     is 
     50  1.1     is 	bra.l	_060LSP__imuls64_	# 64-bit signed multiply
     51  1.1     is 	short	0x0000
     52  1.1     is 	bra.l	_060LSP__imulu64_	# 64-bit unsigned multiply
     53  1.1     is 	short	0x0000
     54  1.1     is 
     55  1.1     is 	bra.l	_060LSP__cmp2_Ab_	# cmp2 entry points (routines are not in this part of the file)
     56  1.1     is 	short	0x0000
     57  1.1     is 	bra.l	_060LSP__cmp2_Aw_
     58  1.1     is 	short	0x0000
     59  1.1     is 	bra.l	_060LSP__cmp2_Al_
     60  1.1     is 	short	0x0000
     61  1.1     is 	bra.l	_060LSP__cmp2_Db_
     62  1.1     is 	short	0x0000
     63  1.1     is 	bra.l	_060LSP__cmp2_Dw_
     64  1.1     is 	short	0x0000
     65  1.1     is 	bra.l	_060LSP__cmp2_Dl_
     66  1.1     is 	short	0x0000
     67  1.1     is 
     68  1.1     is # leave room for possible future additions to the branch table.
     69  1.1     is 	align	0x200
     70  1.1     is 
     71  1.1     is #########################################################################
     72  1.1     is # XDEF ****************************************************************	#
     73  1.1     is #	_060LSP__idivu64_(): Emulate 64-bit unsigned div instruction.	#
     74  1.1     is #	_060LSP__idivs64_(): Emulate 64-bit signed div instruction.	#
     75  1.1     is #									#
     76  1.1     is #	This is the library version which is accessed as a subroutine	#
     77  1.1     is # 	and therefore does not work exactly like the 680X0 div{s,u}.l	#
     78  1.1     is #	64-bit divide instruction.					#
     79  1.1     is #									#
     80  1.1     is # XREF ****************************************************************	#
     81  1.1     is #	None.								#
     82  1.1     is #									#
     83  1.1     is # INPUT ***************************************************************	#
     84  1.1     is #	0x4(sp)  = divisor						#
     85  1.1     is #	0x8(sp)  = hi(dividend)						#
     86  1.1     is #	0xc(sp)  = lo(dividend)						#
     87  1.1     is #	0x10(sp) = pointer to location to place quotient/remainder	#
     88  1.1     is # 									#
     89  1.1     is # OUTPUT **************************************************************	#
     90  1.1     is #	0x10(sp) = points to location of remainder/quotient.		#
     91  1.1     is #		   remainder is in first longword, quotient is in 2nd.	#
     92  1.1     is #									#
     93  1.1     is # ALGORITHM ***********************************************************	#
     94  1.1     is #	If the operands are signed, make them unsigned and save the 	#
     95  1.1     is # sign info for later. Separate out special cases like divide-by-zero	#
     96  1.1     is # or 32-bit divides if possible. Else, use a special math algorithm	#
     97  1.1     is # to calculate the result.						#
     98  1.1     is #	Restore sign info if signed instruction. Set the condition 	#
     99  1.1     is # codes before performing the final "rts". If the divisor was equal to	#
    100  1.1     is # zero, then perform a divide-by-zero using a 16-bit implemented	#
    101  1.1     is # divide instruction. This way, the operating system can record that	#
    102  1.1     is # the event occurred even though it may not point to the correct place.	#
    103  1.1     is #									#
    104  1.1     is #########################################################################
    105  1.1     is 
    106  1.1     is set	POSNEG,		-1		# byte flag: set (st) for divs, cleared (sf) for divu
    107  1.1     is set	NDIVISOR,	-2		# byte flag: set (slt) if the divisor was negative
    108  1.1     is set	NDIVIDEND,	-3		# byte flag: set (slt) if the dividend was negative
    109  1.1     is set	DDSECOND,	-4		# byte flag: set once the first quotient word is done
    110  1.1     is set	DDNORMAL,	-8		# long: count of normalization shifts
    111  1.1     is set	DDQUOTIENT,	-12		# long: quotient, stored as two words
    112  1.1     is set	DIV64_CC,	-16		# word: condition codes saved on entry
    113  1.1     is 
    114  1.1     is ##########
    115  1.1     is # divs.l #
    116  1.1     is ##########
    117  1.1     is 	global		_060LSP__idivs64_
    118  1.1     is _060LSP__idivs64_:
    119  1.1     is # PROLOGUE BEGIN ########################################################
    120  1.1     is 	link.w		%a6,&-16
    121  1.1     is 	movm.l		&0x3f00,-(%sp)		# save d2-d7
    122  1.1     is #	fmovm.l		&0x0,-(%sp)		# save no fpregs
    123  1.1     is # PROLOGUE END ##########################################################
    124  1.1     is 
    125  1.1     is 	mov.w		%cc,DIV64_CC(%a6)	# save incoming ccodes
    126  1.1     is 	st		POSNEG(%a6)		# signed operation
    127  1.1     is 	bra.b		ldiv64_cont		# join the common divide path
    128  1.1     is 
    129  1.1     is ##########
    130  1.1     is # divu.l #
    131  1.1     is ##########
    132  1.1     is 	global		_060LSP__idivu64_
    133  1.1     is _060LSP__idivu64_:
    134  1.1     is # PROLOGUE BEGIN ########################################################
    135  1.1     is 	link.w		%a6,&-16
    136  1.1     is 	movm.l		&0x3f00,-(%sp)		# save d2-d7
    137  1.1     is #	fmovm.l		&0x0,-(%sp)		# save no fpregs
    138  1.1     is # PROLOGUE END ##########################################################
    139  1.1     is 
    140  1.1     is 	mov.w		%cc,DIV64_CC(%a6)	# save incoming ccodes
    141  1.1     is 	sf		POSNEG(%a6)		# unsigned operation
    142  1.1     is 
    143  1.1     is ldiv64_cont:
    144  1.1     is 	mov.l		0x8(%a6),%d7		# fetch divisor
    145  1.1     is 
    146  1.1     is 	beq.w		ldiv64eq0		# divisor is = 0!!!
    147  1.1     is 
    148  1.1     is 	mov.l		0xc(%a6), %d5 		# get dividend hi
    149  1.1     is 	mov.l		0x10(%a6), %d6 		# get dividend lo
    150  1.1     is 
    151  1.1     is # separate signed and unsigned divide
    152  1.1     is 	tst.b		POSNEG(%a6)		# signed or unsigned?
    153  1.1     is 	beq.b		ldspecialcases		# use positive divide
    154  1.1     is 
    155  1.1     is # save the sign of the divisor
    156  1.1     is # make divisor unsigned if it's negative
    157  1.1     is 	tst.l		%d7			# chk sign of divisor
    158  1.1     is 	slt		NDIVISOR(%a6)		# save sign of divisor
    159  1.1     is 	bpl.b		ldsgndividend		# divisor is already non-negative
    160  1.1     is 	neg.l		%d7			# complement negative divisor
    161  1.1     is 
    162  1.1     is # save the sign of the dividend
    163  1.1     is # make dividend unsigned if it's negative
    164  1.1     is ldsgndividend:
    165  1.1     is 	tst.l		%d5			# chk sign of hi(dividend)
    166  1.1     is 	slt		NDIVIDEND(%a6)		# save sign of dividend
    167  1.1     is 	bpl.b		ldspecialcases		# dividend is already non-negative
    168  1.1     is 
    169  1.1     is 	mov.w		&0x0, %cc		# clear 'X' cc bit
    170  1.1     is 	negx.l		%d6			# complement signed dividend
    171  1.1     is 	negx.l		%d5
    172  1.1     is 
    173  1.1     is # extract some special cases:
    174  1.1     is # 	- is (dividend == 0) ?
    175  1.1     is #	- is (hi(dividend) == 0 && (divisor <= lo(dividend))) ? (32-bit div)
    176  1.1     is ldspecialcases:
    177  1.1     is 	tst.l		%d5			# is (hi(dividend) == 0)
    178  1.1     is 	bne.b		ldnormaldivide		# no, so try it the long way
    179  1.1     is 
    180  1.1     is 	tst.l		%d6			# is (lo(dividend) == 0), too
    181  1.1     is 	beq.w		lddone			# yes, so (dividend == 0)
    182  1.1     is 
    183  1.1     is 	cmp.l	 	%d7,%d6			# is (divisor <= lo(dividend))
    184  1.1     is 	bls.b		ld32bitdivide		# yes, so use 32 bit divide
    185  1.1     is 
    186  1.1     is 	exg		%d5,%d6			# q = 0, r = dividend
    187  1.1     is 	bra.w		ldivfinish		# can't divide, we're done.
    188  1.1     is 
    189  1.1     is ld32bitdivide:
    190  1.1     is 	tdivu.l		%d7, %d5:%d6		# it's only a 32/32 bit div!
    191  1.1     is 
    192  1.1     is 	bra.b		ldivfinish
    193  1.1     is 
    194  1.1     is ldnormaldivide:
    195  1.1     is # last special case:
    196  1.1     is # 	- is hi(dividend) >= divisor ? if yes, then overflow
    197  1.1     is 	cmp.l		%d7,%d5			# is (divisor <= hi(dividend))
    198  1.1     is 	bls.b		lddovf			# answer won't fit in 32 bits
    199  1.1     is 
    200  1.1     is # perform the divide algorithm:
    201  1.1     is 	bsr.l		ldclassical		# do int divide
    202  1.1     is 
    203  1.1     is # separate into signed and unsigned finishes.
    204  1.1     is ldivfinish:
    205  1.1     is 	tst.b		POSNEG(%a6)		# do divs, divu separately
    206  1.1     is 	beq.b		lddone			# divu has no processing!!!
    207  1.1     is 
    208  1.1     is # it was a divs.l, so ccode setting is a little more complicated...
    209  1.1     is 	tst.b		NDIVIDEND(%a6)		# remainder has same sign
    210  1.1     is 	beq.b		ldcc			# as dividend.
    211  1.1     is 	neg.l		%d5			# sgn(rem) = sgn(dividend)
    212  1.1     is ldcc:
    213  1.1     is 	mov.b		NDIVISOR(%a6), %d0	# sign flag of the divisor
    214  1.1     is 	eor.b		%d0, NDIVIDEND(%a6)	# chk if quotient is negative
    215  1.1     is 	beq.b		ldqpos			# branch to quot positive
    216  1.1     is 
    217  1.1     is # 0x80000000 is the largest number representable as a 32-bit negative
    218  1.1     is # number. the negative of 0x80000000 is 0x80000000.
    219  1.1     is 	cmpi.l		%d6, &0x80000000	# will (-quot) fit in 32 bits?
    220  1.1     is 	bhi.b		lddovf
    221  1.1     is 
    222  1.1     is 	neg.l		%d6			# make (-quot) 2's comp
    223  1.1     is 
    224  1.1     is 	bra.b		lddone
    225  1.1     is 
    226  1.1     is ldqpos:
    227  1.1     is 	btst		&0x1f, %d6		# will (+quot) fit in 32 bits?
    228  1.1     is 	bne.b		lddovf
    229  1.1     is 
    230  1.1     is lddone:
    231  1.1     is # build the returned condition codes: keep only the caller's 'X' bit,
    232  1.1     is # then let the tst.l below set 'N'/'Z' from the quotient in d6.
    233  1.1     is 	andi.w		&0x10,DIV64_CC(%a6)	# keep old 'X' bit
    234  1.1     is 	mov.w		DIV64_CC(%a6),%cc
    235  1.1     is 	tst.l		%d6			# may set 'N' ccode bit
    236  1.1     is 
    237  1.1     is # here, the remainder is in d5 and the quotient is in d6. store them at
    238  1.1     is # the location addressed by the pointer argument at 0x14(%a6).
    239  1.1     is # use movm here to not disturb the condition codes.
    240  1.1     is ldexit:
    241  1.1     is 	movm.l		&0x0060,([0x14,%a6])	# save result (d5, then d6)
    242  1.1     is 
    243  1.1     is # EPILOGUE BEGIN ########################################################
    244  1.1     is #	fmovm.l		(%sp)+,&0x0		# restore no fpregs
    245  1.1     is 	movm.l		(%sp)+,&0x00fc		# restore d2-d7
    246  1.1     is 	unlk		%a6
    247  1.1     is # EPILOGUE END ##########################################################
    248  1.1     is 
    249  1.1     is 	rts
    250  1.1     is 
    251  1.1     is # the result should be the unchanged dividend
    252  1.1     is lddovf:
    253  1.1     is 	mov.l		0xc(%a6), %d5 		# get dividend hi
    254  1.1     is 	mov.l		0x10(%a6), %d6 		# get dividend lo
    255  1.1     is 
    256  1.1     is 	andi.w		&0x1c,DIV64_CC(%a6)	# keep old 'X','N','Z' bits
    257  1.1     is 	ori.w		&0x02,DIV64_CC(%a6)	# set 'V' ccode bit
    258  1.1     is 	mov.w		DIV64_CC(%a6),%cc
    259  1.1     is 
    260  1.1     is 	bra.b		ldexit
    261  1.1     is 
    262  1.1     is ldiv64eq0:
    263  1.1     is 	mov.l		0xc(%a6),([0x14,%a6])	# store hi(dividend) unchanged
    264  1.1     is 	mov.l		0x10(%a6),([0x14,%a6],0x4)	# store lo(dividend) unchanged
    265  1.1     is 
    266  1.1     is 	mov.w		DIV64_CC(%a6),%cc	# restore incoming ccodes
    267  1.1     is 
    268  1.1     is # EPILOGUE BEGIN ########################################################
    269  1.1     is #	fmovm.l		(%sp)+,&0x0		# restore no fpregs
    270  1.1     is 	movm.l		(%sp)+,&0x00fc		# restore d2-d7
    271  1.1     is 	unlk		%a6
    272  1.1     is # EPILOGUE END ##########################################################
    273  1.1     is 
    274  1.1     is 	divu.w		&0x0,%d0		# force a divbyzero exception
    275  1.1     is 	rts
    276  1.1     is 
    277  1.1     is ###########################################################################
    278  1.1     is #########################################################################
    279  1.1     is # This routine uses the 'classical' Algorithm D from Donald Knuth's	#
    280  1.1     is # Art of Computer Programming, vol II, Seminumerical Algorithms.	#
    281  1.1     is # For this implementation b=2**16, and the target is U1U2U3U4/V1V2,	#
    282  1.1     is # where U,V are words of the quadword dividend and longword divisor,	#
    283  1.1     is # and U1, V1 are the most significant words.				#
    284  1.1     is # 									#
    285  1.1     is # The most sig. longword of the 64 bit dividend must be in %d5, least 	#
    286  1.1     is # in %d6. The divisor must be in %d7. All operands are treated as	#
    287  1.1     is # unsigned here; the caller handles signs before and after the call.	#
    288  1.1     is # The quotient is returned in %d6, remainder in %d5, unless the		#
    289  1.1     is # v (overflow) bit is set in the saved %ccr. If overflow, the dividend	#
    290  1.1     is # is unchanged.								#
    291  1.1     is #########################################################################
    292  1.1     is ldclassical:
    293  1.1     is # if the divisor msw is 0, use a simpler algorithm than the full blown
    294  1.1     is # one at lddknuth:
    295  1.1     is 
    296  1.1     is 	cmpi.l		%d7, &0xffff		# does the divisor fit in 16 bits?
    297  1.1     is 	bhi.b		lddknuth		# go use D. Knuth algorithm
    298  1.1     is 
    299  1.1     is # Since the divisor is only a word (and larger than the mslw of the dividend),
    300  1.1     is # a simpler algorithm may be used :
    301  1.1     is # In the general case, four quotient words would be created by
    302  1.1     is # dividing the divisor word into each dividend word. In this case,
    303  1.1     is # the first two quotient words must be zero, or overflow would occur.
    304  1.1     is # Since we already checked this case above, we can treat the most significant
    305  1.1     is # longword of the dividend as (0) remainder (see Knuth) and merely complete
    306  1.1     is # the last two divisions to get a quotient longword and word remainder:
    307  1.1     is 
    308  1.1     is 	clr.l		%d1			# %d1 collects the two quotient words
    309  1.1     is 	swap		%d5			# same as r*b if previous step rqd
    310  1.1     is 	swap		%d6			# get u3 to lsw position
    311  1.1     is 	mov.w		%d6, %d5		# rb + u3
    312  1.1     is 
    313  1.1     is 	divu.w		%d7, %d5
    314  1.1     is 
    315  1.1     is 	mov.w		%d5, %d1		# first quotient word
    316  1.1     is 	swap		%d6			# get u4
    317  1.1     is 	mov.w		%d6, %d5		# rb + u4
    318  1.1     is 
    319  1.1     is 	divu.w		%d7, %d5
    320  1.1     is 
    321  1.1     is 	swap		%d1
    322  1.1     is 	mov.w		%d5, %d1		# 2nd quotient 'digit'
    323  1.1     is 	clr.w		%d5
    324  1.1     is 	swap		%d5			# now remainder
    325  1.1     is 	mov.l		%d1, %d6		# and quotient
    326  1.1     is 
    327  1.1     is 	rts
    328  1.1     is 
    329  1.1     is lddknuth:
    330  1.1     is # In this algorithm, the divisor is treated as a 2 digit (word) number
    331  1.1     is # which is divided into a 3 digit (word) dividend to get one quotient
    332  1.1     is # digit (word). After subtraction, the dividend is shifted and the
    333  1.1     is # process repeated. Before beginning, the divisor and quotient are
    334  1.1     is # 'normalized' so that the process of estimating the quotient digit
    335  1.1     is # will yield verifiably correct results..
    336  1.1     is 
    337  1.1     is 	clr.l		DDNORMAL(%a6)		# count of shifts for normalization
    338  1.1     is 	clr.b		DDSECOND(%a6)		# clear flag for quotient digits
    339  1.1     is 	clr.l		%d1			# %d1 will hold trial quotient
    340  1.1     is lddnchk:
    341  1.1     is 	btst		&31, %d7		# must we normalize? first word of
    342  1.1     is 	bne.b		lddnormalized		# divisor (V1) must be >= 65536/2
    343  1.1     is 	addq.l		&0x1, DDNORMAL(%a6)	# count normalization shifts
    344  1.1     is 	lsl.l		&0x1, %d7		# shift the divisor
    345  1.1     is 	lsl.l		&0x1, %d6		# shift u4,u3 with overflow to u2
    346  1.1     is 	roxl.l		&0x1, %d5		# shift u1,u2
    347  1.1     is 	bra.w		lddnchk			# repeat until bit 31 of the divisor is set
    348  1.1     is lddnormalized:
    349  1.1     is 
    350  1.1     is # Now calculate an estimate of the quotient words (msw first, then lsw).
    351  1.1     is # The comments use subscripts for the first quotient digit determination.
    352  1.1     is 	mov.l		%d7, %d3		# divisor
    353  1.1     is 	mov.l		%d5, %d2		# dividend mslw
    354  1.1     is 	swap		%d2
    355  1.1     is 	swap		%d3
    356  1.1     is 	cmp.w	 	%d2, %d3		# V1 = U1 ?
    357  1.1     is 	bne.b		lddqcalc1
    358  1.1     is 	mov.w		&0xffff, %d1		# use max trial quotient word
    359  1.1     is 	bra.b		lddadj0
    360  1.1     is lddqcalc1:
    361  1.1     is 	mov.l		%d5, %d1
    362  1.1     is 
    363  1.1     is 	divu.w		%d3, %d1		# use quotient of mslw/msw
    364  1.1     is 
    365  1.1     is 	andi.l		&0x0000ffff, %d1	# zero any remainder
    366  1.1     is lddadj0:
    367  1.1     is 
    368  1.1     is # now test the trial quotient and adjust. This step plus the
    369  1.1     is # normalization assures (according to Knuth) that the trial
    370  1.1     is # quotient will be at worst 1 too large.
    371  1.1     is 	mov.l		%d6, -(%sp)		# save dividend lslw (restored after ldmm2)
    372  1.1     is 	clr.w		%d6			# word u3 left
    373  1.1     is 	swap		%d6			# in lsw position
    374  1.1     is lddadj1: mov.l		%d7, %d3
    375  1.1     is 	mov.l		%d1, %d2
    376  1.1     is 	mulu.w		%d7, %d2		# V2q
    377  1.1     is 	swap		%d3
    378  1.1     is 	mulu.w		%d1, %d3		# V1q
    379  1.1     is 	mov.l		%d5, %d4		# U1U2
    380  1.1     is 	sub.l		%d3, %d4		# U1U2 - V1q
    381  1.1     is 
    382  1.1     is 	swap		%d4
    383  1.1     is 
    384  1.1     is 	mov.w		%d4,%d0			# upper word of (U1U2 - V1q)
    385  1.1     is 	mov.w		%d6,%d4			# insert lower word (U3)
    386  1.1     is 
    387  1.1     is 	tst.w		%d0			# is upper word set?
    388  1.1     is 	bne.w		lddadjd1
    389  1.1     is 
    390  1.1     is #	add.l		%d6, %d4		# (U1U2 - V1q) + U3
    391  1.1     is 
    392  1.1     is 	cmp.l	 	%d2, %d4
    393  1.1     is 	bls.b		lddadjd1		# is V2q > (U1U2-V1q) + U3 ?
    394  1.1     is 	subq.l		&0x1, %d1		# yes, decrement and recheck
    395  1.1     is 	bra.b		lddadj1
    396  1.1     is lddadjd1:
    397  1.1     is # now test the word by multiplying it by the divisor (V1V2) and comparing
    398  1.1     is # the 3 digit (word) result with the current dividend words
    399  1.1     is 	mov.l		%d5, -(%sp)		# save %d5 (%d6 already saved)
    400  1.1     is 	mov.l		%d1, %d6
    401  1.1     is 	swap		%d6			# shift answer to ms 3 words
    402  1.1     is 	mov.l		%d7, %d5
    403  1.1     is 	bsr.l		ldmm2			# %d5:%d6 = divisor * (trial quotient << 16)
    404  1.1     is 	mov.l		%d5, %d2		# now %d2,%d3 are trial*divisor
    405  1.1     is 	mov.l		%d6, %d3
    406  1.1     is 	mov.l		(%sp)+, %d5		# restore dividend
    407  1.1     is 	mov.l		(%sp)+, %d6
    408  1.1     is 	sub.l		%d3, %d6
    409  1.1     is 	subx.l		%d2, %d5		# subtract double precision
    410  1.1     is 	bcc		ldd2nd			# no carry, do next quotient digit
    411  1.1     is 	subq.l		&0x1, %d1		# q is one too large
    412  1.1     is # need to add back divisor longword to current ms 3 digits of dividend
    413  1.1     is # - according to Knuth, this is done only 2 out of 65536 times for random
    414  1.1     is # divisor, dividend selection.
    415  1.1     is 	clr.l		%d2
    416  1.1     is 	mov.l		%d7, %d3
    417  1.1     is 	swap		%d3
    418  1.1     is 	clr.w		%d3			# %d3 now ls word of divisor
    419  1.1     is 	add.l		%d3, %d6		# aligned with 3rd word of dividend
    420  1.1     is 	addx.l		%d2, %d5
    421  1.1     is 	mov.l		%d7, %d3
    422  1.1     is 	clr.w		%d3			# %d3 now ms word of divisor
    423  1.1     is 	swap		%d3			# aligned with 2nd word of dividend
    424  1.1     is 	add.l		%d3, %d5
    425  1.1     is ldd2nd:
    426  1.1     is 	tst.b		DDSECOND(%a6)	# both q words done?
    427  1.1     is 	bne.b		lddremain
    428  1.1     is # first quotient digit now correct. store digit and shift the
    429  1.1     is # (subtracted) dividend
    430  1.1     is 	mov.w		%d1, DDQUOTIENT(%a6)
    431  1.1     is 	clr.l		%d1
    432  1.1     is 	swap		%d5
    433  1.1     is 	swap		%d6
    434  1.1     is 	mov.w		%d6, %d5
    435  1.1     is 	clr.w		%d6
    436  1.1     is 	st		DDSECOND(%a6)		# second digit
    437  1.1     is 	bra.w		lddnormalized
    438  1.1     is lddremain:
    439  1.1     is # add 2nd word to quotient, get the remainder.
    440  1.1     is 	mov.w 		%d1, DDQUOTIENT+2(%a6)
    441  1.1     is # shift down one word/digit to renormalize remainder.
    442  1.1     is 	mov.w		%d5, %d6
    443  1.1     is 	swap		%d6
    444  1.1     is 	swap		%d5
    445  1.1     is 	mov.l		DDNORMAL(%a6), %d7	# get norm shift count
    446  1.1     is 	beq.b		lddrn
    447  1.1     is 	subq.l		&0x1, %d7		# set for loop count
    448  1.1     is lddnlp:
    449  1.1     is 	lsr.l		&0x1, %d5		# shift into %d6
    450  1.1     is 	roxr.l		&0x1, %d6
    451  1.1     is 	dbf		%d7, lddnlp
    452  1.1     is lddrn:
    453  1.1     is 	mov.l		%d6, %d5		# remainder
    454  1.1     is 	mov.l		DDQUOTIENT(%a6), %d6 	# quotient
    455  1.1     is 
    456  1.1     is 	rts
    457  1.1     is ldmm2:
    458  1.1     is # factors for the 32X32->64 multiplication are in %d5 and %d6.
    459  1.1     is # returns 64 bit result in %d5 (hi) %d6(lo).
    460  1.1     is # destroys %d2,%d3,%d4.
    461  1.1     is 
    462  1.1     is # multiply hi,lo words of each factor to get 4 intermediate products
    463  1.1     is 	mov.l		%d6, %d2
    464  1.1     is 	mov.l		%d6, %d3
    465  1.1     is 	mov.l		%d5, %d4
    466  1.1     is 	swap		%d3
    467  1.1     is 	swap		%d4
    468  1.1     is 	mulu.w		%d5, %d6		# %d6 <- lsw*lsw
    469  1.1     is 	mulu.w		%d3, %d5		# %d5 <- msw-dest*lsw-source
    470  1.1     is 	mulu.w		%d4, %d2		# %d2 <- msw-source*lsw-dest
    471  1.1     is 	mulu.w		%d4, %d3		# %d3 <- msw*msw
    472  1.1     is # now use swap and addx to consolidate to two longwords
    473  1.1     is 	clr.l		%d4
    474  1.1     is 	swap		%d6
    475  1.1     is 	add.w		%d5, %d6		# add msw of l*l to lsw of m*l product
    476  1.1     is 	addx.w		%d4, %d3		# add any carry to m*m product
    477  1.1     is 	add.w		%d2, %d6		# add in lsw of other m*l product
    478  1.1     is 	addx.w		%d4, %d3		# add any carry to m*m product
    479  1.1     is 	swap		%d6			# %d6 is low 32 bits of final product
    480  1.1     is 	clr.w		%d5
    481  1.1     is 	clr.w		%d2			# lsw of two mixed products used,
    482  1.1     is 	swap		%d5			# now use msws of longwords
    483  1.1     is 	swap		%d2
    484  1.1     is 	add.l		%d2, %d5
    485  1.1     is 	add.l		%d3, %d5	# %d5 now ms 32 bits of final product
    486  1.1     is 	rts
    487  1.1     is 
    488  1.1     is #########################################################################
    489  1.1     is # XDEF ****************************************************************	#
    490  1.1     is #	_060LSP__imulu64_(): Emulate 64-bit unsigned mul instruction	#
    491  1.1     is #	_060LSP__imuls64_(): Emulate 64-bit signed mul instruction.	#
    492  1.1     is #									#
    493  1.1     is #	This is the library version which is accessed as a subroutine	#
    494  1.1     is #	and therefore does not work exactly like the 680X0 mul{s,u}.l	#
    495  1.1     is #	64-bit multiply instruction.					#
    496  1.1     is #									#
    497  1.1     is # XREF ****************************************************************	#
    498  1.1     is #	None								#
    499  1.1     is #									#
    500  1.1     is # INPUT ***************************************************************	#
    501  1.1     is #	0x4(sp) = multiplier						#
    502  1.1     is #	0x8(sp) = multiplicand						#
    503  1.1     is #	0xc(sp) = pointer to location to place 64-bit result		#
    504  1.1     is # 									#
    505  1.1     is # OUTPUT **************************************************************	#
    506  1.1     is #	0xc(sp) = points to location of 64-bit result			#
    507  1.1     is #									#
    508  1.1     is # ALGORITHM ***********************************************************	#
    509  1.1     is #	Perform the multiply in pieces using 16x16->32 unsigned		#
    510  1.1     is # multiplies and "add" instructions.					#
    511  1.1     is #	Set the condition codes as appropriate before performing an	#
    512  1.1     is # "rts".								#
    513  1.1     is #									#
    514  1.1     is #########################################################################
    515  1.1     is 
    516  1.1     is set MUL64_CC, -4
    517  1.1     is 
    518  1.1     is 	global		_060LSP__imulu64_
    519  1.1     is _060LSP__imulu64_:
    520  1.1     is 
    521  1.1     is # PROLOGUE BEGIN ########################################################
    522  1.1     is 	link.w		%a6,&-4
    523  1.1     is 	movm.l		&0x3800,-(%sp)		# save d2-d4
    524  1.1     is #	fmovm.l		&0x0,-(%sp)		# save no fpregs
    525  1.1     is # PROLOGUE END ##########################################################
    526  1.1     is 
    527  1.1     is 	mov.w		%cc,MUL64_CC(%a6)	# save incoming ccodes
    528  1.1     is 
    529  1.1     is 	mov.l		0x8(%a6),%d0		# store multiplier in d0
    530  1.1     is 	beq.w		mulu64_zero		# handle zero separately
    531  1.1     is 
    532  1.1     is 	mov.l		0xc(%a6),%d1		# get multiplicand in d1
    533  1.1     is 	beq.w		mulu64_zero		# handle zero separately
    534  1.1     is 
    535  1.1     is #########################################################################
    536  1.1     is #	63			   32				0	#
    537  1.1     is # 	----------------------------					#
    538  1.1     is # 	| hi(mplier) * hi(mplicand)|					#
    539  1.1     is # 	----------------------------					#
    540  1.1     is #		     -----------------------------			#
    541  1.1     is #		     | hi(mplier) * lo(mplicand) |			#
    542  1.1     is #		     -----------------------------			#
    543  1.1     is #		     -----------------------------			#
    544  1.1     is #		     | lo(mplier) * hi(mplicand) |			#
    545  1.1     is #		     -----------------------------			#
    546  1.1     is #	  |			   -----------------------------	#
    547  1.1     is #	--|--			   | lo(mplier) * lo(mplicand) |	#
    548  1.1     is #	  |			   -----------------------------	#
    549  1.1     is #	========================================================	#
    550  1.1     is #	--------------------------------------------------------	#
    551  1.1     is #	|	hi(result)	   |	    lo(result)         |	#
    552  1.1     is #	--------------------------------------------------------	#
    553  1.1     is #########################################################################
    554  1.1     is mulu64_alg:
    555  1.1     is # load temp registers with operands
    556  1.1     is 	mov.l		%d0,%d2			# mr in d2
    557  1.1     is 	mov.l		%d0,%d3			# mr in d3
    558  1.1     is 	mov.l		%d1,%d4			# md in d4
    559  1.1     is 	swap		%d3			# hi(mr) in lo d3
    560  1.1     is 	swap		%d4			# hi(md) in lo d4
    561  1.1     is 
    562  1.1     is # complete necessary multiplies:
    563  1.1     is 	mulu.w		%d1,%d0			# [1] lo(mr) * lo(md)
    564  1.1     is 	mulu.w		%d3,%d1			# [2] hi(mr) * lo(md)
    565  1.1     is 	mulu.w		%d4,%d2			# [3] lo(mr) * hi(md)
    566  1.1     is 	mulu.w		%d4,%d3			# [4] hi(mr) * hi(md)
    567  1.1     is 
    568  1.1     is # add lo portions of [2],[3] to hi portion of [1].
    569  1.1     is # add carries produced from these adds to [4].
    570  1.1     is # lo([1]) is the final lo 16 bits of the result.
    571  1.1     is 	clr.l		%d4			# load d4 w/ zero value
    572  1.1     is 	swap		%d0			# hi([1]) <==> lo([1])
    573  1.1     is 	add.w		%d1,%d0			# hi([1]) + lo([2])
    574  1.1     is 	addx.l		%d4,%d3			#    [4]  + carry
    575  1.1     is 	add.w		%d2,%d0			# hi([1]) + lo([3])
    576  1.1     is 	addx.l		%d4,%d3			#    [4]  + carry
    577  1.1     is 	swap		%d0			# lo([1]) <==> hi([1])
    578  1.1     is 
    579  1.1     is # lo portions of [2],[3] have been added in to final result.
    580  1.1     is # now, clear lo, put hi in lo reg, and add to [4]
    581  1.1     is 	clr.w		%d1			# clear lo([2])
    582  1.1     is 	clr.w		%d2			# clear lo([3])
    583  1.1     is 	swap		%d1			# hi([2]) in lo d1
    584  1.1     is 	swap		%d2			# hi([3]) in lo d2
    585  1.1     is 	add.l		%d2,%d1			# hi([2]) + hi([3])
    586  1.1     is 	add.l		%d3,%d1			#    ... + [4] = hi(result)
    587  1.1     is 
    588  1.1     is # now, grab the condition codes. only one that can be set is 'N'.
    589  1.1     is # 'N' CAN be set if the operation is unsigned if bit 63 is set.
    590  1.1     is 	mov.w		MUL64_CC(%a6),%d4
    591  1.1     is 	andi.b		&0x10,%d4		# keep old 'X' bit
    592  1.1     is 	tst.l		%d1			# may set 'N' bit
    593  1.1     is 	bpl.b		mulu64_ddone
    594  1.1     is 	ori.b		&0x8,%d4		# set 'N' bit
    595  1.1     is mulu64_ddone:
    596  1.1     is 	mov.w		%d4,%cc
    597  1.1     is 
    598  1.1     is # here, the result is in d1 (hi) and d0 (lo). the current strategy is to
    599  1.1     is # save the values at the location addressed by the pointer at 0x10(%a6).
    600  1.1     is # use movm here to not disturb the condition codes.
    601  1.1     is mulu64_end:
    602  1.1     is 	exg		%d1,%d0			# d0 = hi(result), d1 = lo(result)
    603  1.1     is 	movm.l		&0x0003,([0x10,%a6])		# save result (d0, then d1)
    604  1.1     is 
    605  1.1     is # EPILOGUE BEGIN ########################################################
    606  1.1     is #	fmovm.l		(%sp)+,&0x0		# restore no fpregs
    607  1.1     is 	movm.l		(%sp)+,&0x001c		# restore d2-d4
    608  1.1     is 	unlk		%a6
    609  1.1     is # EPILOGUE END ##########################################################
    610  1.1     is 
    611  1.1     is 	rts
    612  1.1     is 
    613  1.1     is # one or both of the operands is zero so the result is also zero.
    614  1.1     is # store the zero result through the result pointer and set the 'Z' ccode bit.
    615  1.1     is mulu64_zero:
    616  1.1     is 	clr.l		%d0
    617  1.1     is 	clr.l		%d1
    618  1.1     is 
    619  1.1     is 	mov.w		MUL64_CC(%a6),%d4
    620  1.1     is 	andi.b		&0x10,%d4		# keep old 'X' bit
    621  1.1     is 	ori.b		&0x4,%d4
    622  1.1     is 	mov.w		%d4,%cc			# set 'Z' ccode bit
    623  1.1     is 
    624  1.1     is 	bra.b		mulu64_end
    625  1.1     is 
    626  1.1     is ##########
    627  1.1     is # muls.l #
    628  1.1     is ##########
    629  1.1     is 	global		_060LSP__imuls64_
    630  1.1     is _060LSP__imuls64_:
    631  1.1     is 
    632  1.1     is # PROLOGUE BEGIN ########################################################
    633  1.1     is 	link.w		%a6,&-4
    634  1.1     is 	movm.l		&0x3c00,-(%sp)		# save d2-d5
    635  1.1     is #	fmovm.l		&0x0,-(%sp)		# save no fpregs
    636  1.1     is # PROLOGUE END ##########################################################
    637  1.1     is 
    638  1.1     is 	mov.w		%cc,MUL64_CC(%a6)	# save incomming ccodes
    639  1.1     is 
    640  1.1     is 	mov.l		0x8(%a6),%d0		# store multiplier in d0
    641  1.1     is 	beq.b		mulu64_zero		# handle zero separately
    642  1.1     is 
    643  1.1     is 	mov.l		0xc(%a6),%d1		# get multiplicand in d1
    644  1.1     is 	beq.b		mulu64_zero		# handle zero separately
    645  1.1     is 
    646  1.1     is 	clr.b		%d5			# clear sign tag
    647  1.1     is 	tst.l		%d0			# is multiplier negative?
    648  1.1     is 	bge.b		muls64_chk_md_sgn	# no
    649  1.1     is 	neg.l		%d0			# make multiplier positive
    650  1.1     is 
    651  1.1     is 	ori.b		&0x1,%d5		# save multiplier sgn
    652  1.1     is 
    653  1.1     is # the result sign is the exclusive or of the operand sign bits.
    654  1.1     is muls64_chk_md_sgn:
    655  1.1     is 	tst.l		%d1			# is multiplicand negative?
    656  1.1     is 	bge.b		muls64_alg		# no
    657  1.1     is 	neg.l		%d1			# make multiplicand positive
    658  1.1     is 
    659  1.1     is 	eori.b		&0x1,%d5		# calculate correct sign
    660  1.1     is 
    661  1.1     is #########################################################################
    662  1.1     is #	63			   32				0	#
    663  1.1     is # 	----------------------------					#
    664  1.1     is # 	| hi(mplier) * hi(mplicand)|					#
    665  1.1     is # 	----------------------------					#
    666  1.1     is #		     -----------------------------			#
    667  1.1     is #		     | hi(mplier) * lo(mplicand) |			#
    668  1.1     is #		     -----------------------------			#
    669  1.1     is #		     -----------------------------			#
    670  1.1     is #		     | lo(mplier) * hi(mplicand) |			#
    671  1.1     is #		     -----------------------------			#
    672  1.1     is #	  |			   -----------------------------	#
    673  1.1     is #	--|--			   | lo(mplier) * lo(mplicand) |	#
    674  1.1     is #	  |			   -----------------------------	#
    675  1.1     is #	========================================================	#
    676  1.1     is #	--------------------------------------------------------	#
    677  1.1     is #	|	hi(result)	   |	    lo(result)         |	#
    678  1.1     is #	--------------------------------------------------------	#
    679  1.1     is #########################################################################
    680  1.1     is muls64_alg:
    681  1.1     is # load temp registers with operands
    682  1.1     is 	mov.l		%d0,%d2			# mr in d2
    683  1.1     is 	mov.l		%d0,%d3			# mr in d3
    684  1.1     is 	mov.l		%d1,%d4			# md in d4
    685  1.1     is 	swap		%d3			# hi(mr) in lo d3
    686  1.1     is 	swap		%d4			# hi(md) in lo d4
    687  1.1     is 
    688  1.1     is # complete necessary multiplies:
    689  1.1     is 	mulu.w		%d1,%d0			# [1] lo(mr) * lo(md)
    690  1.1     is 	mulu.w		%d3,%d1			# [2] hi(mr) * lo(md)
    691  1.1     is 	mulu.w		%d4,%d2			# [3] lo(mr) * hi(md)
    692  1.1     is 	mulu.w		%d4,%d3			# [4] hi(mr) * hi(md)
    693  1.1     is 
    694  1.1     is # add lo portions of [2],[3] to hi portion of [1].
    695  1.1     is # add carries produced from these adds to [4].
    696  1.1     is # lo([1]) is the final lo 16 bits of the result.
    697  1.1     is 	clr.l		%d4			# load d4 w/ zero value
    698  1.1     is 	swap		%d0			# hi([1]) <==> lo([1])
    699  1.1     is 	add.w		%d1,%d0			# hi([1]) + lo([2])
    700  1.1     is 	addx.l		%d4,%d3			#    [4]  + carry
    701  1.1     is 	add.w		%d2,%d0			# hi([1]) + lo([3])
    702  1.1     is 	addx.l		%d4,%d3			#    [4]  + carry
    703  1.1     is 	swap		%d0			# lo([1]) <==> hi([1])
    704  1.1     is 
    705  1.1     is # lo portions of [2],[3] have been added in to final result.
    706  1.1     is # now, clear lo, put hi in lo reg, and add to [4]
    707  1.1     is 	clr.w		%d1			# clear lo([2])
    708  1.1     is 	clr.w		%d2			# clear hi([3])
    709  1.1     is 	swap		%d1			# hi([2]) in lo d1
    710  1.1     is 	swap		%d2			# hi([3]) in lo d2
    711  1.1     is 	add.l		%d2,%d1			#    [4]  + hi([2])
    712  1.1     is 	add.l		%d3,%d1			#    [4]  + hi([3])
    713  1.1     is 
    714  1.1     is 	tst.b		%d5			# should result be signed?
    715  1.1     is 	beq.b		muls64_done		# no
    716  1.1     is 
    717  1.1     is # result should be a signed negative number.
    718  1.1     is # compute 2's complement of the unsigned number:
    719  1.1     is #   -negate all bits and add 1
    720  1.1     is muls64_neg:
    721  1.1     is 	not.l		%d0			# negate lo(result) bits
    722  1.1     is 	not.l		%d1			# negate hi(result) bits
    723  1.1     is 	addq.l		&1,%d0			# add 1 to lo(result)
    724  1.1     is 	addx.l		%d4,%d1			# add carry to hi(result)
    725  1.1     is 
    726  1.1     is muls64_done:
    727  1.1     is 	mov.w		MUL64_CC(%a6),%d4
    728  1.1     is 	andi.b		&0x10,%d4		# keep old 'X' bit
    729  1.1     is 	tst.l		%d1			# may set 'N' bit
    730  1.1     is 	bpl.b		muls64_ddone
    731  1.1     is 	ori.b		&0x8,%d4		# set 'N' bit
    732  1.1     is muls64_ddone:
    733  1.1     is 	mov.w		%d4,%cc
    734  1.1     is 
    735  1.1     is # here, the result is in d1 and d0. the current strategy is to save
    736  1.1     is # the values at the location pointed to by a0.
    737  1.1     is # use movm here to not disturb the condition codes.
    738  1.1     is muls64_end:
    739  1.1     is 	exg		%d1,%d0
    740  1.1     is 	movm.l		&0x0003,([0x10,%a6])	# save result at (a0)
    741  1.1     is 
    742  1.1     is # EPILOGUE BEGIN ########################################################
    743  1.1     is #	fmovm.l		(%sp)+,&0x0		# restore no fpregs
    744  1.1     is 	movm.l		(%sp)+,&0x003c		# restore d2-d5
    745  1.1     is 	unlk		%a6
    746  1.1     is # EPILOGUE END ##########################################################
    747  1.1     is 
    748  1.1     is 	rts
    749  1.1     is 
    750  1.1     is # one or both of the operands is zero so the result is also zero.
    751  1.1     is # save the zero result to the register file and set the 'Z' ccode bit.
    752  1.1     is muls64_zero:
    753  1.1     is 	clr.l		%d0
    754  1.1     is 	clr.l		%d1
    755  1.1     is 
    756  1.1     is 	mov.w		MUL64_CC(%a6),%d4
    757  1.1     is 	andi.b		&0x10,%d4
    758  1.1     is 	ori.b		&0x4,%d4
    759  1.1     is 	mov.w		%d4,%cc			# set 'Z' ccode bit
    760  1.1     is 
    761  1.1     is 	bra.b		muls64_end
    762  1.1     is 
    763  1.1     is #########################################################################
    764  1.1     is # XDEF ****************************************************************	#
    765  1.1     is #	_060LSP__cmp2_Ab_(): Emulate "cmp2.b An,<ea>".			#
    766  1.1     is #	_060LSP__cmp2_Aw_(): Emulate "cmp2.w An,<ea>".			#
    767  1.1     is #	_060LSP__cmp2_Al_(): Emulate "cmp2.l An,<ea>".			#
    768  1.1     is #	_060LSP__cmp2_Db_(): Emulate "cmp2.b Dn,<ea>".			#
    769  1.1     is #	_060LSP__cmp2_Dw_(): Emulate "cmp2.w Dn,<ea>".			#
    770  1.1     is #	_060LSP__cmp2_Dl_(): Emulate "cmp2.l Dn,<ea>".			#
    771  1.1     is #									#
    772  1.1     is #	This is the library version which is accessed as a subroutine	#
    773  1.1     is #	and therefore does not work exactly like the 680X0 "cmp2"	#
    774  1.1     is #	instruction.							#
    775  1.1     is #									#
    776  1.1     is # XREF ****************************************************************	#
    777  1.1     is #	None								#
    778  1.1     is #									#
    779  1.1     is # INPUT ***************************************************************	#
    780  1.1     is #	0x4(sp) = Rn							#
    781  1.1     is #	0x8(sp) = pointer to boundary pair				#
    782  1.1     is # 									#
    783  1.1     is # OUTPUT **************************************************************	#
    784  1.1     is #	cc = condition codes are set correctly				#
    785  1.1     is #									#
    786  1.1     is # ALGORITHM ***********************************************************	#
    787  1.1     is # 	In the interest of simplicity, all operands are converted to	#
    788  1.1     is # longword size whether the operation is byte, word, or long. The	#
    789  1.2  kamil # bounds are sign extended accordingly. If Rn is a data register, Rn is #
    790  1.1     is # also sign extended. If Rn is an address register, it need not be sign #
    791  1.1     is # extended since the full register is always used.			#
    792  1.1     is #	The condition codes are set correctly before the final "rts".	#
    793  1.1     is #									#
    794  1.1     is #########################################################################
    795  1.1     is 
    796  1.1     is set	CMP2_CC,	-4		# a6-relative slot holding the caller's ccodes
    797  1.1     is 
    798  1.1     is 	global 		_060LSP__cmp2_Ab_
    799  1.1     is _060LSP__cmp2_Ab_:
    800  1.1     is # cmp2.b, address register: byte bounds are sign extended, Rn is used as is.
    801  1.1     is # PROLOGUE BEGIN ########################################################
    802  1.1     is 	link.w		%a6,&-4			# 4-byte local for saved ccodes
    803  1.1     is 	movm.l		&0x3800,-(%sp)		# save d2-d4
    804  1.1     is #	fmovm.l		&0x0,-(%sp)		# save no fpregs
    805  1.1     is # PROLOGUE END ##########################################################
    806  1.1     is 
    807  1.1     is 	mov.w		%cc,CMP2_CC(%a6)	# save incoming ccodes
    808  1.1     is 	mov.l		0x8(%a6), %d2 		# get regval
    809  1.1     is 
    810  1.1     is 	mov.b		([0xc,%a6],0x0),%d0	# lo bnd = byte 0 of bound pair
    811  1.1     is 	mov.b		([0xc,%a6],0x1),%d1	# hi bnd = byte 1 of bound pair
    812  1.1     is 
    813  1.1     is 	extb.l		%d0			# sign extend lo bnd
    814  1.1     is 	extb.l		%d1			# sign extend hi bnd
    815  1.1     is 	bra.w		l_cmp2_cmp		# go do the compare emulation
    816  1.1     is 
    817  1.1     is 	global 		_060LSP__cmp2_Aw_
    818  1.1     is _060LSP__cmp2_Aw_:
    819  1.1     is # cmp2.w, address register: word bounds are sign extended, Rn is used as is.
    820  1.1     is # PROLOGUE BEGIN ########################################################
    821  1.1     is 	link.w		%a6,&-4			# 4-byte local for saved ccodes
    822  1.1     is 	movm.l		&0x3800,-(%sp)		# save d2-d4
    823  1.1     is #	fmovm.l		&0x0,-(%sp)		# save no fpregs
    824  1.1     is # PROLOGUE END ##########################################################
    825  1.1     is 
    826  1.1     is 	mov.w		%cc,CMP2_CC(%a6)	# save incoming ccodes
    827  1.1     is 	mov.l		0x8(%a6), %d2 		# get regval
    828  1.1     is 
    829  1.1     is 	mov.w		([0xc,%a6],0x0),%d0	# lo bnd = word 0 of bound pair
    830  1.1     is 	mov.w		([0xc,%a6],0x2),%d1	# hi bnd = word 1 of bound pair
    831  1.1     is 
    832  1.1     is 	ext.l		%d0			# sign extend lo bnd
    833  1.1     is 	ext.l		%d1			# sign extend hi bnd
    834  1.1     is 	bra.w		l_cmp2_cmp		# go do the compare emulation
    835  1.1     is 
    836  1.1     is 	global 		_060LSP__cmp2_Al_
    837  1.1     is _060LSP__cmp2_Al_:
    838  1.1     is # cmp2.l, address register: bounds and Rn are already longwords.
    839  1.1     is # PROLOGUE BEGIN ########################################################
    840  1.1     is 	link.w		%a6,&-4			# 4-byte local for saved ccodes
    841  1.1     is 	movm.l		&0x3800,-(%sp)		# save d2-d4
    842  1.1     is #	fmovm.l		&0x0,-(%sp)		# save no fpregs
    843  1.1     is # PROLOGUE END ##########################################################
    844  1.1     is 
    845  1.1     is 	mov.w		%cc,CMP2_CC(%a6)	# save incoming ccodes
    846  1.1     is 	mov.l		0x8(%a6), %d2 		# get regval
    847  1.1     is 
    848  1.1     is 	mov.l		([0xc,%a6],0x0),%d0	# lo bnd = long 0 of bound pair
    849  1.1     is 	mov.l		([0xc,%a6],0x4),%d1	# hi bnd = long 1 of bound pair
    850  1.1     is 	bra.w		l_cmp2_cmp		# go do the compare emulation
    851  1.1     is 
    852  1.1     is 	global 		_060LSP__cmp2_Db_
    853  1.1     is _060LSP__cmp2_Db_:
    854  1.1     is # cmp2.b, data register: byte bounds and the low byte of Rn are sign extended.
    855  1.1     is # PROLOGUE BEGIN ########################################################
    856  1.1     is 	link.w		%a6,&-4			# 4-byte local for saved ccodes
    857  1.1     is 	movm.l		&0x3800,-(%sp)		# save d2-d4
    858  1.1     is #	fmovm.l		&0x0,-(%sp)		# save no fpregs
    859  1.1     is # PROLOGUE END ##########################################################
    860  1.1     is 
    861  1.1     is 	mov.w		%cc,CMP2_CC(%a6)	# save incoming ccodes
    862  1.1     is 	mov.l		0x8(%a6), %d2 		# get regval
    863  1.1     is 
    864  1.1     is 	mov.b		([0xc,%a6],0x0),%d0	# lo bnd = byte 0 of bound pair
    865  1.1     is 	mov.b		([0xc,%a6],0x1),%d1	# hi bnd = byte 1 of bound pair
    866  1.1     is 
    867  1.1     is 	extb.l		%d0			# sign extend lo bnd
    868  1.1     is 	extb.l		%d1			# sign extend hi bnd
    869  1.1     is 
    870  1.1     is # operation is a data register compare.
    871  1.1     is # sign extend byte to long so we can do simple longword compares.
    872  1.1     is 	extb.l		%d2			# sign extend data byte
    873  1.1     is 	bra.w		l_cmp2_cmp		# go do the compare emulation
    874  1.1     is 
    875  1.1     is 	global 		_060LSP__cmp2_Dw_
    876  1.1     is _060LSP__cmp2_Dw_:
    877  1.1     is # cmp2.w, data register: word bounds and the low word of Rn are sign extended.
    878  1.1     is # PROLOGUE BEGIN ########################################################
    879  1.1     is 	link.w		%a6,&-4			# 4-byte local for saved ccodes
    880  1.1     is 	movm.l		&0x3800,-(%sp)		# save d2-d4
    881  1.1     is #	fmovm.l		&0x0,-(%sp)		# save no fpregs
    882  1.1     is # PROLOGUE END ##########################################################
    883  1.1     is 
    884  1.1     is 	mov.w		%cc,CMP2_CC(%a6)	# save incoming ccodes
    885  1.1     is 	mov.l		0x8(%a6), %d2 		# get regval
    886  1.1     is 
    887  1.1     is 	mov.w		([0xc,%a6],0x0),%d0	# lo bnd = word 0 of bound pair
    888  1.1     is 	mov.w		([0xc,%a6],0x2),%d1	# hi bnd = word 1 of bound pair
    889  1.1     is 
    890  1.1     is 	ext.l		%d0			# sign extend lo bnd
    891  1.1     is 	ext.l		%d1			# sign extend hi bnd
    892  1.1     is 
    893  1.1     is # operation is a data register compare.
    894  1.1     is # sign extend word to long so we can do simple longword compares.
    895  1.1     is 	ext.l		%d2			# sign extend data word
    896  1.1     is 	bra.w		l_cmp2_cmp		# go emulate compare
    897  1.1     is 
    898  1.1     is 	global 		_060LSP__cmp2_Dl_
    899  1.1     is _060LSP__cmp2_Dl_:
    900  1.1     is # cmp2.l, data register: no extension needed; falls through to l_cmp2_cmp.
    901  1.1     is # PROLOGUE BEGIN ########################################################
    902  1.1     is 	link.w		%a6,&-4			# 4-byte local for saved ccodes
    903  1.1     is 	movm.l		&0x3800,-(%sp)		# save d2-d4
    904  1.1     is #	fmovm.l		&0x0,-(%sp)		# save no fpregs
    905  1.1     is # PROLOGUE END ##########################################################
    906  1.1     is 
    907  1.1     is 	mov.w		%cc,CMP2_CC(%a6)	# save incoming ccodes
    908  1.1     is 	mov.l		0x8(%a6), %d2 		# get regval
    909  1.1     is 
    910  1.1     is 	mov.l		([0xc,%a6],0x0),%d0	# lo bnd = long 0 of bound pair
    911  1.1     is 	mov.l		([0xc,%a6],0x4),%d1	# hi bnd = long 1 of bound pair
    912  1.1     is 
    913  1.1     is #
    914  1.1     is # To set the ccodes correctly:
    915  1.1     is # 	(1) save 'Z' bit from (Rn - lo)
    916  1.1     is #	(2) save 'Z' and 'C' bits from ((Rn - lo) - (hi - lo))
    917  1.1     is #	(3) keep 'X', 'N', and 'V' from before instruction
    918  1.1     is #	(4) combine ccodes
    919  1.1     is #
    920  1.1     is l_cmp2_cmp:
    921  1.1     is 	sub.l		%d0, %d2		# (Rn - lo)
    922  1.1     is 	mov.w		%cc, %d3		# fetch resulting ccodes
    923  1.1     is 	andi.b		&0x4, %d3		# keep 'Z' bit
    924  1.1     is 	sub.l		%d0, %d1		# (hi - lo)
    925  1.1     is 	cmp.l	 	%d1,%d2			# ((Rn - lo) - (hi - lo))
    926  1.1     is # 'Z' => Rn == hi; 'C' (borrow) => (Rn - lo) is above (hi - lo), unsigned.
    927  1.1     is 	mov.w		%cc, %d4		# fetch resulting ccodes
    928  1.1     is 	or.b		%d4, %d3		# combine w/ earlier ccodes
    929  1.1     is 	andi.b		&0x5, %d3		# keep 'Z' and 'C'
    930  1.1     is 
    931  1.1     is 	mov.w		CMP2_CC(%a6), %d4	# fetch old ccodes
    932  1.1     is 	andi.b		&0x1a, %d4		# keep 'X','N','V' bits
    933  1.1     is 	or.b		%d3, %d4		# insert new ccodes
    934  1.1     is 	mov.w		%d4,%cc			# save new ccodes
    935  1.1     is 
    936  1.1     is # EPILOGUE BEGIN ########################################################
    937  1.1     is #	fmovm.l		(%sp)+,&0x0		# restore no fpregs
    938  1.1     is 	movm.l		(%sp)+,&0x001c		# restore d2-d4
    939  1.1     is 	unlk		%a6
    940  1.1     is # EPILOGUE END ##########################################################
    941  1.1     is 
    942  1.1     is 	rts
    943