Home | History | Annotate | Line # | Download | only in dist
      1 #
      2 # $NetBSD: ilsp.s,v 1.3 2022/05/28 21:14:56 andvar Exp $
      3 #
      4 
      5 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      6 # MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
      7 # M68000 Hi-Performance Microprocessor Division
      8 # M68060 Software Package Production Release
      9 #
     10 # M68060 Software Package Copyright (C) 1993, 1994, 1995, 1996 Motorola Inc.
     11 # All rights reserved.
     12 #
     13 # THE SOFTWARE is provided on an "AS IS" basis and without warranty.
     14 # To the maximum extent permitted by applicable law,
     15 # MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
     16 # INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS
     17 # FOR A PARTICULAR PURPOSE and any warranty against infringement with
     18 # regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
     19 # and any accompanying written materials.
     20 #
     21 # To the maximum extent permitted by applicable law,
     22 # IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
     23 # (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
     24 # BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
     25 # ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
     26 #
     27 # Motorola assumes no responsibility for the maintenance and support
     28 # of the SOFTWARE.
     29 #
     30 # You are hereby granted a copyright license to use, modify, and distribute the
     31 # SOFTWARE so long as this entire notice is retained without alteration
     32 # in any modified and/or redistributed versions, and that such modified
     33 # versions are clearly identified as such.
     34 # No licenses are granted by implication, estoppel or otherwise under any
     35 # patents or trademarks of Motorola, Inc.
     36 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     37 
     38 #
     39 # litop.s:
     40 # 	This file is appended to the top of the 060FPLSP package
     41 # and contains the entry points into the package. The user, in
     42 # effect, branches to one of the branch table entries located here.
     43 #
     44 
     45 	bra.l	_060LSP__idivs64_
     46 	short	0x0000
     47 	bra.l	_060LSP__idivu64_
     48 	short	0x0000
     49 
     50 	bra.l	_060LSP__imuls64_
     51 	short	0x0000
     52 	bra.l	_060LSP__imulu64_
     53 	short	0x0000
     54 
     55 	bra.l	_060LSP__cmp2_Ab_
     56 	short	0x0000
     57 	bra.l	_060LSP__cmp2_Aw_
     58 	short	0x0000
     59 	bra.l	_060LSP__cmp2_Al_
     60 	short	0x0000
     61 	bra.l	_060LSP__cmp2_Db_
     62 	short	0x0000
     63 	bra.l	_060LSP__cmp2_Dw_
     64 	short	0x0000
     65 	bra.l	_060LSP__cmp2_Dl_
     66 	short	0x0000
     67 
     68 # leave room for future possible aditions.
     69 	align	0x200
     70 
     71 #########################################################################
     72 # XDEF ****************************************************************	#
     73 #	_060LSP__idivu64_(): Emulate 64-bit unsigned div instruction.	#
     74 #	_060LSP__idivs64_(): Emulate 64-bit signed div instruction.	#
     75 #									#
     76 #	This is the library version which is accessed as a subroutine	#
     77 # 	and therefore does not work exactly like the 680X0 div{s,u}.l	#
     78 #	64-bit divide instruction.					#
     79 #									#
     80 # XREF ****************************************************************	#
     81 #	None.								#
     82 #									#
     83 # INPUT ***************************************************************	#
     84 #	0x4(sp)  = divisor						#
     85 #	0x8(sp)  = hi(dividend)						#
     86 #	0xc(sp)  = lo(dividend)						#
     87 #	0x10(sp) = pointer to location to place quotient/remainder	#
     88 # 									#
     89 # OUTPUT **************************************************************	#
     90 #	0x10(sp) = points to location of remainder/quotient.		#
     91 #		   remainder is in first longword, quotient is in 2nd.	#
     92 #									#
     93 # ALGORITHM ***********************************************************	#
     94 #	If the operands are signed, make them unsigned and save the 	#
     95 # sign info for later. Separate out special cases like divide-by-zero	#
     96 # or 32-bit divides if possible. Else, use a special math algorithm	#
     97 # to calculate the result.						#
     98 #	Restore sign info if signed instruction. Set the condition 	#
     99 # codes before performing the final "rts". If the divisor was equal to	#
    100 # zero, then perform a divide-by-zero using a 16-bit implemented	#
    101 # divide instruction. This way, the operating system can record that	#
    102 # the event occurred even though it may not point to the correct place.	#
    103 #									#
    104 #########################################################################
    105 
    106 set	POSNEG,		-1
    107 set	NDIVISOR,	-2
    108 set	NDIVIDEND,	-3
    109 set	DDSECOND,	-4
    110 set	DDNORMAL,	-8
    111 set	DDQUOTIENT,	-12
    112 set	DIV64_CC,	-16
    113 
    114 ##########
    115 # divs.l #
    116 ##########
    117 	global		_060LSP__idivs64_
    118 _060LSP__idivs64_:
    119 # PROLOGUE BEGIN ########################################################
    120 	link.w		%a6,&-16
    121 	movm.l		&0x3f00,-(%sp)		# save d2-d7
    122 #	fmovm.l		&0x0,-(%sp)		# save no fpregs
    123 # PROLOGUE END ##########################################################
    124 
    125 	mov.w		%cc,DIV64_CC(%a6)
    126 	st		POSNEG(%a6)		# signed operation
    127 	bra.b		ldiv64_cont
    128 
    129 ##########
    130 # divu.l #
    131 ##########
    132 	global		_060LSP__idivu64_
    133 _060LSP__idivu64_:
    134 # PROLOGUE BEGIN ########################################################
    135 	link.w		%a6,&-16
    136 	movm.l		&0x3f00,-(%sp)		# save d2-d7
    137 #	fmovm.l		&0x0,-(%sp)		# save no fpregs
    138 # PROLOGUE END ##########################################################
    139 
    140 	mov.w		%cc,DIV64_CC(%a6)
    141 	sf		POSNEG(%a6)		# unsigned operation
    142 
    143 ldiv64_cont:
    144 	mov.l		0x8(%a6),%d7		# fetch divisor
    145 
    146 	beq.w		ldiv64eq0		# divisor is = 0!!!
    147 
    148 	mov.l		0xc(%a6), %d5 		# get dividend hi
    149 	mov.l		0x10(%a6), %d6 		# get dividend lo
    150 
    151 # separate signed and unsigned divide
    152 	tst.b		POSNEG(%a6)		# signed or unsigned?
    153 	beq.b		ldspecialcases		# use positive divide
    154 
    155 # save the sign of the divisor
    156 # make divisor unsigned if it's negative
    157 	tst.l		%d7			# chk sign of divisor
    158 	slt		NDIVISOR(%a6)		# save sign of divisor
    159 	bpl.b		ldsgndividend
    160 	neg.l		%d7			# complement negative divisor
    161 
    162 # save the sign of the dividend
    163 # make dividend unsigned if it's negative
    164 ldsgndividend:
    165 	tst.l		%d5			# chk sign of hi(dividend)
    166 	slt		NDIVIDEND(%a6)		# save sign of dividend
    167 	bpl.b		ldspecialcases
    168 
    169 	mov.w		&0x0, %cc		# clear 'X' cc bit
    170 	negx.l		%d6			# complement signed dividend
    171 	negx.l		%d5
    172 
    173 # extract some special cases:
    174 # 	- is (dividend == 0) ?
    175 #	- is (hi(dividend) == 0 && (divisor <= lo(dividend))) ? (32-bit div)
    176 ldspecialcases:
    177 	tst.l		%d5			# is (hi(dividend) == 0)
    178 	bne.b		ldnormaldivide		# no, so try it the long way
    179 
    180 	tst.l		%d6			# is (lo(dividend) == 0), too
    181 	beq.w		lddone			# yes, so (dividend == 0)
    182 
    183 	cmp.l	 	%d7,%d6			# is (divisor <= lo(dividend))
    184 	bls.b		ld32bitdivide		# yes, so use 32 bit divide
    185 
    186 	exg		%d5,%d6			# q = 0, r = dividend
    187 	bra.w		ldivfinish		# can't divide, we're done.
    188 
    189 ld32bitdivide:
    190 	tdivu.l		%d7, %d5:%d6		# it's only a 32/32 bit div!
    191 
    192 	bra.b		ldivfinish
    193 
    194 ldnormaldivide:
    195 # last special case:
    196 # 	- is hi(dividend) >= divisor ? if yes, then overflow
    197 	cmp.l		%d7,%d5
    198 	bls.b		lddovf			# answer won't fit in 32 bits
    199 
    200 # perform the divide algorithm:
    201 	bsr.l		ldclassical		# do int divide
    202 
    203 # separate into signed and unsigned finishes.
    204 ldivfinish:
    205 	tst.b		POSNEG(%a6)		# do divs, divu separately
    206 	beq.b		lddone			# divu has no processing!!!
    207 
    208 # it was a divs.l, so ccode setting is a little more complicated...
    209 	tst.b		NDIVIDEND(%a6)		# remainder has same sign
    210 	beq.b		ldcc			# as dividend.
    211 	neg.l		%d5			# sgn(rem) = sgn(dividend)
    212 ldcc:
    213 	mov.b		NDIVISOR(%a6), %d0
    214 	eor.b		%d0, NDIVIDEND(%a6)	# chk if quotient is negative
    215 	beq.b		ldqpos			# branch to quot positive
    216 
    217 # 0x80000000 is the largest number representable as a 32-bit negative
    218 # number. the negative of 0x80000000 is 0x80000000.
    219 	cmpi.l		%d6, &0x80000000	# will (-quot) fit in 32 bits?
    220 	bhi.b		lddovf
    221 
    222 	neg.l		%d6			# make (-quot) 2's comp
    223 
    224 	bra.b		lddone
    225 
    226 ldqpos:
    227 	btst		&0x1f, %d6		# will (+quot) fit in 32 bits?
    228 	bne.b		lddovf
    229 
    230 lddone:
    231 # if the register numbers are the same, only the quotient gets saved.
    232 # so, if we always save the quotient second, we save ourselves a cmp&beq
    233 	andi.w		&0x10,DIV64_CC(%a6)
    234 	mov.w		DIV64_CC(%a6),%cc
    235 	tst.l		%d6			# may set 'N' ccode bit
    236 
    237 # here, the result is in d1 and d0. the current strategy is to save
    238 # the values at the location pointed to by a0.
    239 # use movm here to not disturb the condition codes.
    240 ldexit:
    241 	movm.l		&0x0060,([0x14,%a6])	# save result
    242 
    243 # EPILOGUE BEGIN ########################################################
    244 #	fmovm.l		(%sp)+,&0x0		# restore no fpregs
    245 	movm.l		(%sp)+,&0x00fc		# restore d2-d7
    246 	unlk		%a6
    247 # EPILOGUE END ##########################################################
    248 
    249 	rts
    250 
    251 # the result should be the unchanged dividend
    252 lddovf:
    253 	mov.l		0xc(%a6), %d5 		# get dividend hi
    254 	mov.l		0x10(%a6), %d6 		# get dividend lo
    255 
    256 	andi.w		&0x1c,DIV64_CC(%a6)
    257 	ori.w		&0x02,DIV64_CC(%a6)	# set 'V' ccode bit
    258 	mov.w		DIV64_CC(%a6),%cc
    259 
    260 	bra.b		ldexit
    261 
    262 ldiv64eq0:
    263 	mov.l		0xc(%a6),([0x14,%a6])
    264 	mov.l		0x10(%a6),([0x14,%a6],0x4)
    265 
    266 	mov.w		DIV64_CC(%a6),%cc
    267 
    268 # EPILOGUE BEGIN ########################################################
    269 #	fmovm.l		(%sp)+,&0x0		# restore no fpregs
    270 	movm.l		(%sp)+,&0x00fc		# restore d2-d7
    271 	unlk		%a6
    272 # EPILOGUE END ##########################################################
    273 
    274 	divu.w		&0x0,%d0		# force a divbyzero exception
    275 	rts
    276 
    277 ###########################################################################
    278 #########################################################################
    279 # This routine uses the 'classical' Algorithm D from Donald Knuth's	#
    280 # Art of Computer Programming, vol II, Seminumerical Algorithms.	#
    281 # For this implementation b=2**16, and the target is U1U2U3U4/V1V2,	#
    282 # where U,V are words of the quadword dividend and longword divisor,	#
    283 # and U1, V1 are the most significant words.				#
    284 # 									#
    285 # The most sig. longword of the 64 bit dividend must be in %d5, least 	#
    286 # in %d6. The divisor must be in the variable ddivisor, and the		#
    287 # signed/unsigned flag ddusign must be set (0=unsigned,1=signed).	#
    288 # The quotient is returned in %d6, remainder in %d5, unless the		#
    289 # v (overflow) bit is set in the saved %ccr. If overflow, the dividend	#
    290 # is unchanged.								#
    291 #########################################################################
    292 ldclassical:
    293 # if the divisor msw is 0, use simpler algorithm then the full blown
    294 # one at ddknuth:
    295 
    296 	cmpi.l		%d7, &0xffff
    297 	bhi.b		lddknuth		# go use D. Knuth algorithm
    298 
    299 # Since the divisor is only a word (and larger than the mslw of the dividend),
    300 # a simpler algorithm may be used :
    301 # In the general case, four quotient words would be created by
    302 # dividing the divisor word into each dividend word. In this case,
    303 # the first two quotient words must be zero, or overflow would occur.
    304 # Since we already checked this case above, we can treat the most significant
    305 # longword of the dividend as (0) remainder (see Knuth) and merely complete
    306 # the last two divisions to get a quotient longword and word remainder:
    307 
    308 	clr.l		%d1
    309 	swap		%d5			# same as r*b if previous step rqd
    310 	swap		%d6			# get u3 to lsw position
    311 	mov.w		%d6, %d5		# rb + u3
    312 
    313 	divu.w		%d7, %d5
    314 
    315 	mov.w		%d5, %d1		# first quotient word
    316 	swap		%d6			# get u4
    317 	mov.w		%d6, %d5		# rb + u4
    318 
    319 	divu.w		%d7, %d5
    320 
    321 	swap		%d1
    322 	mov.w		%d5, %d1		# 2nd quotient 'digit'
    323 	clr.w		%d5
    324 	swap		%d5			# now remainder
    325 	mov.l		%d1, %d6		# and quotient
    326 
    327 	rts
    328 
    329 lddknuth:
    330 # In this algorithm, the divisor is treated as a 2 digit (word) number
    331 # which is divided into a 3 digit (word) dividend to get one quotient
    332 # digit (word). After subtraction, the dividend is shifted and the
    333 # process repeated. Before beginning, the divisor and quotient are
    334 # 'normalized' so that the process of estimating the quotient digit
    335 # will yield verifiably correct results..
    336 
    337 	clr.l		DDNORMAL(%a6)		# count of shifts for normalization
    338 	clr.b		DDSECOND(%a6)		# clear flag for quotient digits
    339 	clr.l		%d1			# %d1 will hold trial quotient
    340 lddnchk:
    341 	btst		&31, %d7		# must we normalize? first word of
    342 	bne.b		lddnormalized		# divisor (V1) must be >= 65536/2
    343 	addq.l		&0x1, DDNORMAL(%a6)	# count normalization shifts
    344 	lsl.l		&0x1, %d7		# shift the divisor
    345 	lsl.l		&0x1, %d6		# shift u4,u3 with overflow to u2
    346 	roxl.l		&0x1, %d5		# shift u1,u2
    347 	bra.w		lddnchk
    348 lddnormalized:
    349 
    350 # Now calculate an estimate of the quotient words (msw first, then lsw).
    351 # The comments use subscripts for the first quotient digit determination.
    352 	mov.l		%d7, %d3		# divisor
    353 	mov.l		%d5, %d2		# dividend mslw
    354 	swap		%d2
    355 	swap		%d3
    356 	cmp.w	 	%d2, %d3		# V1 = U1 ?
    357 	bne.b		lddqcalc1
    358 	mov.w		&0xffff, %d1		# use max trial quotient word
    359 	bra.b		lddadj0
    360 lddqcalc1:
    361 	mov.l		%d5, %d1
    362 
    363 	divu.w		%d3, %d1		# use quotient of mslw/msw
    364 
    365 	andi.l		&0x0000ffff, %d1	# zero any remainder
    366 lddadj0:
    367 
    368 # now test the trial quotient and adjust. This step plus the
    369 # normalization assures (according to Knuth) that the trial
    370 # quotient will be at worst 1 too large.
    371 	mov.l		%d6, -(%sp)
    372 	clr.w		%d6			# word u3 left
    373 	swap		%d6			# in lsw position
    374 lddadj1: mov.l		%d7, %d3
    375 	mov.l		%d1, %d2
    376 	mulu.w		%d7, %d2		# V2q
    377 	swap		%d3
    378 	mulu.w		%d1, %d3		# V1q
    379 	mov.l		%d5, %d4		# U1U2
    380 	sub.l		%d3, %d4		# U1U2 - V1q
    381 
    382 	swap		%d4
    383 
    384 	mov.w		%d4,%d0
    385 	mov.w		%d6,%d4			# insert lower word (U3)
    386 
    387 	tst.w		%d0			# is upper word set?
    388 	bne.w		lddadjd1
    389 
    390 #	add.l		%d6, %d4		# (U1U2 - V1q) + U3
    391 
    392 	cmp.l	 	%d2, %d4
    393 	bls.b		lddadjd1		# is V2q > (U1U2-V1q) + U3 ?
    394 	subq.l		&0x1, %d1		# yes, decrement and recheck
    395 	bra.b		lddadj1
    396 lddadjd1:
    397 # now test the word by multiplying it by the divisor (V1V2) and comparing
    398 # the 3 digit (word) result with the current dividend words
    399 	mov.l		%d5, -(%sp)		# save %d5 (%d6 already saved)
    400 	mov.l		%d1, %d6
    401 	swap		%d6			# shift answer to ms 3 words
    402 	mov.l		%d7, %d5
    403 	bsr.l		ldmm2
    404 	mov.l		%d5, %d2		# now %d2,%d3 are trial*divisor
    405 	mov.l		%d6, %d3
    406 	mov.l		(%sp)+, %d5		# restore dividend
    407 	mov.l		(%sp)+, %d6
    408 	sub.l		%d3, %d6
    409 	subx.l		%d2, %d5		# subtract double precision
    410 	bcc		ldd2nd			# no carry, do next quotient digit
    411 	subq.l		&0x1, %d1		# q is one too large
    412 # need to add back divisor longword to current ms 3 digits of dividend
    413 # - according to Knuth, this is done only 2 out of 65536 times for random
    414 # divisor, dividend selection.
    415 	clr.l		%d2
    416 	mov.l		%d7, %d3
    417 	swap		%d3
    418 	clr.w		%d3			# %d3 now ls word of divisor
    419 	add.l		%d3, %d6		# aligned with 3rd word of dividend
    420 	addx.l		%d2, %d5
    421 	mov.l		%d7, %d3
    422 	clr.w		%d3			# %d3 now ms word of divisor
    423 	swap		%d3			# aligned with 2nd word of dividend
    424 	add.l		%d3, %d5
    425 ldd2nd:
    426 	tst.b		DDSECOND(%a6)	# both q words done?
    427 	bne.b		lddremain
    428 # first quotient digit now correct. store digit and shift the
    429 # (subtracted) dividend
    430 	mov.w		%d1, DDQUOTIENT(%a6)
    431 	clr.l		%d1
    432 	swap		%d5
    433 	swap		%d6
    434 	mov.w		%d6, %d5
    435 	clr.w		%d6
    436 	st		DDSECOND(%a6)		# second digit
    437 	bra.w		lddnormalized
    438 lddremain:
    439 # add 2nd word to quotient, get the remainder.
    440 	mov.w 		%d1, DDQUOTIENT+2(%a6)
    441 # shift down one word/digit to renormalize remainder.
    442 	mov.w		%d5, %d6
    443 	swap		%d6
    444 	swap		%d5
    445 	mov.l		DDNORMAL(%a6), %d7	# get norm shift count
    446 	beq.b		lddrn
    447 	subq.l		&0x1, %d7		# set for loop count
    448 lddnlp:
    449 	lsr.l		&0x1, %d5		# shift into %d6
    450 	roxr.l		&0x1, %d6
    451 	dbf		%d7, lddnlp
    452 lddrn:
    453 	mov.l		%d6, %d5		# remainder
    454 	mov.l		DDQUOTIENT(%a6), %d6 	# quotient
    455 
    456 	rts
    457 ldmm2:
    458 # factors for the 32X32->64 multiplication are in %d5 and %d6.
    459 # returns 64 bit result in %d5 (hi) %d6(lo).
    460 # destroys %d2,%d3,%d4.
    461 
    462 # multiply hi,lo words of each factor to get 4 intermediate products
    463 	mov.l		%d6, %d2
    464 	mov.l		%d6, %d3
    465 	mov.l		%d5, %d4
    466 	swap		%d3
    467 	swap		%d4
    468 	mulu.w		%d5, %d6		# %d6 <- lsw*lsw
    469 	mulu.w		%d3, %d5		# %d5 <- msw-dest*lsw-source
    470 	mulu.w		%d4, %d2		# %d2 <- msw-source*lsw-dest
    471 	mulu.w		%d4, %d3		# %d3 <- msw*msw
    472 # now use swap and addx to consolidate to two longwords
    473 	clr.l		%d4
    474 	swap		%d6
    475 	add.w		%d5, %d6		# add msw of l*l to lsw of m*l product
    476 	addx.w		%d4, %d3		# add any carry to m*m product
    477 	add.w		%d2, %d6		# add in lsw of other m*l product
    478 	addx.w		%d4, %d3		# add any carry to m*m product
    479 	swap		%d6			# %d6 is low 32 bits of final product
    480 	clr.w		%d5
    481 	clr.w		%d2			# lsw of two mixed products used,
    482 	swap		%d5			# now use msws of longwords
    483 	swap		%d2
    484 	add.l		%d2, %d5
    485 	add.l		%d3, %d5	# %d5 now ms 32 bits of final product
    486 	rts
    487 
    488 #########################################################################
    489 # XDEF ****************************************************************	#
    490 #	_060LSP__imulu64_(): Emulate 64-bit unsigned mul instruction	#
    491 #	_060LSP__imuls64_(): Emulate 64-bit signed mul instruction.	#
    492 #									#
    493 #	This is the library version which is accessed as a subroutine	#
    494 #	and therefore does not work exactly like the 680X0 mul{s,u}.l	#
    495 #	64-bit multiply instruction.					#
    496 #									#
    497 # XREF ****************************************************************	#
    498 #	None								#
    499 #									#
    500 # INPUT ***************************************************************	#
    501 #	0x4(sp) = multiplier						#
    502 #	0x8(sp) = multiplicand						#
    503 #	0xc(sp) = pointer to location to place 64-bit result		#
    504 # 									#
    505 # OUTPUT **************************************************************	#
    506 #	0xc(sp) = points to location of 64-bit result			#
    507 #									#
    508 # ALGORITHM ***********************************************************	#
    509 #	Perform the multiply in pieces using 16x16->32 unsigned		#
    510 # multiplies and "add" instructions.					#
    511 #	Set the condition codes as appropriate before performing an	#
    512 # "rts".								#
    513 #									#
    514 #########################################################################
    515 
    516 set MUL64_CC, -4
    517 
    518 	global		_060LSP__imulu64_
    519 _060LSP__imulu64_:
    520 
    521 # PROLOGUE BEGIN ########################################################
    522 	link.w		%a6,&-4
    523 	movm.l		&0x3800,-(%sp)		# save d2-d4
    524 #	fmovm.l		&0x0,-(%sp)		# save no fpregs
    525 # PROLOGUE END ##########################################################
    526 
    527 	mov.w		%cc,MUL64_CC(%a6)	# save incoming ccodes
    528 
    529 	mov.l		0x8(%a6),%d0		# store multiplier in d0
    530 	beq.w		mulu64_zero		# handle zero separately
    531 
    532 	mov.l		0xc(%a6),%d1		# get multiplicand in d1
    533 	beq.w		mulu64_zero		# handle zero separately
    534 
    535 #########################################################################
    536 #	63			   32				0	#
    537 # 	----------------------------					#
    538 # 	| hi(mplier) * hi(mplicand)|					#
    539 # 	----------------------------					#
    540 #		     -----------------------------			#
    541 #		     | hi(mplier) * lo(mplicand) |			#
    542 #		     -----------------------------			#
    543 #		     -----------------------------			#
    544 #		     | lo(mplier) * hi(mplicand) |			#
    545 #		     -----------------------------			#
    546 #	  |			   -----------------------------	#
    547 #	--|--			   | lo(mplier) * lo(mplicand) |	#
    548 #	  |			   -----------------------------	#
    549 #	========================================================	#
    550 #	--------------------------------------------------------	#
    551 #	|	hi(result)	   |	    lo(result)         |	#
    552 #	--------------------------------------------------------	#
    553 #########################################################################
    554 mulu64_alg:
    555 # load temp registers with operands
    556 	mov.l		%d0,%d2			# mr in d2
    557 	mov.l		%d0,%d3			# mr in d3
    558 	mov.l		%d1,%d4			# md in d4
    559 	swap		%d3			# hi(mr) in lo d3
    560 	swap		%d4			# hi(md) in lo d4
    561 
    562 # complete necessary multiplies:
    563 	mulu.w		%d1,%d0			# [1] lo(mr) * lo(md)
    564 	mulu.w		%d3,%d1			# [2] hi(mr) * lo(md)
    565 	mulu.w		%d4,%d2			# [3] lo(mr) * hi(md)
    566 	mulu.w		%d4,%d3			# [4] hi(mr) * hi(md)
    567 
    568 # add lo portions of [2],[3] to hi portion of [1].
    569 # add carries produced from these adds to [4].
    570 # lo([1]) is the final lo 16 bits of the result.
    571 	clr.l		%d4			# load d4 w/ zero value
    572 	swap		%d0			# hi([1]) <==> lo([1])
    573 	add.w		%d1,%d0			# hi([1]) + lo([2])
    574 	addx.l		%d4,%d3			#    [4]  + carry
    575 	add.w		%d2,%d0			# hi([1]) + lo([3])
    576 	addx.l		%d4,%d3			#    [4]  + carry
    577 	swap		%d0			# lo([1]) <==> hi([1])
    578 
    579 # lo portions of [2],[3] have been added in to final result.
    580 # now, clear lo, put hi in lo reg, and add to [4]
    581 	clr.w		%d1			# clear lo([2])
    582 	clr.w		%d2			# clear hi([3])
    583 	swap		%d1			# hi([2]) in lo d1
    584 	swap		%d2			# hi([3]) in lo d2
    585 	add.l		%d2,%d1			#    [4]  + hi([2])
    586 	add.l		%d3,%d1			#    [4]  + hi([3])
    587 
    588 # now, grab the condition codes. only one that can be set is 'N'.
    589 # 'N' CAN be set if the operation is unsigned if bit 63 is set.
    590 	mov.w		MUL64_CC(%a6),%d4
    591 	andi.b		&0x10,%d4		# keep old 'X' bit
    592 	tst.l		%d1			# may set 'N' bit
    593 	bpl.b		mulu64_ddone
    594 	ori.b		&0x8,%d4		# set 'N' bit
    595 mulu64_ddone:
    596 	mov.w		%d4,%cc
    597 
    598 # here, the result is in d1 and d0. the current strategy is to save
    599 # the values at the location pointed to by a0.
    600 # use movm here to not disturb the condition codes.
    601 mulu64_end:
    602 	exg		%d1,%d0
    603 	movm.l		&0x0003,([0x10,%a6])		# save result
    604 
    605 # EPILOGUE BEGIN ########################################################
    606 #	fmovm.l		(%sp)+,&0x0		# restore no fpregs
    607 	movm.l		(%sp)+,&0x001c		# restore d2-d4
    608 	unlk		%a6
    609 # EPILOGUE END ##########################################################
    610 
    611 	rts
    612 
    613 # one or both of the operands is zero so the result is also zero.
    614 # save the zero result to the register file and set the 'Z' ccode bit.
    615 mulu64_zero:
    616 	clr.l		%d0
    617 	clr.l		%d1
    618 
    619 	mov.w		MUL64_CC(%a6),%d4
    620 	andi.b		&0x10,%d4
    621 	ori.b		&0x4,%d4
    622 	mov.w		%d4,%cc			# set 'Z' ccode bit
    623 
    624 	bra.b		mulu64_end
    625 
    626 ##########
    627 # muls.l #
    628 ##########
    629 	global		_060LSP__imuls64_
    630 _060LSP__imuls64_:
    631 
    632 # PROLOGUE BEGIN ########################################################
    633 	link.w		%a6,&-4
    634 	movm.l		&0x3c00,-(%sp)		# save d2-d5
    635 #	fmovm.l		&0x0,-(%sp)		# save no fpregs
    636 # PROLOGUE END ##########################################################
    637 
    638 	mov.w		%cc,MUL64_CC(%a6)	# save incoming ccodes
    639 
    640 	mov.l		0x8(%a6),%d0		# store multiplier in d0
    641 	beq.b		mulu64_zero		# handle zero separately
    642 
    643 	mov.l		0xc(%a6),%d1		# get multiplicand in d1
    644 	beq.b		mulu64_zero		# handle zero separately
    645 
    646 	clr.b		%d5			# clear sign tag
    647 	tst.l		%d0			# is multiplier negative?
    648 	bge.b		muls64_chk_md_sgn	# no
    649 	neg.l		%d0			# make multiplier positive
    650 
    651 	ori.b		&0x1,%d5		# save multiplier sgn
    652 
    653 # the result sign is the exclusive or of the operand sign bits.
    654 muls64_chk_md_sgn:
    655 	tst.l		%d1			# is multiplicand negative?
    656 	bge.b		muls64_alg		# no
    657 	neg.l		%d1			# make multiplicand positive
    658 
    659 	eori.b		&0x1,%d5		# calculate correct sign
    660 
    661 #########################################################################
    662 #	63			   32				0	#
    663 # 	----------------------------					#
    664 # 	| hi(mplier) * hi(mplicand)|					#
    665 # 	----------------------------					#
    666 #		     -----------------------------			#
    667 #		     | hi(mplier) * lo(mplicand) |			#
    668 #		     -----------------------------			#
    669 #		     -----------------------------			#
    670 #		     | lo(mplier) * hi(mplicand) |			#
    671 #		     -----------------------------			#
    672 #	  |			   -----------------------------	#
    673 #	--|--			   | lo(mplier) * lo(mplicand) |	#
    674 #	  |			   -----------------------------	#
    675 #	========================================================	#
    676 #	--------------------------------------------------------	#
    677 #	|	hi(result)	   |	    lo(result)         |	#
    678 #	--------------------------------------------------------	#
    679 #########################################################################
    680 muls64_alg:
    681 # load temp registers with operands
    682 	mov.l		%d0,%d2			# mr in d2
    683 	mov.l		%d0,%d3			# mr in d3
    684 	mov.l		%d1,%d4			# md in d4
    685 	swap		%d3			# hi(mr) in lo d3
    686 	swap		%d4			# hi(md) in lo d4
    687 
    688 # complete necessary multiplies:
    689 	mulu.w		%d1,%d0			# [1] lo(mr) * lo(md)
    690 	mulu.w		%d3,%d1			# [2] hi(mr) * lo(md)
    691 	mulu.w		%d4,%d2			# [3] lo(mr) * hi(md)
    692 	mulu.w		%d4,%d3			# [4] hi(mr) * hi(md)
    693 
    694 # add lo portions of [2],[3] to hi portion of [1].
    695 # add carries produced from these adds to [4].
    696 # lo([1]) is the final lo 16 bits of the result.
    697 	clr.l		%d4			# load d4 w/ zero value
    698 	swap		%d0			# hi([1]) <==> lo([1])
    699 	add.w		%d1,%d0			# hi([1]) + lo([2])
    700 	addx.l		%d4,%d3			#    [4]  + carry
    701 	add.w		%d2,%d0			# hi([1]) + lo([3])
    702 	addx.l		%d4,%d3			#    [4]  + carry
    703 	swap		%d0			# lo([1]) <==> hi([1])
    704 
    705 # lo portions of [2],[3] have been added in to final result.
    706 # now, clear lo, put hi in lo reg, and add to [4]
    707 	clr.w		%d1			# clear lo([2])
    708 	clr.w		%d2			# clear hi([3])
    709 	swap		%d1			# hi([2]) in lo d1
    710 	swap		%d2			# hi([3]) in lo d2
    711 	add.l		%d2,%d1			#    [4]  + hi([2])
    712 	add.l		%d3,%d1			#    [4]  + hi([3])
    713 
    714 	tst.b		%d5			# should result be signed?
    715 	beq.b		muls64_done		# no
    716 
    717 # result should be a signed negative number.
    718 # compute 2's complement of the unsigned number:
    719 #   -negate all bits and add 1
    720 muls64_neg:
    721 	not.l		%d0			# negate lo(result) bits
    722 	not.l		%d1			# negate hi(result) bits
    723 	addq.l		&1,%d0			# add 1 to lo(result)
    724 	addx.l		%d4,%d1			# add carry to hi(result)
    725 
    726 muls64_done:
    727 	mov.w		MUL64_CC(%a6),%d4
    728 	andi.b		&0x10,%d4		# keep old 'X' bit
    729 	tst.l		%d1			# may set 'N' bit
    730 	bpl.b		muls64_ddone
    731 	ori.b		&0x8,%d4		# set 'N' bit
    732 muls64_ddone:
    733 	mov.w		%d4,%cc
    734 
    735 # here, the result is in d1 and d0. the current strategy is to save
    736 # the values at the location pointed to by a0.
    737 # use movm here to not disturb the condition codes.
    738 muls64_end:
    739 	exg		%d1,%d0
    740 	movm.l		&0x0003,([0x10,%a6])	# save result at (a0)
    741 
    742 # EPILOGUE BEGIN ########################################################
    743 #	fmovm.l		(%sp)+,&0x0		# restore no fpregs
    744 	movm.l		(%sp)+,&0x003c		# restore d2-d5
    745 	unlk		%a6
    746 # EPILOGUE END ##########################################################
    747 
    748 	rts
    749 
    750 # one or both of the operands is zero so the result is also zero.
    751 # save the zero result to the register file and set the 'Z' ccode bit.
    752 muls64_zero:
    753 	clr.l		%d0
    754 	clr.l		%d1
    755 
    756 	mov.w		MUL64_CC(%a6),%d4
    757 	andi.b		&0x10,%d4
    758 	ori.b		&0x4,%d4
    759 	mov.w		%d4,%cc			# set 'Z' ccode bit
    760 
    761 	bra.b		muls64_end
    762 
    763 #########################################################################
    764 # XDEF ****************************************************************	#
    765 #	_060LSP__cmp2_Ab_(): Emulate "cmp2.b An,<ea>".			#
    766 #	_060LSP__cmp2_Aw_(): Emulate "cmp2.w An,<ea>".			#
    767 #	_060LSP__cmp2_Al_(): Emulate "cmp2.l An,<ea>".			#
    768 #	_060LSP__cmp2_Db_(): Emulate "cmp2.b Dn,<ea>".			#
    769 #	_060LSP__cmp2_Dw_(): Emulate "cmp2.w Dn,<ea>".			#
    770 #	_060LSP__cmp2_Dl_(): Emulate "cmp2.l Dn,<ea>".			#
    771 #									#
    772 #	This is the library version which is accessed as a subroutine	#
    773 #	and therefore does not work exactly like the 680X0 "cmp2"	#
    774 #	instruction.							#
    775 #									#
    776 # XREF ****************************************************************	#
    777 #	None								#
    778 #									#
    779 # INPUT ***************************************************************	#
    780 #	0x4(sp) = Rn							#
    781 #	0x8(sp) = pointer to boundary pair				#
    782 # 									#
    783 # OUTPUT **************************************************************	#
    784 #	cc = condition codes are set correctly				#
    785 #									#
    786 # ALGORITHM ***********************************************************	#
    787 # 	In the interest of simplicity, all operands are converted to	#
    788 # longword size whether the operation is byte, word, or long. The	#
    789 # bounds are sign extended accordingly. If Rn is a data register, Rn is #
    790 # also sign extended. If Rn is an address register, it need not be sign #
    791 # extended since the full register is always used.			#
    792 #	The condition codes are set correctly before the final "rts".	#
    793 #									#
    794 #########################################################################
    795 
    796 set	CMP2_CC,	-4
    797 
    798 	global 		_060LSP__cmp2_Ab_
    799 _060LSP__cmp2_Ab_:
    800 
    801 # PROLOGUE BEGIN ########################################################
    802 	link.w		%a6,&-4
    803 	movm.l		&0x3800,-(%sp)		# save d2-d4
    804 #	fmovm.l		&0x0,-(%sp)		# save no fpregs
    805 # PROLOGUE END ##########################################################
    806 
    807 	mov.w		%cc,CMP2_CC(%a6)
    808 	mov.l		0x8(%a6), %d2 		# get regval
    809 
    810 	mov.b		([0xc,%a6],0x0),%d0
    811 	mov.b		([0xc,%a6],0x1),%d1
    812 
    813 	extb.l		%d0			# sign extend lo bnd
    814 	extb.l		%d1			# sign extend hi bnd
    815 	bra.w		l_cmp2_cmp		# go do the compare emulation
    816 
    817 	global 		_060LSP__cmp2_Aw_
    818 _060LSP__cmp2_Aw_:
    819 
    820 # PROLOGUE BEGIN ########################################################
    821 	link.w		%a6,&-4
    822 	movm.l		&0x3800,-(%sp)		# save d2-d4
    823 #	fmovm.l		&0x0,-(%sp)		# save no fpregs
    824 # PROLOGUE END ##########################################################
    825 
    826 	mov.w		%cc,CMP2_CC(%a6)
    827 	mov.l		0x8(%a6), %d2 		# get regval
    828 
    829 	mov.w		([0xc,%a6],0x0),%d0
    830 	mov.w		([0xc,%a6],0x2),%d1
    831 
    832 	ext.l		%d0			# sign extend lo bnd
    833 	ext.l		%d1			# sign extend hi bnd
    834 	bra.w		l_cmp2_cmp		# go do the compare emulation
    835 
    836 	global 		_060LSP__cmp2_Al_
    837 _060LSP__cmp2_Al_:
    838 
    839 # PROLOGUE BEGIN ########################################################
    840 	link.w		%a6,&-4
    841 	movm.l		&0x3800,-(%sp)		# save d2-d4
    842 #	fmovm.l		&0x0,-(%sp)		# save no fpregs
    843 # PROLOGUE END ##########################################################
    844 
    845 	mov.w		%cc,CMP2_CC(%a6)
    846 	mov.l		0x8(%a6), %d2 		# get regval
    847 
    848 	mov.l		([0xc,%a6],0x0),%d0
    849 	mov.l		([0xc,%a6],0x4),%d1
    850 	bra.w		l_cmp2_cmp		# go do the compare emulation
    851 
    852 	global 		_060LSP__cmp2_Db_
    853 _060LSP__cmp2_Db_:
    854 
    855 # PROLOGUE BEGIN ########################################################
    856 	link.w		%a6,&-4
    857 	movm.l		&0x3800,-(%sp)		# save d2-d4
    858 #	fmovm.l		&0x0,-(%sp)		# save no fpregs
    859 # PROLOGUE END ##########################################################
    860 
    861 	mov.w		%cc,CMP2_CC(%a6)
    862 	mov.l		0x8(%a6), %d2 		# get regval
    863 
    864 	mov.b		([0xc,%a6],0x0),%d0
    865 	mov.b		([0xc,%a6],0x1),%d1
    866 
    867 	extb.l		%d0			# sign extend lo bnd
    868 	extb.l		%d1			# sign extend hi bnd
    869 
    870 # operation is a data register compare.
    871 # sign extend byte to long so we can do simple longword compares.
    872 	extb.l		%d2			# sign extend data byte
    873 	bra.w		l_cmp2_cmp		# go do the compare emulation
    874 
    875 	global 		_060LSP__cmp2_Dw_
    876 _060LSP__cmp2_Dw_:
    877 
    878 # PROLOGUE BEGIN ########################################################
    879 	link.w		%a6,&-4
    880 	movm.l		&0x3800,-(%sp)		# save d2-d4
    881 #	fmovm.l		&0x0,-(%sp)		# save no fpregs
    882 # PROLOGUE END ##########################################################
    883 
    884 	mov.w		%cc,CMP2_CC(%a6)
    885 	mov.l		0x8(%a6), %d2 		# get regval
    886 
    887 	mov.w		([0xc,%a6],0x0),%d0
    888 	mov.w		([0xc,%a6],0x2),%d1
    889 
    890 	ext.l		%d0			# sign extend lo bnd
    891 	ext.l		%d1			# sign extend hi bnd
    892 
    893 # operation is a data register compare.
    894 # sign extend word to long so we can do simple longword compares.
    895 	ext.l		%d2			# sign extend data word
    896 	bra.w		l_cmp2_cmp		# go emulate compare
    897 
    898 	global 		_060LSP__cmp2_Dl_
    899 _060LSP__cmp2_Dl_:
    900 
    901 # PROLOGUE BEGIN ########################################################
    902 	link.w		%a6,&-4
    903 	movm.l		&0x3800,-(%sp)		# save d2-d4
    904 #	fmovm.l		&0x0,-(%sp)		# save no fpregs
    905 # PROLOGUE END ##########################################################
    906 
    907 	mov.w		%cc,CMP2_CC(%a6)
    908 	mov.l		0x8(%a6), %d2 		# get regval
    909 
    910 	mov.l		([0xc,%a6],0x0),%d0
    911 	mov.l		([0xc,%a6],0x4),%d1
    912 
    913 #
    914 # To set the ccodes correctly:
    915 # 	(1) save 'Z' bit from (Rn - lo)
    916 #	(2) save 'Z' and 'N' bits from ((hi - lo) - (Rn - hi))
    917 #	(3) keep 'X', 'N', and 'V' from before instruction
    918 #	(4) combine ccodes
    919 #
    920 l_cmp2_cmp:
    921 	sub.l		%d0, %d2		# (Rn - lo)
    922 	mov.w		%cc, %d3		# fetch resulting ccodes
    923 	andi.b		&0x4, %d3		# keep 'Z' bit
    924 	sub.l		%d0, %d1		# (hi - lo)
    925 	cmp.l	 	%d1,%d2			# ((hi - lo) - (Rn - hi))
    926 
    927 	mov.w		%cc, %d4		# fetch resulting ccodes
    928 	or.b		%d4, %d3		# combine w/ earlier ccodes
    929 	andi.b		&0x5, %d3		# keep 'Z' and 'N'
    930 
    931 	mov.w		CMP2_CC(%a6), %d4	# fetch old ccodes
    932 	andi.b		&0x1a, %d4		# keep 'X','N','V' bits
    933 	or.b		%d3, %d4		# insert new ccodes
    934 	mov.w		%d4,%cc			# save new ccodes
    935 
    936 # EPILOGUE BEGIN ########################################################
    937 #	fmovm.l		(%sp)+,&0x0		# restore no fpregs
    938 	movm.l		(%sp)+,&0x001c		# restore d2-d4
    939 	unlk		%a6
    940 # EPILOGUE END ##########################################################
    941 
    942 	rts
    943