      1 #
      2 # $NetBSD: pfpsp.s,v 1.4 2005/12/11 12:17:52 christos Exp $
      3 #
      4 
      5 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      6 # MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
      7 # M68000 Hi-Performance Microprocessor Division
      8 # M68060 Software Package Production Release
      9 #
     10 # M68060 Software Package Copyright (C) 1993, 1994, 1995, 1996 Motorola Inc.
     11 # All rights reserved.
     12 #
     13 # THE SOFTWARE is provided on an "AS IS" basis and without warranty.
     14 # To the maximum extent permitted by applicable law,
     15 # MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
     16 # INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS
     17 # FOR A PARTICULAR PURPOSE and any warranty against infringement with
     18 # regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
     19 # and any accompanying written materials.
     20 #
     21 # To the maximum extent permitted by applicable law,
     22 # IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
     23 # (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
     24 # BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
     25 # ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
     26 #
     27 # Motorola assumes no responsibility for the maintenance and support
     28 # of the SOFTWARE.
     29 #
     30 # You are hereby granted a copyright license to use, modify, and distribute the
     31 # SOFTWARE so long as this entire notice is retained without alteration
     32 # in any modified and/or redistributed versions, and that such modified
     33 # versions are clearly identified as such.
     34 # No licenses are granted by implication, estoppel or otherwise under any
     35 # patents or trademarks of Motorola, Inc.
     36 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     37 
     38 #
     39 # freal.s:
     40 #	This file is appended to the top of the 060FPSP package
     41 # and contains the entry points into the package. The user, in
     42 # effect, branches to one of the branch table entries located
     43 # after _060FPSP_TABLE.
     44 #	Also, subroutine stubs exist in this file (_fpsp_done for
     45 # example) that are referenced by the FPSP package itself in order
     46 # to call a given routine. The stub routine actually performs the
     47 # callout. The FPSP code does a "bsr" to the stub routine. This
     48 # extra layer of hierarchy adds a slight performance penalty but
      49 # it makes the FPSP code easier to read and more maintainable.
     50 #
     51 
     52 set	_off_bsun,	0x00
     53 set	_off_snan,	0x04
     54 set	_off_operr,	0x08
     55 set	_off_ovfl,	0x0c
     56 set	_off_unfl,	0x10
     57 set	_off_dz,	0x14
     58 set	_off_inex,	0x18
     59 set	_off_fline,	0x1c
     60 set	_off_fpu_dis,	0x20
     61 set	_off_trap,	0x24
     62 set	_off_trace,	0x28
     63 set	_off_access,	0x2c
     64 set	_off_done,	0x30
     65 
     66 set	_off_imr,	0x40
     67 set	_off_dmr,	0x44
     68 set	_off_dmw,	0x48
     69 set	_off_irw,	0x4c
     70 set	_off_irl,	0x50
     71 set	_off_drb,	0x54
     72 set	_off_drw,	0x58
     73 set	_off_drl,	0x5c
     74 set	_off_dwb,	0x60
     75 set	_off_dww,	0x64
     76 set	_off_dwl,	0x68
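
         # The package expects the operating system to place a 128-byte
         # "call-out" table immediately BEFORE _060FPSP_TABLE; each _off_*
         # above indexes a longword in that table, and (as the stubs below
         # compute) each entry holds the displacement from the start of the
         # call-out table to the OS routine being called out to. A minimal
         # sketch of such a table as a C struct (field names are illustrative
         # only; each field = handler address - table start address):
         #
         #	struct callout_tbl {
         #	    int32_t bsun, snan, operr, ovfl, unfl, dz, inex;	/* 0x00-0x18 */
         #	    int32_t fline, fpu_dis, trap, trace, access, done;	/* 0x1c-0x30 */
         #	    int32_t pad1[3];					/* 0x34-0x3c */
         #	    int32_t imr, dmr, dmw, irw, irl;			/* 0x40-0x50 */
         #	    int32_t drb, drw, drl, dwb, dww, dwl;		/* 0x54-0x68 */
         #	    int32_t pad2[5];					/* 0x6c-0x7c */
         #	};							/* 0x80 bytes */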
     77 
     78 _060FPSP_TABLE:
     79 
     80 ###############################################################
     81 
     82 # Here's the table of ENTRY POINTS for those linking the package.
     83 	bra.l		_fpsp_snan
     84 	short		0x0000
     85 	bra.l		_fpsp_operr
     86 	short		0x0000
     87 	bra.l		_fpsp_ovfl
     88 	short		0x0000
     89 	bra.l		_fpsp_unfl
     90 	short		0x0000
     91 	bra.l		_fpsp_dz
     92 	short		0x0000
     93 	bra.l		_fpsp_inex
     94 	short		0x0000
     95 	bra.l		_fpsp_fline
     96 	short		0x0000
     97 	bra.l		_fpsp_unsupp
     98 	short		0x0000
     99 	bra.l		_fpsp_effadd
    100 	short		0x0000
    101 
    102 	space 		56
    103 
    104 ###############################################################
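         #
         # Each stub below follows one pattern: save d0, fetch this callout's
         # entry from the table at _060FPSP_TABLE-0x80, push the resolved
         # target address with pea, restore d0 from its stacked copy, and
         # enter the callout with "rtd &0x4" (which pops the pea'd address
         # into the PC and then discards the saved d0). Roughly, as a C-style
         # sketch:
         #
         #	int32_t disp = *(int32_t *)(tbl_start + _off_xxx); /* table entry */
         #	/* control passes to (tbl_start + disp) with d0-d1/a0-a1 exactly  */
         #	/* as the FPSP caller left them                                   */
         #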
    105 	global		_fpsp_done
    106 _fpsp_done:
    107 	mov.l		%d0,-(%sp)
    108 	mov.l		(_060FPSP_TABLE-0x80+_off_done,%pc),%d0
    109 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    110 	mov.l		0x4(%sp),%d0
    111 	rtd		&0x4
    112 
    113 	global		_real_ovfl
    114 _real_ovfl:
    115 	mov.l		%d0,-(%sp)
    116 	mov.l		(_060FPSP_TABLE-0x80+_off_ovfl,%pc),%d0
    117 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    118 	mov.l		0x4(%sp),%d0
    119 	rtd		&0x4
    120 
    121 	global		_real_unfl
    122 _real_unfl:
    123 	mov.l		%d0,-(%sp)
    124 	mov.l		(_060FPSP_TABLE-0x80+_off_unfl,%pc),%d0
    125 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    126 	mov.l		0x4(%sp),%d0
    127 	rtd		&0x4
    128 
    129 	global		_real_inex
    130 _real_inex:
    131 	mov.l		%d0,-(%sp)
    132 	mov.l		(_060FPSP_TABLE-0x80+_off_inex,%pc),%d0
    133 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    134 	mov.l		0x4(%sp),%d0
    135 	rtd		&0x4
    136 
    137 	global		_real_bsun
    138 _real_bsun:
    139 	mov.l		%d0,-(%sp)
    140 	mov.l		(_060FPSP_TABLE-0x80+_off_bsun,%pc),%d0
    141 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    142 	mov.l		0x4(%sp),%d0
    143 	rtd		&0x4
    144 
    145 	global		_real_operr
    146 _real_operr:
    147 	mov.l		%d0,-(%sp)
    148 	mov.l		(_060FPSP_TABLE-0x80+_off_operr,%pc),%d0
    149 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    150 	mov.l		0x4(%sp),%d0
    151 	rtd		&0x4
    152 
    153 	global		_real_snan
    154 _real_snan:
    155 	mov.l		%d0,-(%sp)
    156 	mov.l		(_060FPSP_TABLE-0x80+_off_snan,%pc),%d0
    157 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    158 	mov.l		0x4(%sp),%d0
    159 	rtd		&0x4
    160 
    161 	global		_real_dz
    162 _real_dz:
    163 	mov.l		%d0,-(%sp)
    164 	mov.l		(_060FPSP_TABLE-0x80+_off_dz,%pc),%d0
    165 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    166 	mov.l		0x4(%sp),%d0
    167 	rtd		&0x4
    168 
    169 	global		_real_fline
    170 _real_fline:
    171 	mov.l		%d0,-(%sp)
    172 	mov.l		(_060FPSP_TABLE-0x80+_off_fline,%pc),%d0
    173 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    174 	mov.l		0x4(%sp),%d0
    175 	rtd		&0x4
    176 
    177 	global		_real_fpu_disabled
    178 _real_fpu_disabled:
    179 	mov.l		%d0,-(%sp)
    180 	mov.l		(_060FPSP_TABLE-0x80+_off_fpu_dis,%pc),%d0
    181 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    182 	mov.l		0x4(%sp),%d0
    183 	rtd		&0x4
    184 
    185 	global		_real_trap
    186 _real_trap:
    187 	mov.l		%d0,-(%sp)
    188 	mov.l		(_060FPSP_TABLE-0x80+_off_trap,%pc),%d0
    189 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    190 	mov.l		0x4(%sp),%d0
    191 	rtd		&0x4
    192 
    193 	global		_real_trace
    194 _real_trace:
    195 	mov.l		%d0,-(%sp)
    196 	mov.l		(_060FPSP_TABLE-0x80+_off_trace,%pc),%d0
    197 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    198 	mov.l		0x4(%sp),%d0
    199 	rtd		&0x4
    200 
    201 	global		_real_access
    202 _real_access:
    203 	mov.l		%d0,-(%sp)
    204 	mov.l		(_060FPSP_TABLE-0x80+_off_access,%pc),%d0
    205 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    206 	mov.l		0x4(%sp),%d0
    207 	rtd		&0x4
    208 
    209 #######################################
    210 
    211 	global		_imem_read
    212 _imem_read:
    213 	mov.l		%d0,-(%sp)
    214 	mov.l		(_060FPSP_TABLE-0x80+_off_imr,%pc),%d0
    215 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    216 	mov.l		0x4(%sp),%d0
    217 	rtd		&0x4
    218 
    219 	global		_dmem_read
    220 _dmem_read:
    221 	mov.l		%d0,-(%sp)
    222 	mov.l		(_060FPSP_TABLE-0x80+_off_dmr,%pc),%d0
    223 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    224 	mov.l		0x4(%sp),%d0
    225 	rtd		&0x4
    226 
    227 	global		_dmem_write
    228 _dmem_write:
    229 	mov.l		%d0,-(%sp)
    230 	mov.l		(_060FPSP_TABLE-0x80+_off_dmw,%pc),%d0
    231 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    232 	mov.l		0x4(%sp),%d0
    233 	rtd		&0x4
    234 
    235 	global		_imem_read_word
    236 _imem_read_word:
    237 	mov.l		%d0,-(%sp)
    238 	mov.l		(_060FPSP_TABLE-0x80+_off_irw,%pc),%d0
    239 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    240 	mov.l		0x4(%sp),%d0
    241 	rtd		&0x4
    242 
    243 	global		_imem_read_long
    244 _imem_read_long:
    245 	mov.l		%d0,-(%sp)
    246 	mov.l		(_060FPSP_TABLE-0x80+_off_irl,%pc),%d0
    247 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    248 	mov.l		0x4(%sp),%d0
    249 	rtd		&0x4
    250 
    251 	global		_dmem_read_byte
    252 _dmem_read_byte:
    253 	mov.l		%d0,-(%sp)
    254 	mov.l		(_060FPSP_TABLE-0x80+_off_drb,%pc),%d0
    255 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    256 	mov.l		0x4(%sp),%d0
    257 	rtd		&0x4
    258 
    259 	global		_dmem_read_word
    260 _dmem_read_word:
    261 	mov.l		%d0,-(%sp)
    262 	mov.l		(_060FPSP_TABLE-0x80+_off_drw,%pc),%d0
    263 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    264 	mov.l		0x4(%sp),%d0
    265 	rtd		&0x4
    266 
    267 	global		_dmem_read_long
    268 _dmem_read_long:
    269 	mov.l		%d0,-(%sp)
    270 	mov.l		(_060FPSP_TABLE-0x80+_off_drl,%pc),%d0
    271 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    272 	mov.l		0x4(%sp),%d0
    273 	rtd		&0x4
    274 
    275 	global		_dmem_write_byte
    276 _dmem_write_byte:
    277 	mov.l		%d0,-(%sp)
    278 	mov.l		(_060FPSP_TABLE-0x80+_off_dwb,%pc),%d0
    279 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    280 	mov.l		0x4(%sp),%d0
    281 	rtd		&0x4
    282 
    283 	global		_dmem_write_word
    284 _dmem_write_word:
    285 	mov.l		%d0,-(%sp)
    286 	mov.l		(_060FPSP_TABLE-0x80+_off_dww,%pc),%d0
    287 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    288 	mov.l		0x4(%sp),%d0
    289 	rtd		&0x4
    290 
    291 	global		_dmem_write_long
    292 _dmem_write_long:
    293 	mov.l		%d0,-(%sp)
    294 	mov.l		(_060FPSP_TABLE-0x80+_off_dwl,%pc),%d0
    295 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    296 	mov.l		0x4(%sp),%d0
    297 	rtd		&0x4
    298 
    299 #
    300 # This file contains a set of define statements for constants
     301 # in order to promote readability within the core code itself.
    302 #
    303 
    304 set LOCAL_SIZE,		192			# stack frame size(bytes)
    305 set LV,			-LOCAL_SIZE		# stack offset
    306 
    307 set EXC_SR,		0x4			# stack status register
    308 set EXC_PC,		0x6			# stack pc
    309 set EXC_VOFF,		0xa			# stacked vector offset
    310 set EXC_EA,		0xc			# stacked <ea>
    311 
    312 set EXC_FP,		0x0			# frame pointer
    313 
    314 set EXC_AREGS,		-68			# offset of all address regs
    315 set EXC_DREGS,		-100			# offset of all data regs
    316 set EXC_FPREGS,		-36			# offset of all fp regs
    317 
    318 set EXC_A7,		EXC_AREGS+(7*4)		# offset of saved a7
    319 set OLD_A7,		EXC_AREGS+(6*4)		# extra copy of saved a7
    320 set EXC_A6,		EXC_AREGS+(6*4)		# offset of saved a6
    321 set EXC_A5,		EXC_AREGS+(5*4)
    322 set EXC_A4,		EXC_AREGS+(4*4)
    323 set EXC_A3,		EXC_AREGS+(3*4)
    324 set EXC_A2,		EXC_AREGS+(2*4)
    325 set EXC_A1,		EXC_AREGS+(1*4)
    326 set EXC_A0,		EXC_AREGS+(0*4)
    327 set EXC_D7,		EXC_DREGS+(7*4)
    328 set EXC_D6,		EXC_DREGS+(6*4)
    329 set EXC_D5,		EXC_DREGS+(5*4)
    330 set EXC_D4,		EXC_DREGS+(4*4)
    331 set EXC_D3,		EXC_DREGS+(3*4)
    332 set EXC_D2,		EXC_DREGS+(2*4)
    333 set EXC_D1,		EXC_DREGS+(1*4)
    334 set EXC_D0,		EXC_DREGS+(0*4)
    335 
    336 set EXC_FP0, 		EXC_FPREGS+(0*12)	# offset of saved fp0
    337 set EXC_FP1, 		EXC_FPREGS+(1*12)	# offset of saved fp1
    338 set EXC_FP2, 		EXC_FPREGS+(2*12)	# offset of saved fp2 (not used)
    339 
    340 set FP_SCR1, 		LV+80			# fp scratch 1
    341 set FP_SCR1_EX, 	FP_SCR1+0
    342 set FP_SCR1_SGN,	FP_SCR1+2
    343 set FP_SCR1_HI, 	FP_SCR1+4
    344 set FP_SCR1_LO, 	FP_SCR1+8
    345 
    346 set FP_SCR0, 		LV+68			# fp scratch 0
    347 set FP_SCR0_EX, 	FP_SCR0+0
    348 set FP_SCR0_SGN,	FP_SCR0+2
    349 set FP_SCR0_HI, 	FP_SCR0+4
    350 set FP_SCR0_LO, 	FP_SCR0+8
    351 
    352 set FP_DST, 		LV+56			# fp destination operand
    353 set FP_DST_EX, 		FP_DST+0
    354 set FP_DST_SGN,		FP_DST+2
    355 set FP_DST_HI, 		FP_DST+4
    356 set FP_DST_LO, 		FP_DST+8
    357 
    358 set FP_SRC, 		LV+44			# fp source operand
    359 set FP_SRC_EX, 		FP_SRC+0
    360 set FP_SRC_SGN,		FP_SRC+2
    361 set FP_SRC_HI, 		FP_SRC+4
    362 set FP_SRC_LO, 		FP_SRC+8
    363 
    364 set USER_FPIAR,		LV+40			# FP instr address register
    365 
    366 set USER_FPSR,		LV+36			# FP status register
    367 set FPSR_CC,		USER_FPSR+0		# FPSR condition codes
     368 set FPSR_QBYTE,		USER_FPSR+1		# FPSR quotient byte
    369 set FPSR_EXCEPT,	USER_FPSR+2		# FPSR exception status byte
    370 set FPSR_AEXCEPT,	USER_FPSR+3		# FPSR accrued exception byte
    371 
    372 set USER_FPCR,		LV+32			# FP control register
    373 set FPCR_ENABLE,	USER_FPCR+2		# FPCR exception enable
    374 set FPCR_MODE,		USER_FPCR+3		# FPCR rounding mode control
    375 
    376 set L_SCR3,		LV+28			# integer scratch 3
    377 set L_SCR2,		LV+24			# integer scratch 2
    378 set L_SCR1,		LV+20			# integer scratch 1
    379 
    380 set STORE_FLG,		LV+19			# flag: operand store (ie. not fcmp/ftst)
    381 
    382 set EXC_TEMP2,		LV+24			# temporary space
    383 set EXC_TEMP,		LV+16			# temporary space
    384 
    385 set DTAG,		LV+15			# destination operand type
    386 set STAG, 		LV+14			# source operand type
    387 
    388 set SPCOND_FLG,		LV+10			# flag: special case (see below)
    389 
    390 set EXC_CC,		LV+8			# saved condition codes
    391 set EXC_EXTWPTR,	LV+4			# saved current PC (active)
    392 set EXC_EXTWORD,	LV+2			# saved extension word
    393 set EXC_CMDREG,		LV+2			# saved extension word
    394 set EXC_OPWORD,		LV+0			# saved operation word
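
         # One way to picture the frame built by "link.w %a6,&-LOCAL_SIZE"
         # in the handlers below (a sketch only; offsets are relative to %a6
         # and match the definitions above):
         #
         #	 +0xc	EXC_EA		stacked <ea> (frame-format dependent)
         #	 +0x6	EXC_PC		stacked program counter
         #	 +0x4	EXC_SR		stacked status register
         #	  0x0	EXC_FP		saved frame pointer (old %a6)
         #	 -36	EXC_FPREGS	area reserved for fp0-fp2 (3 x 12 bytes)
         #	 -68	EXC_AREGS	area reserved for a0-a7   (8 x 4 bytes)
         #	-100	EXC_DREGS	area reserved for d0-d7   (8 x 4 bytes)
         #	-192	LV		EXC_OPWORD ... FP_SCR1 scratch/operand area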
    395 
    396 ################################
    397 
    398 # Helpful macros
    399 
    400 set FTEMP,		0			# offsets within an
    401 set FTEMP_EX, 		0			# extended precision
    402 set FTEMP_SGN,		2			# value saved in memory.
    403 set FTEMP_HI, 		4
    404 set FTEMP_LO, 		8
    405 set FTEMP_GRS,		12
    406 
    407 set LOCAL,		0			# offsets within an
    408 set LOCAL_EX, 		0			# extended precision
    409 set LOCAL_SGN,		2			# value saved in memory.
    410 set LOCAL_HI, 		4
    411 set LOCAL_LO, 		8
    412 set LOCAL_GRS,		12
    413 
    414 set DST,		0			# offsets within an
    415 set DST_EX,		0			# extended precision
    416 set DST_HI,		4			# value saved in memory.
    417 set DST_LO,		8
    418 
    419 set SRC,		0			# offsets within an
    420 set SRC_EX,		0			# extended precision
    421 set SRC_HI,		4			# value saved in memory.
    422 set SRC_LO,		8
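
         # FTEMP, LOCAL, DST, and SRC above all describe the same in-memory
         # picture of an extended-precision operand. As a C sketch (assuming
         # the usual big-endian 68k layout; the GRS longword exists only for
         # FTEMP/LOCAL):
         #
         #	struct xop {
         #	    uint16_t ex;	/* +0  sign + 15-bit biased exponent      */
         #	    uint16_t sgn;	/* +2  unused word, apparently sign scratch */
         #	    uint32_t hi;	/* +4  mantissa 63-32 (j-bit is bit 31)   */
         #	    uint32_t lo;	/* +8  mantissa 31-0                      */
         #	    uint32_t grs;	/* +12 guard/round/sticky bits            */
         #	};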
    423 
    424 set SGL_LO,		0x3f81			# min sgl prec exponent
    425 set SGL_HI,		0x407e			# max sgl prec exponent
    426 set DBL_LO,		0x3c01			# min dbl prec exponent
    427 set DBL_HI,		0x43fe			# max dbl prec exponent
    428 set EXT_LO,		0x0			# min ext prec exponent
    429 set EXT_HI,		0x7ffe			# max ext prec exponent
    430 
    431 set EXT_BIAS,		0x3fff			# extended precision bias
    432 set SGL_BIAS,		0x007f			# single precision bias
    433 set DBL_BIAS,		0x03ff			# double precision bias
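
         # The sgl/dbl limits above are just each format's extreme normalized
         # exponents re-biased for the extended format, i.e.
         # ext_exp = fmt_exp - fmt_bias + EXT_BIAS:
         #
         #	SGL_LO = 0x01  - 0x7f  + 0x3fff = 0x3f81
         #	SGL_HI = 0xfe  - 0x7f  + 0x3fff = 0x407e
         #	DBL_LO = 0x001 - 0x3ff + 0x3fff = 0x3c01
         #	DBL_HI = 0x7fe - 0x3ff + 0x3fff = 0x43fe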
    434 
    435 set NORM,		0x00			# operand type for STAG/DTAG
    436 set ZERO,		0x01			# operand type for STAG/DTAG
    437 set INF,		0x02			# operand type for STAG/DTAG
    438 set QNAN,		0x03			# operand type for STAG/DTAG
    439 set DENORM,		0x04			# operand type for STAG/DTAG
    440 set SNAN,		0x05			# operand type for STAG/DTAG
    441 set UNNORM,		0x06			# operand type for STAG/DTAG
    442 
    443 ##################
    444 # FPSR/FPCR bits #
    445 ##################
    446 set neg_bit,		0x3			# negative result
    447 set z_bit,		0x2			# zero result
    448 set inf_bit,		0x1			# infinite result
    449 set nan_bit,		0x0			# NAN result
    450 
    451 set q_sn_bit,		0x7			# sign bit of quotient byte
    452 
    453 set bsun_bit,		7			# branch on unordered
    454 set snan_bit,		6			# signalling NAN
    455 set operr_bit,		5			# operand error
    456 set ovfl_bit,		4			# overflow
    457 set unfl_bit,		3			# underflow
    458 set dz_bit,		2			# divide by zero
    459 set inex2_bit,		1			# inexact result 2
    460 set inex1_bit,		0			# inexact result 1
    461 
    462 set aiop_bit,		7			# accrued inexact operation bit
    463 set aovfl_bit,		6			# accrued overflow bit
    464 set aunfl_bit,		5			# accrued underflow bit
    465 set adz_bit,		4			# accrued dz bit
    466 set ainex_bit,		3			# accrued inexact bit
    467 
    468 #############################
    469 # FPSR individual bit masks #
    470 #############################
    471 set neg_mask,		0x08000000		# negative bit mask (lw)
    472 set inf_mask,		0x02000000		# infinity bit mask (lw)
    473 set z_mask,		0x04000000		# zero bit mask (lw)
    474 set nan_mask,		0x01000000		# nan bit mask (lw)
    475 
    476 set neg_bmask,		0x08			# negative bit mask (byte)
    477 set inf_bmask,		0x02			# infinity bit mask (byte)
    478 set z_bmask,		0x04			# zero bit mask (byte)
    479 set nan_bmask,		0x01			# nan bit mask (byte)
    480 
    481 set bsun_mask,		0x00008000		# bsun exception mask
    482 set snan_mask,		0x00004000		# snan exception mask
    483 set operr_mask,		0x00002000		# operr exception mask
    484 set ovfl_mask,		0x00001000		# overflow exception mask
    485 set unfl_mask,		0x00000800		# underflow exception mask
    486 set dz_mask,		0x00000400		# dz exception mask
    487 set inex2_mask,		0x00000200		# inex2 exception mask
    488 set inex1_mask,		0x00000100		# inex1 exception mask
    489 
    490 set aiop_mask,		0x00000080		# accrued illegal operation
    491 set aovfl_mask,		0x00000040		# accrued overflow
    492 set aunfl_mask,		0x00000020		# accrued underflow
    493 set adz_mask,		0x00000010		# accrued divide by zero
    494 set ainex_mask,		0x00000008		# accrued inexact
    495 
    496 ######################################
    497 # FPSR combinations used in the FPSP #
    498 ######################################
    499 set dzinf_mask,		inf_mask+dz_mask+adz_mask
    500 set opnan_mask,		nan_mask+operr_mask+aiop_mask
    501 set nzi_mask,		0x01ffffff 		#clears N, Z, and I
    502 set unfinx_mask,	unfl_mask+inex2_mask+aunfl_mask+ainex_mask
    503 set unf2inx_mask,	unfl_mask+inex2_mask+ainex_mask
    504 set ovfinx_mask,	ovfl_mask+inex2_mask+aovfl_mask+ainex_mask
    505 set inx1a_mask,		inex1_mask+ainex_mask
    506 set inx2a_mask,		inex2_mask+ainex_mask
    507 set snaniop_mask, 	nan_mask+snan_mask+aiop_mask
    508 set snaniop2_mask,	snan_mask+aiop_mask
    509 set naniop_mask,	nan_mask+aiop_mask
    510 set neginf_mask,	neg_mask+inf_mask
    511 set infaiop_mask, 	inf_mask+aiop_mask
    512 set negz_mask,		neg_mask+z_mask
    513 set opaop_mask,		operr_mask+aiop_mask
    514 set unfl_inx_mask,	unfl_mask+aunfl_mask+ainex_mask
    515 set ovfl_inx_mask,	ovfl_mask+aovfl_mask+ainex_mask
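
         # These combinations let a single OR update the FPSR image with the
         # condition code, exception, and accrued bits for a whole class of
         # results, e.g. (sketch; user_fpsr stands for USER_FPSR(%a6)):
         #
         #	/* default divide-by-zero result: set I cc, DZ, and ADZ */
         #	user_fpsr |= dzinf_mask;	/* 0x02000000|0x00000400|0x00000010 */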
    516 
    517 #########
    518 # misc. #
    519 #########
    520 set rnd_stky_bit,	29			# stky bit pos in longword
    521 
    522 set sign_bit,		0x7			# sign bit
    523 set signan_bit,		0x6			# signalling nan bit
    524 
    525 set sgl_thresh,		0x3f81			# minimum sgl exponent
    526 set dbl_thresh,		0x3c01			# minimum dbl exponent
    527 
    528 set x_mode,		0x0			# extended precision
    529 set s_mode,		0x4			# single precision
    530 set d_mode,		0x8			# double precision
    531 
    532 set rn_mode,		0x0			# round-to-nearest
    533 set rz_mode,		0x1			# round-to-zero
     534 set rm_mode,		0x2			# round-to-minus-infinity
    535 set rp_mode,		0x3			# round-to-plus-infinity
    536 
    537 set mantissalen,	64			# length of mantissa in bits
    538 
    539 set BYTE,		1			# len(byte) == 1 byte
    540 set WORD, 		2			# len(word) == 2 bytes
     541 set LONG, 		4			# len(longword) == 4 bytes
    542 
    543 set BSUN_VEC,		0xc0			# bsun    vector offset
    544 set INEX_VEC,		0xc4			# inexact vector offset
    545 set DZ_VEC,		0xc8			# dz      vector offset
    546 set UNFL_VEC,		0xcc			# unfl    vector offset
    547 set OPERR_VEC,		0xd0			# operr   vector offset
    548 set OVFL_VEC,		0xd4			# ovfl    vector offset
    549 set SNAN_VEC,		0xd8			# snan    vector offset
    550 
    551 ###########################
    552 # SPecial CONDition FLaGs #
    553 ###########################
    554 set ftrapcc_flg,	0x01			# flag bit: ftrapcc exception
    555 set fbsun_flg,		0x02			# flag bit: bsun exception
    556 set mia7_flg,		0x04			# flag bit: (a7)+ <ea>
    557 set mda7_flg,		0x08			# flag bit: -(a7) <ea>
    558 set fmovm_flg,		0x40			# flag bit: fmovm instruction
    559 set immed_flg,		0x80			# flag bit: &<data> <ea>
    560 
    561 set ftrapcc_bit,	0x0
    562 set fbsun_bit,		0x1
    563 set mia7_bit,		0x2
    564 set mda7_bit,		0x3
    565 set immed_bit,		0x7
    566 
    567 ##################################
    568 # TRANSCENDENTAL "LAST-OP" FLAGS #
    569 ##################################
    570 set FMUL_OP,		0x0			# fmul instr performed last
    571 set FDIV_OP,		0x1			# fdiv performed last
    572 set FADD_OP,		0x2			# fadd performed last
    573 set FMOV_OP,		0x3			# fmov performed last
    574 
    575 #############
    576 # CONSTANTS #
    577 #############
    578 T1:	long		0x40C62D38,0xD3D64634	# 16381 LOG2 LEAD
    579 T2:	long		0x3D6F90AE,0xB1E75CC7	# 16381 LOG2 TRAIL
    580 
    581 PI:	long		0x40000000,0xC90FDAA2,0x2168C235,0x00000000
    582 PIBY2:	long		0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
    583 
    584 TWOBYPI:
    585 	long		0x3FE45F30,0x6DC9C883
    586 
    587 #########################################################################
    588 # XDEF ****************************************************************	#
    589 #	_fpsp_ovfl(): 060FPSP entry point for FP Overflow exception.	#
    590 #									#
    591 #	This handler should be the first code executed upon taking the	#
    592 #	FP Overflow exception in an operating system.			#
    593 #									#
    594 # XREF ****************************************************************	#
    595 #	_imem_read_long() - read instruction longword			#
    596 #	fix_skewed_ops() - adjust src operand in fsave frame		#
    597 #	set_tag_x() - determine optype of src/dst operands		#
    598 #	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
    599 #	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
    600 #	load_fpn2() - load dst operand from FP regfile			#
    601 #	fout() - emulate an opclass 3 instruction			#
     602 #	tbl_unsupp - addr of table of emulation routines for opclass 0,2	#
    603 #	_fpsp_done() - "callout" for 060FPSP exit (all work done!)	#
    604 #	_real_ovfl() - "callout" for Overflow exception enabled code	#
    605 #	_real_inex() - "callout" for Inexact exception enabled code	#
    606 #	_real_trace() - "callout" for Trace exception code		#
    607 #									#
    608 # INPUT ***************************************************************	#
    609 #	- The system stack contains the FP Ovfl exception stack frame	#
    610 #	- The fsave frame contains the source operand			#
    611 # 									#
    612 # OUTPUT **************************************************************	#
    613 #	Overflow Exception enabled:					#
    614 #	- The system stack is unchanged					#
    615 #	- The fsave frame contains the adjusted src op for opclass 0,2	#
    616 #	Overflow Exception disabled:					#
    617 #	- The system stack is unchanged					#
    618 #	- The "exception present" flag in the fsave frame is cleared	#
    619 #									#
    620 # ALGORITHM ***********************************************************	#
    621 #	On the 060, if an FP overflow is present as the result of any	#
    622 # instruction, the 060 will take an overflow exception whether the 	#
    623 # exception is enabled or disabled in the FPCR. For the disabled case, 	#
     624 # this handler emulates the instruction to determine what the correct	#
    625 # default result should be for the operation. This default result is	#
    626 # then stored in either the FP regfile, data regfile, or memory. 	#
    627 # Finally, the handler exits through the "callout" _fpsp_done() 	#
    628 # denoting that no exceptional conditions exist within the machine.	#
    629 # 	If the exception is enabled, then this handler must create the	#
     630 # exceptional operand and place it in the fsave state frame, and store	#
    631 # the default result (only if the instruction is opclass 3). For 	#
    632 # exceptions enabled, this handler must exit through the "callout" 	#
    633 # _real_ovfl() so that the operating system enabled overflow handler	#
    634 # can handle this case.							#
    635 #	Two other conditions exist. First, if overflow was disabled 	#
    636 # but the inexact exception was enabled, this handler must exit 	#
    637 # through the "callout" _real_inex() regardless of whether the result	#
    638 # was inexact.								#
    639 #	Also, in the case of an opclass three instruction where 	#
    640 # overflow was disabled and the trace exception was enabled, this	#
    641 # handler must exit through the "callout" _real_trace().		#
    642 #									#
    643 #########################################################################
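
         # In outline, the handler below does (pseudo-C restatement of the
         # ALGORITHM section above):
         #
         #	emulate the faulting op;		/* tbl_unsupp or fout() */
         #	store the default result;
         #	if (FPCR overflow enabled)
         #		re-insert EXOP into the FPU, exit via _real_ovfl();
         #	else if (FPCR inexact enabled)
         #		re-insert EXOP, exit via _real_inex();	/* even if exact */
         #	else if (opclass 3 && trace pending)
         #		build a trace frame, exit via _real_trace();
         #	else
         #		exit via _fpsp_done();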
    644 
    645 	global		_fpsp_ovfl
    646 _fpsp_ovfl:
    647 
    648 #$#	sub.l		&24,%sp			# make room for src/dst
    649 
    650 	link.w		%a6,&-LOCAL_SIZE	# init stack frame
    651 
    652 	fsave		FP_SRC(%a6)		# grab the "busy" frame
    653 
    654  	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
    655 	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
    656  	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
    657 
    658 # the FPIAR holds the "current PC" of the faulting instruction
    659 	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
    660 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
    661 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
    662 	bsr.l		_imem_read_long		# fetch the instruction words
    663 	mov.l		%d0,EXC_OPWORD(%a6)
    664 
    665 ##############################################################################
    666 
    667 	btst		&0x5,EXC_CMDREG(%a6)	# is instr an fmove out?
    668 	bne.w		fovfl_out
    669 
    670 
    671 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
    672 	bsr.l		fix_skewed_ops		# fix src op
    673 
    674 # since, I believe, only NORMs and DENORMs can come through here,
    675 # maybe we can avoid the subroutine call.
    676 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
    677 	bsr.l		set_tag_x		# tag the operand type
    678 	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM
    679 
    680 # bit five of the fp extension word separates the monadic and dyadic operations
    681 # that can pass through fpsp_ovfl(). remember that fcmp, ftst, and fsincos
    682 # will never take this exception.
    683 	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
    684 	beq.b		fovfl_extract		# monadic
    685 
    686 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
    687 	bsr.l		load_fpn2		# load dst into FP_DST
    688 
    689 	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
    690 	bsr.l		set_tag_x		# tag the operand type
    691 	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
    692 	bne.b		fovfl_op2_done		# no
    693 	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
    694 fovfl_op2_done:
    695 	mov.b		%d0,DTAG(%a6)		# save dst optype tag
    696 
    697 fovfl_extract:
    698 
    699 #$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
    700 #$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
    701 #$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
    702 #$#	mov.l		FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
    703 #$#	mov.l		FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
    704 #$#	mov.l		FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
    705 
    706 	clr.l		%d0
    707 	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
    708 
    709 	mov.b		1+EXC_CMDREG(%a6),%d1
    710 	andi.w		&0x007f,%d1		# extract extension
    711 
     712 	andi.l		&0x00ff01ff,USER_FPSR(%a6) # zero all but accrued field
    713 
    714 	fmov.l		&0x0,%fpcr		# zero current control regs
    715 	fmov.l		&0x0,%fpsr
    716 
    717 	lea		FP_SRC(%a6),%a0
    718 	lea		FP_DST(%a6),%a1
    719 
    720 # maybe we can make these entry points ONLY the OVFL entry points of each routine.
    721 	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
    722 	jsr		(tbl_unsupp.l,%pc,%d1.l*1)
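         # (tbl_unsupp appears to hold one longword per extension opcode, each
         # a displacement relative to tbl_unsupp itself: the first indexed
         # access fetches the displacement, the indexed jsr then enters
         # tbl_unsupp + displacement.)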
    723 
    724 # the operation has been emulated. the result is in fp0.
    725 # the EXOP, if an exception occurred, is in fp1.
    726 # we must save the default result regardless of whether
    727 # traps are enabled or disabled.
    728 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0
    729 	bsr.l		store_fpreg
    730 
    731 # the exceptional possibilities we have left ourselves with are ONLY overflow
    732 # and inexact. and, the inexact is such that overflow occurred and was disabled
    733 # but inexact was enabled.
    734 	btst		&ovfl_bit,FPCR_ENABLE(%a6)
    735 	bne.b		fovfl_ovfl_on
    736 
    737 	btst		&inex2_bit,FPCR_ENABLE(%a6)
    738 	bne.b		fovfl_inex_on
    739 
    740 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
    741 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
    742 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
    743 
    744 	unlk		%a6
    745 #$#	add.l		&24,%sp
    746 	bra.l		_fpsp_done
    747 
    748 # overflow is enabled AND overflow, of course, occurred. so, we have the EXOP
    749 # in fp1. now, simply jump to _real_ovfl()!
    750 fovfl_ovfl_on:
    751 	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack
    752 
    753 	mov.w		&0xe005,2+FP_SRC(%a6) 	# save exc status
    754 
    755 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
    756 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
    757 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
    758 
    759 	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
    760 
    761 	unlk		%a6
    762 
    763 	bra.l		_real_ovfl
    764 
    765 # overflow occurred but is disabled. meanwhile, inexact is enabled. therefore,
    766 # we must jump to real_inex().
    767 fovfl_inex_on:
    768 
    769 	fmovm.x		&0x40,FP_SRC(%a6) 	# save EXOP (fp1) to stack
    770 
    771 	mov.b		&0xc4,1+EXC_VOFF(%a6)	# vector offset = 0xc4
    772 	mov.w		&0xe001,2+FP_SRC(%a6) 	# save exc status
    773 
    774 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
    775 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
    776 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
    777 
    778 	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
    779 
    780 	unlk		%a6
    781 
    782 	bra.l		_real_inex
    783 
    784 ########################################################################
    785 fovfl_out:
    786 
    787 
    788 #$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
    789 #$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
    790 #$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
    791 
    792 # the src operand is definitely a NORM(!), so tag it as such
    793 	mov.b		&NORM,STAG(%a6)		# set src optype tag
    794 
    795 	clr.l		%d0
    796 	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
    797 
     798 	and.l		&0xffff00ff,USER_FPSR(%a6) # zero all but accrued field
    799 
    800 	fmov.l		&0x0,%fpcr		# zero current control regs
    801 	fmov.l		&0x0,%fpsr
    802 
    803 	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
    804 
    805 	bsr.l		fout
    806 
    807 	btst		&ovfl_bit,FPCR_ENABLE(%a6)
    808 	bne.w		fovfl_ovfl_on
    809 
    810 	btst		&inex2_bit,FPCR_ENABLE(%a6)
    811 	bne.w		fovfl_inex_on
    812 
    813 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
    814 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
    815 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
    816 
    817 	unlk		%a6
    818 #$#	add.l		&24,%sp
    819 
    820 	btst		&0x7,(%sp)		# is trace on?
    821 	beq.l		_fpsp_done		# no
    822 
    823 	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
    824 	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
    825 	bra.l		_real_trace
    826 
    827 #########################################################################
    828 # XDEF ****************************************************************	#
    829 #	_fpsp_unfl(): 060FPSP entry point for FP Underflow exception.	#
    830 #									#
    831 #	This handler should be the first code executed upon taking the	#
    832 #	FP Underflow exception in an operating system.			#
    833 #									#
    834 # XREF ****************************************************************	#
    835 #	_imem_read_long() - read instruction longword			#
    836 #	fix_skewed_ops() - adjust src operand in fsave frame		#
    837 #	set_tag_x() - determine optype of src/dst operands		#
    838 #	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
    839 #	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
    840 #	load_fpn2() - load dst operand from FP regfile			#
    841 #	fout() - emulate an opclass 3 instruction			#
     842 #	tbl_unsupp - addr of table of emulation routines for opclass 0,2	#
    843 #	_fpsp_done() - "callout" for 060FPSP exit (all work done!)	#
     844 #	_real_unfl() - "callout" for Underflow exception enabled code	#
    845 #	_real_inex() - "callout" for Inexact exception enabled code	#
    846 #	_real_trace() - "callout" for Trace exception code		#
    847 #									#
    848 # INPUT ***************************************************************	#
    849 #	- The system stack contains the FP Unfl exception stack frame	#
    850 #	- The fsave frame contains the source operand			#
    851 # 									#
    852 # OUTPUT **************************************************************	#
    853 #	Underflow Exception enabled:					#
    854 #	- The system stack is unchanged					#
    855 #	- The fsave frame contains the adjusted src op for opclass 0,2	#
    856 #	Underflow Exception disabled:					#
    857 #	- The system stack is unchanged					#
    858 #	- The "exception present" flag in the fsave frame is cleared	#
    859 #									#
    860 # ALGORITHM ***********************************************************	#
    861 #	On the 060, if an FP underflow is present as the result of any	#
    862 # instruction, the 060 will take an underflow exception whether the 	#
    863 # exception is enabled or disabled in the FPCR. For the disabled case, 	#
     864 # this handler emulates the instruction to determine what the correct	#
    865 # default result should be for the operation. This default result is	#
    866 # then stored in either the FP regfile, data regfile, or memory. 	#
    867 # Finally, the handler exits through the "callout" _fpsp_done() 	#
    868 # denoting that no exceptional conditions exist within the machine.	#
    869 # 	If the exception is enabled, then this handler must create the	#
     870 # exceptional operand and place it in the fsave state frame, and store	#
    871 # the default result (only if the instruction is opclass 3). For 	#
    872 # exceptions enabled, this handler must exit through the "callout" 	#
     873 # _real_unfl() so that the operating system enabled underflow handler	#
    874 # can handle this case.							#
    875 #	Two other conditions exist. First, if underflow was disabled 	#
    876 # but the inexact exception was enabled and the result was inexact, 	#
    877 # this handler must exit through the "callout" _real_inex().		#
    879 #	Also, in the case of an opclass three instruction where 	#
    880 # underflow was disabled and the trace exception was enabled, this	#
    881 # handler must exit through the "callout" _real_trace().		#
    882 #									#
    883 #########################################################################
    884 
    885 	global		_fpsp_unfl
    886 _fpsp_unfl:
    887 
    888 #$#	sub.l		&24,%sp			# make room for src/dst
    889 
    890 	link.w		%a6,&-LOCAL_SIZE	# init stack frame
    891 
    892 	fsave		FP_SRC(%a6)		# grab the "busy" frame
    893 
    894  	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
    895 	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
    896  	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
    897 
    898 # the FPIAR holds the "current PC" of the faulting instruction
    899 	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
    900 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
    901 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
    902 	bsr.l		_imem_read_long		# fetch the instruction words
    903 	mov.l		%d0,EXC_OPWORD(%a6)
    904 
    905 ##############################################################################
    906 
    907 	btst		&0x5,EXC_CMDREG(%a6)	# is instr an fmove out?
    908 	bne.w		funfl_out
    909 
    910 
    911 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
    912 	bsr.l		fix_skewed_ops		# fix src op
    913 
    914 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
    915 	bsr.l		set_tag_x		# tag the operand type
    916 	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM
    917 
    918 # bit five of the fp ext word separates the monadic and dyadic operations
    919 # that can pass through fpsp_unfl(). remember that fcmp, and ftst
    920 # will never take this exception.
    921 	btst		&0x5,1+EXC_CMDREG(%a6)	# is op monadic or dyadic?
    922 	beq.b		funfl_extract		# monadic
    923 
    924 # now, what's left that's not dyadic is fsincos. we can distinguish it
    925 # from all dyadics by the '0110xxx pattern
    926 	btst		&0x4,1+EXC_CMDREG(%a6)	# is op an fsincos?
    927 	bne.b		funfl_extract		# yes
    928 
    929 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
    930 	bsr.l		load_fpn2		# load dst into FP_DST
    931 
    932 	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
    933 	bsr.l		set_tag_x		# tag the operand type
    934 	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
    935 	bne.b		funfl_op2_done		# no
    936 	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
    937 funfl_op2_done:
    938 	mov.b		%d0,DTAG(%a6)		# save dst optype tag
    939 
    940 funfl_extract:
    941 
    942 #$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
    943 #$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
    944 #$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
    945 #$#	mov.l		FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
    946 #$#	mov.l		FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
    947 #$#	mov.l		FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
    948 
    949 	clr.l		%d0
    950 	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
    951 
    952 	mov.b		1+EXC_CMDREG(%a6),%d1
    953 	andi.w		&0x007f,%d1		# extract extension
    954 
    955 	andi.l		&0x00ff01ff,USER_FPSR(%a6)
    956 
    957 	fmov.l		&0x0,%fpcr		# zero current control regs
    958 	fmov.l		&0x0,%fpsr
    959 
    960 	lea		FP_SRC(%a6),%a0
    961 	lea		FP_DST(%a6),%a1
    962 
    963 # maybe we can make these entry points ONLY the OVFL entry points of each routine.
    964 	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
    965 	jsr		(tbl_unsupp.l,%pc,%d1.l*1)
    966 
    967 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0
    968 	bsr.l		store_fpreg
    969 
    970 # The `060 FPU multiplier hardware is such that if the result of a
    971 # multiply operation is the smallest possible normalized number
    972 # (0x00000000_80000000_00000000), then the machine will take an
    973 # underflow exception. Since this is incorrect, we need to check
    974 # if our emulation, after re-doing the operation, decided that
    975 # no underflow was called for. We do these checks only in
    976 # funfl_{unfl,inex}_on() because w/ both exceptions disabled, this
    977 # special case will simply exit gracefully with the correct result.
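         # In other words (sketch of the checks below and in funfl_unfl_on):
         #
         #	if (FPCR underflow enabled) {
         #		if (FPSR shows underflow really occurred)
         #			goto funfl_unfl_on2;	/* -> _real_unfl() */
         #		/* else fall back to the inexact check */
         #	}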
    978 
     979 # the exceptional possibilities we have left ourselves with are ONLY underflow
     980 # and inexact. and, the inexact is such that underflow occurred and was disabled
    981 # but inexact was enabled.
    982 	btst		&unfl_bit,FPCR_ENABLE(%a6)
    983 	bne.b		funfl_unfl_on
    984 
    985 funfl_chkinex:
    986 	btst		&inex2_bit,FPCR_ENABLE(%a6)
    987 	bne.b		funfl_inex_on
    988 
    989 funfl_exit:
    990 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
    991 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
    992 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
    993 
    994 	unlk		%a6
    995 #$#	add.l		&24,%sp
    996 	bra.l		_fpsp_done
    997 
     998 # underflow is enabled AND underflow, of course, occurred. so, we have the EXOP
    999 # in fp1 (don't forget to save fp0). what to do now?
    1000 # well, we simply have to go to _real_unfl()!
   1001 funfl_unfl_on:
   1002 
   1003 # The `060 FPU multiplier hardware is such that if the result of a
   1004 # multiply operation is the smallest possible normalized number
   1005 # (0x00000000_80000000_00000000), then the machine will take an
   1006 # underflow exception. Since this is incorrect, we check here to see
   1007 # if our emulation, after re-doing the operation, decided that
   1008 # no underflow was called for.
   1009 	btst		&unfl_bit,FPSR_EXCEPT(%a6)
   1010 	beq.w		funfl_chkinex
   1011 
   1012 funfl_unfl_on2:
   1013 	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack
   1014 
   1015 	mov.w		&0xe003,2+FP_SRC(%a6) 	# save exc status
   1016 
   1017 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   1018 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1019 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1020 
   1021 	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
   1022 
   1023 	unlk		%a6
   1024 
   1025 	bra.l		_real_unfl
   1026 
    1027 # underflow occurred but is disabled. meanwhile, inexact is enabled. therefore,
   1028 # we must jump to real_inex().
   1029 funfl_inex_on:
   1030 
   1031 # The `060 FPU multiplier hardware is such that if the result of a
   1032 # multiply operation is the smallest possible normalized number
   1033 # (0x00000000_80000000_00000000), then the machine will take an
   1034 # underflow exception.
   1035 # But, whether bogus or not, if inexact is enabled AND it occurred,
   1036 # then we have to branch to real_inex.
   1037 
   1038 	btst		&inex2_bit,FPSR_EXCEPT(%a6)
   1039 	beq.w		funfl_exit
   1040 
   1041 funfl_inex_on2:
   1042 
   1043 	fmovm.x		&0x40,FP_SRC(%a6) 	# save EXOP to stack
   1044 
   1045 	mov.b		&0xc4,1+EXC_VOFF(%a6)	# vector offset = 0xc4
   1046 	mov.w		&0xe001,2+FP_SRC(%a6) 	# save exc status
   1047 
   1048 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   1049 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1050 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1051 
   1052 	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
   1053 
   1054 	unlk		%a6
   1055 
   1056 	bra.l		_real_inex
   1057 
   1058 #######################################################################
   1059 funfl_out:
   1060 
   1061 
   1062 #$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
   1063 #$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
   1064 #$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
   1065 
   1066 # the src operand is definitely a NORM(!), so tag it as such
   1067 	mov.b		&NORM,STAG(%a6)		# set src optype tag
   1068 
   1069 	clr.l		%d0
   1070 	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
   1071 
    1072 	and.l		&0xffff00ff,USER_FPSR(%a6) # zero all but accrued field
   1073 
   1074 	fmov.l		&0x0,%fpcr		# zero current control regs
   1075 	fmov.l		&0x0,%fpsr
   1076 
   1077 	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
   1078 
   1079 	bsr.l		fout
   1080 
   1081 	btst		&unfl_bit,FPCR_ENABLE(%a6)
   1082 	bne.w		funfl_unfl_on2
   1083 
   1084 	btst		&inex2_bit,FPCR_ENABLE(%a6)
   1085 	bne.w		funfl_inex_on2
   1086 
   1087 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   1088 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1089 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1090 
   1091 	unlk		%a6
   1092 #$#	add.l		&24,%sp
   1093 
   1094 	btst		&0x7,(%sp)		# is trace on?
   1095 	beq.l		_fpsp_done		# no
   1096 
   1097 	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
   1098 	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
   1099 	bra.l		_real_trace
   1100 
   1101 #########################################################################
   1102 # XDEF ****************************************************************	#
   1103 #	_fpsp_unsupp(): 060FPSP entry point for FP "Unimplemented	#
   1104 #		        Data Type" exception.				#
   1105 #									#
   1106 #	This handler should be the first code executed upon taking the	#
   1107 #	FP Unimplemented Data Type exception in an operating system.	#
   1108 #									#
   1109 # XREF ****************************************************************	#
   1110 #	_imem_read_{word,long}() - read instruction word/longword	#
   1111 #	fix_skewed_ops() - adjust src operand in fsave frame		#
   1112 #	set_tag_x() - determine optype of src/dst operands		#
   1113 #	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
   1114 #	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
   1115 #	load_fpn2() - load dst operand from FP regfile			#
   1116 #	load_fpn1() - load src operand from FP regfile			#
   1117 #	fout() - emulate an opclass 3 instruction			#
    1118 #	tbl_unsupp - addr of table of emulation routines for opclass 0,2	#
   1119 #	_real_inex() - "callout" to operating system inexact handler	#
   1120 #	_fpsp_done() - "callout" for exit; work all done		#
   1121 #	_real_trace() - "callout" for Trace enabled exception		#
   1122 #	funimp_skew() - adjust fsave src ops to "incorrect" value	#
   1123 #	_real_snan() - "callout" for SNAN exception			#
   1124 #	_real_operr() - "callout" for OPERR exception			#
   1125 #	_real_ovfl() - "callout" for OVFL exception			#
   1126 #	_real_unfl() - "callout" for UNFL exception			#
   1127 #	get_packed() - fetch packed operand from memory			#
   1128 #									#
   1129 # INPUT ***************************************************************	#
   1130 #	- The system stack contains the "Unimp Data Type" stk frame	#
   1131 #	- The fsave frame contains the ssrc op (for UNNORM/DENORM)	#
   1132 # 									#
   1133 # OUTPUT **************************************************************	#
   1134 #	If Inexact exception (opclass 3):				#
   1135 #	- The system stack is changed to an Inexact exception stk frame	#
   1136 #	If SNAN exception (opclass 3):					#
   1137 #	- The system stack is changed to an SNAN exception stk frame	#
   1138 #	If OPERR exception (opclass 3):					#
   1139 #	- The system stack is changed to an OPERR exception stk frame	#
   1140 #	If OVFL exception (opclass 3):					#
   1141 #	- The system stack is changed to an OVFL exception stk frame	#
   1142 #	If UNFL exception (opclass 3):					#
   1143 #	- The system stack is changed to an UNFL exception stack frame	#
   1144 #	If Trace exception enabled:					#
   1145 #	- The system stack is changed to a Trace exception stack frame	#
   1146 #	Else: (normal case)						#
   1147 #	- Correct result has been stored as appropriate			#
   1148 #									#
   1149 # ALGORITHM ***********************************************************	#
   1150 #	Two main instruction types can enter here: (1) DENORM or UNNORM	#
   1151 # unimplemented data types. These can be either opclass 0,2 or 3 	#
   1152 # instructions, and (2) PACKED unimplemented data format instructions	#
   1153 # also of opclasses 0,2, or 3.						#
   1154 #	For UNNORM/DENORM opclass 0 and 2, the handler fetches the src	#
   1155 # operand from the fsave state frame and the dst operand (if dyadic)	#
   1156 # from the FP register file. The instruction is then emulated by 	#
   1157 # choosing an emulation routine from a table of routines indexed by	#
   1158 # instruction type. Once the instruction has been emulated and result	#
   1159 # saved, then we check to see if any enabled exceptions resulted from	#
   1160 # instruction emulation. If none, then we exit through the "callout"	#
   1161 # _fpsp_done(). If there is an enabled FP exception, then we insert	#
   1162 # this exception into the FPU in the fsave state frame and then exit	#
   1163 # through _fpsp_done().							#
   1164 #	PACKED opclass 0 and 2 is similar in how the instruction is	#
   1165 # emulated and exceptions handled. The differences occur in how the	#
   1166 # handler loads the packed op (by calling get_packed() routine) and	#
   1167 # by the fact that a Trace exception could be pending for PACKED ops.	#
   1168 # If a Trace exception is pending, then the current exception stack	#
   1169 # frame is changed to a Trace exception stack frame and an exit is	#
   1170 # made through _real_trace().						#
   1171 #	For UNNORM/DENORM opclass 3, the actual move out to memory is	#
   1172 # performed by calling the routine fout(). If no exception should occur	#
   1173 # as the result of emulation, then an exit either occurs through	#
   1174 # _fpsp_done() or through _real_trace() if a Trace exception is pending	#
   1175 # (a Trace stack frame must be created here, too). If an FP exception	#
   1176 # should occur, then we must create an exception stack frame of that	#
   1177 # type and jump to either _real_snan(), _real_operr(), _real_inex(),	#
   1178 # _real_unfl(), or _real_ovfl() as appropriate. PACKED opclass 3 	#
   1179 # emulation is performed in a similar manner.				#
   1180 #									#
   1181 #########################################################################
   1182 
   1183 #
   1184 # (1) DENORM and UNNORM (unimplemented) data types:
   1185 #
   1186 #				post-instruction
   1187 #				*****************
   1188 #				*      EA	*
   1189 #	 pre-instruction	*		*
   1190 # 	*****************	*****************
   1191 #	* 0x0 *  0x0dc  *	* 0x3 *  0x0dc  *
   1192 #	*****************	*****************
   1193 #	*     Next	*	*     Next	*
   1194 #	*      PC	*	*      PC	*
   1195 #	*****************	*****************
   1196 #	*      SR	*	*      SR	*
   1197 #	*****************	*****************
   1198 #
   1199 # (2) PACKED format (unsupported) opclasses two and three:
   1200 #	*****************
   1201 #	*      EA	*
   1202 #	*		*
   1203 #	*****************
   1204 #	* 0x2 *  0x0dc	*
   1205 #	*****************
   1206 #	*     Next	*
   1207 #	*      PC	*
   1208 #	*****************
   1209 #	*      SR	*
   1210 #	*****************
   1211 #
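         # Top-level dispatch performed by the code below (pseudo-C sketch):
         #
         #	save state, fetch the opword/extension word;
         #	if (opclass 3)				/* fmove fpn,<ea> */
         #		goto fu_out;
         #	else if (opclass 2 && src format == packed)
         #		goto fu_in_pack;
         #	else {					/* DENORM/UNNORM, opclass 0/2 */
         #		fix the skewed src, tag src/dst, emulate via tbl_unsupp;
         #		exit clean or re-insert the highest enabled exception;
         #	}
         #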
   1212 	global		_fpsp_unsupp
   1213 _fpsp_unsupp:
   1214 
   1215 	link.w		%a6,&-LOCAL_SIZE	# init stack frame
   1216 
   1217 	fsave		FP_SRC(%a6)		# save fp state
   1218 
   1219  	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
   1220 	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
   1221  	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
   1222 
   1223 	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
   1224 	bne.b		fu_s
   1225 fu_u:
   1226 	mov.l		%usp,%a0		# fetch user stack pointer
   1227 	mov.l		%a0,EXC_A7(%a6)		# save on stack
   1228 	bra.b		fu_cont
   1229 # if the exception is an opclass zero or two unimplemented data type
   1230 # exception, then the a7' calculated here is wrong since it doesn't
   1231 # stack an ea. however, we don't need an a7' for this case anyways.
   1232 fu_s:
   1233 	lea		0x4+EXC_EA(%a6),%a0	# load old a7'
   1234 	mov.l		%a0,EXC_A7(%a6)		# save on stack
   1235 
   1236 fu_cont:
   1237 
   1238 # the FPIAR holds the "current PC" of the faulting instruction
   1239 # the FPIAR should be set correctly for ALL exceptions passing through
   1240 # this point.
   1241 	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
   1242 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   1243 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   1244 	bsr.l		_imem_read_long		# fetch the instruction words
   1245 	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD
   1246 
   1247 ############################
   1248 
   1249 	clr.b		SPCOND_FLG(%a6)		# clear special condition flag
   1250 
   1251 # Separate opclass three (fpn-to-mem) ops since they have a different
   1252 # stack frame and protocol.
   1253 	btst		&0x5,EXC_CMDREG(%a6)	# is it an fmove out?
   1254 	bne.w		fu_out			# yes
   1255 
   1256 # Separate packed opclass two instructions.
   1257 	bfextu		EXC_CMDREG(%a6){&0:&6},%d0
   1258 	cmpi.b		%d0,&0x13
   1259 	beq.w		fu_in_pack
   1260 
   1261 
   1262 # I'm not sure at this point what FPSR bits are valid for this instruction.
   1263 # so, since the emulation routines re-create them anyways, zero exception field
   1264 	andi.l		&0x00ff00ff,USER_FPSR(%a6) # zero exception field
   1265 
   1266 	fmov.l		&0x0,%fpcr		# zero current control regs
   1267 	fmov.l		&0x0,%fpsr
   1268 
   1269 # Opclass two w/ memory-to-fpn operation will have an incorrect extended
   1270 # precision format if the src format was single or double and the
   1271 # source data type was an INF, NAN, DENORM, or UNNORM
   1272 	lea		FP_SRC(%a6),%a0		# pass ptr to input
   1273 	bsr.l		fix_skewed_ops
   1274 
   1275 # we don't know whether the src operand or the dst operand (or both) is the
   1276 # UNNORM or DENORM. call the function that tags the operand type. if the
   1277 # input is an UNNORM, then convert it to a NORM, DENORM, or ZERO.
   1278 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
   1279 	bsr.l		set_tag_x		# tag the operand type
   1280 	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
   1281 	bne.b		fu_op2			# no
   1282 	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
   1283 
   1284 fu_op2:
   1285 	mov.b		%d0,STAG(%a6)		# save src optype tag
   1286 
   1287 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
   1288 
   1289 # bit five of the fp extension word separates the monadic and dyadic operations
   1290 # at this point
   1291 	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
   1292 	beq.b		fu_extract		# monadic
   1293 	cmpi.b		1+EXC_CMDREG(%a6),&0x3a	# is operation an ftst?
   1294 	beq.b		fu_extract		# yes, so it's monadic, too
   1295 
   1296 	bsr.l		load_fpn2		# load dst into FP_DST
   1297 
   1298 	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
   1299 	bsr.l		set_tag_x		# tag the operand type
   1300 	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
   1301 	bne.b		fu_op2_done		# no
   1302 	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
   1303 fu_op2_done:
   1304 	mov.b		%d0,DTAG(%a6)		# save dst optype tag
   1305 
   1306 fu_extract:
   1307 	clr.l		%d0
   1308 	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
   1309 
   1310 	bfextu		1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
   1311 
   1312 	lea		FP_SRC(%a6),%a0
   1313 	lea		FP_DST(%a6),%a1
   1314 
   1315 	mov.l		(tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
   1316 	jsr		(tbl_unsupp.l,%pc,%d1.l*1)
   1317 
   1318 #
   1319 # Exceptions in order of precedence:
   1320 # 	BSUN	: none
   1321 #	SNAN	: all dyadic ops
   1322 #	OPERR	: fsqrt(-NORM)
   1323 #	OVFL	: all except ftst,fcmp
   1324 #	UNFL	: all except ftst,fcmp
   1325 #	DZ	: fdiv
   1326 # 	INEX2	: all except ftst,fcmp
   1327 #	INEX1	: none (packed doesn't go through here)
   1328 #
   1329 
    1330 # we determine the highest priority exception (if any) set by the
   1331 # emulation routine that has also been enabled by the user.
   1332 	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions set
   1333 	bne.b		fu_in_ena		# some are enabled
   1334 
   1335 fu_in_cont:
   1336 # fcmp and ftst do not store any result.
   1337 	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch extension
   1338 	andi.b		&0x38,%d0		# extract bits 3-5
   1339 	cmpi.b		%d0,&0x38		# is instr fcmp or ftst?
   1340 	beq.b		fu_in_exit		# yes
   1341 
   1342 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
   1343 	bsr.l		store_fpreg		# store the result
   1344 
   1345 fu_in_exit:
   1346 
   1347 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1348 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1349 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1350 
   1351 	unlk		%a6
   1352 
   1353 	bra.l		_fpsp_done
   1354 
   1355 fu_in_ena:
   1356 	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
   1357 	bfffo		%d0{&24:&8},%d0		# find highest priority exception
   1358 	bne.b		fu_in_exc		# there is at least one set
   1359 
   1360 #
   1361 # No exceptions occurred that were also enabled. Now:
   1362 #
   1363 #   	if (OVFL && ovfl_disabled && inexact_enabled) {
   1364 #	    branch to _real_inex() (even if the result was exact!);
   1365 #     	} else {
   1366 #	    save the result in the proper fp reg (unless the op is fcmp or ftst);
   1367 #	    return;
   1368 #     	}
   1369 #
   1370 	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
   1371 	beq.b		fu_in_cont		# no
   1372 
   1373 fu_in_ovflchk:
   1374 	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
   1375 	beq.b		fu_in_cont		# no
   1376 	bra.w		fu_in_exc_ovfl		# go insert overflow frame
   1377 
   1378 #
   1379 # An exception occurred and that exception was enabled:
   1380 #
   1381 #	shift enabled exception field into lo byte of d0;
   1382 #	if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
   1383 #	    ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
   1384 #		/*
   1385 #		 * this is the case where we must call _real_inex() now or else
   1386 #		 * there will be no other way to pass it the exceptional operand
   1387 #		 */
   1388 #		call _real_inex();
   1389 #	} else {
   1390 #		restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
   1391 #	}
   1392 #
   1393 fu_in_exc:
   1394 	subi.l		&24,%d0			# fix offset to be 0-8
   1395 	cmpi.b		%d0,&0x6		# is exception INEX? (6)
   1396 	bne.b		fu_in_exc_exit		# no
   1397 
   1398 # the enabled exception was inexact
   1399 	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
   1400 	bne.w		fu_in_exc_unfl		# yes
   1401 	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
   1402 	bne.w		fu_in_exc_ovfl		# yes
   1403 
   1404 # here, we insert the correct fsave status value into the fsave frame for the
   1405 # corresponding exception. the operand in the fsave frame should be the original
   1406 # src operand.
   1407 fu_in_exc_exit:
   1408 	mov.l		%d0,-(%sp)		# save d0
   1409 	bsr.l		funimp_skew		# skew sgl or dbl inputs
   1410 	mov.l		(%sp)+,%d0		# restore d0
   1411 
   1412 	mov.w		(tbl_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # create exc status
   1413 
   1414 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1415 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1416 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1417 
   1418 	frestore	FP_SRC(%a6)		# restore src op
   1419 
   1420 	unlk		%a6
   1421 
   1422 	bra.l		_fpsp_done
   1423 
   1424 tbl_except:
   1425 	short		0xe000,0xe006,0xe004,0xe005
   1426 	short		0xe003,0xe002,0xe001,0xe001
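# the table is indexed by the priority number left in d0 (0=BSUN, 1=SNAN,
# 2=OPERR, 3=OVFL, 4=UNFL, 5=DZ, 6=INEX2, 7=INEX1); each entry is the status
# word stuffed into the fsave image (2+FP_SRC) above so that the frestore
# hands the FPU a pending exception of the matching type.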
   1427 
   1428 fu_in_exc_unfl:
   1429 	mov.w		&0x4,%d0
   1430 	bra.b		fu_in_exc_exit
   1431 fu_in_exc_ovfl:
   1432 	mov.w		&0x03,%d0
   1433 	bra.b		fu_in_exc_exit
   1434 
   1435 # If the input operand to this operation was opclass two and a single
   1436 # or double precision denorm, inf, or nan, the operand needs to be
   1437 # "corrected" in order to have the proper equivalent extended precision
   1438 # number.
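# for reference, the "skewed" exponents checked below follow from the bias
# difference between the source format and extended precision:
#	sgl: ext exp = sgl exp + (0x3fff - 0x7f)  = sgl exp + 0x3f80
#	     so a sgl denorm/zero shows 0x3f80 and a sgl inf/nan shows 0x407f
#	dbl: ext exp = dbl exp + (0x3fff - 0x3ff) = dbl exp + 0x3c00
#	     so a dbl denorm/zero shows 0x3c00 and a dbl inf/nan shows 0x43ff
# when a denorm is normalized, its true exponent is (min normal exp - shift),
# which in extended-biased form is 0x3f81 - shift (sgl) or 0x3c01 - shift (dbl).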
   1439 	global		fix_skewed_ops
   1440 fix_skewed_ops:
   1441 	bfextu		EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass,src fmt
   1442 	cmpi.b		%d0,&0x11		# is class = 2 & fmt = sgl?
   1443 	beq.b		fso_sgl			# yes
   1444 	cmpi.b		%d0,&0x15		# is class = 2 & fmt = dbl?
   1445 	beq.b		fso_dbl			# yes
   1446 	rts					# no
   1447 
   1448 fso_sgl:
   1449 	mov.w		LOCAL_EX(%a0),%d0	# fetch src exponent
   1450 	andi.w		&0x7fff,%d0		# strip sign
   1451 	cmpi.w		%d0,&0x3f80		# is |exp| == $3f80?
   1452 	beq.b		fso_sgl_dnrm_zero	# yes
   1453 	cmpi.w		%d0,&0x407f		# no; is |exp| == $407f?
   1454 	beq.b		fso_infnan		# yes
   1455 	rts					# no
   1456 
   1457 fso_sgl_dnrm_zero:
   1458 	andi.l		&0x7fffffff,LOCAL_HI(%a0) # clear j-bit
   1459 	beq.b		fso_zero		# it's a skewed zero
   1460 fso_sgl_dnrm:
   1461 # here, we count on norm not to alter a0...
   1462 	bsr.l		norm			# normalize mantissa
   1463 	neg.w		%d0			# -shft amt
   1464 	addi.w		&0x3f81,%d0		# adjust new exponent
   1465 	andi.w		&0x8000,LOCAL_EX(%a0) 	# clear old exponent
   1466 	or.w		%d0,LOCAL_EX(%a0)	# insert new exponent
   1467 	rts
   1468 
   1469 fso_zero:
   1470 	andi.w		&0x8000,LOCAL_EX(%a0)	# clear bogus exponent
   1471 	rts
   1472 
   1473 fso_infnan:
   1474 	andi.b		&0x7f,LOCAL_HI(%a0) 	# clear j-bit
   1475 	ori.w		&0x7fff,LOCAL_EX(%a0)	# make exponent = $7fff
   1476 	rts
   1477 
   1478 fso_dbl:
   1479 	mov.w		LOCAL_EX(%a0),%d0	# fetch src exponent
   1480 	andi.w		&0x7fff,%d0		# strip sign
   1481 	cmpi.w		%d0,&0x3c00		# is |exp| == $3c00?
   1482 	beq.b		fso_dbl_dnrm_zero	# yes
   1483 	cmpi.w		%d0,&0x43ff		# no; is |exp| == $43ff?
   1484 	beq.b		fso_infnan		# yes
   1485 	rts					# no
   1486 
   1487 fso_dbl_dnrm_zero:
   1488 	andi.l		&0x7fffffff,LOCAL_HI(%a0) # clear j-bit
   1489 	bne.b		fso_dbl_dnrm		# it's a skewed denorm
   1490 	tst.l		LOCAL_LO(%a0)		# is it a zero?
   1491 	beq.b		fso_zero		# yes
   1492 fso_dbl_dnrm:
   1493 # here, we count on norm not to alter a0...
   1494 	bsr.l		norm			# normalize mantissa
   1495 	neg.w		%d0			# -shft amt
   1496 	addi.w		&0x3c01,%d0		# adjust new exponent
   1497 	andi.w		&0x8000,LOCAL_EX(%a0) 	# clear old exponent
   1498 	or.w		%d0,LOCAL_EX(%a0)	# insert new exponent
   1499 	rts
   1500 
   1501 #################################################################
   1502 
   1503 # fmove out took an unimplemented data type exception.
   1504 # the src operand is in FP_SRC. Call _fout() to write out the result and
   1505 # to determine which exceptions, if any, to take.
   1506 fu_out:
   1507 
   1508 # Separate packed move outs from the UNNORM and DENORM move outs.
   1509 	bfextu		EXC_CMDREG(%a6){&3:&3},%d0
   1510 	cmpi.b		%d0,&0x3
   1511 	beq.w		fu_out_pack
   1512 	cmpi.b		%d0,&0x7
   1513 	beq.w		fu_out_pack
   1514 
   1515 
# it is not clear at this point which FPSR bits are valid for this instruction,
# so, since the emulation routines re-create them anyway, zero the exception field.
   1518 # fmove out doesn't affect ccodes.
   1519 	and.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field
   1520 
   1521 	fmov.l		&0x0,%fpcr		# zero current control regs
   1522 	fmov.l		&0x0,%fpsr
   1523 
   1524 # the src can ONLY be a DENORM or an UNNORM! so, don't make any big subroutine
   1525 # call here. just figure out what it is...
   1526 	mov.w		FP_SRC_EX(%a6),%d0	# get exponent
   1527 	andi.w		&0x7fff,%d0		# strip sign
   1528 	beq.b		fu_out_denorm		# it's a DENORM
   1529 
   1530 	lea		FP_SRC(%a6),%a0
   1531 	bsr.l		unnorm_fix		# yes; fix it
   1532 
   1533 	mov.b		%d0,STAG(%a6)
   1534 
   1535 	bra.b		fu_out_cont
   1536 fu_out_denorm:
   1537 	mov.b		&DENORM,STAG(%a6)
   1538 fu_out_cont:
   1539 
   1540 	clr.l		%d0
   1541 	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
   1542 
   1543 	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
   1544 
   1545 	mov.l		(%a6),EXC_A6(%a6)	# in case a6 changes
   1546 	bsr.l		fout			# call fmove out routine
   1547 
   1548 # Exceptions in order of precedence:
   1549 # 	BSUN	: none
   1550 #	SNAN	: none
   1551 #	OPERR	: fmove.{b,w,l} out of large UNNORM
   1552 #	OVFL	: fmove.{s,d}
   1553 #	UNFL	: fmove.{s,d,x}
   1554 #	DZ	: none
   1555 # 	INEX2	: all
   1556 #	INEX1	: none (packed doesn't travel through here)
   1557 
# determine the highest priority exception (if any) set by the
   1559 # emulation routine that has also been enabled by the user.
   1560 	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
   1561 	bne.w		fu_out_ena		# some are enabled
   1562 
   1563 fu_out_done:
   1564 
   1565 	mov.l		EXC_A6(%a6),(%a6)	# in case a6 changed
   1566 
   1567 # on extended precision opclass three instructions using pre-decrement or
# post-increment addressing mode, the address register is not updated. if the
   1569 # address register was the stack pointer used from user mode, then let's update
   1570 # it here. if it was used from supervisor mode, then we have to handle this
   1571 # as a special case.
   1572 	btst		&0x5,EXC_SR(%a6)
   1573 	bne.b		fu_out_done_s
   1574 
   1575 	mov.l		EXC_A7(%a6),%a0		# restore a7
   1576 	mov.l		%a0,%usp
   1577 
   1578 fu_out_done_cont:
   1579 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1580 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1581 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1582 
   1583 	unlk		%a6
   1584 
   1585 	btst		&0x7,(%sp)		# is trace on?
   1586 	bne.b		fu_out_trace		# yes
   1587 
   1588 	bra.l		_fpsp_done
   1589 
   1590 # is the ea mode pre-decrement of the stack pointer from supervisor mode?
   1591 # ("fmov.x fpm,-(a7)") if so,
   1592 fu_out_done_s:
   1593 	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
   1594 	bne.b		fu_out_done_cont
   1595 
   1596 # the extended precision result is still in fp0. but, we need to save it
   1597 # somewhere on the stack until we can copy it to its final resting place.
   1598 # here, we're counting on the top of the stack to be the old place-holders
   1599 # for fp0/fp1 which have already been restored. that way, we can write
   1600 # over those destinations with the shifted stack frame.
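# the shift is 0xc bytes -- the size of the extended result -- so SR and PC are
# copied 12 bytes lower, the result is written over the old frame location, and
# the final stack adjustment leaves a7 pointing at the relocated frame.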
   1601 	fmovm.x		&0x80,FP_SRC(%a6)	# put answer on stack
   1602 
   1603 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1604 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1605 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1606 
   1607 	mov.l		(%a6),%a6		# restore frame pointer
   1608 
   1609 	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
   1610 	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
   1611 
   1612 # now, copy the result to the proper place on the stack
   1613 	mov.l		LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
   1614 	mov.l		LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
   1615 	mov.l		LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
   1616 
   1617 	add.l		&LOCAL_SIZE-0x8,%sp
   1618 
   1619 	btst		&0x7,(%sp)
   1620 	bne.b		fu_out_trace
   1621 
   1622 	bra.l		_fpsp_done
   1623 
   1624 fu_out_ena:
   1625 	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
   1626 	bfffo		%d0{&24:&8},%d0		# find highest priority exception
   1627 	bne.b		fu_out_exc		# there is at least one set
   1628 
   1629 # no exceptions were set.
   1630 # if a disabled overflow occurred and inexact was enabled but the result
   1631 # was exact, then a branch to _real_inex() is made.
   1632 	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
   1633 	beq.w		fu_out_done		# no
   1634 
   1635 fu_out_ovflchk:
   1636 	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
   1637 	beq.w		fu_out_done		# no
   1638 	bra.w		fu_inex			# yes
   1639 
   1640 #
   1641 # The fp move out that took the "Unimplemented Data Type" exception was
   1642 # being traced. Since the stack frames are similar, get the "current" PC
   1643 # from FPIAR and put it in the trace stack frame then jump to _real_trace().
   1644 #
   1645 #		  UNSUPP FRAME		   TRACE FRAME
   1646 # 		*****************	*****************
   1647 #		*      EA	*	*    Current	*
   1648 #		*		*	*      PC	*
   1649 #		*****************	*****************
   1650 #		* 0x3 *  0x0dc	*	* 0x2 *  0x024	*
   1651 #		*****************	*****************
   1652 #		*     Next	*	*     Next	*
   1653 #		*      PC	*	*      PC	*
   1654 #		*****************	*****************
   1655 #		*      SR	*	*      SR	*
   1656 #		*****************	*****************
   1657 #
   1658 fu_out_trace:
   1659 	mov.w		&0x2024,0x6(%sp)
   1660 	fmov.l		%fpiar,0x8(%sp)
   1661 	bra.l		_real_trace
   1662 
   1663 # an exception occurred and that exception was enabled.
   1664 fu_out_exc:
   1665 	subi.l		&24,%d0			# fix offset to be 0-8
   1666 
   1667 # we don't mess with the existing fsave frame. just re-insert it and
   1668 # jump to the "_real_{}()" handler...
   1669 	mov.w		(tbl_fu_out.b,%pc,%d0.w*2),%d0
   1670 	jmp		(tbl_fu_out.b,%pc,%d0.w*1)
   1671 
   1672 	swbeg		&0x8
   1673 tbl_fu_out:
   1674 	short		tbl_fu_out	- tbl_fu_out	# BSUN can't happen
   1675 	short		tbl_fu_out 	- tbl_fu_out	# SNAN can't happen
   1676 	short		fu_operr	- tbl_fu_out	# OPERR
   1677 	short		fu_ovfl 	- tbl_fu_out	# OVFL
   1678 	short		fu_unfl 	- tbl_fu_out	# UNFL
   1679 	short		tbl_fu_out	- tbl_fu_out	# DZ can't happen
   1680 	short		fu_inex 	- tbl_fu_out	# INEX2
   1681 	short		tbl_fu_out	- tbl_fu_out	# INEX1 won't make it here
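# each entry is a word offset from tbl_fu_out, indexed by the priority number
# in d0 (0=BSUN ... 7=INEX1) scaled by two; the jmp above adds the fetched
# offset back to the table base. a zero offset marks an exception that can
# never be taken along this path.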
   1682 
   1683 # for snan,operr,ovfl,unfl, src op is still in FP_SRC so just
   1684 # frestore it.
   1685 fu_snan:
   1686 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1687 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1688 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1689 
   1690 	mov.w		&0x30d8,EXC_VOFF(%a6)	# vector offset = 0xd8
   1691 	mov.w		&0xe006,2+FP_SRC(%a6)
   1692 
   1693 	frestore	FP_SRC(%a6)
   1694 
   1695 	unlk		%a6
   1696 
   1697 
   1698 	bra.l		_real_snan
   1699 
   1700 fu_operr:
   1701 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1702 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1703 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1704 
   1705 	mov.w		&0x30d0,EXC_VOFF(%a6)	# vector offset = 0xd0
   1706 	mov.w		&0xe004,2+FP_SRC(%a6)
   1707 
   1708 	frestore	FP_SRC(%a6)
   1709 
   1710 	unlk		%a6
   1711 
   1712 
   1713 	bra.l		_real_operr
   1714 
   1715 fu_ovfl:
   1716 	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack
   1717 
   1718 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1719 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1720 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1721 
   1722 	mov.w		&0x30d4,EXC_VOFF(%a6)	# vector offset = 0xd4
   1723 	mov.w		&0xe005,2+FP_SRC(%a6)
   1724 
   1725 	frestore	FP_SRC(%a6)		# restore EXOP
   1726 
   1727 	unlk		%a6
   1728 
   1729 	bra.l		_real_ovfl
   1730 
   1731 # underflow can happen for extended precision. extended precision opclass
   1732 # three instruction exceptions don't update the stack pointer. so, if the
   1733 # exception occurred from user mode, then simply update a7 and exit normally.
# if the exception occurred from supervisor mode, check if the <ea> mode was
# -(a7); if so, the exception frame must be shifted "down" and the result
# inserted by hand.
   1735 fu_unfl:
   1736 	mov.l		EXC_A6(%a6),(%a6)	# restore a6
   1737 
   1738 	btst		&0x5,EXC_SR(%a6)
   1739 	bne.w		fu_unfl_s
   1740 
   1741 	mov.l		EXC_A7(%a6),%a0		# restore a7 whether we need
   1742 	mov.l		%a0,%usp		# to or not...
   1743 
   1744 fu_unfl_cont:
   1745 	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack
   1746 
   1747 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1748 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1749 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1750 
   1751 	mov.w		&0x30cc,EXC_VOFF(%a6)	# vector offset = 0xcc
   1752 	mov.w		&0xe003,2+FP_SRC(%a6)
   1753 
   1754 	frestore	FP_SRC(%a6)		# restore EXOP
   1755 
   1756 	unlk		%a6
   1757 
   1758 	bra.l		_real_unfl
   1759 
   1760 fu_unfl_s:
   1761 	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)?
   1762 	bne.b		fu_unfl_cont
   1763 
   1764 # the extended precision result is still in fp0. but, we need to save it
   1765 # somewhere on the stack until we can copy it to its final resting place
   1766 # (where the exc frame is currently). make sure it's not at the top of the
   1767 # frame or it will get overwritten when the exc stack frame is shifted "down".
   1768 	fmovm.x		&0x80,FP_SRC(%a6)	# put answer on stack
   1769 	fmovm.x		&0x40,FP_DST(%a6)	# put EXOP on stack
   1770 
   1771 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1772 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1773 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1774 
   1775 	mov.w		&0x30cc,EXC_VOFF(%a6)	# vector offset = 0xcc
   1776 	mov.w		&0xe003,2+FP_DST(%a6)
   1777 
   1778 	frestore	FP_DST(%a6)		# restore EXOP
   1779 
   1780 	mov.l		(%a6),%a6		# restore frame pointer
   1781 
   1782 	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
   1783 	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
   1784 	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
   1785 
   1786 # now, copy the result to the proper place on the stack
   1787 	mov.l		LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
   1788 	mov.l		LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
   1789 	mov.l		LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
   1790 
   1791 	add.l		&LOCAL_SIZE-0x8,%sp
   1792 
   1793 	bra.l		_real_unfl
   1794 
   1795 # fmove in and out enter here.
   1796 fu_inex:
   1797 	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack
   1798 
   1799 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1800 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1801 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1802 
   1803 	mov.w		&0x30c4,EXC_VOFF(%a6)	# vector offset = 0xc4
   1804 	mov.w		&0xe001,2+FP_SRC(%a6)
   1805 
   1806 	frestore	FP_SRC(%a6)		# restore EXOP
   1807 
   1808 	unlk		%a6
   1809 
   1810 
   1811 	bra.l		_real_inex
   1812 
   1813 #########################################################################
   1814 #########################################################################
   1815 fu_in_pack:
   1816 
   1817 
# it is not clear at this point which FPSR bits are valid for this instruction,
# so, since the emulation routines re-create them anyway, zero the exception field.
   1820 	andi.l		&0x0ff00ff,USER_FPSR(%a6) # zero exception field
   1821 
   1822 	fmov.l		&0x0,%fpcr		# zero current control regs
   1823 	fmov.l		&0x0,%fpsr
   1824 
   1825 	bsr.l		get_packed		# fetch packed src operand
   1826 
   1827 	lea		FP_SRC(%a6),%a0		# pass ptr to src
   1828 	bsr.l		set_tag_x		# set src optype tag
   1829 
   1830 	mov.b		%d0,STAG(%a6)		# save src optype tag
   1831 
   1832 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
   1833 
   1834 # bit five of the fp extension word separates the monadic and dyadic operations
   1835 # at this point
   1836 	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
   1837 	beq.b		fu_extract_p		# monadic
   1838 	cmpi.b		1+EXC_CMDREG(%a6),&0x3a	# is operation an ftst?
   1839 	beq.b		fu_extract_p		# yes, so it's monadic, too
   1840 
   1841 	bsr.l		load_fpn2		# load dst into FP_DST
   1842 
   1843 	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
   1844 	bsr.l		set_tag_x		# tag the operand type
   1845 	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
   1846 	bne.b		fu_op2_done_p		# no
   1847 	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
   1848 fu_op2_done_p:
   1849 	mov.b		%d0,DTAG(%a6)		# save dst optype tag
   1850 
   1851 fu_extract_p:
   1852 	clr.l		%d0
   1853 	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
   1854 
   1855 	bfextu		1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
   1856 
   1857 	lea		FP_SRC(%a6),%a0
   1858 	lea		FP_DST(%a6),%a1
   1859 
   1860 	mov.l		(tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
   1861 	jsr		(tbl_unsupp.l,%pc,%d1.l*1)
   1862 
   1863 #
   1864 # Exceptions in order of precedence:
   1865 # 	BSUN	: none
   1866 #	SNAN	: all dyadic ops
   1867 #	OPERR	: fsqrt(-NORM)
   1868 #	OVFL	: all except ftst,fcmp
   1869 #	UNFL	: all except ftst,fcmp
   1870 #	DZ	: fdiv
   1871 # 	INEX2	: all except ftst,fcmp
   1872 #	INEX1	: all
   1873 #
   1874 
# we determine the highest priority exception (if any) set by the
   1876 # emulation routine that has also been enabled by the user.
   1877 	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
   1878 	bne.w		fu_in_ena_p		# some are enabled
   1879 
   1880 fu_in_cont_p:
   1881 # fcmp and ftst do not store any result.
   1882 	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch extension
   1883 	andi.b		&0x38,%d0		# extract bits 3-5
   1884 	cmpi.b		%d0,&0x38		# is instr fcmp or ftst?
   1885 	beq.b		fu_in_exit_p		# yes
   1886 
   1887 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
   1888 	bsr.l		store_fpreg		# store the result
   1889 
   1890 fu_in_exit_p:
   1891 
   1892 	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
   1893 	bne.w		fu_in_exit_s_p		# supervisor
   1894 
   1895 	mov.l		EXC_A7(%a6),%a0		# update user a7
   1896 	mov.l		%a0,%usp
   1897 
   1898 fu_in_exit_cont_p:
   1899 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1900 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1901 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1902 
   1903 	unlk		%a6			# unravel stack frame
   1904 
   1905 	btst		&0x7,(%sp)		# is trace on?
   1906 	bne.w		fu_trace_p		# yes
   1907 
   1908 	bra.l		_fpsp_done		# exit to os
   1909 
   1910 # the exception occurred in supervisor mode. check to see if the
   1911 # addressing mode was (a7)+. if so, we'll need to shift the
   1912 # stack frame "up".
   1913 fu_in_exit_s_p:
   1914 	btst		&mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+
   1915 	beq.b		fu_in_exit_cont_p	# no
   1916 
   1917 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1918 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1919 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1920 
   1921 	unlk		%a6			# unravel stack frame
   1922 
   1923 # shift the stack frame "up". we don't really care about the <ea> field.
   1924 	mov.l		0x4(%sp),0x10(%sp)
   1925 	mov.l		0x0(%sp),0xc(%sp)
   1926 	add.l		&0xc,%sp
   1927 
   1928 	btst		&0x7,(%sp)		# is trace on?
   1929 	bne.w		fu_trace_p		# yes
   1930 
   1931 	bra.l		_fpsp_done		# exit to os
   1932 
   1933 fu_in_ena_p:
   1934 	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled & set
   1935 	bfffo		%d0{&24:&8},%d0		# find highest priority exception
   1936 	bne.b		fu_in_exc_p		# at least one was set
   1937 
   1938 #
   1939 # No exceptions occurred that were also enabled. Now:
   1940 #
   1941 #   	if (OVFL && ovfl_disabled && inexact_enabled) {
   1942 #	    branch to _real_inex() (even if the result was exact!);
   1943 #     	} else {
   1944 #	    save the result in the proper fp reg (unless the op is fcmp or ftst);
   1945 #	    return;
   1946 #     	}
   1947 #
   1948 	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
   1949 	beq.w		fu_in_cont_p		# no
   1950 
   1951 fu_in_ovflchk_p:
   1952 	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
   1953 	beq.w		fu_in_cont_p		# no
   1954 	bra.w		fu_in_exc_ovfl_p	# do _real_inex() now
   1955 
   1956 #
   1957 # An exception occurred and that exception was enabled:
   1958 #
   1959 #	shift enabled exception field into lo byte of d0;
   1960 #	if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
   1961 #	    ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
   1962 #		/*
   1963 #		 * this is the case where we must call _real_inex() now or else
   1964 #		 * there will be no other way to pass it the exceptional operand
   1965 #		 */
   1966 #		call _real_inex();
   1967 #	} else {
   1968 #		restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
   1969 #	}
   1970 #
   1971 fu_in_exc_p:
   1972 	subi.l		&24,%d0			# fix offset to be 0-8
   1973 	cmpi.b		%d0,&0x6		# is exception INEX? (6 or 7)
   1974 	blt.b		fu_in_exc_exit_p	# no
   1975 
   1976 # the enabled exception was inexact
   1977 	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
   1978 	bne.w		fu_in_exc_unfl_p	# yes
   1979 	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
   1980 	bne.w		fu_in_exc_ovfl_p	# yes
   1981 
   1982 # here, we insert the correct fsave status value into the fsave frame for the
   1983 # corresponding exception. the operand in the fsave frame should be the original
   1984 # src operand.
   1985 # as a reminder for future predicted pain and agony, we are passing in fsave the
   1986 # "non-skewed" operand for cases of sgl and dbl src INFs,NANs, and DENORMs.
   1987 # this is INCORRECT for enabled SNAN which would give to the user the skewed SNAN!!!
   1988 fu_in_exc_exit_p:
   1989 	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
   1990 	bne.w		fu_in_exc_exit_s_p	# supervisor
   1991 
   1992 	mov.l		EXC_A7(%a6),%a0		# update user a7
   1993 	mov.l		%a0,%usp
   1994 
   1995 fu_in_exc_exit_cont_p:
   1996 	mov.w		(tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6)
   1997 
   1998 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1999 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2000 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2001 
   2002 	frestore	FP_SRC(%a6)		# restore src op
   2003 
   2004 	unlk		%a6
   2005 
   2006 	btst		&0x7,(%sp)		# is trace enabled?
   2007 	bne.w		fu_trace_p		# yes
   2008 
   2009 	bra.l		_fpsp_done
   2010 
   2011 tbl_except_p:
   2012 	short		0xe000,0xe006,0xe004,0xe005
   2013 	short		0xe003,0xe002,0xe001,0xe001
   2014 
   2015 fu_in_exc_ovfl_p:
   2016 	mov.w		&0x3,%d0
   2017 	bra.w		fu_in_exc_exit_p
   2018 
   2019 fu_in_exc_unfl_p:
   2020 	mov.w		&0x4,%d0
   2021 	bra.w		fu_in_exc_exit_p
   2022 
   2023 fu_in_exc_exit_s_p:
   2024 	btst		&mia7_bit,SPCOND_FLG(%a6)
   2025 	beq.b		fu_in_exc_exit_cont_p
   2026 
   2027 	mov.w		(tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6)
   2028 
   2029 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   2030 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2031 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2032 
   2033 	frestore	FP_SRC(%a6)		# restore src op
   2034 
   2035 	unlk		%a6			# unravel stack frame
   2036 
   2037 # shift stack frame "up". who cares about <ea> field.
   2038 	mov.l		0x4(%sp),0x10(%sp)
   2039 	mov.l		0x0(%sp),0xc(%sp)
   2040 	add.l		&0xc,%sp
   2041 
   2042 	btst		&0x7,(%sp)		# is trace on?
   2043 	bne.b		fu_trace_p		# yes
   2044 
   2045 	bra.l		_fpsp_done		# exit to os
   2046 
   2047 #
   2048 # The opclass two PACKED instruction that took an "Unimplemented Data Type"
# exception was being traced. get the "current" PC from the FPIAR and put it in
# the trace stack frame, then jump to _real_trace().
   2051 #
   2052 #		  UNSUPP FRAME		   TRACE FRAME
   2053 #		*****************	*****************
   2054 #		*      EA	*	*    Current	*
   2055 #		*		*	*      PC	*
   2056 #		*****************	*****************
   2057 #		* 0x2 *	0x0dc	* 	* 0x2 *  0x024	*
   2058 #		*****************	*****************
   2059 #		*     Next	*	*     Next	*
   2060 #		*      PC	*      	*      PC	*
   2061 #		*****************	*****************
   2062 #		*      SR	*	*      SR	*
   2063 #		*****************	*****************
   2064 fu_trace_p:
   2065 	mov.w		&0x2024,0x6(%sp)
   2066 	fmov.l		%fpiar,0x8(%sp)
   2067 
   2068 	bra.l		_real_trace
   2069 
   2070 #########################################################
   2071 #########################################################
   2072 fu_out_pack:
   2073 
   2074 
# it is not clear at this point which FPSR bits are valid for this instruction,
# so, since the emulation routines re-create them anyway, zero the exception field.
   2077 # fmove out doesn't affect ccodes.
   2078 	and.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field
   2079 
   2080 	fmov.l		&0x0,%fpcr		# zero current control regs
   2081 	fmov.l		&0x0,%fpsr
   2082 
   2083 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0
   2084 	bsr.l		load_fpn1
   2085 
# unlike other opclass 3 unimplemented data type exceptions, packed must be
   2087 # able to detect all operand types.
   2088 	lea		FP_SRC(%a6),%a0
   2089 	bsr.l		set_tag_x		# tag the operand type
   2090 	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
   2091 	bne.b		fu_op2_p		# no
   2092 	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
   2093 
   2094 fu_op2_p:
   2095 	mov.b		%d0,STAG(%a6)		# save src optype tag
   2096 
   2097 	clr.l		%d0
   2098 	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
   2099 
   2100 	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
   2101 
   2102 	mov.l		(%a6),EXC_A6(%a6)	# in case a6 changes
   2103 	bsr.l		fout			# call fmove out routine
   2104 
   2105 # Exceptions in order of precedence:
   2106 # 	BSUN	: no
   2107 #	SNAN	: yes
   2108 #	OPERR	: if ((k_factor > +17) || (dec. exp exceeds 3 digits))
   2109 #	OVFL	: no
   2110 #	UNFL	: no
   2111 #	DZ	: no
   2112 # 	INEX2	: yes
   2113 #	INEX1	: no
   2114 
# determine the highest priority exception (if any) set by the
   2116 # emulation routine that has also been enabled by the user.
   2117 	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
   2118 	bne.w		fu_out_ena_p		# some are enabled
   2119 
   2120 fu_out_exit_p:
   2121 	mov.l		EXC_A6(%a6),(%a6)	# restore a6
   2122 
   2123 	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
   2124 	bne.b		fu_out_exit_s_p		# supervisor
   2125 
   2126 	mov.l		EXC_A7(%a6),%a0		# update user a7
   2127 	mov.l		%a0,%usp
   2128 
   2129 fu_out_exit_cont_p:
   2130 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   2131 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2132 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2133 
   2134 	unlk		%a6			# unravel stack frame
   2135 
   2136 	btst		&0x7,(%sp)		# is trace on?
   2137 	bne.w		fu_trace_p		# yes
   2138 
   2139 	bra.l		_fpsp_done		# exit to os
   2140 
   2141 # the exception occurred in supervisor mode. check to see if the
   2142 # addressing mode was -(a7). if so, we'll need to shift the
   2143 # stack frame "down".
   2144 fu_out_exit_s_p:
   2145 	btst		&mda7_bit,SPCOND_FLG(%a6) # was ea mode -(a7)
   2146 	beq.b		fu_out_exit_cont_p	# no
   2147 
   2148 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   2149 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2150 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2151 
   2152 	mov.l		(%a6),%a6		# restore frame pointer
   2153 
   2154 	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
   2155 	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
   2156 
   2157 # now, copy the result to the proper place on the stack
   2158 	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
   2159 	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
   2160 	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
   2161 
   2162 	add.l		&LOCAL_SIZE-0x8,%sp
   2163 
   2164 	btst		&0x7,(%sp)
   2165 	bne.w		fu_trace_p
   2166 
   2167 	bra.l		_fpsp_done
   2168 
   2169 fu_out_ena_p:
   2170 	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
   2171 	bfffo		%d0{&24:&8},%d0		# find highest priority exception
   2172 	beq.w		fu_out_exit_p
   2173 
   2174 	mov.l		EXC_A6(%a6),(%a6)	# restore a6
   2175 
   2176 # an exception occurred and that exception was enabled.
# the only exceptions possible on packed move out are INEX, OPERR, and SNAN.
   2178 fu_out_exc_p:
   2179 	cmpi.b		%d0,&0x1a
   2180 	bgt.w		fu_inex_p2
   2181 	beq.w		fu_operr_p
   2182 
   2183 fu_snan_p:
   2184 	btst		&0x5,EXC_SR(%a6)
   2185 	bne.b		fu_snan_s_p
   2186 
   2187 	mov.l		EXC_A7(%a6),%a0
   2188 	mov.l		%a0,%usp
   2189 	bra.w		fu_snan
   2190 
   2191 fu_snan_s_p:
   2192 	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
   2193 	bne.w		fu_snan
   2194 
   2195 # the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
   2196 # the strategy is to move the exception frame "down" 12 bytes. then, we
   2197 # can store the default result where the exception frame was.
   2198 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   2199 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2200 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2201 
	mov.w		&0x30d8,EXC_VOFF(%a6)	# vector offset = 0xd8
   2203 	mov.w		&0xe006,2+FP_SRC(%a6) 	# set fsave status
   2204 
   2205 	frestore	FP_SRC(%a6)		# restore src operand
   2206 
   2207 	mov.l		(%a6),%a6		# restore frame pointer
   2208 
   2209 	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
   2210 	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
   2211 	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
   2212 
# now, we copy the default result to its proper location
   2214 	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
   2215 	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
   2216 	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
   2217 
   2218 	add.l		&LOCAL_SIZE-0x8,%sp
   2219 
   2220 
   2221 	bra.l		_real_snan
   2222 
   2223 fu_operr_p:
   2224 	btst		&0x5,EXC_SR(%a6)
   2225 	bne.w		fu_operr_p_s
   2226 
   2227 	mov.l		EXC_A7(%a6),%a0
   2228 	mov.l		%a0,%usp
   2229 	bra.w		fu_operr
   2230 
   2231 fu_operr_p_s:
   2232 	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
   2233 	bne.w		fu_operr
   2234 
   2235 # the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
   2236 # the strategy is to move the exception frame "down" 12 bytes. then, we
   2237 # can store the default result where the exception frame was.
   2238 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   2239 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2240 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2241 
   2242 	mov.w		&0x30d0,EXC_VOFF(%a6)	# vector offset = 0xd0
   2243 	mov.w		&0xe004,2+FP_SRC(%a6) 	# set fsave status
   2244 
   2245 	frestore	FP_SRC(%a6)		# restore src operand
   2246 
   2247 	mov.l		(%a6),%a6		# restore frame pointer
   2248 
   2249 	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
   2250 	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
   2251 	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
   2252 
# now, we copy the default result to its proper location
   2254 	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
   2255 	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
   2256 	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
   2257 
   2258 	add.l		&LOCAL_SIZE-0x8,%sp
   2259 
   2260 
   2261 	bra.l		_real_operr
   2262 
   2263 fu_inex_p2:
   2264 	btst		&0x5,EXC_SR(%a6)
   2265 	bne.w		fu_inex_s_p2
   2266 
   2267 	mov.l		EXC_A7(%a6),%a0
   2268 	mov.l		%a0,%usp
   2269 	bra.w		fu_inex
   2270 
   2271 fu_inex_s_p2:
   2272 	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
   2273 	bne.w		fu_inex
   2274 
   2275 # the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
   2276 # the strategy is to move the exception frame "down" 12 bytes. then, we
   2277 # can store the default result where the exception frame was.
   2278 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   2279 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2280 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2281 
   2282 	mov.w		&0x30c4,EXC_VOFF(%a6) 	# vector offset = 0xc4
   2283 	mov.w		&0xe001,2+FP_SRC(%a6) 	# set fsave status
   2284 
   2285 	frestore	FP_SRC(%a6)		# restore src operand
   2286 
   2287 	mov.l		(%a6),%a6		# restore frame pointer
   2288 
   2289 	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
   2290 	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
   2291 	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
   2292 
# now, we copy the default result to its proper location
   2294 	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
   2295 	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
   2296 	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
   2297 
   2298 	add.l		&LOCAL_SIZE-0x8,%sp
   2299 
   2300 
   2301 	bra.l		_real_inex
   2302 
   2303 #########################################################################
   2304 
   2305 #
   2306 # if we're stuffing a source operand back into an fsave frame then we
# have to make sure that, for single or double source operands, the
   2308 # format stuffed is as weird as the hardware usually makes it.
   2309 #
   2310 	global		funimp_skew
   2311 funimp_skew:
   2312 	bfextu		EXC_EXTWORD(%a6){&3:&3},%d0 # extract src specifier
   2313 	cmpi.b		%d0,&0x1		# was src sgl?
   2314 	beq.b		funimp_skew_sgl		# yes
   2315 	cmpi.b		%d0,&0x5		# was src dbl?
   2316 	beq.b		funimp_skew_dbl		# yes
   2317 	rts
   2318 
   2319 funimp_skew_sgl:
   2320 	mov.w		FP_SRC_EX(%a6),%d0	# fetch DENORM exponent
   2321 	andi.w		&0x7fff,%d0		# strip sign
   2322 	beq.b		funimp_skew_sgl_not
   2323 	cmpi.w		%d0,&0x3f80
   2324 	bgt.b		funimp_skew_sgl_not
   2325 	neg.w		%d0			# make exponent negative
   2326 	addi.w		&0x3f81,%d0		# find amt to shift
   2327 	mov.l		FP_SRC_HI(%a6),%d1	# fetch DENORM hi(man)
   2328 	lsr.l		%d0,%d1			# shift it
   2329 	bset		&31,%d1			# set j-bit
   2330 	mov.l		%d1,FP_SRC_HI(%a6)	# insert new hi(man)
   2331 	andi.w		&0x8000,FP_SRC_EX(%a6)	# clear old exponent
   2332 	ori.w		&0x3f80,FP_SRC_EX(%a6)	# insert new "skewed" exponent
   2333 funimp_skew_sgl_not:
   2334 	rts
   2335 
   2336 funimp_skew_dbl:
   2337 	mov.w		FP_SRC_EX(%a6),%d0	# fetch DENORM exponent
   2338 	andi.w		&0x7fff,%d0		# strip sign
   2339 	beq.b		funimp_skew_dbl_not
   2340 	cmpi.w		%d0,&0x3c00
   2341 	bgt.b		funimp_skew_dbl_not
   2342 
   2343 	tst.b		FP_SRC_EX(%a6)		# make "internal format"
   2344 	smi.b		0x2+FP_SRC(%a6)
   2345 	mov.w		%d0,FP_SRC_EX(%a6)	# insert exponent with cleared sign
   2346 	clr.l		%d0			# clear g,r,s
   2347 	lea		FP_SRC(%a6),%a0		# pass ptr to src op
   2348 	mov.w		&0x3c01,%d1		# pass denorm threshold
   2349 	bsr.l		dnrm_lp			# denorm it
   2350 	mov.w		&0x3c00,%d0		# new exponent
   2351 	tst.b		0x2+FP_SRC(%a6)		# is sign set?
   2352 	beq.b		fss_dbl_denorm_done	# no
   2353 	bset		&15,%d0			# set sign
   2354 fss_dbl_denorm_done:
   2355 	bset		&0x7,FP_SRC_HI(%a6)	# set j-bit
   2356 	mov.w		%d0,FP_SRC_EX(%a6)	# insert new exponent
   2357 funimp_skew_dbl_not:
   2358 	rts
   2359 
   2360 #########################################################################
   2361 	global		_mem_write2
   2362 _mem_write2:
   2363 	btst		&0x5,EXC_SR(%a6)
   2364 	beq.l		_dmem_write
   2365 	mov.l		0x0(%a0),FP_DST_EX(%a6)
   2366 	mov.l		0x4(%a0),FP_DST_HI(%a6)
   2367 	mov.l		0x8(%a0),FP_DST_LO(%a6)
   2368 	clr.l		%d1
   2369 	rts
   2370 
   2371 #########################################################################
   2372 # XDEF ****************************************************************	#
   2373 #	_fpsp_effadd(): 060FPSP entry point for FP "Unimplemented	#
   2374 #		     	effective address" exception.			#
   2375 #									#
   2376 #	This handler should be the first code executed upon taking the	#
   2377 #	FP Unimplemented Effective Address exception in an operating	#
   2378 #	system.								#
   2379 #									#
   2380 # XREF ****************************************************************	#
   2381 #	_imem_read_long() - read instruction longword			#
   2382 #	fix_skewed_ops() - adjust src operand in fsave frame		#
   2383 #	set_tag_x() - determine optype of src/dst operands		#
   2384 #	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
   2385 #	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
   2386 #	load_fpn2() - load dst operand from FP regfile			#
   2387 #	tbl_unsupp - add of table of emulation routines for opclass 0,2	#
   2388 #	decbin() - convert packed data to FP binary data		#
   2389 #	_real_fpu_disabled() - "callout" for "FPU disabled" exception	#
   2390 #	_real_access() - "callout" for access error exception		#
   2391 #	_mem_read() - read extended immediate operand from memory	#
   2392 #	_fpsp_done() - "callout" for exit; work all done		#
   2393 #	_real_trace() - "callout" for Trace enabled exception		#
   2394 #	fmovm_dynamic() - emulate dynamic fmovm instruction		#
   2395 #	fmovm_ctrl() - emulate fmovm control instruction		#
   2396 #									#
   2397 # INPUT ***************************************************************	#
   2398 #	- The system stack contains the "Unimplemented <ea>" stk frame	#
   2399 # 									#
   2400 # OUTPUT **************************************************************	#
   2401 #	If access error:						#
   2402 #	- The system stack is changed to an access error stack frame	#
   2403 #	If FPU disabled:						#
   2404 #	- The system stack is changed to an FPU disabled stack frame	#
   2405 #	If Trace exception enabled:					#
   2406 #	- The system stack is changed to a Trace exception stack frame	#
   2407 #	Else: (normal case)						#
   2408 #	- None (correct result has been stored as appropriate)		#
   2409 #									#
   2410 # ALGORITHM ***********************************************************	#
   2411 #	This exception handles 3 types of operations:			#
   2412 # (1) FP Instructions using extended precision or packed immediate	#
   2413 #     addressing mode.							#
   2414 # (2) The "fmovm.x" instruction w/ dynamic register specification.	#
   2415 # (3) The "fmovm.l" instruction w/ 2 or 3 control registers.		#
   2416 #									#
   2417 #	For immediate data operations, the data is read in w/ a		#
   2418 # _mem_read() "callout", converted to FP binary (if packed), and used	#
   2419 # as the source operand to the instruction specified by the instruction	#
# word. If no FP exception should be reported as a result of the	#
   2421 # emulation, then the result is stored to the destination register and	#
   2422 # the handler exits through _fpsp_done(). If an enabled exc has been	#
   2423 # signalled as a result of emulation, then an fsave state frame		#
   2424 # corresponding to the FP exception type must be entered into the 060	#
   2425 # FPU before exiting. In either the enabled or disabled cases, we 	#
   2426 # must also check if a Trace exception is pending, in which case, we	#
   2427 # must create a Trace exception stack frame from the current exception	#
   2428 # stack frame. If no Trace is pending, we simply exit through		#
   2429 # _fpsp_done().								#
   2430 #	For "fmovm.x", call the routine fmovm_dynamic() which will 	#
   2431 # decode and emulate the instruction. No FP exceptions can be pending	#
   2432 # as a result of this operation emulation. A Trace exception can be	#
   2433 # pending, though, which means the current stack frame must be changed	#
   2434 # to a Trace stack frame and an exit made through _real_trace().	#
   2435 # For the case of "fmovm.x Dn,-(a7)", where the offending instruction	#
   2436 # was executed from supervisor mode, this handler must store the FP	#
   2437 # register file values to the system stack by itself since		#
   2438 # fmovm_dynamic() can't handle this. A normal exit is made through	#
# _fpsp_done().								#
   2440 #	For "fmovm.l", fmovm_ctrl() is used to emulate the instruction.	#
   2441 # Again, a Trace exception may be pending and an exit made through	#
   2442 # _real_trace(). Else, a normal exit is made through _fpsp_done().	#
   2443 #									#
   2444 #	Before any of the above is attempted, it must be checked to	#
   2445 # see if the FPU is disabled. Since the "Unimp <ea>" exception is taken	#
   2446 # before the "FPU disabled" exception, but the "FPU disabled" exception	#
   2447 # has higher priority, we check the disabled bit in the PCR. If set,	#
   2448 # then we must create an 8 word "FPU disabled" exception stack frame	#
   2449 # from the current 4 word exception stack frame. This includes 		#
   2450 # reproducing the effective address of the instruction to put on the 	#
   2451 # new stack frame.							#
   2452 #									#
   2453 # 	In the process of all emulation work, if a _mem_read()		#
   2454 # "callout" returns a failing result indicating an access error, then	#
   2455 # we must create an access error stack frame from the current stack	#
   2456 # frame. This information includes a faulting address and a fault-	#
   2457 # status-longword. These are created within this handler.		#
   2458 #									#
   2459 #########################################################################
   2460 
   2461 	global		_fpsp_effadd
   2462 _fpsp_effadd:
   2463 
   2464 # This exception type takes priority over the "Line F Emulator"
   2465 # exception. Therefore, the FPU could be disabled when entering here.
   2466 # So, we must check to see if it's disabled and handle that case separately.
   2467 	mov.l		%d0,-(%sp)		# save d0
   2468 	movc		%pcr,%d0		# load proc cr
   2469 	btst		&0x1,%d0		# is FPU disabled?
   2470 	bne.w		iea_disabled		# yes
   2471 	mov.l		(%sp)+,%d0		# restore d0
   2472 
   2473 	link		%a6,&-LOCAL_SIZE	# init stack frame
   2474 
   2475 	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
   2476 	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
   2477 	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
   2478 
   2479 # PC of instruction that took the exception is the PC in the frame
   2480 	mov.l		EXC_PC(%a6),EXC_EXTWPTR(%a6)
   2481 
   2482 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   2483 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   2484 	bsr.l		_imem_read_long		# fetch the instruction words
   2485 	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD
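# the fetched longword holds the operation word in its upper half and the
# first extension word in its lower half; the tst.w below therefore tests the
# extension word, whose msb is set only for the fmovem forms.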
   2486 
   2487 #########################################################################
   2488 
   2489 	tst.w		%d0			# is operation fmovem?
   2490 	bmi.w		iea_fmovm		# yes
   2491 
   2492 #
   2493 # here, we will have:
   2494 # 	fabs	fdabs	fsabs		facos		fmod
   2495 #	fadd	fdadd	fsadd		fasin		frem
   2496 # 	fcmp				fatan		fscale
   2497 #	fdiv	fddiv	fsdiv		fatanh		fsin
   2498 #	fint				fcos		fsincos
   2499 #	fintrz				fcosh		fsinh
   2500 #	fmove	fdmove	fsmove		fetox		ftan
   2501 # 	fmul	fdmul	fsmul		fetoxm1		ftanh
   2502 #	fneg	fdneg	fsneg		fgetexp		ftentox
   2503 #	fsgldiv				fgetman		ftwotox
   2504 # 	fsglmul				flog10
   2505 # 	fsqrt				flog2
   2506 #	fsub	fdsub	fssub		flogn
   2507 #	ftst				flognp1
   2508 # which can all use f<op>.{x,p}
   2509 # so, now it's immediate data extended precision AND PACKED FORMAT!
   2510 #
   2511 iea_op:
   2512 	andi.l		&0x00ff00ff,USER_FPSR(%a6)
   2513 
   2514 	btst		&0xa,%d0		# is src fmt x or p?
   2515 	bne.b		iea_op_pack		# packed
   2516 
   2517 
   2518 	mov.l		EXC_EXTWPTR(%a6),%a0	# pass: ptr to #<data>
   2519 	lea		FP_SRC(%a6),%a1		# pass: ptr to super addr
   2520 	mov.l		&0xc,%d0		# pass: 12 bytes
   2521 	bsr.l		_imem_read		# read extended immediate
   2522 
   2523 	tst.l		%d1			# did ifetch fail?
   2524 	bne.w		iea_iacc		# yes
   2525 
   2526 	bra.b		iea_op_setsrc
   2527 
   2528 iea_op_pack:
   2529 
   2530 	mov.l		EXC_EXTWPTR(%a6),%a0	# pass: ptr to #<data>
   2531 	lea		FP_SRC(%a6),%a1		# pass: ptr to super dst
   2532 	mov.l		&0xc,%d0		# pass: 12 bytes
   2533 	bsr.l		_imem_read		# read packed operand
   2534 
   2535 	tst.l		%d1			# did ifetch fail?
   2536 	bne.w		iea_iacc		# yes
   2537 
   2538 # The packed operand is an INF or a NAN if the exponent field is all ones.
   2539 	bfextu		FP_SRC(%a6){&1:&15},%d0	# get exp
   2540 	cmpi.w		%d0,&0x7fff		# INF or NAN?
   2541 	beq.b		iea_op_setsrc		# operand is an INF or NAN
   2542 
   2543 # The packed operand is a zero if the mantissa is all zero, else it's
   2544 # a normal packed op.
   2545 	mov.b		3+FP_SRC(%a6),%d0	# get byte 4
   2546 	andi.b		&0x0f,%d0		# clear all but last nybble
   2547 	bne.b		iea_op_gp_not_spec	# not a zero
   2548 	tst.l		FP_SRC_HI(%a6)		# is lw 2 zero?
   2549 	bne.b		iea_op_gp_not_spec	# not a zero
   2550 	tst.l		FP_SRC_LO(%a6)		# is lw 3 zero?
   2551 	beq.b		iea_op_setsrc		# operand is a ZERO
   2552 iea_op_gp_not_spec:
   2553 	lea		FP_SRC(%a6),%a0		# pass: ptr to packed op
   2554 	bsr.l		decbin			# convert to extended
   2555 	fmovm.x		&0x80,FP_SRC(%a6)	# make this the srcop
   2556 
   2557 iea_op_setsrc:
   2558 	addi.l		&0xc,EXC_EXTWPTR(%a6)	# update extension word pointer
   2559 
   2560 # FP_SRC now holds the src operand.
   2561 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
   2562 	bsr.l		set_tag_x		# tag the operand type
   2563 	mov.b		%d0,STAG(%a6)		# could be ANYTHING!!!
   2564 	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
   2565 	bne.b		iea_op_getdst		# no
   2566 	bsr.l		unnorm_fix		# yes; convert to NORM/DENORM/ZERO
   2567 	mov.b		%d0,STAG(%a6)		# set new optype tag
   2568 iea_op_getdst:
   2569 	clr.b		STORE_FLG(%a6)		# clear "store result" boolean
   2570 
   2571 	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
   2572 	beq.b		iea_op_extract		# monadic
   2573 	btst		&0x4,1+EXC_CMDREG(%a6)	# is operation fsincos,ftst,fcmp?
   2574 	bne.b		iea_op_spec		# yes
   2575 
   2576 iea_op_loaddst:
   2577 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
   2578 	bsr.l		load_fpn2		# load dst operand
   2579 
   2580 	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
   2581 	bsr.l		set_tag_x		# tag the operand type
   2582 	mov.b		%d0,DTAG(%a6)		# could be ANYTHING!!!
   2583 	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
   2584 	bne.b		iea_op_extract		# no
   2585 	bsr.l		unnorm_fix		# yes; convert to NORM/DENORM/ZERO
   2586 	mov.b		%d0,DTAG(%a6)		# set new optype tag
   2587 	bra.b		iea_op_extract
   2588 
   2589 # the operation is fsincos, ftst, or fcmp. only fcmp is dyadic
   2590 iea_op_spec:
   2591 	btst		&0x3,1+EXC_CMDREG(%a6)	# is operation fsincos?
   2592 	beq.b		iea_op_extract		# yes
   2593 # now, we're left with ftst and fcmp. so, first let's tag them so that they don't
   2594 # store a result. then, only fcmp will branch back and pick up a dst operand.
   2595 	st		STORE_FLG(%a6)		# don't store a final result
   2596 	btst		&0x1,1+EXC_CMDREG(%a6)	# is operation fcmp?
   2597 	beq.b		iea_op_loaddst		# yes
   2598 
   2599 iea_op_extract:
   2600 	clr.l		%d0
   2601 	mov.b		FPCR_MODE(%a6),%d0	# pass: rnd mode,prec
   2602 
   2603 	mov.b		1+EXC_CMDREG(%a6),%d1
   2604 	andi.w		&0x007f,%d1		# extract extension
   2605 
   2606 	fmov.l		&0x0,%fpcr
   2607 	fmov.l		&0x0,%fpsr
   2608 
   2609 	lea		FP_SRC(%a6),%a0
   2610 	lea		FP_DST(%a6),%a1
   2611 
   2612 	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
   2613 	jsr		(tbl_unsupp.l,%pc,%d1.l*1)
   2614 
   2615 #
   2616 # Exceptions in order of precedence:
   2617 #	BSUN	: none
   2618 #	SNAN	: all operations
   2619 #	OPERR	: all reg-reg or mem-reg operations that can normally operr
   2620 #	OVFL	: same as OPERR
   2621 #	UNFL	: same as OPERR
   2622 #	DZ	: same as OPERR
   2623 #	INEX2	: same as OPERR
   2624 #	INEX1	: all packed immediate operations
   2625 #
   2626 
# we determine the highest priority exception (if any) set by the
   2628 # emulation routine that has also been enabled by the user.
   2629 	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
   2630 	bne.b		iea_op_ena		# some are enabled
   2631 
   2632 # now, we save the result, unless, of course, the operation was ftst or fcmp.
   2633 # these don't save results.
   2634 iea_op_save:
   2635 	tst.b		STORE_FLG(%a6)		# does this op store a result?
   2636 	bne.b		iea_op_exit1		# exit with no frestore
   2637 
   2638 iea_op_store:
   2639 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
   2640 	bsr.l		store_fpreg		# store the result
   2641 
   2642 iea_op_exit1:
   2643 	mov.l		EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
   2644 	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
   2645 
   2646 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   2647 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2648 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2649 
   2650 	unlk		%a6			# unravel the frame
   2651 
   2652 	btst		&0x7,(%sp)		# is trace on?
   2653 	bne.w		iea_op_trace		# yes
   2654 
   2655 	bra.l		_fpsp_done		# exit to os
   2656 
   2657 iea_op_ena:
	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled and set
   2659 	bfffo		%d0{&24:&8},%d0		# find highest priority exception
   2660 	bne.b		iea_op_exc		# at least one was set
   2661 
   2662 # no exception occurred. now, did a disabled, exact overflow occur with inexact
   2663 # enabled? if so, then we have to stuff an overflow frame into the FPU.
   2664 	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
   2665 	beq.b		iea_op_save
   2666 
   2667 iea_op_ovfl:
   2668 	btst		&inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
   2669 	beq.b		iea_op_store		# no
   2670 	bra.b		iea_op_exc_ovfl		# yes
   2671 
   2672 # an enabled exception occurred. we have to insert the exception type back into
   2673 # the machine.
   2674 iea_op_exc:
   2675 	subi.l		&24,%d0			# fix offset to be 0-8
   2676 	cmpi.b		%d0,&0x6		# is exception INEX?
   2677 	bne.b		iea_op_exc_force	# no
   2678 
   2679 # the enabled exception was inexact. so, if it occurs with an overflow
   2680 # or underflow that was disabled, then we have to force an overflow or
   2681 # underflow frame.
   2682 	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
   2683 	bne.b		iea_op_exc_ovfl		# yes
   2684 	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
   2685 	bne.b		iea_op_exc_unfl		# yes
   2686 
   2687 iea_op_exc_force:
   2688 	mov.w		(tbl_iea_except.b,%pc,%d0.w*2),2+FP_SRC(%a6)
   2689 	bra.b		iea_op_exit2		# exit with frestore
   2690 
   2691 tbl_iea_except:
   2692 	short		0xe002, 0xe006, 0xe004, 0xe005
   2693 	short		0xe003, 0xe002, 0xe001, 0xe001
   2694 
   2695 iea_op_exc_ovfl:
   2696 	mov.w		&0xe005,2+FP_SRC(%a6)
   2697 	bra.b		iea_op_exit2
   2698 
   2699 iea_op_exc_unfl:
   2700 	mov.w		&0xe003,2+FP_SRC(%a6)
   2701 
   2702 iea_op_exit2:
   2703 	mov.l		EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
   2704 	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
   2705 
   2706 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   2707 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2708 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2709 
   2710 	frestore 	FP_SRC(%a6)		# restore exceptional state
   2711 
   2712 	unlk		%a6			# unravel the frame
   2713 
   2714 	btst		&0x7,(%sp)		# is trace on?
   2715 	bne.b		iea_op_trace		# yes
   2716 
   2717 	bra.l		_fpsp_done		# exit to os
   2718 
   2719 #
   2720 # The opclass two instruction that took an "Unimplemented Effective Address"
   2721 # exception was being traced. Make the "current" PC the FPIAR and put it in
   2722 # the trace stack frame then jump to _real_trace().
   2723 #
   2724 #		 UNIMP EA FRAME		   TRACE FRAME
   2725 #		*****************	*****************
   2726 #		* 0x0 *  0x0f0	*	*    Current	*
   2727 #		*****************	*      PC	*
   2728 #		*    Current	*	*****************
   2729 #		*      PC	*	* 0x2 *  0x024	*
   2730 #		*****************	*****************
   2731 #		*      SR	*	*     Next	*
   2732 #		*****************	*      PC	*
   2733 #					*****************
   2734 #					*      SR	*
   2735 #					*****************
   2736 iea_op_trace:
   2737 	mov.l		(%sp),-(%sp)		# shift stack frame "down"
   2738 	mov.w		0x8(%sp),0x4(%sp)
   2739 	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
   2740 	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
   2741 
   2742 	bra.l		_real_trace
   2743 
   2744 #########################################################################
   2745 iea_fmovm:
   2746 	btst		&14,%d0			# ctrl or data reg
   2747 	beq.w		iea_fmovm_ctrl
   2748 
   2749 iea_fmovm_data:
   2750 
   2751 	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode
   2752 	bne.b		iea_fmovm_data_s
   2753 
   2754 iea_fmovm_data_u:
   2755 	mov.l		%usp,%a0
   2756 	mov.l		%a0,EXC_A7(%a6)		# store current a7
   2757 	bsr.l		fmovm_dynamic		# do dynamic fmovm
   2758 	mov.l		EXC_A7(%a6),%a0		# load possibly new a7
   2759 	mov.l		%a0,%usp		# update usp
   2760 	bra.w		iea_fmovm_exit
   2761 
   2762 iea_fmovm_data_s:
   2763 	clr.b		SPCOND_FLG(%a6)
   2764 	lea		0x2+EXC_VOFF(%a6),%a0
   2765 	mov.l		%a0,EXC_A7(%a6)
   2766 	bsr.l		fmovm_dynamic		# do dynamic fmovm
   2767 
   2768 	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
   2769 	beq.w		iea_fmovm_data_predec
   2770 	cmpi.b		SPCOND_FLG(%a6),&mia7_flg
   2771 	bne.w		iea_fmovm_exit
   2772 
   2773 # right now, d0 = the size.
   2774 # the data has been fetched from the supervisor stack, but we have not
   2775 # incremented the stack pointer by the appropriate number of bytes.
   2776 # do it here.
   2777 iea_fmovm_data_postinc:
   2778 	btst		&0x7,EXC_SR(%a6)
   2779 	bne.b		iea_fmovm_data_pi_trace
   2780 
   2781 	mov.w		EXC_SR(%a6),(EXC_SR,%a6,%d0)
   2782 	mov.l		EXC_EXTWPTR(%a6),(EXC_PC,%a6,%d0)
   2783 	mov.w		&0x00f0,(EXC_VOFF,%a6,%d0)
   2784 
   2785 	lea		(EXC_SR,%a6,%d0),%a0
   2786 	mov.l		%a0,EXC_SR(%a6)
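# the first longword of the old frame location now holds the address of the
# rebuilt frame; the "mov.l (%sp)+,%sp" after the unlk below pops that address
# straight into the stack pointer, switching to the relocated frame.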
   2787 
   2788 	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
   2789 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2790  	movm.l		EXC_DREGS(%a6),&0x0303 	# restore d0-d1/a0-a1
   2791 
   2792 	unlk		%a6
   2793 	mov.l		(%sp)+,%sp
   2794 	bra.l		_fpsp_done
   2795 
   2796 iea_fmovm_data_pi_trace:
   2797 	mov.w		EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
   2798 	mov.l		EXC_EXTWPTR(%a6),(EXC_PC-0x4,%a6,%d0)
   2799 	mov.w		&0x2024,(EXC_VOFF-0x4,%a6,%d0)
   2800 	mov.l		EXC_PC(%a6),(EXC_VOFF+0x2-0x4,%a6,%d0)
   2801 
   2802 	lea		(EXC_SR-0x4,%a6,%d0),%a0
   2803 	mov.l		%a0,EXC_SR(%a6)
   2804 
   2805 	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
   2806 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2807  	movm.l		EXC_DREGS(%a6),&0x0303 	# restore d0-d1/a0-a1
   2808 
   2809 	unlk		%a6
   2810 	mov.l		(%sp)+,%sp
   2811 	bra.l		_real_trace
   2812 
    2813 # right now, d0 = the size and d1 = the strg.
   2814 iea_fmovm_data_predec:
   2815 	mov.b		%d1,EXC_VOFF(%a6)	# store strg
   2816 	mov.b		%d0,0x1+EXC_VOFF(%a6)	# store size
   2817 
   2818 	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
   2819 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2820  	movm.l		EXC_DREGS(%a6),&0x0303 	# restore d0-d1/a0-a1
   2821 
   2822 	mov.l		(%a6),-(%sp)		# make a copy of a6
   2823 	mov.l		%d0,-(%sp)		# save d0
   2824 	mov.l		%d1,-(%sp)		# save d1
   2825 	mov.l		EXC_EXTWPTR(%a6),-(%sp)	# make a copy of Next PC
   2826 
   2827 	clr.l		%d0
   2828 	mov.b		0x1+EXC_VOFF(%a6),%d0	# fetch size
   2829 	neg.l		%d0			# get negative of size
   2830 
   2831 	btst		&0x7,EXC_SR(%a6)	# is trace enabled?
   2832 	beq.b		iea_fmovm_data_p2
   2833 
   2834 	mov.w		EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
   2835 	mov.l		EXC_PC(%a6),(EXC_VOFF-0x2,%a6,%d0)
   2836 	mov.l		(%sp)+,(EXC_PC-0x4,%a6,%d0)
   2837 	mov.w		&0x2024,(EXC_VOFF-0x4,%a6,%d0)
   2838 
   2839 	pea		(%a6,%d0)		# create final sp
   2840 	bra.b		iea_fmovm_data_p3
   2841 
   2842 iea_fmovm_data_p2:
   2843 	mov.w		EXC_SR(%a6),(EXC_SR,%a6,%d0)
   2844 	mov.l		(%sp)+,(EXC_PC,%a6,%d0)
   2845 	mov.w		&0x00f0,(EXC_VOFF,%a6,%d0)
   2846 
   2847 	pea		(0x4,%a6,%d0)		# create final sp
   2848 
   2849 iea_fmovm_data_p3:
   2850 	clr.l		%d1
   2851 	mov.b		EXC_VOFF(%a6),%d1	# fetch strg
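         # (note: the strg is the dynamic register select byte -- bit 7 selects
         #  fp0, bit 6 fp1, ... bit 0 fp7 -- so, for example, a strg of 0xc0
         #  stores fp0 and fp1 below and advances d0 by 2 * 12 = 24 bytes.)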
   2852 
   2853 	tst.b		%d1
   2854 	bpl.b		fm_1
   2855 	fmovm.x		&0x80,(0x4+0x8,%a6,%d0)
   2856 	addi.l		&0xc,%d0
   2857 fm_1:
   2858 	lsl.b		&0x1,%d1
   2859 	bpl.b		fm_2
   2860 	fmovm.x		&0x40,(0x4+0x8,%a6,%d0)
   2861 	addi.l		&0xc,%d0
   2862 fm_2:
   2863 	lsl.b		&0x1,%d1
   2864 	bpl.b		fm_3
   2865 	fmovm.x		&0x20,(0x4+0x8,%a6,%d0)
   2866 	addi.l		&0xc,%d0
   2867 fm_3:
   2868 	lsl.b		&0x1,%d1
   2869 	bpl.b		fm_4
   2870 	fmovm.x		&0x10,(0x4+0x8,%a6,%d0)
   2871 	addi.l		&0xc,%d0
   2872 fm_4:
   2873 	lsl.b		&0x1,%d1
   2874 	bpl.b		fm_5
   2875 	fmovm.x		&0x08,(0x4+0x8,%a6,%d0)
   2876 	addi.l		&0xc,%d0
   2877 fm_5:
   2878 	lsl.b		&0x1,%d1
   2879 	bpl.b		fm_6
   2880 	fmovm.x		&0x04,(0x4+0x8,%a6,%d0)
   2881 	addi.l		&0xc,%d0
   2882 fm_6:
   2883 	lsl.b		&0x1,%d1
   2884 	bpl.b		fm_7
   2885 	fmovm.x		&0x02,(0x4+0x8,%a6,%d0)
   2886 	addi.l		&0xc,%d0
   2887 fm_7:
   2888 	lsl.b		&0x1,%d1
   2889 	bpl.b		fm_end
   2890 	fmovm.x		&0x01,(0x4+0x8,%a6,%d0)
   2891 fm_end:
   2892 	mov.l		0x4(%sp),%d1
   2893 	mov.l		0x8(%sp),%d0
   2894 	mov.l		0xc(%sp),%a6
   2895 	mov.l		(%sp)+,%sp
   2896 
   2897 	btst		&0x7,(%sp)		# is trace enabled?
   2898 	beq.l		_fpsp_done
   2899 	bra.l		_real_trace
   2900 
   2901 #########################################################################
   2902 iea_fmovm_ctrl:
   2903 
   2904 	bsr.l		fmovm_ctrl		# load ctrl regs
   2905 
   2906 iea_fmovm_exit:
   2907 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   2908 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2909 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2910 
   2911 	btst		&0x7,EXC_SR(%a6)	# is trace on?
   2912 	bne.b		iea_fmovm_trace		# yes
   2913 
   2914 	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set Next PC
   2915 
   2916 	unlk		%a6			# unravel the frame
   2917 
   2918 	bra.l		_fpsp_done		# exit to os
   2919 
   2920 #
   2921 # The control reg instruction that took an "Unimplemented Effective Address"
   2922 # exception was being traced. The "Current PC" for the trace frame is the
   2923 # PC stacked for Unimp EA. The "Next PC" is in EXC_EXTWPTR.
   2924 # After fixing the stack frame, jump to _real_trace().
   2925 #
   2926 #		 UNIMP EA FRAME		   TRACE FRAME
   2927 #		*****************	*****************
   2928 #		* 0x0 *  0x0f0	*	*    Current	*
   2929 #		*****************	*      PC	*
   2930 #		*    Current	*	*****************
   2931 #		*      PC	*	* 0x2 *  0x024	*
   2932 #		*****************	*****************
   2933 #		*      SR	*	*     Next	*
   2934 #		*****************	*      PC	*
   2935 #					*****************
   2936 #					*      SR	*
   2937 #					*****************
   2938 # this ain't a pretty solution, but it works:
   2939 # -restore a6 (not with unlk)
   2940 # -shift stack frame down over where old a6 used to be
   2941 # -add LOCAL_SIZE to stack pointer
   2942 iea_fmovm_trace:
   2943 	mov.l		(%a6),%a6		# restore frame pointer
   2944 	mov.w		EXC_SR+LOCAL_SIZE(%sp),0x0+LOCAL_SIZE(%sp)
   2945 	mov.l		EXC_PC+LOCAL_SIZE(%sp),0x8+LOCAL_SIZE(%sp)
   2946 	mov.l		EXC_EXTWPTR+LOCAL_SIZE(%sp),0x2+LOCAL_SIZE(%sp)
   2947 	mov.w		&0x2024,0x6+LOCAL_SIZE(%sp) # stk fmt = 0x2; voff = 0x024
   2948 	add.l		&LOCAL_SIZE,%sp		# clear stack frame
   2949 
   2950 	bra.l		_real_trace
   2951 
   2952 #########################################################################
   2953 # The FPU is disabled and so we should really have taken the "Line
   2954 # F Emulator" exception. So, here we create an 8-word stack frame
   2955 # from our 4-word stack frame. This means we must calculate the length
   2956 # of the faulting instruction to get the "next PC". This is trivial for
   2957 # immediate operands but requires some extra work for fmovm dynamic
   2958 # which can use most addressing modes.
   2959 iea_disabled:
   2960 	mov.l		(%sp)+,%d0		# restore d0
   2961 
   2962 	link		%a6,&-LOCAL_SIZE	# init stack frame
   2963 
   2964 	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
   2965 
   2966 # PC of instruction that took the exception is the PC in the frame
   2967 	mov.l		EXC_PC(%a6),EXC_EXTWPTR(%a6)
   2968 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   2969 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   2970 	bsr.l		_imem_read_long		# fetch the instruction words
   2971 	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD
   2972 
   2973 	tst.w		%d0			# is instr fmovm?
   2974 	bmi.b		iea_dis_fmovm		# yes
   2975 # instruction is using an extended precision immediate operand. therefore,
   2976 # the total instruction length is 16 bytes.
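         # (breakdown: 2-byte opword + 2-byte command/extension word + 12-byte
         #  extended-precision immediate = 16 (0x10) bytes.)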
   2977 iea_dis_immed:
   2978 	mov.l		&0x10,%d0		# 16 bytes of instruction
   2979 	bra.b		iea_dis_cont
   2980 iea_dis_fmovm:
   2981 	btst		&0xe,%d0		# is instr fmovm ctrl
   2982 	bne.b		iea_dis_fmovm_data	# no
    2983 # the instruction is an fmovm.l with 2 or 3 control registers.
   2984 	bfextu		%d0{&19:&3},%d1
   2985 	mov.l		&0xc,%d0
   2986 	cmpi.b		%d1,&0x7		# move all regs?
   2987 	bne.b		iea_dis_cont
   2988 	addq.l		&0x4,%d0
   2989 	bra.b		iea_dis_cont
   2990 # the instruction is an fmovm.x dynamic which can use many addressing
   2991 # modes and thus can have several different total instruction lengths.
   2992 # call fmovm_calc_ea which will go through the ea calc process and,
   2993 # as a by-product, will tell us how long the instruction is.
   2994 iea_dis_fmovm_data:
   2995 	clr.l		%d0
   2996 	bsr.l		fmovm_calc_ea
   2997 	mov.l		EXC_EXTWPTR(%a6),%d0
   2998 	sub.l		EXC_PC(%a6),%d0
   2999 iea_dis_cont:
   3000 	mov.w		%d0,EXC_VOFF(%a6)	# store stack shift value
   3001 
   3002 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   3003 
   3004 	unlk		%a6
   3005 
   3006 # here, we actually create the 8-word frame from the 4-word frame,
   3007 # with the "next PC" as additional info.
    3008 # the <ea> field is left as undefined.
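         # (illustration of the result, derived from the moves below: once d0 is
         #  popped again the new frame reads SR at 0x0(%sp), "Next PC" at 0x2(%sp),
         #  the format/vector word 0x402c at 0x6(%sp), the undefined <ea> field at
         #  0x8(%sp), and the "Current PC" of the faulting instruction at 0xc(%sp).)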
   3009 	subq.l		&0x8,%sp		# make room for new stack
   3010 	mov.l		%d0,-(%sp)		# save d0
   3011 	mov.w		0xc(%sp),0x4(%sp)	# move SR
   3012 	mov.l		0xe(%sp),0x6(%sp)	# move Current PC
   3013 	clr.l		%d0
   3014 	mov.w		0x12(%sp),%d0
   3015 	mov.l		0x6(%sp),0x10(%sp)	# move Current PC
   3016 	add.l		%d0,0x6(%sp)		# make Next PC
   3017 	mov.w		&0x402c,0xa(%sp)	# insert offset,frame format
   3018 	mov.l		(%sp)+,%d0		# restore d0
   3019 
   3020 	bra.l		_real_fpu_disabled
   3021 
   3022 ##########
   3023 
   3024 iea_iacc:
   3025 	movc		%pcr,%d0
   3026 	btst		&0x1,%d0
   3027 	bne.b		iea_iacc_cont
   3028 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   3029 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1 on stack
   3030 iea_iacc_cont:
   3031 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   3032 
   3033 	unlk		%a6
   3034 
   3035 	subq.w		&0x8,%sp		# make stack frame bigger
   3036 	mov.l		0x8(%sp),(%sp)		# store SR,hi(PC)
   3037 	mov.w		0xc(%sp),0x4(%sp)	# store lo(PC)
   3038 	mov.w		&0x4008,0x6(%sp)	# store voff
   3039 	mov.l		0x2(%sp),0x8(%sp)	# store ea
   3040 	mov.l		&0x09428001,0xc(%sp)	# store fslw
   3041 
   3042 iea_acc_done:
   3043 	btst		&0x5,(%sp)		# user or supervisor mode?
   3044 	beq.b		iea_acc_done2		# user
   3045 	bset		&0x2,0xd(%sp)		# set supervisor TM bit
   3046 
   3047 iea_acc_done2:
   3048 	bra.l		_real_access
   3049 
   3050 iea_dacc:
   3051 	lea		-LOCAL_SIZE(%a6),%sp
   3052 
   3053 	movc		%pcr,%d1
   3054 	btst		&0x1,%d1
   3055 	bne.b		iea_dacc_cont
   3056 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1 on stack
   3057 	fmovm.l		LOCAL_SIZE+USER_FPCR(%sp),%fpcr,%fpsr,%fpiar # restore ctrl regs
   3058 iea_dacc_cont:
   3059 	mov.l		(%a6),%a6
   3060 
   3061 	mov.l		0x4+LOCAL_SIZE(%sp),-0x8+0x4+LOCAL_SIZE(%sp)
   3062 	mov.w		0x8+LOCAL_SIZE(%sp),-0x8+0x8+LOCAL_SIZE(%sp)
   3063 	mov.w		&0x4008,-0x8+0xa+LOCAL_SIZE(%sp)
   3064 	mov.l		%a0,-0x8+0xc+LOCAL_SIZE(%sp)
   3065 	mov.w		%d0,-0x8+0x10+LOCAL_SIZE(%sp)
   3066 	mov.w		&0x0001,-0x8+0x12+LOCAL_SIZE(%sp)
   3067 
   3068 	movm.l		LOCAL_SIZE+EXC_DREGS(%sp),&0x0303 # restore d0-d1/a0-a1
   3069 	add.w		&LOCAL_SIZE-0x4,%sp
   3070 
   3071 	bra.b		iea_acc_done
   3072 
   3073 #########################################################################
   3074 # XDEF ****************************************************************	#
   3075 #	_fpsp_operr(): 060FPSP entry point for FP Operr exception.	#
   3076 #									#
   3077 #	This handler should be the first code executed upon taking the	#
   3078 # 	FP Operand Error exception in an operating system.		#
   3079 #									#
   3080 # XREF ****************************************************************	#
   3081 #	_imem_read_long() - read instruction longword			#
   3082 #	fix_skewed_ops() - adjust src operand in fsave frame		#
   3083 #	_real_operr() - "callout" to operating system operr handler	#
   3084 #	_dmem_write_{byte,word,long}() - store data to mem (opclass 3)	#
   3085 #	store_dreg_{b,w,l}() - store data to data regfile (opclass 3)	#
   3086 #	facc_out_{b,w,l}() - store to memory took access error (opcl 3)	#
   3087 #									#
   3088 # INPUT ***************************************************************	#
   3089 #	- The system stack contains the FP Operr exception frame	#
   3090 #	- The fsave frame contains the source operand			#
   3091 # 									#
   3092 # OUTPUT **************************************************************	#
   3093 #	No access error:						#
   3094 #	- The system stack is unchanged					#
   3095 #	- The fsave frame contains the adjusted src op for opclass 0,2	#
   3096 #									#
   3097 # ALGORITHM ***********************************************************	#
   3098 #	In a system where the FP Operr exception is enabled, the goal	#
   3099 # is to get to the handler specified at _real_operr(). But, on the 060,	#
    3100 # for opclass zero and two instructions taking this exception, the	#
   3101 # input operand in the fsave frame may be incorrect for some cases	#
   3102 # and needs to be corrected. This handler calls fix_skewed_ops() to	#
   3103 # do just this and then exits through _real_operr().			#
   3104 #	For opclass 3 instructions, the 060 doesn't store the default	#
   3105 # operr result out to memory or data register file as it should.	#
   3106 # This code must emulate the move out before finally exiting through	#
    3107 # _real_operr(). The move out, if to memory, is performed using 	#
   3108 # _mem_write() "callout" routines that may return a failing result.	#
   3109 # In this special case, the handler must exit through facc_out() 	#
   3110 # which creates an access error stack frame from the current operr	#
   3111 # stack frame.								#
   3112 #									#
   3113 #########################################################################
   3114 
   3115 	global		_fpsp_operr
   3116 _fpsp_operr:
   3117 
   3118 	link.w		%a6,&-LOCAL_SIZE	# init stack frame
   3119 
   3120 	fsave		FP_SRC(%a6)		# grab the "busy" frame
   3121 
   3122  	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
   3123 	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
   3124  	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
   3125 
   3126 # the FPIAR holds the "current PC" of the faulting instruction
   3127 	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
   3128 
   3129 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   3130 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   3131 	bsr.l		_imem_read_long		# fetch the instruction words
   3132 	mov.l		%d0,EXC_OPWORD(%a6)
   3133 
   3134 ##############################################################################
   3135 
   3136 	btst		&13,%d0			# is instr an fmove out?
   3137 	bne.b		foperr_out		# fmove out
   3138 
   3139 
   3140 # here, we simply see if the operand in the fsave frame needs to be "unskewed".
   3141 # this would be the case for opclass two operations with a source infinity or
   3142 # denorm operand in the sgl or dbl format. NANs also become skewed, but can't
   3143 # cause an operr so we don't need to check for them here.
   3144 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
   3145 	bsr.l		fix_skewed_ops		# fix src op
   3146 
   3147 foperr_exit:
   3148 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   3149 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   3150 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   3151 
   3152 	frestore	FP_SRC(%a6)
   3153 
   3154 	unlk		%a6
   3155 	bra.l		_real_operr
   3156 
   3157 ########################################################################
   3158 
   3159 #
   3160 # the hardware does not save the default result to memory on enabled
   3161 # operand error exceptions. we do this here before passing control to
   3162 # the user operand error handler.
   3163 #
   3164 # byte, word, and long destination format operations can pass
   3165 # through here. we simply need to test the sign of the src
   3166 # operand and save the appropriate minimum or maximum integer value
   3167 # to the effective address as pointed to by the stacked effective address.
   3168 #
   3169 # although packed opclass three operations can take operand error
   3170 # exceptions, they won't pass through here since they are caught
   3171 # first by the unsupported data format exception handler. that handler
   3172 # sends them directly to _real_operr() if necessary.
   3173 #
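         # (worked example: an "fmove.l" whose source is +inf stores 0x7fffffff
         #  below; a negative non-NAN source stores 0x80000000; a QNAN source
         #  stores the upper longword of its mantissa instead. word and byte
         #  destinations store the corresponding upper word/byte of these values.)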
   3174 foperr_out:
   3175 
   3176 	mov.w		FP_SRC_EX(%a6),%d1	# fetch exponent
   3177 	andi.w		&0x7fff,%d1
   3178 	cmpi.w		%d1,&0x7fff
   3179 	bne.b		foperr_out_not_qnan
   3180 # the operand is either an infinity or a QNAN.
   3181 	tst.l		FP_SRC_LO(%a6)
   3182 	bne.b		foperr_out_qnan
   3183 	mov.l		FP_SRC_HI(%a6),%d1
   3184 	andi.l		&0x7fffffff,%d1
   3185 	beq.b		foperr_out_not_qnan
   3186 foperr_out_qnan:
   3187 	mov.l		FP_SRC_HI(%a6),L_SCR1(%a6)
   3188 	bra.b		foperr_out_jmp
   3189 
   3190 foperr_out_not_qnan:
   3191 	mov.l		&0x7fffffff,%d1
   3192 	tst.b		FP_SRC_EX(%a6)
   3193 	bpl.b		foperr_out_not_qnan2
   3194 	addq.l		&0x1,%d1
   3195 foperr_out_not_qnan2:
   3196 	mov.l		%d1,L_SCR1(%a6)
   3197 
   3198 foperr_out_jmp:
   3199 	bfextu		%d0{&19:&3},%d0		# extract dst format field
   3200 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract <ea> mode,reg
   3201 	mov.w		(tbl_operr.b,%pc,%d0.w*2),%a0
   3202 	jmp		(tbl_operr.b,%pc,%a0)
   3203 
   3204 tbl_operr:
   3205 	short		foperr_out_l - tbl_operr # long word integer
   3206 	short		tbl_operr    - tbl_operr # sgl prec shouldn't happen
   3207 	short		tbl_operr    - tbl_operr # ext prec shouldn't happen
   3208 	short		foperr_exit  - tbl_operr # packed won't enter here
   3209 	short		foperr_out_w - tbl_operr # word integer
   3210 	short		tbl_operr    - tbl_operr # dbl prec shouldn't happen
   3211 	short		foperr_out_b - tbl_operr # byte integer
   3212 	short		tbl_operr    - tbl_operr # packed won't enter here
   3213 
   3214 foperr_out_b:
   3215 	mov.b		L_SCR1(%a6),%d0		# load positive default result
   3216 	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
   3217 	ble.b		foperr_out_b_save_dn	# yes
   3218 	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
   3219 	bsr.l		_dmem_write_byte	# write the default result
   3220 
   3221 	tst.l		%d1			# did dstore fail?
   3222 	bne.l		facc_out_b		# yes
   3223 
   3224 	bra.w		foperr_exit
   3225 foperr_out_b_save_dn:
   3226 	andi.w		&0x0007,%d1
   3227 	bsr.l		store_dreg_b		# store result to regfile
   3228 	bra.w		foperr_exit
   3229 
   3230 foperr_out_w:
   3231 	mov.w		L_SCR1(%a6),%d0		# load positive default result
   3232 	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
   3233 	ble.b		foperr_out_w_save_dn	# yes
   3234 	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
   3235 	bsr.l		_dmem_write_word	# write the default result
   3236 
   3237 	tst.l		%d1			# did dstore fail?
   3238 	bne.l		facc_out_w		# yes
   3239 
   3240 	bra.w		foperr_exit
   3241 foperr_out_w_save_dn:
   3242 	andi.w		&0x0007,%d1
   3243 	bsr.l		store_dreg_w		# store result to regfile
   3244 	bra.w		foperr_exit
   3245 
   3246 foperr_out_l:
   3247 	mov.l		L_SCR1(%a6),%d0		# load positive default result
   3248 	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
   3249 	ble.b		foperr_out_l_save_dn	# yes
   3250 	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
   3251 	bsr.l		_dmem_write_long	# write the default result
   3252 
   3253 	tst.l		%d1			# did dstore fail?
   3254 	bne.l		facc_out_l		# yes
   3255 
   3256 	bra.w		foperr_exit
   3257 foperr_out_l_save_dn:
   3258 	andi.w		&0x0007,%d1
   3259 	bsr.l		store_dreg_l		# store result to regfile
   3260 	bra.w		foperr_exit
   3261 
   3262 #########################################################################
   3263 # XDEF ****************************************************************	#
   3264 #	_fpsp_snan(): 060FPSP entry point for FP SNAN exception.	#
   3265 #									#
   3266 #	This handler should be the first code executed upon taking the	#
   3267 # 	FP Signalling NAN exception in an operating system.		#
   3268 #									#
   3269 # XREF ****************************************************************	#
   3270 #	_imem_read_long() - read instruction longword			#
   3271 #	fix_skewed_ops() - adjust src operand in fsave frame		#
   3272 #	_real_snan() - "callout" to operating system SNAN handler	#
   3273 #	_dmem_write_{byte,word,long}() - store data to mem (opclass 3)	#
   3274 #	store_dreg_{b,w,l}() - store data to data regfile (opclass 3)	#
   3275 #	facc_out_{b,w,l,d,x}() - store to mem took acc error (opcl 3)	#
   3276 #	_calc_ea_fout() - fix An if <ea> is -() or ()+; also get <ea>	#
   3277 #									#
   3278 # INPUT ***************************************************************	#
   3279 #	- The system stack contains the FP SNAN exception frame		#
   3280 #	- The fsave frame contains the source operand			#
   3281 # 									#
   3282 # OUTPUT **************************************************************	#
   3283 #	No access error:						#
   3284 #	- The system stack is unchanged					#
   3285 #	- The fsave frame contains the adjusted src op for opclass 0,2	#
   3286 #									#
   3287 # ALGORITHM ***********************************************************	#
   3288 #	In a system where the FP SNAN exception is enabled, the goal	#
   3289 # is to get to the handler specified at _real_snan(). But, on the 060,	#
   3290 # for opclass zero and two instructions taking this exception, the 	#
   3291 # input operand in the fsave frame may be incorrect for some cases	#
   3292 # and needs to be corrected. This handler calls fix_skewed_ops() to	#
   3293 # do just this and then exits through _real_snan().			#
   3294 #	For opclass 3 instructions, the 060 doesn't store the default	#
   3295 # SNAN result out to memory or data register file as it should.		#
   3296 # This code must emulate the move out before finally exiting through	#
   3297 # _real_snan(). The move out, if to memory, is performed using 		#
   3298 # _mem_write() "callout" routines that may return a failing result.	#
   3299 # In this special case, the handler must exit through facc_out() 	#
   3300 # which creates an access error stack frame from the current SNAN	#
   3301 # stack frame.								#
   3302 #	For the case of an extended precision opclass 3 instruction,	#
   3303 # if the effective addressing mode was -() or ()+, then the address	#
   3304 # register must get updated by calling _calc_ea_fout(). If the <ea>	#
   3305 # was -(a7) from supervisor mode, then the exception frame currently	#
   3306 # on the system stack must be carefully moved "down" to make room	#
   3307 # for the operand being moved.						#
   3308 #									#
   3309 #########################################################################
   3310 
   3311 	global		_fpsp_snan
   3312 _fpsp_snan:
   3313 
   3314 	link.w		%a6,&-LOCAL_SIZE	# init stack frame
   3315 
   3316 	fsave		FP_SRC(%a6)		# grab the "busy" frame
   3317 
   3318  	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
   3319 	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
   3320  	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
   3321 
   3322 # the FPIAR holds the "current PC" of the faulting instruction
   3323 	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
   3324 
   3325 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   3326 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   3327 	bsr.l		_imem_read_long		# fetch the instruction words
   3328 	mov.l		%d0,EXC_OPWORD(%a6)
   3329 
   3330 ##############################################################################
   3331 
   3332 	btst		&13,%d0			# is instr an fmove out?
   3333 	bne.w		fsnan_out		# fmove out
   3334 
   3335 
   3336 # here, we simply see if the operand in the fsave frame needs to be "unskewed".
   3337 # this would be the case for opclass two operations with a source infinity or
   3338 # denorm operand in the sgl or dbl format. NANs also become skewed and must be
   3339 # fixed here.
   3340 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
   3341 	bsr.l		fix_skewed_ops		# fix src op
   3342 
   3343 fsnan_exit:
   3344 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   3345 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   3346 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   3347 
   3348 	frestore	FP_SRC(%a6)
   3349 
   3350 	unlk		%a6
   3351 	bra.l		_real_snan
   3352 
   3353 ########################################################################
   3354 
   3355 #
   3356 # the hardware does not save the default result to memory on enabled
   3357 # snan exceptions. we do this here before passing control to
   3358 # the user snan handler.
   3359 #
   3360 # byte, word, long, and packed destination format operations can pass
    3361 # through here. since packed format operations were already handled by
    3362 # fpsp_unsupp(), we need to do nothing else for them here.
   3363 # for byte, word, and long, we simply need to test the sign of the src
   3364 # operand and save the appropriate minimum or maximum integer value
   3365 # to the effective address as pointed to by the stacked effective address.
   3366 #
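         # (worked example: for a byte destination the default result written below
         #  is the most significant byte of the SNAN mantissa with bit 6 forced to
         #  one -- i.e. the NAN is quieted before being stored; word and long
         #  destinations set bits 14 and 30 of their upper word/longword likewise.)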
   3367 fsnan_out:
   3368 
   3369 	bfextu		%d0{&19:&3},%d0		# extract dst format field
   3370 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract <ea> mode,reg
   3371 	mov.w		(tbl_snan.b,%pc,%d0.w*2),%a0
   3372 	jmp		(tbl_snan.b,%pc,%a0)
   3373 
   3374 tbl_snan:
   3375 	short		fsnan_out_l - tbl_snan # long word integer
    3376 	short		fsnan_out_s - tbl_snan # sgl prec
    3377 	short		fsnan_out_x - tbl_snan # ext prec
   3378 	short		tbl_snan    - tbl_snan # packed needs no help
   3379 	short		fsnan_out_w - tbl_snan # word integer
    3380 	short		fsnan_out_d - tbl_snan # dbl prec
   3381 	short		fsnan_out_b - tbl_snan # byte integer
   3382 	short		tbl_snan    - tbl_snan # packed needs no help
   3383 
   3384 fsnan_out_b:
   3385 	mov.b		FP_SRC_HI(%a6),%d0	# load upper byte of SNAN
   3386 	bset		&6,%d0			# set SNAN bit
   3387 	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
   3388 	ble.b		fsnan_out_b_dn		# yes
   3389 	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
   3390 	bsr.l		_dmem_write_byte	# write the default result
   3391 
   3392 	tst.l		%d1			# did dstore fail?
   3393 	bne.l		facc_out_b		# yes
   3394 
   3395 	bra.w		fsnan_exit
   3396 fsnan_out_b_dn:
   3397 	andi.w		&0x0007,%d1
   3398 	bsr.l		store_dreg_b		# store result to regfile
   3399 	bra.w		fsnan_exit
   3400 
   3401 fsnan_out_w:
   3402 	mov.w		FP_SRC_HI(%a6),%d0	# load upper word of SNAN
   3403 	bset		&14,%d0			# set SNAN bit
   3404 	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
   3405 	ble.b		fsnan_out_w_dn		# yes
   3406 	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
   3407 	bsr.l		_dmem_write_word	# write the default result
   3408 
   3409 	tst.l		%d1			# did dstore fail?
   3410 	bne.l		facc_out_w		# yes
   3411 
   3412 	bra.w		fsnan_exit
   3413 fsnan_out_w_dn:
   3414 	andi.w		&0x0007,%d1
   3415 	bsr.l		store_dreg_w		# store result to regfile
   3416 	bra.w		fsnan_exit
   3417 
   3418 fsnan_out_l:
   3419 	mov.l		FP_SRC_HI(%a6),%d0	# load upper longword of SNAN
   3420 	bset		&30,%d0			# set SNAN bit
   3421 	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
   3422 	ble.b		fsnan_out_l_dn		# yes
   3423 	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
   3424 	bsr.l		_dmem_write_long	# write the default result
   3425 
   3426 	tst.l		%d1			# did dstore fail?
   3427 	bne.l		facc_out_l		# yes
   3428 
   3429 	bra.w		fsnan_exit
   3430 fsnan_out_l_dn:
   3431 	andi.w		&0x0007,%d1
   3432 	bsr.l		store_dreg_l		# store result to regfile
   3433 	bra.w		fsnan_exit
   3434 
   3435 fsnan_out_s:
   3436 	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
   3437 	ble.b		fsnan_out_d_dn		# yes
   3438 	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
   3439 	andi.l		&0x80000000,%d0		# keep sign
   3440 	ori.l		&0x7fc00000,%d0		# insert new exponent,SNAN bit
   3441 	mov.l		FP_SRC_HI(%a6),%d1	# load mantissa
   3442 	lsr.l		&0x8,%d1		# shift mantissa for sgl
   3443 	or.l		%d1,%d0			# create sgl SNAN
   3444 	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
   3445 	bsr.l		_dmem_write_long	# write the default result
   3446 
   3447 	tst.l		%d1			# did dstore fail?
   3448 	bne.l		facc_out_l		# yes
   3449 
   3450 	bra.w		fsnan_exit
   3451 fsnan_out_d_dn:
   3452 	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
   3453 	andi.l		&0x80000000,%d0		# keep sign
   3454 	ori.l		&0x7fc00000,%d0		# insert new exponent,SNAN bit
   3455 	mov.l		%d1,-(%sp)
   3456 	mov.l		FP_SRC_HI(%a6),%d1	# load mantissa
   3457 	lsr.l		&0x8,%d1		# shift mantissa for sgl
   3458 	or.l		%d1,%d0			# create sgl SNAN
   3459 	mov.l		(%sp)+,%d1
   3460 	andi.w		&0x0007,%d1
   3461 	bsr.l		store_dreg_l		# store result to regfile
   3462 	bra.w		fsnan_exit
   3463 
   3464 fsnan_out_d:
   3465 	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
   3466 	andi.l		&0x80000000,%d0		# keep sign
   3467 	ori.l		&0x7ff80000,%d0		# insert new exponent,SNAN bit
   3468 	mov.l		FP_SRC_HI(%a6),%d1	# load hi mantissa
   3469 	mov.l		%d0,FP_SCR0_EX(%a6)	# store to temp space
   3470 	mov.l		&11,%d0			# load shift amt
   3471 	lsr.l		%d0,%d1
   3472 	or.l		%d1,FP_SCR0_EX(%a6)	# create dbl hi
   3473 	mov.l		FP_SRC_HI(%a6),%d1	# load hi mantissa
   3474 	andi.l		&0x000007ff,%d1
   3475 	ror.l		%d0,%d1
   3476 	mov.l		%d1,FP_SCR0_HI(%a6)	# store to temp space
   3477 	mov.l		FP_SRC_LO(%a6),%d1	# load lo mantissa
   3478 	lsr.l		%d0,%d1
   3479 	or.l		%d1,FP_SCR0_HI(%a6)	# create dbl lo
   3480 	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
   3481 	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
   3482 	movq.l		&0x8,%d0		# pass: size of 8 bytes
   3483 	bsr.l		_dmem_write		# write the default result
   3484 
   3485 	tst.l		%d1			# did dstore fail?
   3486 	bne.l		facc_out_d		# yes
   3487 
   3488 	bra.w		fsnan_exit
   3489 
   3490 # for extended precision, if the addressing mode is pre-decrement or
   3491 # post-increment, then the address register did not get updated.
   3492 # in addition, for pre-decrement, the stacked <ea> is incorrect.
   3493 fsnan_out_x:
   3494 	clr.b		SPCOND_FLG(%a6)		# clear special case flag
   3495 
   3496 	mov.w		FP_SRC_EX(%a6),FP_SCR0_EX(%a6)
   3497 	clr.w		2+FP_SCR0(%a6)
   3498 	mov.l		FP_SRC_HI(%a6),%d0
   3499 	bset		&30,%d0
   3500 	mov.l		%d0,FP_SCR0_HI(%a6)
   3501 	mov.l		FP_SRC_LO(%a6),FP_SCR0_LO(%a6)
   3502 
   3503 	btst		&0x5,EXC_SR(%a6)	# supervisor mode exception?
   3504 	bne.b		fsnan_out_x_s		# yes
   3505 
   3506 	mov.l		%usp,%a0		# fetch user stack pointer
   3507 	mov.l		%a0,EXC_A7(%a6)		# save on stack for calc_ea()
   3508 	mov.l		(%a6),EXC_A6(%a6)
   3509 
   3510 	bsr.l		_calc_ea_fout		# find the correct ea,update An
   3511 	mov.l		%a0,%a1
   3512 	mov.l		%a0,EXC_EA(%a6)		# stack correct <ea>
   3513 
   3514 	mov.l		EXC_A7(%a6),%a0
   3515 	mov.l		%a0,%usp		# restore user stack pointer
   3516 	mov.l		EXC_A6(%a6),(%a6)
   3517 
   3518 fsnan_out_x_save:
   3519 	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
   3520 	movq.l		&0xc,%d0		# pass: size of extended
   3521 	bsr.l		_dmem_write		# write the default result
   3522 
   3523 	tst.l		%d1			# did dstore fail?
   3524 	bne.l		facc_out_x		# yes
   3525 
   3526 	bra.w		fsnan_exit
   3527 
   3528 fsnan_out_x_s:
   3529 	mov.l		(%a6),EXC_A6(%a6)
   3530 
   3531 	bsr.l		_calc_ea_fout		# find the correct ea,update An
   3532 	mov.l		%a0,%a1
   3533 	mov.l		%a0,EXC_EA(%a6)		# stack correct <ea>
   3534 
   3535 	mov.l		EXC_A6(%a6),(%a6)
   3536 
   3537 	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
   3538 	bne.b		fsnan_out_x_save	# no
   3539 
   3540 # the operation was "fmove.x SNAN,-(a7)" from supervisor mode.
   3541 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   3542 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   3543 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   3544 
   3545 	frestore	FP_SRC(%a6)
   3546 
   3547 	mov.l		EXC_A6(%a6),%a6		# restore frame pointer
   3548 
   3549 	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
   3550 	mov.l		LOCAL_SIZE+EXC_PC+0x2(%sp),LOCAL_SIZE+EXC_PC+0x2-0xc(%sp)
   3551 	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
   3552 
   3553 	mov.l		LOCAL_SIZE+FP_SCR0_EX(%sp),LOCAL_SIZE+EXC_SR(%sp)
   3554 	mov.l		LOCAL_SIZE+FP_SCR0_HI(%sp),LOCAL_SIZE+EXC_PC+0x2(%sp)
   3555 	mov.l		LOCAL_SIZE+FP_SCR0_LO(%sp),LOCAL_SIZE+EXC_EA(%sp)
   3556 
   3557 	add.l		&LOCAL_SIZE-0x8,%sp
   3558 
   3559 	bra.l		_real_snan
   3560 
   3561 #########################################################################
   3562 # XDEF ****************************************************************	#
   3563 #	_fpsp_inex(): 060FPSP entry point for FP Inexact exception.	#
   3564 #									#
   3565 #	This handler should be the first code executed upon taking the	#
   3566 # 	FP Inexact exception in an operating system.			#
   3567 #									#
   3568 # XREF ****************************************************************	#
   3569 #	_imem_read_long() - read instruction longword			#
   3570 #	fix_skewed_ops() - adjust src operand in fsave frame		#
   3571 #	set_tag_x() - determine optype of src/dst operands		#
   3572 #	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
   3573 #	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
   3574 #	load_fpn2() - load dst operand from FP regfile			#
   3575 #	smovcr() - emulate an "fmovcr" instruction			#
   3576 #	fout() - emulate an opclass 3 instruction			#
    3577 #	tbl_unsupp - addr of table of emulation routines for opclass 0,2	#
   3578 #	_real_inex() - "callout" to operating system inexact handler	#
   3579 #									#
   3580 # INPUT ***************************************************************	#
   3581 #	- The system stack contains the FP Inexact exception frame	#
   3582 #	- The fsave frame contains the source operand			#
   3583 # 									#
   3584 # OUTPUT **************************************************************	#
   3585 #	- The system stack is unchanged					#
   3586 #	- The fsave frame contains the adjusted src op for opclass 0,2	#
   3587 #									#
   3588 # ALGORITHM ***********************************************************	#
   3589 #	In a system where the FP Inexact exception is enabled, the goal	#
   3590 # is to get to the handler specified at _real_inex(). But, on the 060,	#
    3591 # for opclass zero and two instructions taking this exception, the	#
   3592 # hardware doesn't store the correct result to the destination FP	#
   3593 # register as did the '040 and '881/2. This handler must emulate the 	#
   3594 # instruction in order to get this value and then store it to the 	#
   3595 # correct register before calling _real_inex().				#
   3596 #	For opclass 3 instructions, the 060 doesn't store the default	#
   3597 # inexact result out to memory or data register file as it should.	#
   3598 # This code must emulate the move out by calling fout() before finally	#
   3599 # exiting through _real_inex().						#
   3600 #									#
   3601 #########################################################################
   3602 
   3603 	global		_fpsp_inex
   3604 _fpsp_inex:
   3605 
   3606 	link.w		%a6,&-LOCAL_SIZE	# init stack frame
   3607 
   3608 	fsave		FP_SRC(%a6)		# grab the "busy" frame
   3609 
   3610  	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
   3611 	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
   3612  	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
   3613 
   3614 # the FPIAR holds the "current PC" of the faulting instruction
   3615 	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
   3616 
   3617 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   3618 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   3619 	bsr.l		_imem_read_long		# fetch the instruction words
   3620 	mov.l		%d0,EXC_OPWORD(%a6)
   3621 
   3622 ##############################################################################
   3623 
   3624 	btst		&13,%d0			# is instr an fmove out?
   3625 	bne.w		finex_out		# fmove out
   3626 
   3627 
   3628 # the hardware, for "fabs" and "fneg" w/ a long source format, puts the
   3629 # longword integer directly into the upper longword of the mantissa along
   3630 # w/ an exponent value of 0x401e. we convert this to extended precision here.
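         # (worked example: with exponent 0x401e the 64-bit mantissa carries a
         #  weight of 2^-32, so a long source such as 4 sitting in the upper
         #  mantissa longword still denotes the value 4; the fmov.l below simply
         #  re-converts that longword into a normal extended-precision operand.)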
   3631 	bfextu		%d0{&19:&3},%d0		# fetch instr size
   3632 	bne.b		finex_cont		# instr size is not long
   3633 	cmpi.w		FP_SRC_EX(%a6),&0x401e	# is exponent 0x401e?
   3634 	bne.b		finex_cont		# no
   3635 	fmov.l		&0x0,%fpcr
   3636 	fmov.l		FP_SRC_HI(%a6),%fp0	# load integer src
   3637 	fmov.x		%fp0,FP_SRC(%a6)	# store integer as extended precision
   3638 	mov.w		&0xe001,0x2+FP_SRC(%a6)
   3639 
   3640 finex_cont:
   3641 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
   3642 	bsr.l		fix_skewed_ops		# fix src op
   3643 
   3644 # Here, we zero the ccode and exception byte field since we're going to
   3645 # emulate the whole instruction. Notice, though, that we don't kill the
   3646 # INEX1 bit. This is because a packed op has long since been converted
   3647 # to extended before arriving here. Therefore, we need to retain the
   3648 # INEX1 bit from when the operand was first converted.
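         # (mask note: USER_FPSR is the 32-bit FPSR image -- condition code byte in
         #  bits 31-24, quotient byte in 23-16, exception status in 15-8, accrued
         #  byte in 7-0 -- so the and below clears the ccodes and status bits 15-9
         #  while keeping the quotient byte, INEX1 (bit 8), and the accrued byte.)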
    3649 	andi.l		&0x00ff01ff,USER_FPSR(%a6) # zero all but accrued field
   3650 
   3651 	fmov.l		&0x0,%fpcr		# zero current control regs
   3652 	fmov.l		&0x0,%fpsr
   3653 
   3654 	bfextu		EXC_EXTWORD(%a6){&0:&6},%d1 # extract upper 6 of cmdreg
   3655 	cmpi.b		%d1,&0x17		# is op an fmovecr?
   3656 	beq.w		finex_fmovcr		# yes
   3657 
   3658 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
   3659 	bsr.l		set_tag_x		# tag the operand type
   3660 	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM
   3661 
   3662 # bits four and five of the fp extension word separate the monadic and dyadic
   3663 # operations that can pass through fpsp_inex(). remember that fcmp and ftst
   3664 # will never take this exception, but fsincos will.
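         # (example: the low seven extension bits are 0x22 for fadd -- bit 5 set,
         #  so the dst operand gets loaded below -- 0x04 for fsqrt -- bit 5 clear,
         #  so it is treated as monadic -- and 0x30-0x37 for fsincos, where bits 5
         #  and 4 are both set, which is why the extra bit-4 test skips the dst load.)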
   3665 	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
   3666 	beq.b		finex_extract		# monadic
   3667 
   3668 	btst		&0x4,1+EXC_CMDREG(%a6)	# is operation an fsincos?
   3669 	bne.b		finex_extract		# yes
   3670 
   3671 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
   3672 	bsr.l		load_fpn2		# load dst into FP_DST
   3673 
   3674 	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
   3675 	bsr.l		set_tag_x		# tag the operand type
   3676 	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
   3677 	bne.b		finex_op2_done		# no
   3678 	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
   3679 finex_op2_done:
   3680 	mov.b		%d0,DTAG(%a6)		# save dst optype tag
   3681 
   3682 finex_extract:
   3683 	clr.l		%d0
   3684 	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
   3685 
   3686 	mov.b		1+EXC_CMDREG(%a6),%d1
   3687 	andi.w		&0x007f,%d1		# extract extension
   3688 
   3689 	lea		FP_SRC(%a6),%a0
   3690 	lea		FP_DST(%a6),%a1
   3691 
   3692 	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
   3693 	jsr		(tbl_unsupp.l,%pc,%d1.l*1)
   3694 
   3695 # the operation has been emulated. the result is in fp0.
   3696 finex_save:
   3697 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0
   3698 	bsr.l		store_fpreg
   3699 
   3700 finex_exit:
   3701 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   3702 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   3703 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   3704 
   3705 	frestore	FP_SRC(%a6)
   3706 
   3707 	unlk		%a6
   3708 	bra.l		_real_inex
   3709 
   3710 finex_fmovcr:
   3711 	clr.l		%d0
   3712 	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec,mode
   3713 	mov.b		1+EXC_CMDREG(%a6),%d1
   3714 	andi.l		&0x0000007f,%d1		# pass rom offset
   3715 	bsr.l		smovcr
   3716 	bra.b		finex_save
   3717 
   3718 ########################################################################
   3719 
   3720 #
   3721 # the hardware does not save the default result to memory on enabled
   3722 # inexact exceptions. we do this here before passing control to
   3723 # the user inexact handler.
   3724 #
   3725 # byte, word, and long destination format operations can pass
   3726 # through here. so can double and single precision.
   3727 # although packed opclass three operations can take inexact
   3728 # exceptions, they won't pass through here since they are caught
   3729 # first by the unsupported data format exception handler. that handler
   3730 # sends them directly to _real_inex() if necessary.
   3731 #
   3732 finex_out:
   3733 
   3734 	mov.b		&NORM,STAG(%a6)		# src is a NORM
   3735 
   3736 	clr.l		%d0
   3737 	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec,mode
   3738 
   3739 	andi.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field
   3740 
   3741 	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
   3742 
   3743 	bsr.l		fout			# store the default result
   3744 
   3745 	bra.b		finex_exit
   3746 
   3747 #########################################################################
   3748 # XDEF ****************************************************************	#
   3749 #	_fpsp_dz(): 060FPSP entry point for FP DZ exception.		#
   3750 #									#
   3751 #	This handler should be the first code executed upon taking	#
   3752 #	the FP DZ exception in an operating system.			#
   3753 #									#
   3754 # XREF ****************************************************************	#
   3755 #	_imem_read_long() - read instruction longword from memory	#
   3756 #	fix_skewed_ops() - adjust fsave operand				#
   3757 #	_real_dz() - "callout" exit point from FP DZ handler		#
   3758 #									#
   3759 # INPUT ***************************************************************	#
   3760 #	- The system stack contains the FP DZ exception stack.		#
   3761 #	- The fsave frame contains the source operand.			#
   3762 # 									#
   3763 # OUTPUT **************************************************************	#
   3764 #	- The system stack contains the FP DZ exception stack.		#
   3765 #	- The fsave frame contains the adjusted source operand.		#
   3766 #									#
   3767 # ALGORITHM ***********************************************************	#
   3768 #	In a system where the DZ exception is enabled, the goal is to	#
   3769 # get to the handler specified at _real_dz(). But, on the 060, when the	#
   3770 # exception is taken, the input operand in the fsave state frame may	#
   3771 # be incorrect for some cases and need to be adjusted. So, this package	#
   3772 # adjusts the operand using fix_skewed_ops() and then branches to	#
   3773 # _real_dz(). 								#
   3774 #									#
   3775 #########################################################################
   3776 
   3777 	global		_fpsp_dz
   3778 _fpsp_dz:
   3779 
   3780 	link.w		%a6,&-LOCAL_SIZE	# init stack frame
   3781 
   3782 	fsave		FP_SRC(%a6)		# grab the "busy" frame
   3783 
   3784  	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
   3785 	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
   3786  	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
   3787 
   3788 # the FPIAR holds the "current PC" of the faulting instruction
   3789 	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
   3790 
   3791 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   3792 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   3793 	bsr.l		_imem_read_long		# fetch the instruction words
   3794 	mov.l		%d0,EXC_OPWORD(%a6)
   3795 
   3796 ##############################################################################
   3797 
   3798 
   3799 # here, we simply see if the operand in the fsave frame needs to be "unskewed".
   3800 # this would be the case for opclass two operations with a source zero
   3801 # in the sgl or dbl format.
   3802 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
   3803 	bsr.l		fix_skewed_ops		# fix src op
   3804 
   3805 fdz_exit:
   3806 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   3807 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   3808 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   3809 
   3810 	frestore	FP_SRC(%a6)
   3811 
   3812 	unlk		%a6
   3813 	bra.l		_real_dz
   3814 
   3815 #########################################################################
   3816 # XDEF ****************************************************************	#
   3817 #	_fpsp_fline(): 060FPSP entry point for "Line F emulator"	#
   3818 #		       exception when the "reduced" version of the 	#
   3819 #		       FPSP is implemented that does not emulate	#
   3820 #		       FP unimplemented instructions.			#
   3821 #									#
   3822 #	This handler should be the first code executed upon taking a	#
   3823 #	"Line F Emulator" exception in an operating system integrating	#
   3824 #	the reduced version of 060FPSP.					#
   3825 #									#
   3826 # XREF ****************************************************************	#
   3827 #	_real_fpu_disabled() - Handle "FPU disabled" exceptions		#
   3828 #	_real_fline() - Handle all other cases (treated equally)	#
   3829 #									#
   3830 # INPUT ***************************************************************	#
   3831 #	- The system stack contains a "Line F Emulator" exception	#
   3832 #	  stack frame.							#
   3833 # 									#
   3834 # OUTPUT **************************************************************	#
   3835 #	- The system stack is unchanged.				#
   3836 #									#
   3837 # ALGORITHM ***********************************************************	#
   3838 # 	When a "Line F Emulator" exception occurs in a system where	#
   3839 # "FPU Unimplemented" instructions will not be emulated, the exception	#
    3840 # can occur because the FPU is disabled or the instruction is to be	#
    3841 # classified as "Line F". This module determines which case exists and	#
   3842 # calls the appropriate "callout".					#
   3843 #									#
   3844 #########################################################################
   3845 
   3846 	global		_fpsp_fline
   3847 _fpsp_fline:
   3848 
   3849 # check to see if the FPU is disabled. if so, jump to the OS entry
   3850 # point for that condition.
   3851 	cmpi.w		0x6(%sp),&0x402c
   3852 	beq.l		_real_fpu_disabled
   3853 
   3854 	bra.l		_real_fline
   3855 
   3856 #########################################################################
   3857 # XDEF ****************************************************************	#
   3858 #	_dcalc_ea(): calc correct <ea> from <ea> stacked on exception	#
   3859 #									#
   3860 # XREF ****************************************************************	#
   3861 #	inc_areg() - increment an address register			#
   3862 #	dec_areg() - decrement an address register			#
   3863 #									#
   3864 # INPUT ***************************************************************	#
   3865 #	d0 = number of bytes to adjust <ea> by				#
   3866 # 									#
   3867 # OUTPUT **************************************************************	#
    3868 #	a0 = the correct <ea>						#
   3869 #									#
   3870 # ALGORITHM ***********************************************************	#
   3871 # "Dummy" CALCulate Effective Address:					#
   3872 # 	The stacked <ea> for FP unimplemented instructions and opclass	#
   3873 #	two packed instructions is correct with the exception of...	#
   3874 #									#
   3875 #	1) -(An)   : The register is not updated regardless of size.	#
   3876 #		     Also, for extended precision and packed, the 	#
   3877 #		     stacked <ea> value is 8 bytes too big		#
   3878 #	2) (An)+   : The register is not updated.			#
   3879 #	3) #<data> : The upper longword of the immediate operand is 	#
    3880 #		     stacked; b, w, l, and s sizes are completely stacked;	#
    3881 #		     d, x, and p are not.				#
   3882 #									#
   3883 #########################################################################
   3884 
   3885 	global		_dcalc_ea
   3886 _dcalc_ea:
   3887 	mov.l		%d0, %a0		# move # bytes to %a0
   3888 
   3889 	mov.b		1+EXC_OPWORD(%a6), %d0	# fetch opcode word
   3890 	mov.l		%d0, %d1		# make a copy
   3891 
   3892 	andi.w		&0x38, %d0		# extract mode field
   3893 	andi.l		&0x7, %d1		# extract reg  field
   3894 
   3895 	cmpi.b		%d0,&0x18		# is mode (An)+ ?
   3896 	beq.b		dcea_pi			# yes
   3897 
   3898 	cmpi.b		%d0,&0x20		# is mode -(An) ?
   3899 	beq.b		dcea_pd			# yes
   3900 
   3901 	or.w		%d1,%d0			# concat mode,reg
   3902 	cmpi.b		%d0,&0x3c		# is mode #<data>?
   3903 
   3904 	beq.b		dcea_imm		# yes
   3905 
   3906 	mov.l		EXC_EA(%a6),%a0		# return <ea>
   3907 	rts
   3908 
   3909 # need to set immediate data flag here since we'll need to do
   3910 # an imem_read to fetch this later.
   3911 dcea_imm:
   3912 	mov.b		&immed_flg,SPCOND_FLG(%a6)
    3913 	lea		([USER_FPIAR,%a6],0x4),%a0 # return <ea> of the immediate data
   3914 	rts
   3915 
   3916 # here, the <ea> is stacked correctly. however, we must update the
   3917 # address register...
   3918 dcea_pi:
   3919 	mov.l		%a0,%d0			# pass amt to inc by
   3920 	bsr.l		inc_areg		# inc addr register
   3921 
   3922 	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
   3923 	rts
   3924 
    3925 # the <ea> is stacked correctly for all but extended and packed, for
    3926 # which the stacked <ea> is 8 bytes too large.
   3927 # it would make no sense to have a pre-decrement to a7 in supervisor
   3928 # mode so we don't even worry about this tricky case here : )
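         # (illustrative example: for an extended-precision "-(a2)" source operand,
         #  dec_areg() below backs a2 up by the 12-byte operand size, and since the
         #  stacked <ea> points 8 bytes past the true operand start, dcea_pd2
         #  subtracts 8 and writes the corrected value back into the frame.)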
   3929 dcea_pd:
   3930 	mov.l		%a0,%d0			# pass amt to dec by
   3931 	bsr.l		dec_areg		# dec addr register
   3932 
   3933 	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
   3934 
   3935 	cmpi.b		%d0,&0xc		# is opsize ext or packed?
   3936 	beq.b		dcea_pd2		# yes
   3937 	rts
   3938 dcea_pd2:
   3939 	sub.l		&0x8,%a0		# correct <ea>
   3940 	mov.l		%a0,EXC_EA(%a6)		# put correct <ea> on stack
   3941 	rts
   3942 
   3943 #########################################################################
   3944 # XDEF ****************************************************************	#
   3945 # 	_calc_ea_fout(): calculate correct stacked <ea> for extended	#
   3946 #			 and packed data opclass 3 operations.		#
   3947 #									#
   3948 # XREF ****************************************************************	#
   3949 #	None								#
   3950 #									#
   3951 # INPUT ***************************************************************	#
   3952 #	None								#
   3953 # 									#
   3954 # OUTPUT **************************************************************	#
   3955 #	a0 = return correct effective address				#
   3956 #									#
   3957 # ALGORITHM ***********************************************************	#
   3958 #	For opclass 3 extended and packed data operations, the <ea>	#
   3959 # stacked for the exception is incorrect for -(an) and (an)+ addressing	#
    3960 # modes. Also, while we're at it, the address register itself must get	#
   3961 # updated.								#
   3962 # 	So, for -(an), we must subtract 8 off of the stacked <ea> value	#
   3963 # and return that value as the correct <ea> and store that value in An.	#
   3964 # For (an)+, the stacked <ea> is correct but we must adjust An by +12.	#
   3965 #									#
   3966 #########################################################################
   3967 
   3968 # This calc_ea is currently used to retrieve the correct <ea>
   3969 # for fmove outs of type extended and packed.
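         # (worked example, -(an) case: for "fmove.x fp0,-(a3)" the true destination
         #  is the original a3 minus 12, but the stacked <ea> is that address plus
         #  8, so ceaf_pd subtracts 8 from the stacked value, returns it in a0, and
         #  the per-register stub stores it back into a3 (or a3's saved image).)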
   3970 	global		_calc_ea_fout
   3971 _calc_ea_fout:
   3972 	mov.b		1+EXC_OPWORD(%a6),%d0	# fetch opcode word
   3973 	mov.l		%d0,%d1			# make a copy
   3974 
   3975 	andi.w		&0x38,%d0		# extract mode field
   3976 	andi.l		&0x7,%d1		# extract reg  field
   3977 
   3978 	cmpi.b		%d0,&0x18		# is mode (An)+ ?
   3979 	beq.b		ceaf_pi			# yes
   3980 
   3981 	cmpi.b		%d0,&0x20		# is mode -(An) ?
   3982 	beq.w		ceaf_pd			# yes
   3983 
   3984 	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
   3985 	rts
   3986 
   3987 # (An)+ : extended and packed fmove out
   3988 #	: stacked <ea> is correct
   3989 #	: "An" not updated
   3990 ceaf_pi:
   3991 	mov.w		(tbl_ceaf_pi.b,%pc,%d1.w*2),%d1
   3992 	mov.l		EXC_EA(%a6),%a0
   3993 	jmp		(tbl_ceaf_pi.b,%pc,%d1.w*1)
   3994 
   3995 	swbeg		&0x8
   3996 tbl_ceaf_pi:
   3997 	short		ceaf_pi0 - tbl_ceaf_pi
   3998 	short		ceaf_pi1 - tbl_ceaf_pi
   3999 	short		ceaf_pi2 - tbl_ceaf_pi
   4000 	short		ceaf_pi3 - tbl_ceaf_pi
   4001 	short		ceaf_pi4 - tbl_ceaf_pi
   4002 	short		ceaf_pi5 - tbl_ceaf_pi
   4003 	short		ceaf_pi6 - tbl_ceaf_pi
   4004 	short		ceaf_pi7 - tbl_ceaf_pi
   4005 
   4006 ceaf_pi0:
   4007 	addi.l		&0xc,EXC_DREGS+0x8(%a6)
   4008 	rts
   4009 ceaf_pi1:
   4010 	addi.l		&0xc,EXC_DREGS+0xc(%a6)
   4011 	rts
   4012 ceaf_pi2:
   4013 	add.l		&0xc,%a2
   4014 	rts
   4015 ceaf_pi3:
   4016 	add.l		&0xc,%a3
   4017 	rts
   4018 ceaf_pi4:
   4019 	add.l		&0xc,%a4
   4020 	rts
   4021 ceaf_pi5:
   4022 	add.l		&0xc,%a5
   4023 	rts
   4024 ceaf_pi6:
   4025 	addi.l		&0xc,EXC_A6(%a6)
   4026 	rts
   4027 ceaf_pi7:
   4028 	mov.b		&mia7_flg,SPCOND_FLG(%a6)
   4029 	addi.l		&0xc,EXC_A7(%a6)
   4030 	rts
   4031 
   4032 # -(An) : extended and packed fmove out
   4033 #	: stacked <ea> = actual <ea> + 8
   4034 #	: "An" not updated
   4035 ceaf_pd:
   4036 	mov.w		(tbl_ceaf_pd.b,%pc,%d1.w*2),%d1
   4037 	mov.l		EXC_EA(%a6),%a0
   4038 	sub.l		&0x8,%a0
   4039 	sub.l		&0x8,EXC_EA(%a6)
   4040 	jmp		(tbl_ceaf_pd.b,%pc,%d1.w*1)
   4041 
   4042 	swbeg		&0x8
   4043 tbl_ceaf_pd:
   4044 	short		ceaf_pd0 - tbl_ceaf_pd
   4045 	short		ceaf_pd1 - tbl_ceaf_pd
   4046 	short		ceaf_pd2 - tbl_ceaf_pd
   4047 	short		ceaf_pd3 - tbl_ceaf_pd
   4048 	short		ceaf_pd4 - tbl_ceaf_pd
   4049 	short		ceaf_pd5 - tbl_ceaf_pd
   4050 	short		ceaf_pd6 - tbl_ceaf_pd
   4051 	short		ceaf_pd7 - tbl_ceaf_pd
   4052 
   4053 ceaf_pd0:
   4054 	mov.l		%a0,EXC_DREGS+0x8(%a6)
   4055 	rts
   4056 ceaf_pd1:
   4057 	mov.l		%a0,EXC_DREGS+0xc(%a6)
   4058 	rts
   4059 ceaf_pd2:
   4060 	mov.l		%a0,%a2
   4061 	rts
   4062 ceaf_pd3:
   4063 	mov.l		%a0,%a3
   4064 	rts
   4065 ceaf_pd4:
   4066 	mov.l		%a0,%a4
   4067 	rts
   4068 ceaf_pd5:
   4069 	mov.l		%a0,%a5
   4070 	rts
   4071 ceaf_pd6:
   4072 	mov.l		%a0,EXC_A6(%a6)
   4073 	rts
   4074 ceaf_pd7:
   4075 	mov.l		%a0,EXC_A7(%a6)
   4076 	mov.b		&mda7_flg,SPCOND_FLG(%a6)
   4077 	rts
   4078 
   4079 #
   4080 # This table holds the offsets of the emulation routines for each individual
   4081 # math operation relative to the address of this table. Included are
   4082 # routines like fadd/fmul/fabs. The transcendentals ARE NOT. This is because
    4083 # this table is for the version of the 060FPSP without transcendentals.
   4084 # The location within the table is determined by the extension bits of the
   4085 # operation longword.
   4086 #
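         # (example of the indexing: the low seven extension bits of the command
         #  word, scaled by four, select an entry; 0x22 (fadd) thus lands on the
         #  fadd routine, while 0x02 (fsinh) lands on a zero offset -- i.e. no
         #  emulation routine in this transcendental-less build.)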
   4087 
   4088 	swbeg		&109
   4089 tbl_unsupp:
   4090 	long		fin	 	- tbl_unsupp	# 00: fmove
   4091 	long		fint	 	- tbl_unsupp	# 01: fint
   4092 	long		tbl_unsupp 	- tbl_unsupp	# 02: fsinh
   4093 	long		fintrz	 	- tbl_unsupp	# 03: fintrz
   4094 	long		fsqrt	 	- tbl_unsupp	# 04: fsqrt
   4095 	long		tbl_unsupp	- tbl_unsupp
   4096 	long		tbl_unsupp	- tbl_unsupp	# 06: flognp1
   4097 	long		tbl_unsupp	- tbl_unsupp
   4098 	long		tbl_unsupp	- tbl_unsupp	# 08: fetoxm1
   4099 	long		tbl_unsupp	- tbl_unsupp	# 09: ftanh
   4100 	long		tbl_unsupp	- tbl_unsupp	# 0a: fatan
   4101 	long		tbl_unsupp	- tbl_unsupp
   4102 	long		tbl_unsupp	- tbl_unsupp	# 0c: fasin
   4103 	long		tbl_unsupp	- tbl_unsupp	# 0d: fatanh
   4104 	long		tbl_unsupp	- tbl_unsupp	# 0e: fsin
   4105 	long		tbl_unsupp	- tbl_unsupp	# 0f: ftan
   4106 	long		tbl_unsupp	- tbl_unsupp	# 10: fetox
   4107 	long		tbl_unsupp	- tbl_unsupp	# 11: ftwotox
   4108 	long		tbl_unsupp	- tbl_unsupp	# 12: ftentox
   4109 	long		tbl_unsupp	- tbl_unsupp
   4110 	long		tbl_unsupp	- tbl_unsupp	# 14: flogn
   4111 	long		tbl_unsupp	- tbl_unsupp	# 15: flog10
   4112 	long		tbl_unsupp	- tbl_unsupp	# 16: flog2
   4113 	long		tbl_unsupp	- tbl_unsupp
   4114 	long		fabs		- tbl_unsupp 	# 18: fabs
   4115 	long		tbl_unsupp	- tbl_unsupp	# 19: fcosh
   4116 	long		fneg		- tbl_unsupp 	# 1a: fneg
   4117 	long		tbl_unsupp	- tbl_unsupp
   4118 	long		tbl_unsupp	- tbl_unsupp	# 1c: facos
   4119 	long		tbl_unsupp	- tbl_unsupp	# 1d: fcos
   4120 	long		tbl_unsupp	- tbl_unsupp	# 1e: fgetexp
   4121 	long		tbl_unsupp	- tbl_unsupp	# 1f: fgetman
   4122 	long		fdiv		- tbl_unsupp 	# 20: fdiv
   4123 	long		tbl_unsupp	- tbl_unsupp	# 21: fmod
   4124 	long		fadd		- tbl_unsupp 	# 22: fadd
   4125 	long		fmul		- tbl_unsupp 	# 23: fmul
   4126 	long		fsgldiv		- tbl_unsupp 	# 24: fsgldiv
   4127 	long		tbl_unsupp	- tbl_unsupp	# 25: frem
   4128 	long		tbl_unsupp	- tbl_unsupp	# 26: fscale
   4129 	long		fsglmul		- tbl_unsupp 	# 27: fsglmul
   4130 	long		fsub		- tbl_unsupp 	# 28: fsub
   4131 	long		tbl_unsupp	- tbl_unsupp
   4132 	long		tbl_unsupp	- tbl_unsupp
   4133 	long		tbl_unsupp	- tbl_unsupp
   4134 	long		tbl_unsupp	- tbl_unsupp
   4135 	long		tbl_unsupp	- tbl_unsupp
   4136 	long		tbl_unsupp	- tbl_unsupp
   4137 	long		tbl_unsupp	- tbl_unsupp
   4138 	long		tbl_unsupp	- tbl_unsupp	# 30: fsincos
   4139 	long		tbl_unsupp	- tbl_unsupp	# 31: fsincos
   4140 	long		tbl_unsupp	- tbl_unsupp	# 32: fsincos
   4141 	long		tbl_unsupp	- tbl_unsupp	# 33: fsincos
   4142 	long		tbl_unsupp	- tbl_unsupp	# 34: fsincos
   4143 	long		tbl_unsupp	- tbl_unsupp	# 35: fsincos
   4144 	long		tbl_unsupp	- tbl_unsupp	# 36: fsincos
   4145 	long		tbl_unsupp	- tbl_unsupp	# 37: fsincos
   4146 	long		fcmp		- tbl_unsupp 	# 38: fcmp
   4147 	long		tbl_unsupp	- tbl_unsupp
   4148 	long		ftst		- tbl_unsupp 	# 3a: ftst
   4149 	long		tbl_unsupp	- tbl_unsupp
   4150 	long		tbl_unsupp	- tbl_unsupp
   4151 	long		tbl_unsupp	- tbl_unsupp
   4152 	long		tbl_unsupp	- tbl_unsupp
   4153 	long		tbl_unsupp	- tbl_unsupp
   4154 	long		fsin		- tbl_unsupp 	# 40: fsmove
   4155 	long		fssqrt		- tbl_unsupp 	# 41: fssqrt
   4156 	long		tbl_unsupp	- tbl_unsupp
   4157 	long		tbl_unsupp	- tbl_unsupp
   4158 	long		fdin		- tbl_unsupp	# 44: fdmove
   4159 	long		fdsqrt		- tbl_unsupp 	# 45: fdsqrt
   4160 	long		tbl_unsupp	- tbl_unsupp
   4161 	long		tbl_unsupp	- tbl_unsupp
   4162 	long		tbl_unsupp	- tbl_unsupp
   4163 	long		tbl_unsupp	- tbl_unsupp
   4164 	long		tbl_unsupp	- tbl_unsupp
   4165 	long		tbl_unsupp	- tbl_unsupp
   4166 	long		tbl_unsupp	- tbl_unsupp
   4167 	long		tbl_unsupp	- tbl_unsupp
   4168 	long		tbl_unsupp	- tbl_unsupp
   4169 	long		tbl_unsupp	- tbl_unsupp
   4170 	long		tbl_unsupp	- tbl_unsupp
   4171 	long		tbl_unsupp	- tbl_unsupp
   4172 	long		tbl_unsupp	- tbl_unsupp
   4173 	long		tbl_unsupp	- tbl_unsupp
   4174 	long		tbl_unsupp	- tbl_unsupp
   4175 	long		tbl_unsupp	- tbl_unsupp
   4176 	long		tbl_unsupp	- tbl_unsupp
   4177 	long		tbl_unsupp	- tbl_unsupp
   4178 	long		fsabs		- tbl_unsupp 	# 58: fsabs
   4179 	long		tbl_unsupp	- tbl_unsupp
   4180 	long		fsneg		- tbl_unsupp 	# 5a: fsneg
   4181 	long		tbl_unsupp	- tbl_unsupp
   4182 	long		fdabs		- tbl_unsupp	# 5c: fdabs
   4183 	long		tbl_unsupp	- tbl_unsupp
   4184 	long		fdneg		- tbl_unsupp 	# 5e: fdneg
   4185 	long		tbl_unsupp	- tbl_unsupp
   4186 	long		fsdiv		- tbl_unsupp	# 60: fsdiv
   4187 	long		tbl_unsupp	- tbl_unsupp
   4188 	long		fsadd		- tbl_unsupp	# 62: fsadd
   4189 	long		fsmul		- tbl_unsupp	# 63: fsmul
   4190 	long		fddiv		- tbl_unsupp 	# 64: fddiv
   4191 	long		tbl_unsupp	- tbl_unsupp
   4192 	long		fdadd		- tbl_unsupp	# 66: fdadd
   4193 	long		fdmul		- tbl_unsupp 	# 67: fdmul
   4194 	long		fssub		- tbl_unsupp	# 68: fssub
   4195 	long		tbl_unsupp	- tbl_unsupp
   4196 	long		tbl_unsupp	- tbl_unsupp
   4197 	long		tbl_unsupp	- tbl_unsupp
   4198 	long		fdsub		- tbl_unsupp 	# 6c: fdsub
   4199 
   4200 #################################################
   4201 # Add this here so non-fp modules can compile.
   4202 # (smovcr is called from fpsp_inex.)
   4203 	global		smovcr
   4204 smovcr:
   4205 	bra.b		smovcr
   4206 
   4207 #########################################################################
   4208 # XDEF ****************************************************************	#
   4209 #	fmovm_dynamic(): emulate "fmovm" dynamic instruction		#
   4210 #									#
   4211 # XREF ****************************************************************	#
   4212 #	fetch_dreg() - fetch data register				#
   4213 #	{i,d,}mem_read() - fetch data from memory			#
   4214 #	_mem_write() - write data to memory				#
   4215 #	iea_iacc() - instruction memory access error occurred		#
   4216 #	iea_dacc() - data memory access error occurred			#
   4217 #	restore() - restore An index regs if access error occurred	#
   4218 #									#
   4219 # INPUT ***************************************************************	#
   4220 #	None								#
   4221 # 									#
   4222 # OUTPUT **************************************************************	#
   4223 #	If instr is "fmovm Dn,-(A7)" from supervisor mode,		#
   4224 #		d0 = size of dump					#
   4225 #		d1 = Dn							#
   4226 #	Else if instruction access error,				#
   4227 #		d0 = FSLW						#
   4228 #	Else if data access error,					#
   4229 #		d0 = FSLW						#
   4230 #		a0 = address of fault					#
   4231 #	Else								#
   4232 #		none.							#
   4233 #									#
   4234 # ALGORITHM ***********************************************************	#
   4235 #	The effective address must be calculated since this is entered	#
   4236 # from an "Unimplemented Effective Address" exception handler. So, we	#
   4237 # have our own fcalc_ea() routine here. If an access error is flagged	#
   4238 # by a _{i,d,}mem_read() call, we must exit through the special		#
   4239 # handler.								#
   4240 #	The data register is determined and its value loaded to get the	#
   4241 # string of FP registers affected. This value is used as an index into	#
   4242 # a lookup table such that we can determine the number of bytes		#
   4243 # involved. 								#
   4244 #	If the instruction is "fmovm.x <ea>,Dn", a _mem_read() is used	#
   4245 # to read in all FP values. Again, _mem_read() may fail and require a	#
   4246 # special exit. 							#
   4247 #	If the instruction is "fmovm.x DN,<ea>", a _mem_write() is used	#
   4248 # to write all FP values. _mem_write() may also fail.			#
   4249 # 	If the instruction is "fmovm.x DN,-(a7)" from supervisor mode,	#
   4250 # then we return the size of the dump and the string to the caller	#
   4251 # so that the move can occur outside of this routine. This special	#
   4252 # case is required so that moves to the system stack are handled	#
   4253 # correctly.								#
   4254 #									#
   4255 # DYNAMIC:								#
   4256 # 	fmovm.x	dn, <ea>						#
   4257 # 	fmovm.x	<ea>, dn						#
   4258 #									#
   4259 #	      <WORD 1>		      <WORD2>				#
   4260 #	1111 0010 00 |<ea>|	11@& 1000 0$$$ 0000			#
   4261 #					  				#
   4262 #	& = (0): predecrement addressing mode				#
   4263 #	    (1): postincrement or control addressing mode		#
   4264 #	@ = (0): move listed regs from memory to the FPU		#
   4265 #	    (1): move listed regs from the FPU to memory		#
   4266 #	$$$    : index of data register holding reg select mask		#
   4267 #									#
   4268 # NOTES:								#
   4269 #	If the data register holds a zero, then the			#
   4270 #	instruction is a nop.						#
   4271 #									#
   4272 #########################################################################
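        #
        # The decode performed below, as a rough C sketch (hypothetical names;
        # the real work is done by the assembly that follows):
        #
        #    unsigned dn   = (extword >> 4) & 0x7;     /* $$$: data reg w/ mask  */
        #    unsigned mask = dregs[dn] & 0xff;         /* FP register select     */
        #    unsigned size = tbl_fmovm_size[mask];     /* 12 bytes per FP reg    */
        #    int move_out  = (extword >> 13) & 0x1;    /* @: 1 = FPU -> memory   */
        #    int postinc   = (extword >> 12) & 0x1;    /* &: 1 = postinc/control */
        #    if (mask == 0) { /* nop; <ea> is still calculated */ }
        #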
   4273 
   4274 	global		fmovm_dynamic
   4275 fmovm_dynamic:
   4276 
   4277 # extract the data register in which the bit string resides...
   4278 	mov.b		1+EXC_EXTWORD(%a6),%d1	# fetch extword
   4279 	andi.w		&0x70,%d1		# extract reg bits
   4280 	lsr.b		&0x4,%d1		# shift into lo bits
   4281 
   4282 # fetch the bit string into d0...
   4283 	bsr.l		fetch_dreg		# fetch reg string
   4284 
   4285 	andi.l		&0x000000ff,%d0		# keep only lo byte
   4286 
   4287 	mov.l		%d0,-(%sp)		# save strg
   4288 	mov.b		(tbl_fmovm_size.w,%pc,%d0),%d0
   4289 	mov.l		%d0,-(%sp)		# save size
   4290 	bsr.l		fmovm_calc_ea		# calculate <ea>
   4291 	mov.l		(%sp)+,%d0		# restore size
   4292 	mov.l		(%sp)+,%d1		# restore strg
   4293 
   4294 # if the bit string is a zero, then the operation is a no-op
   4295 # but, make sure that we've calculated ea and advanced the opword pointer
   4296 	beq.w		fmovm_data_done
   4297 
   4298 # separate move ins from move outs...
   4299 	btst		&0x5,EXC_EXTWORD(%a6)	# is it a move in or out?
   4300 	beq.w		fmovm_data_in		# it's a move in
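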
   4301 
   4302 #############
   4303 # MOVE OUT: #
   4304 #############
   4305 fmovm_data_out:
   4306 	btst		&0x4,EXC_EXTWORD(%a6)	# control or predecrement?
   4307 	bne.w		fmovm_out_ctrl		# control
   4308 
   4309 ############################
   4310 fmovm_out_predec:
   4311 # for predecrement mode, the bit string is the opposite of both control
   4312 # operations and postincrement mode. (bit7 = FP7 ... bit0 = FP0)
   4313 # here, we convert it to be just like the others...
   4314 	mov.b		(tbl_fmovm_convert.w,%pc,%d1.w*1),%d1
   4315 
   4316 	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
   4317 	beq.b		fmovm_out_ctrl		# user
   4318 
   4319 fmovm_out_predec_s:
   4320 	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
   4321 	bne.b		fmovm_out_ctrl
   4322 
   4323 # the operation was unfortunately an: fmovm.x dn,-(sp)
   4324 # called from supervisor mode.
   4325 # we're also passing "size" and "strg" back to the calling routine
   4326 	rts
   4327 
   4328 ############################
   4329 fmovm_out_ctrl:
   4330 	mov.l		%a0,%a1			# move <ea> to a1
   4331 
   4332 	sub.l		%d0,%sp			# subtract size of dump
   4333 	lea		(%sp),%a0
   4334 
   4335 	tst.b		%d1			# should FP0 be moved?
   4336 	bpl.b		fmovm_out_ctrl_fp1	# no
   4337 
   4338 	mov.l		0x0+EXC_FP0(%a6),(%a0)+	# yes
   4339 	mov.l		0x4+EXC_FP0(%a6),(%a0)+
   4340 	mov.l		0x8+EXC_FP0(%a6),(%a0)+
   4341 
   4342 fmovm_out_ctrl_fp1:
   4343 	lsl.b		&0x1,%d1		# should FP1 be moved?
   4344 	bpl.b		fmovm_out_ctrl_fp2	# no
   4345 
   4346 	mov.l		0x0+EXC_FP1(%a6),(%a0)+	# yes
   4347 	mov.l		0x4+EXC_FP1(%a6),(%a0)+
   4348 	mov.l		0x8+EXC_FP1(%a6),(%a0)+
   4349 
   4350 fmovm_out_ctrl_fp2:
   4351 	lsl.b		&0x1,%d1		# should FP2 be moved?
   4352 	bpl.b		fmovm_out_ctrl_fp3	# no
   4353 
   4354 	fmovm.x		&0x20,(%a0)		# yes
   4355 	add.l		&0xc,%a0
   4356 
   4357 fmovm_out_ctrl_fp3:
   4358 	lsl.b		&0x1,%d1		# should FP3 be moved?
   4359 	bpl.b		fmovm_out_ctrl_fp4	# no
   4360 
   4361 	fmovm.x		&0x10,(%a0)		# yes
   4362 	add.l		&0xc,%a0
   4363 
   4364 fmovm_out_ctrl_fp4:
   4365 	lsl.b		&0x1,%d1		# should FP4 be moved?
   4366 	bpl.b		fmovm_out_ctrl_fp5	# no
   4367 
   4368 	fmovm.x		&0x08,(%a0)		# yes
   4369 	add.l		&0xc,%a0
   4370 
   4371 fmovm_out_ctrl_fp5:
   4372 	lsl.b		&0x1,%d1		# should FP5 be moved?
   4373 	bpl.b		fmovm_out_ctrl_fp6	# no
   4374 
   4375 	fmovm.x		&0x04,(%a0)		# yes
   4376 	add.l		&0xc,%a0
   4377 
   4378 fmovm_out_ctrl_fp6:
   4379 	lsl.b		&0x1,%d1		# should FP6 be moved?
   4380 	bpl.b		fmovm_out_ctrl_fp7	# no
   4381 
   4382 	fmovm.x		&0x02,(%a0)		# yes
   4383 	add.l		&0xc,%a0
   4384 
   4385 fmovm_out_ctrl_fp7:
   4386 	lsl.b		&0x1,%d1		# should FP7 be moved?
   4387 	bpl.b		fmovm_out_ctrl_done	# no
   4388 
   4389 	fmovm.x		&0x01,(%a0)		# yes
   4390 	add.l		&0xc,%a0
   4391 
   4392 fmovm_out_ctrl_done:
   4393 	mov.l		%a1,L_SCR1(%a6)
   4394 
   4395 	lea		(%sp),%a0		# pass: supervisor src
   4396 	mov.l		%d0,-(%sp)		# save size
   4397 	bsr.l		_dmem_write		# copy data to user mem
   4398 
   4399 	mov.l		(%sp)+,%d0
   4400 	add.l		%d0,%sp			# clear fpreg data from stack
   4401 
   4402 	tst.l		%d1			# did dstore err?
   4403 	bne.w		fmovm_out_err		# yes
   4404 
   4405 	rts
   4406 
   4407 ############
   4408 # MOVE IN: #
   4409 ############
   4410 fmovm_data_in:
   4411 	mov.l		%a0,L_SCR1(%a6)
   4412 
   4413 	sub.l		%d0,%sp			# make room for fpregs
   4414 	lea		(%sp),%a1
   4415 
   4416 	mov.l		%d1,-(%sp)		# save bit string for later
   4417 	mov.l		%d0,-(%sp)		# save # of bytes
   4418 
   4419 	bsr.l		_dmem_read		# copy data from user mem
   4420 
   4421 	mov.l		(%sp)+,%d0		# retrieve # of bytes
   4422 
   4423 	tst.l		%d1			# did dfetch fail?
   4424 	bne.w		fmovm_in_err		# yes
   4425 
   4426 	mov.l		(%sp)+,%d1		# load bit string
   4427 
   4428 	lea		(%sp),%a0		# addr of stack
   4429 
   4430 	tst.b		%d1			# should FP0 be moved?
   4431 	bpl.b		fmovm_data_in_fp1	# no
   4432 
   4433 	mov.l		(%a0)+,0x0+EXC_FP0(%a6)	# yes
   4434 	mov.l		(%a0)+,0x4+EXC_FP0(%a6)
   4435 	mov.l		(%a0)+,0x8+EXC_FP0(%a6)
   4436 
   4437 fmovm_data_in_fp1:
   4438 	lsl.b		&0x1,%d1		# should FP1 be moved?
   4439 	bpl.b		fmovm_data_in_fp2	# no
   4440 
   4441 	mov.l		(%a0)+,0x0+EXC_FP1(%a6)	# yes
   4442 	mov.l		(%a0)+,0x4+EXC_FP1(%a6)
   4443 	mov.l		(%a0)+,0x8+EXC_FP1(%a6)
   4444 
   4445 fmovm_data_in_fp2:
   4446 	lsl.b		&0x1,%d1		# should FP2 be moved?
   4447 	bpl.b		fmovm_data_in_fp3	# no
   4448 
   4449 	fmovm.x		(%a0)+,&0x20		# yes
   4450 
   4451 fmovm_data_in_fp3:
   4452 	lsl.b		&0x1,%d1		# should FP3 be moved?
   4453 	bpl.b		fmovm_data_in_fp4	# no
   4454 
   4455 	fmovm.x		(%a0)+,&0x10		# yes
   4456 
   4457 fmovm_data_in_fp4:
   4458 	lsl.b		&0x1,%d1		# should FP4 be moved?
   4459 	bpl.b		fmovm_data_in_fp5	# no
   4460 
   4461 	fmovm.x		(%a0)+,&0x08		# yes
   4462 
   4463 fmovm_data_in_fp5:
   4464 	lsl.b		&0x1,%d1		# should FP5 be moved?
   4465 	bpl.b		fmovm_data_in_fp6	# no
   4466 
   4467 	fmovm.x		(%a0)+,&0x04		# yes
   4468 
   4469 fmovm_data_in_fp6:
   4470 	lsl.b		&0x1,%d1		# should FP6 be moved?
   4471 	bpl.b		fmovm_data_in_fp7	# no
   4472 
   4473 	fmovm.x		(%a0)+,&0x02		# yes
   4474 
   4475 fmovm_data_in_fp7:
   4476 	lsl.b		&0x1,%d1		# should FP7 be moved?
   4477 	bpl.b		fmovm_data_in_done	# no
   4478 
   4479 	fmovm.x		(%a0)+,&0x01		# yes
   4480 
   4481 fmovm_data_in_done:
   4482 	add.l		%d0,%sp			# remove fpregs from stack
   4483 	rts
   4484 
   4485 #####################################
   4486 
   4487 fmovm_data_done:
   4488 	rts
   4489 
   4490 ##############################################################################
   4491 
   4492 #
   4493 # table indexed by the operation's bit string that gives the number
   4494 # of bytes that will be moved.
   4495 #
   4496 # number of bytes = (# of 1's in bit string) * 12(bytes/fpreg)
   4497 #
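        # Equivalently, each entry is (rough C sketch):
        #
        #    unsigned nbytes = 0;
        #    for (unsigned m = mask; m != 0; m >>= 1)
        #        if (m & 1)
        #            nbytes += 12;    /* 12 bytes per extended-precision FP reg */
        #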
   4498 tbl_fmovm_size:
   4499 	byte	0x00,0x0c,0x0c,0x18,0x0c,0x18,0x18,0x24
   4500 	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
   4501 	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
   4502 	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
   4503 	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
   4504 	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
   4505 	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
   4506 	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
   4507 	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
   4508 	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
   4509 	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
   4510 	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
   4511 	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
   4512 	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
   4513 	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
   4514 	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
   4515 	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
   4516 	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
   4517 	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
   4518 	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
   4519 	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
   4520 	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
   4521 	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
   4522 	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
   4523 	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
   4524 	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
   4525 	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
   4526 	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
   4527 	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
   4528 	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
   4529 	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
   4530 	byte	0x3c,0x48,0x48,0x54,0x48,0x54,0x54,0x60
   4531 
   4532 #
   4533 # table to convert a pre-decrement bit string into a post-increment
   4534 # or control bit string.
   4535 # ex: 	0x00	==>	0x00
   4536 #	0x01	==>	0x80
   4537 #	0x02	==>	0x40
   4538 #		.
   4539 #		.
   4540 #	0xfd	==>	0xbf
   4541 #	0xfe	==>	0x7f
   4542 #	0xff	==>	0xff
   4543 #
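        # The table is just an 8-bit bit reversal; a rough C sketch:
        #
        #    unsigned char rev = 0;
        #    for (int i = 0; i < 8; i++)
        #        if (mask & (1 << i))
        #            rev |= 0x80 >> i;    /* bit0 <-> bit7, bit1 <-> bit6, ... */
        #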
   4544 tbl_fmovm_convert:
   4545 	byte	0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0
   4546 	byte	0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0
   4547 	byte	0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8
   4548 	byte	0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8
   4549 	byte	0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4
   4550 	byte	0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4
   4551 	byte	0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec
   4552 	byte	0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc
   4553 	byte	0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2
   4554 	byte	0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2
   4555 	byte	0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea
   4556 	byte	0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa
   4557 	byte	0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6
   4558 	byte	0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6
   4559 	byte	0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee
   4560 	byte	0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe
   4561 	byte	0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1
   4562 	byte	0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1
   4563 	byte	0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9
   4564 	byte	0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9
   4565 	byte	0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5
   4566 	byte	0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5
   4567 	byte	0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed
   4568 	byte	0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd
   4569 	byte	0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3
   4570 	byte	0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3
   4571 	byte	0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb
   4572 	byte	0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb
   4573 	byte	0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7
   4574 	byte	0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7
   4575 	byte	0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef
   4576 	byte	0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff
   4577 
   4578 	global		fmovm_calc_ea
   4579 ###############################################
   4580 # _fmovm_calc_ea: calculate effective address #
   4581 ###############################################
   4582 fmovm_calc_ea:
   4583 	mov.l		%d0,%a0			# move # bytes to a0
   4584 
   4585 # currently, MODE and REG are taken from the EXC_OPWORD. this could be
   4586 # easily changed if they were inputs passed in registers.
   4587 	mov.w		EXC_OPWORD(%a6),%d0	# fetch opcode word
   4588 	mov.w		%d0,%d1			# make a copy
   4589 
   4590 	andi.w		&0x3f,%d0		# extract mode field
   4591 	andi.l		&0x7,%d1		# extract reg  field
   4592 
   4593 # jump to the corresponding function for each {MODE,REG} pair.
   4594 	mov.w		(tbl_fea_mode.b,%pc,%d0.w*2),%d0 # fetch jmp distance
   4595 	jmp		(tbl_fea_mode.b,%pc,%d0.w*1) # jmp to correct ea mode
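        #
        # i.e., as a rough C sketch (hypothetical names): the low six opword
        # bits {MODE,REG} select a 16-bit self-relative offset which is added
        # back to the table base:
        #
        #    short off = tbl_fea_mode[opword & 0x3f];
        #    void (*calc)(void) = (void (*)(void))((char *)tbl_fea_mode + off);
        #    calc();    /* an offset of 0 marks an invalid mode */
        #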
   4596 
   4597 	swbeg		&64
   4598 tbl_fea_mode:
   4599 	short		tbl_fea_mode	-	tbl_fea_mode
   4600 	short		tbl_fea_mode	-	tbl_fea_mode
   4601 	short		tbl_fea_mode	-	tbl_fea_mode
   4602 	short		tbl_fea_mode	-	tbl_fea_mode
   4603 	short		tbl_fea_mode	-	tbl_fea_mode
   4604 	short		tbl_fea_mode	-	tbl_fea_mode
   4605 	short		tbl_fea_mode	-	tbl_fea_mode
   4606 	short		tbl_fea_mode	-	tbl_fea_mode
   4607 
   4608 	short		tbl_fea_mode	-	tbl_fea_mode
   4609 	short		tbl_fea_mode	-	tbl_fea_mode
   4610 	short		tbl_fea_mode	-	tbl_fea_mode
   4611 	short		tbl_fea_mode	-	tbl_fea_mode
   4612 	short		tbl_fea_mode	-	tbl_fea_mode
   4613 	short		tbl_fea_mode	-	tbl_fea_mode
   4614 	short		tbl_fea_mode	-	tbl_fea_mode
   4615 	short		tbl_fea_mode	-	tbl_fea_mode
   4616 
   4617 	short		faddr_ind_a0	- 	tbl_fea_mode
   4618 	short		faddr_ind_a1	- 	tbl_fea_mode
   4619 	short		faddr_ind_a2	- 	tbl_fea_mode
   4620 	short		faddr_ind_a3 	- 	tbl_fea_mode
   4621 	short		faddr_ind_a4 	- 	tbl_fea_mode
   4622 	short		faddr_ind_a5 	- 	tbl_fea_mode
   4623 	short		faddr_ind_a6 	- 	tbl_fea_mode
   4624 	short		faddr_ind_a7 	- 	tbl_fea_mode
   4625 
   4626 	short		faddr_ind_p_a0	- 	tbl_fea_mode
   4627 	short		faddr_ind_p_a1 	- 	tbl_fea_mode
   4628 	short		faddr_ind_p_a2 	- 	tbl_fea_mode
   4629 	short		faddr_ind_p_a3 	- 	tbl_fea_mode
   4630 	short		faddr_ind_p_a4 	- 	tbl_fea_mode
   4631 	short		faddr_ind_p_a5 	- 	tbl_fea_mode
   4632 	short		faddr_ind_p_a6 	- 	tbl_fea_mode
   4633 	short		faddr_ind_p_a7 	- 	tbl_fea_mode
   4634 
   4635 	short		faddr_ind_m_a0 	- 	tbl_fea_mode
   4636 	short		faddr_ind_m_a1 	- 	tbl_fea_mode
   4637 	short		faddr_ind_m_a2 	- 	tbl_fea_mode
   4638 	short		faddr_ind_m_a3 	- 	tbl_fea_mode
   4639 	short		faddr_ind_m_a4 	- 	tbl_fea_mode
   4640 	short		faddr_ind_m_a5 	- 	tbl_fea_mode
   4641 	short		faddr_ind_m_a6 	- 	tbl_fea_mode
   4642 	short		faddr_ind_m_a7 	- 	tbl_fea_mode
   4643 
   4644 	short		faddr_ind_disp_a0	- 	tbl_fea_mode
   4645 	short		faddr_ind_disp_a1 	- 	tbl_fea_mode
   4646 	short		faddr_ind_disp_a2 	- 	tbl_fea_mode
   4647 	short		faddr_ind_disp_a3 	- 	tbl_fea_mode
   4648 	short		faddr_ind_disp_a4 	- 	tbl_fea_mode
   4649 	short		faddr_ind_disp_a5 	- 	tbl_fea_mode
   4650 	short		faddr_ind_disp_a6 	- 	tbl_fea_mode
   4651 	short		faddr_ind_disp_a7	-	tbl_fea_mode
   4652 
   4653 	short		faddr_ind_ext 	- 	tbl_fea_mode
   4654 	short		faddr_ind_ext 	- 	tbl_fea_mode
   4655 	short		faddr_ind_ext 	- 	tbl_fea_mode
   4656 	short		faddr_ind_ext 	- 	tbl_fea_mode
   4657 	short		faddr_ind_ext 	- 	tbl_fea_mode
   4658 	short		faddr_ind_ext 	- 	tbl_fea_mode
   4659 	short		faddr_ind_ext 	- 	tbl_fea_mode
   4660 	short		faddr_ind_ext 	- 	tbl_fea_mode
   4661 
   4662 	short		fabs_short	- 	tbl_fea_mode
   4663 	short		fabs_long	- 	tbl_fea_mode
   4664 	short		fpc_ind		- 	tbl_fea_mode
   4665 	short		fpc_ind_ext	- 	tbl_fea_mode
   4666 	short		tbl_fea_mode	- 	tbl_fea_mode
   4667 	short		tbl_fea_mode	- 	tbl_fea_mode
   4668 	short		tbl_fea_mode	- 	tbl_fea_mode
   4669 	short		tbl_fea_mode	- 	tbl_fea_mode
   4670 
   4671 ###################################
   4672 # Address register indirect: (An) #
   4673 ###################################
   4674 faddr_ind_a0:
   4675 	mov.l		EXC_DREGS+0x8(%a6),%a0	# Get current a0
   4676 	rts
   4677 
   4678 faddr_ind_a1:
   4679 	mov.l		EXC_DREGS+0xc(%a6),%a0	# Get current a1
   4680 	rts
   4681 
   4682 faddr_ind_a2:
   4683 	mov.l		%a2,%a0			# Get current a2
   4684 	rts
   4685 
   4686 faddr_ind_a3:
   4687 	mov.l		%a3,%a0			# Get current a3
   4688 	rts
   4689 
   4690 faddr_ind_a4:
   4691 	mov.l		%a4,%a0			# Get current a4
   4692 	rts
   4693 
   4694 faddr_ind_a5:
   4695 	mov.l		%a5,%a0			# Get current a5
   4696 	rts
   4697 
   4698 faddr_ind_a6:
   4699 	mov.l		(%a6),%a0		# Get current a6
   4700 	rts
   4701 
   4702 faddr_ind_a7:
   4703 	mov.l		EXC_A7(%a6),%a0		# Get current a7
   4704 	rts
   4705 
   4706 #####################################################
   4707 # Address register indirect w/ postincrement: (An)+ #
   4708 #####################################################
   4709 faddr_ind_p_a0:
   4710 	mov.l		EXC_DREGS+0x8(%a6),%d0	# Get current a0
   4711 	mov.l		%d0,%d1
   4712 	add.l		%a0,%d1			# Increment
   4713 	mov.l		%d1,EXC_DREGS+0x8(%a6)	# Save incr value
   4714 	mov.l		%d0,%a0
   4715 	rts
   4716 
   4717 faddr_ind_p_a1:
   4718 	mov.l		EXC_DREGS+0xc(%a6),%d0	# Get current a1
   4719 	mov.l		%d0,%d1
   4720 	add.l		%a0,%d1			# Increment
   4721 	mov.l		%d1,EXC_DREGS+0xc(%a6)	# Save incr value
   4722 	mov.l		%d0,%a0
   4723 	rts
   4724 
   4725 faddr_ind_p_a2:
   4726 	mov.l		%a2,%d0			# Get current a2
   4727 	mov.l		%d0,%d1
   4728 	add.l		%a0,%d1			# Increment
   4729 	mov.l		%d1,%a2			# Save incr value
   4730 	mov.l		%d0,%a0
   4731 	rts
   4732 
   4733 faddr_ind_p_a3:
   4734 	mov.l		%a3,%d0			# Get current a3
   4735 	mov.l		%d0,%d1
   4736 	add.l		%a0,%d1			# Increment
   4737 	mov.l		%d1,%a3			# Save incr value
   4738 	mov.l		%d0,%a0
   4739 	rts
   4740 
   4741 faddr_ind_p_a4:
   4742 	mov.l		%a4,%d0			# Get current a4
   4743 	mov.l		%d0,%d1
   4744 	add.l		%a0,%d1			# Increment
   4745 	mov.l		%d1,%a4			# Save incr value
   4746 	mov.l		%d0,%a0
   4747 	rts
   4748 
   4749 faddr_ind_p_a5:
   4750 	mov.l		%a5,%d0			# Get current a5
   4751 	mov.l		%d0,%d1
   4752 	add.l		%a0,%d1			# Increment
   4753 	mov.l		%d1,%a5			# Save incr value
   4754 	mov.l		%d0,%a0
   4755 	rts
   4756 
   4757 faddr_ind_p_a6:
   4758 	mov.l		(%a6),%d0		# Get current a6
   4759 	mov.l		%d0,%d1
   4760 	add.l		%a0,%d1			# Increment
   4761 	mov.l		%d1,(%a6)		# Save incr value
   4762 	mov.l		%d0,%a0
   4763 	rts
   4764 
   4765 faddr_ind_p_a7:
   4766 	mov.b		&mia7_flg,SPCOND_FLG(%a6) # set "special case" flag
   4767 
   4768 	mov.l		EXC_A7(%a6),%d0		# Get current a7
   4769 	mov.l		%d0,%d1
   4770 	add.l		%a0,%d1			# Increment
   4771 	mov.l		%d1,EXC_A7(%a6)		# Save incr value
   4772 	mov.l		%d0,%a0
   4773 	rts
   4774 
   4775 ####################################################
   4776 # Address register indirect w/ predecrement: -(An) #
   4777 ####################################################
   4778 faddr_ind_m_a0:
   4779 	mov.l		EXC_DREGS+0x8(%a6),%d0	# Get current a0
   4780 	sub.l		%a0,%d0			# Decrement
   4781 	mov.l		%d0,EXC_DREGS+0x8(%a6)	# Save decr value
   4782 	mov.l		%d0,%a0
   4783 	rts
   4784 
   4785 faddr_ind_m_a1:
   4786 	mov.l		EXC_DREGS+0xc(%a6),%d0	# Get current a1
   4787 	sub.l		%a0,%d0			# Decrement
   4788 	mov.l		%d0,EXC_DREGS+0xc(%a6)	# Save decr value
   4789 	mov.l		%d0,%a0
   4790 	rts
   4791 
   4792 faddr_ind_m_a2:
   4793 	mov.l		%a2,%d0			# Get current a2
   4794 	sub.l		%a0,%d0			# Decrement
   4795 	mov.l		%d0,%a2			# Save decr value
   4796 	mov.l		%d0,%a0
   4797 	rts
   4798 
   4799 faddr_ind_m_a3:
   4800 	mov.l		%a3,%d0			# Get current a3
   4801 	sub.l		%a0,%d0			# Decrement
   4802 	mov.l		%d0,%a3			# Save decr value
   4803 	mov.l		%d0,%a0
   4804 	rts
   4805 
   4806 faddr_ind_m_a4:
   4807 	mov.l		%a4,%d0			# Get current a4
   4808 	sub.l		%a0,%d0			# Decrement
   4809 	mov.l		%d0,%a4			# Save decr value
   4810 	mov.l		%d0,%a0
   4811 	rts
   4812 
   4813 faddr_ind_m_a5:
   4814 	mov.l		%a5,%d0			# Get current a5
   4815 	sub.l		%a0,%d0			# Decrement
   4816 	mov.l		%d0,%a5			# Save decr value
   4817 	mov.l		%d0,%a0
   4818 	rts
   4819 
   4820 faddr_ind_m_a6:
   4821 	mov.l		(%a6),%d0		# Get current a6
   4822 	sub.l		%a0,%d0			# Decrement
   4823 	mov.l		%d0,(%a6)		# Save decr value
   4824 	mov.l		%d0,%a0
   4825 	rts
   4826 
   4827 faddr_ind_m_a7:
   4828 	mov.b		&mda7_flg,SPCOND_FLG(%a6) # set "special case" flag
   4829 
   4830 	mov.l		EXC_A7(%a6),%d0		# Get current a7
   4831 	sub.l		%a0,%d0			# Decrement
   4832 	mov.l		%d0,EXC_A7(%a6)		# Save decr value
   4833 	mov.l		%d0,%a0
   4834 	rts
   4835 
   4836 ########################################################
   4837 # Address register indirect w/ displacement: (d16, An) #
   4838 ########################################################
   4839 faddr_ind_disp_a0:
   4840 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   4841 	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
   4842 	bsr.l		_imem_read_word
   4843 
   4844 	tst.l		%d1			# did ifetch fail?
   4845 	bne.l		iea_iacc		# yes
   4846 
   4847 	mov.w		%d0,%a0			# sign extend displacement
   4848 
   4849 	add.l		EXC_DREGS+0x8(%a6),%a0	# a0 + d16
   4850 	rts
   4851 
   4852 faddr_ind_disp_a1:
   4853 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   4854 	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
   4855 	bsr.l		_imem_read_word
   4856 
   4857 	tst.l		%d1			# did ifetch fail?
   4858 	bne.l		iea_iacc		# yes
   4859 
   4860 	mov.w		%d0,%a0			# sign extend displacement
   4861 
   4862 	add.l		EXC_DREGS+0xc(%a6),%a0	# a1 + d16
   4863 	rts
   4864 
   4865 faddr_ind_disp_a2:
   4866 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   4867 	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
   4868 	bsr.l		_imem_read_word
   4869 
   4870 	tst.l		%d1			# did ifetch fail?
   4871 	bne.l		iea_iacc		# yes
   4872 
   4873 	mov.w		%d0,%a0			# sign extend displacement
   4874 
   4875 	add.l		%a2,%a0			# a2 + d16
   4876 	rts
   4877 
   4878 faddr_ind_disp_a3:
   4879 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   4880 	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
   4881 	bsr.l		_imem_read_word
   4882 
   4883 	tst.l		%d1			# did ifetch fail?
   4884 	bne.l		iea_iacc		# yes
   4885 
   4886 	mov.w		%d0,%a0			# sign extend displacement
   4887 
   4888 	add.l		%a3,%a0			# a3 + d16
   4889 	rts
   4890 
   4891 faddr_ind_disp_a4:
   4892 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   4893 	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
   4894 	bsr.l		_imem_read_word
   4895 
   4896 	tst.l		%d1			# did ifetch fail?
   4897 	bne.l		iea_iacc		# yes
   4898 
   4899 	mov.w		%d0,%a0			# sign extend displacement
   4900 
   4901 	add.l		%a4,%a0			# a4 + d16
   4902 	rts
   4903 
   4904 faddr_ind_disp_a5:
   4905 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   4906 	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
   4907 	bsr.l		_imem_read_word
   4908 
   4909 	tst.l		%d1			# did ifetch fail?
   4910 	bne.l		iea_iacc		# yes
   4911 
   4912 	mov.w		%d0,%a0			# sign extend displacement
   4913 
   4914 	add.l		%a5,%a0			# a5 + d16
   4915 	rts
   4916 
   4917 faddr_ind_disp_a6:
   4918 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   4919 	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
   4920 	bsr.l		_imem_read_word
   4921 
   4922 	tst.l		%d1			# did ifetch fail?
   4923 	bne.l		iea_iacc		# yes
   4924 
   4925 	mov.w		%d0,%a0			# sign extend displacement
   4926 
   4927 	add.l		(%a6),%a0		# a6 + d16
   4928 	rts
   4929 
   4930 faddr_ind_disp_a7:
   4931 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   4932 	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
   4933 	bsr.l		_imem_read_word
   4934 
   4935 	tst.l		%d1			# did ifetch fail?
   4936 	bne.l		iea_iacc		# yes
   4937 
   4938 	mov.w		%d0,%a0			# sign extend displacement
   4939 
   4940 	add.l		EXC_A7(%a6),%a0		# a7 + d16
   4941 	rts
   4942 
   4943 ########################################################################
   4944 # Address register indirect w/ index(8-bit displacement): (d8, An, Xn) #
   4945 #    "       "         "    w/   "  (base displacement): (bd, An, Xn)  #
   4946 # Memory indirect postindexed: ([bd, An], Xn, od)		       #
   4947 # Memory indirect preindexed: ([bd, An, Xn], od)		       #
   4948 ########################################################################
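        #
        # For the brief format (bit 8 of the extension word clear), the code
        # below computes, roughly (C sketch, hypothetical names):
        #
        #    long idx = regs[(ext >> 12) & 0xf];          /* index reg (Dn/An)   */
        #    if (!(ext & 0x0800))
        #        idx = (short)idx;                        /* .w index: sign ext  */
        #    idx <<= (ext >> 9) & 0x3;                    /* scale: *1,*2,*4,*8  */
        #    long ea = an + idx + (signed char)(ext & 0xff);  /* + sext(d8)      */
        #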
   4949 faddr_ind_ext:
   4950 	addq.l		&0x8,%d1
   4951 	bsr.l		fetch_dreg		# fetch base areg
   4952 	mov.l		%d0,-(%sp)
   4953 
   4954 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   4955 	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
   4956 	bsr.l		_imem_read_word		# fetch extword in d0
   4957 
   4958 	tst.l		%d1			# did ifetch fail?
   4959 	bne.l		iea_iacc		# yes
   4960 
   4961 	mov.l		(%sp)+,%a0
   4962 
   4963 	btst		&0x8,%d0
   4964 	bne.w		fcalc_mem_ind
   4965 
   4966 	mov.l		%d0,L_SCR1(%a6)		# hold opword
   4967 
   4968 	mov.l		%d0,%d1
   4969 	rol.w		&0x4,%d1
   4970 	andi.w		&0xf,%d1		# extract index regno
   4971 
   4972 # count on fetch_dreg() not to alter a0...
   4973 	bsr.l		fetch_dreg		# fetch index
   4974 
   4975 	mov.l		%d2,-(%sp)		# save d2
   4976 	mov.l		L_SCR1(%a6),%d2		# fetch opword
   4977 
   4978 	btst		&0xb,%d2		# is it word or long?
   4979 	bne.b		faii8_long
   4980 	ext.l		%d0			# sign extend word index
   4981 faii8_long:
   4982 	mov.l		%d2,%d1
   4983 	rol.w		&0x7,%d1
   4984 	andi.l		&0x3,%d1		# extract scale value
   4985 
   4986 	lsl.l		%d1,%d0			# shift index by scale
   4987 
   4988 	extb.l		%d2			# sign extend displacement
   4989 	add.l		%d2,%d0			# index + disp
   4990 	add.l		%d0,%a0			# An + (index + disp)
   4991 
   4992 	mov.l		(%sp)+,%d2		# restore old d2
   4993 	rts
   4994 
   4995 ###########################
   4996 # Absolute short: (XXX).W #
   4997 ###########################
   4998 fabs_short:
   4999 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   5000 	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
   5001 	bsr.l		_imem_read_word		# fetch short address
   5002 
   5003 	tst.l		%d1			# did ifetch fail?
   5004 	bne.l		iea_iacc		# yes
   5005 
   5006 	mov.w		%d0,%a0			# return <ea> in a0
   5007 	rts
   5008 
   5009 ##########################
   5010 # Absolute long: (XXX).L #
   5011 ##########################
   5012 fabs_long:
   5013 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   5014 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   5015 	bsr.l		_imem_read_long		# fetch long address
   5016 
   5017 	tst.l		%d1			# did ifetch fail?
   5018 	bne.l		iea_iacc		# yes
   5019 
   5020 	mov.l		%d0,%a0			# return <ea> in a0
   5021 	rts
   5022 
   5023 #######################################################
   5024 # Program counter indirect w/ displacement: (d16, PC) #
   5025 #######################################################
   5026 fpc_ind:
   5027 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   5028 	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
   5029 	bsr.l		_imem_read_word		# fetch word displacement
   5030 
   5031 	tst.l		%d1			# did ifetch fail?
   5032 	bne.l		iea_iacc		# yes
   5033 
   5034 	mov.w		%d0,%a0			# sign extend displacement
   5035 
   5036 	add.l		EXC_EXTWPTR(%a6),%a0	# pc + d16
   5037 
   5038 # _imem_read_word() increased the extwptr by 2. need to adjust here.
   5039 	subq.l		&0x2,%a0		# adjust <ea>
   5040 	rts
   5041 
   5042 ##########################################################
   5043 # PC indirect w/ index(8-bit displacement): (d8, PC, An) #
   5044 # "     "     w/   "  (base displacement): (bd, PC, An)  #
   5045 # PC memory indirect postindexed: ([bd, PC], Xn, od)     #
   5046 # PC memory indirect preindexed: ([bd, PC, Xn], od)      #
   5047 ##########################################################
   5048 fpc_ind_ext:
   5049 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   5050 	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
   5051 	bsr.l		_imem_read_word		# fetch ext word
   5052 
   5053 	tst.l		%d1			# did ifetch fail?
   5054 	bne.l		iea_iacc		# yes
   5055 
   5056 	mov.l		EXC_EXTWPTR(%a6),%a0	# put base in a0
   5057 	subq.l		&0x2,%a0		# adjust base
   5058 
   5059 	btst		&0x8,%d0		# is disp only 8 bits?
   5060 	bne.w		fcalc_mem_ind		# calc memory indirect
   5061 
   5062 	mov.l		%d0,L_SCR1(%a6)		# store opword
   5063 
   5064 	mov.l		%d0,%d1			# make extword copy
   5065 	rol.w		&0x4,%d1		# rotate reg num into place
   5066 	andi.w		&0xf,%d1		# extract register number
   5067 
   5068 # count on fetch_dreg() not to alter a0...
   5069 	bsr.l		fetch_dreg		# fetch index
   5070 
   5071 	mov.l		%d2,-(%sp)		# save d2
   5072 	mov.l		L_SCR1(%a6),%d2		# fetch opword
   5073 
   5074 	btst		&0xb,%d2		# is index word or long?
   5075 	bne.b		fpii8_long		# long
   5076 	ext.l		%d0			# sign extend word index
   5077 fpii8_long:
   5078 	mov.l		%d2,%d1
   5079 	rol.w		&0x7,%d1		# rotate scale value into place
   5080 	andi.l		&0x3,%d1		# extract scale value
   5081 
   5082 	lsl.l		%d1,%d0			# shift index by scale
   5083 
   5084 	extb.l		%d2			# sign extend displacement
   5085 	add.l		%d2,%d0			# disp + index
   5086 	add.l		%d0,%a0			# An + (index + disp)
   5087 
   5088 	mov.l		(%sp)+,%d2		# restore temp register
   5089 	rts
   5090 
   5091 # d2 = index
   5092 # d3 = base
   5093 # d4 = od
   5094 # d5 = extword
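        # The full-format calculation done here, as a rough C sketch
        # (suppressed base/index/bd/od are treated as zero; hypothetical names):
        #
        #    long addr = base + bd;
        #    if (no_memory_indirection)
        #        ea = addr + index;                       /* (bd,An,Xn)      */
        #    else if (post_indexed)
        #        ea = *(long *)addr + index + od;         /* ([bd,An],Xn,od) */
        #    else
        #        ea = *(long *)(addr + index) + od;       /* ([bd,An,Xn],od) */
        #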
   5095 fcalc_mem_ind:
   5096 	btst		&0x6,%d0		# is the index suppressed?
   5097 	beq.b		fcalc_index
   5098 
   5099 	movm.l		&0x3c00,-(%sp)		# save d2-d5
   5100 
   5101 	mov.l		%d0,%d5			# put extword in d5
   5102 	mov.l		%a0,%d3			# put base in d3
   5103 
   5104 	clr.l		%d2			# yes, so index = 0
   5105 	bra.b		fbase_supp_ck
   5106 
   5107 # index:
   5108 fcalc_index:
   5109 	mov.l		%d0,L_SCR1(%a6)		# save d0 (opword)
   5110 	bfextu		%d0{&16:&4},%d1		# fetch dreg index
   5111 	bsr.l		fetch_dreg
   5112 
   5113 	movm.l		&0x3c00,-(%sp)		# save d2-d5
   5114 	mov.l		%d0,%d2			# put index in d2
   5115 	mov.l		L_SCR1(%a6),%d5
   5116 	mov.l		%a0,%d3
   5117 
   5118 	btst		&0xb,%d5		# is index word or long?
   5119 	bne.b		fno_ext
   5120 	ext.l		%d2
   5121 
   5122 fno_ext:
   5123 	bfextu		%d5{&21:&2},%d0
   5124 	lsl.l		%d0,%d2
   5125 
   5126 # base address (passed as parameter in d3):
   5127 # we clear the value here if it should actually be suppressed.
   5128 fbase_supp_ck:
   5129 	btst		&0x7,%d5		# is the bd suppressed?
   5130 	beq.b		fno_base_sup
   5131 	clr.l		%d3
   5132 
   5133 # base displacement:
   5134 fno_base_sup:
   5135 	bfextu		%d5{&26:&2},%d0		# get bd size
   5136 #	beq.l		fmovm_error		# if (size == 0) it's reserved
   5137 
   5138 	cmpi.b	 	%d0,&0x2
   5139 	blt.b		fno_bd
   5140 	beq.b		fget_word_bd
   5141 
   5142 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   5143 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   5144 	bsr.l		_imem_read_long
   5145 
   5146 	tst.l		%d1			# did ifetch fail?
   5147 	bne.l		fcea_iacc		# yes
   5148 
   5149 	bra.b		fchk_ind
   5150 
   5151 fget_word_bd:
   5152 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   5153 	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
   5154 	bsr.l		_imem_read_word
   5155 
   5156 	tst.l		%d1			# did ifetch fail?
   5157 	bne.l		fcea_iacc		# yes
   5158 
   5159 	ext.l		%d0			# sign extend bd
   5160 
   5161 fchk_ind:
   5162 	add.l		%d0,%d3			# base += bd
   5163 
   5164 # outer displacement:
   5165 fno_bd:
   5166 	bfextu		%d5{&30:&2},%d0		# is od suppressed?
   5167 	beq.w		faii_bd
   5168 
   5169 	cmpi.b	 	%d0,&0x2
   5170 	blt.b		fnull_od
   5171 	beq.b		fword_od
   5172 
   5173 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   5174 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   5175 	bsr.l		_imem_read_long
   5176 
   5177 	tst.l		%d1			# did ifetch fail?
   5178 	bne.l		fcea_iacc		# yes
   5179 
   5180 	bra.b 		fadd_them
   5181 
   5182 fword_od:
   5183 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   5184 	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
   5185 	bsr.l		_imem_read_word
   5186 
   5187 	tst.l		%d1			# did ifetch fail?
   5188 	bne.l		fcea_iacc		# yes
   5189 
   5190 	ext.l		%d0			# sign extend od
   5191 	bra.b		fadd_them
   5192 
   5193 fnull_od:
   5194 	clr.l		%d0
   5195 
   5196 fadd_them:
   5197 	mov.l		%d0,%d4
   5198 
   5199 	btst		&0x2,%d5		# pre or post indexing?
   5200 	beq.b		fpre_indexed
   5201 
   5202 	mov.l		%d3,%a0
   5203 	bsr.l		_dmem_read_long
   5204 
   5205 	tst.l		%d1			# did dfetch fail?
   5206 	bne.w		fcea_err		# yes
   5207 
   5208 	add.l		%d2,%d0			# <ea> += index
   5209 	add.l		%d4,%d0			# <ea> += od
   5210 	bra.b		fdone_ea
   5211 
   5212 fpre_indexed:
   5213 	add.l		%d2,%d3			# preindexing
   5214 	mov.l		%d3,%a0
   5215 	bsr.l		_dmem_read_long
   5216 
   5217 	tst.l		%d1			# did dfetch fail?
   5218 	bne.w		fcea_err		# yes
   5219 
   5220 	add.l		%d4,%d0			# ea += od
   5221 	bra.b		fdone_ea
   5222 
   5223 faii_bd:
   5224 	add.l		%d2,%d3			# ea = (base + bd) + index
   5225 	mov.l		%d3,%d0
   5226 fdone_ea:
   5227 	mov.l		%d0,%a0
   5228 
   5229 	movm.l		(%sp)+,&0x003c		# restore d2-d5
   5230 	rts
   5231 
   5232 #########################################################
   5233 fcea_err:
   5234 	mov.l		%d3,%a0
   5235 
   5236 	movm.l		(%sp)+,&0x003c		# restore d2-d5
   5237 	mov.w		&0x0101,%d0
   5238 	bra.l		iea_dacc
   5239 
   5240 fcea_iacc:
   5241 	movm.l		(%sp)+,&0x003c		# restore d2-d5
   5242 	bra.l		iea_iacc
   5243 
   5244 fmovm_out_err:
   5245 	bsr.l		restore
   5246 	mov.w		&0x00e1,%d0
   5247 	bra.b		fmovm_err
   5248 
   5249 fmovm_in_err:
   5250 	bsr.l		restore
   5251 	mov.w		&0x0161,%d0
   5252 
   5253 fmovm_err:
   5254 	mov.l		L_SCR1(%a6),%a0
   5255 	bra.l		iea_dacc
   5256 
   5257 #########################################################################
   5258 # XDEF ****************************************************************	#
   5259 # 	fmovm_ctrl(): emulate fmovm.l of control registers instr	#
   5260 #									#
   5261 # XREF ****************************************************************	#
   5262 #	_imem_read_long() - read longword from memory			#
   5263 #	iea_iacc() - _imem_read_long() failed; error recovery		#
   5264 #									#
   5265 # INPUT ***************************************************************	#
   5266 #	None								#
   5267 # 									#
   5268 # OUTPUT **************************************************************	#
   5269 #	If _imem_read_long() doesn't fail:				#
   5270 #		USER_FPCR(a6)  = new FPCR value				#
   5271 #		USER_FPSR(a6)  = new FPSR value				#
   5272 #		USER_FPIAR(a6) = new FPIAR value			#
   5273 #									#
   5274 # ALGORITHM ***********************************************************	#
   5275 # 	Decode the instruction type by looking at the extension word 	#
   5276 # in order to see how many control registers to fetch from memory.	#
   5277 # Fetch them using _imem_read_long(). If this fetch fails, exit through	#
   5278 # the special access error exit handler iea_iacc().			#
   5279 #									#
   5280 # Instruction word decoding:						#
   5281 #									#
   5282 # 	fmovem.l #<data>, {FPIAR&|FPCR&|FPSR}				#
   5283 #									#
   5284 #		WORD1			WORD2				#
   5285 #	1111 0010 00 111100	100$ $$00 0000 0000			#
   5286 #									#
   5287 #	$$$ (100): FPCR							#
   5288 #	    (010): FPSR							#
   5289 #	    (001): FPIAR						#
   5290 #	    (000): FPIAR						#
   5291 #									#
   5292 #########################################################################
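        #
        # Rough C sketch of the decode below (hypothetical names): one longword
        # of immediate data is fetched per selected register, always in the
        # order FPCR, FPSR, FPIAR:
        #
        #    unsigned sel = extword >> 8;                 /* 100$ $$00      */
        #    if (sel & 0x10) fpcr  = fetch_long();        /* $$$ bit: FPCR  */
        #    if (sel & 0x08) fpsr  = fetch_long();        /* $$$ bit: FPSR  */
        #    if (sel & 0x04) fpiar = fetch_long();        /* $$$ bit: FPIAR */
        #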
   5293 
   5294 	global		fmovm_ctrl
   5295 fmovm_ctrl:
   5296 	mov.b		EXC_EXTWORD(%a6),%d0	# fetch reg select bits
   5297 	cmpi.b		%d0,&0x9c		# fpcr & fpsr & fpiar ?
   5298 	beq.w		fctrl_in_7		# yes
   5299 	cmpi.b		%d0,&0x98		# fpcr & fpsr ?
   5300 	beq.w		fctrl_in_6		# yes
   5301 	cmpi.b		%d0,&0x94		# fpcr & fpiar ?
   5302 	beq.b		fctrl_in_5		# yes
   5303 
   5304 # fmovem.l #<data>, fpsr/fpiar
   5305 fctrl_in_3:
   5306 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   5307 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   5308 	bsr.l		_imem_read_long		# fetch FPSR from mem
   5309 
   5310 	tst.l		%d1			# did ifetch fail?
   5311 	bne.l		iea_iacc		# yes
   5312 
   5313 	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to stack
   5314 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   5315 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   5316 	bsr.l		_imem_read_long		# fetch FPIAR from mem
   5317 
   5318 	tst.l		%d1			# did ifetch fail?
   5319 	bne.l		iea_iacc		# yes
   5320 
   5321 	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to stack
   5322 	rts
   5323 
   5324 # fmovem.l #<data>, fpcr/fpiar
   5325 fctrl_in_5:
   5326 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   5327 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   5328 	bsr.l		_imem_read_long		# fetch FPCR from mem
   5329 
   5330 	tst.l		%d1			# did ifetch fail?
   5331 	bne.l		iea_iacc		# yes
   5332 
   5333 	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to stack
   5334 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   5335 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   5336 	bsr.l		_imem_read_long		# fetch FPIAR from mem
   5337 
   5338 	tst.l		%d1			# did ifetch fail?
   5339 	bne.l		iea_iacc		# yes
   5340 
   5341 	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to stack
   5342 	rts
   5343 
   5344 # fmovem.l #<data>, fpcr/fpsr
   5345 fctrl_in_6:
   5346 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   5347 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   5348 	bsr.l		_imem_read_long		# fetch FPCR from mem
   5349 
   5350 	tst.l		%d1			# did ifetch fail?
   5351 	bne.l		iea_iacc		# yes
   5352 
   5353 	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to mem
   5354 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   5355 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   5356 	bsr.l		_imem_read_long		# fetch FPSR from mem
   5357 
   5358 	tst.l		%d1			# did ifetch fail?
   5359 	bne.l		iea_iacc		# yes
   5360 
   5361 	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to mem
   5362 	rts
   5363 
   5364 # fmovem.l #<data>, fpcr/fpsr/fpiar
   5365 fctrl_in_7:
   5366 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   5367 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   5368 	bsr.l		_imem_read_long		# fetch FPCR from mem
   5369 
   5370 	tst.l		%d1			# did ifetch fail?
   5371 	bne.l		iea_iacc		# yes
   5372 
   5373 	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to mem
   5374 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   5375 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   5376 	bsr.l		_imem_read_long		# fetch FPSR from mem
   5377 
   5378 	tst.l		%d1			# did ifetch fail?
   5379 	bne.l		iea_iacc		# yes
   5380 
   5381 	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to mem
   5382 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   5383 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   5384 	bsr.l		_imem_read_long		# fetch FPIAR from mem
   5385 
   5386 	tst.l		%d1			# did ifetch fail?
   5387 	bne.l		iea_iacc		# yes
   5388 
   5389 	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to mem
   5390 	rts
   5391 
   5392 ##########################################################################
   5393 
   5394 #########################################################################
   5395 # XDEF ****************************************************************	#
   5396 #	addsub_scaler2(): scale inputs to fadd/fsub such that no	#
   5397 #			  OVFL/UNFL exceptions will result		#
   5398 #									#
   5399 # XREF ****************************************************************	#
   5400 #	norm() - normalize mantissa after adjusting exponent		#
   5401 #									#
   5402 # INPUT ***************************************************************	#
   5403 #	FP_SRC(a6) = fp op1(src)					#
   5404 #	FP_DST(a6) = fp op2(dst)					#
   5405 # 									#
   5406 # OUTPUT **************************************************************	#
   5407 #	FP_SRC(a6) = fp op1 scaled(src)					#
   5408 #	FP_DST(a6) = fp op2 scaled(dst)					#
   5409 #	d0         = scale amount					#
   5410 #									#
   5411 # ALGORITHM ***********************************************************	#
   5412 # 	If the DST exponent is > the SRC exponent, set the DST exponent	#
   5413 # equal to 0x3fff and scale the SRC exponent by the value that the	#
   5414 # DST exponent was scaled by. If the SRC exponent is greater or equal,	#
   5415 # do the opposite. Return this scale factor in d0.			#
   5416 #	If the two exponents differ by > the number of mantissa bits	#
   5417 # plus two, then set the smallest exponent to a very small value as a	#
   5418 # quick shortcut.							#
   5419 #									#
   5420 #########################################################################
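        #
        # Rough C sketch of the normalized case (hypothetical names; the DENORM
        # and "exponents differ by > len(mantissa)+2" refinements are omitted):
        #
        #    if (dst_exp > src_exp) {
        #        scale    = 0x3fff - dst_exp;    /* see scale_to_zero_dst() */
        #        dst_exp  = 0x3fff;
        #        src_exp += scale;               /* shift src the same way  */
        #    } else {
        #        scale    = 0x3fff - src_exp;    /* see scale_to_zero_src() */
        #        src_exp  = 0x3fff;
        #        dst_exp += scale;
        #    }
        #    return scale;                       /* handed back in d0       */
        #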
   5421 
   5422 	global		addsub_scaler2
   5423 addsub_scaler2:
   5424 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   5425 	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
   5426 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   5427 	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
   5428 	mov.w		SRC_EX(%a0),%d0
   5429 	mov.w		DST_EX(%a1),%d1
   5430 	mov.w		%d0,FP_SCR0_EX(%a6)
   5431 	mov.w		%d1,FP_SCR1_EX(%a6)
   5432 
   5433 	andi.w		&0x7fff,%d0
   5434 	andi.w		&0x7fff,%d1
   5435 	mov.w		%d0,L_SCR1(%a6)		# store src exponent
   5436 	mov.w		%d1,2+L_SCR1(%a6)	# store dst exponent
   5437 
   5438 	cmp.w		%d0, %d1		# is src exp >= dst exp?
   5439 	bge.l		src_exp_ge2
   5440 
   5441 # dst exp is >  src exp; scale dst to exp = 0x3fff
   5442 dst_exp_gt2:
   5443 	bsr.l		scale_to_zero_dst
   5444 	mov.l		%d0,-(%sp)		# save scale factor
   5445 
   5446 	cmpi.b		STAG(%a6),&DENORM	# is src denormalized?
   5447 	bne.b		cmpexp12
   5448 
   5449 	lea		FP_SCR0(%a6),%a0
   5450 	bsr.l		norm			# normalize the denorm; result is new exp
   5451 	neg.w		%d0			# new exp = -(shft val)
   5452 	mov.w		%d0,L_SCR1(%a6)		# insert new exp
   5453 
   5454 cmpexp12:
   5455 	mov.w		2+L_SCR1(%a6),%d0
   5456 	subi.w		&mantissalen+2,%d0	# subtract mantissalen+2 from larger exp
   5457 
   5458 	cmp.w		%d0,L_SCR1(%a6)		# is difference >= len(mantissa)+2?
   5459 	bge.b		quick_scale12
   5460 
   5461 	mov.w		L_SCR1(%a6),%d0
   5462 	add.w		0x2(%sp),%d0		# scale src exponent by scale factor
   5463 	mov.w		FP_SCR0_EX(%a6),%d1
   5464 	and.w		&0x8000,%d1
   5465 	or.w		%d1,%d0			# concat {sgn,new exp}
   5466 	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new src exponent
   5467 
   5468 	mov.l		(%sp)+,%d0		# return SCALE factor
   5469 	rts
   5470 
   5471 quick_scale12:
   5472 	andi.w		&0x8000,FP_SCR0_EX(%a6)	# zero src exponent
   5473 	bset		&0x0,1+FP_SCR0_EX(%a6)	# set exp = 1
   5474 
   5475 	mov.l		(%sp)+,%d0		# return SCALE factor
   5476 	rts
   5477 
   5478 # src exp is >= dst exp; scale src to exp = 0x3fff
   5479 src_exp_ge2:
   5480 	bsr.l		scale_to_zero_src
   5481 	mov.l		%d0,-(%sp)		# save scale factor
   5482 
   5483 	cmpi.b		DTAG(%a6),&DENORM	# is dst denormalized?
   5484 	bne.b		cmpexp22
   5485 	lea		FP_SCR1(%a6),%a0
   5486 	bsr.l		norm			# normalize the denorm; result is new exp
   5487 	neg.w		%d0			# new exp = -(shft val)
   5488 	mov.w		%d0,2+L_SCR1(%a6)	# insert new exp
   5489 
   5490 cmpexp22:
   5491 	mov.w		L_SCR1(%a6),%d0
   5492 	subi.w		&mantissalen+2,%d0	# subtract mantissalen+2 from larger exp
   5493 
   5494 	cmp.w		%d0,2+L_SCR1(%a6)	# is difference >= len(mantissa)+2?
   5495 	bge.b		quick_scale22
   5496 
   5497 	mov.w		2+L_SCR1(%a6),%d0
   5498 	add.w		0x2(%sp),%d0		# scale dst exponent by scale factor
   5499 	mov.w		FP_SCR1_EX(%a6),%d1
   5500 	andi.w		&0x8000,%d1
   5501 	or.w		%d1,%d0			# concat {sgn,new exp}
   5502 	mov.w		%d0,FP_SCR1_EX(%a6)	# insert new dst exponent
   5503 
   5504 	mov.l		(%sp)+,%d0		# return SCALE factor
   5505 	rts
   5506 
   5507 quick_scale22:
   5508 	andi.w		&0x8000,FP_SCR1_EX(%a6)	# zero dst exponent
   5509 	bset		&0x0,1+FP_SCR1_EX(%a6)	# set exp = 1
   5510 
   5511 	mov.l		(%sp)+,%d0		# return SCALE factor
   5512 	rts
   5513 
   5514 ##########################################################################
   5515 
   5516 #########################################################################
   5517 # XDEF ****************************************************************	#
   5518 #	scale_to_zero_src(): scale the exponent of extended precision	#
   5519 #			     value at FP_SCR0(a6).			#
   5520 #									#
   5521 # XREF ****************************************************************	#
   5522 #	norm() - normalize the mantissa if the operand was a DENORM	#
   5523 #									#
   5524 # INPUT ***************************************************************	#
   5525 #	FP_SCR0(a6) = extended precision operand to be scaled		#
   5526 # 									#
   5527 # OUTPUT **************************************************************	#
   5528 #	FP_SCR0(a6) = scaled extended precision operand			#
   5529 #	d0	    = scale value					#
   5530 #									#
   5531 # ALGORITHM ***********************************************************	#
   5532 # 	Set the exponent of the input operand to 0x3fff. Save the value	#
   5533 # of the difference between the original and new exponent. Then, 	#
   5534 # normalize the operand if it was a DENORM. Add this normalization	#
   5535 # value to the previous value. Return the result.			#
   5536 #									#
   5537 #########################################################################
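        #
        # For a normalized operand this is simply (rough C sketch):
        #
        #    scale = 0x3fff - exp;    /* amount the exponent was moved          */
        #    exp   = 0x3fff;          /* sign bit kept, exponent rebased        */
        #    return scale;            /* a DENORM adds its normalization shift  */
        #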
   5538 
   5539 	global		scale_to_zero_src
   5540 scale_to_zero_src:
   5541 	mov.w		FP_SCR0_EX(%a6),%d1	# extract operand's {sgn,exp}
   5542 	mov.w		%d1,%d0			# make a copy
   5543 
   5544 	andi.l		&0x7fff,%d1		# extract operand's exponent
   5545 
   5546 	andi.w		&0x8000,%d0		# extract operand's sgn
   5547 	or.w		&0x3fff,%d0		# insert new operand's exponent(=0)
   5548 
   5549 	mov.w		%d0,FP_SCR0_EX(%a6)	# insert biased exponent
   5550 
   5551 	cmpi.b		STAG(%a6),&DENORM	# is operand normalized?
   5552 	beq.b		stzs_denorm		# normalize the DENORM
   5553 
   5554 stzs_norm:
   5555 	mov.l		&0x3fff,%d0
   5556 	sub.l		%d1,%d0			# scale = BIAS + (-exp)
   5557 
   5558 	rts
   5559 
   5560 stzs_denorm:
   5561 	lea		FP_SCR0(%a6),%a0	# pass ptr to src op
   5562 	bsr.l		norm			# normalize denorm
   5563 	neg.l		%d0			# new exponent = -(shft val)
   5564 	mov.l		%d0,%d1			# prepare for op_norm call
   5565 	bra.b		stzs_norm		# finish scaling
   5566 
   5567 ###
   5568 
   5569 #########################################################################
   5570 # XDEF ****************************************************************	#
   5571 #	scale_sqrt(): scale the input operand exponent so a subsequent	#
   5572 #		      fsqrt operation won't take an exception.		#
   5573 #									#
   5574 # XREF ****************************************************************	#
   5575 #	norm() - normalize the mantissa if the operand was a DENORM	#
   5576 #									#
   5577 # INPUT ***************************************************************	#
   5578 #	FP_SCR0(a6) = extended precision operand to be scaled		#
   5579 # 									#
   5580 # OUTPUT **************************************************************	#
   5581 #	FP_SCR0(a6) = scaled extended precision operand			#
   5582 #	d0	    = scale value					#
   5583 #									#
   5584 # ALGORITHM ***********************************************************	#
   5585 #	If the input operand is a DENORM, normalize it.			#
   5586 # 	If the exponent of the input operand is even, set the exponent	#
   5587 # to 0x3ffe and return a scale factor of "(exp-0x3ffe)/2". If the 	#
   5588 # exponent of the input operand is odd, set the exponent to 0x3fff and	#
   5589 # return a scale factor of "(exp-0x3fff)/2". 				#
   5590 #									#
   5591 #########################################################################
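        #
        # For a normalized operand this amounts to (rough C sketch):
        #
        #    long bias  = (exp & 1) ? 0x3fff : 0x3ffe;  /* keep (bias - exp) even */
        #    long scale = (bias - exp) / 2;             /* exact: same parity     */
        #    exp = bias;
        #    return scale;
        #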
   5592 
   5593 	global		scale_sqrt
   5594 scale_sqrt:
   5595 	cmpi.b		STAG(%a6),&DENORM	# is operand normalized?
   5596 	beq.b		ss_denorm		# normalize the DENORM
   5597 
   5598 	mov.w		FP_SCR0_EX(%a6),%d1	# extract operand's {sgn,exp}
   5599 	andi.l		&0x7fff,%d1		# extract operand's exponent
   5600 
   5601 	andi.w		&0x8000,FP_SCR0_EX(%a6)	# extract operand's sgn
   5602 
   5603 	btst		&0x0,%d1		# is exp even or odd?
   5604 	beq.b		ss_norm_even
   5605 
   5606 	ori.w		&0x3fff,FP_SCR0_EX(%a6)	# insert new operand's exponent(=0)
   5607 
   5608 	mov.l		&0x3fff,%d0
   5609 	sub.l		%d1,%d0			# scale = BIAS + (-exp)
   5610 	asr.l		&0x1,%d0		# divide scale factor by 2
   5611 	rts
   5612 
   5613 ss_norm_even:
   5614 	ori.w		&0x3ffe,FP_SCR0_EX(%a6)	# insert new operand's exponent(=0)
   5615 
   5616 	mov.l		&0x3ffe,%d0
   5617 	sub.l		%d1,%d0			# scale = BIAS + (-exp)
   5618 	asr.l		&0x1,%d0		# divide scale factor by 2
   5619 	rts
   5620 
   5621 ss_denorm:
   5622 	lea		FP_SCR0(%a6),%a0	# pass ptr to src op
   5623 	bsr.l		norm			# normalize denorm
   5624 
   5625 	btst		&0x0,%d0		# is exp even or odd?
   5626 	beq.b		ss_denorm_even
   5627 
   5628 	ori.w		&0x3fff,FP_SCR0_EX(%a6)	# insert new operand's exponent(=0)
   5629 
   5630 	add.l		&0x3fff,%d0
   5631 	asr.l		&0x1,%d0		# divide scale factor by 2
   5632 	rts
   5633 
   5634 ss_denorm_even:
   5635 	ori.w		&0x3ffe,FP_SCR0_EX(%a6)	# insert new operand's exponent(=0)
   5636 
   5637 	add.l		&0x3ffe,%d0
   5638 	asr.l		&0x1,%d0		# divide scale factor by 2
   5639 	rts
   5640 
   5641 ###
   5642 
   5643 #########################################################################
   5644 # XDEF ****************************************************************	#
   5645 #	scale_to_zero_dst(): scale the exponent of extended precision	#
   5646 #			     value at FP_SCR1(a6).			#
   5647 #									#
   5648 # XREF ****************************************************************	#
   5649 #	norm() - normalize the mantissa if the operand was a DENORM	#
   5650 #									#
   5651 # INPUT ***************************************************************	#
   5652 #	FP_SCR1(a6) = extended precision operand to be scaled		#
   5653 # 									#
   5654 # OUTPUT **************************************************************	#
   5655 #	FP_SCR1(a6) = scaled extended precision operand			#
   5656 #	d0	    = scale value					#
   5657 #									#
   5658 # ALGORITHM ***********************************************************	#
   5659 # 	Set the exponent of the input operand to 0x3fff. Save the value	#
   5660 # of the difference between the original and new exponent. Then, 	#
   5661 # normalize the operand if it was a DENORM. Add this normalization	#
   5662 # value to the previous value. Return the result.			#
   5663 #									#
   5664 #########################################################################
   5665 
   5666 	global		scale_to_zero_dst
   5667 scale_to_zero_dst:
   5668 	mov.w		FP_SCR1_EX(%a6),%d1	# extract operand's {sgn,exp}
   5669 	mov.w		%d1,%d0			# make a copy
   5670 
   5671 	andi.l		&0x7fff,%d1		# extract operand's exponent
   5672 
   5673 	andi.w		&0x8000,%d0		# extract operand's sgn
   5674 	or.w		&0x3fff,%d0		# insert new operand's exponent(=0)
   5675 
   5676 	mov.w		%d0,FP_SCR1_EX(%a6)	# insert biased exponent
   5677 
   5678 	cmpi.b		DTAG(%a6),&DENORM	# is operand normalized?
   5679 	beq.b		stzd_denorm		# normalize the DENORM
   5680 
   5681 stzd_norm:
   5682 	mov.l		&0x3fff,%d0
   5683 	sub.l		%d1,%d0			# scale = BIAS + (-exp)
   5684 	rts
   5685 
   5686 stzd_denorm:
   5687 	lea		FP_SCR1(%a6),%a0	# pass ptr to dst op
   5688 	bsr.l		norm			# normalize denorm
   5689 	neg.l		%d0			# new exponent = -(shft val)
   5690 	mov.l		%d0,%d1			# prepare for op_norm call
   5691 	bra.b		stzd_norm		# finish scaling
   5692 
   5693 ##########################################################################
   5694 
   5695 #########################################################################
   5696 # XDEF ****************************************************************	#
   5697 #	res_qnan(): return default result w/ QNAN operand for dyadic	#
   5698 #	res_snan(): return default result w/ SNAN operand for dyadic	#
   5699 #	res_qnan_1op(): return dflt result w/ QNAN operand for monadic	#
   5700 #	res_snan_1op(): return dflt result w/ SNAN operand for monadic	#
   5701 #									#
   5702 # XREF ****************************************************************	#
   5703 #	None								#
   5704 #									#
   5705 # INPUT ***************************************************************	#
   5706 #	FP_SRC(a6) = pointer to extended precision src operand		#
   5707 #	FP_DST(a6) = pointer to extended precision dst operand		#
   5708 # 									#
   5709 # OUTPUT **************************************************************	#
   5710 #	fp0 = default result						#
   5711 #									#
   5712 # ALGORITHM ***********************************************************	#
   5713 # 	If either operand (but not both operands) of an operation is a	#
   5714 # nonsignalling NAN, then that NAN is returned as the result. If both	#
   5715 # operands are nonsignalling NANs, then the destination operand 	#
   5716 # nonsignalling NAN is returned as the result.				#
   5717 # 	If either operand to an operation is a signalling NAN (SNAN),	#
   5718 # then, the SNAN bit is set in the FPSR EXC byte. If the SNAN trap	#
   5719 # enable bit is set in the FPCR, then the trap is taken and the 	#
   5720 # destination is not modified. If the SNAN trap enable bit is not set,	#
   5721 # then the SNAN is converted to a nonsignalling NAN (by setting the 	#
   5722 # SNAN bit in the operand to one), and the operation continues as 	#
   5723 # described in the preceding paragraph, for nonsignalling NANs.		#
   5724 #	Make sure the appropriate FPSR bits are set before exiting.	#
   5725 #									#
   5726 #########################################################################
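# Example (illustrative) of the rules above for a dyadic operation:
# with dst = QNAN and src = SNAN, the dst QNAN is returned in fp0 and
# SNAN/AIOP are set in the FPSR exception bytes; with dst = NORM and
# src = SNAN, the SNAN bit of the src operand is forced to one and the
# resulting nonsignalling NAN is returned instead.
#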
   5727 
   5728 	global		res_qnan
   5729 	global		res_snan
   5730 res_qnan:
   5731 res_snan:
   5732 	cmp.b		DTAG(%a6), &SNAN	# is the dst an SNAN?
   5733 	beq.b		dst_snan2
   5734 	cmp.b		DTAG(%a6), &QNAN	# is the dst a  QNAN?
   5735 	beq.b		dst_qnan2
   5736 src_nan:
   5737 	cmp.b		STAG(%a6), &QNAN
   5738 	beq.b		src_qnan2
   5739 	global		res_snan_1op
   5740 res_snan_1op:
   5741 src_snan2:
   5742 	bset		&0x6, FP_SRC_HI(%a6)	# set SNAN bit
   5743 	or.l		&nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
   5744 	lea		FP_SRC(%a6), %a0
   5745 	bra.b		nan_comp
   5746 	global		res_qnan_1op
   5747 res_qnan_1op:
   5748 src_qnan2:
   5749 	or.l		&nan_mask, USER_FPSR(%a6)
   5750 	lea		FP_SRC(%a6), %a0
   5751 	bra.b		nan_comp
   5752 dst_snan2:
   5753 	or.l		&nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
   5754 	bset		&0x6, FP_DST_HI(%a6)	# set SNAN bit
   5755 	lea		FP_DST(%a6), %a0
   5756 	bra.b		nan_comp
   5757 dst_qnan2:
   5758 	lea		FP_DST(%a6), %a0
   5759 	cmp.b		STAG(%a6), &SNAN
   5760 	bne		nan_done
   5761 	or.l		&aiop_mask+snan_mask, USER_FPSR(%a6)
   5762 nan_done:
   5763 	or.l		&nan_mask, USER_FPSR(%a6)
   5764 nan_comp:
   5765 	btst		&0x7, FTEMP_EX(%a0)	# is NAN neg?
   5766 	beq.b		nan_not_neg
   5767 	or.l		&neg_mask, USER_FPSR(%a6)
   5768 nan_not_neg:
   5769 	fmovm.x		(%a0), &0x80
   5770 	rts
   5771 
   5772 #########################################################################
   5773 # XDEF ****************************************************************	#
   5774 # 	res_operr(): return default result during operand error		#
   5775 #									#
   5776 # XREF ****************************************************************	#
   5777 #	None								#
   5778 #									#
   5779 # INPUT ***************************************************************	#
   5780 #	None								#
   5781 # 									#
   5782 # OUTPUT **************************************************************	#
   5783 #	fp0 = default operand error result				#
   5784 #									#
   5785 # ALGORITHM ***********************************************************	#
    5786 #	A nonsignalling NAN is returned as the default result when	#
   5787 # an operand error occurs for the following cases:			#
   5788 #									#
   5789 # 	Multiply: (Infinity x Zero)					#
   5790 # 	Divide  : (Zero / Zero) || (Infinity / Infinity)		#
   5791 #									#
   5792 #########################################################################
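# Example (illustrative): (+0.0 x +INF) takes this path; the value
# loaded below from nan_return is the extended precision nonsignalling
# NAN $7fff-ffffffff-ffffffff, and NAN/OPERR/AIOP are set in USER_FPSR.
#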
   5793 
   5794 	global		res_operr
   5795 res_operr:
   5796 	or.l		&nan_mask+operr_mask+aiop_mask, USER_FPSR(%a6)
   5797 	fmovm.x		nan_return(%pc), &0x80
   5798 	rts
   5799 
   5800 nan_return:
   5801 	long		0x7fff0000, 0xffffffff, 0xffffffff
   5802 
   5803 #########################################################################
   5804 # XDEF ****************************************************************	#
   5805 # 	_denorm(): denormalize an intermediate result			#
   5806 #									#
   5807 # XREF ****************************************************************	#
   5808 #	None								#
   5809 #									#
   5810 # INPUT *************************************************************** #
   5811 #	a0 = points to the operand to be denormalized			#
   5812 #		(in the internal extended format)			#
   5813 #		 							#
   5814 #	d0 = rounding precision						#
   5815 #									#
   5816 # OUTPUT **************************************************************	#
   5817 #	a0 = pointer to the denormalized result				#
   5818 #		(in the internal extended format)			#
   5819 #									#
   5820 #	d0 = guard,round,sticky						#
   5821 #									#
   5822 # ALGORITHM ***********************************************************	#
   5823 # 	According to the exponent underflow threshold for the given	#
    5824 # precision, shift the mantissa bits to the right in order to raise the	#
   5825 # exponent of the operand to the threshold value. While shifting the 	#
   5826 # mantissa bits right, maintain the value of the guard, round, and 	#
   5827 # sticky bits.								#
   5828 # other notes:								#
   5829 #	(1) _denorm() is called by the underflow routines		#
   5830 #	(2) _denorm() does NOT affect the status register		#
   5831 #									#
   5832 #########################################################################
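# Worked example (illustrative; the shift amount is arbitrary):
# with (threshold - exponent) = 3, the 64-bit mantissa is shifted right
# by 3, the exponent field is set to the threshold, and the three bits
# shifted off become the new guard, round, and sticky (sticky is also
# OR'ed with any incoming g,r,s).  If the difference exceeds 65, every
# mantissa bit would be shifted off, so the mantissa is simply cleared
# and only the sticky bit is returned in d0.
#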
   5833 
   5834 #
   5835 # table of exponent threshold values for each precision
   5836 #
   5837 tbl_thresh:
   5838 	short		0x0
   5839 	short		sgl_thresh
   5840 	short		dbl_thresh
   5841 
   5842 	global		_denorm
   5843 _denorm:
   5844 #
   5845 # Load the exponent threshold for the precision selected and check
   5846 # to see if (threshold - exponent) is > 65 in which case we can
   5847 # simply calculate the sticky bit and zero the mantissa. otherwise
   5848 # we have to call the denormalization routine.
   5849 #
   5850 	lsr.b		&0x2, %d0		# shift prec to lo bits
   5851 	mov.w		(tbl_thresh.b,%pc,%d0.w*2), %d1 # load prec threshold
   5852 	mov.w		%d1, %d0		# copy d1 into d0
   5853 	sub.w		FTEMP_EX(%a0), %d0	# diff = threshold - exp
   5854 	cmpi.w		%d0, &66		# is diff > 65? (mant + g,r bits)
   5855 	bpl.b		denorm_set_stky		# yes; just calc sticky
   5856 
   5857 	clr.l		%d0			# clear g,r,s
   5858 	btst		&inex2_bit, FPSR_EXCEPT(%a6) # yes; was INEX2 set?
   5859 	beq.b		denorm_call		# no; don't change anything
   5860 	bset		&29, %d0		# yes; set sticky bit
   5861 
   5862 denorm_call:
   5863 	bsr.l		dnrm_lp			# denormalize the number
   5864 	rts
   5865 
   5866 #
    5867 # all bits would have been shifted off during the denorm so simply
    5868 # calculate whether the sticky should be set and clear the entire mantissa.
   5869 #
   5870 denorm_set_stky:
   5871 	mov.l		&0x20000000, %d0	# set sticky bit in return value
   5872 	mov.w		%d1, FTEMP_EX(%a0)	# load exp with threshold
    5873 	clr.l		FTEMP_HI(%a0)		# zero hi(mantissa)
    5874 	clr.l		FTEMP_LO(%a0)		# zero lo(mantissa)
   5875 	rts
   5876 
   5877 #									#
    5878 # dnrm_lp(): denormalize exponent/mantissa to specified threshold	#
   5879 #									#
   5880 # INPUT:								#
   5881 #	%a0	   : points to the operand to be denormalized		#
   5882 #	%d0{31:29} : initial guard,round,sticky				#
   5883 #	%d1{15:0}  : denormalization threshold				#
   5884 # OUTPUT:								#
   5885 #	%a0	   : points to the denormalized operand		 	#
   5886 #	%d0{31:29} : final guard,round,sticky				#
   5887 #									#
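# Summary (illustrative): the shift amount (threshold - exponent)
# selects one of three cases below: 1-31 shifts within the two mantissa
# longwords (case_1), 32-63 moves hi(mantissa) down into lo(mantissa)
# (case_2), and 64 or more leaves at most guard/round information
# (case_3); a non-positive difference needs no denormalization at all.
#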
   5888 
   5889 # *** Local Equates *** #
   5890 set	GRS,		L_SCR2			# g,r,s temp storage
   5891 set	FTEMP_LO2,	L_SCR1			# FTEMP_LO copy
   5892 
   5893 	global		dnrm_lp
   5894 dnrm_lp:
   5895 
   5896 #
   5897 # make a copy of FTEMP_LO and place the g,r,s bits directly after it
   5898 # in memory so as to make the bitfield extraction for denormalization easier.
   5899 #
   5900 	mov.l		FTEMP_LO(%a0), FTEMP_LO2(%a6) # make FTEMP_LO copy
   5901 	mov.l		%d0, GRS(%a6)		# place g,r,s after it
   5902 
   5903 #
   5904 # check to see how much less than the underflow threshold the operand
   5905 # exponent is.
   5906 #
   5907 	mov.l		%d1, %d0		# copy the denorm threshold
   5908 	sub.w		FTEMP_EX(%a0), %d1	# d1 = threshold - uns exponent
   5909 	ble.b		dnrm_no_lp		# d1 <= 0
   5910 	cmpi.w		%d1, &0x20		# is ( 0 <= d1 < 32) ?
   5911 	blt.b		case_1			# yes
   5912 	cmpi.w		%d1, &0x40		# is (32 <= d1 < 64) ?
   5913 	blt.b		case_2			# yes
   5914 	bra.w		case_3			# (d1 >= 64)
   5915 
   5916 #
    5917 # No denormalization necessary
   5918 #
   5919 dnrm_no_lp:
   5920 	mov.l		GRS(%a6), %d0 		# restore original g,r,s
   5921 	rts
   5922 
   5923 #
   5924 # case (0<d1<32)
   5925 #
   5926 # %d0 = denorm threshold
   5927 # %d1 = "n" = amt to shift
   5928 #
   5929 #	---------------------------------------------------------
   5930 #	|     FTEMP_HI	  |    	FTEMP_LO     |grs000.........000|
   5931 #	---------------------------------------------------------
   5932 #	<-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
   5933 #	\	   \		      \			 \
   5934 #	 \	    \		       \		  \
   5935 #	  \	     \			\		   \
   5936 #	   \	      \			 \		    \
   5937 #	    \	       \		  \		     \
   5938 #	     \		\		   \		      \
   5939 #	      \		 \		    \		       \
   5940 #	       \	  \		     \			\
   5941 #	<-(n)-><-(32 - n)-><------(32)-------><------(32)------->
   5942 #	---------------------------------------------------------
   5943 #	|0.....0| NEW_HI  |  NEW_FTEMP_LO     |grs		|
   5944 #	---------------------------------------------------------
   5945 #
   5946 case_1:
   5947 	mov.l		%d2, -(%sp)		# create temp storage
   5948 
   5949 	mov.w		%d0, FTEMP_EX(%a0)	# exponent = denorm threshold
   5950 	mov.l		&32, %d0
   5951 	sub.w		%d1, %d0		# %d0 = 32 - %d1
   5952 
   5953 	cmpi.w		%d1, &29		# is shft amt >= 29
   5954 	blt.b		case1_extract		# no; no fix needed
   5955 	mov.b		GRS(%a6), %d2
   5956 	or.b		%d2, 3+FTEMP_LO2(%a6)
   5957 
   5958 case1_extract:
   5959 	bfextu		FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_HI
   5960 	bfextu		FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new FTEMP_LO
   5961 	bfextu		FTEMP_LO2(%a6){%d0:&32}, %d0 # %d0 = new G,R,S
   5962 
   5963 	mov.l		%d2, FTEMP_HI(%a0)	# store new FTEMP_HI
   5964 	mov.l		%d1, FTEMP_LO(%a0)	# store new FTEMP_LO
   5965 
   5966 	bftst		%d0{&2:&30}		# were bits shifted off?
   5967 	beq.b		case1_sticky_clear	# no; go finish
   5968 	bset		&rnd_stky_bit, %d0	# yes; set sticky bit
   5969 
   5970 case1_sticky_clear:
   5971 	and.l		&0xe0000000, %d0	# clear all but G,R,S
   5972 	mov.l		(%sp)+, %d2		# restore temp register
   5973 	rts
   5974 
   5975 #
   5976 # case (32<=d1<64)
   5977 #
   5978 # %d0 = denorm threshold
   5979 # %d1 = "n" = amt to shift
   5980 #
   5981 #	---------------------------------------------------------
   5982 #	|     FTEMP_HI	  |    	FTEMP_LO     |grs000.........000|
   5983 #	---------------------------------------------------------
   5984 #	<-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
   5985 #	\	   \		      \
   5986 #	 \	    \		       \
   5987 #	  \	     \			-------------------
   5988 #	   \	      --------------------		   \
   5989 #	    -------------------	  	  \		    \
   5990 #	     		       \	   \		     \
   5991 #	      		 	\     	    \		      \
   5992 #	       		  	 \	     \		       \
   5993 #	<-------(32)------><-(n)-><-(32 - n)-><------(32)------->
   5994 #	---------------------------------------------------------
   5995 #	|0...............0|0....0| NEW_LO     |grs		|
   5996 #	---------------------------------------------------------
   5997 #
   5998 case_2:
   5999 	mov.l		%d2, -(%sp)		# create temp storage
   6000 
   6001 	mov.w		%d0, FTEMP_EX(%a0)	# exponent = denorm threshold
   6002 	subi.w		&0x20, %d1		# %d1 now between 0 and 32
   6003 	mov.l		&0x20, %d0
   6004 	sub.w		%d1, %d0		# %d0 = 32 - %d1
   6005 
    6006 # subtle step here: OR the g,r,s into the bottom of the FTEMP_LO copy to
    6007 # minimize the number of bits to check for the sticky detect;
    6008 # it only plays a role for shift amounts of 61-63.
   6009 	mov.b		GRS(%a6), %d2
   6010 	or.b		%d2, 3+FTEMP_LO2(%a6)
   6011 
   6012 	bfextu		FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_LO
   6013 	bfextu		FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new G,R,S
   6014 
   6015 	bftst		%d1{&2:&30}		# were any bits shifted off?
   6016 	bne.b		case2_set_sticky	# yes; set sticky bit
   6017 	bftst		FTEMP_LO2(%a6){%d0:&31}	# were any bits shifted off?
   6018 	bne.b		case2_set_sticky	# yes; set sticky bit
   6019 
   6020 	mov.l		%d1, %d0		# move new G,R,S to %d0
   6021 	bra.b		case2_end
   6022 
   6023 case2_set_sticky:
   6024 	mov.l		%d1, %d0		# move new G,R,S to %d0
   6025 	bset		&rnd_stky_bit, %d0	# set sticky bit
   6026 
   6027 case2_end:
   6028 	clr.l		FTEMP_HI(%a0)		# store FTEMP_HI = 0
   6029 	mov.l		%d2, FTEMP_LO(%a0)	# store FTEMP_LO
   6030 	and.l		&0xe0000000, %d0	# clear all but G,R,S
   6031 
   6032 	mov.l		(%sp)+,%d2		# restore temp register
   6033 	rts
   6034 
   6035 #
   6036 # case (d1>=64)
   6037 #
   6038 # %d0 = denorm threshold
   6039 # %d1 = amt to shift
   6040 #
   6041 case_3:
   6042 	mov.w		%d0, FTEMP_EX(%a0)	# insert denorm threshold
   6043 
   6044 	cmpi.w		%d1, &65		# is shift amt > 65?
   6045 	blt.b		case3_64		# no; it's == 64
   6046 	beq.b		case3_65		# no; it's == 65
   6047 
   6048 #
   6049 # case (d1>65)
   6050 #
   6051 # Shift value is > 65 and out of range. All bits are shifted off.
   6052 # Return a zero mantissa with the sticky bit set
   6053 #
   6054 	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
   6055 	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
   6056 	mov.l		&0x20000000, %d0	# set sticky bit
   6057 	rts
   6058 
   6059 #
   6060 # case (d1 == 64)
   6061 #
   6062 #	---------------------------------------------------------
   6063 #	|     FTEMP_HI	  |    	FTEMP_LO     |grs000.........000|
   6064 #	---------------------------------------------------------
   6065 #	<-------(32)------>
   6066 #	\	   	   \
   6067 #	 \	    	    \
   6068 #	  \	     	     \
   6069 #	   \	      	      ------------------------------
   6070 #	    -------------------------------		    \
   6071 #	     		       		   \		     \
   6072 #	      		 	     	    \		      \
   6073 #	       		  	 	     \		       \
   6074 #					      <-------(32)------>
   6075 #	---------------------------------------------------------
   6076 #	|0...............0|0................0|grs		|
   6077 #	---------------------------------------------------------
   6078 #
   6079 case3_64:
   6080 	mov.l		FTEMP_HI(%a0), %d0	# fetch hi(mantissa)
   6081 	mov.l		%d0, %d1		# make a copy
   6082 	and.l		&0xc0000000, %d0	# extract G,R
   6083 	and.l		&0x3fffffff, %d1	# extract other bits
   6084 
   6085 	bra.b		case3_complete
   6086 
   6087 #
   6088 # case (d1 == 65)
   6089 #
   6090 #	---------------------------------------------------------
   6091 #	|     FTEMP_HI	  |    	FTEMP_LO     |grs000.........000|
   6092 #	---------------------------------------------------------
   6093 #	<-------(32)------>
   6094 #	\	   	   \
   6095 #	 \	    	    \
   6096 #	  \	     	     \
   6097 #	   \	      	      ------------------------------
   6098 #	    --------------------------------		    \
   6099 #	     		       		    \		     \
   6100 #	      		 	     	     \		      \
   6101 #	       		  	 	      \		       \
   6102 #					       <-------(31)----->
   6103 #	---------------------------------------------------------
   6104 #	|0...............0|0................0|0rs		|
   6105 #	---------------------------------------------------------
   6106 #
   6107 case3_65:
   6108 	mov.l		FTEMP_HI(%a0), %d0	# fetch hi(mantissa)
   6109 	and.l		&0x80000000, %d0	# extract R bit
   6110 	lsr.l		&0x1, %d0		# shift high bit into R bit
   6111 	and.l		&0x7fffffff, %d1	# extract other bits
   6112 
   6113 case3_complete:
    6114 # the last operation done was an "and" of the bits shifted off, so the
    6115 # condition codes are already set; branch accordingly.
   6116 	bne.b		case3_set_sticky	# yes; go set new sticky
   6117 	tst.l		FTEMP_LO(%a0)		# were any bits shifted off?
   6118 	bne.b		case3_set_sticky	# yes; go set new sticky
   6119 	tst.b		GRS(%a6)		# were any bits shifted off?
   6120 	bne.b		case3_set_sticky	# yes; go set new sticky
   6121 
   6122 #
    6123 # no bits were shifted off so don't set the sticky bit;
    6124 # the guard and round bits in %d0 are left as they are and
    6125 # the entire mantissa is cleared to zero.
   6126 #
   6127 	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
   6128 	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
   6129 	rts
   6130 
   6131 #
   6132 # some bits were shifted off so set the sticky bit.
   6133 # the entire mantissa is zero.
   6134 #
   6135 case3_set_sticky:
   6136 	bset		&rnd_stky_bit,%d0	# set new sticky bit
   6137 	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
   6138 	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
   6139 	rts
   6140 
   6141 #########################################################################
   6142 # XDEF ****************************************************************	#
   6143 #	_round(): round result according to precision/mode		#
   6144 #									#
   6145 # XREF ****************************************************************	#
   6146 #	None								#
   6147 #									#
   6148 # INPUT ***************************************************************	#
   6149 #	a0	  = ptr to input operand in internal extended format 	#
   6150 #	d1(hi)    = contains rounding precision:			#
   6151 #			ext = $0000xxxx					#
   6152 #			sgl = $0004xxxx					#
   6153 #			dbl = $0008xxxx					#
   6154 #	d1(lo)	  = contains rounding mode:				#
   6155 #			RN  = $xxxx0000					#
   6156 #			RZ  = $xxxx0001					#
   6157 #			RM  = $xxxx0002					#
   6158 #			RP  = $xxxx0003					#
   6159 #	d0{31:29} = contains the g,r,s bits (extended)			#
   6160 #									#
   6161 # OUTPUT **************************************************************	#
   6162 #	a0 = pointer to rounded result					#
   6163 #									#
   6164 # ALGORITHM ***********************************************************	#
   6165 #	On return the value pointed to by a0 is correctly rounded,	#
   6166 #	a0 is preserved and the g-r-s bits in d0 are cleared.		#
   6167 #	The result is not typed - the tag field is invalid.  The	#
   6168 #	result is still in the internal extended format.		#
   6169 #									#
   6170 #	The INEX bit of USER_FPSR will be set if the rounded result was	#
   6171 #	inexact (i.e. if any of the g-r-s bits were set).		#
   6172 #									#
   6173 #########################################################################
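# Worked example (illustrative): in round-to-nearest, extended
# precision, a result whose guard bit is set and whose round and sticky
# bits are clear is exactly halfway; the lsb of the mantissa is
# incremented and then cleared, so the final mantissa is even.
# If the guard bit is clear, the g,r,s bits are simply truncated.
#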
   6174 
   6175 	global		_round
   6176 _round:
   6177 #
   6178 # ext_grs() looks at the rounding precision and sets the appropriate
   6179 # G,R,S bits.
   6180 # If (G,R,S == 0) then result is exact and round is done, else set
   6181 # the inex flag in status reg and continue.
   6182 #
   6183 	bsr.l		ext_grs			# extract G,R,S
   6184 
   6185 	tst.l		%d0			# are G,R,S zero?
   6186 	beq.w		truncate		# yes; round is complete
   6187 
   6188 	or.w		&inx2a_mask, 2+USER_FPSR(%a6) # set inex2/ainex
   6189 
   6190 #
   6191 # Use rounding mode as an index into a jump table for these modes.
   6192 # All of the following assumes grs != 0.
   6193 #
   6194 	mov.w		(tbl_mode.b,%pc,%d1.w*2), %a1 # load jump offset
   6195 	jmp		(tbl_mode.b,%pc,%a1)	# jmp to rnd mode handler
   6196 
   6197 tbl_mode:
   6198 	short		rnd_near - tbl_mode
   6199 	short		truncate - tbl_mode	# RZ always truncates
   6200 	short		rnd_mnus - tbl_mode
   6201 	short		rnd_plus - tbl_mode
   6202 
   6203 #################################################################
   6204 #	ROUND PLUS INFINITY					#
   6205 #								#
   6206 #	If sign of fp number = 0 (positive), then add 1 to l.	#
   6207 #################################################################
   6208 rnd_plus:
   6209 	tst.b		FTEMP_SGN(%a0)		# check for sign
    6210 	bmi.w		truncate		# if negative then truncate
   6211 
   6212 	mov.l		&0xffffffff, %d0	# force g,r,s to be all f's
   6213 	swap		%d1			# set up d1 for round prec.
   6214 
   6215 	cmpi.b		%d1, &s_mode		# is prec = sgl?
   6216 	beq.w		add_sgl			# yes
   6217 	bgt.w		add_dbl			# no; it's dbl
   6218 	bra.w		add_ext			# no; it's ext
   6219 
   6220 #################################################################
   6221 #	ROUND MINUS INFINITY					#
   6222 #								#
   6223 #	If sign of fp number = 1 (negative), then add 1 to l.	#
   6224 #################################################################
   6225 rnd_mnus:
   6226 	tst.b		FTEMP_SGN(%a0)		# check for sign
    6227 	bpl.w		truncate		# if positive then truncate
   6228 
   6229 	mov.l		&0xffffffff, %d0	# force g,r,s to be all f's
   6230 	swap		%d1			# set up d1 for round prec.
   6231 
   6232 	cmpi.b		%d1, &s_mode		# is prec = sgl?
   6233 	beq.w		add_sgl			# yes
   6234 	bgt.w		add_dbl			# no; it's dbl
   6235 	bra.w		add_ext			# no; it's ext
   6236 
   6237 #################################################################
   6238 #	ROUND NEAREST						#
   6239 #								#
   6240 #	If (g=1), then add 1 to l and if (r=s=0), then clear l	#
   6241 #	Note that this will round to even in case of a tie.	#
   6242 #################################################################
   6243 rnd_near:
   6244 	asl.l		&0x1, %d0		# shift g-bit to c-bit
    6245 	bcc.w		truncate		# if (g=0) then truncate
   6246 
   6247 	swap		%d1			# set up d1 for round prec.
   6248 
   6249 	cmpi.b		%d1, &s_mode		# is prec = sgl?
   6250 	beq.w		add_sgl			# yes
   6251 	bgt.w		add_dbl			# no; it's dbl
   6252 	bra.w		add_ext			# no; it's ext
   6253 
   6254 # *** LOCAL EQUATES ***
   6255 set	ad_1_sgl,	0x00000100	# constant to add 1 to l-bit in sgl prec
   6256 set	ad_1_dbl,	0x00000800	# constant to add 1 to l-bit in dbl prec
   6257 
   6258 #########################
   6259 #	ADD SINGLE	#
   6260 #########################
   6261 add_sgl:
   6262 	add.l		&ad_1_sgl, FTEMP_HI(%a0)
   6263 	bcc.b		scc_clr			# no mantissa overflow
   6264 	roxr.w		FTEMP_HI(%a0)		# shift v-bit back in
   6265 	roxr.w		FTEMP_HI+2(%a0)		# shift v-bit back in
   6266 	add.w		&0x1, FTEMP_EX(%a0)	# and incr exponent
   6267 scc_clr:
   6268 	tst.l		%d0			# test for rs = 0
   6269 	bne.b		sgl_done
   6270 	and.w		&0xfe00, FTEMP_HI+2(%a0) # clear the l-bit
   6271 sgl_done:
   6272 	and.l		&0xffffff00, FTEMP_HI(%a0) # truncate bits beyond sgl limit
   6273 	clr.l		FTEMP_LO(%a0)		# clear d2
   6274 	rts
   6275 
   6276 #########################
   6277 #	ADD EXTENDED	#
   6278 #########################
   6279 add_ext:
   6280 	addq.l		&1,FTEMP_LO(%a0)	# add 1 to l-bit
   6281 	bcc.b		xcc_clr			# test for carry out
    6282 	addq.l		&1,FTEMP_HI(%a0)	# propagate carry
   6283 	bcc.b		xcc_clr
   6284 	roxr.w		FTEMP_HI(%a0)		# mant is 0 so restore v-bit
   6285 	roxr.w		FTEMP_HI+2(%a0)		# mant is 0 so restore v-bit
   6286 	roxr.w		FTEMP_LO(%a0)
   6287 	roxr.w		FTEMP_LO+2(%a0)
   6288 	add.w		&0x1,FTEMP_EX(%a0)	# and inc exp
   6289 xcc_clr:
   6290 	tst.l		%d0			# test rs = 0
   6291 	bne.b		add_ext_done
   6292 	and.b		&0xfe,FTEMP_LO+3(%a0)	# clear the l bit
   6293 add_ext_done:
   6294 	rts
   6295 
   6296 #########################
   6297 #	ADD DOUBLE	#
   6298 #########################
   6299 add_dbl:
   6300 	add.l		&ad_1_dbl, FTEMP_LO(%a0) # add 1 to lsb
   6301 	bcc.b		dcc_clr			# no carry
    6302 	addq.l		&0x1, FTEMP_HI(%a0)	# propagate carry
   6303 	bcc.b		dcc_clr			# no carry
   6304 
   6305 	roxr.w		FTEMP_HI(%a0)		# mant is 0 so restore v-bit
   6306 	roxr.w		FTEMP_HI+2(%a0)		# mant is 0 so restore v-bit
   6307 	roxr.w		FTEMP_LO(%a0)
   6308 	roxr.w		FTEMP_LO+2(%a0)
   6309 	addq.w		&0x1, FTEMP_EX(%a0)	# incr exponent
   6310 dcc_clr:
   6311 	tst.l		%d0			# test for rs = 0
   6312 	bne.b		dbl_done
   6313 	and.w		&0xf000, FTEMP_LO+2(%a0) # clear the l-bit
   6314 
   6315 dbl_done:
   6316 	and.l		&0xfffff800,FTEMP_LO(%a0) # truncate bits beyond dbl limit
   6317 	rts
   6318 
   6319 ###########################
   6320 # Truncate all other bits #
   6321 ###########################
   6322 truncate:
   6323 	swap		%d1			# select rnd prec
   6324 
   6325 	cmpi.b		%d1, &s_mode		# is prec sgl?
   6326 	beq.w		sgl_done		# yes
   6327 	bgt.b		dbl_done		# no; it's dbl
   6328 	rts					# no; it's ext
   6329 
   6330 
   6331 #
   6332 # ext_grs(): extract guard, round and sticky bits according to
   6333 #	     rounding precision.
   6334 #
   6335 # INPUT
   6336 #	d0	   = extended precision g,r,s (in d0{31:29})
   6337 #	d1 	   = {PREC,ROUND}
   6338 # OUTPUT
   6339 #	d0{31:29}  = guard, round, sticky
   6340 #
    6341 # ext_grs() extracts the guard/round/sticky bits according to the
    6342 # selected rounding precision. It is called by the _round() subroutine
    6343 # only.  All registers except d0 are kept intact; d0 returns the
    6344 # updated guard,round,sticky in d0{31:29}.
   6345 #
    6346 # Notes: ext_grs() uses the rounding PREC, and therefore has to swap d1
    6347 #	 prior to usage and restore d1 to its original value afterwards. This
    6348 #	 routine is tightly tied to the _round() routine and is not meant to
    6349 #	 uphold standard subroutine calling practices.
   6350 #
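# Example (illustrative): for single precision rounding, the 24-bit
# mantissa ends at bit 8 of FTEMP_HI, so guard and round are taken from
# FTEMP_HI bits 7-6 and sticky is the OR of FTEMP_HI bits 5-0, all of
# FTEMP_LO, and the incoming g,r,s in d0.  For extended precision the
# incoming d0{31:29} is already the correct g,r,s and is returned as is.
#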
   6351 
   6352 ext_grs:
   6353 	swap		%d1			# have d1.w point to round precision
   6354 	tst.b		%d1			# is rnd prec = extended?
   6355 	bne.b		ext_grs_not_ext		# no; go handle sgl or dbl
   6356 
   6357 #
    6358 # %d0 actually already holds g,r,s since _round() had it before calling
    6359 # this function. So, as long as we don't disturb it, we are "returning" it.
   6360 #
   6361 ext_grs_ext:
   6362 	swap		%d1			# yes; return to correct positions
   6363 	rts
   6364 
   6365 ext_grs_not_ext:
   6366 	movm.l		&0x3000, -(%sp)		# make some temp registers {d2/d3}
   6367 
   6368 	cmpi.b		%d1, &s_mode		# is rnd prec = sgl?
   6369 	bne.b		ext_grs_dbl		# no; go handle dbl
   6370 
   6371 #
   6372 # sgl:
   6373 #	96		64	  40	32		0
   6374 #	-----------------------------------------------------
   6375 #	| EXP	|XXXXXXX|	  |xx	|		|grs|
   6376 #	-----------------------------------------------------
   6377 #			<--(24)--->nn\			   /
   6378 #				   ee ---------------------
   6379 #				   ww		|
   6380 #						v
   6381 #				   gr	   new sticky
   6382 #
   6383 ext_grs_sgl:
   6384 	bfextu		FTEMP_HI(%a0){&24:&2}, %d3 # sgl prec. g-r are 2 bits right
   6385 	mov.l		&30, %d2		# of the sgl prec. limits
   6386 	lsl.l		%d2, %d3		# shift g-r bits to MSB of d3
   6387 	mov.l		FTEMP_HI(%a0), %d2	# get word 2 for s-bit test
   6388 	and.l		&0x0000003f, %d2	# s bit is the or of all other
   6389 	bne.b		ext_grs_st_stky		# bits to the right of g-r
   6390 	tst.l		FTEMP_LO(%a0)		# test lower mantissa
   6391 	bne.b		ext_grs_st_stky		# if any are set, set sticky
   6392 	tst.l		%d0			# test original g,r,s
   6393 	bne.b		ext_grs_st_stky		# if any are set, set sticky
   6394 	bra.b		ext_grs_end_sd		# if words 3 and 4 are clr, exit
   6395 
   6396 #
   6397 # dbl:
   6398 #	96		64	  	32	 11	0
   6399 #	-----------------------------------------------------
   6400 #	| EXP	|XXXXXXX|	  	|	 |xx	|grs|
   6401 #	-----------------------------------------------------
   6402 #						  nn\	    /
   6403 #						  ee -------
   6404 #						  ww	|
   6405 #							v
   6406 #						  gr	new sticky
   6407 #
   6408 ext_grs_dbl:
   6409 	bfextu		FTEMP_LO(%a0){&21:&2}, %d3 # dbl-prec. g-r are 2 bits right
   6410 	mov.l		&30, %d2		# of the dbl prec. limits
   6411 	lsl.l		%d2, %d3		# shift g-r bits to the MSB of d3
   6412 	mov.l		FTEMP_LO(%a0), %d2	# get lower mantissa  for s-bit test
   6413 	and.l		&0x000001ff, %d2	# s bit is the or-ing of all
   6414 	bne.b		ext_grs_st_stky		# other bits to the right of g-r
    6415 	tst.l		%d0			# test original g,r,s
   6416 	bne.b		ext_grs_st_stky		# if any are set, set sticky
   6417 	bra.b		ext_grs_end_sd		# if clear, exit
   6418 
   6419 ext_grs_st_stky:
   6420 	bset		&rnd_stky_bit, %d3	# set sticky bit
   6421 ext_grs_end_sd:
   6422 	mov.l		%d3, %d0		# return grs to d0
   6423 
   6424 	movm.l		(%sp)+, &0xc		# restore scratch registers {d2/d3}
   6425 
   6426 	swap		%d1			# restore d1 to original
   6427 	rts
   6428 
   6429 #########################################################################
   6430 # norm(): normalize the mantissa of an extended precision input. the	#
   6431 #	  input operand should not be normalized already.		#
   6432 #									#
   6433 # XDEF ****************************************************************	#
   6434 #	norm()								#
   6435 #									#
   6436 # XREF **************************************************************** #
   6437 #	none								#
   6438 #									#
   6439 # INPUT *************************************************************** #
   6440 #	a0 = pointer fp extended precision operand to normalize		#
   6441 #									#
   6442 # OUTPUT ************************************************************** #
   6443 # 	d0 = number of bit positions the mantissa was shifted		#
   6444 #	a0 = the input operand's mantissa is normalized; the exponent	#
   6445 #	     is unchanged.						#
   6446 #									#
   6447 #########################################################################
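# Worked example (illustrative; the mantissa value is arbitrary):
# for hi(mantissa) = $00012345, bfffo finds 15 leading zeroes, so both
# mantissa longwords are shifted left by 15 (the top bits of
# lo(mantissa) fill in from below) and d0 returns 15.  If hi(mantissa)
# is zero, lo(mantissa) is shifted into the high longword and 32 is
# added to the returned shift count.
#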
   6448 	global		norm
   6449 norm:
   6450 	mov.l		%d2, -(%sp)		# create some temp regs
   6451 	mov.l		%d3, -(%sp)
   6452 
   6453 	mov.l		FTEMP_HI(%a0), %d0	# load hi(mantissa)
   6454 	mov.l		FTEMP_LO(%a0), %d1	# load lo(mantissa)
   6455 
   6456 	bfffo		%d0{&0:&32}, %d2	# how many places to shift?
   6457 	beq.b		norm_lo			# hi(man) is all zeroes!
   6458 
   6459 norm_hi:
   6460 	lsl.l		%d2, %d0		# left shift hi(man)
   6461 	bfextu		%d1{&0:%d2}, %d3	# extract lo bits
   6462 
   6463 	or.l		%d3, %d0		# create hi(man)
   6464 	lsl.l		%d2, %d1		# create lo(man)
   6465 
   6466 	mov.l		%d0, FTEMP_HI(%a0)	# store new hi(man)
   6467 	mov.l		%d1, FTEMP_LO(%a0)	# store new lo(man)
   6468 
   6469 	mov.l		%d2, %d0		# return shift amount
   6470 
   6471 	mov.l		(%sp)+, %d3		# restore temp regs
   6472 	mov.l		(%sp)+, %d2
   6473 
   6474 	rts
   6475 
   6476 norm_lo:
   6477 	bfffo		%d1{&0:&32}, %d2	# how many places to shift?
   6478 	lsl.l		%d2, %d1		# shift lo(man)
   6479 	add.l		&32, %d2		# add 32 to shft amount
   6480 
   6481 	mov.l		%d1, FTEMP_HI(%a0)	# store hi(man)
   6482 	clr.l		FTEMP_LO(%a0)		# lo(man) is now zero
   6483 
   6484 	mov.l		%d2, %d0		# return shift amount
   6485 
   6486 	mov.l		(%sp)+, %d3		# restore temp regs
   6487 	mov.l		(%sp)+, %d2
   6488 
   6489 	rts
   6490 
   6491 #########################################################################
   6492 # unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO	#
   6493 #		- returns corresponding optype tag			#
   6494 #									#
   6495 # XDEF ****************************************************************	#
   6496 #	unnorm_fix()							#
   6497 #									#
   6498 # XREF **************************************************************** #
   6499 #	norm() - normalize the mantissa					#
   6500 #									#
   6501 # INPUT *************************************************************** #
   6502 #	a0 = pointer to unnormalized extended precision number		#
   6503 #									#
   6504 # OUTPUT ************************************************************** #
   6505 #	d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO	#
   6506 #	a0 = input operand has been converted to a norm, denorm, or	#
   6507 #	     zero; both the exponent and mantissa are changed.		#
   6508 #									#
   6509 #########################################################################
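# Example (illustrative; the exponents are arbitrary): an UNNORM whose
# mantissa needs 8 shifts and whose biased exponent is $0020 becomes a
# NORM with exponent $0018; with a biased exponent of only $0005 it can
# be shifted just 5 places before the exponent reaches zero, so it
# becomes a DENORM; if the mantissa is all zeroes it is tagged ZERO.
#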
   6510 
   6511 	global		unnorm_fix
   6512 unnorm_fix:
   6513 	bfffo		FTEMP_HI(%a0){&0:&32}, %d0 # how many shifts are needed?
   6514 	bne.b		unnorm_shift		# hi(man) is not all zeroes
   6515 
   6516 #
   6517 # hi(man) is all zeroes so see if any bits in lo(man) are set
   6518 #
   6519 unnorm_chk_lo:
   6520 	bfffo		FTEMP_LO(%a0){&0:&32}, %d0 # is operand really a zero?
   6521 	beq.w		unnorm_zero		# yes
   6522 
   6523 	add.w		&32, %d0		# no; fix shift distance
   6524 
   6525 #
   6526 # d0 = # shifts needed for complete normalization
   6527 #
   6528 unnorm_shift:
   6529 	clr.l		%d1			# clear top word
   6530 	mov.w		FTEMP_EX(%a0), %d1	# extract exponent
   6531 	and.w		&0x7fff, %d1		# strip off sgn
   6532 
   6533 	cmp.w		%d0, %d1		# will denorm push exp < 0?
   6534 	bgt.b		unnorm_nrm_zero		# yes; denorm only until exp = 0
   6535 
   6536 #
   6537 # exponent would not go < 0. therefore, number stays normalized
   6538 #
   6539 	sub.w		%d0, %d1		# shift exponent value
   6540 	mov.w		FTEMP_EX(%a0), %d0	# load old exponent
   6541 	and.w		&0x8000, %d0		# save old sign
   6542 	or.w		%d0, %d1		# {sgn,new exp}
   6543 	mov.w		%d1, FTEMP_EX(%a0)	# insert new exponent
   6544 
   6545 	bsr.l		norm			# normalize UNNORM
   6546 
   6547 	mov.b		&NORM, %d0		# return new optype tag
   6548 	rts
   6549 
   6550 #
   6551 # exponent would go < 0, so only denormalize until exp = 0
   6552 #
   6553 unnorm_nrm_zero:
   6554 	cmp.b		%d1, &32		# is exp <= 32?
   6555 	bgt.b		unnorm_nrm_zero_lrg	# no; go handle large exponent
   6556 
   6557 	bfextu		FTEMP_HI(%a0){%d1:&32}, %d0 # extract new hi(man)
   6558 	mov.l		%d0, FTEMP_HI(%a0)	# save new hi(man)
   6559 
   6560 	mov.l		FTEMP_LO(%a0), %d0	# fetch old lo(man)
   6561 	lsl.l		%d1, %d0		# extract new lo(man)
   6562 	mov.l		%d0, FTEMP_LO(%a0)	# save new lo(man)
   6563 
   6564 	and.w		&0x8000, FTEMP_EX(%a0)	# set exp = 0
   6565 
   6566 	mov.b		&DENORM, %d0		# return new optype tag
   6567 	rts
   6568 
   6569 #
   6570 # only mantissa bits set are in lo(man)
   6571 #
   6572 unnorm_nrm_zero_lrg:
   6573 	sub.w		&32, %d1		# adjust shft amt by 32
   6574 
   6575 	mov.l		FTEMP_LO(%a0), %d0	# fetch old lo(man)
   6576 	lsl.l		%d1, %d0		# left shift lo(man)
   6577 
   6578 	mov.l		%d0, FTEMP_HI(%a0)	# store new hi(man)
   6579 	clr.l		FTEMP_LO(%a0)		# lo(man) = 0
   6580 
   6581 	and.w		&0x8000, FTEMP_EX(%a0)	# set exp = 0
   6582 
   6583 	mov.b		&DENORM, %d0		# return new optype tag
   6584 	rts
   6585 
   6586 #
   6587 # whole mantissa is zero so this UNNORM is actually a zero
   6588 #
   6589 unnorm_zero:
   6590 	and.w		&0x8000, FTEMP_EX(%a0) 	# force exponent to zero
   6591 
   6592 	mov.b		&ZERO, %d0		# fix optype tag
   6593 	rts
   6594 
   6595 #########################################################################
   6596 # XDEF ****************************************************************	#
   6597 # 	set_tag_x(): return the optype of the input ext fp number	#
   6598 #									#
   6599 # XREF ****************************************************************	#
   6600 #	None								#
   6601 #									#
   6602 # INPUT ***************************************************************	#
   6603 #	a0 = pointer to extended precision operand			#
   6604 # 									#
   6605 # OUTPUT **************************************************************	#
   6606 #	d0 = value of type tag						#
   6607 # 		one of: NORM, INF, QNAN, SNAN, DENORM, UNNORM, ZERO	#
   6608 #									#
   6609 # ALGORITHM ***********************************************************	#
   6610 #	Simply test the exponent, j-bit, and mantissa values to 	#
   6611 # determine the type of operand.					#
   6612 #	If it's an unnormalized zero, alter the operand and force it	#
   6613 # to be a normal zero.							#
   6614 #									#
   6615 #########################################################################
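# Example (illustrative): exponent $7fff with an all-zero mantissa
# (ignoring the explicit integer bit) is INF; exponent $7fff with a
# non-zero mantissa is QNAN or SNAN depending on the msb of the
# fraction (set = QNAN); a non-zero, non-maximum exponent with the
# j-bit clear is an UNNORM; a zero exponent with the j-bit clear and a
# non-zero mantissa is a DENORM; an "unnormalized zero" is rewritten
# in place as ZERO.
#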
   6616 
   6617 	global		set_tag_x
   6618 set_tag_x:
   6619 	mov.w		FTEMP_EX(%a0), %d0	# extract exponent
   6620 	andi.w		&0x7fff, %d0		# strip off sign
   6621 	cmpi.w		%d0, &0x7fff		# is (EXP == MAX)?
   6622 	beq.b		inf_or_nan_x
   6623 not_inf_or_nan_x:
   6624 	btst		&0x7,FTEMP_HI(%a0)
   6625 	beq.b		not_norm_x
   6626 is_norm_x:
   6627 	mov.b		&NORM, %d0
   6628 	rts
   6629 not_norm_x:
   6630 	tst.w		%d0			# is exponent = 0?
   6631 	bne.b		is_unnorm_x
   6632 not_unnorm_x:
   6633 	tst.l		FTEMP_HI(%a0)
   6634 	bne.b		is_denorm_x
   6635 	tst.l		FTEMP_LO(%a0)
   6636 	bne.b		is_denorm_x
   6637 is_zero_x:
   6638 	mov.b		&ZERO, %d0
   6639 	rts
   6640 is_denorm_x:
   6641 	mov.b		&DENORM, %d0
   6642 	rts
    6643 # we must now distinguish "unnormalized zeroes", which we
    6644 # must convert to actual zeroes.
   6645 is_unnorm_x:
   6646 	tst.l		FTEMP_HI(%a0)
   6647 	bne.b		is_unnorm_reg_x
   6648 	tst.l		FTEMP_LO(%a0)
   6649 	bne.b		is_unnorm_reg_x
   6650 # it's an "unnormalized zero". let's convert it to an actual zero...
   6651 	andi.w		&0x8000,FTEMP_EX(%a0)	# clear exponent
   6652 	mov.b		&ZERO, %d0
   6653 	rts
   6654 is_unnorm_reg_x:
   6655 	mov.b		&UNNORM, %d0
   6656 	rts
   6657 inf_or_nan_x:
   6658 	tst.l		FTEMP_LO(%a0)
   6659 	bne.b		is_nan_x
   6660 	mov.l		FTEMP_HI(%a0), %d0
   6661 	and.l		&0x7fffffff, %d0	# msb is a don't care!
   6662 	bne.b		is_nan_x
   6663 is_inf_x:
   6664 	mov.b		&INF, %d0
   6665 	rts
   6666 is_nan_x:
   6667 	btst		&0x6, FTEMP_HI(%a0)
   6668 	beq.b		is_snan_x
   6669 	mov.b		&QNAN, %d0
   6670 	rts
   6671 is_snan_x:
   6672 	mov.b		&SNAN, %d0
   6673 	rts
   6674 
   6675 #########################################################################
   6676 # XDEF ****************************************************************	#
   6677 # 	set_tag_d(): return the optype of the input dbl fp number	#
   6678 #									#
   6679 # XREF ****************************************************************	#
   6680 #	None								#
   6681 #									#
   6682 # INPUT ***************************************************************	#
   6683 #	a0 = points to double precision operand				#
   6684 # 									#
   6685 # OUTPUT **************************************************************	#
   6686 #	d0 = value of type tag						#
   6687 # 		one of: NORM, INF, QNAN, SNAN, DENORM, ZERO		#
   6688 #									#
   6689 # ALGORITHM ***********************************************************	#
   6690 #	Simply test the exponent, j-bit, and mantissa values to 	#
   6691 # determine the type of operand.					#
   6692 #									#
   6693 #########################################################################
   6694 
   6695 	global		set_tag_d
   6696 set_tag_d:
   6697 	mov.l		FTEMP(%a0), %d0
   6698 	mov.l		%d0, %d1
   6699 
   6700 	andi.l		&0x7ff00000, %d0
   6701 	beq.b		zero_or_denorm_d
   6702 
   6703 	cmpi.l		%d0, &0x7ff00000
   6704 	beq.b		inf_or_nan_d
   6705 
   6706 is_norm_d:
   6707 	mov.b		&NORM, %d0
   6708 	rts
   6709 zero_or_denorm_d:
   6710 	and.l		&0x000fffff, %d1
   6711 	bne		is_denorm_d
   6712 	tst.l		4+FTEMP(%a0)
   6713 	bne		is_denorm_d
   6714 is_zero_d:
   6715 	mov.b		&ZERO, %d0
   6716 	rts
   6717 is_denorm_d:
   6718 	mov.b		&DENORM, %d0
   6719 	rts
   6720 inf_or_nan_d:
   6721 	and.l		&0x000fffff, %d1
   6722 	bne		is_nan_d
   6723 	tst.l		4+FTEMP(%a0)
   6724 	bne		is_nan_d
   6725 is_inf_d:
   6726 	mov.b		&INF, %d0
   6727 	rts
   6728 is_nan_d:
   6729 	btst		&19, %d1
   6730 	bne		is_qnan_d
   6731 is_snan_d:
   6732 	mov.b		&SNAN, %d0
   6733 	rts
   6734 is_qnan_d:
   6735 	mov.b		&QNAN, %d0
   6736 	rts
   6737 
   6738 #########################################################################
   6739 # XDEF ****************************************************************	#
   6740 # 	set_tag_s(): return the optype of the input sgl fp number	#
   6741 #									#
   6742 # XREF ****************************************************************	#
   6743 #	None								#
   6744 #									#
   6745 # INPUT ***************************************************************	#
   6746 #	a0 = pointer to single precision operand			#
   6747 # 									#
   6748 # OUTPUT **************************************************************	#
   6749 #	d0 = value of type tag						#
   6750 # 		one of: NORM, INF, QNAN, SNAN, DENORM, ZERO		#
   6751 #									#
   6752 # ALGORITHM ***********************************************************	#
   6753 #	Simply test the exponent, j-bit, and mantissa values to 	#
   6754 # determine the type of operand.					#
   6755 #									#
   6756 #########################################################################
   6757 
   6758 	global		set_tag_s
   6759 set_tag_s:
   6760 	mov.l		FTEMP(%a0), %d0
   6761 	mov.l		%d0, %d1
   6762 
   6763 	andi.l		&0x7f800000, %d0
   6764 	beq.b		zero_or_denorm_s
   6765 
   6766 	cmpi.l		%d0, &0x7f800000
   6767 	beq.b		inf_or_nan_s
   6768 
   6769 is_norm_s:
   6770 	mov.b		&NORM, %d0
   6771 	rts
   6772 zero_or_denorm_s:
   6773 	and.l		&0x007fffff, %d1
   6774 	bne		is_denorm_s
   6775 is_zero_s:
   6776 	mov.b		&ZERO, %d0
   6777 	rts
   6778 is_denorm_s:
   6779 	mov.b		&DENORM, %d0
   6780 	rts
   6781 inf_or_nan_s:
   6782 	and.l		&0x007fffff, %d1
   6783 	bne		is_nan_s
   6784 is_inf_s:
   6785 	mov.b		&INF, %d0
   6786 	rts
   6787 is_nan_s:
   6788 	btst		&22, %d1
   6789 	bne		is_qnan_s
   6790 is_snan_s:
   6791 	mov.b		&SNAN, %d0
   6792 	rts
   6793 is_qnan_s:
   6794 	mov.b		&QNAN, %d0
   6795 	rts
   6796 
   6797 #########################################################################
   6798 # XDEF ****************************************************************	#
   6799 # 	unf_res(): routine to produce default underflow result of a 	#
   6800 #	 	   scaled extended precision number; this is used by 	#
   6801 #		   fadd/fdiv/fmul/etc. emulation routines.		#
   6802 # 	unf_res4(): same as above but for fsglmul/fsgldiv which use	#
   6803 #		    single round prec and extended prec mode.		#
   6804 #									#
   6805 # XREF ****************************************************************	#
   6806 #	_denorm() - denormalize according to scale factor		#
   6807 # 	_round() - round denormalized number according to rnd prec	#
   6808 #									#
   6809 # INPUT ***************************************************************	#
    6810 #	a0 = pointer to extended precision operand			#
   6811 #	d0 = scale factor						#
   6812 #	d1 = rounding precision/mode					#
   6813 #									#
   6814 # OUTPUT **************************************************************	#
   6815 #	a0 = pointer to default underflow result in extended precision	#
   6816 #	d0.b = result FPSR_cc which caller may or may not want to save	#
   6817 #									#
   6818 # ALGORITHM ***********************************************************	#
   6819 # 	Convert the input operand to "internal format" which means the	#
   6820 # exponent is extended to 16 bits and the sign is stored in the unused	#
    6821 # portion of the extended precision operand. Denormalize the number	#
   6822 # according to the scale factor passed in d0. Then, round the 		#
   6823 # denormalized result.							#
   6824 # 	Set the FPSR_exc bits as appropriate but return the cc bits in	#
   6825 # d0 in case the caller doesn't want to save them (as is the case for	#
   6826 # fmove out).								#
   6827 # 	unf_res4() for fsglmul/fsgldiv forces the denorm to extended	#
   6828 # precision and the rounding mode to single.				#
   6829 #									#
   6830 #########################################################################
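# Worked example (illustrative; the values are arbitrary): for an
# operand whose biased exponent is $3fff and whose scale factor is
# $4010, the 16-bit "internal" exponent becomes $3fff - $4010 = -$11;
# _denorm() then shifts the mantissa until the exponent reaches the
# rounding precision threshold, _round() rounds the result, and if the
# rounded mantissa is all zeroes the Z condition code bit is returned
# in d0.
#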
   6831 	global		unf_res
   6832 unf_res:
   6833 	mov.l		%d1, -(%sp)		# save rnd prec,mode on stack
   6834 
   6835 	btst		&0x7, FTEMP_EX(%a0)	# make "internal" format
   6836 	sne		FTEMP_SGN(%a0)
   6837 
   6838 	mov.w		FTEMP_EX(%a0), %d1	# extract exponent
   6839 	and.w		&0x7fff, %d1
   6840 	sub.w		%d0, %d1
   6841 	mov.w		%d1, FTEMP_EX(%a0)	# insert 16 bit exponent
   6842 
   6843 	mov.l		%a0, -(%sp)		# save operand ptr during calls
   6844 
   6845 	mov.l		0x4(%sp),%d0		# pass rnd prec.
   6846 	andi.w		&0x00c0,%d0
   6847 	lsr.w		&0x4,%d0
   6848 	bsr.l		_denorm			# denorm result
   6849 
   6850 	mov.l		(%sp),%a0
   6851 	mov.w		0x6(%sp),%d1		# load prec:mode into %d1
   6852 	andi.w		&0xc0,%d1		# extract rnd prec
   6853 	lsr.w		&0x4,%d1
   6854 	swap		%d1
   6855 	mov.w		0x6(%sp),%d1
   6856 	andi.w		&0x30,%d1
   6857 	lsr.w		&0x4,%d1
   6858 	bsr.l		_round			# round the denorm
   6859 
   6860 	mov.l		(%sp)+, %a0
   6861 
   6862 # result is now rounded properly. convert back to normal format
   6863 	bclr		&0x7, FTEMP_EX(%a0)	# clear sgn first; may have residue
   6864 	tst.b		FTEMP_SGN(%a0)		# is "internal result" sign set?
   6865 	beq.b		unf_res_chkifzero	# no; result is positive
   6866 	bset		&0x7, FTEMP_EX(%a0)	# set result sgn
   6867 	clr.b		FTEMP_SGN(%a0)		# clear temp sign
   6868 
   6869 # the number may have become zero after rounding. set ccodes accordingly.
   6870 unf_res_chkifzero:
   6871 	clr.l		%d0
   6872 	tst.l		FTEMP_HI(%a0)		# is value now a zero?
   6873 	bne.b		unf_res_cont		# no
   6874 	tst.l		FTEMP_LO(%a0)
   6875 	bne.b		unf_res_cont		# no
   6876 #	bset		&z_bit, FPSR_CC(%a6)	# yes; set zero ccode bit
   6877 	bset		&z_bit, %d0		# yes; set zero ccode bit
   6878 
   6879 unf_res_cont:
   6880 
   6881 #
   6882 # can inex1 also be set along with unfl and inex2???
   6883 #
   6884 # we know that underflow has occurred. aunfl should be set if INEX2 is also set.
   6885 #
   6886 	btst		&inex2_bit, FPSR_EXCEPT(%a6) # is INEX2 set?
   6887 	beq.b		unf_res_end		# no
   6888 	bset		&aunfl_bit, FPSR_AEXCEPT(%a6) # yes; set aunfl
   6889 
   6890 unf_res_end:
   6891 	add.l		&0x4, %sp		# clear stack
   6892 	rts
   6893 
   6894 # unf_res() for fsglmul() and fsgldiv().
   6895 	global		unf_res4
   6896 unf_res4:
   6897 	mov.l		%d1,-(%sp)		# save rnd prec,mode on stack
   6898 
   6899 	btst		&0x7,FTEMP_EX(%a0)	# make "internal" format
   6900 	sne		FTEMP_SGN(%a0)
   6901 
   6902 	mov.w		FTEMP_EX(%a0),%d1	# extract exponent
   6903 	and.w		&0x7fff,%d1
   6904 	sub.w		%d0,%d1
   6905 	mov.w		%d1,FTEMP_EX(%a0)	# insert 16 bit exponent
   6906 
   6907 	mov.l		%a0,-(%sp)		# save operand ptr during calls
   6908 
   6909 	clr.l		%d0			# force rnd prec = ext
   6910 	bsr.l		_denorm			# denorm result
   6911 
   6912 	mov.l		(%sp),%a0
   6913 	mov.w		&s_mode,%d1		# force rnd prec = sgl
   6914 	swap		%d1
   6915 	mov.w		0x6(%sp),%d1		# load rnd mode
    6916 	andi.w		&0x30,%d1		# extract rnd mode
   6917 	lsr.w		&0x4,%d1
   6918 	bsr.l		_round			# round the denorm
   6919 
   6920 	mov.l		(%sp)+,%a0
   6921 
   6922 # result is now rounded properly. convert back to normal format
   6923 	bclr		&0x7,FTEMP_EX(%a0)	# clear sgn first; may have residue
   6924 	tst.b		FTEMP_SGN(%a0)		# is "internal result" sign set?
   6925 	beq.b		unf_res4_chkifzero	# no; result is positive
   6926 	bset		&0x7,FTEMP_EX(%a0)	# set result sgn
   6927 	clr.b		FTEMP_SGN(%a0)		# clear temp sign
   6928 
   6929 # the number may have become zero after rounding. set ccodes accordingly.
   6930 unf_res4_chkifzero:
   6931 	clr.l		%d0
   6932 	tst.l		FTEMP_HI(%a0)		# is value now a zero?
   6933 	bne.b		unf_res4_cont		# no
   6934 	tst.l		FTEMP_LO(%a0)
   6935 	bne.b		unf_res4_cont		# no
   6936 #	bset		&z_bit,FPSR_CC(%a6)	# yes; set zero ccode bit
   6937 	bset		&z_bit,%d0		# yes; set zero ccode bit
   6938 
   6939 unf_res4_cont:
   6940 
   6941 #
   6942 # can inex1 also be set along with unfl and inex2???
   6943 #
   6944 # we know that underflow has occurred. aunfl should be set if INEX2 is also set.
   6945 #
   6946 	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
   6947 	beq.b		unf_res4_end		# no
   6948 	bset		&aunfl_bit,FPSR_AEXCEPT(%a6) # yes; set aunfl
   6949 
   6950 unf_res4_end:
   6951 	add.l		&0x4,%sp		# clear stack
   6952 	rts
   6953 
   6954 #########################################################################
   6955 # XDEF ****************************************************************	#
   6956 #	ovf_res(): routine to produce the default overflow result of	#
   6957 #		   an overflowing number.				#
   6958 #	ovf_res2(): same as above but the rnd mode/prec are passed	#
   6959 #		    differently.					#
   6960 #									#
   6961 # XREF ****************************************************************	#
   6962 #	none								#
   6963 #									#
   6964 # INPUT ***************************************************************	#
   6965 #	d1.b 	= '-1' => (-); '0' => (+)				#
   6966 #   ovf_res():								#
   6967 #	d0 	= rnd mode/prec						#
   6968 #   ovf_res2():								#
   6969 #	hi(d0) 	= rnd prec						#
   6970 #	lo(d0)	= rnd mode						#
   6971 #									#
   6972 # OUTPUT **************************************************************	#
   6973 #	a0   	= points to extended precision result			#
   6974 #	d0.b 	= condition code bits					#
   6975 #									#
   6976 # ALGORITHM ***********************************************************	#
   6977 #	The default overflow result can be determined by the sign of	#
   6978 # the result and the rounding mode/prec in effect. These bits are	#
   6979 # concatenated together to create an index into the default result 	#
   6980 # table. A pointer to the correct result is returned in a0. The		#
   6981 # resulting condition codes are returned in d0 in case the caller 	#
   6982 # doesn't want FPSR_cc altered (as is the case for fmove out).		#
   6983 #									#
   6984 #########################################################################
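# Worked example (illustrative): the table index below is built as
#	index = (sign ? $10 : 0) + (rnd prec field * 4) + rnd mode
# so a negative, double precision, round-to-zero overflow uses entry
# $10 + ($2 * 4) + $1 = $19: the ccode byte $08 (the N bit) from
# tbl_ovfl_cc and the largest-magnitude negative double
# ($c3fe0000,$ffffffff,$fffff800,$00000000) from tbl_ovfl_result.
#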
   6985 
   6986 	global		ovf_res
   6987 ovf_res:
   6988 	andi.w		&0x10,%d1		# keep result sign
   6989 	lsr.b		&0x4,%d0		# shift prec/mode
   6990 	or.b		%d0,%d1			# concat the two
   6991 	mov.w		%d1,%d0			# make a copy
   6992 	lsl.b		&0x1,%d1		# multiply d1 by 2
   6993 	bra.b		ovf_res_load
   6994 
   6995 	global		ovf_res2
   6996 ovf_res2:
   6997 	and.w		&0x10, %d1		# keep result sign
   6998 	or.b		%d0, %d1		# insert rnd mode
   6999 	swap		%d0
   7000 	or.b		%d0, %d1		# insert rnd prec
   7001 	mov.w		%d1, %d0		# make a copy
   7002 	lsl.b		&0x1, %d1		# shift left by 1
   7003 
   7004 #
    7005 # use the rounding mode, precision, and result sign as an index into the
   7006 # two tables below to fetch the default result and the result ccodes.
   7007 #
   7008 ovf_res_load:
   7009 	mov.b		(tbl_ovfl_cc.b,%pc,%d0.w*1), %d0 # fetch result ccodes
   7010 	lea		(tbl_ovfl_result.b,%pc,%d1.w*8), %a0 # return result ptr
   7011 
   7012 	rts
   7013 
   7014 tbl_ovfl_cc:
   7015 	byte		0x2, 0x0, 0x0, 0x2
   7016 	byte		0x2, 0x0, 0x0, 0x2
   7017 	byte		0x2, 0x0, 0x0, 0x2
   7018 	byte		0x0, 0x0, 0x0, 0x0
   7019 	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8
   7020 	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8
   7021 	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8
   7022 
   7023 tbl_ovfl_result:
   7024 	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
   7025 	long		0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RZ
   7026 	long		0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RM
   7027 	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
   7028 
   7029 	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
   7030 	long		0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RZ
   7031 	long		0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RM
   7032 	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
   7033 
   7034 	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
   7035 	long		0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RZ
   7036 	long		0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RM
   7037 	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
   7038 
   7039 	long		0x00000000,0x00000000,0x00000000,0x00000000
   7040 	long		0x00000000,0x00000000,0x00000000,0x00000000
   7041 	long		0x00000000,0x00000000,0x00000000,0x00000000
   7042 	long		0x00000000,0x00000000,0x00000000,0x00000000
   7043 
   7044 	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
   7045 	long		0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RZ
   7046 	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
   7047 	long		0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RP
   7048 
   7049 	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
   7050 	long		0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RZ
   7051 	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
   7052 	long		0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RP
   7053 
   7054 	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
   7055 	long		0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RZ
   7056 	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
   7057 	long		0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RP
   7058 
   7059 #########################################################################
   7060 # XDEF ****************************************************************	#
   7061 # 	fout(): move from fp register to memory or data register	#
   7062 #									#
   7063 # XREF ****************************************************************	#
   7064 #	_round() - needed to create EXOP for sgl/dbl precision		#
   7065 #	norm() - needed to create EXOP for extended precision		#
   7066 #	ovf_res() - create default overflow result for sgl/dbl precision#
   7067 #	unf_res() - create default underflow result for sgl/dbl prec.	#
   7068 #	dst_dbl() - create rounded dbl precision result.		#
   7069 #	dst_sgl() - create rounded sgl precision result.		#
   7070 #	fetch_dreg() - fetch dynamic k-factor reg for packed.		#
   7071 #	bindec() - convert FP binary number to packed number.		#
   7072 #	_mem_write() - write data to memory.				#
   7073 #	_mem_write2() - write data to memory unless supv mode -(a7) exc.#
   7074 #	_dmem_write_{byte,word,long}() - write data to memory.		#
   7075 #	store_dreg_{b,w,l}() - store data to data register file.	#
   7076 #	facc_out_{b,w,l,d,x}() - data access error occurred.		#
   7077 #									#
   7078 # INPUT ***************************************************************	#
   7079 #	a0 = pointer to extended precision source operand		#
   7080 #	d0 = round prec,mode						#
   7081 # 									#
   7082 # OUTPUT **************************************************************	#
   7083 #	fp0 : intermediate underflow or overflow result if		#
   7084 #	      OVFL/UNFL occurred for a sgl or dbl operand		#
   7085 #									#
   7086 # ALGORITHM ***********************************************************	#
   7087 #	This routine is accessed by many handlers that need to do an	#
   7088 # opclass three move of an operand out to memory.			#
   7089 #	Decode an fmove out (opclass 3) instruction to determine if	#
   7090 # it's b,w,l,s,d,x, or p in size. b,w,l can be stored to either a data	#
   7091 # register or memory. The algorithm uses a standard "fmove" to create	#
   7092 # the rounded result. Also, since exceptions are disabled, this also	#
    7093 # creates the correct OPERR default result if appropriate.		#
   7094 #	For sgl or dbl precision, overflow or underflow can occur. If	#
    7095 # either occurs and is enabled, the EXOP must also be created.		#
   7096 #	For extended precision, the stacked <ea> must be fixed along	#
   7097 # w/ the address index register as appropriate w/ _calc_ea_fout(). If	#
   7098 # the source is a denorm and if underflow is enabled, an EXOP must be	#
   7099 # created.								#
   7100 # 	For packed, the k-factor must be fetched from the instruction	#
   7101 # word or a data register. The <ea> must be fixed as w/ extended 	#
   7102 # precision. Then, bindec() is called to create the appropriate 	#
   7103 # packed result.							#
   7104 #	If at any time an access error is flagged by one of the move-	#
   7105 # to-memory routines, then a special exit must be made so that the	#
   7106 # access error can be handled properly.					#
   7107 #									#
   7108 #########################################################################
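# Example (illustrative): the destination format field in bits 12-10 of
# the stacked command word indexes tbl_fout below; e.g. a format code of
# %110 (byte) dispatches to fout_byte, while %011 and %111 (packed with
# static or dynamic k-factor) both dispatch to fout_pack.
#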
   7109 
   7110 	global		fout
   7111 fout:
   7112 	bfextu		EXC_CMDREG(%a6){&3:&3},%d1 # extract dst fmt
   7113 	mov.w		(tbl_fout.b,%pc,%d1.w*2),%a1 # use as index
   7114 	jmp		(tbl_fout.b,%pc,%a1)	# jump to routine
   7115 
   7116 	swbeg		&0x8
   7117 tbl_fout:
   7118 	short		fout_long	-	tbl_fout
   7119 	short		fout_sgl	-	tbl_fout
   7120 	short		fout_ext	-	tbl_fout
   7121 	short		fout_pack	-	tbl_fout
   7122 	short		fout_word	-	tbl_fout
   7123 	short		fout_dbl	-	tbl_fout
   7124 	short		fout_byte	-	tbl_fout
   7125 	short		fout_pack	-	tbl_fout
   7126 
   7127 #################################################################
   7128 # fmove.b out ###################################################
   7129 #################################################################
   7130 
   7131 # Only "Unimplemented Data Type" exceptions enter here. The operand
   7132 # is either a DENORM or a NORM.
   7133 fout_byte:
   7134 	tst.b		STAG(%a6)		# is operand normalized?
   7135 	bne.b		fout_byte_denorm	# no
   7136 
   7137 	fmovm.x		SRC(%a0),&0x80		# load value
   7138 
   7139 fout_byte_norm:
   7140 	fmov.l		%d0,%fpcr		# insert rnd prec,mode
   7141 
   7142 	fmov.b		%fp0,%d0		# exec move out w/ correct rnd mode
   7143 
   7144 	fmov.l		&0x0,%fpcr		# clear FPCR
   7145 	fmov.l		%fpsr,%d1		# fetch FPSR
   7146 	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits
   7147 
   7148 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
   7149 	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
   7150 	beq.b		fout_byte_dn		# must save to integer regfile
   7151 
   7152 	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
   7153 	bsr.l		_dmem_write_byte	# write byte
   7154 
   7155 	tst.l		%d1			# did dstore fail?
   7156 	bne.l		facc_out_b		# yes
   7157 
   7158 	rts
   7159 
   7160 fout_byte_dn:
   7161 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
   7162 	andi.w		&0x7,%d1
   7163 	bsr.l		store_dreg_b
   7164 	rts
   7165 
   7166 fout_byte_denorm:
   7167 	mov.l		SRC_EX(%a0),%d1
   7168 	andi.l		&0x80000000,%d1		# keep DENORM sign
   7169 	ori.l		&0x00800000,%d1		# make smallest sgl
   7170 	fmov.s		%d1,%fp0
   7171 	bra.b		fout_byte_norm
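         # note (added; not part of the original source): substituting the
         # smallest-magnitude sgl with the DENORM's sign is presumably safe here
         # because any extended DENORM is far smaller than one, so the integer
         # result (0 or +/-1) and the inexact status depend only on the sign and
         # rounding mode, both of which the substitute preserves. the word and
         # long denorm cases below use the same trick.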
   7172 
   7173 #################################################################
   7174 # fmove.w out ###################################################
   7175 #################################################################
   7176 
   7177 # Only "Unimplemented Data Type" exceptions enter here. The operand
   7178 # is either a DENORM or a NORM.
   7179 fout_word:
   7180 	tst.b		STAG(%a6)		# is operand normalized?
   7181 	bne.b		fout_word_denorm	# no
   7182 
   7183 	fmovm.x		SRC(%a0),&0x80		# load value
   7184 
   7185 fout_word_norm:
   7186 	fmov.l		%d0,%fpcr		# insert rnd prec:mode
   7187 
   7188 	fmov.w		%fp0,%d0		# exec move out w/ correct rnd mode
   7189 
   7190 	fmov.l		&0x0,%fpcr		# clear FPCR
   7191 	fmov.l		%fpsr,%d1		# fetch FPSR
   7192 	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits
   7193 
   7194 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
   7195 	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
   7196 	beq.b		fout_word_dn		# must save to integer regfile
   7197 
   7198 	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
   7199 	bsr.l		_dmem_write_word	# write word
   7200 
   7201 	tst.l		%d1			# did dstore fail?
   7202 	bne.l		facc_out_w		# yes
   7203 
   7204 	rts
   7205 
   7206 fout_word_dn:
   7207 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
   7208 	andi.w		&0x7,%d1
   7209 	bsr.l		store_dreg_w
   7210 	rts
   7211 
   7212 fout_word_denorm:
   7213 	mov.l		SRC_EX(%a0),%d1
   7214 	andi.l		&0x80000000,%d1		# keep DENORM sign
   7215 	ori.l		&0x00800000,%d1		# make smallest sgl
   7216 	fmov.s		%d1,%fp0
   7217 	bra.b		fout_word_norm
   7218 
   7219 #################################################################
   7220 # fmove.l out ###################################################
   7221 #################################################################
   7222 
   7223 # Only "Unimplemented Data Type" exceptions enter here. The operand
   7224 # is either a DENORM or a NORM.
   7225 fout_long:
   7226 	tst.b		STAG(%a6)		# is operand normalized?
   7227 	bne.b		fout_long_denorm	# no
   7228 
   7229 	fmovm.x		SRC(%a0),&0x80		# load value
   7230 
   7231 fout_long_norm:
   7232 	fmov.l		%d0,%fpcr		# insert rnd prec:mode
   7233 
   7234 	fmov.l		%fp0,%d0		# exec move out w/ correct rnd mode
   7235 
   7236 	fmov.l		&0x0,%fpcr		# clear FPCR
   7237 	fmov.l		%fpsr,%d1		# fetch FPSR
   7238 	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits
   7239 
   7240 fout_long_write:
   7241 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
   7242 	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
   7243 	beq.b		fout_long_dn		# must save to integer regfile
   7244 
   7245 	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
   7246 	bsr.l		_dmem_write_long	# write long
   7247 
   7248 	tst.l		%d1			# did dstore fail?
   7249 	bne.l		facc_out_l		# yes
   7250 
   7251 	rts
   7252 
   7253 fout_long_dn:
   7254 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
   7255 	andi.w		&0x7,%d1
   7256 	bsr.l		store_dreg_l
   7257 	rts
   7258 
   7259 fout_long_denorm:
   7260 	mov.l		SRC_EX(%a0),%d1
   7261 	andi.l		&0x80000000,%d1		# keep DENORM sign
   7262 	ori.l		&0x00800000,%d1		# make smallest sgl
   7263 	fmov.s		%d1,%fp0
   7264 	bra.b		fout_long_norm
   7265 
   7266 #################################################################
   7267 # fmove.x out ###################################################
   7268 #################################################################
   7269 
   7270 # Only "Unimplemented Data Type" exceptions enter here. The operand
   7271 # is either a DENORM or a NORM.
   7272 # The DENORM causes an Underflow exception.
   7273 fout_ext:
   7274 
   7275 # we copy the extended precision result to FP_SCR0 so that the reserved
   7276 # 16-bit field gets zeroed. we do this since we promise not to disturb
   7277 # what's at SRC(a0).
   7278 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
   7279 	clr.w		2+FP_SCR0_EX(%a6)	# clear reserved field
   7280 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   7281 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   7282 
   7283 	fmovm.x		SRC(%a0),&0x80		# return result
   7284 
   7285 	bsr.l		_calc_ea_fout		# fix stacked <ea>
   7286 
   7287 	mov.l		%a0,%a1			# pass: dst addr
   7288 	lea		FP_SCR0(%a6),%a0	# pass: src addr
   7289 	mov.l		&0xc,%d0		# pass: opsize is 12 bytes
   7290 
   7291 # we must not yet write the extended precision data to the stack
   7292 # in the pre-decrement case from supervisor mode or else we'll corrupt
   7293 # the stack frame. so, leave it in FP_SRC for now and deal with it later...
   7294 	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
   7295 	beq.b		fout_ext_a7
   7296 
   7297 	bsr.l		_dmem_write		# write ext prec number to memory
   7298 
   7299 	tst.l		%d1			# did dstore fail?
   7300 	bne.w		fout_ext_err		# yes
   7301 
   7302 	tst.b		STAG(%a6)		# is operand normalized?
   7303 	bne.b		fout_ext_denorm		# no
   7304 	rts
   7305 
   7306 # the number is a DENORM. must set the underflow exception bit
   7307 fout_ext_denorm:
   7308 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set underflow exc bit
   7309 
   7310 	mov.b		FPCR_ENABLE(%a6),%d0
   7311 	andi.b		&0x0a,%d0		# is UNFL or INEX enabled?
   7312 	bne.b		fout_ext_exc		# yes
   7313 	rts
   7314 
   7315 # we don't want to do the write if the exception occurred in supervisor mode
   7316 # so _mem_write2() handles this for us.
   7317 fout_ext_a7:
   7318 	bsr.l		_mem_write2		# write ext prec number to memory
   7319 
   7320 	tst.l		%d1			# did dstore fail?
   7321 	bne.w		fout_ext_err		# yes
   7322 
   7323 	tst.b		STAG(%a6)		# is operand normalized?
   7324 	bne.b		fout_ext_denorm		# no
   7325 	rts
   7326 
   7327 fout_ext_exc:
   7328 	lea		FP_SCR0(%a6),%a0
   7329 	bsr.l		norm			# normalize the mantissa
   7330 	neg.w		%d0			# new exp = -(shft amt)
   7331 	andi.w		&0x7fff,%d0
   7332 	andi.w		&0x8000,FP_SCR0_EX(%a6)	# keep only old sign
   7333 	or.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
   7334 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   7335 	rts
   7336 
   7337 fout_ext_err:
   7338 	mov.l		EXC_A6(%a6),(%a6)	# fix stacked a6
   7339 	bra.l		facc_out_x
   7340 
   7341 #########################################################################
   7342 # fmove.s out ###########################################################
   7343 #########################################################################
   7344 fout_sgl:
   7345 	andi.b		&0x30,%d0		# clear rnd prec
   7346 	ori.b		&s_mode*0x10,%d0	# insert sgl prec
   7347 	mov.l		%d0,L_SCR3(%a6)		# save rnd prec,mode on stack
   7348 
   7349 #
   7350 # operand is a normalized number. first, we check to see if the move out
   7351 # would cause either an underflow or overflow. these cases are handled
   7352 # separately. otherwise, set the FPCR to the proper rounding mode and
   7353 # execute the move.
   7354 #
   7355 	mov.w		SRC_EX(%a0),%d0		# extract exponent
   7356 	andi.w		&0x7fff,%d0		# strip sign
   7357 
   7358 	cmpi.w		%d0,&SGL_HI		# will operand overflow?
   7359 	bgt.w		fout_sgl_ovfl		# yes; go handle OVFL
   7360 	beq.w		fout_sgl_may_ovfl	# maybe; go handle possible OVFL
   7361 	cmpi.w		%d0,&SGL_LO		# will operand underflow?
   7362 	blt.w		fout_sgl_unfl		# yes; go handle underflow
   7363 
   7364 #
   7365 # NORMs(in range) can be stored out by a simple "fmov.s"
   7366 # Unnormalized inputs can come through this point.
   7367 #
   7368 fout_sgl_exg:
   7369 	fmovm.x		SRC(%a0),&0x80		# fetch fop from stack
   7370 
   7371 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   7372 	fmov.l		&0x0,%fpsr		# clear FPSR
   7373 
   7374 	fmov.s		%fp0,%d0		# store does convert and round
   7375 
   7376 	fmov.l		&0x0,%fpcr		# clear FPCR
   7377 	fmov.l		%fpsr,%d1		# save FPSR
   7378 
   7379 	or.w		%d1,2+USER_FPSR(%a6) 	# set possible inex2/ainex
   7380 
   7381 fout_sgl_exg_write:
   7382 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
   7383 	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
   7384 	beq.b		fout_sgl_exg_write_dn	# must save to integer regfile
   7385 
   7386 	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
   7387 	bsr.l		_dmem_write_long	# write long
   7388 
   7389 	tst.l		%d1			# did dstore fail?
   7390 	bne.l		facc_out_l		# yes
   7391 
   7392 	rts
   7393 
   7394 fout_sgl_exg_write_dn:
   7395 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
   7396 	andi.w		&0x7,%d1
   7397 	bsr.l		store_dreg_l
   7398 	rts
   7399 
   7400 #
   7401 # here, we know that the operand would UNFL if moved out to single prec,
   7402 # so, denorm and round and then use generic store single routine to
   7403 # write the value to memory.
   7404 #
   7405 fout_sgl_unfl:
   7406 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
   7407 
   7408 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
   7409 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   7410 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   7411 	mov.l		%a0,-(%sp)
   7412 
   7413 	clr.l		%d0			# pass: S.F. = 0
   7414 
   7415 	cmpi.b		STAG(%a6),&DENORM	# fetch src optype tag
   7416 	bne.b		fout_sgl_unfl_cont	# let DENORMs fall through
   7417 
   7418 	lea		FP_SCR0(%a6),%a0
   7419 	bsr.l		norm			# normalize the DENORM
   7420 
   7421 fout_sgl_unfl_cont:
   7422 	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
   7423 	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
   7424 	bsr.l		unf_res			# calc default underflow result
   7425 
   7426 	lea		FP_SCR0(%a6),%a0	# pass: ptr to fop
   7427 	bsr.l		dst_sgl			# convert to single prec
   7428 
   7429 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
   7430 	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
   7431 	beq.b		fout_sgl_unfl_dn	# must save to integer regfile
   7432 
   7433 	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
   7434 	bsr.l		_dmem_write_long	# write long
   7435 
   7436 	tst.l		%d1			# did dstore fail?
   7437 	bne.l		facc_out_l		# yes
   7438 
   7439 	bra.b		fout_sgl_unfl_chkexc
   7440 
   7441 fout_sgl_unfl_dn:
   7442 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
   7443 	andi.w		&0x7,%d1
   7444 	bsr.l		store_dreg_l
   7445 
   7446 fout_sgl_unfl_chkexc:
   7447 	mov.b		FPCR_ENABLE(%a6),%d1
   7448 	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
   7449 	bne.w		fout_sd_exc_unfl	# yes
   7450 	addq.l		&0x4,%sp
   7451 	rts
   7452 
   7453 #
   7454 # it's definitely an overflow so call ovf_res to get the correct answer
   7455 #
   7456 fout_sgl_ovfl:
   7457 	tst.b		3+SRC_HI(%a0)		# is result inexact?
   7458 	bne.b		fout_sgl_ovfl_inex2
   7459 	tst.l		SRC_LO(%a0)		# is result inexact?
   7460 	bne.b		fout_sgl_ovfl_inex2
   7461 	ori.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
   7462 	bra.b		fout_sgl_ovfl_cont
   7463 fout_sgl_ovfl_inex2:
   7464 	ori.w		&ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
   7465 
   7466 fout_sgl_ovfl_cont:
   7467 	mov.l		%a0,-(%sp)
   7468 
   7469 # call ovf_res() w/ sgl prec and the correct rnd mode to create the default
   7470 # overflow result. DON'T save the returned ccodes from ovf_res() since
   7471 # fmove out doesn't alter them.
   7472 	tst.b		SRC_EX(%a0)		# is operand negative?
   7473 	smi		%d1			# set if so
   7474 	mov.l		L_SCR3(%a6),%d0		# pass: sgl prec,rnd mode
   7475 	bsr.l		ovf_res			# calc OVFL result
   7476 	fmovm.x		(%a0),&0x80		# load default overflow result
   7477 	fmov.s		%fp0,%d0		# store to single
   7478 
   7479 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
   7480 	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
   7481 	beq.b		fout_sgl_ovfl_dn	# must save to integer regfile
   7482 
   7483 	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
   7484 	bsr.l		_dmem_write_long	# write long
   7485 
   7486 	tst.l		%d1			# did dstore fail?
   7487 	bne.l		facc_out_l		# yes
   7488 
   7489 	bra.b		fout_sgl_ovfl_chkexc
   7490 
   7491 fout_sgl_ovfl_dn:
   7492 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
   7493 	andi.w		&0x7,%d1
   7494 	bsr.l		store_dreg_l
   7495 
   7496 fout_sgl_ovfl_chkexc:
   7497 	mov.b		FPCR_ENABLE(%a6),%d1
   7498 	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
   7499 	bne.w		fout_sd_exc_ovfl	# yes
   7500 	addq.l		&0x4,%sp
   7501 	rts
   7502 
   7503 #
   7504 # move out MAY overflow:
   7505 # (1) force the exp to 0x3fff
   7506 # (2) do a move w/ appropriate rnd mode
   7507 # (3) if exp still equals zero, then insert original exponent
   7508 #	for the correct result.
   7509 #     if exp now equals one, then it overflowed so call ovf_res.
   7510 #
   7511 fout_sgl_may_ovfl:
   7512 	mov.w		SRC_EX(%a0),%d1		# fetch current sign
   7513 	andi.w		&0x8000,%d1		# keep it,clear exp
   7514 	ori.w		&0x3fff,%d1		# insert exp = 0
   7515 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert scaled exp
   7516 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
   7517 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
   7518 
   7519 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   7520 
   7521 	fmov.x		FP_SCR0(%a6),%fp0	# force fop to be rounded
   7522 	fmov.l		&0x0,%fpcr		# clear FPCR
   7523 
   7524 	fabs.x		%fp0			# need absolute value
   7525 	fcmp.b		%fp0,&0x2		# did exponent increase?
   7526 	fblt.w		fout_sgl_exg		# no; go finish NORM
   7527 	bra.w		fout_sgl_ovfl		# yes; go handle overflow
   7528 
   7529 ################
   7530 
   7531 fout_sd_exc_unfl:
   7532 	mov.l		(%sp)+,%a0
   7533 
   7534 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
   7535 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   7536 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   7537 
   7538 	cmpi.b		STAG(%a6),&DENORM	# was src a DENORM?
   7539 	bne.b		fout_sd_exc_cont	# no
   7540 
   7541 	lea		FP_SCR0(%a6),%a0
   7542 	bsr.l		norm
   7543 	neg.l		%d0
   7544 	andi.w		&0x7fff,%d0
   7545 	bfins		%d0,FP_SCR0_EX(%a6){&1:&15}
   7546 	bra.b		fout_sd_exc_cont
   7547 
   7548 fout_sd_exc:
   7549 fout_sd_exc_ovfl:
   7550 	mov.l		(%sp)+,%a0		# restore a0
   7551 
   7552 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
   7553 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   7554 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   7555 
   7556 fout_sd_exc_cont:
   7557 	bclr		&0x7,FP_SCR0_EX(%a6)	# clear sign bit
   7558 	sne.b		2+FP_SCR0_EX(%a6)	# set internal sign bit
   7559 	lea		FP_SCR0(%a6),%a0	# pass: ptr to DENORM
   7560 
    7561 	mov.b		3+L_SCR3(%a6),%d1	# fetch rnd prec,mode byte
    7562 	lsr.b		&0x4,%d1
    7563 	andi.w		&0x0c,%d1		# isolate rnd prec field
    7564 	swap		%d1			# put prec in upper word
    7565 	mov.b		3+L_SCR3(%a6),%d1	# re-fetch rnd prec,mode byte
    7566 	lsr.b		&0x4,%d1
    7567 	andi.w		&0x03,%d1		# isolate rnd mode for _round()
   7568 	clr.l		%d0			# pass: zero g,r,s
   7569 	bsr.l		_round			# round the DENORM
   7570 
   7571 	tst.b		2+FP_SCR0_EX(%a6)	# is EXOP negative?
   7572 	beq.b		fout_sd_exc_done	# no
   7573 	bset		&0x7,FP_SCR0_EX(%a6)	# yes
   7574 
   7575 fout_sd_exc_done:
   7576 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   7577 	rts
   7578 
   7579 #################################################################
   7580 # fmove.d out ###################################################
   7581 #################################################################
   7582 fout_dbl:
   7583 	andi.b		&0x30,%d0		# clear rnd prec
   7584 	ori.b		&d_mode*0x10,%d0	# insert dbl prec
   7585 	mov.l		%d0,L_SCR3(%a6)		# save rnd prec,mode on stack
   7586 
   7587 #
   7588 # operand is a normalized number. first, we check to see if the move out
   7589 # would cause either an underflow or overflow. these cases are handled
   7590 # separately. otherwise, set the FPCR to the proper rounding mode and
   7591 # execute the move.
   7592 #
   7593 	mov.w		SRC_EX(%a0),%d0		# extract exponent
   7594 	andi.w		&0x7fff,%d0		# strip sign
   7595 
   7596 	cmpi.w		%d0,&DBL_HI		# will operand overflow?
   7597 	bgt.w		fout_dbl_ovfl		# yes; go handle OVFL
   7598 	beq.w		fout_dbl_may_ovfl	# maybe; go handle possible OVFL
   7599 	cmpi.w		%d0,&DBL_LO		# will operand underflow?
   7600 	blt.w		fout_dbl_unfl		# yes; go handle underflow
   7601 
   7602 #
   7603 # NORMs(in range) can be stored out by a simple "fmov.d"
   7604 # Unnormalized inputs can come through this point.
   7605 #
   7606 fout_dbl_exg:
   7607 	fmovm.x		SRC(%a0),&0x80		# fetch fop from stack
   7608 
   7609 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   7610 	fmov.l		&0x0,%fpsr		# clear FPSR
   7611 
   7612 	fmov.d		%fp0,L_SCR1(%a6)	# store does convert and round
   7613 
   7614 	fmov.l		&0x0,%fpcr		# clear FPCR
   7615 	fmov.l		%fpsr,%d0		# save FPSR
   7616 
   7617 	or.w		%d0,2+USER_FPSR(%a6) 	# set possible inex2/ainex
   7618 
   7619 	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
   7620 	lea		L_SCR1(%a6),%a0		# pass: src addr
   7621 	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
   7622 	bsr.l		_dmem_write		# store dbl fop to memory
   7623 
   7624 	tst.l		%d1			# did dstore fail?
   7625 	bne.l		facc_out_d		# yes
   7626 
   7627 	rts					# no; so we're finished
   7628 
   7629 #
   7630 # here, we know that the operand would UNFL if moved out to double prec,
   7631 # so, denorm and round and then use generic store double routine to
   7632 # write the value to memory.
   7633 #
   7634 fout_dbl_unfl:
   7635 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
   7636 
   7637 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
   7638 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   7639 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   7640 	mov.l		%a0,-(%sp)
   7641 
   7642 	clr.l		%d0			# pass: S.F. = 0
   7643 
   7644 	cmpi.b		STAG(%a6),&DENORM	# fetch src optype tag
   7645 	bne.b		fout_dbl_unfl_cont	# let DENORMs fall through
   7646 
   7647 	lea		FP_SCR0(%a6),%a0
   7648 	bsr.l		norm			# normalize the DENORM
   7649 
   7650 fout_dbl_unfl_cont:
   7651 	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
   7652 	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
   7653 	bsr.l		unf_res			# calc default underflow result
   7654 
   7655 	lea		FP_SCR0(%a6),%a0	# pass: ptr to fop
    7656 	bsr.l		dst_dbl			# convert to dbl prec
   7657 	mov.l		%d0,L_SCR1(%a6)
   7658 	mov.l		%d1,L_SCR2(%a6)
   7659 
   7660 	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
   7661 	lea		L_SCR1(%a6),%a0		# pass: src addr
   7662 	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
   7663 	bsr.l		_dmem_write		# store dbl fop to memory
   7664 
   7665 	tst.l		%d1			# did dstore fail?
   7666 	bne.l		facc_out_d		# yes
   7667 
   7668 	mov.b		FPCR_ENABLE(%a6),%d1
   7669 	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
   7670 	bne.w		fout_sd_exc_unfl	# yes
   7671 	addq.l		&0x4,%sp
   7672 	rts
   7673 
   7674 #
   7675 # it's definitely an overflow so call ovf_res to get the correct answer
   7676 #
   7677 fout_dbl_ovfl:
   7678 	mov.w		2+SRC_LO(%a0),%d0
   7679 	andi.w		&0x7ff,%d0
   7680 	bne.b		fout_dbl_ovfl_inex2
   7681 
   7682 	ori.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
   7683 	bra.b		fout_dbl_ovfl_cont
   7684 fout_dbl_ovfl_inex2:
   7685 	ori.w		&ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
   7686 
   7687 fout_dbl_ovfl_cont:
   7688 	mov.l		%a0,-(%sp)
   7689 
   7690 # call ovf_res() w/ dbl prec and the correct rnd mode to create the default
   7691 # overflow result. DON'T save the returned ccodes from ovf_res() since
   7692 # fmove out doesn't alter them.
   7693 	tst.b		SRC_EX(%a0)		# is operand negative?
   7694 	smi		%d1			# set if so
   7695 	mov.l		L_SCR3(%a6),%d0		# pass: dbl prec,rnd mode
   7696 	bsr.l		ovf_res			# calc OVFL result
   7697 	fmovm.x		(%a0),&0x80		# load default overflow result
   7698 	fmov.d		%fp0,L_SCR1(%a6)	# store to double
   7699 
   7700 	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
   7701 	lea		L_SCR1(%a6),%a0		# pass: src addr
   7702 	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
   7703 	bsr.l		_dmem_write		# store dbl fop to memory
   7704 
   7705 	tst.l		%d1			# did dstore fail?
   7706 	bne.l		facc_out_d		# yes
   7707 
   7708 	mov.b		FPCR_ENABLE(%a6),%d1
   7709 	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
   7710 	bne.w		fout_sd_exc_ovfl	# yes
   7711 	addq.l		&0x4,%sp
   7712 	rts
   7713 
   7714 #
   7715 # move out MAY overflow:
   7716 # (1) force the exp to 0x3fff
   7717 # (2) do a move w/ appropriate rnd mode
   7718 # (3) if exp still equals zero, then insert original exponent
   7719 #	for the correct result.
   7720 #     if exp now equals one, then it overflowed so call ovf_res.
   7721 #
   7722 fout_dbl_may_ovfl:
   7723 	mov.w		SRC_EX(%a0),%d1		# fetch current sign
   7724 	andi.w		&0x8000,%d1		# keep it,clear exp
   7725 	ori.w		&0x3fff,%d1		# insert exp = 0
   7726 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert scaled exp
   7727 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
   7728 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
   7729 
   7730 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   7731 
   7732 	fmov.x		FP_SCR0(%a6),%fp0	# force fop to be rounded
   7733 	fmov.l		&0x0,%fpcr		# clear FPCR
   7734 
   7735 	fabs.x		%fp0			# need absolute value
   7736 	fcmp.b		%fp0,&0x2		# did exponent increase?
   7737 	fblt.w		fout_dbl_exg		# no; go finish NORM
   7738 	bra.w		fout_dbl_ovfl		# yes; go handle overflow
   7739 
   7740 #########################################################################
   7741 # XDEF ****************************************************************	#
   7742 # 	dst_dbl(): create double precision value from extended prec.	#
   7743 #									#
   7744 # XREF ****************************************************************	#
   7745 #	None								#
   7746 #									#
   7747 # INPUT ***************************************************************	#
   7748 #	a0 = pointer to source operand in extended precision		#
   7749 # 									#
   7750 # OUTPUT **************************************************************	#
   7751 #	d0 = hi(double precision result)				#
   7752 #	d1 = lo(double precision result)				#
   7753 #									#
   7754 # ALGORITHM ***********************************************************	#
   7755 #									#
   7756 #  Changes extended precision to double precision.			#
   7757 #  Note: no attempt is made to round the extended value to double.	#
   7758 #	dbl_sign = ext_sign						#
    7759 #	dbl_exp = ext_exp - $3fff(ext bias) + $3ff(dbl bias)		#
   7760 #	get rid of ext integer bit					#
    7761 #	dbl_mant = ext_mant{62:11}					#
   7762 #									#
   7763 #	    	---------------   ---------------    ---------------	#
   7764 #  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |	#
   7765 #	    	---------------   ---------------    ---------------	#
   7766 #	   	 95	    64    63 62	      32      31     11	  0	#
   7767 #				     |			     |		#
   7768 #				     |			     |		#
   7769 #				     |			     |		#
   7770 #		 	             v   		     v		#
   7771 #	    		      ---------------   ---------------		#
   7772 #  double   ->  	      |s|exp| mant  |   |  mant       |		#
   7773 #	    		      ---------------   ---------------		#
   7774 #	   	 	      63     51   32   31	       0	#
   7775 #									#
   7776 #########################################################################
   7777 
   7778 dst_dbl:
   7779 	clr.l		%d0			# clear d0
   7780 	mov.w		FTEMP_EX(%a0),%d0	# get exponent
   7781 	subi.w		&EXT_BIAS,%d0		# subtract extended precision bias
   7782 	addi.w		&DBL_BIAS,%d0		# add double precision bias
   7783 	tst.b		FTEMP_HI(%a0)		# is number a denorm?
   7784 	bmi.b		dst_get_dupper		# no
   7785 	subq.w		&0x1,%d0		# yes; denorm bias = DBL_BIAS - 1
   7786 dst_get_dupper:
   7787 	swap		%d0			# d0 now in upper word
   7788 	lsl.l		&0x4,%d0		# d0 in proper place for dbl prec exp
   7789 	tst.b		FTEMP_EX(%a0)		# test sign
    7790 	bpl.b		dst_get_dman		# if positive, go process mantissa
   7791 	bset		&0x1f,%d0		# if negative, set sign
   7792 dst_get_dman:
   7793 	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
   7794 	bfextu		%d1{&1:&20},%d1		# get upper 20 bits of ms
   7795 	or.l		%d1,%d0			# put these bits in ms word of double
   7796 	mov.l		%d0,L_SCR1(%a6)		# put the new exp back on the stack
   7797 	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
   7798 	mov.l		&21,%d0			# load shift count
   7799 	lsl.l		%d0,%d1			# put lower 11 bits in upper bits
   7800 	mov.l		%d1,L_SCR2(%a6)		# build lower lword in memory
   7801 	mov.l		FTEMP_LO(%a0),%d1	# get ls mantissa
   7802 	bfextu		%d1{&0:&21},%d0		# get ls 21 bits of double
   7803 	mov.l		L_SCR2(%a6),%d1
   7804 	or.l		%d0,%d1			# put them in double result
   7805 	mov.l		L_SCR1(%a6),%d0
   7806 	rts
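         # worked example (added; not part of the original source), assuming
         # EXT_BIAS = $3fff and DBL_BIAS = $3ff:
         #   extended +1.5 = {EX=$3fff, HI=$c0000000, LO=$00000000}
         #   exp: $3fff - $3fff + $3ff = $3ff -> shifted into place, d0 = $3ff00000
         #   mant: bits 62:43 of HI ($80000) or'd in -> d0 = $3ff80000, d1 = $00000000
         #   i.e. double +1.5 = $3ff8000000000000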
   7807 
   7808 #########################################################################
   7809 # XDEF ****************************************************************	#
   7810 # 	dst_sgl(): create single precision value from extended prec	#
   7811 #									#
   7812 # XREF ****************************************************************	#
   7813 #									#
   7814 # INPUT ***************************************************************	#
   7815 #	a0 = pointer to source operand in extended precision		#
   7816 # 									#
   7817 # OUTPUT **************************************************************	#
   7818 #	d0 = single precision result					#
   7819 #									#
   7820 # ALGORITHM ***********************************************************	#
   7821 #									#
   7822 # Changes extended precision to single precision.			#
   7823 #	sgl_sign = ext_sign						#
   7824 #	sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias)		#
   7825 #	get rid of ext integer bit					#
    7826 #	sgl_mant = ext_mant{62:40}					#
   7827 #									#
   7828 #	    	---------------   ---------------    ---------------	#
   7829 #  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |	#
   7830 #	    	---------------   ---------------    ---------------	#
   7831 #	   	 95	    64    63 62	   40 32      31     12	  0	#
   7832 #				     |	   |				#
   7833 #				     |	   |				#
   7834 #				     |	   |				#
   7835 #		 	             v     v				#
   7836 #	    		      ---------------				#
   7837 #  single   ->  	      |s|exp| mant  |				#
   7838 #	    		      ---------------				#
   7839 #	   	 	      31     22     0				#
   7840 #									#
   7841 #########################################################################
   7842 
   7843 dst_sgl:
   7844 	clr.l		%d0
   7845 	mov.w		FTEMP_EX(%a0),%d0	# get exponent
   7846 	subi.w		&EXT_BIAS,%d0		# subtract extended precision bias
   7847 	addi.w		&SGL_BIAS,%d0		# add single precision bias
   7848 	tst.b		FTEMP_HI(%a0)		# is number a denorm?
   7849 	bmi.b		dst_get_supper		# no
   7850 	subq.w		&0x1,%d0		# yes; denorm bias = SGL_BIAS - 1
   7851 dst_get_supper:
   7852 	swap		%d0			# put exp in upper word of d0
   7853 	lsl.l		&0x7,%d0		# shift it into single exp bits
   7854 	tst.b		FTEMP_EX(%a0)		# test sign
   7855 	bpl.b		dst_get_sman		# if positive, continue
   7856 	bset		&0x1f,%d0		# if negative, put in sign first
   7857 dst_get_sman:
   7858 	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
   7859 	andi.l		&0x7fffff00,%d1		# get upper 23 bits of ms
   7860 	lsr.l		&0x8,%d1		# and put them flush right
   7861 	or.l		%d1,%d0			# put these bits in ms word of single
   7862 	rts
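         # worked example (added; not part of the original source), assuming
         # EXT_BIAS = $3fff and SGL_BIAS = $7f:
         #   extended +1.5 = {EX=$3fff, HI=$c0000000, LO=$00000000}
         #   exp: $3fff - $3fff + $7f = $7f -> shifted into place, d0 = $3f800000
         #   mant: bits 62:40 of HI ($400000) or'd in -> d0 = $3fc00000 = single +1.5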
   7863 
   7864 ##############################################################################
   7865 fout_pack:
   7866 	bsr.l		_calc_ea_fout		# fetch the <ea>
   7867 	mov.l		%a0,-(%sp)
   7868 
   7869 	mov.b		STAG(%a6),%d0		# fetch input type
   7870 	bne.w		fout_pack_not_norm	# input is not NORM
   7871 
   7872 fout_pack_norm:
   7873 	btst		&0x4,EXC_CMDREG(%a6)	# static or dynamic?
   7874 	beq.b		fout_pack_s		# static
   7875 
   7876 fout_pack_d:
   7877 	mov.b		1+EXC_CMDREG(%a6),%d1	# fetch dynamic reg
   7878 	lsr.b		&0x4,%d1
   7879 	andi.w		&0x7,%d1
   7880 
   7881 	bsr.l		fetch_dreg		# fetch Dn w/ k-factor
   7882 
   7883 	bra.b		fout_pack_type
   7884 fout_pack_s:
   7885 	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch static field
   7886 
   7887 fout_pack_type:
   7888 	bfexts		%d0{&25:&7},%d0		# extract k-factor
    7889 	mov.l		%d0,-(%sp)		# save k-factor
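         # note (added; not part of the original source): the k-factor is a
         # signed 7-bit value; per the 68881/68882 packed-decimal definition, a
         # positive k gives the number of significant digits and a zero or
         # negative k gives the number of digits to the right of the decimal
         # point (as its magnitude).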
   7890 
   7891 	lea		FP_SRC(%a6),%a0		# pass: ptr to input
   7892 
   7893 # bindec is currently scrambling FP_SRC for denorm inputs.
   7894 # we'll have to change this, but for now, tough luck!!!
   7895 	bsr.l		bindec			# convert xprec to packed
   7896 
   7897 #	andi.l		&0xcfff000f,FP_SCR0(%a6) # clear unused fields
   7898 	andi.l		&0xcffff00f,FP_SCR0(%a6) # clear unused fields
   7899 
    7900 	mov.l		(%sp)+,%d0		# restore k-factor
   7901 
   7902 	tst.b		3+FP_SCR0_EX(%a6)
   7903 	bne.b		fout_pack_set
   7904 	tst.l		FP_SCR0_HI(%a6)
   7905 	bne.b		fout_pack_set
   7906 	tst.l		FP_SCR0_LO(%a6)
   7907 	bne.b		fout_pack_set
   7908 
   7909 # add the extra condition that only if the k-factor was zero, too, should
   7910 # we zero the exponent
   7911 	tst.l		%d0
   7912 	bne.b		fout_pack_set
   7913 # "mantissa" is all zero which means that the answer is zero. but, the '040
   7914 # algorithm allows the exponent to be non-zero. the 881/2 do not. therefore,
   7915 # if the mantissa is zero, I will zero the exponent, too.
    7916 # the question now is whether the exponent's sign bit is allowed to be non-zero
   7917 # for a zero, also...
   7918 	andi.w		&0xf000,FP_SCR0(%a6)
   7919 
   7920 fout_pack_set:
   7921 
   7922 	lea		FP_SCR0(%a6),%a0	# pass: src addr
   7923 
   7924 fout_pack_write:
   7925 	mov.l		(%sp)+,%a1		# pass: dst addr
   7926 	mov.l		&0xc,%d0		# pass: opsize is 12 bytes
   7927 
   7928 	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
   7929 	beq.b		fout_pack_a7
   7930 
   7931 	bsr.l		_dmem_write		# write ext prec number to memory
   7932 
   7933 	tst.l		%d1			# did dstore fail?
   7934 	bne.w		fout_ext_err		# yes
   7935 
   7936 	rts
   7937 
   7938 # we don't want to do the write if the exception occurred in supervisor mode
   7939 # so _mem_write2() handles this for us.
   7940 fout_pack_a7:
   7941 	bsr.l		_mem_write2		# write ext prec number to memory
   7942 
   7943 	tst.l		%d1			# did dstore fail?
   7944 	bne.w		fout_ext_err		# yes
   7945 
   7946 	rts
   7947 
   7948 fout_pack_not_norm:
   7949 	cmpi.b		%d0,&DENORM		# is it a DENORM?
   7950 	beq.w		fout_pack_norm		# yes
   7951 	lea		FP_SRC(%a6),%a0
   7952 	clr.w		2+FP_SRC_EX(%a6)
   7953 	cmpi.b		%d0,&SNAN		# is it an SNAN?
   7954 	beq.b		fout_pack_snan		# yes
   7955 	bra.b		fout_pack_write		# no
   7956 
   7957 fout_pack_snan:
   7958 	ori.w		&snaniop2_mask,FPSR_EXCEPT(%a6) # set SNAN/AIOP
   7959 	bset		&0x6,FP_SRC_HI(%a6)	# set snan bit
   7960 	bra.b		fout_pack_write
   7961 
   7962 #########################################################################
   7963 # XDEF ****************************************************************	#
   7964 # 	fmul(): emulates the fmul instruction				#
   7965 #	fsmul(): emulates the fsmul instruction				#
   7966 #	fdmul(): emulates the fdmul instruction				#
   7967 #									#
   7968 # XREF ****************************************************************	#
   7969 #	scale_to_zero_src() - scale src exponent to zero		#
   7970 #	scale_to_zero_dst() - scale dst exponent to zero		#
   7971 #	unf_res() - return default underflow result			#
   7972 #	ovf_res() - return default overflow result			#
   7973 # 	res_qnan() - return QNAN result					#
   7974 # 	res_snan() - return SNAN result					#
   7975 #									#
   7976 # INPUT ***************************************************************	#
   7977 #	a0 = pointer to extended precision source operand		#
   7978 #	a1 = pointer to extended precision destination operand		#
   7979 #	d0  rnd prec,mode						#
    7980 #	d0 = rnd prec,mode						#
   7981 # OUTPUT **************************************************************	#
   7982 #	fp0 = result							#
   7983 #	fp1 = EXOP (if exception occurred)				#
   7984 #									#
   7985 # ALGORITHM ***********************************************************	#
   7986 #	Handle NANs, infinities, and zeroes as special cases. Divide	#
   7987 # norms/denorms into ext/sgl/dbl precision.				#
   7988 #	For norms/denorms, scale the exponents such that a multiply	#
   7989 # instruction won't cause an exception. Use the regular fmul to		#
   7990 # compute a result. Check if the regular operands would have taken	#
   7991 # an exception. If so, return the default overflow/underflow result	#
   7992 # and return the EXOP if exceptions are enabled. Else, scale the 	#
   7993 # result operand to the proper exponent.				#
   7994 #									#
   7995 #########################################################################
   7996 
   7997 	align 		0x10
   7998 tbl_fmul_ovfl:
   7999 	long		0x3fff - 0x7ffe		# ext_max
   8000 	long		0x3fff - 0x407e		# sgl_max
   8001 	long		0x3fff - 0x43fe		# dbl_max
   8002 tbl_fmul_unfl:
   8003 	long		0x3fff + 0x0001		# ext_unfl
   8004 	long		0x3fff - 0x3f80		# sgl_unfl
   8005 	long		0x3fff - 0x3c00		# dbl_unfl
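         # note (added; not part of the original source): assuming
         # scale_to_zero_src/dst() each return $3fff minus the operand's biased
         # exponent (as the "sub.l %d0,%d1 # add scale factor" fix-up below
         # implies), the summed scale factor is what gets compared against the
         # limits above. example: two operands with biased exponents of $6000
         # give 2*($3fff-$6000) = -$4002, which is below ext_max (-$3fff), so
         # the extended precision product is certain to overflow.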
   8006 
   8007 	global		fsmul
   8008 fsmul:
   8009 	andi.b		&0x30,%d0		# clear rnd prec
   8010 	ori.b		&s_mode*0x10,%d0	# insert sgl prec
   8011 	bra.b		fmul
   8012 
   8013 	global		fdmul
   8014 fdmul:
   8015 	andi.b		&0x30,%d0
   8016 	ori.b		&d_mode*0x10,%d0	# insert dbl prec
   8017 
   8018 	global		fmul
   8019 fmul:
   8020 	mov.l		%d0,L_SCR3(%a6)		# store rnd info
   8021 
   8022 	clr.w		%d1
   8023 	mov.b		DTAG(%a6),%d1
   8024 	lsl.b		&0x3,%d1
   8025 	or.b		STAG(%a6),%d1		# combine src tags
   8026 	bne.w		fmul_not_norm		# optimize on non-norm input
   8027 
   8028 fmul_norm:
   8029 	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
   8030 	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
   8031 	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
   8032 
   8033 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
   8034 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   8035 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   8036 
   8037 	bsr.l		scale_to_zero_src	# scale src exponent
   8038 	mov.l		%d0,-(%sp)		# save scale factor 1
   8039 
   8040 	bsr.l		scale_to_zero_dst	# scale dst exponent
   8041 
   8042 	add.l		%d0,(%sp)		# SCALE_FACTOR = scale1 + scale2
   8043 
   8044 	mov.w		2+L_SCR3(%a6),%d1	# fetch precision
   8045 	lsr.b		&0x6,%d1		# shift to lo bits
   8046 	mov.l		(%sp)+,%d0		# load S.F.
   8047 	cmp.l		%d0,(tbl_fmul_ovfl.w,%pc,%d1.w*4) # would result ovfl?
   8048 	beq.w		fmul_may_ovfl		# result may rnd to overflow
   8049 	blt.w		fmul_ovfl		# result will overflow
   8050 
   8051 	cmp.l		%d0,(tbl_fmul_unfl.w,%pc,%d1.w*4) # would result unfl?
   8052 	beq.w		fmul_may_unfl		# result may rnd to no unfl
   8053 	bgt.w		fmul_unfl		# result will underflow
   8054 
   8055 #
   8056 # NORMAL:
   8057 # - the result of the multiply operation will neither overflow nor underflow.
   8058 # - do the multiply to the proper precision and rounding mode.
   8059 # - scale the result exponent using the scale factor. if both operands were
   8060 # normalized then we really don't need to go through this scaling. but for now,
   8061 # this will do.
   8062 #
   8063 fmul_normal:
   8064 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
   8065 
   8066 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   8067 	fmov.l		&0x0,%fpsr		# clear FPSR
   8068 
   8069 	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
   8070 
   8071 	fmov.l		%fpsr,%d1		# save status
   8072 	fmov.l		&0x0,%fpcr		# clear FPCR
   8073 
   8074 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   8075 
   8076 fmul_normal_exit:
   8077 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
   8078 	mov.l		%d2,-(%sp)		# save d2
   8079 	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
   8080 	mov.l		%d1,%d2			# make a copy
   8081 	andi.l		&0x7fff,%d1		# strip sign
   8082 	andi.w		&0x8000,%d2		# keep old sign
   8083 	sub.l		%d0,%d1			# add scale factor
   8084 	or.w		%d2,%d1			# concat old sign,new exp
   8085 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
   8086 	mov.l		(%sp)+,%d2		# restore d2
   8087 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
   8088 	rts
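         # note (added; not part of the original source): the exponent fix-up
         # above undoes the scaling. example: operand exponents $4005 and $3ffa
         # give a scale factor of ($3fff-$4005) + ($3fff-$3ffa) = -1; the scaled
         # product has exponent $3fff or $4000, and subtracting the scale factor
         # restores the true exponent ($4000 or $4001).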
   8089 
   8090 #
   8091 # OVERFLOW:
   8092 # - the result of the multiply operation is an overflow.
   8093 # - do the multiply to the proper precision and rounding mode in order to
   8094 # set the inexact bits.
   8095 # - calculate the default result and return it in fp0.
   8096 # - if overflow or inexact is enabled, we need a multiply result rounded to
   8097 # extended precision. if the original operation was extended, then we have this
   8098 # result. if the original operation was single or double, we have to do another
   8099 # multiply using extended precision and the correct rounding mode. the result
   8100 # of this operation then has its exponent scaled by -0x6000 to create the
   8101 # exceptional operand.
   8102 #
   8103 fmul_ovfl:
   8104 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
   8105 
   8106 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   8107 	fmov.l		&0x0,%fpsr		# clear FPSR
   8108 
   8109 	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
   8110 
   8111 	fmov.l		%fpsr,%d1		# save status
   8112 	fmov.l		&0x0,%fpcr		# clear FPCR
   8113 
   8114 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   8115 
   8116 # save setting this until now because this is where fmul_may_ovfl may jump in
   8117 fmul_ovfl_tst:
   8118 	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
   8119 
   8120 	mov.b		FPCR_ENABLE(%a6),%d1
   8121 	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
   8122 	bne.b		fmul_ovfl_ena		# yes
   8123 
   8124 # calculate the default result
   8125 fmul_ovfl_dis:
   8126 	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
   8127 	sne		%d1			# set sign param accordingly
   8128 	mov.l		L_SCR3(%a6),%d0		# pass rnd prec,mode
   8129 	bsr.l		ovf_res			# calculate default result
   8130 	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
   8131 	fmovm.x		(%a0),&0x80		# return default result in fp0
   8132 	rts
   8133 
   8134 #
   8135 # OVFL is enabled; Create EXOP:
   8136 # - if precision is extended, then we have the EXOP. simply bias the exponent
   8137 # with an extra -0x6000. if the precision is single or double, we need to
   8138 # calculate a result rounded to extended precision.
   8139 #
   8140 fmul_ovfl_ena:
   8141 	mov.l		L_SCR3(%a6),%d1
   8142 	andi.b		&0xc0,%d1		# test the rnd prec
   8143 	bne.b		fmul_ovfl_ena_sd	# it's sgl or dbl
   8144 
   8145 fmul_ovfl_ena_cont:
   8146 	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack
   8147 
   8148 	mov.l		%d2,-(%sp)		# save d2
   8149 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
   8150 	mov.w		%d1,%d2			# make a copy
   8151 	andi.l		&0x7fff,%d1		# strip sign
   8152 	sub.l		%d0,%d1			# add scale factor
   8153 	subi.l		&0x6000,%d1		# subtract bias
   8154 	andi.w		&0x7fff,%d1		# clear sign bit
   8155 	andi.w		&0x8000,%d2		# keep old sign
   8156 	or.w		%d2,%d1			# concat old sign,new exp
   8157 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
   8158 	mov.l		(%sp)+,%d2		# restore d2
   8159 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   8160 	bra.b		fmul_ovfl_dis
   8161 
   8162 fmul_ovfl_ena_sd:
   8163 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
   8164 
   8165 	mov.l		L_SCR3(%a6),%d1
   8166 	andi.b		&0x30,%d1		# keep rnd mode only
   8167 	fmov.l		%d1,%fpcr		# set FPCR
   8168 
   8169 	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
   8170 
   8171 	fmov.l		&0x0,%fpcr		# clear FPCR
   8172 	bra.b		fmul_ovfl_ena_cont
   8173 
   8174 #
   8175 # may OVERFLOW:
   8176 # - the result of the multiply operation MAY overflow.
   8177 # - do the multiply to the proper precision and rounding mode in order to
   8178 # set the inexact bits.
   8179 # - calculate the default result and return it in fp0.
   8180 #
   8181 fmul_may_ovfl:
   8182 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
   8183 
   8184 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   8185 	fmov.l		&0x0,%fpsr		# clear FPSR
   8186 
   8187 	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
   8188 
   8189 	fmov.l		%fpsr,%d1		# save status
   8190 	fmov.l		&0x0,%fpcr		# clear FPCR
   8191 
   8192 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   8193 
   8194 	fabs.x		%fp0,%fp1		# make a copy of result
   8195 	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
   8196 	fbge.w		fmul_ovfl_tst		# yes; overflow has occurred
   8197 
   8198 # no, it didn't overflow; we have correct result
   8199 	bra.w		fmul_normal_exit
   8200 
   8201 #
   8202 # UNDERFLOW:
   8203 # - the result of the multiply operation is an underflow.
   8204 # - do the multiply to the proper precision and rounding mode in order to
   8205 # set the inexact bits.
   8206 # - calculate the default result and return it in fp0.
   8207 # - if overflow or inexact is enabled, we need a multiply result rounded to
   8208 # extended precision. if the original operation was extended, then we have this
   8209 # result. if the original operation was single or double, we have to do another
   8210 # multiply using extended precision and the correct rounding mode. the result
   8211 # of this operation then has its exponent scaled by -0x6000 to create the
   8212 # exceptional operand.
   8213 #
   8214 fmul_unfl:
   8215 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
   8216 
   8217 # for fun, let's use only extended precision, round to zero. then, let
   8218 # the unf_res() routine figure out all the rest.
    8219 # will we get the correct answer? unf_res() takes care of that.
   8220 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
   8221 
   8222 	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
   8223 	fmov.l		&0x0,%fpsr		# clear FPSR
   8224 
   8225 	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
   8226 
   8227 	fmov.l		%fpsr,%d1		# save status
   8228 	fmov.l		&0x0,%fpcr		# clear FPCR
   8229 
   8230 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   8231 
   8232 	mov.b		FPCR_ENABLE(%a6),%d1
   8233 	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
   8234 	bne.b		fmul_unfl_ena		# yes
   8235 
   8236 fmul_unfl_dis:
   8237 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
   8238 
   8239 	lea		FP_SCR0(%a6),%a0	# pass: result addr
   8240 	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
   8241 	bsr.l		unf_res			# calculate default result
   8242 	or.b		%d0,FPSR_CC(%a6)	# unf_res2 may have set 'Z'
   8243 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
   8244 	rts
   8245 
   8246 #
   8247 # UNFL is enabled.
   8248 #
   8249 fmul_unfl_ena:
   8250 	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op
   8251 
   8252 	mov.l		L_SCR3(%a6),%d1
   8253 	andi.b		&0xc0,%d1		# is precision extended?
   8254 	bne.b		fmul_unfl_ena_sd	# no, sgl or dbl
   8255 
   8256 # if the rnd mode is anything but RZ, then we have to re-do the above
   8257 # multiplication because we used RZ for all.
   8258 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   8259 
   8260 fmul_unfl_ena_cont:
   8261 	fmov.l		&0x0,%fpsr		# clear FPSR
   8262 
   8263 	fmul.x		FP_SCR0(%a6),%fp1	# execute multiply
   8264 
   8265 	fmov.l		&0x0,%fpcr		# clear FPCR
   8266 
   8267 	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
   8268 	mov.l		%d2,-(%sp)		# save d2
   8269 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
   8270 	mov.l		%d1,%d2			# make a copy
   8271 	andi.l		&0x7fff,%d1		# strip sign
   8272 	andi.w		&0x8000,%d2		# keep old sign
   8273 	sub.l		%d0,%d1			# add scale factor
   8274 	addi.l		&0x6000,%d1		# add bias
   8275 	andi.w		&0x7fff,%d1
   8276 	or.w		%d2,%d1			# concat old sign,new exp
   8277 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
   8278 	mov.l		(%sp)+,%d2		# restore d2
   8279 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   8280 	bra.w		fmul_unfl_dis
   8281 
   8282 fmul_unfl_ena_sd:
   8283 	mov.l		L_SCR3(%a6),%d1
   8284 	andi.b		&0x30,%d1		# use only rnd mode
   8285 	fmov.l		%d1,%fpcr		# set FPCR
   8286 
   8287 	bra.b		fmul_unfl_ena_cont
   8288 
   8289 # MAY UNDERFLOW:
   8290 # -use the correct rounding mode and precision. this code favors operations
   8291 # that do not underflow.
   8292 fmul_may_unfl:
   8293 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
   8294 
   8295 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   8296 	fmov.l		&0x0,%fpsr		# clear FPSR
   8297 
   8298 	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
   8299 
   8300 	fmov.l		%fpsr,%d1		# save status
   8301 	fmov.l		&0x0,%fpcr		# clear FPCR
   8302 
   8303 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   8304 
   8305 	fabs.x		%fp0,%fp1		# make a copy of result
   8306 	fcmp.b		%fp1,&0x2		# is |result| > 2.b?
   8307 	fbgt.w		fmul_normal_exit	# no; no underflow occurred
   8308 	fblt.w		fmul_unfl		# yes; underflow occurred
   8309 
   8310 #
   8311 # we still don't know if underflow occurred. result is ~ equal to 2. but,
   8312 # we don't know if the result was an underflow that rounded up to a 2 or
   8313 # a normalized number that rounded down to a 2. so, redo the entire operation
   8314 # using RZ as the rounding mode to see what the pre-rounded result is.
   8315 # this case should be relatively rare.
   8316 #
   8317 	fmovm.x		FP_SCR1(%a6),&0x40	# load dst operand
   8318 
   8319 	mov.l		L_SCR3(%a6),%d1
   8320 	andi.b		&0xc0,%d1		# keep rnd prec
   8321 	ori.b		&rz_mode*0x10,%d1	# insert RZ
   8322 
   8323 	fmov.l		%d1,%fpcr		# set FPCR
   8324 	fmov.l		&0x0,%fpsr		# clear FPSR
   8325 
   8326 	fmul.x		FP_SCR0(%a6),%fp1	# execute multiply
   8327 
   8328 	fmov.l		&0x0,%fpcr		# clear FPCR
   8329 	fabs.x		%fp1			# make absolute value
   8330 	fcmp.b		%fp1,&0x2		# is |result| < 2.b?
   8331 	fbge.w		fmul_normal_exit	# no; no underflow occurred
   8332 	bra.w		fmul_unfl		# yes, underflow occurred
   8333 
   8334 ################################################################################
   8335 
   8336 #
   8337 # Multiply: inputs are not both normalized; what are they?
   8338 #
   8339 fmul_not_norm:
   8340 	mov.w		(tbl_fmul_op.b,%pc,%d1.w*2),%d1
   8341 	jmp		(tbl_fmul_op.b,%pc,%d1.w)
   8342 
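         # note (added; not part of the original source): the index built above
         # is (DTAG << 3) | STAG, so each group of eight entries below is one
         # destination tag (NORM, ZERO, INF, QNAN, DENORM, SNAN) and each entry
         # within a group is one source tag in that same order; the last two
         # slots of each group are unused.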
   8343 	swbeg		&48
   8344 tbl_fmul_op:
   8345 	short		fmul_norm	- tbl_fmul_op # NORM x NORM
   8346 	short		fmul_zero	- tbl_fmul_op # NORM x ZERO
   8347 	short		fmul_inf_src	- tbl_fmul_op # NORM x INF
   8348 	short		fmul_res_qnan	- tbl_fmul_op # NORM x QNAN
   8349 	short		fmul_norm	- tbl_fmul_op # NORM x DENORM
   8350 	short		fmul_res_snan	- tbl_fmul_op # NORM x SNAN
   8351 	short		tbl_fmul_op	- tbl_fmul_op #
   8352 	short		tbl_fmul_op	- tbl_fmul_op #
   8353 
   8354 	short		fmul_zero	- tbl_fmul_op # ZERO x NORM
   8355 	short		fmul_zero	- tbl_fmul_op # ZERO x ZERO
   8356 	short		fmul_res_operr	- tbl_fmul_op # ZERO x INF
   8357 	short		fmul_res_qnan	- tbl_fmul_op # ZERO x QNAN
   8358 	short		fmul_zero	- tbl_fmul_op # ZERO x DENORM
   8359 	short		fmul_res_snan	- tbl_fmul_op # ZERO x SNAN
   8360 	short		tbl_fmul_op	- tbl_fmul_op #
   8361 	short		tbl_fmul_op	- tbl_fmul_op #
   8362 
   8363 	short		fmul_inf_dst	- tbl_fmul_op # INF x NORM
   8364 	short		fmul_res_operr	- tbl_fmul_op # INF x ZERO
   8365 	short		fmul_inf_dst	- tbl_fmul_op # INF x INF
   8366 	short		fmul_res_qnan	- tbl_fmul_op # INF x QNAN
   8367 	short		fmul_inf_dst	- tbl_fmul_op # INF x DENORM
   8368 	short		fmul_res_snan	- tbl_fmul_op # INF x SNAN
   8369 	short		tbl_fmul_op	- tbl_fmul_op #
   8370 	short		tbl_fmul_op	- tbl_fmul_op #
   8371 
   8372 	short		fmul_res_qnan	- tbl_fmul_op # QNAN x NORM
   8373 	short		fmul_res_qnan	- tbl_fmul_op # QNAN x ZERO
   8374 	short		fmul_res_qnan	- tbl_fmul_op # QNAN x INF
   8375 	short		fmul_res_qnan	- tbl_fmul_op # QNAN x QNAN
   8376 	short		fmul_res_qnan	- tbl_fmul_op # QNAN x DENORM
   8377 	short		fmul_res_snan	- tbl_fmul_op # QNAN x SNAN
   8378 	short		tbl_fmul_op	- tbl_fmul_op #
   8379 	short		tbl_fmul_op	- tbl_fmul_op #
   8380 
   8381 	short		fmul_norm	- tbl_fmul_op # NORM x NORM
   8382 	short		fmul_zero	- tbl_fmul_op # NORM x ZERO
   8383 	short		fmul_inf_src	- tbl_fmul_op # NORM x INF
   8384 	short		fmul_res_qnan	- tbl_fmul_op # NORM x QNAN
   8385 	short		fmul_norm	- tbl_fmul_op # NORM x DENORM
   8386 	short		fmul_res_snan	- tbl_fmul_op # NORM x SNAN
   8387 	short		tbl_fmul_op	- tbl_fmul_op #
   8388 	short		tbl_fmul_op	- tbl_fmul_op #
   8389 
   8390 	short		fmul_res_snan	- tbl_fmul_op # SNAN x NORM
   8391 	short		fmul_res_snan	- tbl_fmul_op # SNAN x ZERO
   8392 	short		fmul_res_snan	- tbl_fmul_op # SNAN x INF
   8393 	short		fmul_res_snan	- tbl_fmul_op # SNAN x QNAN
   8394 	short		fmul_res_snan	- tbl_fmul_op # SNAN x DENORM
   8395 	short		fmul_res_snan	- tbl_fmul_op # SNAN x SNAN
   8396 	short		tbl_fmul_op	- tbl_fmul_op #
   8397 	short		tbl_fmul_op	- tbl_fmul_op #
   8398 
   8399 fmul_res_operr:
   8400 	bra.l		res_operr
   8401 fmul_res_snan:
   8402 	bra.l		res_snan
   8403 fmul_res_qnan:
   8404 	bra.l		res_qnan
   8405 
   8406 #
   8407 # Multiply: (Zero x Zero) || (Zero x norm) || (Zero x denorm)
   8408 #
   8409 	global		fmul_zero		# global for fsglmul
   8410 fmul_zero:
   8411 	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
   8412 	mov.b		DST_EX(%a1),%d1
   8413 	eor.b		%d0,%d1
   8414 	bpl.b		fmul_zero_p		# result ZERO is pos.
   8415 fmul_zero_n:
   8416 	fmov.s		&0x80000000,%fp0	# load -ZERO
   8417 	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
   8418 	rts
   8419 fmul_zero_p:
   8420 	fmov.s		&0x00000000,%fp0	# load +ZERO
   8421 	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
   8422 	rts
   8423 
   8424 #
   8425 # Multiply: (inf x inf) || (inf x norm) || (inf x denorm)
   8426 #
   8427 # Note: The j-bit for an infinity is a don't-care. However, to be
   8428 # strictly compatible w/ the 68881/882, we make sure to return an
   8429 # INF w/ the j-bit set if the input INF j-bit was set. Destination
   8430 # INFs take priority.
   8431 #
   8432 	global		fmul_inf_dst		# global for fsglmul
   8433 fmul_inf_dst:
   8434 	fmovm.x		DST(%a1),&0x80		# return INF result in fp0
   8435 	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
   8436 	mov.b		DST_EX(%a1),%d1
   8437 	eor.b		%d0,%d1
   8438 	bpl.b		fmul_inf_dst_p		# result INF is pos.
   8439 fmul_inf_dst_n:
   8440 	fabs.x		%fp0			# clear result sign
   8441 	fneg.x		%fp0			# set result sign
   8442 	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
   8443 	rts
   8444 fmul_inf_dst_p:
   8445 	fabs.x		%fp0			# clear result sign
   8446 	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
   8447 	rts
   8448 
   8449 	global		fmul_inf_src		# global for fsglmul
   8450 fmul_inf_src:
   8451 	fmovm.x		SRC(%a0),&0x80		# return INF result in fp0
   8452 	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
   8453 	mov.b		DST_EX(%a1),%d1
   8454 	eor.b		%d0,%d1
   8455 	bpl.b		fmul_inf_dst_p		# result INF is pos.
   8456 	bra.b		fmul_inf_dst_n
   8457 
   8458 #########################################################################
   8459 # XDEF ****************************************************************	#
   8460 #	fin(): emulates the fmove instruction				#
   8461 #	fsin(): emulates the fsmove instruction				#
   8462 #	fdin(): emulates the fdmove instruction				#
   8463 #									#
   8464 # XREF ****************************************************************	#
   8465 #	norm() - normalize mantissa for EXOP on denorm			#
   8466 #	scale_to_zero_src() - scale src exponent to zero		#
   8467 #	ovf_res() - return default overflow result			#
   8468 # 	unf_res() - return default underflow result			#
   8469 #	res_qnan_1op() - return QNAN result				#
   8470 #	res_snan_1op() - return SNAN result				#
   8471 #									#
   8472 # INPUT ***************************************************************	#
   8473 #	a0 = pointer to extended precision source operand		#
   8474 #	d0 = round prec/mode						#
   8475 # 									#
   8476 # OUTPUT **************************************************************	#
   8477 #	fp0 = result							#
   8478 #	fp1 = EXOP (if exception occurred)				#
   8479 #									#
   8480 # ALGORITHM ***********************************************************	#
   8481 # 	Handle NANs, infinities, and zeroes as special cases. Divide	#
   8482 # norms into extended, single, and double precision.			#
   8483 # 	Norms can be emulated w/ a regular fmove instruction. For	#
   8484 # sgl/dbl, must scale exponent and perform an "fmove". Check to see	#
   8485 # if the result would have overflowed/underflowed. If so, use unf_res()	#
   8486 # or ovf_res() to return the default result. Also return EXOP if	#
   8487 # exception is enabled. If no exception, return the default result.	#
   8488 #	Unnorms don't pass through here.				#
   8489 #									#
   8490 #########################################################################
   8491 
   8492 	global		fsin
   8493 fsin:
   8494 	andi.b		&0x30,%d0		# clear rnd prec
   8495 	ori.b		&s_mode*0x10,%d0	# insert sgl precision
   8496 	bra.b		fin
   8497 
   8498 	global		fdin
   8499 fdin:
   8500 	andi.b		&0x30,%d0		# clear rnd prec
   8501 	ori.b		&d_mode*0x10,%d0	# insert dbl precision
   8502 
   8503 	global		fin
   8504 fin:
   8505 	mov.l		%d0,L_SCR3(%a6)		# store rnd info
   8506 
   8507 	mov.b		STAG(%a6),%d1		# fetch src optype tag
   8508 	bne.w		fin_not_norm		# optimize on non-norm input
   8509 
   8510 #
   8511 # FP MOVE IN: NORMs and DENORMs ONLY!
   8512 #
   8513 fin_norm:
   8514 	andi.b		&0xc0,%d0		# is precision extended?
   8515 	bne.w		fin_not_ext		# no, so go handle dbl or sgl
   8516 
   8517 #
   8518 # precision selected is extended. so...we cannot get an underflow
   8519 # or overflow because of rounding to the correct precision. so...
   8520 # skip the scaling and unscaling...
   8521 #
   8522 	tst.b		SRC_EX(%a0)		# is the operand negative?
   8523 	bpl.b		fin_norm_done		# no
   8524 	bset		&neg_bit,FPSR_CC(%a6)	# yes, so set 'N' ccode bit
   8525 fin_norm_done:
   8526 	fmovm.x		SRC(%a0),&0x80		# return result in fp0
   8527 	rts
   8528 
   8529 #
   8530 # for an extended precision DENORM, the UNFL exception bit is set
   8531 # the accrued bit is NOT set in this instance(no inexactness!)
   8532 #
   8533 fin_denorm:
   8534 	andi.b		&0xc0,%d0		# is precision extended?
   8535 	bne.w		fin_not_ext		# no, so go handle dbl or sgl
   8536 
   8537 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
   8538 	tst.b		SRC_EX(%a0)		# is the operand negative?
   8539 	bpl.b		fin_denorm_done		# no
   8540 	bset		&neg_bit,FPSR_CC(%a6)	# yes, so set 'N' ccode bit
   8541 fin_denorm_done:
   8542 	fmovm.x		SRC(%a0),&0x80		# return result in fp0
   8543 	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
   8544 	bne.b		fin_denorm_unfl_ena	# yes
   8545 	rts
   8546 
   8547 #
   8548 # the input is an extended DENORM and underflow is enabled in the FPCR.
   8549 # normalize the mantissa and add the bias of 0x6000 to the resulting negative
   8550 # exponent and insert back into the operand.
   8551 #
   8552 fin_denorm_unfl_ena:
   8553 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
   8554 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   8555 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   8556 	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
   8557 	bsr.l		norm			# normalize result
   8558 	neg.w		%d0			# new exponent = -(shft val)
   8559 	addi.w		&0x6000,%d0		# add new bias to exponent
   8560 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
   8561 	andi.w		&0x8000,%d1		# keep old sign
   8562 	andi.w		&0x7fff,%d0		# clear sign position
    8563 	or.w		%d1,%d0			# concat new exp,old sign
   8564 	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
   8565 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   8566 	rts
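         # note (added; not part of the original source): $6000 (24576) is the
         # usual 68881/68882-style exponent wrap for a trapped underflow, which
         # brings the too-small exponent back into range for the user's handler.
         # e.g. a norm() shift count of 3 yields an EXOP exponent of -3 + $6000
         # = $5ffd.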
   8567 
   8568 #
   8569 # operand is to be rounded to single or double precision
   8570 #
   8571 fin_not_ext:
   8572 	cmpi.b		%d0,&s_mode*0x10 	# separate sgl/dbl prec
   8573 	bne.b		fin_dbl
   8574 
   8575 #
   8576 # operand is to be rounded to single precision
   8577 #
   8578 fin_sgl:
   8579 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
   8580 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   8581 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   8582 	bsr.l		scale_to_zero_src	# calculate scale factor
   8583 
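         # (note on the tests below: scale_to_zero_src() appears to return
         # 0x3fff minus the operand's biased exponent; the same value is
         # subtracted back out in fin_sd_normal_exit.  single precision normals
         # span biased exponents 0x3f81 (0x3fff-126) through 0x407e (0x3fff+127)
         # in extended format, so d0 >= 0x3fff-0x3f80 means the exponent is
         # below that range, d0 == 0x3fff-0x407e means it sits right at the top
         # where rounding may still carry into overflow, and anything smaller
         # means it is already past the top.)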
   8584 	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
   8585 	bge.w		fin_sd_unfl		# yes; go handle underflow
   8586 	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
   8587 	beq.w		fin_sd_may_ovfl		# maybe; go check
   8588 	blt.w		fin_sd_ovfl		# yes; go handle overflow
   8589 
   8590 #
   8591 # operand will NOT overflow or underflow when moved into the fp reg file
   8592 #
   8593 fin_sd_normal:
   8594 	fmov.l		&0x0,%fpsr		# clear FPSR
   8595 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   8596 
   8597 	fmov.x		FP_SCR0(%a6),%fp0	# perform move
   8598 
   8599 	fmov.l		%fpsr,%d1		# save FPSR
   8600 	fmov.l		&0x0,%fpcr		# clear FPCR
   8601 
   8602 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   8603 
   8604 fin_sd_normal_exit:
   8605 	mov.l		%d2,-(%sp)		# save d2
   8606 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
   8607 	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
   8608 	mov.w		%d1,%d2			# make a copy
   8609 	andi.l		&0x7fff,%d1		# strip sign
   8610 	sub.l		%d0,%d1			# add scale factor
   8611 	andi.w		&0x8000,%d2		# keep old sign
   8612 	or.w		%d1,%d2			# concat old sign,new exponent
   8613 	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
   8614 	mov.l		(%sp)+,%d2		# restore d2
   8615 	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
   8616 	rts
   8617 
   8618 #
   8619 # operand is to be rounded to double precision
   8620 #
   8621 fin_dbl:
   8622 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
   8623 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   8624 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   8625 	bsr.l		scale_to_zero_src	# calculate scale factor
   8626 
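         # (same idea as the single precision tests above, using the double
         # precision limits: smallest normal 0x3c01 = 0x3fff-1022, largest
         # 0x43fe = 0x3fff+1023.)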
   8627 	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
   8628 	bge.w		fin_sd_unfl		# yes; go handle underflow
   8629 	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
   8630 	beq.w		fin_sd_may_ovfl		# maybe; go check
   8631 	blt.w		fin_sd_ovfl		# yes; go handle overflow
    8632 	bra.w		fin_sd_normal		# no; go handle normalized op
   8633 
   8634 #
   8635 # operand WILL underflow when moved in to the fp register file
   8636 #
   8637 fin_sd_unfl:
   8638 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
   8639 
   8640 	tst.b		FP_SCR0_EX(%a6)		# is operand negative?
   8641 	bpl.b		fin_sd_unfl_tst
   8642 	bset		&neg_bit,FPSR_CC(%a6)	# set 'N' ccode bit
   8643 
   8644 # if underflow or inexact is enabled, then go calculate the EXOP first.
   8645 fin_sd_unfl_tst:
   8646 	mov.b		FPCR_ENABLE(%a6),%d1
   8647 	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
   8648 	bne.b		fin_sd_unfl_ena		# yes
   8649 
   8650 fin_sd_unfl_dis:
   8651 	lea		FP_SCR0(%a6),%a0	# pass: result addr
   8652 	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
   8653 	bsr.l		unf_res			# calculate default result
   8654 	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
   8655 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
   8656 	rts
   8657 
   8658 #
   8659 # operand will underflow AND underflow or inexact is enabled.
   8660 # therefore, we must return the result rounded to extended precision.
   8661 #
   8662 fin_sd_unfl_ena:
   8663 	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
   8664 	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
   8665 	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent
   8666 
   8667 	mov.l		%d2,-(%sp)		# save d2
   8668 	mov.w		%d1,%d2			# make a copy
   8669 	andi.l		&0x7fff,%d1		# strip sign
   8670 	sub.l		%d0,%d1			# subtract scale factor
   8671 	andi.w		&0x8000,%d2		# extract old sign
   8672 	addi.l		&0x6000,%d1		# add new bias
   8673 	andi.w		&0x7fff,%d1
   8674 	or.w		%d1,%d2			# concat old sign,new exp
   8675 	mov.w		%d2,FP_SCR1_EX(%a6)	# insert new exponent
   8676 	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
   8677 	mov.l		(%sp)+,%d2		# restore d2
   8678 	bra.b		fin_sd_unfl_dis
   8679 
   8680 #
   8681 # operand WILL overflow.
   8682 #
   8683 fin_sd_ovfl:
   8684 	fmov.l		&0x0,%fpsr		# clear FPSR
   8685 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   8686 
   8687 	fmov.x		FP_SCR0(%a6),%fp0	# perform move
   8688 
   8689 	fmov.l		&0x0,%fpcr		# clear FPCR
   8690 	fmov.l		%fpsr,%d1		# save FPSR
   8691 
   8692 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   8693 
   8694 fin_sd_ovfl_tst:
   8695 	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
   8696 
   8697 	mov.b		FPCR_ENABLE(%a6),%d1
   8698 	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
   8699 	bne.b		fin_sd_ovfl_ena		# yes
   8700 
   8701 #
   8702 # OVFL is not enabled; therefore, we must create the default result by
   8703 # calling ovf_res().
   8704 #
   8705 fin_sd_ovfl_dis:
   8706 	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
   8707 	sne		%d1			# set sign param accordingly
   8708 	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
   8709 	bsr.l		ovf_res			# calculate default result
   8710 	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
   8711 	fmovm.x		(%a0),&0x80		# return default result in fp0
   8712 	rts
   8713 
   8714 #
   8715 # OVFL is enabled.
   8716 # the INEX2 bit has already been updated by the round to the correct precision.
   8717 # now, round to extended(and don't alter the FPSR).
   8718 #
   8719 fin_sd_ovfl_ena:
   8720 	mov.l		%d2,-(%sp)		# save d2
   8721 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
   8722 	mov.l		%d1,%d2			# make a copy
   8723 	andi.l		&0x7fff,%d1		# strip sign
   8724 	andi.w		&0x8000,%d2		# keep old sign
   8725 	sub.l		%d0,%d1			# add scale factor
   8726 	sub.l		&0x6000,%d1		# subtract bias
   8727 	andi.w		&0x7fff,%d1
   8728 	or.w		%d2,%d1
   8729 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
   8730 	mov.l		(%sp)+,%d2		# restore d2
   8731 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   8732 	bra.b		fin_sd_ovfl_dis
   8733 
   8734 #
   8735 # the move in MAY overflow. so...
   8736 #
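         # (the operand was scaled so its biased exponent is 0x3fff, i.e. its
         # magnitude lies in [1.0,2.0); after the rounded move, |result| >= 2.0
         # can only mean that rounding carried into the next binade, which for
         # the unscaled operand, already at the largest sgl/dbl exponent, is an
         # overflow.  hence the fcmp against 2 below.)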
   8737 fin_sd_may_ovfl:
   8738 	fmov.l		&0x0,%fpsr		# clear FPSR
   8739 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   8740 
   8741 	fmov.x		FP_SCR0(%a6),%fp0	# perform the move
   8742 
   8743 	fmov.l		%fpsr,%d1		# save status
   8744 	fmov.l		&0x0,%fpcr		# clear FPCR
   8745 
   8746 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   8747 
   8748 	fabs.x		%fp0,%fp1		# make a copy of result
   8749 	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
   8750 	fbge.w		fin_sd_ovfl_tst		# yes; overflow has occurred
   8751 
   8752 # no, it didn't overflow; we have correct result
   8753 	bra.w		fin_sd_normal_exit
   8754 
   8755 ##########################################################################
   8756 
   8757 #
   8758 # operand is not a NORM: check its optype and branch accordingly
   8759 #
   8760 fin_not_norm:
   8761 	cmpi.b		%d1,&DENORM		# weed out DENORM
   8762 	beq.w		fin_denorm
   8763 	cmpi.b		%d1,&SNAN		# weed out SNANs
   8764 	beq.l		res_snan_1op
   8765 	cmpi.b		%d1,&QNAN		# weed out QNANs
   8766 	beq.l		res_qnan_1op
   8767 
   8768 #
   8769 # do the fmove in; at this point, only possible ops are ZERO and INF.
   8770 # use fmov to determine ccodes.
    8771 # prec:mode should be zero at this point but it won't affect the answer anyway.
   8772 #
   8773 	fmov.x		SRC(%a0),%fp0		# do fmove in
   8774 	fmov.l		%fpsr,%d0		# no exceptions possible
   8775 	rol.l		&0x8,%d0		# put ccodes in lo byte
   8776 	mov.b		%d0,FPSR_CC(%a6)	# insert correct ccodes
   8777 	rts
   8778 
   8779 #########################################################################
   8780 # XDEF ****************************************************************	#
   8781 # 	fdiv(): emulates the fdiv instruction				#
   8782 #	fsdiv(): emulates the fsdiv instruction				#
   8783 #	fddiv(): emulates the fddiv instruction				#
   8784 #									#
   8785 # XREF ****************************************************************	#
   8786 #	scale_to_zero_src() - scale src exponent to zero		#
   8787 #	scale_to_zero_dst() - scale dst exponent to zero		#
   8788 #	unf_res() - return default underflow result			#
   8789 #	ovf_res() - return default overflow result			#
   8790 # 	res_qnan() - return QNAN result					#
   8791 # 	res_snan() - return SNAN result					#
   8792 #									#
   8793 # INPUT ***************************************************************	#
   8794 #	a0 = pointer to extended precision source operand		#
   8795 #	a1 = pointer to extended precision destination operand		#
    8796 #	d0 = rnd prec,mode						#
   8797 #									#
   8798 # OUTPUT **************************************************************	#
   8799 #	fp0 = result							#
   8800 #	fp1 = EXOP (if exception occurred)				#
   8801 #									#
   8802 # ALGORITHM ***********************************************************	#
   8803 #	Handle NANs, infinities, and zeroes as special cases. Divide	#
   8804 # norms/denorms into ext/sgl/dbl precision.				#
   8805 #	For norms/denorms, scale the exponents such that a divide	#
   8806 # instruction won't cause an exception. Use the regular fdiv to		#
   8807 # compute a result. Check if the regular operands would have taken	#
    8808 # compute a result. Check if the original operands would have taken	#
   8809 # and return the EXOP if exceptions are enabled. Else, scale the 	#
   8810 # result operand to the proper exponent.				#
   8811 #									#
   8812 #########################################################################
   8813 
   8814 	align		0x10
   8815 tbl_fdiv_unfl:
   8816 	long		0x3fff - 0x0000		# ext_unfl
   8817 	long		0x3fff - 0x3f81		# sgl_unfl
   8818 	long		0x3fff - 0x3c01		# dbl_unfl
   8819 
   8820 tbl_fdiv_ovfl:
   8821 	long		0x3fff - 0x7ffe		# ext overflow exponent
   8822 	long		0x3fff - 0x407e		# sgl overflow exponent
   8823 	long		0x3fff - 0x43fe		# dbl overflow exponent
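         # (the entries above are "0x3fff - limit" values: 0x3f81/0x3c01 are the
         # extended-format biased exponents of the smallest sgl/dbl normals
         # (0x3fff-126 and 0x3fff-1022), while 0x7ffe/0x407e/0x43fe are the
         # largest finite exponents for ext/sgl/dbl.  the scale factor computed
         # in fdiv_norm is compared directly against them.)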
   8824 
   8825 	global		fsdiv
   8826 fsdiv:
   8827 	andi.b		&0x30,%d0		# clear rnd prec
   8828 	ori.b		&s_mode*0x10,%d0	# insert sgl prec
   8829 	bra.b		fdiv
   8830 
   8831 	global		fddiv
   8832 fddiv:
   8833 	andi.b		&0x30,%d0		# clear rnd prec
   8834 	ori.b		&d_mode*0x10,%d0	# insert dbl prec
   8835 
   8836 	global		fdiv
   8837 fdiv:
   8838 	mov.l		%d0,L_SCR3(%a6)		# store rnd info
   8839 
   8840 	clr.w		%d1
   8841 	mov.b		DTAG(%a6),%d1
   8842 	lsl.b		&0x3,%d1
   8843 	or.b		STAG(%a6),%d1		# combine src tags
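         # (d1 = DTAG*8 + STAG; it indexes the 48-entry dispatch table
         # tbl_fdiv_op below and is zero only for the NORM/NORM case.)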
   8844 
   8845 	bne.w		fdiv_not_norm		# optimize on non-norm input
   8846 
   8847 #
   8848 # DIVIDE: NORMs and DENORMs ONLY!
   8849 #
   8850 fdiv_norm:
   8851 	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
   8852 	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
   8853 	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
   8854 
   8855 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
   8856 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   8857 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   8858 
   8859 	bsr.l		scale_to_zero_src	# scale src exponent
   8860 	mov.l		%d0,-(%sp)		# save scale factor 1
   8861 
   8862 	bsr.l		scale_to_zero_dst	# scale dst exponent
   8863 
   8864 	neg.l		(%sp)			# SCALE FACTOR = scale1 - scale2
   8865 	add.l		%d0,(%sp)
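         # (each scale factor is 0x3fff minus an operand's biased exponent, so
         # 0x3fff minus the combined factor approximates the biased exponent of
         # the true quotient, give or take one binade; the compares below use
         # that to predict overflow/underflow before the divide is attempted.)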
   8866 
   8867 	mov.w		2+L_SCR3(%a6),%d1	# fetch precision
   8868 	lsr.b		&0x6,%d1		# shift to lo bits
   8869 	mov.l		(%sp)+,%d0		# load S.F.
   8870 	cmp.l		%d0,(tbl_fdiv_ovfl.b,%pc,%d1.w*4) # will result overflow?
    8871 	ble.w		fdiv_may_ovfl		# yes; result may overflow
   8872 
   8873 	cmp.l		%d0,(tbl_fdiv_unfl.w,%pc,%d1.w*4) # will result underflow?
   8874 	beq.w		fdiv_may_unfl		# maybe
   8875 	bgt.w		fdiv_unfl		# yes; go handle underflow
   8876 
   8877 fdiv_normal:
   8878 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
   8879 
    8880 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   8881 	fmov.l		&0x0,%fpsr		# clear FPSR
   8882 
   8883 	fdiv.x		FP_SCR0(%a6),%fp0	# perform divide
   8884 
   8885 	fmov.l		%fpsr,%d1		# save FPSR
   8886 	fmov.l		&0x0,%fpcr		# clear FPCR
   8887 
   8888 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   8889 
   8890 fdiv_normal_exit:
   8891 	fmovm.x		&0x80,FP_SCR0(%a6)	# store result on stack
   8892 	mov.l		%d2,-(%sp)		# store d2
   8893 	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
   8894 	mov.l		%d1,%d2			# make a copy
   8895 	andi.l		&0x7fff,%d1		# strip sign
   8896 	andi.w		&0x8000,%d2		# keep old sign
   8897 	sub.l		%d0,%d1			# add scale factor
   8898 	or.w		%d2,%d1			# concat old sign,new exp
   8899 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
   8900 	mov.l		(%sp)+,%d2		# restore d2
   8901 	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
   8902 	rts
   8903 
   8904 tbl_fdiv_ovfl2:
   8905 	long		0x7fff
   8906 	long		0x407f
   8907 	long		0x43ff
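         # (0x7fff, 0x407f and 0x43ff are one past the largest finite biased
         # exponents for ext/sgl/dbl; the candidate result's unscaled exponent
         # is checked against them below to settle the "may overflow" case.)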
   8908 
   8909 fdiv_no_ovfl:
   8910 	mov.l		(%sp)+,%d0		# restore scale factor
   8911 	bra.b		fdiv_normal_exit
   8912 
   8913 fdiv_may_ovfl:
   8914 	mov.l		%d0,-(%sp)		# save scale factor
   8915 
   8916 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
   8917 
   8918 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
    8919 	fmov.l		&0x0,%fpsr		# clear FPSR
   8920 
   8921 	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide
   8922 
   8923 	fmov.l		%fpsr,%d0
   8924 	fmov.l		&0x0,%fpcr
   8925 
   8926 	or.l		%d0,USER_FPSR(%a6)	# save INEX,N
   8927 
   8928 	fmovm.x		&0x01,-(%sp)		# save result to stack
   8929 	mov.w		(%sp),%d0		# fetch new exponent
   8930 	add.l		&0xc,%sp		# clear result from stack
   8931 	andi.l		&0x7fff,%d0		# strip sign
   8932 	sub.l		(%sp),%d0		# add scale factor
   8933 	cmp.l		%d0,(tbl_fdiv_ovfl2.b,%pc,%d1.w*4)
   8934 	blt.b		fdiv_no_ovfl
   8935 	mov.l		(%sp)+,%d0
   8936 
   8937 fdiv_ovfl_tst:
   8938 	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
   8939 
   8940 	mov.b		FPCR_ENABLE(%a6),%d1
   8941 	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
   8942 	bne.b		fdiv_ovfl_ena		# yes
   8943 
   8944 fdiv_ovfl_dis:
   8945 	btst		&neg_bit,FPSR_CC(%a6) 	# is result negative?
   8946 	sne		%d1			# set sign param accordingly
   8947 	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
   8948 	bsr.l		ovf_res			# calculate default result
   8949 	or.b		%d0,FPSR_CC(%a6)	# set INF if applicable
   8950 	fmovm.x		(%a0),&0x80		# return default result in fp0
   8951 	rts
   8952 
   8953 fdiv_ovfl_ena:
   8954 	mov.l		L_SCR3(%a6),%d1
   8955 	andi.b		&0xc0,%d1		# is precision extended?
   8956 	bne.b		fdiv_ovfl_ena_sd	# no, do sgl or dbl
   8957 
   8958 fdiv_ovfl_ena_cont:
   8959 	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack
   8960 
   8961 	mov.l		%d2,-(%sp)		# save d2
   8962 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
   8963 	mov.w		%d1,%d2			# make a copy
   8964 	andi.l		&0x7fff,%d1		# strip sign
   8965 	sub.l		%d0,%d1			# add scale factor
   8966 	subi.l		&0x6000,%d1		# subtract bias
   8967 	andi.w		&0x7fff,%d1		# clear sign bit
   8968 	andi.w		&0x8000,%d2		# keep old sign
   8969 	or.w		%d2,%d1			# concat old sign,new exp
   8970 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
   8971 	mov.l		(%sp)+,%d2		# restore d2
   8972 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   8973 	bra.b		fdiv_ovfl_dis
   8974 
   8975 fdiv_ovfl_ena_sd:
   8976 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
   8977 
   8978 	mov.l		L_SCR3(%a6),%d1
   8979 	andi.b		&0x30,%d1		# keep rnd mode
   8980 	fmov.l		%d1,%fpcr		# set FPCR
   8981 
   8982 	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide
   8983 
   8984 	fmov.l		&0x0,%fpcr		# clear FPCR
   8985 	bra.b		fdiv_ovfl_ena_cont
   8986 
   8987 fdiv_unfl:
   8988 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
   8989 
   8990 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
   8991 
   8992 	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
   8993 	fmov.l		&0x0,%fpsr		# clear FPSR
   8994 
   8995 	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide
   8996 
   8997 	fmov.l		%fpsr,%d1		# save status
   8998 	fmov.l		&0x0,%fpcr		# clear FPCR
   8999 
   9000 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   9001 
   9002 	mov.b		FPCR_ENABLE(%a6),%d1
   9003 	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
   9004 	bne.b		fdiv_unfl_ena		# yes
   9005 
   9006 fdiv_unfl_dis:
   9007 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
   9008 
   9009 	lea		FP_SCR0(%a6),%a0	# pass: result addr
   9010 	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
   9011 	bsr.l		unf_res			# calculate default result
   9012 	or.b		%d0,FPSR_CC(%a6)	# 'Z' may have been set
   9013 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
   9014 	rts
   9015 
   9016 #
   9017 # UNFL is enabled.
    9018 # UNFL or INEX is enabled.
   9019 fdiv_unfl_ena:
   9020 	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op
   9021 
   9022 	mov.l		L_SCR3(%a6),%d1
   9023 	andi.b		&0xc0,%d1		# is precision extended?
   9024 	bne.b		fdiv_unfl_ena_sd	# no, sgl or dbl
   9025 
   9026 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   9027 
   9028 fdiv_unfl_ena_cont:
   9029 	fmov.l		&0x0,%fpsr		# clear FPSR
   9030 
   9031 	fdiv.x		FP_SCR0(%a6),%fp1	# execute divide
   9032 
   9033 	fmov.l		&0x0,%fpcr		# clear FPCR
   9034 
   9035 	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
   9036 	mov.l		%d2,-(%sp)		# save d2
   9037 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
   9038 	mov.l		%d1,%d2			# make a copy
   9039 	andi.l		&0x7fff,%d1		# strip sign
   9040 	andi.w		&0x8000,%d2		# keep old sign
    9041 	sub.l		%d0,%d1			# add scale factor
   9042 	addi.l		&0x6000,%d1		# add bias
   9043 	andi.w		&0x7fff,%d1
   9044 	or.w		%d2,%d1			# concat old sign,new exp
   9045 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exp
   9046 	mov.l		(%sp)+,%d2		# restore d2
   9047 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   9048 	bra.w		fdiv_unfl_dis
   9049 
   9050 fdiv_unfl_ena_sd:
   9051 	mov.l		L_SCR3(%a6),%d1
   9052 	andi.b		&0x30,%d1		# use only rnd mode
   9053 	fmov.l		%d1,%fpcr		# set FPCR
   9054 
   9055 	bra.b		fdiv_unfl_ena_cont
   9056 
   9057 #
   9058 # the divide operation MAY underflow:
   9059 #
   9060 fdiv_may_unfl:
   9061 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
   9062 
   9063 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   9064 	fmov.l		&0x0,%fpsr		# clear FPSR
   9065 
   9066 	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide
   9067 
   9068 	fmov.l		%fpsr,%d1		# save status
   9069 	fmov.l		&0x0,%fpcr		# clear FPCR
   9070 
   9071 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   9072 
   9073 	fabs.x		%fp0,%fp1		# make a copy of result
   9074 	fcmp.b		%fp1,&0x1		# is |result| > 1.b?
    9075 	fbgt.w		fdiv_normal_exit	# yes; no underflow occurred
    9076 	fblt.w		fdiv_unfl		# no; underflow occurred
   9077 
   9078 #
   9079 # we still don't know if underflow occurred. result is ~ equal to 1. but,
   9080 # we don't know if the result was an underflow that rounded up to a 1
   9081 # or a normalized number that rounded down to a 1. so, redo the entire
   9082 # operation using RZ as the rounding mode to see what the pre-rounded
   9083 # result is. this case should be relatively rare.
   9084 #
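         # (round-to-zero never increases the magnitude, so if the RZ quotient
         # is still >= 1 the true result did not underflow; if it drops below 1,
         # the earlier result only reached 1 by rounding up, i.e. it underflowed.)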
   9085 	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
   9086 
   9087 	mov.l		L_SCR3(%a6),%d1
   9088 	andi.b		&0xc0,%d1		# keep rnd prec
   9089 	ori.b		&rz_mode*0x10,%d1	# insert RZ
   9090 
   9091 	fmov.l		%d1,%fpcr		# set FPCR
   9092 	fmov.l		&0x0,%fpsr		# clear FPSR
   9093 
   9094 	fdiv.x		FP_SCR0(%a6),%fp1	# execute divide
   9095 
   9096 	fmov.l		&0x0,%fpcr		# clear FPCR
   9097 	fabs.x		%fp1			# make absolute value
   9098 	fcmp.b		%fp1,&0x1		# is |result| < 1.b?
   9099 	fbge.w		fdiv_normal_exit	# no; no underflow occurred
   9100 	bra.w		fdiv_unfl		# yes; underflow occurred
   9101 
   9102 ############################################################################
   9103 
   9104 #
   9105 # Divide: inputs are not both normalized; what are they?
   9106 #
   9107 fdiv_not_norm:
   9108 	mov.w		(tbl_fdiv_op.b,%pc,%d1.w*2),%d1
   9109 	jmp		(tbl_fdiv_op.b,%pc,%d1.w*1)
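         # (the word fetched above is the handler's offset from tbl_fdiv_op, so
         # the jmp lands on the entry matching the DTAG/STAG pair; the unused
         # slots hold offset zero and are never reached.)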
   9110 
   9111 	swbeg		&48
   9112 tbl_fdiv_op:
   9113 	short		fdiv_norm	- tbl_fdiv_op # NORM / NORM
   9114 	short		fdiv_inf_load	- tbl_fdiv_op # NORM / ZERO
   9115 	short		fdiv_zero_load	- tbl_fdiv_op # NORM / INF
   9116 	short		fdiv_res_qnan	- tbl_fdiv_op # NORM / QNAN
   9117 	short		fdiv_norm	- tbl_fdiv_op # NORM / DENORM
   9118 	short		fdiv_res_snan	- tbl_fdiv_op # NORM / SNAN
   9119 	short		tbl_fdiv_op	- tbl_fdiv_op #
   9120 	short		tbl_fdiv_op	- tbl_fdiv_op #
   9121 
   9122 	short		fdiv_zero_load	- tbl_fdiv_op # ZERO / NORM
   9123 	short		fdiv_res_operr	- tbl_fdiv_op # ZERO / ZERO
   9124 	short		fdiv_zero_load	- tbl_fdiv_op # ZERO / INF
   9125 	short		fdiv_res_qnan	- tbl_fdiv_op # ZERO / QNAN
   9126 	short		fdiv_zero_load	- tbl_fdiv_op # ZERO / DENORM
   9127 	short		fdiv_res_snan	- tbl_fdiv_op # ZERO / SNAN
   9128 	short		tbl_fdiv_op	- tbl_fdiv_op #
   9129 	short		tbl_fdiv_op	- tbl_fdiv_op #
   9130 
   9131 	short		fdiv_inf_dst	- tbl_fdiv_op # INF / NORM
   9132 	short		fdiv_inf_dst	- tbl_fdiv_op # INF / ZERO
   9133 	short		fdiv_res_operr	- tbl_fdiv_op # INF / INF
   9134 	short		fdiv_res_qnan	- tbl_fdiv_op # INF / QNAN
   9135 	short		fdiv_inf_dst	- tbl_fdiv_op # INF / DENORM
   9136 	short		fdiv_res_snan	- tbl_fdiv_op # INF / SNAN
   9137 	short		tbl_fdiv_op	- tbl_fdiv_op #
   9138 	short		tbl_fdiv_op	- tbl_fdiv_op #
   9139 
   9140 	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / NORM
   9141 	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / ZERO
   9142 	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / INF
   9143 	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / QNAN
   9144 	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / DENORM
   9145 	short		fdiv_res_snan	- tbl_fdiv_op # QNAN / SNAN
   9146 	short		tbl_fdiv_op	- tbl_fdiv_op #
   9147 	short		tbl_fdiv_op	- tbl_fdiv_op #
   9148 
   9149 	short		fdiv_norm	- tbl_fdiv_op # DENORM / NORM
   9150 	short		fdiv_inf_load	- tbl_fdiv_op # DENORM / ZERO
   9151 	short		fdiv_zero_load	- tbl_fdiv_op # DENORM / INF
   9152 	short		fdiv_res_qnan	- tbl_fdiv_op # DENORM / QNAN
   9153 	short		fdiv_norm	- tbl_fdiv_op # DENORM / DENORM
   9154 	short		fdiv_res_snan	- tbl_fdiv_op # DENORM / SNAN
   9155 	short		tbl_fdiv_op	- tbl_fdiv_op #
   9156 	short		tbl_fdiv_op	- tbl_fdiv_op #
   9157 
   9158 	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / NORM
   9159 	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / ZERO
   9160 	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / INF
   9161 	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / QNAN
   9162 	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / DENORM
   9163 	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / SNAN
   9164 	short		tbl_fdiv_op	- tbl_fdiv_op #
   9165 	short		tbl_fdiv_op	- tbl_fdiv_op #
   9166 
   9167 fdiv_res_qnan:
   9168 	bra.l		res_qnan
   9169 fdiv_res_snan:
   9170 	bra.l		res_snan
   9171 fdiv_res_operr:
   9172 	bra.l		res_operr
   9173 
   9174 	global		fdiv_zero_load		# global for fsgldiv
   9175 fdiv_zero_load:
   9176 	mov.b		SRC_EX(%a0),%d0		# result sign is exclusive
   9177 	mov.b		DST_EX(%a1),%d1		# or of input signs.
   9178 	eor.b		%d0,%d1
   9179 	bpl.b		fdiv_zero_load_p	# result is positive
   9180 	fmov.s		&0x80000000,%fp0	# load a -ZERO
   9181 	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set Z/N
   9182 	rts
   9183 fdiv_zero_load_p:
   9184 	fmov.s		&0x00000000,%fp0	# load a +ZERO
   9185 	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
   9186 	rts
   9187 
   9188 #
   9189 # The destination was In Range and the source was a ZERO. The result,
   9190 # therefore, is an INF w/ the proper sign.
   9191 # So, determine the sign and return a new INF (w/ the j-bit cleared).
   9192 #
   9193 	global		fdiv_inf_load		# global for fsgldiv
   9194 fdiv_inf_load:
   9195 	ori.w		&dz_mask+adz_mask,2+USER_FPSR(%a6) # no; set DZ/ADZ
   9196 	mov.b		SRC_EX(%a0),%d0		# load both signs
   9197 	mov.b		DST_EX(%a1),%d1
   9198 	eor.b		%d0,%d1
   9199 	bpl.b		fdiv_inf_load_p		# result is positive
   9200 	fmov.s		&0xff800000,%fp0	# make result -INF
   9201 	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
   9202 	rts
   9203 fdiv_inf_load_p:
   9204 	fmov.s		&0x7f800000,%fp0	# make result +INF
   9205 	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
   9206 	rts
   9207 
   9208 #
   9209 # The destination was an INF w/ an In Range or ZERO source, the result is
   9210 # an INF w/ the proper sign.
   9211 # The 68881/882 returns the destination INF w/ the new sign(if the j-bit of the
    9212 # dst INF is set, then the j-bit of the result INF is also set).
   9213 #
   9214 	global		fdiv_inf_dst		# global for fsgldiv
   9215 fdiv_inf_dst:
   9216 	mov.b		DST_EX(%a1),%d0		# load both signs
   9217 	mov.b		SRC_EX(%a0),%d1
   9218 	eor.b		%d0,%d1
   9219 	bpl.b		fdiv_inf_dst_p		# result is positive
   9220 
   9221 	fmovm.x		DST(%a1),&0x80		# return result in fp0
   9222 	fabs.x		%fp0			# clear sign bit
   9223 	fneg.x		%fp0			# set sign bit
   9224 	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/NEG
   9225 	rts
   9226 
   9227 fdiv_inf_dst_p:
   9228 	fmovm.x		DST(%a1),&0x80		# return result in fp0
   9229 	fabs.x		%fp0			# return positive INF
   9230 	mov.b		&inf_bmask,FPSR_CC(%a6) # set INF
   9231 	rts
   9232 
   9233 #########################################################################
   9234 # XDEF ****************************************************************	#
   9235 #	fneg(): emulates the fneg instruction				#
   9236 #	fsneg(): emulates the fsneg instruction				#
   9237 #	fdneg(): emulates the fdneg instruction				#
   9238 #									#
   9239 # XREF ****************************************************************	#
   9240 # 	norm() - normalize a denorm to provide EXOP			#
   9241 #	scale_to_zero_src() - scale sgl/dbl source exponent		#
   9242 #	ovf_res() - return default overflow result			#
   9243 #	unf_res() - return default underflow result			#
   9244 # 	res_qnan_1op() - return QNAN result				#
   9245 #	res_snan_1op() - return SNAN result				#
   9246 #									#
   9247 # INPUT ***************************************************************	#
   9248 #	a0 = pointer to extended precision source operand		#
   9249 #	d0 = rnd prec,mode						#
   9250 #									#
   9251 # OUTPUT **************************************************************	#
   9252 #	fp0 = result							#
   9253 #	fp1 = EXOP (if exception occurred)				#
   9254 #									#
   9255 # ALGORITHM ***********************************************************	#
   9256 #	Handle NANs, zeroes, and infinities as special cases. Separate	#
   9257 # norms/denorms into ext/sgl/dbl precisions. Extended precision can be	#
    9258 # emulated by simply flipping the sign bit. Sgl/dbl operands must be scaled	#
   9259 # and an actual fneg performed to see if overflow/underflow would have	#
   9260 # occurred. If so, return default underflow/overflow result. Else,	#
   9261 # scale the result exponent and return result. FPSR gets set based on	#
   9262 # the result value.							#
   9263 #									#
   9264 #########################################################################
   9265 
   9266 	global		fsneg
   9267 fsneg:
   9268 	andi.b		&0x30,%d0		# clear rnd prec
   9269 	ori.b		&s_mode*0x10,%d0	# insert sgl precision
   9270 	bra.b		fneg
   9271 
   9272 	global		fdneg
   9273 fdneg:
   9274 	andi.b		&0x30,%d0		# clear rnd prec
   9275 	ori.b		&d_mode*0x10,%d0	# insert dbl prec
   9276 
   9277 	global		fneg
   9278 fneg:
   9279 	mov.l		%d0,L_SCR3(%a6)		# store rnd info
   9280 	mov.b		STAG(%a6),%d1
   9281 	bne.w		fneg_not_norm		# optimize on non-norm input
   9282 
   9283 #
   9284 # NEGATE SIGN : norms and denorms ONLY!
   9285 #
   9286 fneg_norm:
   9287 	andi.b		&0xc0,%d0		# is precision extended?
   9288 	bne.w		fneg_not_ext		# no; go handle sgl or dbl
   9289 
   9290 #
   9291 # precision selected is extended. so...we can not get an underflow
   9292 # or overflow because of rounding to the correct precision. so...
   9293 # skip the scaling and unscaling...
   9294 #
   9295 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   9296 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   9297 	mov.w		SRC_EX(%a0),%d0
   9298 	eori.w		&0x8000,%d0		# negate sign
   9299 	bpl.b		fneg_norm_load		# sign is positive
   9300 	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
   9301 fneg_norm_load:
   9302 	mov.w		%d0,FP_SCR0_EX(%a6)
   9303 	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
   9304 	rts
   9305 
   9306 #
   9307 # for an extended precision DENORM, the UNFL exception bit is set
   9308 # the accrued bit is NOT set in this instance(no inexactness!)
   9309 #
   9310 fneg_denorm:
   9311 	andi.b		&0xc0,%d0		# is precision extended?
   9312 	bne.b		fneg_not_ext		# no; go handle sgl or dbl
   9313 
   9314 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
   9315 
   9316 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   9317 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   9318 	mov.w		SRC_EX(%a0),%d0
   9319 	eori.w		&0x8000,%d0		# negate sign
   9320 	bpl.b		fneg_denorm_done	# no
   9321 	mov.b		&neg_bmask,FPSR_CC(%a6)	# yes, set 'N' ccode bit
   9322 fneg_denorm_done:
   9323 	mov.w		%d0,FP_SCR0_EX(%a6)
   9324 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
   9325 
   9326 	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
   9327 	bne.b		fneg_ext_unfl_ena	# yes
   9328 	rts
   9329 
   9330 #
   9331 # the input is an extended DENORM and underflow is enabled in the FPCR.
   9332 # normalize the mantissa and add the bias of 0x6000 to the resulting negative
   9333 # exponent and insert back into the operand.
   9334 #
   9335 fneg_ext_unfl_ena:
   9336 	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
   9337 	bsr.l		norm			# normalize result
   9338 	neg.w		%d0			# new exponent = -(shft val)
   9339 	addi.w		&0x6000,%d0		# add new bias to exponent
   9340 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
   9341 	andi.w		&0x8000,%d1	 	# keep old sign
   9342 	andi.w		&0x7fff,%d0		# clear sign position
   9343 	or.w		%d1,%d0			# concat old sign, new exponent
   9344 	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
   9345 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   9346 	rts
   9347 
   9348 #
   9349 # operand is either single or double
   9350 #
   9351 fneg_not_ext:
   9352 	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
   9353 	bne.b		fneg_dbl
   9354 
   9355 #
   9356 # operand is to be rounded to single precision
   9357 #
   9358 fneg_sgl:
   9359 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
   9360 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   9361 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   9362 	bsr.l		scale_to_zero_src	# calculate scale factor
   9363 
   9364 	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
   9365 	bge.w		fneg_sd_unfl		# yes; go handle underflow
   9366 	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
   9367 	beq.w		fneg_sd_may_ovfl	# maybe; go check
   9368 	blt.w		fneg_sd_ovfl		# yes; go handle overflow
   9369 
   9370 #
   9371 # operand will NOT overflow or underflow when moved in to the fp reg file
   9372 #
   9373 fneg_sd_normal:
   9374 	fmov.l		&0x0,%fpsr		# clear FPSR
   9375 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   9376 
   9377 	fneg.x		FP_SCR0(%a6),%fp0	# perform negation
   9378 
   9379 	fmov.l		%fpsr,%d1		# save FPSR
   9380 	fmov.l		&0x0,%fpcr		# clear FPCR
   9381 
   9382 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   9383 
   9384 fneg_sd_normal_exit:
   9385 	mov.l		%d2,-(%sp)		# save d2
   9386 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
   9387 	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
   9388 	mov.w		%d1,%d2			# make a copy
   9389 	andi.l		&0x7fff,%d1		# strip sign
   9390 	sub.l		%d0,%d1			# add scale factor
   9391 	andi.w		&0x8000,%d2		# keep old sign
   9392 	or.w		%d1,%d2			# concat old sign,new exp
   9393 	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
   9394 	mov.l		(%sp)+,%d2		# restore d2
   9395 	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
   9396 	rts
   9397 
   9398 #
   9399 # operand is to be rounded to double precision
   9400 #
   9401 fneg_dbl:
   9402 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
   9403 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   9404 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   9405 	bsr.l		scale_to_zero_src	# calculate scale factor
   9406 
   9407 	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
   9408 	bge.b		fneg_sd_unfl		# yes; go handle underflow
   9409 	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
   9410 	beq.w		fneg_sd_may_ovfl	# maybe; go check
   9411 	blt.w		fneg_sd_ovfl		# yes; go handle overflow
    9412 	bra.w		fneg_sd_normal		# no; go handle normalized op
   9413 
   9414 #
   9415 # operand WILL underflow when moved in to the fp register file
   9416 #
   9417 fneg_sd_unfl:
   9418 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
   9419 
   9420 	eori.b		&0x80,FP_SCR0_EX(%a6)	# negate sign
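         # (eori.b to memory sets the CCR N/Z bits from its result, so the bpl
         # below tests the negated sign.)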
   9421 	bpl.b		fneg_sd_unfl_tst
   9422 	bset		&neg_bit,FPSR_CC(%a6)	# set 'N' ccode bit
   9423 
   9424 # if underflow or inexact is enabled, go calculate EXOP first.
   9425 fneg_sd_unfl_tst:
   9426 	mov.b		FPCR_ENABLE(%a6),%d1
   9427 	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
   9428 	bne.b		fneg_sd_unfl_ena	# yes
   9429 
   9430 fneg_sd_unfl_dis:
   9431 	lea		FP_SCR0(%a6),%a0	# pass: result addr
   9432 	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
   9433 	bsr.l		unf_res			# calculate default result
   9434 	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
   9435 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
   9436 	rts
   9437 
   9438 #
    9439 # operand will underflow AND underflow or inexact is enabled.
   9440 # therefore, we must return the result rounded to extended precision.
   9441 #
   9442 fneg_sd_unfl_ena:
   9443 	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
   9444 	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
   9445 	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent
   9446 
   9447 	mov.l		%d2,-(%sp)		# save d2
   9448 	mov.l		%d1,%d2			# make a copy
   9449 	andi.l		&0x7fff,%d1		# strip sign
   9450 	andi.w		&0x8000,%d2		# keep old sign
   9451 	sub.l		%d0,%d1			# subtract scale factor
   9452 	addi.l		&0x6000,%d1		# add new bias
   9453 	andi.w		&0x7fff,%d1
   9454 	or.w		%d2,%d1			# concat new sign,new exp
   9455 	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
   9456 	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
   9457 	mov.l		(%sp)+,%d2		# restore d2
   9458 	bra.b		fneg_sd_unfl_dis
   9459 
   9460 #
   9461 # operand WILL overflow.
   9462 #
   9463 fneg_sd_ovfl:
   9464 	fmov.l		&0x0,%fpsr		# clear FPSR
   9465 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   9466 
   9467 	fneg.x		FP_SCR0(%a6),%fp0	# perform negation
   9468 
   9469 	fmov.l		&0x0,%fpcr		# clear FPCR
   9470 	fmov.l		%fpsr,%d1		# save FPSR
   9471 
   9472 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   9473 
   9474 fneg_sd_ovfl_tst:
   9475 	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
   9476 
   9477 	mov.b		FPCR_ENABLE(%a6),%d1
   9478 	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
   9479 	bne.b		fneg_sd_ovfl_ena	# yes
   9480 
   9481 #
   9482 # OVFL is not enabled; therefore, we must create the default result by
   9483 # calling ovf_res().
   9484 #
   9485 fneg_sd_ovfl_dis:
   9486 	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
   9487 	sne		%d1			# set sign param accordingly
   9488 	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
   9489 	bsr.l		ovf_res			# calculate default result
   9490 	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
   9491 	fmovm.x		(%a0),&0x80		# return default result in fp0
   9492 	rts
   9493 
   9494 #
   9495 # OVFL is enabled.
   9496 # the INEX2 bit has already been updated by the round to the correct precision.
   9497 # now, round to extended(and don't alter the FPSR).
   9498 #
   9499 fneg_sd_ovfl_ena:
   9500 	mov.l		%d2,-(%sp)		# save d2
   9501 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
   9502 	mov.l		%d1,%d2			# make a copy
   9503 	andi.l		&0x7fff,%d1		# strip sign
   9504 	andi.w		&0x8000,%d2		# keep old sign
   9505 	sub.l		%d0,%d1			# add scale factor
   9506 	subi.l		&0x6000,%d1		# subtract bias
   9507 	andi.w		&0x7fff,%d1
   9508 	or.w		%d2,%d1			# concat sign,exp
   9509 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
   9510 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   9511 	mov.l		(%sp)+,%d2		# restore d2
   9512 	bra.b		fneg_sd_ovfl_dis
   9513 
   9514 #
    9515 # the fneg MAY overflow. so...
   9516 #
   9517 fneg_sd_may_ovfl:
   9518 	fmov.l		&0x0,%fpsr		# clear FPSR
   9519 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   9520 
   9521 	fneg.x		FP_SCR0(%a6),%fp0	# perform negation
   9522 
   9523 	fmov.l		%fpsr,%d1		# save status
   9524 	fmov.l		&0x0,%fpcr		# clear FPCR
   9525 
   9526 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   9527 
   9528 	fabs.x		%fp0,%fp1		# make a copy of result
   9529 	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
   9530 	fbge.w		fneg_sd_ovfl_tst	# yes; overflow has occurred
   9531 
   9532 # no, it didn't overflow; we have correct result
   9533 	bra.w		fneg_sd_normal_exit
   9534 
   9535 ##########################################################################
   9536 
   9537 #
   9538 # input is not normalized; what is it?
   9539 #
   9540 fneg_not_norm:
   9541 	cmpi.b		%d1,&DENORM		# weed out DENORM
   9542 	beq.w		fneg_denorm
   9543 	cmpi.b		%d1,&SNAN		# weed out SNAN
   9544 	beq.l		res_snan_1op
   9545 	cmpi.b		%d1,&QNAN		# weed out QNAN
   9546 	beq.l		res_qnan_1op
   9547 
   9548 #
   9549 # do the fneg; at this point, only possible ops are ZERO and INF.
   9550 # use fneg to determine ccodes.
    9551 # prec:mode should be zero at this point but it won't affect the answer anyway.
   9552 #
   9553 	fneg.x		SRC_EX(%a0),%fp0	# do fneg
   9554 	fmov.l		%fpsr,%d0
   9555 	rol.l		&0x8,%d0		# put ccodes in lo byte
   9556 	mov.b		%d0,FPSR_CC(%a6)	# insert correct ccodes
   9557 	rts
   9558 
   9559 #########################################################################
   9560 # XDEF ****************************************************************	#
    9561 # 	ftst(): emulates the ftst instruction				#
   9562 #									#
   9563 # XREF ****************************************************************	#
   9564 # 	res{s,q}nan_1op() - set NAN result for monadic instruction	#
   9565 #									#
   9566 # INPUT ***************************************************************	#
   9567 # 	a0 = pointer to extended precision source operand		#
   9568 #									#
   9569 # OUTPUT **************************************************************	#
   9570 #	none								#
   9571 #									#
   9572 # ALGORITHM ***********************************************************	#
    9573 # 	Check the source operand tag (STAG) and set the FPSR condition	#
    9574 # codes according to the operand type and sign.				#
   9575 #									#
   9576 #########################################################################
   9577 
   9578 	global		ftst
   9579 ftst:
   9580 	mov.b		STAG(%a6),%d1
   9581 	bne.b		ftst_not_norm		# optimize on non-norm input
   9582 
   9583 #
   9584 # Norm:
   9585 #
   9586 ftst_norm:
   9587 	tst.b		SRC_EX(%a0)		# is operand negative?
   9588 	bmi.b		ftst_norm_m		# yes
   9589 	rts
   9590 ftst_norm_m:
   9591 	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
   9592 	rts
   9593 
   9594 #
   9595 # input is not normalized; what is it?
   9596 #
   9597 ftst_not_norm:
   9598 	cmpi.b		%d1,&ZERO		# weed out ZERO
   9599 	beq.b		ftst_zero
   9600 	cmpi.b		%d1,&INF		# weed out INF
   9601 	beq.b		ftst_inf
   9602 	cmpi.b		%d1,&SNAN		# weed out SNAN
   9603 	beq.l		res_snan_1op
   9604 	cmpi.b		%d1,&QNAN		# weed out QNAN
   9605 	beq.l		res_qnan_1op
   9606 
   9607 #
   9608 # Denorm:
   9609 #
   9610 ftst_denorm:
   9611 	tst.b		SRC_EX(%a0)		# is operand negative?
   9612 	bmi.b		ftst_denorm_m		# yes
   9613 	rts
   9614 ftst_denorm_m:
   9615 	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
   9616 	rts
   9617 
   9618 #
   9619 # Infinity:
   9620 #
   9621 ftst_inf:
   9622 	tst.b		SRC_EX(%a0)		# is operand negative?
   9623 	bmi.b		ftst_inf_m		# yes
   9624 ftst_inf_p:
   9625 	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
   9626 	rts
   9627 ftst_inf_m:
   9628 	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'I','N' ccode bits
   9629 	rts
   9630 
   9631 #
   9632 # Zero:
   9633 #
   9634 ftst_zero:
   9635 	tst.b		SRC_EX(%a0)		# is operand negative?
   9636 	bmi.b		ftst_zero_m		# yes
   9637 ftst_zero_p:
    9638 	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
   9639 	rts
   9640 ftst_zero_m:
   9641 	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set 'Z','N' ccode bits
   9642 	rts
   9643 
   9644 #########################################################################
   9645 # XDEF ****************************************************************	#
   9646 #	fint(): emulates the fint instruction				#
   9647 #									#
   9648 # XREF ****************************************************************	#
   9649 #	res_{s,q}nan_1op() - set NAN result for monadic operation	#
   9650 #									#
   9651 # INPUT ***************************************************************	#
   9652 #	a0 = pointer to extended precision source operand		#
   9653 #	d0 = round precision/mode					#
   9654 #									#
   9655 # OUTPUT **************************************************************	#
   9656 #	fp0 = result							#
   9657 #									#
   9658 # ALGORITHM ***********************************************************	#
   9659 # 	Separate according to operand type. Unnorms don't pass through 	#
   9660 # here. For norms, load the rounding mode/prec, execute a "fint", then 	#
   9661 # store the resulting FPSR bits.					#
   9662 # 	For denorms, force the j-bit to a one and do the same as for	#
   9663 # norms. Denorms are so low that the answer will either be a zero or a 	#
   9664 # one.									#
   9665 # 	For zeroes/infs/NANs, return the same while setting the FPSR	#
   9666 # as appropriate.							#
   9667 #									#
   9668 #########################################################################
   9669 
   9670 	global		fint
   9671 fint:
   9672 	mov.b		STAG(%a6),%d1
   9673 	bne.b		fint_not_norm		# optimize on non-norm input
   9674 
   9675 #
   9676 # Norm:
   9677 #
   9678 fint_norm:
   9679 	andi.b		&0x30,%d0		# set prec = ext
   9680 
   9681 	fmov.l		%d0,%fpcr		# set FPCR
   9682 	fmov.l		&0x0,%fpsr		# clear FPSR
   9683 
   9684 	fint.x 		SRC(%a0),%fp0		# execute fint
   9685 
   9686 	fmov.l		&0x0,%fpcr		# clear FPCR
   9687 	fmov.l		%fpsr,%d0		# save FPSR
   9688 	or.l		%d0,USER_FPSR(%a6)	# set exception bits
   9689 
   9690 	rts
   9691 
   9692 #
   9693 # input is not normalized; what is it?
   9694 #
   9695 fint_not_norm:
   9696 	cmpi.b		%d1,&ZERO		# weed out ZERO
   9697 	beq.b		fint_zero
   9698 	cmpi.b		%d1,&INF		# weed out INF
   9699 	beq.b		fint_inf
   9700 	cmpi.b		%d1,&DENORM		# weed out DENORM
   9701 	beq.b		fint_denorm
   9702 	cmpi.b		%d1,&SNAN		# weed out SNAN
   9703 	beq.l		res_snan_1op
   9704 	bra.l		res_qnan_1op		# weed out QNAN
   9705 
   9706 #
   9707 # Denorm:
   9708 #
   9709 # for DENORMs, the result will be either (+/-)ZERO or (+/-)1.
   9710 # also, the INEX2 and AINEX exception bits will be set.
   9711 # so, we could either set these manually or force the DENORM
   9712 # to a very small NORM and ship it to the NORM routine.
   9713 # I do the latter.
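         # (the 0x80 written to FP_SCR0_HI is the explicit integer bit of the
         # extended mantissa, so the denorm becomes a tiny normalized value;
         # fint of that value is still (+/-)0 or (+/-)1 and raises the same
         # inexact bits.)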
   9714 #
   9715 fint_denorm:
   9716 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
   9717 	mov.b		&0x80,FP_SCR0_HI(%a6)	# force DENORM ==> small NORM
   9718 	lea		FP_SCR0(%a6),%a0
   9719 	bra.b		fint_norm
   9720 
   9721 #
   9722 # Zero:
   9723 #
   9724 fint_zero:
   9725 	tst.b		SRC_EX(%a0)		# is ZERO negative?
   9726 	bmi.b		fint_zero_m		# yes
   9727 fint_zero_p:
   9728 	fmov.s		&0x00000000,%fp0	# return +ZERO in fp0
   9729 	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
   9730 	rts
   9731 fint_zero_m:
   9732 	fmov.s		&0x80000000,%fp0	# return -ZERO in fp0
   9733 	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
   9734 	rts
   9735 
   9736 #
   9737 # Infinity:
   9738 #
   9739 fint_inf:
   9740 	fmovm.x		SRC(%a0),&0x80		# return result in fp0
   9741 	tst.b		SRC_EX(%a0)		# is INF negative?
   9742 	bmi.b		fint_inf_m		# yes
   9743 fint_inf_p:
   9744 	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
   9745 	rts
   9746 fint_inf_m:
   9747 	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
   9748 	rts
   9749 
   9750 #########################################################################
   9751 # XDEF ****************************************************************	#
   9752 #	fintrz(): emulates the fintrz instruction			#
   9753 #									#
   9754 # XREF ****************************************************************	#
   9755 #	res_{s,q}nan_1op() - set NAN result for monadic operation	#
   9756 #									#
   9757 # INPUT ***************************************************************	#
   9758 #	a0 = pointer to extended precision source operand		#
   9759 #	d0 = round precision/mode					#
   9760 #									#
   9761 # OUTPUT **************************************************************	#
   9762 # 	fp0 = result							#
   9763 #									#
   9764 # ALGORITHM ***********************************************************	#
   9765 #	Separate according to operand type. Unnorms don't pass through	#
   9766 # here. For norms, load the rounding mode/prec, execute a "fintrz", 	#
   9767 # then store the resulting FPSR bits.					#
   9768 # 	For denorms, force the j-bit to a one and do the same as for	#
   9769 # norms. Denorms are so low that the answer will either be a zero or a	#
   9770 # one.									#
   9771 # 	For zeroes/infs/NANs, return the same while setting the FPSR	#
   9772 # as appropriate.							#
   9773 #									#
   9774 #########################################################################
   9775 
   9776 	global		fintrz
   9777 fintrz:
   9778 	mov.b		STAG(%a6),%d1
   9779 	bne.b		fintrz_not_norm		# optimize on non-norm input
   9780 
   9781 #
   9782 # Norm:
   9783 #
   9784 fintrz_norm:
   9785 	fmov.l		&0x0,%fpsr		# clear FPSR
   9786 
   9787 	fintrz.x	SRC(%a0),%fp0		# execute fintrz
   9788 
   9789 	fmov.l		%fpsr,%d0		# save FPSR
   9790 	or.l		%d0,USER_FPSR(%a6)	# set exception bits
   9791 
   9792 	rts
   9793 
   9794 #
   9795 # input is not normalized; what is it?
   9796 #
   9797 fintrz_not_norm:
   9798 	cmpi.b		%d1,&ZERO		# weed out ZERO
   9799 	beq.b		fintrz_zero
   9800 	cmpi.b		%d1,&INF		# weed out INF
   9801 	beq.b		fintrz_inf
   9802 	cmpi.b		%d1,&DENORM		# weed out DENORM
   9803 	beq.b		fintrz_denorm
   9804 	cmpi.b		%d1,&SNAN		# weed out SNAN
   9805 	beq.l		res_snan_1op
   9806 	bra.l		res_qnan_1op		# weed out QNAN
   9807 
   9808 #
   9809 # Denorm:
   9810 #
   9811 # for DENORMs, the result will be (+/-)ZERO.
   9812 # also, the INEX2 and AINEX exception bits will be set.
   9813 # so, we could either set these manually or force the DENORM
   9814 # to a very small NORM and ship it to the NORM routine.
   9815 # I do the latter.
   9816 #
   9817 fintrz_denorm:
   9818 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
   9819 	mov.b		&0x80,FP_SCR0_HI(%a6)	# force DENORM ==> small NORM
   9820 	lea		FP_SCR0(%a6),%a0
   9821 	bra.b		fintrz_norm
   9822 
   9823 #
   9824 # Zero:
   9825 #
   9826 fintrz_zero:
   9827 	tst.b		SRC_EX(%a0)		# is ZERO negative?
   9828 	bmi.b		fintrz_zero_m		# yes
   9829 fintrz_zero_p:
   9830 	fmov.s		&0x00000000,%fp0	# return +ZERO in fp0
   9831 	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
   9832 	rts
   9833 fintrz_zero_m:
   9834 	fmov.s		&0x80000000,%fp0	# return -ZERO in fp0
   9835 	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
   9836 	rts
   9837 
   9838 #
   9839 # Infinity:
   9840 #
   9841 fintrz_inf:
   9842 	fmovm.x		SRC(%a0),&0x80		# return result in fp0
   9843 	tst.b		SRC_EX(%a0)		# is INF negative?
   9844 	bmi.b		fintrz_inf_m		# yes
   9845 fintrz_inf_p:
   9846 	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
   9847 	rts
   9848 fintrz_inf_m:
   9849 	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
   9850 	rts
   9851 
   9852 #########################################################################
   9853 # XDEF ****************************************************************	#
   9854 #	fabs():  emulates the fabs instruction				#
   9855 #	fsabs(): emulates the fsabs instruction				#
   9856 #	fdabs(): emulates the fdabs instruction				#
   9857 #									#
   9858 # XREF **************************************************************** #
   9859 #	norm() - normalize denorm mantissa to provide EXOP		#
    9860 #	scale_to_zero_src() - make exponent = 0; get scale factor	#
   9861 #	unf_res() - calculate underflow result				#
   9862 #	ovf_res() - calculate overflow result				#
   9863 #	res_{s,q}nan_1op() - set NAN result for monadic operation	#
   9864 #									#
   9865 # INPUT *************************************************************** #
   9866 #	a0 = pointer to extended precision source operand		#
   9867 #	d0 = rnd precision/mode						#
   9868 #									#
   9869 # OUTPUT ************************************************************** #
   9870 #	fp0 = result							#
   9871 #	fp1 = EXOP (if exception occurred)				#
   9872 #									#
   9873 # ALGORITHM ***********************************************************	#
   9874 #	Handle NANs, infinities, and zeroes as special cases. Divide	#
   9875 # norms into extended, single, and double precision. 			#
   9876 # 	Simply clear sign for extended precision norm. Ext prec denorm	#
   9877 # gets an EXOP created for it since it's an underflow.			#
   9878 #	Double and single precision can overflow and underflow. First,	#
   9879 # scale the operand such that the exponent is zero. Perform an "fabs"	#
   9880 # using the correct rnd mode/prec. Check to see if the original 	#
   9881 # exponent would take an exception. If so, use unf_res() or ovf_res()	#
   9882 # to calculate the default result. Also, create the EXOP for the	#
   9883 # exceptional case. If no exception should occur, insert the correct 	#
   9884 # result exponent and return.						#
   9885 # 	Unnorms don't pass through here.				#
   9886 #									#
   9887 #########################################################################
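         #
         # the sgl/dbl path described above, as commented pseudocode (for
         # illustration only; not meant to assemble, and the helper names just
         # mirror the labels and XREF routines below):
         #
         #	scale = scale_to_zero_src(op)		 exponent forced to 0x3fff
         #	if (scale >= 0x3fff - min_exp)		 0x3f80 sgl / 0x3c00 dbl
         #		return unf_res(...)		 default result, EXOP if enabled
         #	res = fabs(op), rounded to the user's precision and mode
         #	if (scale <  0x3fff - max_exp ||	 0x407e sgl / 0x43fe dbl
         #	    (scale == 0x3fff - max_exp && |res| >= 2.0))
         #		return ovf_res(...)		 default result, EXOP if enabled
         #	res.exponent -= scale			 undo the scaling
         #	return res
         #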
   9888 
   9889 	global		fsabs
   9890 fsabs:
   9891 	andi.b		&0x30,%d0		# clear rnd prec
   9892 	ori.b		&s_mode*0x10,%d0	# insert sgl precision
   9893 	bra.b		fabs
   9894 
   9895 	global		fdabs
   9896 fdabs:
   9897 	andi.b		&0x30,%d0		# clear rnd prec
   9898 	ori.b		&d_mode*0x10,%d0	# insert dbl precision
   9899 
   9900 	global		fabs
   9901 fabs:
   9902 	mov.l		%d0,L_SCR3(%a6)		# store rnd info
   9903 	mov.b		STAG(%a6),%d1
   9904 	bne.w		fabs_not_norm		# optimize on non-norm input
   9905 
   9906 #
   9907 # ABSOLUTE VALUE: norms and denorms ONLY!
   9908 #
   9909 fabs_norm:
   9910 	andi.b		&0xc0,%d0		# is precision extended?
   9911 	bne.b		fabs_not_ext		# no; go handle sgl or dbl
   9912 
   9913 #
   9914 # precision selected is extended. so...we can not get an underflow
   9915 # or overflow because of rounding to the correct precision. so...
   9916 # skip the scaling and unscaling...
   9917 #
   9918 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   9919 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   9920 	mov.w		SRC_EX(%a0),%d1
   9921 	bclr		&15,%d1			# force absolute value
   9922 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert exponent
   9923 	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
   9924 	rts
   9925 
   9926 #
   9927 # for an extended precision DENORM, the UNFL exception bit is set
   9928 # the accrued bit is NOT set in this instance(no inexactness!)
   9929 #
   9930 fabs_denorm:
   9931 	andi.b		&0xc0,%d0		# is precision extended?
   9932 	bne.b		fabs_not_ext		# no
   9933 
   9934 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
   9935 
   9936 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   9937 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   9938 	mov.w		SRC_EX(%a0),%d0
   9939 	bclr		&15,%d0			# clear sign
   9940 	mov.w		%d0,FP_SCR0_EX(%a6)	# insert exponent
   9941 
   9942 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
   9943 
   9944 	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
   9945 	bne.b		fabs_ext_unfl_ena
   9946 	rts
   9947 
   9948 #
   9949 # the input is an extended DENORM and underflow is enabled in the FPCR.
   9950 # normalize the mantissa and add the bias of 0x6000 to the resulting negative
   9951 # exponent and insert back into the operand.
   9952 #
   9953 fabs_ext_unfl_ena:
   9954 	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
   9955 	bsr.l		norm			# normalize result
   9956 	neg.w		%d0			# new exponent = -(shft val)
   9957 	addi.w		&0x6000,%d0		# add new bias to exponent
   9958 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
   9959 	andi.w		&0x8000,%d1		# keep old sign
   9960 	andi.w		&0x7fff,%d0		# clear sign position
   9961 	or.w		%d1,%d0			# concat old sign, new exponent
   9962 	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
   9963 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   9964 	rts
   9965 
   9966 #
   9967 # operand is either single or double
   9968 #
   9969 fabs_not_ext:
   9970 	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
   9971 	bne.b		fabs_dbl
   9972 
   9973 #
   9974 # operand is to be rounded to single precision
   9975 #
   9976 fabs_sgl:
   9977 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
   9978 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   9979 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   9980 	bsr.l		scale_to_zero_src	# calculate scale factor
   9981 
   9982 	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
   9983 	bge.w		fabs_sd_unfl		# yes; go handle underflow
   9984 	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
   9985 	beq.w		fabs_sd_may_ovfl	# maybe; go check
   9986 	blt.w		fabs_sd_ovfl		# yes; go handle overflow
   9987 
   9988 #
   9989 # operand will NOT overflow or underflow when moved in to the fp reg file
   9990 #
   9991 fabs_sd_normal:
   9992 	fmov.l		&0x0,%fpsr		# clear FPSR
   9993 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   9994 
   9995 	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute
   9996 
   9997 	fmov.l		%fpsr,%d1		# save FPSR
   9998 	fmov.l		&0x0,%fpcr		# clear FPCR
   9999 
   10000 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   10001 
   10002 fabs_sd_normal_exit:
   10003 	mov.l		%d2,-(%sp)		# save d2
   10004 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
   10005 	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
   10006 	mov.l		%d1,%d2			# make a copy
   10007 	andi.l		&0x7fff,%d1		# strip sign
   10008 	sub.l		%d0,%d1			# add scale factor
   10009 	andi.w		&0x8000,%d2		# keep old sign
   10010 	or.w		%d1,%d2			# concat old sign,new exp
   10011 	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
   10012 	mov.l		(%sp)+,%d2		# restore d2
   10013 	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
   10014 	rts
   10015 
   10016 #
   10017 # operand is to be rounded to double precision
   10018 #
   10019 fabs_dbl:
   10020 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
   10021 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   10022 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   10023 	bsr.l		scale_to_zero_src	# calculate scale factor
   10024 
   10025 	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
   10026 	bge.b		fabs_sd_unfl		# yes; go handle underflow
   10027 	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
   10028 	beq.w		fabs_sd_may_ovfl	# maybe; go check
   10029 	blt.w		fabs_sd_ovfl		# yes; go handle overflow
   10030 	bra.w		fabs_sd_normal		# no; go handle normalized op
   10031 
   10032 #
   10033 # operand WILL underflow when moved in to the fp register file
   10034 #
   10035 fabs_sd_unfl:
   10036 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
   10037 
   10038 	bclr		&0x7,FP_SCR0_EX(%a6)	# force absolute value
   10039 
   10040 # if underflow or inexact is enabled, go calculate EXOP first.
   10041 	mov.b		FPCR_ENABLE(%a6),%d1
   10042 	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
   10043 	bne.b		fabs_sd_unfl_ena	# yes
   10044 
   10045 fabs_sd_unfl_dis:
   10046 	lea		FP_SCR0(%a6),%a0	# pass: result addr
   10047 	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
   10048 	bsr.l		unf_res			# calculate default result
   10049 	or.b		%d0,FPSR_CC(%a6)	# set possible 'Z' ccode
   10050 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
   10051 	rts
   10052 
   10053 #
   10054 # operand will underflow AND underflow or inexact is enabled.
   10055 # therefore, we must return the result rounded to extended precision.
   10056 #
   10057 fabs_sd_unfl_ena:
   10058 	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
   10059 	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
   10060 	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent
   10061 
   10062 	mov.l		%d2,-(%sp)		# save d2
   10063 	mov.l		%d1,%d2			# make a copy
   10064 	andi.l		&0x7fff,%d1		# strip sign
   10065 	andi.w		&0x8000,%d2		# keep old sign
   10066 	sub.l		%d0,%d1			# subtract scale factor
   10067 	addi.l		&0x6000,%d1		# add new bias
   10068 	andi.w		&0x7fff,%d1
   10069 	or.w		%d2,%d1			# concat new sign,new exp
   10070 	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
   10071 	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
   10072 	mov.l		(%sp)+,%d2		# restore d2
   10073 	bra.b		fabs_sd_unfl_dis
   10074 
   10075 #
   10076 # operand WILL overflow.
   10077 #
   10078 fabs_sd_ovfl:
   10079 	fmov.l		&0x0,%fpsr		# clear FPSR
   10080 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   10081 
   10082 	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute
   10083 
   10084 	fmov.l		&0x0,%fpcr		# clear FPCR
   10085 	fmov.l		%fpsr,%d1		# save FPSR
   10086 
   10087 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   10088 
   10089 fabs_sd_ovfl_tst:
   10090 	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
   10091 
   10092 	mov.b		FPCR_ENABLE(%a6),%d1
   10093 	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
   10094 	bne.b		fabs_sd_ovfl_ena	# yes
   10095 
   10096 #
   10097 # OVFL is not enabled; therefore, we must create the default result by
   10098 # calling ovf_res().
   10099 #
   10100 fabs_sd_ovfl_dis:
   10101 	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
   10102 	sne		%d1			# set sign param accordingly
   10103 	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
   10104 	bsr.l		ovf_res			# calculate default result
   10105 	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
   10106 	fmovm.x		(%a0),&0x80		# return default result in fp0
   10107 	rts
   10108 
   10109 #
   10110 # OVFL is enabled.
   10111 # the INEX2 bit has already been updated by the round to the correct precision.
   10112 # now, round to extended(and don't alter the FPSR).
   10113 #
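          # (worked example of the bias adjustment below, hypothetical numbers: a
          # scaled exponent of 0x3fff with a scale factor of -0x3000 in d0 gives
          # 0x3fff - (-0x3000) - 0x6000 = 0x0fff; after the 0x7fff mask and
          # re-inserting the old sign, that is the EXOP exponent written back to
          # FP_SCR0_EX.)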
   10114 fabs_sd_ovfl_ena:
   10115 	mov.l		%d2,-(%sp)		# save d2
   10116 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
   10117 	mov.l		%d1,%d2			# make a copy
   10118 	andi.l		&0x7fff,%d1		# strip sign
   10119 	andi.w		&0x8000,%d2		# keep old sign
   10120 	sub.l		%d0,%d1			# add scale factor
   10121 	subi.l		&0x6000,%d1		# subtract bias
   10122 	andi.w		&0x7fff,%d1
   10123 	or.w		%d2,%d1			# concat sign,exp
   10124 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
   10125 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   10126 	mov.l		(%sp)+,%d2		# restore d2
   10127 	bra.b		fabs_sd_ovfl_dis
   10128 
   10129 #
    10130 # the move in MAY overflow. so...
   10131 #
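          # (after scaling, the operand's exponent is 0x3fff, so |fp0| lies in
          # [1.0, 2.0); if rounding to the destination precision bumps the magnitude
          # up to 2.0 - e.g. 1.11...1 rounding up - the true, unscaled exponent would
          # exceed the maximum for that precision, hence the fcmp against 2.0 below.)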
   10132 fabs_sd_may_ovfl:
   10133 	fmov.l		&0x0,%fpsr		# clear FPSR
   10134 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   10135 
   10136 	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute
   10137 
   10138 	fmov.l		%fpsr,%d1		# save status
   10139 	fmov.l		&0x0,%fpcr		# clear FPCR
   10140 
   10141 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   10142 
   10143 	fabs.x		%fp0,%fp1		# make a copy of result
   10144 	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
   10145 	fbge.w		fabs_sd_ovfl_tst	# yes; overflow has occurred
   10146 
   10147 # no, it didn't overflow; we have correct result
   10148 	bra.w		fabs_sd_normal_exit
   10149 
   10150 ##########################################################################
   10151 
   10152 #
   10153 # input is not normalized; what is it?
   10154 #
   10155 fabs_not_norm:
   10156 	cmpi.b		%d1,&DENORM		# weed out DENORM
   10157 	beq.w		fabs_denorm
   10158 	cmpi.b		%d1,&SNAN		# weed out SNAN
   10159 	beq.l		res_snan_1op
   10160 	cmpi.b		%d1,&QNAN		# weed out QNAN
   10161 	beq.l		res_qnan_1op
   10162 
   10163 	fabs.x		SRC(%a0),%fp0		# force absolute value
   10164 
   10165 	cmpi.b		%d1,&INF		# weed out INF
   10166 	beq.b		fabs_inf
   10167 fabs_zero:
   10168 	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
   10169 	rts
   10170 fabs_inf:
   10171 	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
   10172 	rts
   10173 
   10174 #########################################################################
   10175 # XDEF ****************************************************************	#
   10176 # 	fcmp(): fp compare op routine					#
   10177 #									#
   10178 # XREF ****************************************************************	#
   10179 # 	res_qnan() - return QNAN result					#
   10180 #	res_snan() - return SNAN result					#
   10181 #									#
   10182 # INPUT ***************************************************************	#
   10183 #	a0 = pointer to extended precision source operand		#
   10184 #	a1 = pointer to extended precision destination operand		#
   10185 #	d0 = round prec/mode						#
   10186 #									#
   10187 # OUTPUT ************************************************************** #
   10188 #	None								#
   10189 #									#
   10190 # ALGORITHM ***********************************************************	#
   10191 # 	Handle NANs and denorms as special cases. For everything else,	#
   10192 # just use the actual fcmp instruction to produce the correct condition	#
   10193 # codes.								#
   10194 #									#
   10195 #########################################################################
   10196 
   10197 	global		fcmp
   10198 fcmp:
   10199 	clr.w		%d1
   10200 	mov.b		DTAG(%a6),%d1
   10201 	lsl.b		&0x3,%d1
   10202 	or.b		STAG(%a6),%d1
   10203 	bne.b		fcmp_not_norm		# optimize on non-norm input
   10204 
   10205 #
   10206 # COMPARE FP OPs : NORMs, ZEROs, INFs, and "corrected" DENORMs
   10207 #
   10208 fcmp_norm:
   10209 	fmovm.x		DST(%a1),&0x80		# load dst op
   10210 
   10211 	fcmp.x 		%fp0,SRC(%a0)		# do compare
   10212 
   10213 	fmov.l		%fpsr,%d0		# save FPSR
   10214 	rol.l		&0x8,%d0		# extract ccode bits
   10215 	mov.b		%d0,FPSR_CC(%a6)	# set ccode bits(no exc bits are set)
   10216 
   10217 	rts
   10218 
   10219 #
   10220 # fcmp: inputs are not both normalized; what are they?
   10221 #
   10222 fcmp_not_norm:
   10223 	mov.w		(tbl_fcmp_op.b,%pc,%d1.w*2),%d1
   10224 	jmp		(tbl_fcmp_op.b,%pc,%d1.w*1)
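          # (dispatch sketch, with tag values inferred from the table layout below,
          # where each row is the dst tag and each column the src tag in the order
          # NORM, ZERO, INF, QNAN, DENORM, SNAN: d1 = (DTAG << 3) | STAG indexes a
          # table of 16-bit offsets; e.g. a NORM dst with a DENORM src gives
          # d1 = (0 << 3) | 4 = 4, the mov.w above fetches the 5th entry
          # (fcmp_nrm_dnrm - tbl_fcmp_op), and the jmp adds that offset back to the
          # table base.)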
   10225 
   10226 	swbeg		&48
   10227 tbl_fcmp_op:
   10228 	short		fcmp_norm	- tbl_fcmp_op # NORM - NORM
   10229 	short		fcmp_norm	- tbl_fcmp_op # NORM - ZERO
   10230 	short		fcmp_norm	- tbl_fcmp_op # NORM - INF
   10231 	short		fcmp_res_qnan	- tbl_fcmp_op # NORM - QNAN
   10232 	short		fcmp_nrm_dnrm 	- tbl_fcmp_op # NORM - DENORM
   10233 	short		fcmp_res_snan	- tbl_fcmp_op # NORM - SNAN
   10234 	short		tbl_fcmp_op	- tbl_fcmp_op #
   10235 	short		tbl_fcmp_op	- tbl_fcmp_op #
   10236 
   10237 	short		fcmp_norm	- tbl_fcmp_op # ZERO - NORM
   10238 	short		fcmp_norm	- tbl_fcmp_op # ZERO - ZERO
   10239 	short		fcmp_norm	- tbl_fcmp_op # ZERO - INF
   10240 	short		fcmp_res_qnan	- tbl_fcmp_op # ZERO - QNAN
   10241 	short		fcmp_dnrm_s	- tbl_fcmp_op # ZERO - DENORM
   10242 	short		fcmp_res_snan	- tbl_fcmp_op # ZERO - SNAN
   10243 	short		tbl_fcmp_op	- tbl_fcmp_op #
   10244 	short		tbl_fcmp_op	- tbl_fcmp_op #
   10245 
   10246 	short		fcmp_norm	- tbl_fcmp_op # INF - NORM
   10247 	short		fcmp_norm	- tbl_fcmp_op # INF - ZERO
   10248 	short		fcmp_norm	- tbl_fcmp_op # INF - INF
   10249 	short		fcmp_res_qnan	- tbl_fcmp_op # INF - QNAN
   10250 	short		fcmp_dnrm_s	- tbl_fcmp_op # INF - DENORM
   10251 	short		fcmp_res_snan	- tbl_fcmp_op # INF - SNAN
   10252 	short		tbl_fcmp_op	- tbl_fcmp_op #
   10253 	short		tbl_fcmp_op	- tbl_fcmp_op #
   10254 
   10255 	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - NORM
   10256 	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - ZERO
   10257 	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - INF
   10258 	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - QNAN
   10259 	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - DENORM
   10260 	short		fcmp_res_snan	- tbl_fcmp_op # QNAN - SNAN
   10261 	short		tbl_fcmp_op	- tbl_fcmp_op #
   10262 	short		tbl_fcmp_op	- tbl_fcmp_op #
   10263 
   10264 	short		fcmp_dnrm_nrm	- tbl_fcmp_op # DENORM - NORM
   10265 	short		fcmp_dnrm_d	- tbl_fcmp_op # DENORM - ZERO
   10266 	short		fcmp_dnrm_d	- tbl_fcmp_op # DENORM - INF
   10267 	short		fcmp_res_qnan	- tbl_fcmp_op # DENORM - QNAN
   10268 	short		fcmp_dnrm_sd	- tbl_fcmp_op # DENORM - DENORM
   10269 	short		fcmp_res_snan	- tbl_fcmp_op # DENORM - SNAN
   10270 	short		tbl_fcmp_op	- tbl_fcmp_op #
   10271 	short		tbl_fcmp_op	- tbl_fcmp_op #
   10272 
   10273 	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - NORM
   10274 	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - ZERO
   10275 	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - INF
   10276 	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - QNAN
   10277 	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - DENORM
   10278 	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - SNAN
   10279 	short		tbl_fcmp_op	- tbl_fcmp_op #
   10280 	short		tbl_fcmp_op	- tbl_fcmp_op #
   10281 
   10282 # unlike all other functions for QNAN and SNAN, fcmp does NOT set the
   10283 # 'N' bit for a negative QNAN or SNAN input so we must squelch it here.
   10284 fcmp_res_qnan:
   10285 	bsr.l		res_qnan
   10286 	andi.b		&0xf7,FPSR_CC(%a6)
   10287 	rts
   10288 fcmp_res_snan:
   10289 	bsr.l		res_snan
   10290 	andi.b		&0xf7,FPSR_CC(%a6)
   10291 	rts
   10292 
   10293 #
   10294 # DENORMs are a little more difficult.
    10295 # If you have 2 DENORMs, then you can just force the j-bit to a one
   10296 # and use the fcmp_norm routine.
   10297 # If you have a DENORM and an INF or ZERO, just force the DENORM's j-bit to a one
   10298 # and use the fcmp_norm routine.
   10299 # If you have a DENORM and a NORM with opposite signs, then use fcmp_norm, also.
   10300 # But with a DENORM and a NORM of the same sign, the neg bit is set if the
   10301 # (1) signs are (+) and the DENORM is the dst or
   10302 # (2) signs are (-) and the DENORM is the src
   10303 #
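          # (example of the j-bit trick below, illustrative value: a DENORM with
          # SRC_HI = 0x40000000 becomes 0xc0000000 after the bset &31, i.e. a small
          # nonzero value with its exponent and sign untouched; its ordering against
          # a ZERO, INF, or opposite-signed operand is unchanged, which is all that
          # fcmp needs in these cases.)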
   10304 
   10305 fcmp_dnrm_s:
   10306 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
   10307 	mov.l		SRC_HI(%a0),%d0
   10308 	bset		&31,%d0			# DENORM src; make into small norm
   10309 	mov.l		%d0,FP_SCR0_HI(%a6)
   10310 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   10311 	lea		FP_SCR0(%a6),%a0
   10312 	bra.w		fcmp_norm
   10313 
   10314 fcmp_dnrm_d:
   10315 	mov.l		DST_EX(%a1),FP_SCR0_EX(%a6)
   10316 	mov.l		DST_HI(%a1),%d0
    10317 	bset		&31,%d0			# DENORM dst; make into small norm
   10318 	mov.l		%d0,FP_SCR0_HI(%a6)
   10319 	mov.l		DST_LO(%a1),FP_SCR0_LO(%a6)
   10320 	lea		FP_SCR0(%a6),%a1
   10321 	bra.w		fcmp_norm
   10322 
   10323 fcmp_dnrm_sd:
   10324 	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
   10325 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
   10326 	mov.l		DST_HI(%a1),%d0
   10327 	bset		&31,%d0			# DENORM dst; make into small norm
   10328 	mov.l		%d0,FP_SCR1_HI(%a6)
   10329 	mov.l		SRC_HI(%a0),%d0
    10330 	bset		&31,%d0			# DENORM src; make into small norm
   10331 	mov.l		%d0,FP_SCR0_HI(%a6)
   10332 	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
   10333 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   10334 	lea		FP_SCR1(%a6),%a1
   10335 	lea		FP_SCR0(%a6),%a0
   10336 	bra.w		fcmp_norm
   10337 
   10338 fcmp_nrm_dnrm:
   10339 	mov.b		SRC_EX(%a0),%d0		# determine if like signs
   10340 	mov.b		DST_EX(%a1),%d1
   10341 	eor.b		%d0,%d1
   10342 	bmi.w		fcmp_dnrm_s
   10343 
   10344 # signs are the same, so must determine the answer ourselves.
   10345 	tst.b		%d0			# is src op negative?
   10346 	bmi.b		fcmp_nrm_dnrm_m		# yes
   10347 	rts
   10348 fcmp_nrm_dnrm_m:
    10349 	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
   10350 	rts
   10351 
   10352 fcmp_dnrm_nrm:
   10353 	mov.b		SRC_EX(%a0),%d0		# determine if like signs
   10354 	mov.b		DST_EX(%a1),%d1
   10355 	eor.b		%d0,%d1
   10356 	bmi.w		fcmp_dnrm_d
   10357 
   10358 # signs are the same, so must determine the answer ourselves.
   10359 	tst.b		%d0			# is src op negative?
   10360 	bpl.b		fcmp_dnrm_nrm_m		# no
   10361 	rts
   10362 fcmp_dnrm_nrm_m:
    10363 	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
   10364 	rts
   10365 
   10366 #########################################################################
   10367 # XDEF ****************************************************************	#
   10368 # 	fsglmul(): emulates the fsglmul instruction			#
   10369 #									#
   10370 # XREF ****************************************************************	#
   10371 #	scale_to_zero_src() - scale src exponent to zero		#
   10372 #	scale_to_zero_dst() - scale dst exponent to zero		#
   10373 #	unf_res4() - return default underflow result for sglop		#
   10374 #	ovf_res() - return default overflow result			#
   10375 # 	res_qnan() - return QNAN result					#
   10376 # 	res_snan() - return SNAN result					#
   10377 #									#
   10378 # INPUT ***************************************************************	#
   10379 #	a0 = pointer to extended precision source operand		#
   10380 #	a1 = pointer to extended precision destination operand		#
    10381 #	d0 = rnd prec,mode						#
   10382 #									#
   10383 # OUTPUT **************************************************************	#
   10384 #	fp0 = result							#
   10385 #	fp1 = EXOP (if exception occurred)				#
   10386 #									#
   10387 # ALGORITHM ***********************************************************	#
   10388 #	Handle NANs, infinities, and zeroes as special cases. Divide	#
   10389 # norms/denorms into ext/sgl/dbl precision.				#
   10390 #	For norms/denorms, scale the exponents such that a multiply	#
   10391 # instruction won't cause an exception. Use the regular fsglmul to	#
   10392 # compute a result. Check if the regular operands would have taken	#
   10393 # an exception. If so, return the default overflow/underflow result	#
   10394 # and return the EXOP if exceptions are enabled. Else, scale the 	#
   10395 # result operand to the proper exponent.				#
   10396 #									#
   10397 #########################################################################
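          # (rough sketch of the scaling scheme, hypothetical numbers: both operands
          # are scaled so that their exponents become 0x3fff and d0 ends up holding
          # the combined scale factor scale1 + scale2; the exit code recovers the
          # true exponent by subtracting d0, e.g. a scaled product exponent of 0x4000
          # with d0 = 0x0005 stores 0x3ffb, the exponent the unscaled multiply would
          # have produced.)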
   10398 
   10399 	global		fsglmul
   10400 fsglmul:
   10401 	mov.l		%d0,L_SCR3(%a6)		# store rnd info
   10402 
   10403 	clr.w		%d1
   10404 	mov.b		DTAG(%a6),%d1
   10405 	lsl.b		&0x3,%d1
   10406 	or.b		STAG(%a6),%d1
   10407 
   10408 	bne.w		fsglmul_not_norm	# optimize on non-norm input
   10409 
   10410 fsglmul_norm:
   10411 	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
   10412 	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
   10413 	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
   10414 
   10415 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
   10416 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   10417 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   10418 
   10419 	bsr.l		scale_to_zero_src	# scale exponent
   10420 	mov.l		%d0,-(%sp)		# save scale factor 1
   10421 
   10422 	bsr.l		scale_to_zero_dst	# scale dst exponent
   10423 
   10424 	add.l		(%sp)+,%d0		# SCALE_FACTOR = scale1 + scale2
   10425 
   10426 	cmpi.l		%d0,&0x3fff-0x7ffe 	# would result ovfl?
   10427 	beq.w		fsglmul_may_ovfl	# result may rnd to overflow
   10428 	blt.w		fsglmul_ovfl		# result will overflow
   10429 
   10430 	cmpi.l		%d0,&0x3fff+0x0001 	# would result unfl?
   10431 	beq.w		fsglmul_may_unfl	# result may rnd to no unfl
   10432 	bgt.w		fsglmul_unfl		# result will underflow
   10433 
   10434 fsglmul_normal:
   10435 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
   10436 
   10437 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   10438 	fmov.l		&0x0,%fpsr		# clear FPSR
   10439 
   10440 	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply
   10441 
   10442 	fmov.l		%fpsr,%d1		# save status
   10443 	fmov.l		&0x0,%fpcr		# clear FPCR
   10444 
   10445 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   10446 
   10447 fsglmul_normal_exit:
   10448 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
   10449 	mov.l		%d2,-(%sp)		# save d2
   10450 	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
   10451 	mov.l		%d1,%d2			# make a copy
   10452 	andi.l		&0x7fff,%d1		# strip sign
   10453 	andi.w		&0x8000,%d2		# keep old sign
   10454 	sub.l		%d0,%d1			# add scale factor
   10455 	or.w		%d2,%d1			# concat old sign,new exp
   10456 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
   10457 	mov.l		(%sp)+,%d2		# restore d2
   10458 	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
   10459 	rts
   10460 
   10461 fsglmul_ovfl:
   10462 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
   10463 
   10464 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   10465 	fmov.l		&0x0,%fpsr		# clear FPSR
   10466 
   10467 	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply
   10468 
   10469 	fmov.l		%fpsr,%d1		# save status
   10470 	fmov.l		&0x0,%fpcr		# clear FPCR
   10471 
   10472 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   10473 
   10474 fsglmul_ovfl_tst:
   10475 
   10476 # save setting this until now because this is where fsglmul_may_ovfl may jump in
   10477 	or.l		&ovfl_inx_mask, USER_FPSR(%a6) # set ovfl/aovfl/ainex
   10478 
   10479 	mov.b		FPCR_ENABLE(%a6),%d1
   10480 	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
   10481 	bne.b		fsglmul_ovfl_ena	# yes
   10482 
   10483 fsglmul_ovfl_dis:
   10484 	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
   10485 	sne		%d1			# set sign param accordingly
   10486 	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
   10487 	andi.b		&0x30,%d0		# force prec = ext
   10488 	bsr.l		ovf_res			# calculate default result
   10489 	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
   10490 	fmovm.x		(%a0),&0x80		# return default result in fp0
   10491 	rts
   10492 
   10493 fsglmul_ovfl_ena:
   10494 	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack
   10495 
   10496 	mov.l		%d2,-(%sp)		# save d2
   10497 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
   10498 	mov.l		%d1,%d2			# make a copy
   10499 	andi.l		&0x7fff,%d1		# strip sign
   10500 	sub.l		%d0,%d1			# add scale factor
   10501 	subi.l		&0x6000,%d1		# subtract bias
   10502 	andi.w		&0x7fff,%d1
   10503 	andi.w		&0x8000,%d2		# keep old sign
   10504 	or.w		%d2,%d1			# concat old sign,new exp
   10505 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
   10506 	mov.l		(%sp)+,%d2		# restore d2
   10507 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   10508 	bra.b		fsglmul_ovfl_dis
   10509 
   10510 fsglmul_may_ovfl:
   10511 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
   10512 
   10513 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   10514 	fmov.l		&0x0,%fpsr		# clear FPSR
   10515 
   10516 	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply
   10517 
   10518 	fmov.l		%fpsr,%d1		# save status
   10519 	fmov.l		&0x0,%fpcr		# clear FPCR
   10520 
   10521 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   10522 
   10523 	fabs.x		%fp0,%fp1		# make a copy of result
   10524 	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
   10525 	fbge.w		fsglmul_ovfl_tst	# yes; overflow has occurred
   10526 
   10527 # no, it didn't overflow; we have correct result
   10528 	bra.w		fsglmul_normal_exit
   10529 
   10530 fsglmul_unfl:
   10531 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
   10532 
   10533 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
   10534 
   10535 	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
   10536 	fmov.l		&0x0,%fpsr		# clear FPSR
   10537 
   10538 	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply
   10539 
   10540 	fmov.l		%fpsr,%d1		# save status
   10541 	fmov.l		&0x0,%fpcr		# clear FPCR
   10542 
   10543 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   10544 
   10545 	mov.b		FPCR_ENABLE(%a6),%d1
   10546 	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
   10547 	bne.b		fsglmul_unfl_ena	# yes
   10548 
   10549 fsglmul_unfl_dis:
   10550 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
   10551 
   10552 	lea		FP_SCR0(%a6),%a0	# pass: result addr
   10553 	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
   10554 	bsr.l		unf_res4		# calculate default result
   10555 	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
   10556 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
   10557 	rts
   10558 
   10559 #
   10560 # UNFL is enabled.
   10561 #
   10562 fsglmul_unfl_ena:
   10563 	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op
   10564 
   10565 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   10566 	fmov.l		&0x0,%fpsr		# clear FPSR
   10567 
   10568 	fsglmul.x	FP_SCR0(%a6),%fp1	# execute sgl multiply
   10569 
   10570 	fmov.l		&0x0,%fpcr		# clear FPCR
   10571 
   10572 	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
   10573 	mov.l		%d2,-(%sp)		# save d2
   10574 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
   10575 	mov.l		%d1,%d2			# make a copy
   10576 	andi.l		&0x7fff,%d1		# strip sign
   10577 	andi.w		&0x8000,%d2		# keep old sign
   10578 	sub.l		%d0,%d1			# add scale factor
   10579 	addi.l		&0x6000,%d1		# add bias
   10580 	andi.w		&0x7fff,%d1
   10581 	or.w		%d2,%d1			# concat old sign,new exp
   10582 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
   10583 	mov.l		(%sp)+,%d2		# restore d2
   10584 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   10585 	bra.w		fsglmul_unfl_dis
   10586 
   10587 fsglmul_may_unfl:
   10588 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
   10589 
   10590 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   10591 	fmov.l		&0x0,%fpsr		# clear FPSR
   10592 
   10593 	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply
   10594 
   10595 	fmov.l		%fpsr,%d1		# save status
   10596 	fmov.l		&0x0,%fpcr		# clear FPCR
   10597 
   10598 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   10599 
   10600 	fabs.x		%fp0,%fp1		# make a copy of result
   10601 	fcmp.b		%fp1,&0x2		# is |result| > 2.b?
   10602 	fbgt.w		fsglmul_normal_exit	# no; no underflow occurred
   10603 	fblt.w		fsglmul_unfl		# yes; underflow occurred
   10604 
   10605 #
   10606 # we still don't know if underflow occurred. result is ~ equal to 2. but,
   10607 # we don't know if the result was an underflow that rounded up to a 2 or
   10608 # a normalized number that rounded down to a 2. so, redo the entire operation
   10609 # using RZ as the rounding mode to see what the pre-rounded result is.
   10610 # this case should be relatively rare.
   10611 #
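          # (e.g. a pre-rounded magnitude of 1.fff...f that rounds up to 2.0 under
          # the user's rounding mode stays below 2.0 under RZ, since RZ never
          # increases the magnitude; so re-running the multiply with RZ and
          # re-testing against 2.0 separates a rounded-up underflow from a result
          # that really is >= 2.0.)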
   10612 	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
   10613 
   10614 	mov.l		L_SCR3(%a6),%d1
   10615 	andi.b		&0xc0,%d1		# keep rnd prec
   10616 	ori.b		&rz_mode*0x10,%d1	# insert RZ
   10617 
   10618 	fmov.l		%d1,%fpcr		# set FPCR
   10619 	fmov.l		&0x0,%fpsr		# clear FPSR
   10620 
   10621 	fsglmul.x	FP_SCR0(%a6),%fp1	# execute sgl multiply
   10622 
   10623 	fmov.l		&0x0,%fpcr		# clear FPCR
   10624 	fabs.x		%fp1			# make absolute value
   10625 	fcmp.b		%fp1,&0x2		# is |result| < 2.b?
   10626 	fbge.w		fsglmul_normal_exit	# no; no underflow occurred
   10627 	bra.w		fsglmul_unfl		# yes, underflow occurred
   10628 
   10629 ##############################################################################
   10630 
   10631 #
   10632 # Single Precision Multiply: inputs are not both normalized; what are they?
   10633 #
   10634 fsglmul_not_norm:
   10635 	mov.w		(tbl_fsglmul_op.b,%pc,%d1.w*2),%d1
   10636 	jmp		(tbl_fsglmul_op.b,%pc,%d1.w*1)
   10637 
   10638 	swbeg		&48
   10639 tbl_fsglmul_op:
   10640 	short		fsglmul_norm		- tbl_fsglmul_op # NORM x NORM
   10641 	short		fsglmul_zero		- tbl_fsglmul_op # NORM x ZERO
   10642 	short		fsglmul_inf_src		- tbl_fsglmul_op # NORM x INF
   10643 	short		fsglmul_res_qnan	- tbl_fsglmul_op # NORM x QNAN
   10644 	short		fsglmul_norm		- tbl_fsglmul_op # NORM x DENORM
   10645 	short		fsglmul_res_snan	- tbl_fsglmul_op # NORM x SNAN
   10646 	short		tbl_fsglmul_op		- tbl_fsglmul_op #
   10647 	short		tbl_fsglmul_op		- tbl_fsglmul_op #
   10648 
   10649 	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x NORM
   10650 	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x ZERO
   10651 	short		fsglmul_res_operr	- tbl_fsglmul_op # ZERO x INF
   10652 	short		fsglmul_res_qnan	- tbl_fsglmul_op # ZERO x QNAN
   10653 	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x DENORM
   10654 	short		fsglmul_res_snan	- tbl_fsglmul_op # ZERO x SNAN
   10655 	short		tbl_fsglmul_op		- tbl_fsglmul_op #
   10656 	short		tbl_fsglmul_op		- tbl_fsglmul_op #
   10657 
   10658 	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x NORM
   10659 	short		fsglmul_res_operr	- tbl_fsglmul_op # INF x ZERO
   10660 	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x INF
   10661 	short		fsglmul_res_qnan	- tbl_fsglmul_op # INF x QNAN
   10662 	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x DENORM
   10663 	short		fsglmul_res_snan	- tbl_fsglmul_op # INF x SNAN
   10664 	short		tbl_fsglmul_op		- tbl_fsglmul_op #
   10665 	short		tbl_fsglmul_op		- tbl_fsglmul_op #
   10666 
   10667 	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x NORM
   10668 	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x ZERO
   10669 	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x INF
   10670 	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x QNAN
   10671 	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x DENORM
   10672 	short		fsglmul_res_snan	- tbl_fsglmul_op # QNAN x SNAN
   10673 	short		tbl_fsglmul_op		- tbl_fsglmul_op #
   10674 	short		tbl_fsglmul_op		- tbl_fsglmul_op #
   10675 
    10676 	short		fsglmul_norm		- tbl_fsglmul_op # DENORM x NORM
    10677 	short		fsglmul_zero		- tbl_fsglmul_op # DENORM x ZERO
    10678 	short		fsglmul_inf_src		- tbl_fsglmul_op # DENORM x INF
    10679 	short		fsglmul_res_qnan	- tbl_fsglmul_op # DENORM x QNAN
    10680 	short		fsglmul_norm		- tbl_fsglmul_op # DENORM x DENORM
    10681 	short		fsglmul_res_snan	- tbl_fsglmul_op # DENORM x SNAN
   10682 	short		tbl_fsglmul_op		- tbl_fsglmul_op #
   10683 	short		tbl_fsglmul_op		- tbl_fsglmul_op #
   10684 
   10685 	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x NORM
   10686 	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x ZERO
   10687 	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x INF
   10688 	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x QNAN
   10689 	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x DENORM
   10690 	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x SNAN
   10691 	short		tbl_fsglmul_op		- tbl_fsglmul_op #
   10692 	short		tbl_fsglmul_op		- tbl_fsglmul_op #
   10693 
   10694 fsglmul_res_operr:
   10695 	bra.l		res_operr
   10696 fsglmul_res_snan:
   10697 	bra.l		res_snan
   10698 fsglmul_res_qnan:
   10699 	bra.l		res_qnan
   10700 fsglmul_zero:
   10701 	bra.l		fmul_zero
   10702 fsglmul_inf_src:
   10703 	bra.l		fmul_inf_src
   10704 fsglmul_inf_dst:
   10705 	bra.l		fmul_inf_dst
   10706 
   10707 #########################################################################
   10708 # XDEF ****************************************************************	#
   10709 # 	fsgldiv(): emulates the fsgldiv instruction			#
   10710 #									#
   10711 # XREF ****************************************************************	#
   10712 #	scale_to_zero_src() - scale src exponent to zero		#
   10713 #	scale_to_zero_dst() - scale dst exponent to zero		#
   10714 #	unf_res4() - return default underflow result for sglop		#
   10715 #	ovf_res() - return default overflow result			#
   10716 # 	res_qnan() - return QNAN result					#
   10717 # 	res_snan() - return SNAN result					#
   10718 #									#
   10719 # INPUT ***************************************************************	#
   10720 #	a0 = pointer to extended precision source operand		#
   10721 #	a1 = pointer to extended precision destination operand		#
    10722 #	d0 = rnd prec,mode						#
   10723 #									#
   10724 # OUTPUT **************************************************************	#
   10725 #	fp0 = result							#
   10726 #	fp1 = EXOP (if exception occurred)				#
   10727 #									#
   10728 # ALGORITHM ***********************************************************	#
   10729 #	Handle NANs, infinities, and zeroes as special cases. Divide	#
   10730 # norms/denorms into ext/sgl/dbl precision.				#
   10731 #	For norms/denorms, scale the exponents such that a divide	#
   10732 # instruction won't cause an exception. Use the regular fsgldiv to	#
   10733 # compute a result. Check if the regular operands would have taken	#
   10734 # an exception. If so, return the default overflow/underflow result	#
   10735 # and return the EXOP if exceptions are enabled. Else, scale the 	#
   10736 # result operand to the proper exponent.				#
   10737 #									#
   10738 #########################################################################
   10739 
   10740 	global		fsgldiv
   10741 fsgldiv:
   10742 	mov.l		%d0,L_SCR3(%a6)		# store rnd info
   10743 
   10744 	clr.w		%d1
   10745 	mov.b		DTAG(%a6),%d1
   10746 	lsl.b		&0x3,%d1
   10747 	or.b		STAG(%a6),%d1		# combine src tags
   10748 
   10749 	bne.w		fsgldiv_not_norm	# optimize on non-norm input
   10750 
   10751 #
   10752 # DIVIDE: NORMs and DENORMs ONLY!
   10753 #
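          # (note on the scale factor combination below, assuming scale_to_zero_*()
          # returns 0x3fff minus the operand's exponent: for a quotient the exponents
          # subtract rather than add, so the neg.l/add.l pair forms dst factor minus
          # src factor; e.g. src exp 0x4002 and dst exp 0x4005 give factors -3 and
          # -6, so d0 = -6 - (-3) = -3, and the exit code's sub.l of d0 turns a
          # scaled quotient exponent of 0x3fff back into 0x4002, roughly
          # dst exp - src exp + 0x3fff.)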
   10754 fsgldiv_norm:
   10755 	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
   10756 	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
   10757 	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
   10758 
   10759 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
   10760 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   10761 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   10762 
   10763 	bsr.l		scale_to_zero_src	# calculate scale factor 1
   10764 	mov.l		%d0,-(%sp)		# save scale factor 1
   10765 
   10766 	bsr.l		scale_to_zero_dst	# calculate scale factor 2
   10767 
    10768 	neg.l		(%sp)			# S.F. = scale2 - scale1
   10769 	add.l		%d0,(%sp)
   10770 
   10771 	mov.w		2+L_SCR3(%a6),%d1	# fetch precision,mode
   10772 	lsr.b		&0x6,%d1
   10773 	mov.l		(%sp)+,%d0
   10774 	cmpi.l		%d0,&0x3fff-0x7ffe
   10775 	ble.w		fsgldiv_may_ovfl
   10776 
   10777 	cmpi.l		%d0,&0x3fff-0x0000 	# will result underflow?
   10778 	beq.w		fsgldiv_may_unfl	# maybe
   10779 	bgt.w		fsgldiv_unfl		# yes; go handle underflow
   10780 
   10781 fsgldiv_normal:
   10782 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
   10783 
    10784 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   10785 	fmov.l		&0x0,%fpsr		# clear FPSR
   10786 
   10787 	fsgldiv.x	FP_SCR0(%a6),%fp0	# perform sgl divide
   10788 
   10789 	fmov.l		%fpsr,%d1		# save FPSR
   10790 	fmov.l		&0x0,%fpcr		# clear FPCR
   10791 
   10792 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   10793 
   10794 fsgldiv_normal_exit:
   10795 	fmovm.x		&0x80,FP_SCR0(%a6)	# store result on stack
   10796 	mov.l		%d2,-(%sp)		# save d2
   10797 	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
   10798 	mov.l		%d1,%d2			# make a copy
   10799 	andi.l		&0x7fff,%d1		# strip sign
   10800 	andi.w		&0x8000,%d2		# keep old sign
   10801 	sub.l		%d0,%d1			# add scale factor
   10802 	or.w		%d2,%d1			# concat old sign,new exp
   10803 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
   10804 	mov.l		(%sp)+,%d2		# restore d2
   10805 	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
   10806 	rts
   10807 
   10808 fsgldiv_may_ovfl:
   10809 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
   10810 
   10811 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
    10812 	fmov.l		&0x0,%fpsr		# clear FPSR
   10813 
   10814 	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute divide
   10815 
   10816 	fmov.l		%fpsr,%d1
   10817 	fmov.l		&0x0,%fpcr
   10818 
   10819 	or.l		%d1,USER_FPSR(%a6)	# save INEX,N
   10820 
   10821 	fmovm.x		&0x01,-(%sp)		# save result to stack
   10822 	mov.w		(%sp),%d1		# fetch new exponent
   10823 	add.l		&0xc,%sp		# clear result
   10824 	andi.l		&0x7fff,%d1		# strip sign
   10825 	sub.l		%d0,%d1			# add scale factor
   10826 	cmp.l		%d1,&0x7fff		# did divide overflow?
   10827 	blt.b		fsgldiv_normal_exit
   10828 
   10829 fsgldiv_ovfl_tst:
   10830 	or.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
   10831 
   10832 	mov.b		FPCR_ENABLE(%a6),%d1
   10833 	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
   10834 	bne.b		fsgldiv_ovfl_ena	# yes
   10835 
   10836 fsgldiv_ovfl_dis:
   10837 	btst		&neg_bit,FPSR_CC(%a6) 	# is result negative
   10838 	sne		%d1			# set sign param accordingly
   10839 	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
   10840 	andi.b		&0x30,%d0		# kill precision
   10841 	bsr.l		ovf_res			# calculate default result
   10842 	or.b		%d0,FPSR_CC(%a6)	# set INF if applicable
   10843 	fmovm.x		(%a0),&0x80		# return default result in fp0
   10844 	rts
   10845 
   10846 fsgldiv_ovfl_ena:
   10847 	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack
   10848 
   10849 	mov.l		%d2,-(%sp)		# save d2
   10850 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
   10851 	mov.l		%d1,%d2			# make a copy
   10852 	andi.l		&0x7fff,%d1		# strip sign
   10853 	andi.w		&0x8000,%d2		# keep old sign
   10854 	sub.l		%d0,%d1			# add scale factor
   10855 	subi.l		&0x6000,%d1		# subtract new bias
   10856 	andi.w		&0x7fff,%d1		# clear ms bit
   10857 	or.w		%d2,%d1			# concat old sign,new exp
   10858 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
   10859 	mov.l		(%sp)+,%d2		# restore d2
   10860 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   10861 	bra.b		fsgldiv_ovfl_dis
   10862 
   10863 fsgldiv_unfl:
   10864 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
   10865 
   10866 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
   10867 
   10868 	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
   10869 	fmov.l		&0x0,%fpsr		# clear FPSR
   10870 
   10871 	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute sgl divide
   10872 
   10873 	fmov.l		%fpsr,%d1		# save status
   10874 	fmov.l		&0x0,%fpcr		# clear FPCR
   10875 
   10876 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   10877 
   10878 	mov.b		FPCR_ENABLE(%a6),%d1
   10879 	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
   10880 	bne.b		fsgldiv_unfl_ena	# yes
   10881 
   10882 fsgldiv_unfl_dis:
   10883 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
   10884 
   10885 	lea		FP_SCR0(%a6),%a0	# pass: result addr
   10886 	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
   10887 	bsr.l		unf_res4		# calculate default result
   10888 	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
   10889 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
   10890 	rts
   10891 
   10892 #
   10893 # UNFL is enabled.
   10894 #
   10895 fsgldiv_unfl_ena:
   10896 	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op
   10897 
   10898 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   10899 	fmov.l		&0x0,%fpsr		# clear FPSR
   10900 
   10901 	fsgldiv.x	FP_SCR0(%a6),%fp1	# execute sgl divide
   10902 
   10903 	fmov.l		&0x0,%fpcr		# clear FPCR
   10904 
   10905 	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
   10906 	mov.l		%d2,-(%sp)		# save d2
   10907 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
   10908 	mov.l		%d1,%d2			# make a copy
   10909 	andi.l		&0x7fff,%d1		# strip sign
   10910 	andi.w		&0x8000,%d2		# keep old sign
   10911 	sub.l		%d0,%d1			# add scale factor
   10912 	addi.l		&0x6000,%d1		# add bias
   10913 	andi.w		&0x7fff,%d1		# clear top bit
   10914 	or.w		%d2,%d1			# concat old sign, new exp
   10915 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
   10916 	mov.l		(%sp)+,%d2		# restore d2
   10917 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   10918 	bra.b		fsgldiv_unfl_dis
   10919 
   10920 #
   10921 # the divide operation MAY underflow:
   10922 #
   10923 fsgldiv_may_unfl:
   10924 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
   10925 
   10926 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   10927 	fmov.l		&0x0,%fpsr		# clear FPSR
   10928 
   10929 	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute sgl divide
   10930 
   10931 	fmov.l		%fpsr,%d1		# save status
   10932 	fmov.l		&0x0,%fpcr		# clear FPCR
   10933 
   10934 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   10935 
   10936 	fabs.x		%fp0,%fp1		# make a copy of result
   10937 	fcmp.b		%fp1,&0x1		# is |result| > 1.b?
   10938 	fbgt.w		fsgldiv_normal_exit	# no; no underflow occurred
   10939 	fblt.w		fsgldiv_unfl		# yes; underflow occurred
   10940 
   10941 #
   10942 # we still don't know if underflow occurred. result is ~ equal to 1. but,
   10943 # we don't know if the result was an underflow that rounded up to a 1
   10944 # or a normalized number that rounded down to a 1. so, redo the entire
   10945 # operation using RZ as the rounding mode to see what the pre-rounded
   10946 # result is. this case should be relatively rare.
   10947 #
   10948 	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into %fp1
   10949 
   10950 	clr.l		%d1			# clear scratch register
   10951 	ori.b		&rz_mode*0x10,%d1	# force RZ rnd mode
   10952 
   10953 	fmov.l		%d1,%fpcr		# set FPCR
   10954 	fmov.l		&0x0,%fpsr		# clear FPSR
   10955 
   10956 	fsgldiv.x	FP_SCR0(%a6),%fp1	# execute sgl divide
   10957 
   10958 	fmov.l		&0x0,%fpcr		# clear FPCR
   10959 	fabs.x		%fp1			# make absolute value
   10960 	fcmp.b		%fp1,&0x1		# is |result| < 1.b?
   10961 	fbge.w		fsgldiv_normal_exit	# no; no underflow occurred
   10962 	bra.w		fsgldiv_unfl		# yes; underflow occurred
   10963 
   10964 ############################################################################
   10965 
   10966 #
   10967 # Divide: inputs are not both normalized; what are they?
   10968 #
   10969 fsgldiv_not_norm:
   10970 	mov.w		(tbl_fsgldiv_op.b,%pc,%d1.w*2),%d1
   10971 	jmp		(tbl_fsgldiv_op.b,%pc,%d1.w*1)
   10972 
   10973 	swbeg		&48
   10974 tbl_fsgldiv_op:
   10975 	short		fsgldiv_norm		- tbl_fsgldiv_op # NORM / NORM
   10976 	short		fsgldiv_inf_load	- tbl_fsgldiv_op # NORM / ZERO
   10977 	short		fsgldiv_zero_load	- tbl_fsgldiv_op # NORM / INF
   10978 	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # NORM / QNAN
   10979 	short		fsgldiv_norm		- tbl_fsgldiv_op # NORM / DENORM
   10980 	short		fsgldiv_res_snan	- tbl_fsgldiv_op # NORM / SNAN
   10981 	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
   10982 	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
   10983 
   10984 	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / NORM
   10985 	short		fsgldiv_res_operr	- tbl_fsgldiv_op # ZERO / ZERO
   10986 	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / INF
   10987 	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # ZERO / QNAN
   10988 	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / DENORM
   10989 	short		fsgldiv_res_snan	- tbl_fsgldiv_op # ZERO / SNAN
   10990 	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
   10991 	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
   10992 
   10993 	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / NORM
   10994 	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / ZERO
   10995 	short		fsgldiv_res_operr	- tbl_fsgldiv_op # INF / INF
   10996 	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # INF / QNAN
   10997 	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / DENORM
   10998 	short		fsgldiv_res_snan	- tbl_fsgldiv_op # INF / SNAN
   10999 	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
   11000 	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
   11001 
   11002 	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / NORM
   11003 	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / ZERO
   11004 	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / INF
   11005 	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / QNAN
   11006 	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / DENORM
   11007 	short		fsgldiv_res_snan	- tbl_fsgldiv_op # QNAN / SNAN
   11008 	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
   11009 	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
   11010 
   11011 	short		fsgldiv_norm		- tbl_fsgldiv_op # DENORM / NORM
   11012 	short		fsgldiv_inf_load	- tbl_fsgldiv_op # DENORM / ZERO
   11013 	short		fsgldiv_zero_load	- tbl_fsgldiv_op # DENORM / INF
   11014 	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # DENORM / QNAN
   11015 	short		fsgldiv_norm		- tbl_fsgldiv_op # DENORM / DENORM
   11016 	short		fsgldiv_res_snan	- tbl_fsgldiv_op # DENORM / SNAN
   11017 	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
   11018 	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
   11019 
   11020 	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / NORM
   11021 	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / ZERO
   11022 	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / INF
   11023 	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / QNAN
   11024 	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / DENORM
   11025 	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / SNAN
   11026 	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
   11027 	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
   11028 
   11029 fsgldiv_res_qnan:
   11030 	bra.l		res_qnan
   11031 fsgldiv_res_snan:
   11032 	bra.l		res_snan
   11033 fsgldiv_res_operr:
   11034 	bra.l		res_operr
   11035 fsgldiv_inf_load:
   11036 	bra.l		fdiv_inf_load
   11037 fsgldiv_zero_load:
   11038 	bra.l		fdiv_zero_load
   11039 fsgldiv_inf_dst:
   11040 	bra.l		fdiv_inf_dst
   11041 
   11042 #########################################################################
   11043 # XDEF ****************************************************************	#
   11044 #	fadd(): emulates the fadd instruction				#
   11045 #	fsadd(): emulates the fadd instruction				#
   11046 #	fdadd(): emulates the fdadd instruction				#
   11047 #									#
   11048 # XREF ****************************************************************	#
   11049 # 	addsub_scaler2() - scale the operands so they won't take exc	#
   11050 #	ovf_res() - return default overflow result			#
   11051 #	unf_res() - return default underflow result			#
   11052 #	res_qnan() - set QNAN result					#
   11053 # 	res_snan() - set SNAN result					#
   11054 #	res_operr() - set OPERR result					#
   11055 #	scale_to_zero_src() - set src operand exponent equal to zero	#
   11056 #	scale_to_zero_dst() - set dst operand exponent equal to zero	#
   11057 #									#
   11058 # INPUT ***************************************************************	#
   11059 #	a0 = pointer to extended precision source operand		#
   11060 # 	a1 = pointer to extended precision destination operand		#
   11061 #									#
   11062 # OUTPUT **************************************************************	#
   11063 #	fp0 = result							#
   11064 #	fp1 = EXOP (if exception occurred)				#
   11065 #									#
   11066 # ALGORITHM ***********************************************************	#
   11067 # 	Handle NANs, infinities, and zeroes as special cases. Divide	#
   11068 # norms into extended, single, and double precision.			#
   11069 #	Do addition after scaling exponents such that exception won't	#
   11070 # occur. Then, check result exponent to see if exception would have	#
   11071 # occurred. If so, return default result and maybe EXOP. Else, insert	#
   11072 # the correct result exponent and return. Set FPSR bits as appropriate.	#
   11073 #									#
   11074 #########################################################################
   11075 
   11076 	global		fsadd
   11077 fsadd:
   11078 	andi.b		&0x30,%d0		# clear rnd prec
   11079 	ori.b		&s_mode*0x10,%d0	# insert sgl prec
   11080 	bra.b		fadd
   11081 
   11082 	global		fdadd
   11083 fdadd:
   11084 	andi.b		&0x30,%d0		# clear rnd prec
   11085 	ori.b		&d_mode*0x10,%d0	# insert dbl prec
   11086 
   11087 	global		fadd
   11088 fadd:
   11089 	mov.l		%d0,L_SCR3(%a6)		# store rnd info
   11090 
   11091 	clr.w		%d1
   11092 	mov.b		DTAG(%a6),%d1
   11093 	lsl.b		&0x3,%d1
   11094 	or.b		STAG(%a6),%d1		# combine src tags
   11095 
   11096 	bne.w		fadd_not_norm		# optimize on non-norm input
   11097 
   11098 #
   11099 # ADD: norms and denorms
   11100 #
   11101 fadd_norm:
   11102 	bsr.l		addsub_scaler2		# scale exponents
   11103 
   11104 fadd_zero_entry:
   11105 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
   11106 
   11107 	fmov.l		&0x0,%fpsr		# clear FPSR
   11108 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   11109 
   11110 	fadd.x		FP_SCR0(%a6),%fp0	# execute add
   11111 
   11112 	fmov.l		&0x0,%fpcr		# clear FPCR
   11113 	fmov.l		%fpsr,%d1		# fetch INEX2,N,Z
   11114 
   11115 	or.l		%d1,USER_FPSR(%a6)	# save exc and ccode bits
   11116 
   11117 	fbeq.w		fadd_zero_exit		# if result is zero, end now
   11118 
   11119 	mov.l		%d2,-(%sp)		# save d2
   11120 
   11121 	fmovm.x		&0x01,-(%sp)		# save result to stack
   11122 
   11123 	mov.w		2+L_SCR3(%a6),%d1
   11124 	lsr.b		&0x6,%d1
   11125 
   11126 	mov.w		(%sp),%d2		# fetch new sign, exp
   11127 	andi.l		&0x7fff,%d2		# strip sign
   11128 	sub.l		%d0,%d2			# add scale factor
   11129 
   11130 	cmp.l		%d2,(tbl_fadd_ovfl.b,%pc,%d1.w*4) # is it an overflow?
   11131 	bge.b		fadd_ovfl		# yes
   11132 
   11133 	cmp.l		%d2,(tbl_fadd_unfl.b,%pc,%d1.w*4) # is it an underflow?
   11134 	blt.w		fadd_unfl		# yes
   11135 	beq.w		fadd_may_unfl		# maybe; go find out
   11136 
   11137 fadd_normal:
   11138 	mov.w		(%sp),%d1
   11139 	andi.w		&0x8000,%d1		# keep sign
   11140 	or.w		%d2,%d1			# concat sign,new exp
   11141 	mov.w		%d1,(%sp)		# insert new exponent
   11142 
   11143 	fmovm.x		(%sp)+,&0x80		# return result in fp0
   11144 
   11145 	mov.l		(%sp)+,%d2		# restore d2
   11146 	rts
   11147 
   11148 fadd_zero_exit:
   11149 #	fmov.s		&0x00000000,%fp0	# return zero in fp0
   11150 	rts
   11151 
   11152 tbl_fadd_ovfl:
   11153 	long		0x7fff			# ext ovfl
   11154 	long		0x407f			# sgl ovfl
   11155 	long		0x43ff			# dbl ovfl
   11156 
   11157 tbl_fadd_unfl:
   11158 	long	        0x0000			# ext unfl
   11159 	long		0x3f81			# sgl unfl
   11160 	long		0x3c01			# dbl unfl
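          # (reading the tables above as extended-format biased exponents: 0x407f and
          # 0x3f81 bracket single precision, 0x3fff + 0x80 being the first exponent
          # that overflows and 0x3fff - 0x7e the smallest that does not underflow;
          # 0x43ff and 0x3c01 play the same role for double precision. the compares
          # above branch to fadd_ovfl on >= the overflow bound, to fadd_unfl on < the
          # underflow bound, and to fadd_may_unfl on equality with it.)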
   11161 
   11162 fadd_ovfl:
   11163 	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
   11164 
   11165 	mov.b		FPCR_ENABLE(%a6),%d1
   11166 	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
   11167 	bne.b		fadd_ovfl_ena		# yes
   11168 
   11169 	add.l		&0xc,%sp
   11170 fadd_ovfl_dis:
   11171 	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
   11172 	sne		%d1			# set sign param accordingly
   11173 	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
   11174 	bsr.l		ovf_res			# calculate default result
   11175 	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
   11176 	fmovm.x		(%a0),&0x80		# return default result in fp0
   11177 	mov.l		(%sp)+,%d2		# restore d2
   11178 	rts
   11179 
   11180 fadd_ovfl_ena:
   11181 	mov.b		L_SCR3(%a6),%d1
   11182 	andi.b		&0xc0,%d1		# is precision extended?
   11183 	bne.b		fadd_ovfl_ena_sd	# no; prec = sgl or dbl
   11184 
   11185 fadd_ovfl_ena_cont:
   11186 	mov.w		(%sp),%d1
   11187 	andi.w		&0x8000,%d1		# keep sign
    11188 	subi.l		&0x6000,%d2		# subtract bias
   11189 	andi.w		&0x7fff,%d2
   11190 	or.w		%d2,%d1			# concat sign,new exp
   11191 	mov.w		%d1,(%sp)		# insert new exponent
   11192 
   11193 	fmovm.x		(%sp)+,&0x40		# return EXOP in fp1
   11194 	bra.b		fadd_ovfl_dis
   11195 
   11196 fadd_ovfl_ena_sd:
   11197 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
   11198 
   11199 	mov.l		L_SCR3(%a6),%d1
   11200 	andi.b		&0x30,%d1		# keep rnd mode
   11201 	fmov.l		%d1,%fpcr		# set FPCR
   11202 
   11203 	fadd.x		FP_SCR0(%a6),%fp0	# execute add
   11204 
   11205 	fmov.l		&0x0,%fpcr		# clear FPCR
   11206 
   11207 	add.l		&0xc,%sp
   11208 	fmovm.x		&0x01,-(%sp)
   11209 	bra.b		fadd_ovfl_ena_cont
   11210 
   11211 fadd_unfl:
   11212 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
   11213 
   11214 	add.l		&0xc,%sp
   11215 
   11216 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
   11217 
   11218 	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
   11219 	fmov.l		&0x0,%fpsr		# clear FPSR
   11220 
   11221 	fadd.x		FP_SCR0(%a6),%fp0	# execute add
   11222 
   11223 	fmov.l		&0x0,%fpcr		# clear FPCR
   11224 	fmov.l		%fpsr,%d1		# save status
   11225 
   11226 	or.l		%d1,USER_FPSR(%a6)	# save INEX,N
   11227 
   11228 	mov.b		FPCR_ENABLE(%a6),%d1
   11229 	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
   11230 	bne.b		fadd_unfl_ena		# yes
   11231 
   11232 fadd_unfl_dis:
   11233 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
   11234 
   11235 	lea		FP_SCR0(%a6),%a0	# pass: result addr
   11236 	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
   11237 	bsr.l		unf_res			# calculate default result
   11238 	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
   11239 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
   11240 	mov.l		(%sp)+,%d2		# restore d2
   11241 	rts
   11242 
   11243 fadd_unfl_ena:
   11244 	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op
   11245 
   11246 	mov.l		L_SCR3(%a6),%d1
   11247 	andi.b		&0xc0,%d1		# is precision extended?
   11248 	bne.b		fadd_unfl_ena_sd	# no; sgl or dbl
   11249 
   11250 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   11251 
   11252 fadd_unfl_ena_cont:
   11253 	fmov.l		&0x0,%fpsr		# clear FPSR
   11254 
    11255 	fadd.x		FP_SCR0(%a6),%fp1	# execute add
   11256 
   11257 	fmov.l		&0x0,%fpcr		# clear FPCR
   11258 
   11259 	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
   11260 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
   11261 	mov.l		%d1,%d2			# make a copy
   11262 	andi.l		&0x7fff,%d1		# strip sign
   11263 	andi.w		&0x8000,%d2		# keep old sign
   11264 	sub.l		%d0,%d1			# add scale factor
   11265 	addi.l		&0x6000,%d1		# add new bias
   11266 	andi.w		&0x7fff,%d1		# clear top bit
   11267 	or.w		%d2,%d1			# concat sign,new exp
   11268 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
   11269 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   11270 	bra.w		fadd_unfl_dis
   11271 
   11272 fadd_unfl_ena_sd:
   11273 	mov.l		L_SCR3(%a6),%d1
   11274 	andi.b		&0x30,%d1		# use only rnd mode
   11275 	fmov.l		%d1,%fpcr		# set FPCR
   11276 
   11277 	bra.b		fadd_unfl_ena_cont
   11278 
   11279 #
   11280 # result is equal to the smallest normalized number in the selected precision
   11281 # if the precision is extended, this result could not have come from an
   11282 # underflow that rounded up.
   11283 #
   11284 fadd_may_unfl:
   11285 	mov.l		L_SCR3(%a6),%d1
    11286 	andi.b		&0xc0,%d1		# is precision extended?
   11287 	beq.w		fadd_normal		# yes; no underflow occurred
   11288 
   11289 	mov.l		0x4(%sp),%d1		# extract hi(man)
   11290 	cmpi.l		%d1,&0x80000000		# is hi(man) = 0x80000000?
   11291 	bne.w		fadd_normal		# no; no underflow occurred
   11292 
   11293 	tst.l		0x8(%sp)		# is lo(man) = 0x0?
   11294 	bne.w		fadd_normal		# no; no underflow occurred
   11295 
   11296 	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
   11297 	beq.w		fadd_normal		# no; no underflow occurred
   11298 
   11299 #
    11300 # ok, so now the result has an exponent equal to the smallest normalized
   11301 # exponent for the selected precision. also, the mantissa is equal to
   11302 # 0x8000000000000000 and this mantissa is the result of rounding non-zero
   11303 # g,r,s.
   11304 # now, we must determine whether the pre-rounded result was an underflow
   11305 # rounded "up" or a normalized number rounded "down".
    11306 # so, we do this by re-executing the add using RZ as the rounding mode and
   11307 # seeing if the new result is smaller or equal to the current result.
   11308 #
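          # (concretely: the RZ re-run below can only be smaller than or equal to the
          # original result in magnitude, so after both are made positive,
          # "original > RZ result" means the original was rounded up across the
          # smallest-normal boundary, i.e. an underflow, while equality means it
          # really was that normalized value.)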
   11309 	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
   11310 
   11311 	mov.l		L_SCR3(%a6),%d1
   11312 	andi.b		&0xc0,%d1		# keep rnd prec
   11313 	ori.b		&rz_mode*0x10,%d1	# insert rnd mode
   11314 	fmov.l		%d1,%fpcr		# set FPCR
   11315 	fmov.l		&0x0,%fpsr		# clear FPSR
   11316 
   11317 	fadd.x		FP_SCR0(%a6),%fp1	# execute add
   11318 
   11319 	fmov.l		&0x0,%fpcr		# clear FPCR
   11320 
   11321 	fabs.x		%fp0			# compare absolute values
   11322 	fabs.x		%fp1
   11323 	fcmp.x		%fp0,%fp1		# is first result > second?
   11324 
   11325 	fbgt.w		fadd_unfl		# yes; it's an underflow
   11326 	bra.w		fadd_normal		# no; it's not an underflow
   11327 
   11328 ##########################################################################
   11329 
   11330 #
   11331 # Add: inputs are not both normalized; what are they?
   11332 #
   11333 fadd_not_norm:
   11334 	mov.w		(tbl_fadd_op.b,%pc,%d1.w*2),%d1
   11335 	jmp		(tbl_fadd_op.b,%pc,%d1.w*1)
   11336 
   11337 	swbeg		&48
   11338 tbl_fadd_op:
   11339 	short		fadd_norm	- tbl_fadd_op # NORM + NORM
   11340 	short		fadd_zero_src	- tbl_fadd_op # NORM + ZERO
   11341 	short		fadd_inf_src	- tbl_fadd_op # NORM + INF
   11342 	short		fadd_res_qnan	- tbl_fadd_op # NORM + QNAN
   11343 	short		fadd_norm	- tbl_fadd_op # NORM + DENORM
   11344 	short		fadd_res_snan	- tbl_fadd_op # NORM + SNAN
   11345 	short		tbl_fadd_op	- tbl_fadd_op #
   11346 	short		tbl_fadd_op	- tbl_fadd_op #
   11347 
   11348 	short		fadd_zero_dst	- tbl_fadd_op # ZERO + NORM
   11349 	short		fadd_zero_2	- tbl_fadd_op # ZERO + ZERO
   11350 	short		fadd_inf_src	- tbl_fadd_op # ZERO + INF
    11351 	short		fadd_res_qnan	- tbl_fadd_op # ZERO + QNAN
   11352 	short		fadd_zero_dst	- tbl_fadd_op # ZERO + DENORM
    11353 	short		fadd_res_snan	- tbl_fadd_op # ZERO + SNAN
   11354 	short		tbl_fadd_op	- tbl_fadd_op #
   11355 	short		tbl_fadd_op	- tbl_fadd_op #
   11356 
   11357 	short		fadd_inf_dst	- tbl_fadd_op # INF + NORM
   11358 	short		fadd_inf_dst	- tbl_fadd_op # INF + ZERO
   11359 	short		fadd_inf_2	- tbl_fadd_op # INF + INF
    11360 	short		fadd_res_qnan	- tbl_fadd_op # INF + QNAN
   11361 	short		fadd_inf_dst	- tbl_fadd_op # INF + DENORM
    11362 	short		fadd_res_snan	- tbl_fadd_op # INF + SNAN
   11363 	short		tbl_fadd_op	- tbl_fadd_op #
   11364 	short		tbl_fadd_op	- tbl_fadd_op #
   11365 
   11366 	short		fadd_res_qnan	- tbl_fadd_op # QNAN + NORM
   11367 	short		fadd_res_qnan	- tbl_fadd_op # QNAN + ZERO
   11368 	short		fadd_res_qnan	- tbl_fadd_op # QNAN + INF
   11369 	short		fadd_res_qnan	- tbl_fadd_op # QNAN + QNAN
   11370 	short		fadd_res_qnan	- tbl_fadd_op # QNAN + DENORM
   11371 	short		fadd_res_snan	- tbl_fadd_op # QNAN + SNAN
   11372 	short		tbl_fadd_op	- tbl_fadd_op #
   11373 	short		tbl_fadd_op	- tbl_fadd_op #
   11374 
   11375 	short		fadd_norm	- tbl_fadd_op # DENORM + NORM
   11376 	short		fadd_zero_src	- tbl_fadd_op # DENORM + ZERO
   11377 	short		fadd_inf_src	- tbl_fadd_op # DENORM + INF
    11378 	short		fadd_res_qnan	- tbl_fadd_op # DENORM + QNAN
   11379 	short		fadd_norm	- tbl_fadd_op # DENORM + DENORM
    11380 	short		fadd_res_snan	- tbl_fadd_op # DENORM + SNAN
   11381 	short		tbl_fadd_op	- tbl_fadd_op #
   11382 	short		tbl_fadd_op	- tbl_fadd_op #
   11383 
   11384 	short		fadd_res_snan	- tbl_fadd_op # SNAN + NORM
   11385 	short		fadd_res_snan	- tbl_fadd_op # SNAN + ZERO
   11386 	short		fadd_res_snan	- tbl_fadd_op # SNAN + INF
   11387 	short		fadd_res_snan	- tbl_fadd_op # SNAN + QNAN
   11388 	short		fadd_res_snan	- tbl_fadd_op # SNAN + DENORM
   11389 	short		fadd_res_snan	- tbl_fadd_op # SNAN + SNAN
   11390 	short		tbl_fadd_op	- tbl_fadd_op #
   11391 	short		tbl_fadd_op	- tbl_fadd_op #
   11392 
   11393 fadd_res_qnan:
   11394 	bra.l		res_qnan
   11395 fadd_res_snan:
   11396 	bra.l		res_snan
   11397 
   11398 #
   11399 # both operands are ZEROes
   11400 #
   11401 fadd_zero_2:
   11402 	mov.b		SRC_EX(%a0),%d0		# are the signs opposite
   11403 	mov.b		DST_EX(%a1),%d1
   11404 	eor.b		%d0,%d1
   11405 	bmi.w		fadd_zero_2_chk_rm	# weed out (-ZERO)+(+ZERO)
   11406 
   11407 # the signs are the same. so determine whether they are positive or negative
   11408 # and return the appropriately signed zero.
   11409 	tst.b		%d0			# are ZEROes positive or negative?
   11410 	bmi.b		fadd_zero_rm		# negative
   11411 	fmov.s		&0x00000000,%fp0	# return +ZERO
   11412 	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
   11413 	rts
   11414 
   11415 #
   11416 # the ZEROes have opposite signs:
   11417 # - therefore, we return +ZERO if the rounding modes are RN,RZ, or RP.
   11418 # - -ZERO is returned in the case of RM.
   11419 #
   11420 fadd_zero_2_chk_rm:
   11421 	mov.b		3+L_SCR3(%a6),%d1
   11422 	andi.b		&0x30,%d1		# extract rnd mode
   11423 	cmpi.b		%d1,&rm_mode*0x10	# is rnd mode == RM?
   11424 	beq.b		fadd_zero_rm		# yes
   11425 	fmov.s		&0x00000000,%fp0	# return +ZERO
   11426 	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
   11427 	rts
   11428 
   11429 fadd_zero_rm:
   11430 	fmov.s		&0x80000000,%fp0	# return -ZERO
   11431 	mov.b		&neg_bmask+z_bmask,FPSR_CC(%a6) # set NEG/Z
   11432 	rts
   11433 
   11434 #
   11435 # one operand is a ZERO and the other is a DENORM or NORM. scale
   11436 # the DENORM or NORM and jump to the regular fadd routine.
   11437 #
   11438 fadd_zero_dst:
   11439 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
   11440 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   11441 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   11442 	bsr.l		scale_to_zero_src	# scale the operand
   11443 	clr.w		FP_SCR1_EX(%a6)
   11444 	clr.l		FP_SCR1_HI(%a6)
   11445 	clr.l		FP_SCR1_LO(%a6)
   11446 	bra.w		fadd_zero_entry		# go execute fadd
   11447 
   11448 fadd_zero_src:
   11449 	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
   11450 	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
   11451 	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
   11452 	bsr.l		scale_to_zero_dst	# scale the operand
   11453 	clr.w		FP_SCR0_EX(%a6)
   11454 	clr.l		FP_SCR0_HI(%a6)
   11455 	clr.l		FP_SCR0_LO(%a6)
   11456 	bra.w		fadd_zero_entry		# go execute fadd
   11457 
   11458 #
   11459 # both operands are INFs. an OPERR will result if the INFs have
   11460 # different signs. else, an INF of the same sign is returned
   11461 #
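          # (e.g. (+INF) + (+INF) = +INF and (-INF) + (-INF) = -INF, but
          # (+INF) + (-INF) has no meaningful sum and is treated as an operand error
          # via res_operr.)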
   11462 fadd_inf_2:
   11463 	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
   11464 	mov.b		DST_EX(%a1),%d1
   11465 	eor.b		%d1,%d0
   11466 	bmi.l		res_operr		# weed out (-INF)+(+INF)
   11467 
   11468 # ok, so it's not an OPERR. but, we do have to remember to return the
   11469 # src INF since that's where the 881/882 gets the j-bit from...
   11470 
   11471 #
   11472 # operands are INF and one of {ZERO, INF, DENORM, NORM}
   11473 #
   11474 fadd_inf_src:
   11475 	fmovm.x		SRC(%a0),&0x80		# return src INF
   11476 	tst.b		SRC_EX(%a0)		# is INF positive?
   11477 	bpl.b		fadd_inf_done		# yes; we're done
   11478 	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
   11479 	rts
   11480 
   11481 #
   11482 # operands are INF and one of {ZERO, INF, DENORM, NORM}
   11483 #
   11484 fadd_inf_dst:
   11485 	fmovm.x		DST(%a1),&0x80		# return dst INF
   11486 	tst.b		DST_EX(%a1)		# is INF positive?
   11487 	bpl.b		fadd_inf_done		# yes; we're done
   11488 	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
   11489 	rts
   11490 
   11491 fadd_inf_done:
   11492 	mov.b		&inf_bmask,FPSR_CC(%a6) # set INF
   11493 	rts
   11494 
   11495 #########################################################################
   11496 # XDEF ****************************************************************	#
   11497 #	fsub(): emulates the fsub instruction				#
   11498 #	fssub(): emulates the fssub instruction				#
   11499 #	fdsub(): emulates the fdsub instruction				#
   11500 #									#
   11501 # XREF ****************************************************************	#
   11502 # 	addsub_scaler2() - scale the operands so they won't take exc	#
   11503 #	ovf_res() - return default overflow result			#
   11504 #	unf_res() - return default underflow result			#
   11505 #	res_qnan() - set QNAN result					#
   11506 # 	res_snan() - set SNAN result					#
   11507 #	res_operr() - set OPERR result					#
   11508 #	scale_to_zero_src() - set src operand exponent equal to zero	#
   11509 #	scale_to_zero_dst() - set dst operand exponent equal to zero	#
   11510 #									#
   11511 # INPUT ***************************************************************	#
   11512 #	a0 = pointer to extended precision source operand		#
   11513 # 	a1 = pointer to extended precision destination operand		#
   11514 #									#
   11515 # OUTPUT **************************************************************	#
   11516 #	fp0 = result							#
   11517 #	fp1 = EXOP (if exception occurred)				#
   11518 #									#
   11519 # ALGORITHM ***********************************************************	#
   11520 # 	Handle NANs, infinities, and zeroes as special cases. Divide	#
   11521 # norms into extended, single, and double precision.			#
   11522 #	Do subtraction after scaling exponents such that exception won't#
   11523 # occur. Then, check result exponent to see if exception would have	#
   11524 # occurred. If so, return default result and maybe EXOP. Else, insert	#
   11525 # the correct result exponent and return. Set FPSR bits as appropriate.	#
   11526 #									#
   11527 #########################################################################
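          #
          # Rough sketch of the exception check used by fsub below: after
          # addsub_scaler2() the operands have been scaled and d0 holds the
          # scale factor.  The scaled difference is formed with a real fsub,
          # and the result's biased exponent, adjusted by the scale factor,
          # is compared against the per-precision limits in tbl_fsub_ovfl and
          # tbl_fsub_unfl (ext/sgl/dbl) to decide whether the unscaled
          # operation would have overflowed, underflowed, or was safe.
          #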
   11528 
   11529 	global		fssub
   11530 fssub:
   11531 	andi.b		&0x30,%d0		# clear rnd prec
   11532 	ori.b		&s_mode*0x10,%d0	# insert sgl prec
   11533 	bra.b		fsub
   11534 
   11535 	global		fdsub
   11536 fdsub:
   11537 	andi.b		&0x30,%d0		# clear rnd prec
   11538 	ori.b		&d_mode*0x10,%d0	# insert dbl prec
   11539 
   11540 	global		fsub
   11541 fsub:
   11542 	mov.l		%d0,L_SCR3(%a6)		# store rnd info
   11543 
   11544 	clr.w		%d1
   11545 	mov.b		DTAG(%a6),%d1
   11546 	lsl.b		&0x3,%d1
   11547 	or.b		STAG(%a6),%d1		# combine src tags
   11548 
   11549 	bne.w		fsub_not_norm		# optimize on non-norm input
   11550 
   11551 #
   11552 # SUB: norms and denorms
   11553 #
   11554 fsub_norm:
   11555 	bsr.l		addsub_scaler2		# scale exponents
   11556 
   11557 fsub_zero_entry:
   11558 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
   11559 
   11560 	fmov.l		&0x0,%fpsr		# clear FPSR
   11561 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   11562 
   11563 	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract
   11564 
   11565 	fmov.l		&0x0,%fpcr		# clear FPCR
   11566 	fmov.l		%fpsr,%d1		# fetch INEX2, N, Z
   11567 
   11568 	or.l		%d1,USER_FPSR(%a6)	# save exc and ccode bits
   11569 
   11570 	fbeq.w		fsub_zero_exit		# if result zero, end now
   11571 
   11572 	mov.l		%d2,-(%sp)		# save d2
   11573 
   11574 	fmovm.x		&0x01,-(%sp)		# save result to stack
   11575 
   11576 	mov.w		2+L_SCR3(%a6),%d1
   11577 	lsr.b		&0x6,%d1
   11578 
   11579 	mov.w		(%sp),%d2		# fetch new exponent
   11580 	andi.l		&0x7fff,%d2		# strip sign
   11581 	sub.l		%d0,%d2			# add scale factor
   11582 
   11583 	cmp.l		%d2,(tbl_fsub_ovfl.b,%pc,%d1.w*4) # is it an overflow?
   11584 	bge.b		fsub_ovfl		# yes
   11585 
   11586 	cmp.l		%d2,(tbl_fsub_unfl.b,%pc,%d1.w*4) # is it an underflow?
   11587 	blt.w		fsub_unfl		# yes
   11588 	beq.w		fsub_may_unfl		# maybe; go find out
   11589 
   11590 fsub_normal:
   11591 	mov.w		(%sp),%d1
   11592 	andi.w		&0x8000,%d1		# keep sign
   11593 	or.w		%d2,%d1			# insert new exponent
   11594 	mov.w		%d1,(%sp)		# insert new exponent
   11595 
   11596 	fmovm.x		(%sp)+,&0x80		# return result in fp0
   11597 
   11598 	mov.l		(%sp)+,%d2		# restore d2
   11599 	rts
   11600 
   11601 fsub_zero_exit:
   11602 #	fmov.s		&0x00000000,%fp0	# return zero in fp0
   11603 	rts
   11604 
   11605 tbl_fsub_ovfl:
   11606 	long		0x7fff			# ext ovfl
   11607 	long		0x407f			# sgl ovfl
   11608 	long		0x43ff			# dbl ovfl
   11609 
   11610 tbl_fsub_unfl:
   11611 	long	        0x0000			# ext unfl
   11612 	long		0x3f81			# sgl unfl
   11613 	long		0x3c01			# dbl unfl
   11614 
   11615 fsub_ovfl:
   11616 	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
   11617 
   11618 	mov.b		FPCR_ENABLE(%a6),%d1
   11619 	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
   11620 	bne.b		fsub_ovfl_ena		# yes
   11621 
   11622 	add.l		&0xc,%sp
   11623 fsub_ovfl_dis:
   11624 	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
   11625 	sne		%d1			# set sign param accordingly
   11626 	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
   11627 	bsr.l		ovf_res			# calculate default result
   11628 	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
   11629 	fmovm.x		(%a0),&0x80		# return default result in fp0
   11630 	mov.l		(%sp)+,%d2		# restore d2
   11631 	rts
   11632 
   11633 fsub_ovfl_ena:
   11634 	mov.b		L_SCR3(%a6),%d1
   11635 	andi.b		&0xc0,%d1		# is precision extended?
   11636 	bne.b		fsub_ovfl_ena_sd	# no
   11637 
   11638 fsub_ovfl_ena_cont:
   11639 	mov.w		(%sp),%d1		# fetch {sgn,exp}
   11640 	andi.w		&0x8000,%d1		# keep sign
   11641 	subi.l		&0x6000,%d2		# subtract new bias
   11642 	andi.w		&0x7fff,%d2		# clear top bit
   11643 	or.w		%d2,%d1			# concat sign,exp
   11644 	mov.w		%d1,(%sp)		# insert new exponent
   11645 
   11646 	fmovm.x		(%sp)+,&0x40		# return EXOP in fp1
   11647 	bra.b		fsub_ovfl_dis
   11648 
   11649 fsub_ovfl_ena_sd:
   11650 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
   11651 
   11652 	mov.l		L_SCR3(%a6),%d1
   11653 	andi.b		&0x30,%d1		# clear rnd prec
   11654 	fmov.l		%d1,%fpcr		# set FPCR
   11655 
   11656 	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract
   11657 
   11658 	fmov.l		&0x0,%fpcr		# clear FPCR
   11659 
   11660 	add.l		&0xc,%sp
   11661 	fmovm.x		&0x01,-(%sp)
   11662 	bra.b		fsub_ovfl_ena_cont
   11663 
   11664 fsub_unfl:
   11665 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
   11666 
   11667 	add.l		&0xc,%sp
   11668 
   11669 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
   11670 
   11671 	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
   11672 	fmov.l		&0x0,%fpsr		# clear FPSR
   11673 
   11674 	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract
   11675 
   11676 	fmov.l		&0x0,%fpcr		# clear FPCR
   11677 	fmov.l		%fpsr,%d1		# save status
   11678 
   11679 	or.l		%d1,USER_FPSR(%a6)
   11680 
   11681 	mov.b		FPCR_ENABLE(%a6),%d1
   11682 	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
   11683 	bne.b		fsub_unfl_ena		# yes
   11684 
   11685 fsub_unfl_dis:
   11686 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
   11687 
   11688 	lea		FP_SCR0(%a6),%a0	# pass: result addr
   11689 	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
   11690 	bsr.l		unf_res			# calculate default result
   11691 	or.b		%d0,FPSR_CC(%a6)	# 'Z' may have been set
   11692 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
   11693 	mov.l		(%sp)+,%d2		# restore d2
   11694 	rts
   11695 
   11696 fsub_unfl_ena:
   11697 	fmovm.x		FP_SCR1(%a6),&0x40
   11698 
   11699 	mov.l		L_SCR3(%a6),%d1
   11700 	andi.b		&0xc0,%d1		# is precision extended?
   11701 	bne.b		fsub_unfl_ena_sd	# no
   11702 
   11703 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   11704 
   11705 fsub_unfl_ena_cont:
   11706 	fmov.l		&0x0,%fpsr		# clear FPSR
   11707 
   11708 	fsub.x		FP_SCR0(%a6),%fp1	# execute subtract
   11709 
   11710 	fmov.l		&0x0,%fpcr		# clear FPCR
   11711 
   11712 	fmovm.x		&0x40,FP_SCR0(%a6)	# store result to stack
   11713 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
   11714 	mov.l		%d1,%d2			# make a copy
   11715 	andi.l		&0x7fff,%d1		# strip sign
   11716 	andi.w		&0x8000,%d2		# keep old sign
   11717 	sub.l		%d0,%d1			# add scale factor
    11718 	addi.l		&0x6000,%d1		# add new bias
   11719 	andi.w		&0x7fff,%d1		# clear top bit
   11720 	or.w		%d2,%d1			# concat sgn,exp
   11721 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
   11722 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   11723 	bra.w		fsub_unfl_dis
   11724 
   11725 fsub_unfl_ena_sd:
   11726 	mov.l		L_SCR3(%a6),%d1
   11727 	andi.b		&0x30,%d1		# clear rnd prec
   11728 	fmov.l		%d1,%fpcr		# set FPCR
   11729 
   11730 	bra.b		fsub_unfl_ena_cont
   11731 
   11732 #
   11733 # result is equal to the smallest normalized number in the selected precision
   11734 # if the precision is extended, this result could not have come from an
   11735 # underflow that rounded up.
   11736 #
   11737 fsub_may_unfl:
   11738 	mov.l		L_SCR3(%a6),%d1
   11739 	andi.b		&0xc0,%d1		# fetch rnd prec
   11740 	beq.w		fsub_normal		# yes; no underflow occurred
   11741 
   11742 	mov.l		0x4(%sp),%d1
   11743 	cmpi.l		%d1,&0x80000000		# is hi(man) = 0x80000000?
   11744 	bne.w		fsub_normal		# no; no underflow occurred
   11745 
   11746 	tst.l		0x8(%sp)		# is lo(man) = 0x0?
   11747 	bne.w		fsub_normal		# no; no underflow occurred
   11748 
   11749 	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
   11750 	beq.w		fsub_normal		# no; no underflow occurred
   11751 
   11752 #
    11753 # ok, so now the result has an exponent equal to the smallest normalized
   11754 # exponent for the selected precision. also, the mantissa is equal to
   11755 # 0x8000000000000000 and this mantissa is the result of rounding non-zero
   11756 # g,r,s.
   11757 # now, we must determine whether the pre-rounded result was an underflow
   11758 # rounded "up" or a normalized number rounded "down".
    11759 # so, we do this by re-executing the subtract using RZ as the rounding mode and
   11760 # seeing if the new result is smaller or equal to the current result.
   11761 #
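          # informally: the result in fp0 was rounded with the user's rounding
          # mode, while the recomputation below uses RZ, which never increases
          # the magnitude.  so if |fp0| ends up greater than the RZ result, the
          # exact value lay below the smallest normalized number and was rounded
          # up -- a true underflow.  if the two magnitudes are equal, no
          # underflow occurred.
          #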
   11762 	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
   11763 
   11764 	mov.l		L_SCR3(%a6),%d1
   11765 	andi.b		&0xc0,%d1		# keep rnd prec
   11766 	ori.b		&rz_mode*0x10,%d1	# insert rnd mode
   11767 	fmov.l		%d1,%fpcr		# set FPCR
   11768 	fmov.l		&0x0,%fpsr		# clear FPSR
   11769 
   11770 	fsub.x		FP_SCR0(%a6),%fp1	# execute subtract
   11771 
   11772 	fmov.l		&0x0,%fpcr		# clear FPCR
   11773 
   11774 	fabs.x		%fp0			# compare absolute values
   11775 	fabs.x		%fp1
   11776 	fcmp.x		%fp0,%fp1		# is first result > second?
   11777 
   11778 	fbgt.w		fsub_unfl		# yes; it's an underflow
   11779 	bra.w		fsub_normal		# no; it's not an underflow
   11780 
   11781 ##########################################################################
   11782 
   11783 #
   11784 # Sub: inputs are not both normalized; what are they?
   11785 #
   11786 fsub_not_norm:
   11787 	mov.w		(tbl_fsub_op.b,%pc,%d1.w*2),%d1
   11788 	jmp		(tbl_fsub_op.b,%pc,%d1.w*1)
   11789 
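          # dispatch note: d1 was formed above as (DTAG << 3) | STAG, so each
          # group of eight entries below handles one destination operand type
          # (NORM, ZERO, INF, QNAN, DENORM, SNAN) and the entry within a group
          # is selected by the source operand type.  the two trailing slots of
          # each group are unused and point back at the table itself.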
   11790 	swbeg		&48
   11791 tbl_fsub_op:
   11792 	short		fsub_norm	- tbl_fsub_op # NORM - NORM
   11793 	short		fsub_zero_src	- tbl_fsub_op # NORM - ZERO
   11794 	short		fsub_inf_src	- tbl_fsub_op # NORM - INF
   11795 	short		fsub_res_qnan	- tbl_fsub_op # NORM - QNAN
   11796 	short		fsub_norm	- tbl_fsub_op # NORM - DENORM
   11797 	short		fsub_res_snan	- tbl_fsub_op # NORM - SNAN
   11798 	short		tbl_fsub_op	- tbl_fsub_op #
   11799 	short		tbl_fsub_op	- tbl_fsub_op #
   11800 
   11801 	short		fsub_zero_dst	- tbl_fsub_op # ZERO - NORM
   11802 	short		fsub_zero_2	- tbl_fsub_op # ZERO - ZERO
   11803 	short		fsub_inf_src	- tbl_fsub_op # ZERO - INF
    11804 	short		fsub_res_qnan	- tbl_fsub_op # ZERO - QNAN
   11805 	short		fsub_zero_dst	- tbl_fsub_op # ZERO - DENORM
    11806 	short		fsub_res_snan	- tbl_fsub_op # ZERO - SNAN
   11807 	short		tbl_fsub_op	- tbl_fsub_op #
   11808 	short		tbl_fsub_op	- tbl_fsub_op #
   11809 
   11810 	short		fsub_inf_dst	- tbl_fsub_op # INF - NORM
   11811 	short		fsub_inf_dst	- tbl_fsub_op # INF - ZERO
   11812 	short		fsub_inf_2	- tbl_fsub_op # INF - INF
    11813 	short		fsub_res_qnan	- tbl_fsub_op # INF - QNAN
   11814 	short		fsub_inf_dst	- tbl_fsub_op # INF - DENORM
    11815 	short		fsub_res_snan	- tbl_fsub_op # INF - SNAN
   11816 	short		tbl_fsub_op	- tbl_fsub_op #
   11817 	short		tbl_fsub_op	- tbl_fsub_op #
   11818 
   11819 	short		fsub_res_qnan	- tbl_fsub_op # QNAN - NORM
   11820 	short		fsub_res_qnan	- tbl_fsub_op # QNAN - ZERO
   11821 	short		fsub_res_qnan	- tbl_fsub_op # QNAN - INF
   11822 	short		fsub_res_qnan	- tbl_fsub_op # QNAN - QNAN
   11823 	short		fsub_res_qnan	- tbl_fsub_op # QNAN - DENORM
   11824 	short		fsub_res_snan	- tbl_fsub_op # QNAN - SNAN
   11825 	short		tbl_fsub_op	- tbl_fsub_op #
   11826 	short		tbl_fsub_op	- tbl_fsub_op #
   11827 
   11828 	short		fsub_norm	- tbl_fsub_op # DENORM - NORM
   11829 	short		fsub_zero_src	- tbl_fsub_op # DENORM - ZERO
   11830 	short		fsub_inf_src	- tbl_fsub_op # DENORM - INF
    11831 	short		fsub_res_qnan	- tbl_fsub_op # DENORM - QNAN
   11832 	short		fsub_norm	- tbl_fsub_op # DENORM - DENORM
    11833 	short		fsub_res_snan	- tbl_fsub_op # DENORM - SNAN
   11834 	short		tbl_fsub_op	- tbl_fsub_op #
   11835 	short		tbl_fsub_op	- tbl_fsub_op #
   11836 
   11837 	short		fsub_res_snan	- tbl_fsub_op # SNAN - NORM
   11838 	short		fsub_res_snan	- tbl_fsub_op # SNAN - ZERO
   11839 	short		fsub_res_snan	- tbl_fsub_op # SNAN - INF
   11840 	short		fsub_res_snan	- tbl_fsub_op # SNAN - QNAN
   11841 	short		fsub_res_snan	- tbl_fsub_op # SNAN - DENORM
   11842 	short		fsub_res_snan	- tbl_fsub_op # SNAN - SNAN
   11843 	short		tbl_fsub_op	- tbl_fsub_op #
   11844 	short		tbl_fsub_op	- tbl_fsub_op #
   11845 
   11846 fsub_res_qnan:
   11847 	bra.l		res_qnan
   11848 fsub_res_snan:
   11849 	bra.l		res_snan
   11850 
   11851 #
   11852 # both operands are ZEROes
   11853 #
   11854 fsub_zero_2:
   11855 	mov.b		SRC_EX(%a0),%d0
   11856 	mov.b		DST_EX(%a1),%d1
   11857 	eor.b		%d1,%d0
   11858 	bpl.b		fsub_zero_2_chk_rm
   11859 
   11860 # the signs are opposite, so, return a ZERO w/ the sign of the dst ZERO
   11861 	tst.b		%d0			# is dst negative?
   11862 	bmi.b		fsub_zero_2_rm		# yes
   11863 	fmov.s		&0x00000000,%fp0	# no; return +ZERO
   11864 	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
   11865 	rts
   11866 
   11867 #
   11868 # the ZEROes have the same signs:
   11869 # - therefore, we return +ZERO if the rounding mode is RN,RZ, or RP
   11870 # - -ZERO is returned in the case of RM.
   11871 #
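          # for illustration: (+0.0) - (+0.0) and (-0.0) - (-0.0) both give +0.0
          # under RN, RZ, and RP, but -0.0 under RM (the IEEE 754 rule for a
          # zero result of x - y with x == y).
          #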
   11872 fsub_zero_2_chk_rm:
   11873 	mov.b		3+L_SCR3(%a6),%d1
   11874 	andi.b		&0x30,%d1		# extract rnd mode
   11875 	cmpi.b		%d1,&rm_mode*0x10	# is rnd mode = RM?
   11876 	beq.b		fsub_zero_2_rm		# yes
   11877 	fmov.s		&0x00000000,%fp0	# no; return +ZERO
   11878 	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
   11879 	rts
   11880 
   11881 fsub_zero_2_rm:
   11882 	fmov.s		&0x80000000,%fp0	# return -ZERO
   11883 	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set Z/NEG
   11884 	rts
   11885 
   11886 #
   11887 # one operand is a ZERO and the other is a DENORM or a NORM.
   11888 # scale the DENORM or NORM and jump to the regular fsub routine.
   11889 #
   11890 fsub_zero_dst:
   11891 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
   11892 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   11893 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   11894 	bsr.l		scale_to_zero_src	# scale the operand
   11895 	clr.w		FP_SCR1_EX(%a6)
   11896 	clr.l		FP_SCR1_HI(%a6)
   11897 	clr.l		FP_SCR1_LO(%a6)
   11898 	bra.w		fsub_zero_entry		# go execute fsub
   11899 
   11900 fsub_zero_src:
   11901 	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
   11902 	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
   11903 	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
   11904 	bsr.l		scale_to_zero_dst	# scale the operand
   11905 	clr.w		FP_SCR0_EX(%a6)
   11906 	clr.l		FP_SCR0_HI(%a6)
   11907 	clr.l		FP_SCR0_LO(%a6)
   11908 	bra.w		fsub_zero_entry		# go execute fsub
   11909 
   11910 #
   11911 # both operands are INFs. an OPERR will result if the INFs have the
    11912 # same signs. else, an INF with the sign of the dst operand is returned.
   11913 #
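          # for illustration: (+INF) - (+INF) signals OPERR, while
          # (+INF) - (-INF) = +INF and (-INF) - (+INF) = -INF, i.e. an INF with
          # the sign of the dst operand.
          #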
   11914 fsub_inf_2:
   11915 	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
   11916 	mov.b		DST_EX(%a1),%d1
   11917 	eor.b		%d1,%d0
    11918 	bpl.l		res_operr		# weed out (+INF)-(+INF)
   11919 
   11920 # ok, so it's not an OPERR. but we do have to remember to return
   11921 # the src INF since that's where the 881/882 gets the j-bit.
   11922 
   11923 fsub_inf_src:
   11924 	fmovm.x		SRC(%a0),&0x80		# return src INF
   11925 	fneg.x		%fp0			# invert sign
   11926 	fbge.w		fsub_inf_done		# sign is now positive
   11927 	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
   11928 	rts
   11929 
   11930 fsub_inf_dst:
   11931 	fmovm.x		DST(%a1),&0x80		# return dst INF
   11932 	tst.b		DST_EX(%a1)		# is INF negative?
   11933 	bpl.b		fsub_inf_done		# no
   11934 	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
   11935 	rts
   11936 
   11937 fsub_inf_done:
   11938 	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
   11939 	rts
   11940 
   11941 #########################################################################
   11942 # XDEF ****************************************************************	#
   11943 # 	fsqrt(): emulates the fsqrt instruction				#
   11944 #	fssqrt(): emulates the fssqrt instruction			#
   11945 #	fdsqrt(): emulates the fdsqrt instruction			#
   11946 #									#
   11947 # XREF ****************************************************************	#
   11948 #	scale_sqrt() - scale the source operand				#
   11949 #	unf_res() - return default underflow result			#
   11950 #	ovf_res() - return default overflow result			#
   11951 # 	res_qnan_1op() - return QNAN result				#
   11952 # 	res_snan_1op() - return SNAN result				#
   11953 #									#
   11954 # INPUT ***************************************************************	#
   11955 #	a0 = pointer to extended precision source operand		#
    11956 #	d0 = rnd prec,mode						#
   11957 #									#
   11958 # OUTPUT **************************************************************	#
   11959 #	fp0 = result							#
   11960 #	fp1 = EXOP (if exception occurred)				#
   11961 #									#
   11962 # ALGORITHM ***********************************************************	#
   11963 #	Handle NANs, infinities, and zeroes as special cases. Divide	#
   11964 # norms/denorms into ext/sgl/dbl precision.				#
   11965 #	For norms/denorms, scale the exponents such that a sqrt		#
   11966 # instruction won't cause an exception. Use the regular fsqrt to	#
   11967 # compute a result. Check if the regular operands would have taken	#
   11968 # an exception. If so, return the default overflow/underflow result	#
   11969 # and return the EXOP if exceptions are enabled. Else, scale the 	#
   11970 # result operand to the proper exponent.				#
   11971 #									#
   11972 #########################################################################
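          #
          # Flow note for the code below: an extended-precision NORM can neither
          # overflow nor underflow under square root, so it is handled directly.
          # For single/double rounding precision the operand is scaled by
          # scale_sqrt() (scale factor returned in d0), and d0 is compared
          # against 0x3fff minus the precision's minimum and maximum biased
          # exponents (0x3f81/0x407f for sgl, 0x3c01/0x43ff for dbl) to predict
          # whether rounding the result to the narrower precision would
          # underflow or overflow.
          #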
   11973 
   11974 	global		fssqrt
   11975 fssqrt:
   11976 	andi.b		&0x30,%d0		# clear rnd prec
   11977 	ori.b		&s_mode*0x10,%d0	# insert sgl precision
   11978 	bra.b		fsqrt
   11979 
   11980 	global		fdsqrt
   11981 fdsqrt:
   11982 	andi.b		&0x30,%d0		# clear rnd prec
   11983 	ori.b		&d_mode*0x10,%d0	# insert dbl precision
   11984 
   11985 	global		fsqrt
   11986 fsqrt:
   11987 	mov.l		%d0,L_SCR3(%a6)		# store rnd info
   11988 	clr.w		%d1
   11989 	mov.b		STAG(%a6),%d1
   11990 	bne.w		fsqrt_not_norm		# optimize on non-norm input
   11991 
   11992 #
   11993 # SQUARE ROOT: norms and denorms ONLY!
   11994 #
   11995 fsqrt_norm:
   11996 	tst.b		SRC_EX(%a0)		# is operand negative?
   11997 	bmi.l		res_operr		# yes
   11998 
   11999 	andi.b		&0xc0,%d0		# is precision extended?
   12000 	bne.b		fsqrt_not_ext		# no; go handle sgl or dbl
   12001 
   12002 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   12003 	fmov.l		&0x0,%fpsr		# clear FPSR
   12004 
   12005 	fsqrt.x		(%a0),%fp0		# execute square root
   12006 
   12007 	fmov.l		%fpsr,%d1
   12008 	or.l		%d1,USER_FPSR(%a6)	# set N,INEX
   12009 
   12010 	rts
   12011 
   12012 fsqrt_denorm:
   12013 	tst.b		SRC_EX(%a0)		# is operand negative?
   12014 	bmi.l		res_operr		# yes
   12015 
   12016 	andi.b		&0xc0,%d0		# is precision extended?
   12017 	bne.b		fsqrt_not_ext		# no; go handle sgl or dbl
   12018 
   12019 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
   12020 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   12021 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   12022 
   12023 	bsr.l		scale_sqrt		# calculate scale factor
   12024 
   12025 	bra.w		fsqrt_sd_normal
   12026 
   12027 #
   12028 # operand is either single or double
   12029 #
   12030 fsqrt_not_ext:
   12031 	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
   12032 	bne.w		fsqrt_dbl
   12033 
   12034 #
   12035 # operand is to be rounded to single precision
   12036 #
   12037 fsqrt_sgl:
   12038 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
   12039 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   12040 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   12041 
   12042 	bsr.l		scale_sqrt		# calculate scale factor
   12043 
   12044 	cmpi.l		%d0,&0x3fff-0x3f81	# will move in underflow?
   12045 	beq.w		fsqrt_sd_may_unfl
   12046 	bgt.w		fsqrt_sd_unfl		# yes; go handle underflow
   12047 	cmpi.l		%d0,&0x3fff-0x407f	# will move in overflow?
   12048 	beq.w		fsqrt_sd_may_ovfl	# maybe; go check
   12049 	blt.w		fsqrt_sd_ovfl		# yes; go handle overflow
   12050 
   12051 #
   12052 # operand will NOT overflow or underflow when moved in to the fp reg file
   12053 #
   12054 fsqrt_sd_normal:
   12055 	fmov.l		&0x0,%fpsr		# clear FPSR
   12056 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   12057 
    12058 	fsqrt.x		FP_SCR0(%a6),%fp0	# execute square root
   12059 
   12060 	fmov.l		%fpsr,%d1		# save FPSR
   12061 	fmov.l		&0x0,%fpcr		# clear FPCR
   12062 
   12063 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   12064 
   12065 fsqrt_sd_normal_exit:
   12066 	mov.l		%d2,-(%sp)		# save d2
   12067 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
   12068 	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
   12069 	mov.l		%d1,%d2			# make a copy
   12070 	andi.l		&0x7fff,%d1		# strip sign
   12071 	sub.l		%d0,%d1			# add scale factor
   12072 	andi.w		&0x8000,%d2		# keep old sign
   12073 	or.w		%d1,%d2			# concat old sign,new exp
   12074 	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
   12075 	mov.l		(%sp)+,%d2		# restore d2
   12076 	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
   12077 	rts
   12078 
   12079 #
   12080 # operand is to be rounded to double precision
   12081 #
   12082 fsqrt_dbl:
   12083 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
   12084 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   12085 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   12086 
   12087 	bsr.l		scale_sqrt		# calculate scale factor
   12088 
   12089 	cmpi.l		%d0,&0x3fff-0x3c01	# will move in underflow?
   12090 	beq.w		fsqrt_sd_may_unfl
   12091 	bgt.b		fsqrt_sd_unfl		# yes; go handle underflow
   12092 	cmpi.l		%d0,&0x3fff-0x43ff	# will move in overflow?
   12093 	beq.w		fsqrt_sd_may_ovfl	# maybe; go check
   12094 	blt.w		fsqrt_sd_ovfl		# yes; go handle overflow
    12095 	bra.w		fsqrt_sd_normal		# no; go handle normalized op
   12096 
    12097 # we're on the line here and the distinguishing characteristic is whether
    12098 # the exponent is 0x3fff or 0x3ffe. if it's 0x3fff, the number is safe;
    12099 # otherwise, fall through to the underflow handling.
   12100 fsqrt_sd_may_unfl:
   12101 	btst		&0x0,1+FP_SCR0_EX(%a6)	# is exponent 0x3fff?
   12102 	bne.w		fsqrt_sd_normal		# yes, so no underflow
   12103 
   12104 #
   12105 # operand WILL underflow when moved in to the fp register file
   12106 #
   12107 fsqrt_sd_unfl:
   12108 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
   12109 
   12110 	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
   12111 	fmov.l		&0x0,%fpsr		# clear FPSR
   12112 
   12113 	fsqrt.x 	FP_SCR0(%a6),%fp0	# execute square root
   12114 
   12115 	fmov.l		%fpsr,%d1		# save status
   12116 	fmov.l		&0x0,%fpcr		# clear FPCR
   12117 
   12118 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   12119 
   12120 # if underflow or inexact is enabled, go calculate EXOP first.
   12121 	mov.b		FPCR_ENABLE(%a6),%d1
   12122 	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
   12123 	bne.b		fsqrt_sd_unfl_ena	# yes
   12124 
   12125 fsqrt_sd_unfl_dis:
   12126 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
   12127 
   12128 	lea		FP_SCR0(%a6),%a0	# pass: result addr
   12129 	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
   12130 	bsr.l		unf_res			# calculate default result
   12131 	or.b		%d0,FPSR_CC(%a6)	# set possible 'Z' ccode
   12132 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
   12133 	rts
   12134 
   12135 #
   12136 # operand will underflow AND underflow is enabled.
   12137 # therefore, we must return the result rounded to extended precision.
   12138 #
   12139 fsqrt_sd_unfl_ena:
   12140 	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
   12141 	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
   12142 	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent
   12143 
   12144 	mov.l		%d2,-(%sp)		# save d2
   12145 	mov.l		%d1,%d2			# make a copy
   12146 	andi.l		&0x7fff,%d1		# strip sign
   12147 	andi.w		&0x8000,%d2		# keep old sign
   12148 	sub.l		%d0,%d1			# subtract scale factor
   12149 	addi.l		&0x6000,%d1		# add new bias
   12150 	andi.w		&0x7fff,%d1
   12151 	or.w		%d2,%d1			# concat new sign,new exp
   12152 	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
   12153 	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
   12154 	mov.l		(%sp)+,%d2		# restore d2
   12155 	bra.b		fsqrt_sd_unfl_dis
   12156 
   12157 #
   12158 # operand WILL overflow.
   12159 #
   12160 fsqrt_sd_ovfl:
   12161 	fmov.l		&0x0,%fpsr		# clear FPSR
   12162 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   12163 
   12164 	fsqrt.x		FP_SCR0(%a6),%fp0	# perform square root
   12165 
   12166 	fmov.l		&0x0,%fpcr		# clear FPCR
   12167 	fmov.l		%fpsr,%d1		# save FPSR
   12168 
   12169 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   12170 
   12171 fsqrt_sd_ovfl_tst:
   12172 	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
   12173 
   12174 	mov.b		FPCR_ENABLE(%a6),%d1
   12175 	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
   12176 	bne.b		fsqrt_sd_ovfl_ena	# yes
   12177 
   12178 #
   12179 # OVFL is not enabled; therefore, we must create the default result by
   12180 # calling ovf_res().
   12181 #
   12182 fsqrt_sd_ovfl_dis:
   12183 	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
   12184 	sne		%d1			# set sign param accordingly
   12185 	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
   12186 	bsr.l		ovf_res			# calculate default result
   12187 	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
   12188 	fmovm.x		(%a0),&0x80		# return default result in fp0
   12189 	rts
   12190 
   12191 #
   12192 # OVFL is enabled.
   12193 # the INEX2 bit has already been updated by the round to the correct precision.
   12194 # now, round to extended(and don't alter the FPSR).
   12195 #
   12196 fsqrt_sd_ovfl_ena:
   12197 	mov.l		%d2,-(%sp)		# save d2
   12198 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
   12199 	mov.l		%d1,%d2			# make a copy
   12200 	andi.l		&0x7fff,%d1		# strip sign
   12201 	andi.w		&0x8000,%d2		# keep old sign
   12202 	sub.l		%d0,%d1			# add scale factor
   12203 	subi.l		&0x6000,%d1		# subtract bias
   12204 	andi.w		&0x7fff,%d1
   12205 	or.w		%d2,%d1			# concat sign,exp
   12206 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
   12207 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   12208 	mov.l		(%sp)+,%d2		# restore d2
   12209 	bra.b		fsqrt_sd_ovfl_dis
   12210 
   12211 #
    12212 # the move in MAY overflow. so...
   12213 #
   12214 fsqrt_sd_may_ovfl:
   12215 	btst		&0x0,1+FP_SCR0_EX(%a6)	# is exponent 0x3fff?
   12216 	bne.w		fsqrt_sd_ovfl		# yes, so overflow
   12217 
   12218 	fmov.l		&0x0,%fpsr		# clear FPSR
   12219 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   12220 
    12221 	fsqrt.x		FP_SCR0(%a6),%fp0	# execute square root
   12222 
   12223 	fmov.l		%fpsr,%d1		# save status
   12224 	fmov.l		&0x0,%fpcr		# clear FPCR
   12225 
   12226 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   12227 
   12228 	fmov.x		%fp0,%fp1		# make a copy of result
   12229 	fcmp.b		%fp1,&0x1		# is |result| >= 1.b?
   12230 	fbge.w		fsqrt_sd_ovfl_tst	# yes; overflow has occurred
   12231 
   12232 # no, it didn't overflow; we have correct result
   12233 	bra.w		fsqrt_sd_normal_exit
   12234 
   12235 ##########################################################################
   12236 
   12237 #
   12238 # input is not normalized; what is it?
   12239 #
   12240 fsqrt_not_norm:
   12241 	cmpi.b		%d1,&DENORM		# weed out DENORM
   12242 	beq.w		fsqrt_denorm
   12243 	cmpi.b		%d1,&ZERO		# weed out ZERO
   12244 	beq.b		fsqrt_zero
   12245 	cmpi.b		%d1,&INF		# weed out INF
   12246 	beq.b		fsqrt_inf
   12247 	cmpi.b		%d1,&SNAN		# weed out SNAN
   12248 	beq.l		res_snan_1op
   12249 	bra.l		res_qnan_1op
   12250 
   12251 #
   12252 # 	fsqrt(+0) = +0
   12253 # 	fsqrt(-0) = -0
   12254 #	fsqrt(+INF) = +INF
   12255 # 	fsqrt(-INF) = OPERR
   12256 #
   12257 fsqrt_zero:
   12258 	tst.b		SRC_EX(%a0)		# is ZERO positive or negative?
   12259 	bmi.b		fsqrt_zero_m		# negative
   12260 fsqrt_zero_p:
   12261 	fmov.s		&0x00000000,%fp0	# return +ZERO
   12262 	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
   12263 	rts
   12264 fsqrt_zero_m:
   12265 	fmov.s		&0x80000000,%fp0	# return -ZERO
   12266 	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set 'Z','N' ccode bits
   12267 	rts
   12268 
   12269 fsqrt_inf:
   12270 	tst.b		SRC_EX(%a0)		# is INF positive or negative?
   12271 	bmi.l		res_operr		# negative
   12272 fsqrt_inf_p:
   12273 	fmovm.x		SRC(%a0),&0x80		# return +INF in fp0
   12274 	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
   12275 	rts
   12276 
   12277 #########################################################################
   12278 # XDEF ****************************************************************	#
   12279 #	fetch_dreg(): fetch register according to index in d1		#
   12280 #									#
   12281 # XREF ****************************************************************	#
   12282 #	None								#
   12283 #									#
   12284 # INPUT ***************************************************************	#
   12285 #	d1 = index of register to fetch from				#
   12286 # 									#
   12287 # OUTPUT **************************************************************	#
   12288 #	d0 = value of register fetched					#
   12289 #									#
   12290 # ALGORITHM ***********************************************************	#
   12291 #	According to the index value in d1 which can range from zero 	#
   12292 # to fifteen, load the corresponding register file value (where 	#
   12293 # address register indexes start at 8). D0/D1/A0/A1/A6/A7 are on the	#
   12294 # stack. The rest should still be in their original places.		#
   12295 #									#
   12296 #########################################################################
   12297 
   12298 # this routine leaves d1 intact for subsequent store_dreg calls.
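          # for example, d1 = 0x3 dispatches to fdreg3 and copies the live %d3
          # into d0, while d1 = 0xa dispatches to fdrega and copies %a2.
          # indexes 0, 1, 8, 9, 14 and 15 instead read the copies saved on the
          # exception stack frame (see fdreg0/fdreg1/fdreg8/fdreg9/fdrege/fdregf).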
   12299 	global		fetch_dreg
   12300 fetch_dreg:
   12301 	mov.w		(tbl_fdreg.b,%pc,%d1.w*2),%d0
   12302 	jmp		(tbl_fdreg.b,%pc,%d0.w*1)
   12303 
   12304 tbl_fdreg:
   12305 	short		fdreg0 - tbl_fdreg
   12306 	short		fdreg1 - tbl_fdreg
   12307 	short		fdreg2 - tbl_fdreg
   12308 	short		fdreg3 - tbl_fdreg
   12309 	short		fdreg4 - tbl_fdreg
   12310 	short		fdreg5 - tbl_fdreg
   12311 	short		fdreg6 - tbl_fdreg
   12312 	short		fdreg7 - tbl_fdreg
   12313 	short		fdreg8 - tbl_fdreg
   12314 	short		fdreg9 - tbl_fdreg
   12315 	short		fdrega - tbl_fdreg
   12316 	short		fdregb - tbl_fdreg
   12317 	short		fdregc - tbl_fdreg
   12318 	short		fdregd - tbl_fdreg
   12319 	short		fdrege - tbl_fdreg
   12320 	short		fdregf - tbl_fdreg
   12321 
   12322 fdreg0:
   12323 	mov.l		EXC_DREGS+0x0(%a6),%d0
   12324 	rts
   12325 fdreg1:
   12326 	mov.l		EXC_DREGS+0x4(%a6),%d0
   12327 	rts
   12328 fdreg2:
   12329 	mov.l		%d2,%d0
   12330 	rts
   12331 fdreg3:
   12332 	mov.l		%d3,%d0
   12333 	rts
   12334 fdreg4:
   12335 	mov.l		%d4,%d0
   12336 	rts
   12337 fdreg5:
   12338 	mov.l		%d5,%d0
   12339 	rts
   12340 fdreg6:
   12341 	mov.l		%d6,%d0
   12342 	rts
   12343 fdreg7:
   12344 	mov.l		%d7,%d0
   12345 	rts
   12346 fdreg8:
   12347 	mov.l		EXC_DREGS+0x8(%a6),%d0
   12348 	rts
   12349 fdreg9:
   12350 	mov.l		EXC_DREGS+0xc(%a6),%d0
   12351 	rts
   12352 fdrega:
   12353 	mov.l		%a2,%d0
   12354 	rts
   12355 fdregb:
   12356 	mov.l		%a3,%d0
   12357 	rts
   12358 fdregc:
   12359 	mov.l		%a4,%d0
   12360 	rts
   12361 fdregd:
   12362 	mov.l		%a5,%d0
   12363 	rts
   12364 fdrege:
   12365 	mov.l		(%a6),%d0
   12366 	rts
   12367 fdregf:
   12368 	mov.l		EXC_A7(%a6),%d0
   12369 	rts
   12370 
   12371 #########################################################################
   12372 # XDEF ****************************************************************	#
   12373 #	store_dreg_l(): store longword to data register specified by d1	#
   12374 #									#
   12375 # XREF ****************************************************************	#
   12376 #	None								#
   12377 #									#
   12378 # INPUT ***************************************************************	#
    12379 #	d0 = longword value to store					#
    12380 #	d1 = index of register to store to				#
   12381 # 									#
   12382 # OUTPUT **************************************************************	#
   12383 #	(data register is updated)					#
   12384 #									#
   12385 # ALGORITHM ***********************************************************	#
   12386 #	According to the index value in d1, store the longword value	#
   12387 # in d0 to the corresponding data register. D0/D1 are on the stack	#
   12388 # while the rest are in their initial places.				#
   12389 #									#
   12390 #########################################################################
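          # for example, store_dreg_l with d1 = 2 writes d0 into the live %d2
          # (sdregl2), while d1 = 0 or 1 updates the saved copy in EXC_DREGS
          # on the stack frame instead.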
   12391 
   12392 	global		store_dreg_l
   12393 store_dreg_l:
   12394 	mov.w		(tbl_sdregl.b,%pc,%d1.w*2),%d1
   12395 	jmp		(tbl_sdregl.b,%pc,%d1.w*1)
   12396 
   12397 tbl_sdregl:
   12398 	short		sdregl0 - tbl_sdregl
   12399 	short		sdregl1 - tbl_sdregl
   12400 	short		sdregl2 - tbl_sdregl
   12401 	short		sdregl3 - tbl_sdregl
   12402 	short		sdregl4 - tbl_sdregl
   12403 	short		sdregl5 - tbl_sdregl
   12404 	short		sdregl6 - tbl_sdregl
   12405 	short		sdregl7 - tbl_sdregl
   12406 
   12407 sdregl0:
   12408 	mov.l		%d0,EXC_DREGS+0x0(%a6)
   12409 	rts
   12410 sdregl1:
   12411 	mov.l		%d0,EXC_DREGS+0x4(%a6)
   12412 	rts
   12413 sdregl2:
   12414 	mov.l		%d0,%d2
   12415 	rts
   12416 sdregl3:
   12417 	mov.l		%d0,%d3
   12418 	rts
   12419 sdregl4:
   12420 	mov.l		%d0,%d4
   12421 	rts
   12422 sdregl5:
   12423 	mov.l		%d0,%d5
   12424 	rts
   12425 sdregl6:
   12426 	mov.l		%d0,%d6
   12427 	rts
   12428 sdregl7:
   12429 	mov.l		%d0,%d7
   12430 	rts
   12431 
   12432 #########################################################################
   12433 # XDEF ****************************************************************	#
   12434 #	store_dreg_w(): store word to data register specified by d1	#
   12435 #									#
   12436 # XREF ****************************************************************	#
   12437 #	None								#
   12438 #									#
   12439 # INPUT ***************************************************************	#
   12440 #	d0 = word value to store					#
    12441 #	d1 = index of register to store to				#
   12442 # 									#
   12443 # OUTPUT **************************************************************	#
   12444 #	(data register is updated)					#
   12445 #									#
   12446 # ALGORITHM ***********************************************************	#
   12447 #	According to the index value in d1, store the word value	#
   12448 # in d0 to the corresponding data register. D0/D1 are on the stack	#
   12449 # while the rest are in their initial places.				#
   12450 #									#
   12451 #########################################################################
   12452 
   12453 	global		store_dreg_w
   12454 store_dreg_w:
   12455 	mov.w		(tbl_sdregw.b,%pc,%d1.w*2),%d1
   12456 	jmp		(tbl_sdregw.b,%pc,%d1.w*1)
   12457 
   12458 tbl_sdregw:
   12459 	short		sdregw0 - tbl_sdregw
   12460 	short		sdregw1 - tbl_sdregw
   12461 	short		sdregw2 - tbl_sdregw
   12462 	short		sdregw3 - tbl_sdregw
   12463 	short		sdregw4 - tbl_sdregw
   12464 	short		sdregw5 - tbl_sdregw
   12465 	short		sdregw6 - tbl_sdregw
   12466 	short		sdregw7 - tbl_sdregw
   12467 
   12468 sdregw0:
   12469 	mov.w		%d0,2+EXC_DREGS+0x0(%a6)
   12470 	rts
   12471 sdregw1:
   12472 	mov.w		%d0,2+EXC_DREGS+0x4(%a6)
   12473 	rts
   12474 sdregw2:
   12475 	mov.w		%d0,%d2
   12476 	rts
   12477 sdregw3:
   12478 	mov.w		%d0,%d3
   12479 	rts
   12480 sdregw4:
   12481 	mov.w		%d0,%d4
   12482 	rts
   12483 sdregw5:
   12484 	mov.w		%d0,%d5
   12485 	rts
   12486 sdregw6:
   12487 	mov.w		%d0,%d6
   12488 	rts
   12489 sdregw7:
   12490 	mov.w		%d0,%d7
   12491 	rts
   12492 
   12493 #########################################################################
   12494 # XDEF ****************************************************************	#
   12495 #	store_dreg_b(): store byte to data register specified by d1	#
   12496 #									#
   12497 # XREF ****************************************************************	#
   12498 #	None								#
   12499 #									#
   12500 # INPUT ***************************************************************	#
   12501 #	d0 = byte value to store					#
    12502 #	d1 = index of register to store to				#
   12503 # 									#
   12504 # OUTPUT **************************************************************	#
   12505 #	(data register is updated)					#
   12506 #									#
   12507 # ALGORITHM ***********************************************************	#
   12508 #	According to the index value in d1, store the byte value	#
   12509 # in d0 to the corresponding data register. D0/D1 are on the stack	#
   12510 # while the rest are in their initial places.				#
   12511 #									#
   12512 #########################################################################
   12513 
   12514 	global		store_dreg_b
   12515 store_dreg_b:
   12516 	mov.w		(tbl_sdregb.b,%pc,%d1.w*2),%d1
   12517 	jmp		(tbl_sdregb.b,%pc,%d1.w*1)
   12518 
   12519 tbl_sdregb:
   12520 	short		sdregb0 - tbl_sdregb
   12521 	short		sdregb1 - tbl_sdregb
   12522 	short		sdregb2 - tbl_sdregb
   12523 	short		sdregb3 - tbl_sdregb
   12524 	short		sdregb4 - tbl_sdregb
   12525 	short		sdregb5 - tbl_sdregb
   12526 	short		sdregb6 - tbl_sdregb
   12527 	short		sdregb7 - tbl_sdregb
   12528 
   12529 sdregb0:
   12530 	mov.b		%d0,3+EXC_DREGS+0x0(%a6)
   12531 	rts
   12532 sdregb1:
   12533 	mov.b		%d0,3+EXC_DREGS+0x4(%a6)
   12534 	rts
   12535 sdregb2:
   12536 	mov.b		%d0,%d2
   12537 	rts
   12538 sdregb3:
   12539 	mov.b		%d0,%d3
   12540 	rts
   12541 sdregb4:
   12542 	mov.b		%d0,%d4
   12543 	rts
   12544 sdregb5:
   12545 	mov.b		%d0,%d5
   12546 	rts
   12547 sdregb6:
   12548 	mov.b		%d0,%d6
   12549 	rts
   12550 sdregb7:
   12551 	mov.b		%d0,%d7
   12552 	rts
   12553 
   12554 #########################################################################
   12555 # XDEF ****************************************************************	#
   12556 #	inc_areg(): increment an address register by the value in d0	#
   12557 #									#
   12558 # XREF ****************************************************************	#
   12559 #	None								#
   12560 #									#
   12561 # INPUT ***************************************************************	#
   12562 #	d0 = amount to increment by					#
   12563 #	d1 = index of address register to increment			#
   12564 # 									#
   12565 # OUTPUT **************************************************************	#
   12566 #	(address register is updated)					#
   12567 #									#
   12568 # ALGORITHM ***********************************************************	#
   12569 # 	Typically used for an instruction w/ a post-increment <ea>, 	#
   12570 # this routine adds the increment value in d0 to the address register	#
   12571 # specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside	#
   12572 # in their original places.						#
   12573 # 	For a7, if the increment amount is one, then we have to 	#
   12574 # increment by two. For any a7 update, set the mia7_flag so that if	#
   12575 # an access error exception occurs later in emulation, this address	#
   12576 # register update can be undone.					#
   12577 #									#
   12578 #########################################################################
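          # for example, a byte-sized (a7)+ access arrives here with d0 = 1,
          # but a7 must stay word-aligned, so iareg7b bumps EXC_A7 by 2;
          # mia7_flg records the update so it can be undone if an access error
          # occurs later.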
   12579 
   12580 	global		inc_areg
   12581 inc_areg:
   12582 	mov.w		(tbl_iareg.b,%pc,%d1.w*2),%d1
   12583 	jmp		(tbl_iareg.b,%pc,%d1.w*1)
   12584 
   12585 tbl_iareg:
   12586 	short		iareg0 - tbl_iareg
   12587 	short		iareg1 - tbl_iareg
   12588 	short		iareg2 - tbl_iareg
   12589 	short		iareg3 - tbl_iareg
   12590 	short		iareg4 - tbl_iareg
   12591 	short		iareg5 - tbl_iareg
   12592 	short		iareg6 - tbl_iareg
   12593 	short		iareg7 - tbl_iareg
   12594 
   12595 iareg0:	add.l		%d0,EXC_DREGS+0x8(%a6)
   12596 	rts
   12597 iareg1:	add.l		%d0,EXC_DREGS+0xc(%a6)
   12598 	rts
   12599 iareg2:	add.l		%d0,%a2
   12600 	rts
   12601 iareg3:	add.l		%d0,%a3
   12602 	rts
   12603 iareg4:	add.l		%d0,%a4
   12604 	rts
   12605 iareg5:	add.l		%d0,%a5
   12606 	rts
   12607 iareg6:	add.l		%d0,(%a6)
   12608 	rts
   12609 iareg7:	mov.b		&mia7_flg,SPCOND_FLG(%a6)
   12610 	cmpi.b		%d0,&0x1
   12611 	beq.b		iareg7b
   12612 	add.l		%d0,EXC_A7(%a6)
   12613 	rts
   12614 iareg7b:
   12615 	addq.l		&0x2,EXC_A7(%a6)
   12616 	rts
   12617 
   12618 #########################################################################
   12619 # XDEF ****************************************************************	#
   12620 #	dec_areg(): decrement an address register by the value in d0	#
   12621 #									#
   12622 # XREF ****************************************************************	#
   12623 #	None								#
   12624 #									#
   12625 # INPUT ***************************************************************	#
   12626 #	d0 = amount to decrement by					#
   12627 #	d1 = index of address register to decrement			#
   12628 # 									#
   12629 # OUTPUT **************************************************************	#
   12630 #	(address register is updated)					#
   12631 #									#
   12632 # ALGORITHM ***********************************************************	#
   12633 # 	Typically used for an instruction w/ a pre-decrement <ea>, 	#
    12634 # this routine subtracts the decrement value in d0 from the address	#
    12635 # register specified by d1. A0/A1/A6/A7 reside on the stack. The rest	#
    12636 # reside in their original places.					#
   12637 # 	For a7, if the decrement amount is one, then we have to 	#
   12638 # decrement by two. For any a7 update, set the mda7_flag so that if	#
   12639 # an access error exception occurs later in emulation, this address	#
   12640 # register update can be undone.					#
   12641 #									#
   12642 #########################################################################
   12643 
   12644 	global		dec_areg
   12645 dec_areg:
   12646 	mov.w		(tbl_dareg.b,%pc,%d1.w*2),%d1
   12647 	jmp		(tbl_dareg.b,%pc,%d1.w*1)
   12648 
   12649 tbl_dareg:
   12650 	short		dareg0 - tbl_dareg
   12651 	short		dareg1 - tbl_dareg
   12652 	short		dareg2 - tbl_dareg
   12653 	short		dareg3 - tbl_dareg
   12654 	short		dareg4 - tbl_dareg
   12655 	short		dareg5 - tbl_dareg
   12656 	short		dareg6 - tbl_dareg
   12657 	short		dareg7 - tbl_dareg
   12658 
   12659 dareg0:	sub.l		%d0,EXC_DREGS+0x8(%a6)
   12660 	rts
   12661 dareg1:	sub.l		%d0,EXC_DREGS+0xc(%a6)
   12662 	rts
   12663 dareg2:	sub.l		%d0,%a2
   12664 	rts
   12665 dareg3:	sub.l		%d0,%a3
   12666 	rts
   12667 dareg4:	sub.l		%d0,%a4
   12668 	rts
   12669 dareg5:	sub.l		%d0,%a5
   12670 	rts
   12671 dareg6:	sub.l		%d0,(%a6)
   12672 	rts
   12673 dareg7:	mov.b		&mda7_flg,SPCOND_FLG(%a6)
   12674 	cmpi.b		%d0,&0x1
   12675 	beq.b		dareg7b
   12676 	sub.l		%d0,EXC_A7(%a6)
   12677 	rts
   12678 dareg7b:
   12679 	subq.l		&0x2,EXC_A7(%a6)
   12680 	rts
   12681 
   12682 ##############################################################################
   12683 
   12684 #########################################################################
   12685 # XDEF ****************************************************************	#
   12686 #	load_fpn1(): load FP register value into FP_SRC(a6).		#
   12687 #									#
   12688 # XREF ****************************************************************	#
   12689 #	None								#
   12690 #									#
   12691 # INPUT ***************************************************************	#
   12692 #	d0 = index of FP register to load				#
   12693 # 									#
   12694 # OUTPUT **************************************************************	#
   12695 #	FP_SRC(a6) = value loaded from FP register file			#
   12696 #									#
   12697 # ALGORITHM ***********************************************************	#
   12698 #	Using the index in d0, load FP_SRC(a6) with a number from the 	#
   12699 # FP register file.							#
   12700 #									#
   12701 #########################################################################
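          # note: fp0 and fp1 were saved into the exception frame (EXC_FP0 and
          # EXC_FP1), so they are copied from memory; fp2-fp7 are still live and
          # are dumped with fmovm.x using the static register-list masks 0x20
          # (fp2) down through 0x01 (fp7).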
   12702 
   12703 	global 		load_fpn1
   12704 load_fpn1:
   12705 	mov.w		(tbl_load_fpn1.b,%pc,%d0.w*2), %d0
   12706 	jmp		(tbl_load_fpn1.b,%pc,%d0.w*1)
   12707 
   12708 tbl_load_fpn1:
   12709 	short		load_fpn1_0 - tbl_load_fpn1
   12710 	short		load_fpn1_1 - tbl_load_fpn1
   12711 	short		load_fpn1_2 - tbl_load_fpn1
   12712 	short		load_fpn1_3 - tbl_load_fpn1
   12713 	short		load_fpn1_4 - tbl_load_fpn1
   12714 	short		load_fpn1_5 - tbl_load_fpn1
   12715 	short		load_fpn1_6 - tbl_load_fpn1
   12716 	short		load_fpn1_7 - tbl_load_fpn1
   12717 
   12718 load_fpn1_0:
   12719 	mov.l		0+EXC_FP0(%a6), 0+FP_SRC(%a6)
   12720 	mov.l		4+EXC_FP0(%a6), 4+FP_SRC(%a6)
   12721 	mov.l		8+EXC_FP0(%a6), 8+FP_SRC(%a6)
   12722 	lea		FP_SRC(%a6), %a0
   12723 	rts
   12724 load_fpn1_1:
   12725 	mov.l		0+EXC_FP1(%a6), 0+FP_SRC(%a6)
   12726 	mov.l		4+EXC_FP1(%a6), 4+FP_SRC(%a6)
   12727 	mov.l		8+EXC_FP1(%a6), 8+FP_SRC(%a6)
   12728 	lea		FP_SRC(%a6), %a0
   12729 	rts
   12730 load_fpn1_2:
   12731 	fmovm.x		&0x20, FP_SRC(%a6)
   12732 	lea		FP_SRC(%a6), %a0
   12733 	rts
   12734 load_fpn1_3:
   12735 	fmovm.x		&0x10, FP_SRC(%a6)
   12736 	lea		FP_SRC(%a6), %a0
   12737 	rts
   12738 load_fpn1_4:
   12739 	fmovm.x		&0x08, FP_SRC(%a6)
   12740 	lea		FP_SRC(%a6), %a0
   12741 	rts
   12742 load_fpn1_5:
   12743 	fmovm.x		&0x04, FP_SRC(%a6)
   12744 	lea		FP_SRC(%a6), %a0
   12745 	rts
   12746 load_fpn1_6:
   12747 	fmovm.x		&0x02, FP_SRC(%a6)
   12748 	lea		FP_SRC(%a6), %a0
   12749 	rts
   12750 load_fpn1_7:
   12751 	fmovm.x		&0x01, FP_SRC(%a6)
   12752 	lea		FP_SRC(%a6), %a0
   12753 	rts
   12754 
   12755 #############################################################################
   12756 
   12757 #########################################################################
   12758 # XDEF ****************************************************************	#
   12759 #	load_fpn2(): load FP register value into FP_DST(a6).		#
   12760 #									#
   12761 # XREF ****************************************************************	#
   12762 #	None								#
   12763 #									#
   12764 # INPUT ***************************************************************	#
   12765 #	d0 = index of FP register to load				#
   12766 # 									#
   12767 # OUTPUT **************************************************************	#
   12768 #	FP_DST(a6) = value loaded from FP register file			#
   12769 #									#
   12770 # ALGORITHM ***********************************************************	#
   12771 #	Using the index in d0, load FP_DST(a6) with a number from the 	#
   12772 # FP register file.							#
   12773 #									#
   12774 #########################################################################
   12775 
   12776 	global		load_fpn2
   12777 load_fpn2:
   12778 	mov.w		(tbl_load_fpn2.b,%pc,%d0.w*2), %d0
   12779 	jmp		(tbl_load_fpn2.b,%pc,%d0.w*1)
   12780 
   12781 tbl_load_fpn2:
   12782 	short		load_fpn2_0 - tbl_load_fpn2
   12783 	short		load_fpn2_1 - tbl_load_fpn2
   12784 	short		load_fpn2_2 - tbl_load_fpn2
   12785 	short		load_fpn2_3 - tbl_load_fpn2
   12786 	short		load_fpn2_4 - tbl_load_fpn2
   12787 	short		load_fpn2_5 - tbl_load_fpn2
   12788 	short		load_fpn2_6 - tbl_load_fpn2
   12789 	short		load_fpn2_7 - tbl_load_fpn2
   12790 
   12791 load_fpn2_0:
   12792 	mov.l		0+EXC_FP0(%a6), 0+FP_DST(%a6)
   12793 	mov.l		4+EXC_FP0(%a6), 4+FP_DST(%a6)
   12794 	mov.l		8+EXC_FP0(%a6), 8+FP_DST(%a6)
   12795 	lea		FP_DST(%a6), %a0
   12796 	rts
   12797 load_fpn2_1:
   12798 	mov.l		0+EXC_FP1(%a6), 0+FP_DST(%a6)
   12799 	mov.l		4+EXC_FP1(%a6), 4+FP_DST(%a6)
   12800 	mov.l		8+EXC_FP1(%a6), 8+FP_DST(%a6)
   12801 	lea		FP_DST(%a6), %a0
   12802 	rts
   12803 load_fpn2_2:
   12804 	fmovm.x		&0x20, FP_DST(%a6)
   12805 	lea		FP_DST(%a6), %a0
   12806 	rts
   12807 load_fpn2_3:
   12808 	fmovm.x		&0x10, FP_DST(%a6)
   12809 	lea		FP_DST(%a6), %a0
   12810 	rts
   12811 load_fpn2_4:
   12812 	fmovm.x		&0x08, FP_DST(%a6)
   12813 	lea		FP_DST(%a6), %a0
   12814 	rts
   12815 load_fpn2_5:
   12816 	fmovm.x		&0x04, FP_DST(%a6)
   12817 	lea		FP_DST(%a6), %a0
   12818 	rts
   12819 load_fpn2_6:
   12820 	fmovm.x		&0x02, FP_DST(%a6)
   12821 	lea		FP_DST(%a6), %a0
   12822 	rts
   12823 load_fpn2_7:
   12824 	fmovm.x		&0x01, FP_DST(%a6)
   12825 	lea		FP_DST(%a6), %a0
   12826 	rts
   12827 
   12828 #############################################################################
   12829 
   12830 #########################################################################
   12831 # XDEF ****************************************************************	#
   12832 # 	store_fpreg(): store an fp value to the fpreg designated d0.	#
   12833 #									#
   12834 # XREF ****************************************************************	#
   12835 #	None								#
   12836 #									#
   12837 # INPUT ***************************************************************	#
   12838 #	fp0 = extended precision value to store				#
   12839 #	d0  = index of floating-point register				#
   12840 # 									#
   12841 # OUTPUT **************************************************************	#
   12842 #	None								#
   12843 #									#
   12844 # ALGORITHM ***********************************************************	#
   12845 #	Store the value in fp0 to the FP register designated by the	#
   12846 # value in d0. The FP number can be DENORM or SNAN so we have to be	#
   12847 # careful that we don't take an exception here.				#
   12848 #									#
   12849 #########################################################################
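          # note: for fp2-fp7 the value is pushed from fp0 onto the stack and
          # popped into the target register with fmovm.x; fmovm moves the
          # extended-precision bit pattern unmodified and takes no exceptions,
          # which is what lets DENORMs and SNANs be stored safely.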
   12850 
   12851 	global		store_fpreg
   12852 store_fpreg:
   12853 	mov.w		(tbl_store_fpreg.b,%pc,%d0.w*2), %d0
   12854 	jmp		(tbl_store_fpreg.b,%pc,%d0.w*1)
   12855 
   12856 tbl_store_fpreg:
   12857 	short		store_fpreg_0 - tbl_store_fpreg
   12858 	short		store_fpreg_1 - tbl_store_fpreg
   12859 	short		store_fpreg_2 - tbl_store_fpreg
   12860 	short		store_fpreg_3 - tbl_store_fpreg
   12861 	short		store_fpreg_4 - tbl_store_fpreg
   12862 	short		store_fpreg_5 - tbl_store_fpreg
   12863 	short		store_fpreg_6 - tbl_store_fpreg
   12864 	short		store_fpreg_7 - tbl_store_fpreg
   12865 
   12866 store_fpreg_0:
   12867 	fmovm.x		&0x80, EXC_FP0(%a6)
   12868 	rts
   12869 store_fpreg_1:
   12870 	fmovm.x		&0x80, EXC_FP1(%a6)
   12871 	rts
   12872 store_fpreg_2:
   12873 	fmovm.x 	&0x01, -(%sp)
   12874 	fmovm.x		(%sp)+, &0x20
   12875 	rts
   12876 store_fpreg_3:
   12877 	fmovm.x 	&0x01, -(%sp)
   12878 	fmovm.x		(%sp)+, &0x10
   12879 	rts
   12880 store_fpreg_4:
   12881 	fmovm.x 	&0x01, -(%sp)
   12882 	fmovm.x		(%sp)+, &0x08
   12883 	rts
   12884 store_fpreg_5:
   12885 	fmovm.x 	&0x01, -(%sp)
   12886 	fmovm.x		(%sp)+, &0x04
   12887 	rts
   12888 store_fpreg_6:
   12889 	fmovm.x 	&0x01, -(%sp)
   12890 	fmovm.x		(%sp)+, &0x02
   12891 	rts
   12892 store_fpreg_7:
   12893 	fmovm.x 	&0x01, -(%sp)
   12894 	fmovm.x		(%sp)+, &0x01
   12895 	rts
   12896 
   12897 #########################################################################
   12898 # XDEF ****************************************************************	#
   12899 #	get_packed(): fetch a packed operand from memory and then	#
   12900 #		      convert it to a floating-point binary number.	#
   12901 #									#
   12902 # XREF ****************************************************************	#
   12903 #	_dcalc_ea() - calculate the correct <ea>			#
   12904 #	_mem_read() - fetch the packed operand from memory		#
   12905 #	facc_in_x() - the fetch failed so jump to special exit code	#
   12906 #	decbin()    - convert packed to binary extended precision	#
   12907 #									#
   12908 # INPUT ***************************************************************	#
   12909 #	None								#
   12910 # 									#
   12911 # OUTPUT **************************************************************	#
   12912 #	If no failure on _mem_read():					#
   12913 # 	FP_SRC(a6) = packed operand now as a binary FP number		#
   12914 #									#
   12915 # ALGORITHM ***********************************************************	#
    12916 #	Get the correct <ea> which is the value on the exception stack 	#
   12917 # frame w/ maybe a correction factor if the <ea> is -(an) or (an)+.	#
   12918 # Then, fetch the operand from memory. If the fetch fails, exit		#
   12919 # through facc_in_x().							#
   12920 #	If the packed operand is a ZERO,NAN, or INF, convert it to	#
   12921 # its binary representation here. Else, call decbin() which will 	#
   12922 # convert the packed value to an extended precision binary value.	#
   12923 #									#
   12924 #########################################################################
   12925 
   12926 # the stacked <ea> for packed is correct except for -(An).
   12927 # the base reg must be updated for both -(An) and (An)+.
   12928 	global		get_packed
   12929 get_packed:
   12930 	mov.l		&0xc,%d0		# packed is 12 bytes
   12931 	bsr.l		_dcalc_ea		# fetch <ea>; correct An
   12932 
   12933 	lea		FP_SRC(%a6),%a1		# pass: ptr to super dst
   12934 	mov.l		&0xc,%d0		# pass: 12 bytes
   12935 	bsr.l		_dmem_read		# read packed operand
   12936 
   12937 	tst.l		%d1			# did dfetch fail?
   12938 	bne.l		facc_in_x		# yes
   12939 
   12940 # The packed operand is an INF or a NAN if the exponent field is all ones.
   12941 	bfextu		FP_SRC(%a6){&1:&15},%d0	# get exp
   12942 	cmpi.w		%d0,&0x7fff		# INF or NAN?
   12943 	bne.b		gp_try_zero		# no
   12944 	rts					# operand is an INF or NAN
   12945 
   12946 # The packed operand is a zero if the mantissa is all zero, else it's
   12947 # a normal packed op.
   12948 gp_try_zero:
   12949 	mov.b		3+FP_SRC(%a6),%d0	# get byte 4
   12950 	andi.b		&0x0f,%d0		# clear all but last nybble
   12951 	bne.b		gp_not_spec		# not a zero
   12952 	tst.l		FP_SRC_HI(%a6)		# is lw 2 zero?
   12953 	bne.b		gp_not_spec		# not a zero
   12954 	tst.l		FP_SRC_LO(%a6)		# is lw 3 zero?
   12955 	bne.b		gp_not_spec		# not a zero
   12956 	rts					# operand is a ZERO
   12957 gp_not_spec:
   12958 	lea		FP_SRC(%a6),%a0		# pass: ptr to packed op
   12959 	bsr.l		decbin			# convert to extended
   12960 	fmovm.x		&0x80,FP_SRC(%a6)	# make this the srcop
   12961 	rts
   12962 
   12963 #########################################################################
   12964 # decbin(): Converts normalized packed bcd value pointed to by register	#
   12965 #	    a0 to extended-precision value in fp0.			#
   12966 #									#
   12967 # INPUT ***************************************************************	#
   12968 #	a0 = pointer to normalized packed bcd value			#
   12969 #									#
   12970 # OUTPUT **************************************************************	#
   12971 #	fp0 = exact fp representation of the packed bcd value.		#
   12972 #									#
   12973 # ALGORITHM ***********************************************************	#
   12974 #	Expected is a normal bcd (i.e. non-exceptional; all inf, zero,	#
   12975 #	and NaN operands are dispatched without entering this routine)	#
   12976 #	value in 68881/882 format at location (a0).			#
   12977 #									#
   12978 #	A1. Convert the bcd exponent to binary by successive adds and 	#
   12979 #	muls. Set the sign according to SE. Subtract 16 to compensate	#
   12980 #	for the mantissa which is to be interpreted as 17 integer	#
   12981 #	digits, rather than 1 integer and 16 fraction digits.		#
   12982 #	Note: this operation can never overflow.			#
   12983 #									#
   12984 #	A2. Convert the bcd mantissa to binary by successive		#
   12985 #	adds and muls in FP0. Set the sign according to SM.		#
   12986 #	The mantissa digits will be converted with the decimal point	#
   12987 #	assumed following the least-significant digit.			#
   12988 #	Note: this operation can never overflow.			#
   12989 #									#
   12990 #	A3. Count the number of leading/trailing zeros in the		#
   12991 #	bcd string.  If SE is positive, count the leading zeros;	#
   12992 #	if negative, count the trailing zeros.  Set the adjusted	#
   12993 #	exponent equal to the exponent from A1 and the zero count	#
   12994 #	added if SM = 1 and subtracted if SM = 0.  Scale the		#
   12995 #	mantissa the equivalent of forcing in the bcd value:		#
   12996 #									#
   12997 #	SM = 0	a non-zero digit in the integer position		#
   12998 #	SM = 1	a non-zero digit in Mant0, lsd of the fraction		#
   12999 #									#
   13000 #	this will insure that any value, regardless of its		#
   13001 #	representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted	#
   13002 #	consistently.							#
   13003 #									#
   13004 #	A4. Calculate the factor 10^exp in FP1 using a table of		#
   13005 #	10^(2^n) values.  To reduce the error in forming factors	#
   13006 #	greater than 10^27, a directed rounding scheme is used with	#
   13007 #	tables rounded to RN, RM, and RP, according to the table	#
   13008 #	in the comments of the pwrten section.				#
   13009 #									#
   13010 #	A5. Form the final binary number by scaling the mantissa by	#
   13011 #	the exponent factor.  This is done by multiplying the		#
   13012 #	mantissa in FP0 by the factor in FP1 if the adjusted		#
   13013 #	exponent sign is positive, and dividing FP0 by FP1 if		#
   13014 #	it is negative.							#
   13015 #									#
   13016 #	Clean up and return. Check if the final mul or div was inexact.	#
   13017 #	If so, set INEX1 in USER_FPSR.					#
   13018 #									#
   13019 #########################################################################
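#
# A rough C model of the conversion described above, using double in place
# of extended precision (w[] holds the three packed longwords; the helper
# name is illustrative):
#
#	#include <stdint.h>
#	#include <math.h>
#
#	static double decbin_model(const uint32_t w[3])
#	{
#		int sm = (w[0] >> 31) & 1;	/* sign of mantissa */
#		int se = (w[0] >> 30) & 1;	/* sign of exponent */
#		int exp = 0, i;
#		double mant;
#
#		for (i = 0; i < 3; i++)		/* A1: 3 bcd exponent digits */
#			exp = exp * 10 + ((w[0] >> (24 - 4 * i)) & 0xf);
#		if (se)
#			exp = -exp;
#		exp -= 16;			/* mantissa taken as 17 integer digits */
#
#		mant = (double)(w[0] & 0xf);	/* A2: integer digit, then ... */
#		for (i = 0; i < 16; i++)	/* ... the 16 fraction digits */
#			mant = mant * 10.0 + ((w[1 + i / 8] >> (28 - 4 * (i % 8))) & 0xf);
#		if (sm)
#			mant = -mant;
#
#		return mant * pow(10.0, (double)exp);	/* A4/A5: scale by 10^exp */
#	}
#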
   13020 
   13021 #
   13022 #	PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded
   13023 #	to nearest, minus, and plus, respectively.  The tables include
   13024 #	10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}.  No rounding
    13025 #	is required until the power is greater than 27; however, all
   13026 #	tables include the first 5 for ease of indexing.
   13027 #
   13028 RTABLE:
   13029 	byte		0,0,0,0
   13030 	byte		2,3,2,3
   13031 	byte		2,3,3,2
   13032 	byte		3,2,2,3
   13033 
   13034 	set		FNIBS,7
   13035 	set		FSTRT,0
   13036 
   13037 	set		ESTRT,4
   13038 	set		EDIGITS,2
   13039 
   13040 	global		decbin
   13041 decbin:
   13042 	mov.l		0x0(%a0),FP_SCR0_EX(%a6) # make a copy of input
   13043 	mov.l		0x4(%a0),FP_SCR0_HI(%a6) # so we don't alter it
   13044 	mov.l		0x8(%a0),FP_SCR0_LO(%a6)
   13045 
   13046 	lea		FP_SCR0(%a6),%a0
   13047 
   13048 	movm.l		&0x3c00,-(%sp)		# save d2-d5
   13049 	fmovm.x		&0x1,-(%sp)		# save fp1
   13050 #
   13051 # Calculate exponent:
   13052 #  1. Copy bcd value in memory for use as a working copy.
   13053 #  2. Calculate absolute value of exponent in d1 by mul and add.
   13054 #  3. Correct for exponent sign.
   13055 #  4. Subtract 16 to compensate for interpreting the mant as all integer digits.
   13056 #     (i.e., all digits assumed left of the decimal point.)
   13057 #
   13058 # Register usage:
   13059 #
   13060 #  calc_e:
   13061 #	(*)  d0: temp digit storage
   13062 #	(*)  d1: accumulator for binary exponent
   13063 #	(*)  d2: digit count
   13064 #	(*)  d3: offset pointer
   13065 #	( )  d4: first word of bcd
   13066 #	( )  a0: pointer to working bcd value
   13067 #	( )  a6: pointer to original bcd value
   13068 #	(*)  FP_SCR1: working copy of original bcd value
   13069 #	(*)  L_SCR1: copy of original exponent word
   13070 #
   13071 calc_e:
   13072 	mov.l		&EDIGITS,%d2		# # of nibbles (digits) in fraction part
   13073 	mov.l		&ESTRT,%d3		# counter to pick up digits
   13074 	mov.l		(%a0),%d4		# get first word of bcd
   13075 	clr.l		%d1			# zero d1 for accumulator
   13076 e_gd:
   13077 	mulu.l		&0xa,%d1		# mul partial product by one digit place
   13078 	bfextu		%d4{%d3:&4},%d0		# get the digit and zero extend into d0
   13079 	add.l		%d0,%d1			# d1 = d1 + d0
   13080 	addq.b		&4,%d3			# advance d3 to the next digit
   13081 	dbf.w		%d2,e_gd		# if we have used all 3 digits, exit loop
   13082 	btst		&30,%d4			# get SE
   13083 	beq.b		e_pos			# don't negate if pos
   13084 	neg.l		%d1			# negate before subtracting
   13085 e_pos:
   13086 	sub.l		&16,%d1			# sub to compensate for shift of mant
   13087 	bge.b		e_save			# if still pos, do not neg
   13088 	neg.l		%d1			# now negative, make pos and set SE
   13089 	or.l		&0x40000000,%d4		# set SE in d4,
   13090 	or.l		&0x40000000,(%a0)	# and in working bcd
   13091 e_save:
   13092 	mov.l		%d1,-(%sp)		# save exp on stack
   13093 #
   13094 #
   13095 # Calculate mantissa:
   13096 #  1. Calculate absolute value of mantissa in fp0 by mul and add.
   13097 #  2. Correct for mantissa sign.
   13098 #     (i.e., all digits assumed left of the decimal point.)
   13099 #
   13100 # Register usage:
   13101 #
   13102 #  calc_m:
   13103 #	(*)  d0: temp digit storage
   13104 #	(*)  d1: lword counter
   13105 #	(*)  d2: digit count
   13106 #	(*)  d3: offset pointer
   13107 #	( )  d4: words 2 and 3 of bcd
   13108 #	( )  a0: pointer to working bcd value
   13109 #	( )  a6: pointer to original bcd value
   13110 #	(*) fp0: mantissa accumulator
   13111 #	( )  FP_SCR1: working copy of original bcd value
   13112 #	( )  L_SCR1: copy of original exponent word
   13113 #
   13114 calc_m:
   13115 	mov.l		&1,%d1			# word counter, init to 1
   13116 	fmov.s		&0x00000000,%fp0	# accumulator
   13117 #
   13118 #
   13119 #  Since the packed number has a long word between the first & second parts,
   13120 #  get the integer digit then skip down & get the rest of the
   13121 #  mantissa.  We will unroll the loop once.
   13122 #
   13123 	bfextu		(%a0){&28:&4},%d0	# integer part is ls digit in long word
   13124 	fadd.b		%d0,%fp0		# add digit to sum in fp0
   13125 #
   13126 #
   13127 #  Get the rest of the mantissa.
   13128 #
   13129 loadlw:
    13130 	mov.l		(%a0,%d1.L*4),%d4	# load mantissa longword into d4
   13131 	mov.l		&FSTRT,%d3		# counter to pick up digits
   13132 	mov.l		&FNIBS,%d2		# reset number of digits per a0 ptr
   13133 md2b:
   13134 	fmul.s		&0x41200000,%fp0	# fp0 = fp0 * 10
   13135 	bfextu		%d4{%d3:&4},%d0		# get the digit and zero extend
   13136 	fadd.b		%d0,%fp0		# fp0 = fp0 + digit
   13137 #
   13138 #
   13139 #  If all the digits (8) in that long word have been converted (d2=0),
   13140 #  then inc d1 (=2) to point to the next long word and reset d3 to 0
   13141 #  to initialize the digit offset, and set d2 to 7 for the digit count;
   13142 #  else continue with this long word.
   13143 #
   13144 	addq.b		&4,%d3			# advance d3 to the next digit
   13145 	dbf.w		%d2,md2b		# check for last digit in this lw
   13146 nextlw:
   13147 	addq.l		&1,%d1			# inc lw pointer in mantissa
   13148 	cmp.l		%d1,&2			# test for last lw
   13149 	ble.b		loadlw			# if not, get last one
   13150 #
   13151 #  Check the sign of the mant and make the value in fp0 the same sign.
   13152 #
   13153 m_sign:
   13154 	btst		&31,(%a0)		# test sign of the mantissa
   13155 	beq.b		ap_st_z			# if clear, go to append/strip zeros
   13156 	fneg.x		%fp0			# if set, negate fp0
   13157 #
   13158 # Append/strip zeros:
   13159 #
   13160 #  For adjusted exponents which have an absolute value greater than 27*,
   13161 #  this routine calculates the amount needed to normalize the mantissa
   13162 #  for the adjusted exponent.  That number is subtracted from the exp
   13163 #  if the exp was positive, and added if it was negative.  The purpose
   13164 #  of this is to reduce the value of the exponent and the possibility
   13165 #  of error in calculation of pwrten.
   13166 #
   13167 #  1. Branch on the sign of the adjusted exponent.
   13168 #  2p.(positive exp)
    13169 #   2. Check M16 and the digits in lwords 2 and 3 in descending order.
   13170 #   3. Add one for each zero encountered until a non-zero digit.
   13171 #   4. Subtract the count from the exp.
   13172 #   5. Check if the exp has crossed zero in #3 above; make the exp abs
   13173 #	   and set SE.
   13174 #	6. Multiply the mantissa by 10**count.
   13175 #  2n.(negative exp)
    13176 #   2. Check the digits in lwords 3 and 2 in descending order.
   13177 #   3. Add one for each zero encountered until a non-zero digit.
   13178 #   4. Add the count to the exp.
   13179 #   5. Check if the exp has crossed zero in #3 above; clear SE.
   13180 #   6. Divide the mantissa by 10**count.
   13181 #
    13182 #  *Why 27?  If the adjusted exponent is within -28 < expA < 28, then
    13183 #   any adjustment due to append/strip zeros will drive the resultant
   13184 #   exponent towards zero.  Since all pwrten constants with a power
   13185 #   of 27 or less are exact, there is no need to use this routine to
   13186 #   attempt to lessen the resultant exponent.
   13187 #
   13188 # Register usage:
   13189 #
   13190 #  ap_st_z:
   13191 #	(*)  d0: temp digit storage
   13192 #	(*)  d1: zero count
   13193 #	(*)  d2: digit count
   13194 #	(*)  d3: offset pointer
   13195 #	( )  d4: first word of bcd
   13196 #	(*)  d5: lword counter
   13197 #	( )  a0: pointer to working bcd value
   13198 #	( )  FP_SCR1: working copy of original bcd value
   13199 #	( )  L_SCR1: copy of original exponent word
   13200 #
   13201 #
   13202 # First check the absolute value of the exponent to see if this
   13203 # routine is necessary.  If so, then check the sign of the exponent
   13204 # and do append (+) or strip (-) zeros accordingly.
   13205 # This section handles a positive adjusted exponent.
   13206 #
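#
# A minimal C sketch of the adjustment described above (digits[] holds the
# 17 mantissa digits, most significant first; names are illustrative):
#
#	#include <math.h>
#
#	static void ap_st_zeros_model(const int digits[17], double *mant, int *exp)
#	{
#		int count = 0, i;
#
#		if (*exp >= 0) {		/* count leading zeros  */
#			for (i = 0; i < 17 && digits[i] == 0; i++)
#				count++;
#			*exp -= count;
#			*mant *= pow(10.0, (double)count);
#		} else {			/* count trailing zeros */
#			for (i = 16; i >= 0 && digits[i] == 0; i--)
#				count++;
#			*exp += count;
#			*mant /= pow(10.0, (double)count);
#		}
#	}
#
# Either way the value mant * 10^exp is unchanged; only abs(exp) shrinks.
#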
   13207 ap_st_z:
   13208 	mov.l		(%sp),%d1		# load expA for range test
    13209 	cmp.l		%d1,&27			# compare abs(expA) with 27
   13210 	ble.w		pwrten			# if abs(expA) <28, skip ap/st zeros
   13211 	btst		&30,(%a0)		# check sign of exp
   13212 	bne.b		ap_st_n			# if neg, go to neg side
   13213 	clr.l		%d1			# zero count reg
   13214 	mov.l		(%a0),%d4		# load lword 1 to d4
   13215 	bfextu		%d4{&28:&4},%d0		# get M16 in d0
   13216 	bne.b		ap_p_fx			# if M16 is non-zero, go fix exp
   13217 	addq.l		&1,%d1			# inc zero count
   13218 	mov.l		&1,%d5			# init lword counter
   13219 	mov.l		(%a0,%d5.L*4),%d4	# get lword 2 to d4
    13220 	bne.b		ap_p_cl			# if lw 2 is non-zero, go check its digits
   13221 	addq.l		&8,%d1			# and inc count by 8
   13222 	addq.l		&1,%d5			# inc lword counter
   13223 	mov.l		(%a0,%d5.L*4),%d4	# get lword 3 to d4
   13224 ap_p_cl:
   13225 	clr.l		%d3			# init offset reg
   13226 	mov.l		&7,%d2			# init digit counter
   13227 ap_p_gd:
   13228 	bfextu		%d4{%d3:&4},%d0		# get digit
   13229 	bne.b		ap_p_fx			# if non-zero, go to fix exp
   13230 	addq.l		&4,%d3			# point to next digit
   13231 	addq.l		&1,%d1			# inc digit counter
   13232 	dbf.w		%d2,ap_p_gd		# get next digit
   13233 ap_p_fx:
    13234 	mov.l		%d1,%d0			# copy counter to d0
   13235 	mov.l		(%sp),%d1		# get adjusted exp from memory
   13236 	sub.l		%d0,%d1			# subtract count from exp
   13237 	bge.b		ap_p_fm			# if still pos, go to pwrten
   13238 	neg.l		%d1			# now its neg; get abs
   13239 	mov.l		(%a0),%d4		# load lword 1 to d4
   13240 	or.l		&0x40000000,%d4		# and set SE in d4
   13241 	or.l		&0x40000000,(%a0)	# and in memory
   13242 #
    13243 # Calculate the mantissa multiplier to compensate for the stripping of
   13244 # zeros from the mantissa.
   13245 #
   13246 ap_p_fm:
   13247 	lea.l		PTENRN(%pc),%a1		# get address of power-of-ten table
   13248 	clr.l		%d3			# init table index
   13249 	fmov.s		&0x3f800000,%fp1	# init fp1 to 1
   13250 	mov.l		&3,%d2			# init d2 to count bits in counter
   13251 ap_p_el:
   13252 	asr.l		&1,%d0			# shift lsb into carry
    13253 	bcc.b		ap_p_en			# if bit is clear, skip the mul
   13254 	fmul.x		(%a1,%d3),%fp1		# mul by 10**(d3_bit_no)
   13255 ap_p_en:
   13256 	add.l		&12,%d3			# inc d3 to next rtable entry
   13257 	tst.l		%d0			# check if d0 is zero
   13258 	bne.b		ap_p_el			# if not, get next bit
   13259 	fmul.x		%fp1,%fp0		# mul mantissa by 10**(no_bits_shifted)
   13260 	bra.b		pwrten			# go calc pwrten
   13261 #
   13262 # This section handles a negative adjusted exponent.
   13263 #
   13264 ap_st_n:
   13265 	clr.l		%d1			# clr counter
   13266 	mov.l		&2,%d5			# set up d5 to point to lword 3
   13267 	mov.l		(%a0,%d5.L*4),%d4	# get lword 3
   13268 	bne.b		ap_n_cl			# if not zero, check digits
   13269 	sub.l		&1,%d5			# dec d5 to point to lword 2
   13270 	addq.l		&8,%d1			# inc counter by 8
   13271 	mov.l		(%a0,%d5.L*4),%d4	# get lword 2
   13272 ap_n_cl:
   13273 	mov.l		&28,%d3			# point to last digit
   13274 	mov.l		&7,%d2			# init digit counter
   13275 ap_n_gd:
   13276 	bfextu		%d4{%d3:&4},%d0		# get digit
   13277 	bne.b		ap_n_fx			# if non-zero, go to exp fix
   13278 	subq.l		&4,%d3			# point to previous digit
   13279 	addq.l		&1,%d1			# inc digit counter
   13280 	dbf.w		%d2,ap_n_gd		# get next digit
   13281 ap_n_fx:
   13282 	mov.l		%d1,%d0			# copy counter to d0
   13283 	mov.l		(%sp),%d1		# get adjusted exp from memory
   13284 	sub.l		%d0,%d1			# subtract count from exp
   13285 	bgt.b		ap_n_fm			# if still pos, go fix mantissa
   13286 	neg.l		%d1			# take abs of exp and clr SE
   13287 	mov.l		(%a0),%d4		# load lword 1 to d4
   13288 	and.l		&0xbfffffff,%d4		# and clr SE in d4
   13289 	and.l		&0xbfffffff,(%a0)	# and in memory
   13290 #
   13291 # Calculate the mantissa multiplier to compensate for the appending of
   13292 # zeros to the mantissa.
   13293 #
   13294 ap_n_fm:
   13295 	lea.l		PTENRN(%pc),%a1		# get address of power-of-ten table
   13296 	clr.l		%d3			# init table index
   13297 	fmov.s		&0x3f800000,%fp1	# init fp1 to 1
   13298 	mov.l		&3,%d2			# init d2 to count bits in counter
   13299 ap_n_el:
   13300 	asr.l		&1,%d0			# shift lsb into carry
    13301 	bcc.b		ap_n_en			# if bit is clear, skip the mul
   13302 	fmul.x		(%a1,%d3),%fp1		# mul by 10**(d3_bit_no)
   13303 ap_n_en:
   13304 	add.l		&12,%d3			# inc d3 to next rtable entry
   13305 	tst.l		%d0			# check if d0 is zero
   13306 	bne.b		ap_n_el			# if not, get next bit
   13307 	fdiv.x		%fp1,%fp0		# div mantissa by 10**(no_bits_shifted)
   13308 #
   13309 #
   13310 # Calculate power-of-ten factor from adjusted and shifted exponent.
   13311 #
   13312 # Register usage:
   13313 #
   13314 #  pwrten:
   13315 #	(*)  d0: temp
   13316 #	( )  d1: exponent
   13317 #	(*)  d2: {FPCR[6:5],SM,SE} as index in RTABLE; temp
   13318 #	(*)  d3: FPCR work copy
   13319 #	( )  d4: first word of bcd
   13320 #	(*)  a1: RTABLE pointer
   13321 #  calc_p:
   13322 #	(*)  d0: temp
   13323 #	( )  d1: exponent
   13324 #	(*)  d3: PWRTxx table index
   13325 #	( )  a0: pointer to working copy of bcd
   13326 #	(*)  a1: PWRTxx pointer
   13327 #	(*) fp1: power-of-ten accumulator
   13328 #
   13329 # Pwrten calculates the exponent factor in the selected rounding mode
   13330 # according to the following table:
   13331 #
   13332 #	Sign of Mant  Sign of Exp  Rounding Mode  PWRTEN Rounding Mode
   13333 #
   13334 #	ANY	  ANY	RN	RN
   13335 #
   13336 #	 +	   +	RP	RP
   13337 #	 -	   +	RP	RM
   13338 #	 +	   -	RP	RM
   13339 #	 -	   -	RP	RP
   13340 #
   13341 #	 +	   +	RM	RM
   13342 #	 -	   +	RM	RP
   13343 #	 +	   -	RM	RP
   13344 #	 -	   -	RM	RM
   13345 #
   13346 #	 +	   +	RZ	RM
   13347 #	 -	   +	RZ	RM
   13348 #	 +	   -	RZ	RP
   13349 #	 -	   -	RZ	RP
   13350 #
   13351 #
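#
# The calc_p loop below builds 10^abs(exp) as a product of 10^(2^n) table
# entries, one entry for each set bit of the exponent.  A rough C sketch of
# that decomposition (the squaring here stands in for stepping 12 bytes to
# the next PTENxx entry; the real tables hold pre-rounded extended
# constants):
#
#	static double pwrten_model(unsigned e)
#	{
#		double factor = 1.0;
#		double entry = 10.0;		/* 10^(2^0) */
#
#		while (e != 0) {
#			if (e & 1)
#				factor *= entry;	/* bit set: use this entry */
#			entry *= entry;			/* next entry: 10^(2^(n+1)) */
#			e >>= 1;
#		}
#		return factor;
#	}
#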
   13352 pwrten:
   13353 	mov.l		USER_FPCR(%a6),%d3	# get user's FPCR
   13354 	bfextu		%d3{&26:&2},%d2		# isolate rounding mode bits
   13355 	mov.l		(%a0),%d4		# reload 1st bcd word to d4
   13356 	asl.l		&2,%d2			# format d2 to be
   13357 	bfextu		%d4{&0:&2},%d0		# {FPCR[6],FPCR[5],SM,SE}
   13358 	add.l		%d0,%d2			# in d2 as index into RTABLE
   13359 	lea.l		RTABLE(%pc),%a1		# load rtable base
   13360 	mov.b		(%a1,%d2),%d0		# load new rounding bits from table
   13361 	clr.l		%d3			# clear d3 to force no exc and extended
   13362 	bfins		%d0,%d3{&26:&2}		# stuff new rounding bits in FPCR
   13363 	fmov.l		%d3,%fpcr		# write new FPCR
   13364 	asr.l		&1,%d0			# write correct PTENxx table
   13365 	bcc.b		not_rp			# to a1
   13366 	lea.l		PTENRP(%pc),%a1		# it is RP
   13367 	bra.b		calc_p			# go to init section
   13368 not_rp:
   13369 	asr.l		&1,%d0			# keep checking
   13370 	bcc.b		not_rm
   13371 	lea.l		PTENRM(%pc),%a1		# it is RM
   13372 	bra.b		calc_p			# go to init section
   13373 not_rm:
   13374 	lea.l		PTENRN(%pc),%a1		# it is RN
   13375 calc_p:
   13376 	mov.l		%d1,%d0			# copy exp to d0;use d0
   13377 	bpl.b		no_neg			# if exp is negative,
   13378 	neg.l		%d0			# invert it
   13379 	or.l		&0x40000000,(%a0)	# and set SE bit
   13380 no_neg:
   13381 	clr.l		%d3			# table index
   13382 	fmov.s		&0x3f800000,%fp1	# init fp1 to 1
   13383 e_loop:
   13384 	asr.l		&1,%d0			# shift next bit into carry
   13385 	bcc.b		e_next			# if zero, skip the mul
   13386 	fmul.x		(%a1,%d3),%fp1		# mul by 10**(d3_bit_no)
   13387 e_next:
   13388 	add.l		&12,%d3			# inc d3 to next rtable entry
   13389 	tst.l		%d0			# check if d0 is zero
   13390 	bne.b		e_loop			# not zero, continue shifting
   13391 #
   13392 #
   13393 #  Check the sign of the adjusted exp and make the value in fp0 the
   13394 #  same sign. If the exp was pos then multiply fp1*fp0;
   13395 #  else divide fp0/fp1.
   13396 #
   13397 # Register Usage:
   13398 #  norm:
   13399 #	( )  a0: pointer to working bcd value
   13400 #	(*) fp0: mantissa accumulator
   13401 #	( ) fp1: scaling factor - 10**(abs(exp))
   13402 #
   13403 pnorm:
   13404 	btst		&30,(%a0)		# test the sign of the exponent
   13405 	beq.b		mul			# if clear, go to multiply
   13406 div:
    13407 	fdiv.x		%fp1,%fp0		# exp is negative, so divide mant by 10**(abs(exp))
    13408 	bra.b		end_dec
    13409 mul:
    13410 	fmul.x		%fp1,%fp0		# exp is positive, so multiply mant by 10**(abs(exp))
   13411 #
   13412 #
   13413 # Clean up and return with result in fp0.
   13414 #
   13415 # If the final mul/div in decbin incurred an inex exception,
   13416 # it will be inex2, but will be reported as inex1 by get_op.
   13417 #
   13418 end_dec:
   13419 	fmov.l		%fpsr,%d0		# get status register
   13420 	bclr		&inex2_bit+8,%d0	# test for inex2 and clear it
   13421 	beq.b		no_exc			# skip this if no exc
   13422 	ori.w		&inx1a_mask,2+USER_FPSR(%a6) # set INEX1/AINEX
   13423 no_exc:
   13424 	add.l		&0x4,%sp		# clear 1 lw param
   13425 	fmovm.x		(%sp)+,&0x40		# restore fp1
   13426 	movm.l		(%sp)+,&0x3c		# restore d2-d5
   13427 	fmov.l		&0x0,%fpcr
   13428 	fmov.l		&0x0,%fpsr
   13429 	rts
   13430 
   13431 #########################################################################
   13432 # bindec(): Converts an input in extended precision format to bcd format#
   13433 #									#
   13434 # INPUT ***************************************************************	#
   13435 #	a0 = pointer to the input extended precision value in memory.	#
   13436 #	     the input may be either normalized, unnormalized, or 	#
   13437 #	     denormalized.						#
   13438 #	d0 = contains the k-factor sign-extended to 32-bits. 		#
   13439 #									#
   13440 # OUTPUT **************************************************************	#
   13441 #	FP_SCR0(a6) = bcd format result on the stack.			#
   13442 #									#
   13443 # ALGORITHM ***********************************************************	#
   13444 #									#
   13445 #	A1.	Set RM and size ext;  Set SIGMA = sign of input.  	#
   13446 #		The k-factor is saved for use in d7. Clear the		#
   13447 #		BINDEC_FLG for separating normalized/denormalized	#
   13448 #		input.  If input is unnormalized or denormalized,	#
   13449 #		normalize it.						#
   13450 #									#
   13451 #	A2.	Set X = abs(input).					#
   13452 #									#
   13453 #	A3.	Compute ILOG.						#
   13454 #		ILOG is the log base 10 of the input value.  It is	#
   13455 #		approximated by adding e + 0.f when the original 	#
   13456 #		value is viewed as 2^^e * 1.f in extended precision.  	#
   13457 #		This value is stored in d6.				#
   13458 #									#
   13459 #	A4.	Clr INEX bit.						#
   13460 #		The operation in A3 above may have set INEX2.  		#
   13461 #									#
   13462 #	A5.	Set ICTR = 0;						#
   13463 #		ICTR is a flag used in A13.  It must be set before the 	#
   13464 #		loop entry A6.						#
   13465 #									#
   13466 #	A6.	Calculate LEN.						#
   13467 #		LEN is the number of digits to be displayed.  The	#
   13468 #		k-factor can dictate either the total number of digits,	#
   13469 #		if it is a positive number, or the number of digits	#
   13470 #		after the decimal point which are to be included as	#
   13471 #		significant.  See the 68882 manual for examples.	#
   13472 #		If LEN is computed to be greater than 17, set OPERR in	#
   13473 #		USER_FPSR.  LEN is stored in d4.			#
   13474 #									#
   13475 #	A7.	Calculate SCALE.					#
   13476 #		SCALE is equal to 10^ISCALE, where ISCALE is the number	#
   13477 #		of decimal places needed to insure LEN integer digits	#
   13478 #		in the output before conversion to bcd. LAMBDA is the	#
   13479 #		sign of ISCALE, used in A9. Fp1 contains		#
   13480 #		10^^(abs(ISCALE)) using a rounding mode which is a	#
   13481 #		function of the original rounding mode and the signs	#
   13482 #		of ISCALE and X.  A table is given in the code.		#
   13483 #									#
   13484 #	A8.	Clr INEX; Force RZ.					#
   13485 #		The operation in A3 above may have set INEX2.  		#
   13486 #		RZ mode is forced for the scaling operation to insure	#
   13487 #		only one rounding error.  The grs bits are collected in #
   13488 #		the INEX flag for use in A10.				#
   13489 #									#
   13490 #	A9.	Scale X -> Y.						#
   13491 #		The mantissa is scaled to the desired number of		#
   13492 #		significant digits.  The excess digits are collected	#
   13493 #		in INEX2.						#
   13494 #									#
   13495 #	A10.	Or in INEX.						#
   13496 #		If INEX is set, round error occurred.  This is		#
   13497 #		compensated for by 'or-ing' in the INEX2 flag to	#
   13498 #		the lsb of Y.						#
   13499 #									#
   13500 #	A11.	Restore original FPCR; set size ext.			#
   13501 #		Perform FINT operation in the user's rounding mode.	#
   13502 #		Keep the size to extended.				#
   13503 #									#
   13504 #	A12.	Calculate YINT = FINT(Y) according to user's rounding	#
   13505 #		mode.  The FPSP routine sintd0 is used.  The output	#
   13506 #		is in fp0.						#
   13507 #									#
   13508 #	A13.	Check for LEN digits.					#
   13509 #		If the int operation results in more than LEN digits,	#
   13510 #		or less than LEN -1 digits, adjust ILOG and repeat from	#
   13511 #		A6.  This test occurs only on the first pass.  If the	#
   13512 #		result is exactly 10^LEN, decrement ILOG and divide	#
   13513 #		the mantissa by 10.					#
   13514 #									#
   13515 #	A14.	Convert the mantissa to bcd.				#
   13516 #		The binstr routine is used to convert the LEN digit 	#
   13517 #		mantissa to bcd in memory.  The input to binstr is	#
   13518 #		to be a fraction; i.e. (mantissa)/10^LEN and adjusted	#
   13519 #		such that the decimal point is to the left of bit 63.	#
   13520 #		The bcd digits are stored in the correct position in 	#
   13521 #		the final string area in memory.			#
   13522 #									#
   13523 #	A15.	Convert the exponent to bcd.				#
   13524 #		As in A14 above, the exp is converted to bcd and the	#
   13525 #		digits are stored in the final string.			#
   13526 #		Test the length of the final exponent string.  If the	#
   13527 #		length is 4, set operr.					#
   13528 #									#
   13529 #	A16.	Write sign bits to final string.			#
   13530 #									#
   13531 #########################################################################
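#
# A very rough C model of the A1-A16 flow above for a finite, non-zero
# double (no extended precision, no directed-rounding tables, round-half-up
# in place of the user's rounding mode, and without the A13 retry loop;
# all names are illustrative):
#
#	#include <math.h>
#	#include <stdio.h>
#
#	static void bindec_model(double x, int k)
#	{
#		double ax = fabs(x);
#		int ilog = (int)floor(log10(ax));		/* A3: ILOG   */
#		int len = (k > 0) ? k : ilog + 1 - k;		/* A6: LEN    */
#		int iscale;
#		double y, yint;
#
#		if (len < 1)
#			len = 1;
#		if (len > 17)
#			len = 17;
#		iscale = ilog + 1 - len;			/* A7: ISCALE */
#		y = ax / pow(10.0, (double)iscale);		/* A9: scale  */
#		yint = floor(y + 0.5);				/* A12: round */
#		if (yint >= pow(10.0, (double)len)) {		/* A13: hit 10^LEN */
#			yint /= 10.0;
#			iscale++;
#		}
#		/* A14-A16: LEN digits of yint, exponent iscale, and the signs */
#		printf("%s%.0fE%+d\n", (x < 0.0) ? "-" : "", yint, iscale);
#	}
#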
   13532 
   13533 set	BINDEC_FLG,	EXC_TEMP	# DENORM flag
   13534 
   13535 # Constants in extended precision
   13536 PLOG2:
   13537 	long		0x3FFD0000,0x9A209A84,0xFBCFF798,0x00000000
   13538 PLOG2UP1:
   13539 	long		0x3FFD0000,0x9A209A84,0xFBCFF799,0x00000000
   13540 
   13541 # Constants in single precision
   13542 FONE:
   13543 	long		0x3F800000,0x00000000,0x00000000,0x00000000
   13544 FTWO:
   13545 	long		0x40000000,0x00000000,0x00000000,0x00000000
   13546 FTEN:
   13547 	long		0x41200000,0x00000000,0x00000000,0x00000000
   13548 F4933:
   13549 	long		0x459A2800,0x00000000,0x00000000,0x00000000
   13550 
   13551 RBDTBL:
   13552 	byte		0,0,0,0
   13553 	byte		3,3,2,2
   13554 	byte		3,2,2,3
   13555 	byte		2,3,3,2
   13556 
   13557 #	Implementation Notes:
   13558 #
   13559 #	The registers are used as follows:
   13560 #
   13561 #		d0: scratch; LEN input to binstr
   13562 #		d1: scratch
   13563 #		d2: upper 32-bits of mantissa for binstr
   13564 #		d3: scratch;lower 32-bits of mantissa for binstr
   13565 #		d4: LEN
   13566 #      		d5: LAMBDA/ICTR
   13567 #		d6: ILOG
   13568 #		d7: k-factor
   13569 #		a0: ptr for original operand/final result
   13570 #		a1: scratch pointer
   13571 #		a2: pointer to FP_X; abs(original value) in ext
   13572 #		fp0: scratch
   13573 #		fp1: scratch
   13574 #		fp2: scratch
   13575 #		F_SCR1:
   13576 #		F_SCR2:
   13577 #		L_SCR1:
   13578 #		L_SCR2:
   13579 
   13580 	global		bindec
   13581 bindec:
   13582 	movm.l		&0x3f20,-(%sp)	#  {%d2-%d7/%a2}
   13583 	fmovm.x		&0x7,-(%sp)	#  {%fp0-%fp2}
   13584 
   13585 # A1. Set RM and size ext. Set SIGMA = sign input;
   13586 #     The k-factor is saved for use in d7.  Clear BINDEC_FLG for
   13587 #     separating  normalized/denormalized input.  If the input
   13588 #     is a denormalized number, set the BINDEC_FLG memory word
   13589 #     to signal denorm.  If the input is unnormalized, normalize
   13590 #     the input and test for denormalized result.
   13591 #
   13592 	fmov.l		&rm_mode*0x10,%fpcr	# set RM and ext
   13593 	mov.l		(%a0),L_SCR2(%a6)	# save exponent for sign check
   13594 	mov.l		%d0,%d7		# move k-factor to d7
   13595 
   13596 	clr.b		BINDEC_FLG(%a6)	# clr norm/denorm flag
   13597 	cmpi.b		STAG(%a6),&DENORM # is input a DENORM?
   13598 	bne.w		A2_str		# no; input is a NORM
   13599 
   13600 #
   13601 # Normalize the denorm
   13602 #
   13603 un_de_norm:
   13604 	mov.w		(%a0),%d0
   13605 	and.w		&0x7fff,%d0	# strip sign of normalized exp
   13606 	mov.l		4(%a0),%d1
   13607 	mov.l		8(%a0),%d2
   13608 norm_loop:
   13609 	sub.w		&1,%d0
   13610 	lsl.l		&1,%d2
   13611 	roxl.l		&1,%d1
   13612 	tst.l		%d1
   13613 	bge.b		norm_loop
   13614 #
   13615 # Test if the normalized input is denormalized
   13616 #
   13617 	tst.w		%d0
   13618 	bgt.b		pos_exp		# if greater than zero, it is a norm
   13619 	st		BINDEC_FLG(%a6)	# set flag for denorm
   13620 pos_exp:
   13621 	and.w		&0x7fff,%d0	# strip sign of normalized exp
   13622 	mov.w		%d0,(%a0)
   13623 	mov.l		%d1,4(%a0)
   13624 	mov.l		%d2,8(%a0)
   13625 
   13626 # A2. Set X = abs(input).
   13627 #
   13628 A2_str:
   13629 	mov.l		(%a0),FP_SCR1(%a6)	# move input to work space
   13630 	mov.l		4(%a0),FP_SCR1+4(%a6)	# move input to work space
   13631 	mov.l		8(%a0),FP_SCR1+8(%a6)	# move input to work space
   13632 	and.l		&0x7fffffff,FP_SCR1(%a6)	# create abs(X)
   13633 
   13634 # A3. Compute ILOG.
   13635 #     ILOG is the log base 10 of the input value.  It is approx-
   13636 #     imated by adding e + 0.f when the original value is viewed
   13637 #     as 2^^e * 1.f in extended precision.  This value is stored
   13638 #     in d6.
   13639 #
   13640 # Register usage:
   13641 #	Input/Output
   13642 #	d0: k-factor/exponent
   13643 #	d2: x/x
   13644 #	d3: x/x
   13645 #	d4: x/x
   13646 #	d5: x/x
   13647 #	d6: x/ILOG
   13648 #	d7: k-factor/Unchanged
   13649 #	a0: ptr for original operand/final result
   13650 #	a1: x/x
   13651 #	a2: x/x
   13652 #	fp0: x/float(ILOG)
   13653 #	fp1: x/x
   13654 #	fp2: x/x
   13655 #	F_SCR1:x/x
   13656 #	F_SCR2:Abs(X)/Abs(X) with $3fff exponent
   13657 #	L_SCR1:x/x
   13658 #	L_SCR2:first word of X packed/Unchanged
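#
# A small C sketch of the approximation used here: with abs(X) = 2^e * 1.f,
# log10(abs(X)) = (e + log2(1.f)) * log10(2), and the cheap 0.f is used in
# place of log2(1.f), so the result can be off by one (A13 corrects that
# later).  frexp() stands in for pulling the exponent field apart:
#
#	#include <math.h>
#
#	static int ilog_model(double ax)
#	{
#		int e;
#		double f = frexp(ax, &e);	/* ax = f * 2^e, 0.5 <= f < 1 */
#
#		f *= 2.0;			/* renormalize to 1.f * 2^(e-1) */
#		e -= 1;
#		return (int)floor((e + (f - 1.0)) * log10(2.0));
#	}
#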
   13659 
   13660 	tst.b		BINDEC_FLG(%a6)	# check for denorm
   13661 	beq.b		A3_cont		# if clr, continue with norm
   13662 	mov.l		&-4933,%d6	# force ILOG = -4933
   13663 	bra.b		A4_str
   13664 A3_cont:
   13665 	mov.w		FP_SCR1(%a6),%d0	# move exp to d0
   13666 	mov.w		&0x3fff,FP_SCR1(%a6)	# replace exponent with 0x3fff
   13667 	fmov.x		FP_SCR1(%a6),%fp0	# now fp0 has 1.f
   13668 	sub.w		&0x3fff,%d0	# strip off bias
   13669 	fadd.w		%d0,%fp0	# add in exp
   13670 	fsub.s		FONE(%pc),%fp0	# subtract off 1.0
   13671 	fbge.w		pos_res		# if pos, branch
   13672 	fmul.x		PLOG2UP1(%pc),%fp0	# if neg, mul by LOG2UP1
   13673 	fmov.l		%fp0,%d6	# put ILOG in d6 as a lword
   13674 	bra.b		A4_str		# go move out ILOG
   13675 pos_res:
   13676 	fmul.x		PLOG2(%pc),%fp0	# if pos, mul by LOG2
   13677 	fmov.l		%fp0,%d6	# put ILOG in d6 as a lword
   13678 
   13679 
   13680 # A4. Clr INEX bit.
   13681 #     The operation in A3 above may have set INEX2.
   13682 
   13683 A4_str:
   13684 	fmov.l		&0,%fpsr	# zero all of fpsr - nothing needed
   13685 
   13686 
   13687 # A5. Set ICTR = 0;
   13688 #     ICTR is a flag used in A13.  It must be set before the
   13689 #     loop entry A6. The lower word of d5 is used for ICTR.
   13690 
   13691 	clr.w		%d5		# clear ICTR
   13692 
   13693 # A6. Calculate LEN.
   13694 #     LEN is the number of digits to be displayed.  The k-factor
   13695 #     can dictate either the total number of digits, if it is
   13696 #     a positive number, or the number of digits after the
   13697 #     original decimal point which are to be included as
   13698 #     significant.  See the 68882 manual for examples.
   13699 #     If LEN is computed to be greater than 17, set OPERR in
   13700 #     USER_FPSR.  LEN is stored in d4.
   13701 #
   13702 # Register usage:
   13703 #	Input/Output
   13704 #	d0: exponent/Unchanged
   13705 #	d2: x/x/scratch
   13706 #	d3: x/x
   13707 #	d4: exc picture/LEN
   13708 #	d5: ICTR/Unchanged
   13709 #	d6: ILOG/Unchanged
   13710 #	d7: k-factor/Unchanged
   13711 #	a0: ptr for original operand/final result
   13712 #	a1: x/x
   13713 #	a2: x/x
   13714 #	fp0: float(ILOG)/Unchanged
   13715 #	fp1: x/x
   13716 #	fp2: x/x
   13717 #	F_SCR1:x/x
   13718 #	F_SCR2:Abs(X) with $3fff exponent/Unchanged
   13719 #	L_SCR1:x/x
   13720 #	L_SCR2:first word of X packed/Unchanged
   13721 
   13722 A6_str:
   13723 	tst.l		%d7		# branch on sign of k
   13724 	ble.b		k_neg		# if k <= 0, LEN = ILOG + 1 - k
   13725 	mov.l		%d7,%d4		# if k > 0, LEN = k
   13726 	bra.b		len_ck		# skip to LEN check
   13727 k_neg:
   13728 	mov.l		%d6,%d4		# first load ILOG to d4
   13729 	sub.l		%d7,%d4		# subtract off k
   13730 	addq.l		&1,%d4		# add in the 1
   13731 len_ck:
   13732 	tst.l		%d4		# LEN check: branch on sign of LEN
   13733 	ble.b		LEN_ng		# if neg, set LEN = 1
   13734 	cmp.l		%d4,&17		# test if LEN > 17
   13735 	ble.b		A7_str		# if not, forget it
   13736 	mov.l		&17,%d4		# set max LEN = 17
   13737 	tst.l		%d7		# if negative, never set OPERR
    13738 	ble.b		A7_str		# if k <= 0, skip setting OPERR
   13739 	or.l		&opaop_mask,USER_FPSR(%a6)	# set OPERR & AIOP in USER_FPSR
   13740 	bra.b		A7_str		# finished here
   13741 LEN_ng:
   13742 	mov.l		&1,%d4		# min LEN is 1
   13743 
   13744 
   13745 # A7. Calculate SCALE.
   13746 #     SCALE is equal to 10^ISCALE, where ISCALE is the number
   13747 #     of decimal places needed to insure LEN integer digits
   13748 #     in the output before conversion to bcd. LAMBDA is the sign
   13749 #     of ISCALE, used in A9.  Fp1 contains 10^^(abs(ISCALE)) using
   13750 #     the rounding mode as given in the following table (see
   13751 #     Coonen, p. 7.23 as ref.; however, the SCALE variable is
   13752 #     of opposite sign in bindec.sa from Coonen).
   13753 #
   13754 #	Initial					USE
   13755 #	FPCR[6:5]	LAMBDA	SIGN(X)		FPCR[6:5]
   13756 #	----------------------------------------------
   13757 #	 RN	00	   0	   0		00/0	RN
   13758 #	 RN	00	   0	   1		00/0	RN
   13759 #	 RN	00	   1	   0		00/0	RN
   13760 #	 RN	00	   1	   1		00/0	RN
   13761 #	 RZ	01	   0	   0		11/3	RP
   13762 #	 RZ	01	   0	   1		11/3	RP
   13763 #	 RZ	01	   1	   0		10/2	RM
   13764 #	 RZ	01	   1	   1		10/2	RM
   13765 #	 RM	10	   0	   0		11/3	RP
   13766 #	 RM	10	   0	   1		10/2	RM
   13767 #	 RM	10	   1	   0		10/2	RM
   13768 #	 RM	10	   1	   1		11/3	RP
   13769 #	 RP	11	   0	   0		10/2	RM
   13770 #	 RP	11	   0	   1		11/3	RP
   13771 #	 RP	11	   1	   0		11/3	RP
   13772 #	 RP	11	   1	   1		10/2	RM
   13773 #
   13774 # Register usage:
   13775 #	Input/Output
   13776 #	d0: exponent/scratch - final is 0
   13777 #	d2: x/0 or 24 for A9
   13778 #	d3: x/scratch - offset ptr into PTENRM array
   13779 #	d4: LEN/Unchanged
   13780 #	d5: 0/ICTR:LAMBDA
   13781 #	d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
   13782 #	d7: k-factor/Unchanged
   13783 #	a0: ptr for original operand/final result
   13784 #	a1: x/ptr to PTENRM array
   13785 #	a2: x/x
   13786 #	fp0: float(ILOG)/Unchanged
   13787 #	fp1: x/10^ISCALE
   13788 #	fp2: x/x
   13789 #	F_SCR1:x/x
   13790 #	F_SCR2:Abs(X) with $3fff exponent/Unchanged
   13791 #	L_SCR1:x/x
   13792 #	L_SCR2:first word of X packed/Unchanged
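#
# A small C sketch of how the table above is indexed (RBDTBL entries are
# the FPCR rounding-mode field values 0=RN, 2=RM, 3=RP; names illustrative):
#
#	static const unsigned char rbdtbl[16] = {
#		0, 0, 0, 0,		/* RN */
#		3, 3, 2, 2,		/* RZ */
#		3, 2, 2, 3,		/* RM */
#		2, 3, 3, 2,		/* RP */
#	};
#
#	static int scale_rmode(int fpcr_rmode, int lambda, int x_is_neg)
#	{
#		return rbdtbl[(fpcr_rmode << 2) | (lambda << 1) | (x_is_neg & 1)];
#	}
#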
   13793 
   13794 A7_str:
   13795 	tst.l		%d7		# test sign of k
   13796 	bgt.b		k_pos		# if pos and > 0, skip this
   13797 	cmp.l		%d7,%d6		# test k - ILOG
   13798 	blt.b		k_pos		# if ILOG >= k, skip this
   13799 	mov.l		%d7,%d6		# if ((k<0) & (ILOG < k)) ILOG = k
   13800 k_pos:
   13801 	mov.l		%d6,%d0		# calc ILOG + 1 - LEN in d0
   13802 	addq.l		&1,%d0		# add the 1
   13803 	sub.l		%d4,%d0		# sub off LEN
   13804 	swap		%d5		# use upper word of d5 for LAMBDA
   13805 	clr.w		%d5		# set it zero initially
   13806 	clr.w		%d2		# set up d2 for very small case
   13807 	tst.l		%d0		# test sign of ISCALE
   13808 	bge.b		iscale		# if pos, skip next inst
   13809 	addq.w		&1,%d5		# if neg, set LAMBDA true
   13810 	cmp.l		%d0,&0xffffecd4	# test iscale <= -4908
   13811 	bgt.b		no_inf		# if false, skip rest
   13812 	add.l		&24,%d0		# add in 24 to iscale
   13813 	mov.l		&24,%d2		# put 24 in d2 for A9
   13814 no_inf:
   13815 	neg.l		%d0		# and take abs of ISCALE
   13816 iscale:
   13817 	fmov.s		FONE(%pc),%fp1	# init fp1 to 1
   13818 	bfextu		USER_FPCR(%a6){&26:&2},%d1	# get initial rmode bits
   13819 	lsl.w		&1,%d1		# put them in bits 2:1
   13820 	add.w		%d5,%d1		# add in LAMBDA
   13821 	lsl.w		&1,%d1		# put them in bits 3:1
   13822 	tst.l		L_SCR2(%a6)	# test sign of original x
   13823 	bge.b		x_pos		# if pos, don't set bit 0
   13824 	addq.l		&1,%d1		# if neg, set bit 0
   13825 x_pos:
   13826 	lea.l		RBDTBL(%pc),%a2	# load rbdtbl base
   13827 	mov.b		(%a2,%d1),%d3	# load d3 with new rmode
   13828 	lsl.l		&4,%d3		# put bits in proper position
   13829 	fmov.l		%d3,%fpcr	# load bits into fpu
   13830 	lsr.l		&4,%d3		# put bits in proper position
   13831 	tst.b		%d3		# decode new rmode for pten table
   13832 	bne.b		not_rn		# if zero, it is RN
   13833 	lea.l		PTENRN(%pc),%a1	# load a1 with RN table base
   13834 	bra.b		rmode		# exit decode
   13835 not_rn:
   13836 	lsr.b		&1,%d3		# get lsb in carry
   13837 	bcc.b		not_rp2		# if carry clear, it is RM
   13838 	lea.l		PTENRP(%pc),%a1	# load a1 with RP table base
   13839 	bra.b		rmode		# exit decode
   13840 not_rp2:
   13841 	lea.l		PTENRM(%pc),%a1	# load a1 with RM table base
   13842 rmode:
   13843 	clr.l		%d3		# clr table index
   13844 e_loop2:
   13845 	lsr.l		&1,%d0		# shift next bit into carry
   13846 	bcc.b		e_next2		# if zero, skip the mul
   13847 	fmul.x		(%a1,%d3),%fp1	# mul by 10**(d3_bit_no)
   13848 e_next2:
   13849 	add.l		&12,%d3		# inc d3 to next pwrten table entry
   13850 	tst.l		%d0		# test if ISCALE is zero
   13851 	bne.b		e_loop2		# if not, loop
   13852 
   13853 # A8. Clr INEX; Force RZ.
   13854 #     The operation in A3 above may have set INEX2.
   13855 #     RZ mode is forced for the scaling operation to insure
   13856 #     only one rounding error.  The grs bits are collected in
   13857 #     the INEX flag for use in A10.
   13858 #
   13859 # Register usage:
   13860 #	Input/Output
   13861 
   13862 	fmov.l		&0,%fpsr	# clr INEX
   13863 	fmov.l		&rz_mode*0x10,%fpcr	# set RZ rounding mode
   13864 
   13865 # A9. Scale X -> Y.
   13866 #     The mantissa is scaled to the desired number of significant
   13867 #     digits.  The excess digits are collected in INEX2. If mul,
   13868 #     Check d2 for excess 10 exponential value.  If not zero,
   13869 #     the iscale value would have caused the pwrten calculation
   13870 #     to overflow.  Only a negative iscale can cause this, so
   13871 #     multiply by 10^(d2), which is now only allowed to be 24,
   13872 #     with a multiply by 10^8 and 10^16, which is exact since
   13873 #     10^24 is exact.  If the input was denormalized, we must
   13874 #     create a busy stack frame with the mul command and the
   13875 #     two operands, and allow the fpu to complete the multiply.
   13876 #
   13877 # Register usage:
   13878 #	Input/Output
   13879 #	d0: FPCR with RZ mode/Unchanged
   13880 #	d2: 0 or 24/unchanged
   13881 #	d3: x/x
   13882 #	d4: LEN/Unchanged
   13883 #	d5: ICTR:LAMBDA
   13884 #	d6: ILOG/Unchanged
   13885 #	d7: k-factor/Unchanged
   13886 #	a0: ptr for original operand/final result
   13887 #	a1: ptr to PTENRM array/Unchanged
   13888 #	a2: x/x
   13889 #	fp0: float(ILOG)/X adjusted for SCALE (Y)
   13890 #	fp1: 10^ISCALE/Unchanged
   13891 #	fp2: x/x
   13892 #	F_SCR1:x/x
   13893 #	F_SCR2:Abs(X) with $3fff exponent/Unchanged
   13894 #	L_SCR1:x/x
   13895 #	L_SCR2:first word of X packed/Unchanged
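#
# The denorm path below multiplies the mantissas with their exponents
# forced to zero and sums the exponents separately, so no intermediate
# product can underflow.  A rough C analogy using frexp()/ldexp() to split
# off and re-attach the binary exponent:
#
#	#include <math.h>
#
#	static double scale_no_underflow(double x, double s1, double s2, double s3)
#	{
#		int ex, e1, e2, e3;
#		double m = frexp(x, &ex) * frexp(s1, &e1)
#			 * frexp(s2, &e2) * frexp(s3, &e3);	/* mantissas only */
#
#		return ldexp(m, ex + e1 + e2 + e3);		/* exponent once  */
#	}
#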
   13896 
   13897 A9_str:
   13898 	fmov.x		(%a0),%fp0	# load X from memory
   13899 	fabs.x		%fp0		# use abs(X)
   13900 	tst.w		%d5		# LAMBDA is in lower word of d5
   13901 	bne.b		sc_mul		# if neg (LAMBDA = 1), scale by mul
   13902 	fdiv.x		%fp1,%fp0	# calculate X / SCALE -> Y to fp0
   13903 	bra.w		A10_st		# branch to A10
   13904 
   13905 sc_mul:
   13906 	tst.b		BINDEC_FLG(%a6)	# check for denorm
   13907 	beq.w		A9_norm		# if norm, continue with mul
   13908 
   13909 # for DENORM, we must calculate:
   13910 #	fp0 = input_op * 10^ISCALE * 10^24
   13911 # since the input operand is a DENORM, we can't multiply it directly.
   13912 # so, we do the multiplication of the exponents and mantissas separately.
   13913 # in this way, we avoid underflow on intermediate stages of the
   13914 # multiplication and guarantee a result without exception.
   13915 	fmovm.x		&0x2,-(%sp)	# save 10^ISCALE to stack
   13916 
   13917 	mov.w		(%sp),%d3	# grab exponent
   13918 	andi.w		&0x7fff,%d3	# clear sign
   13919 	ori.w		&0x8000,(%a0)	# make DENORM exp negative
   13920 	add.w		(%a0),%d3	# add DENORM exp to 10^ISCALE exp
   13921 	subi.w		&0x3fff,%d3	# subtract BIAS
    13922 	add.w		36(%a1),%d3	# add exponent word of 10^8 entry
    13923 	subi.w		&0x3fff,%d3	# subtract BIAS
    13924 	add.w		48(%a1),%d3	# add exponent word of 10^16 entry
   13925 	subi.w		&0x3fff,%d3	# subtract BIAS
   13926 
    13927 	bmi.w		sc_mul_err	# if result is DENORM, punt!!!
   13928 
   13929 	andi.w		&0x8000,(%sp)	# keep sign
   13930 	or.w		%d3,(%sp)	# insert new exponent
   13931 	andi.w		&0x7fff,(%a0)	# clear sign bit on DENORM again
   13932 	mov.l		0x8(%a0),-(%sp) # put input op mantissa on stk
   13933 	mov.l		0x4(%a0),-(%sp)
   13934 	mov.l		&0x3fff0000,-(%sp) # force exp to zero
   13935 	fmovm.x		(%sp)+,&0x80	# load normalized DENORM into fp0
   13936 	fmul.x		(%sp)+,%fp0
   13937 
   13938 #	fmul.x	36(%a1),%fp0	# multiply fp0 by 10^8
   13939 #	fmul.x	48(%a1),%fp0	# multiply fp0 by 10^16
   13940 	mov.l		36+8(%a1),-(%sp) # get 10^8 mantissa
   13941 	mov.l		36+4(%a1),-(%sp)
   13942 	mov.l		&0x3fff0000,-(%sp) # force exp to zero
   13943 	mov.l		48+8(%a1),-(%sp) # get 10^16 mantissa
   13944 	mov.l		48+4(%a1),-(%sp)
   13945 	mov.l		&0x3fff0000,-(%sp)# force exp to zero
   13946 	fmul.x		(%sp)+,%fp0	# multiply fp0 by 10^8
   13947 	fmul.x		(%sp)+,%fp0	# multiply fp0 by 10^16
   13948 	bra.b		A10_st
   13949 
   13950 sc_mul_err:
   13951 	bra.b		sc_mul_err
   13952 
   13953 A9_norm:
   13954 	tst.w		%d2		# test for small exp case
   13955 	beq.b		A9_con		# if zero, continue as normal
   13956 	fmul.x		36(%a1),%fp0	# multiply fp0 by 10^8
   13957 	fmul.x		48(%a1),%fp0	# multiply fp0 by 10^16
   13958 A9_con:
   13959 	fmul.x		%fp1,%fp0	# calculate X * SCALE -> Y to fp0
   13960 
   13961 # A10. Or in INEX.
   13962 #      If INEX is set, round error occurred.  This is compensated
   13963 #      for by 'or-ing' in the INEX2 flag to the lsb of Y.
   13964 #
   13965 # Register usage:
   13966 #	Input/Output
   13967 #	d0: FPCR with RZ mode/FPSR with INEX2 isolated
   13968 #	d2: x/x
   13969 #	d3: x/x
   13970 #	d4: LEN/Unchanged
   13971 #	d5: ICTR:LAMBDA
   13972 #	d6: ILOG/Unchanged
   13973 #	d7: k-factor/Unchanged
   13974 #	a0: ptr for original operand/final result
   13975 #	a1: ptr to PTENxx array/Unchanged
   13976 #	a2: x/ptr to FP_SCR1(a6)
   13977 #	fp0: Y/Y with lsb adjusted
   13978 #	fp1: 10^ISCALE/Unchanged
   13979 #	fp2: x/x
   13980 
   13981 A10_st:
   13982 	fmov.l		%fpsr,%d0	# get FPSR
   13983 	fmov.x		%fp0,FP_SCR1(%a6)	# move Y to memory
   13984 	lea.l		FP_SCR1(%a6),%a2	# load a2 with ptr to FP_SCR1
   13985 	btst		&9,%d0		# check if INEX2 set
   13986 	beq.b		A11_st		# if clear, skip rest
   13987 	or.l		&1,8(%a2)	# or in 1 to lsb of mantissa
   13988 	fmov.x		FP_SCR1(%a6),%fp0	# write adjusted Y back to fpu
   13989 
   13990 
   13991 # A11. Restore original FPCR; set size ext.
   13992 #      Perform FINT operation in the user's rounding mode.  Keep
   13993 #      the size to extended.  The sintdo entry point in the sint
   13994 #      routine expects the FPCR value to be in USER_FPCR for
   13995 #      mode and precision.  The original FPCR is saved in L_SCR1.
   13996 
   13997 A11_st:
   13998 	mov.l		USER_FPCR(%a6),L_SCR1(%a6)	# save it for later
   13999 	and.l		&0x00000030,USER_FPCR(%a6)	# set size to ext,
   14000 #					;block exceptions
   14001 
   14002 
   14003 # A12. Calculate YINT = FINT(Y) according to user's rounding mode.
   14004 #      The FPSP routine sintd0 is used.  The output is in fp0.
   14005 #
   14006 # Register usage:
   14007 #	Input/Output
   14008 #	d0: FPSR with AINEX cleared/FPCR with size set to ext
   14009 #	d2: x/x/scratch
   14010 #	d3: x/x
   14011 #	d4: LEN/Unchanged
   14012 #	d5: ICTR:LAMBDA/Unchanged
   14013 #	d6: ILOG/Unchanged
   14014 #	d7: k-factor/Unchanged
   14015 #	a0: ptr for original operand/src ptr for sintdo
   14016 #	a1: ptr to PTENxx array/Unchanged
   14017 #	a2: ptr to FP_SCR1(a6)/Unchanged
   14018 #	a6: temp pointer to FP_SCR1(a6) - orig value saved and restored
   14019 #	fp0: Y/YINT
   14020 #	fp1: 10^ISCALE/Unchanged
   14021 #	fp2: x/x
   14022 #	F_SCR1:x/x
   14023 #	F_SCR2:Y adjusted for inex/Y with original exponent
   14024 #	L_SCR1:x/original USER_FPCR
   14025 #	L_SCR2:first word of X packed/Unchanged
   14026 
   14027 A12_st:
   14028 	movm.l	&0xc0c0,-(%sp)	# save regs used by sintd0	 {%d0-%d1/%a0-%a1}
   14029 	mov.l	L_SCR1(%a6),-(%sp)
   14030 	mov.l	L_SCR2(%a6),-(%sp)
   14031 
   14032 	lea.l		FP_SCR1(%a6),%a0	# a0 is ptr to FP_SCR1(a6)
   14033 	fmov.x		%fp0,(%a0)	# move Y to memory at FP_SCR1(a6)
   14034 	tst.l		L_SCR2(%a6)	# test sign of original operand
   14035 	bge.b		do_fint12		# if pos, use Y
   14036 	or.l		&0x80000000,(%a0)	# if neg, use -Y
   14037 do_fint12:
   14038 	mov.l	USER_FPSR(%a6),-(%sp)
   14039 #	bsr	sintdo		# sint routine returns int in fp0
   14040 
   14041 	fmov.l	USER_FPCR(%a6),%fpcr
   14042 	fmov.l	&0x0,%fpsr			# clear the AEXC bits!!!
   14043 ##	mov.l		USER_FPCR(%a6),%d0	# ext prec/keep rnd mode
   14044 ##	andi.l		&0x00000030,%d0
   14045 ##	fmov.l		%d0,%fpcr
   14046 	fint.x		FP_SCR1(%a6),%fp0	# do fint()
   14047 	fmov.l	%fpsr,%d0
   14048 	or.w	%d0,FPSR_EXCEPT(%a6)
   14049 ##	fmov.l		&0x0,%fpcr
   14050 ##	fmov.l		%fpsr,%d0		# don't keep ccodes
   14051 ##	or.w		%d0,FPSR_EXCEPT(%a6)
   14052 
   14053 	mov.b	(%sp),USER_FPSR(%a6)
   14054 	add.l	&4,%sp
   14055 
   14056 	mov.l	(%sp)+,L_SCR2(%a6)
   14057 	mov.l	(%sp)+,L_SCR1(%a6)
   14058 	movm.l	(%sp)+,&0x303	# restore regs used by sint	 {%d0-%d1/%a0-%a1}
   14059 
   14060 	mov.l	L_SCR2(%a6),FP_SCR1(%a6)	# restore original exponent
   14061 	mov.l	L_SCR1(%a6),USER_FPCR(%a6)	# restore user's FPCR
   14062 
   14063 # A13. Check for LEN digits.
   14064 #      If the int operation results in more than LEN digits,
   14065 #      or less than LEN -1 digits, adjust ILOG and repeat from
   14066 #      A6.  This test occurs only on the first pass.  If the
   14067 #      result is exactly 10^LEN, decrement ILOG and divide
   14068 #      the mantissa by 10.  The calculation of 10^LEN cannot
    14069 #      be inexact, since all powers of ten up to 10^27 are exact
   14070 #      in extended precision, so the use of a previous power-of-ten
   14071 #      table will introduce no error.
   14072 #
   14073 #
   14074 # Register usage:
   14075 #	Input/Output
   14076 #	d0: FPCR with size set to ext/scratch final = 0
   14077 #	d2: x/x
   14078 #	d3: x/scratch final = x
   14079 #	d4: LEN/LEN adjusted
   14080 #	d5: ICTR:LAMBDA/LAMBDA:ICTR
   14081 #	d6: ILOG/ILOG adjusted
   14082 #	d7: k-factor/Unchanged
   14083 #	a0: pointer into memory for packed bcd string formation
   14084 #	a1: ptr to PTENxx array/Unchanged
   14085 #	a2: ptr to FP_SCR1(a6)/Unchanged
   14086 #	fp0: int portion of Y/abs(YINT) adjusted
   14087 #	fp1: 10^ISCALE/Unchanged
   14088 #	fp2: x/10^LEN
   14089 #	F_SCR1:x/x
   14090 #	F_SCR2:Y with original exponent/Unchanged
   14091 #	L_SCR1:original USER_FPCR/Unchanged
   14092 #	L_SCR2:first word of X packed/Unchanged
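#
# A compact C sketch of the digit-count test below (the retry only happens
# on the first pass, i.e. while ICTR is still zero; names illustrative):
#
#	#include <math.h>
#
#	/* returns 1 if ILOG was adjusted and A6 must be redone */
#	static int a13_check(double *yint, int *ilog, int len)
#	{
#		double lo = pow(10.0, (double)(len - 1));
#		double hi = lo * 10.0;				/* 10^LEN */
#
#		if (*yint < lo) {				/* too few digits  */
#			(*ilog)--;
#			return 1;
#		}
#		if (*yint > hi) {				/* too many digits */
#			(*ilog)++;
#			return 1;
#		}
#		if (*yint == hi) {				/* exactly 10^LEN  */
#			*yint /= 10.0;
#			(*ilog)++;
#		}
#		return 0;
#	}
#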
   14093 
   14094 A13_st:
   14095 	swap		%d5		# put ICTR in lower word of d5
   14096 	tst.w		%d5		# check if ICTR = 0
   14097 	bne		not_zr		# if non-zero, go to second test
   14098 #
   14099 # Compute 10^(LEN-1)
   14100 #
   14101 	fmov.s		FONE(%pc),%fp2	# init fp2 to 1.0
   14102 	mov.l		%d4,%d0		# put LEN in d0
   14103 	subq.l		&1,%d0		# d0 = LEN -1
   14104 	clr.l		%d3		# clr table index
   14105 l_loop:
   14106 	lsr.l		&1,%d0		# shift next bit into carry
   14107 	bcc.b		l_next		# if zero, skip the mul
   14108 	fmul.x		(%a1,%d3),%fp2	# mul by 10**(d3_bit_no)
   14109 l_next:
   14110 	add.l		&12,%d3		# inc d3 to next pwrten table entry
   14111 	tst.l		%d0		# test if LEN is zero
   14112 	bne.b		l_loop		# if not, loop
   14113 #
    14114 # 10^(LEN-1) is computed for this test and A14.  If the input was
   14115 # denormalized, check only the case in which YINT > 10^LEN.
   14116 #
   14117 	tst.b		BINDEC_FLG(%a6)	# check if input was norm
   14118 	beq.b		A13_con		# if norm, continue with checking
   14119 	fabs.x		%fp0		# take abs of YINT
   14120 	bra		test_2
   14121 #
   14122 # Compare abs(YINT) to 10^(LEN-1) and 10^LEN
   14123 #
   14124 A13_con:
   14125 	fabs.x		%fp0		# take abs of YINT
   14126 	fcmp.x		%fp0,%fp2	# compare abs(YINT) with 10^(LEN-1)
   14127 	fbge.w		test_2		# if greater, do next test
   14128 	subq.l		&1,%d6		# subtract 1 from ILOG
   14129 	mov.w		&1,%d5		# set ICTR
   14130 	fmov.l		&rm_mode*0x10,%fpcr	# set rmode to RM
   14131 	fmul.s		FTEN(%pc),%fp2	# compute 10^LEN
   14132 	bra.w		A6_str		# return to A6 and recompute YINT
   14133 test_2:
   14134 	fmul.s		FTEN(%pc),%fp2	# compute 10^LEN
   14135 	fcmp.x		%fp0,%fp2	# compare abs(YINT) with 10^LEN
   14136 	fblt.w		A14_st		# if less, all is ok, go to A14
   14137 	fbgt.w		fix_ex		# if greater, fix and redo
   14138 	fdiv.s		FTEN(%pc),%fp0	# if equal, divide by 10
   14139 	addq.l		&1,%d6		# and inc ILOG
   14140 	bra.b		A14_st		# and continue elsewhere
   14141 fix_ex:
   14142 	addq.l		&1,%d6		# increment ILOG by 1
   14143 	mov.w		&1,%d5		# set ICTR
   14144 	fmov.l		&rm_mode*0x10,%fpcr	# set rmode to RM
   14145 	bra.w		A6_str		# return to A6 and recompute YINT
   14146 #
   14147 # Since ICTR <> 0, we have already been through one adjustment,
   14148 # and shouldn't have another; this is to check if abs(YINT) = 10^LEN
   14149 # 10^LEN is again computed using whatever table is in a1 since the
   14150 # value calculated cannot be inexact.
   14151 #
   14152 not_zr:
   14153 	fmov.s		FONE(%pc),%fp2	# init fp2 to 1.0
   14154 	mov.l		%d4,%d0		# put LEN in d0
   14155 	clr.l		%d3		# clr table index
   14156 z_loop:
   14157 	lsr.l		&1,%d0		# shift next bit into carry
   14158 	bcc.b		z_next		# if zero, skip the mul
   14159 	fmul.x		(%a1,%d3),%fp2	# mul by 10**(d3_bit_no)
   14160 z_next:
   14161 	add.l		&12,%d3		# inc d3 to next pwrten table entry
   14162 	tst.l		%d0		# test if LEN is zero
   14163 	bne.b		z_loop		# if not, loop
   14164 	fabs.x		%fp0		# get abs(YINT)
   14165 	fcmp.x		%fp0,%fp2	# check if abs(YINT) = 10^LEN
   14166 	fbneq.w		A14_st		# if not, skip this
   14167 	fdiv.s		FTEN(%pc),%fp0	# divide abs(YINT) by 10
   14168 	addq.l		&1,%d6		# and inc ILOG by 1
   14169 	addq.l		&1,%d4		# and inc LEN
    14170 	fmul.s		FTEN(%pc),%fp2	# if LEN++, then get 10^^LEN
   14171 
   14172 # A14. Convert the mantissa to bcd.
   14173 #      The binstr routine is used to convert the LEN digit
   14174 #      mantissa to bcd in memory.  The input to binstr is
   14175 #      to be a fraction; i.e. (mantissa)/10^LEN and adjusted
   14176 #      such that the decimal point is to the left of bit 63.
   14177 #      The bcd digits are stored in the correct position in
   14178 #      the final string area in memory.
   14179 #
   14180 #
   14181 # Register usage:
   14182 #	Input/Output
   14183 #	d0: x/LEN call to binstr - final is 0
   14184 #	d1: x/0
   14185 #	d2: x/ms 32-bits of mant of abs(YINT)
   14186 #	d3: x/ls 32-bits of mant of abs(YINT)
   14187 #	d4: LEN/Unchanged
   14188 #	d5: ICTR:LAMBDA/LAMBDA:ICTR
   14189 #	d6: ILOG
   14190 #	d7: k-factor/Unchanged
   14191 #	a0: pointer into memory for packed bcd string formation
   14192 #	    /ptr to first mantissa byte in result string
   14193 #	a1: ptr to PTENxx array/Unchanged
   14194 #	a2: ptr to FP_SCR1(a6)/Unchanged
   14195 #	fp0: int portion of Y/abs(YINT) adjusted
   14196 #	fp1: 10^ISCALE/Unchanged
   14197 #	fp2: 10^LEN/Unchanged
   14198 #	F_SCR1:x/Work area for final result
   14199 #	F_SCR2:Y with original exponent/Unchanged
   14200 #	L_SCR1:original USER_FPCR/Unchanged
   14201 #	L_SCR2:first word of X packed/Unchanged
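#
# The fraction prepared below and handed to binstr is abs(YINT)/10^LEN held
# as a 0.64 fixed-point value; each multiply by 10 then pushes the next
# decimal digit out above the binary point.  A small C sketch of that
# extraction (names illustrative):
#
#	#include <stdint.h>
#
#	static void binstr_model(uint64_t frac, int len, unsigned char *digits)
#	{
#		int i;
#
#		for (i = 0; i < len; i++) {
#			uint64_t lo = (frac & 0xffffffffu) * 10u;
#			uint64_t hi = (frac >> 32) * 10u + (lo >> 32);
#
#			digits[i] = (unsigned char)(hi >> 32);	/* next digit    */
#			frac = (hi << 32) | (lo & 0xffffffffu);	/* keep fraction */
#		}
#	}
#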
   14202 
   14203 A14_st:
   14204 	fmov.l		&rz_mode*0x10,%fpcr	# force rz for conversion
   14205 	fdiv.x		%fp2,%fp0	# divide abs(YINT) by 10^LEN
   14206 	lea.l		FP_SCR0(%a6),%a0
   14207 	fmov.x		%fp0,(%a0)	# move abs(YINT)/10^LEN to memory
   14208 	mov.l		4(%a0),%d2	# move 2nd word of FP_RES to d2
   14209 	mov.l		8(%a0),%d3	# move 3rd word of FP_RES to d3
   14210 	clr.l		4(%a0)		# zero word 2 of FP_RES
   14211 	clr.l		8(%a0)		# zero word 3 of FP_RES
   14212 	mov.l		(%a0),%d0	# move exponent to d0
   14213 	swap		%d0		# put exponent in lower word
   14214 	beq.b		no_sft		# if zero, don't shift
   14215 	sub.l		&0x3ffd,%d0	# sub bias less 2 to make fract
   14216 	tst.l		%d0		# check if > 1
   14217 	bgt.b		no_sft		# if so, don't shift
   14218 	neg.l		%d0		# make exp positive
   14219 m_loop:
   14220 	lsr.l		&1,%d2		# shift d2:d3 right, add 0s
   14221 	roxr.l		&1,%d3		# the number of places
   14222 	dbf.w		%d0,m_loop	# given in d0
   14223 no_sft:
   14224 	tst.l		%d2		# check for mantissa of zero
   14225 	bne.b		no_zr		# if not, go on
   14226 	tst.l		%d3		# continue zero check
   14227 	beq.b		zer_m		# if zero, go directly to binstr
   14228 no_zr:
   14229 	clr.l		%d1		# put zero in d1 for addx
   14230 	add.l		&0x00000080,%d3	# inc at bit 7
   14231 	addx.l		%d1,%d2		# continue inc
   14232 	and.l		&0xffffff80,%d3	# strip off lsb not used by 882
   14233 zer_m:
   14234 	mov.l		%d4,%d0		# put LEN in d0 for binstr call
   14235 	addq.l		&3,%a0		# a0 points to M16 byte in result
   14236 	bsr		binstr		# call binstr to convert mant
   14237 
   14238 
   14239 # A15. Convert the exponent to bcd.
   14240 #      As in A14 above, the exp is converted to bcd and the
   14241 #      digits are stored in the final string.
   14242 #
    14243 #      Digits are stored in L_SCR1(a6) on return from binstr as:
   14244 #
   14245 #  	 32               16 15                0
   14246 #	-----------------------------------------
   14247 #  	|  0 | e3 | e2 | e1 | e4 |  X |  X |  X |
   14248 #	-----------------------------------------
   14249 #
   14250 # And are moved into their proper places in FP_SCR0.  If digit e4
   14251 # is non-zero, OPERR is signaled.  In all cases, all 4 digits are
   14252 # written as specified in the 881/882 manual for packed decimal.
   14253 #
   14254 # Register usage:
   14255 #	Input/Output
   14256 #	d0: x/LEN call to binstr - final is 0
   14257 #	d1: x/scratch (0);shift count for final exponent packing
   14258 #	d2: x/ms 32-bits of exp fraction/scratch
   14259 #	d3: x/ls 32-bits of exp fraction
   14260 #	d4: LEN/Unchanged
   14261 #	d5: ICTR:LAMBDA/LAMBDA:ICTR
   14262 #	d6: ILOG
   14263 #	d7: k-factor/Unchanged
   14264 #	a0: ptr to result string/ptr to L_SCR1(a6)
   14265 #	a1: ptr to PTENxx array/Unchanged
   14266 #	a2: ptr to FP_SCR1(a6)/Unchanged
   14267 #	fp0: abs(YINT) adjusted/float(ILOG)
   14268 #	fp1: 10^ISCALE/Unchanged
   14269 #	fp2: 10^LEN/Unchanged
   14270 #	F_SCR1:Work area for final result/BCD result
   14271 #	F_SCR2:Y with original exponent/ILOG/10^4
   14272 #	L_SCR1:original USER_FPCR/Exponent digits on return from binstr
   14273 #	L_SCR2:first word of X packed/Unchanged
   14274 
   14275 A15_st:
   14276 	tst.b		BINDEC_FLG(%a6)	# check for denorm
   14277 	beq.b		not_denorm
   14278 	ftest.x		%fp0		# test for zero
   14279 	fbeq.w		den_zero	# if zero, use k-factor or 4933
   14280 	fmov.l		%d6,%fp0	# float ILOG
   14281 	fabs.x		%fp0		# get abs of ILOG
   14282 	bra.b		convrt
   14283 den_zero:
   14284 	tst.l		%d7		# check sign of the k-factor
   14285 	blt.b		use_ilog	# if negative, use ILOG
   14286 	fmov.s		F4933(%pc),%fp0	# force exponent to 4933
   14287 	bra.b		convrt		# do it
   14288 use_ilog:
   14289 	fmov.l		%d6,%fp0	# float ILOG
   14290 	fabs.x		%fp0		# get abs of ILOG
   14291 	bra.b		convrt
   14292 not_denorm:
   14293 	ftest.x		%fp0		# test for zero
   14294 	fbneq.w		not_zero	# if zero, force exponent
   14295 	fmov.s		FONE(%pc),%fp0	# force exponent to 1
   14296 	bra.b		convrt		# do it
   14297 not_zero:
   14298 	fmov.l		%d6,%fp0	# float ILOG
   14299 	fabs.x		%fp0		# get abs of ILOG
   14300 convrt:
   14301 	fdiv.x		24(%a1),%fp0	# compute ILOG/10^4
   14302 	fmov.x		%fp0,FP_SCR1(%a6)	# store fp0 in memory
   14303 	mov.l		4(%a2),%d2	# move word 2 to d2
   14304 	mov.l		8(%a2),%d3	# move word 3 to d3
   14305 	mov.w		(%a2),%d0	# move exp to d0
   14306 	beq.b		x_loop_fin	# if zero, skip the shift
   14307 	sub.w		&0x3ffd,%d0	# subtract off bias
   14308 	neg.w		%d0		# make exp positive
   14309 x_loop:
   14310 	lsr.l		&1,%d2		# shift d2:d3 right
   14311 	roxr.l		&1,%d3		# the number of places
   14312 	dbf.w		%d0,x_loop	# given in d0
   14313 x_loop_fin:
   14314 	clr.l		%d1		# put zero in d1 for addx
    14315 	add.l		&0x00000080,%d3	# inc at bit 7
   14316 	addx.l		%d1,%d2		# continue inc
   14317 	and.l		&0xffffff80,%d3	# strip off lsb not used by 882
   14318 	mov.l		&4,%d0		# put 4 in d0 for binstr call
   14319 	lea.l		L_SCR1(%a6),%a0	# a0 is ptr to L_SCR1 for exp digits
   14320 	bsr		binstr		# call binstr to convert exp
   14321 	mov.l		L_SCR1(%a6),%d0	# load L_SCR1 lword to d0
   14322 	mov.l		&12,%d1		# use d1 for shift count
   14323 	lsr.l		%d1,%d0		# shift d0 right by 12
   14324 	bfins		%d0,FP_SCR0(%a6){&4:&12}	# put e3:e2:e1 in FP_SCR0
   14325 	lsr.l		%d1,%d0		# shift d0 right by 12
   14326 	bfins		%d0,FP_SCR0(%a6){&16:&4}	# put e4 in FP_SCR0
   14327 	tst.b		%d0		# check if e4 is zero
   14328 	beq.b		A16_st		# if zero, skip rest
   14329 	or.l		&opaop_mask,USER_FPSR(%a6)	# set OPERR & AIOP in USER_FPSR
   14330 
   14331 
   14332 # A16. Write sign bits to final string.
   14333 #	   Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
   14334 #
   14335 # Register usage:
   14336 #	Input/Output
   14337 #	d0: x/scratch - final is x
   14338 #	d2: x/x
   14339 #	d3: x/x
   14340 #	d4: LEN/Unchanged
   14341 #	d5: ICTR:LAMBDA/LAMBDA:ICTR
   14342 #	d6: ILOG/ILOG adjusted
   14343 #	d7: k-factor/Unchanged
   14344 #	a0: ptr to L_SCR1(a6)/Unchanged
   14345 #	a1: ptr to PTENxx array/Unchanged
   14346 #	a2: ptr to FP_SCR1(a6)/Unchanged
   14347 #	fp0: float(ILOG)/Unchanged
   14348 #	fp1: 10^ISCALE/Unchanged
   14349 #	fp2: 10^LEN/Unchanged
   14350 #	F_SCR1:BCD result with correct signs
   14351 #	F_SCR2:ILOG/10^4
   14352 #	L_SCR1:Exponent digits on return from binstr
   14353 #	L_SCR2:first word of X packed/Unchanged
   14354 
   14355 A16_st:
   14356 	clr.l		%d0		# clr d0 for collection of signs
   14357 	and.b		&0x0f,FP_SCR0(%a6)	# clear first nibble of FP_SCR0
   14358 	tst.l		L_SCR2(%a6)	# check sign of original mantissa
   14359 	bge.b		mant_p		# if pos, don't set SM
   14360 	mov.l		&2,%d0		# move 2 in to d0 for SM
   14361 mant_p:
   14362 	tst.l		%d6		# check sign of ILOG
   14363 	bge.b		wr_sgn		# if pos, don't set SE
   14364 	addq.l		&1,%d0		# set bit 0 in d0 for SE
   14365 wr_sgn:
   14366 	bfins		%d0,FP_SCR0(%a6){&0:&2}	# insert SM and SE into FP_SCR0
   14367 
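#
# A minimal C sketch (ours, not the FPSP's) of the sign insertion above:
# the upper nibble of the first lword is cleared (the and.b), then SM
# (sign of the mantissa) goes to bit 31 and SE (sign of ILOG, i.e. of the
# decimal exponent) to bit 30 via the 2-bit bfins.
#
#	#include <stdint.h>
#
#	uint32_t insert_signs(uint32_t lword, int mant_neg, int32_t ilog)
#	{
#		uint32_t signs = 0;
#
#		if (mant_neg)
#			signs |= 0x2;			/* SM */
#		if (ilog < 0)
#			signs |= 0x1;			/* SE */
#		return (lword & 0x0fffffff) | (signs << 30);
#	}
#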
   14368 # Clean up and restore all registers used.
   14369 
   14370 	fmov.l		&0,%fpsr	# clear possible inex2/ainex bits
   14371 	fmovm.x		(%sp)+,&0xe0	#  {%fp0-%fp2}
   14372 	movm.l		(%sp)+,&0x4fc	#  {%d2-%d7/%a2}
   14373 	rts
   14374 
   14375 	global		PTENRN
   14376 PTENRN:
   14377 	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
   14378 	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
   14379 	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
   14380 	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
   14381 	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
   14382 	long		0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32
   14383 	long		0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64
   14384 	long		0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
   14385 	long		0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
   14386 	long		0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
   14387 	long		0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024
   14388 	long		0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
   14389 	long		0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096
   14390 
   14391 	global		PTENRP
   14392 PTENRP:
   14393 	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
   14394 	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
   14395 	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
   14396 	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
   14397 	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
   14398 	long		0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32
   14399 	long		0x40D30000,0xC2781F49,0xFFCFA6D6	# 10 ^ 64
   14400 	long		0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
   14401 	long		0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
   14402 	long		0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
   14403 	long		0x4D480000,0xC9767586,0x81750C18	# 10 ^ 1024
   14404 	long		0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
   14405 	long		0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096
   14406 
   14407 	global		PTENRM
   14408 PTENRM:
   14409 	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
   14410 	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
   14411 	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
   14412 	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
   14413 	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
   14414 	long		0x40690000,0x9DC5ADA8,0x2B70B59D	# 10 ^ 32
   14415 	long		0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64
   14416 	long		0x41A80000,0x93BA47C9,0x80E98CDF	# 10 ^ 128
   14417 	long		0x43510000,0xAA7EEBFB,0x9DF9DE8D	# 10 ^ 256
   14418 	long		0x46A30000,0xE319A0AE,0xA60E91C6	# 10 ^ 512
   14419 	long		0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024
   14420 	long		0x5A920000,0x9E8B3B5D,0xC53D5DE4	# 10 ^ 2048
   14421 	long		0x75250000,0xC4605202,0x8A20979A	# 10 ^ 4096
   14422 
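#
# The three tables above hold 10^(2^k), k = 0..12, rounded to nearest,
# toward plus infinity, and toward minus infinity respectively.  The
# power-of-two spacing lets a caller build 10^n by multiplying together
# the entries selected by the set bits of n; the C sketch below is ours
# (it assumes long double is the 80-bit extended format) and only
# illustrates that indexing scheme, not the FPSP's own code.
#
#	/* 10^(2^k), k = 0..12, mirroring the PTENRN layout. */
#	static const long double pten[13] = {
#		1e1L, 1e2L, 1e4L, 1e8L, 1e16L, 1e32L, 1e64L,
#		1e128L, 1e256L, 1e512L, 1e1024L, 1e2048L, 1e4096L
#	};
#
#	/* Return 10^n (0 <= n <= 8191) from the table. */
#	long double pow10_by_table(unsigned int n)
#	{
#		long double r = 1.0L;
#		int k;
#
#		for (k = 0; n != 0; k++, n >>= 1)
#			if (n & 1)
#				r *= pten[k];
#		return r;
#	}
#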
   14423 #########################################################################
   14424 # binstr(): Converts a 64-bit binary integer to bcd.			#
   14425 #									#
   14426 # INPUT *************************************************************** #
   14427 #	d2:d3 = 64-bit binary integer					#
   14428 #	d0    = desired length (LEN)					#
   14429 #	a0    = pointer to start in memory for bcd characters		#
   14430 #          	(This pointer must point to byte 4 of the first		#
   14431 #          	 lword of the packed decimal memory string.)		#
   14432 #									#
   14433 # OUTPUT ************************************************************** #
   14434 #	a0 = pointer to LEN bcd digits representing the 64-bit integer.	#
   14435 #									#
   14436 # ALGORITHM ***********************************************************	#
   14437 #	The 64-bit binary is assumed to have a decimal point before	#
   14438 #	bit 63.  The fraction is multiplied by 10 using a mul by 2	#
   14439 #	shift and a mul by 8 shift.  The bits shifted out of the	#
   14440 #	msb form a decimal digit.  This process is iterated until	#
   14441 #	LEN digits are formed.						#
   14442 #									#
   14443 # A1. Init d7 to 1.  D7 is the byte digit counter, and if 1, the	#
   14444 #     digit formed will be assumed the least significant.  This is	#
   14445 #     to force the first byte formed to have a 0 in the upper 4 bits.	#
   14446 #									#
   14447 # A2. Beginning of the loop:						#
   14448 #     Copy the fraction in d2:d3 to d4:d5.				#
   14449 #									#
   14450 # A3. Multiply the fraction in d2:d3 by 8 using bit-field		#
   14451 #     extracts and shifts.  The three msbs from d2 will go into d1.	#
   14452 #									#
   14453 # A4. Multiply the fraction in d4:d5 by 2 using shifts.  The msb	#
   14454 #     will be collected by the carry.					#
   14455 #									#
   14456 # A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5	#
   14457 #     into d2:d3.  D1 will contain the bcd digit formed.		#
   14458 #									#
   14459 # A6. Test d7.  If zero, the digit formed is the ms digit.  If non-	#
   14460 #     zero, it is the ls digit.  Put the digit in its place in the	#
   14461 #     upper word of d7.  If it is the ls digit, write the byte	#
   14462 #     from d7 to memory.						#
   14463 #									#
   14464 # A7. Decrement d0 (LEN counter) and repeat the loop until zero.	#
   14465 #									#
   14466 #########################################################################
   14467 
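#
#	As a rough illustration of the algorithm described above (this C
#	sketch is ours, not part of the FPSP), each pass multiplies the
#	64-bit fraction by ten as (x * 8) + (x * 2); the bits that spill
#	past bit 63, plus the carry of the 64-bit add, form the next
#	decimal digit, most significant digit first.  The digits are then
#	packed two per byte, with the first byte getting a zero upper
#	nibble exactly as step A1 arranges.
#
#	#include <stdint.h>
#
#	void binstr_sketch(uint64_t frac, int len, uint8_t *bcd)
#	{
#		int second = 1;		/* next digit is an ls nibble (A1) */
#		uint8_t hold = 0;	/* pending ms nibble of the byte   */
#		int i;
#
#		for (i = 0; i < len; i++) {
#			uint64_t m8 = frac << 3, m2 = frac << 1;
#			uint64_t sum = m8 + m2;
#			unsigned int digit = (unsigned int)
#			    ((frac >> 61) + (frac >> 63) + (sum < m8));
#
#			frac = sum;		/* fraction * 10, mod 2^64 */
#			if (second) {
#				*bcd++ = (uint8_t)((hold << 4) | digit);
#				second = 0;
#			} else {
#				hold = (uint8_t)digit;
#				second = 1;
#			}
#		}
#		if (second)			/* dangling ms digit */
#			*bcd++ = (uint8_t)(hold << 4);
#	}
#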
   14468 #	Implementation Notes:
   14469 #
   14470 #	The registers are used as follows:
   14471 #
   14472 #		d0: LEN counter
   14473 #		d1: temp used to form the digit
   14474 #		d2: upper 32-bits of fraction for mul by 8
   14475 #		d3: lower 32-bits of fraction for mul by 8
   14476 #		d4: upper 32-bits of fraction for mul by 2
   14477 #		d5: lower 32-bits of fraction for mul by 2
   14478 #		d6: temp for bit-field extracts
   14479 #		d7: byte digit formation word;digit count {0,1}
   14480 #		a0: pointer into memory for packed bcd string formation
   14481 #
   14482 
   14483 	global		binstr
   14484 binstr:
   14485 	movm.l		&0xff00,-(%sp)	#  {%d0-%d7}
   14486 
   14487 #
   14488 # A1: Init d7
   14489 #
   14490 	mov.l		&1,%d7		# init d7 for second digit
   14491 	subq.l		&1,%d0		# adjust for dbf (else LEN+1 passes)
   14492 #
   14493 # A2. Copy d2:d3 to d4:d5.  Start loop.
   14494 #
   14495 loop:
   14496 	mov.l		%d2,%d4		# copy the fraction before muls
   14497 	mov.l		%d3,%d5		# to d4:d5
   14498 #
   14499 # A3. Multiply d2:d3 by 8; extract msbs into d1.
   14500 #
   14501 	bfextu		%d2{&0:&3},%d1	# copy 3 msbs of d2 into d1
   14502 	asl.l		&3,%d2		# shift d2 left by 3 places
   14503 	bfextu		%d3{&0:&3},%d6	# copy 3 msbs of d3 into d6
   14504 	asl.l		&3,%d3		# shift d3 left by 3 places
   14505 	or.l		%d6,%d2		# or in msbs from d3 into d2
   14506 #
   14507 # A4. Multiply d4:d5 by 2; add carry out to d1.
   14508 #
   14509 	asl.l		&1,%d5		# mul d5 by 2
   14510 	roxl.l		&1,%d4		# mul d4 by 2
   14511 	swap		%d6		# put 0 in d6 lower word
   14512 	addx.w		%d6,%d1		# add in extend from mul by 2
   14513 #
   14514 # A5. Add mul by 8 to mul by 2.  D1 contains the digit formed.
   14515 #
   14516 	add.l		%d5,%d3		# add lower 32 bits
   14517 	nop				# ERRATA FIX #13 (Rev. 1.2 6/6/90)
   14518 	addx.l		%d4,%d2		# add with extend upper 32 bits
   14519 	nop				# ERRATA FIX #13 (Rev. 1.2 6/6/90)
   14520 	addx.w		%d6,%d1		# add in extend from add to d1
   14521 	swap		%d6		# with d6 = 0; put 0 in upper word
   14522 #
   14523 # A6. Test d7 and branch.
   14524 #
   14525 	tst.w		%d7		# if zero, store digit & to loop
   14526 	beq.b		first_d		# if non-zero, form byte & write
   14527 sec_d:
   14528 	swap		%d7		# bring first digit to word d7b
   14529 	asl.w		&4,%d7		# first digit in upper 4 bits d7b
   14530 	add.w		%d1,%d7		# add in ls digit to d7b
   14531 	mov.b		%d7,(%a0)+	# store d7b byte in memory
   14532 	swap		%d7		# put LEN counter in word d7a
   14533 	clr.w		%d7		# set d7a to signal no digits done
   14534 	dbf.w		%d0,loop	# do loop some more!
   14535 	bra.b		end_bstr	# finished, so exit
   14536 first_d:
   14537 	swap		%d7		# put digit word in d7b
   14538 	mov.w		%d1,%d7		# put new digit in d7b
   14539 	swap		%d7		# put LEN counter in word d7a
   14540 	addq.w		&1,%d7		# set d7a to signal first digit done
   14541 	dbf.w		%d0,loop	# do loop some more!
   14542 	swap		%d7		# put last digit in string
   14543 	lsl.w		&4,%d7		# move it to upper 4 bits
   14544 	mov.b		%d7,(%a0)+	# store it in memory string
   14545 #
   14546 # Clean up and return; the bcd string has been written through (a0)+.
   14547 #
   14548 end_bstr:
   14549 	movm.l		(%sp)+,&0xff	#  {%d0-%d7}
   14550 	rts
   14551 
   14552 #########################################################################
   14553 # XDEF ****************************************************************	#
   14554 #	facc_in_b(): dmem_read_byte failed				#
   14555 #	facc_in_w(): dmem_read_word failed				#
   14556 #	facc_in_l(): dmem_read_long failed				#
   14557 #	facc_in_d(): dmem_read of dbl prec failed			#
   14558 #	facc_in_x(): dmem_read of ext prec failed			#
   14559 #									#
   14560 #	facc_out_b(): dmem_write_byte failed				#
   14561 #	facc_out_w(): dmem_write_word failed				#
   14562 #	facc_out_l(): dmem_write_long failed				#
   14563 #	facc_out_d(): dmem_write of dbl prec failed			#
   14564 #	facc_out_x(): dmem_write of ext prec failed			#
   14565 #									#
   14566 # XREF ****************************************************************	#
   14567 #	_real_access() - exit through access error handler		#
   14568 #									#
   14569 # INPUT ***************************************************************	#
   14570 #	None								#
   14571 # 									#
   14572 # OUTPUT **************************************************************	#
   14573 #	None								#
   14574 #									#
   14575 # ALGORITHM ***********************************************************	#
   14576 # 	Flow jumps here when an FP data fetch call gets an error 	#
   14577 # result. This means the operating system wants an access error frame	#
   14578 # made out of the current exception stack frame. 			#
   14579 #	So, we first call restore() which makes sure that any updated	#
   14580 # -(an)+ register gets returned to its pre-exception value and then	#
   14581 # we change the stack to an access error stack frame.			#
   14582 #									#
   14583 #########################################################################
   14584 
   14585 facc_in_b:
   14586 	movq.l		&0x1,%d0			# one byte
   14587 	bsr.w		restore				# fix An
   14588 
   14589 	mov.w		&0x0121,EXC_VOFF(%a6)		# set FSLW
   14590 	bra.w		facc_finish
   14591 
   14592 facc_in_w:
   14593 	movq.l		&0x2,%d0			# two bytes
   14594 	bsr.w		restore				# fix An
   14595 
   14596 	mov.w		&0x0141,EXC_VOFF(%a6)		# set FSLW
   14597 	bra.b		facc_finish
   14598 
   14599 facc_in_l:
   14600 	movq.l		&0x4,%d0			# four bytes
   14601 	bsr.w		restore				# fix An
   14602 
   14603 	mov.w		&0x0101,EXC_VOFF(%a6)		# set FSLW
   14604 	bra.b		facc_finish
   14605 
   14606 facc_in_d:
   14607 	movq.l		&0x8,%d0			# eight bytes
   14608 	bsr.w		restore				# fix An
   14609 
   14610 	mov.w		&0x0161,EXC_VOFF(%a6)		# set FSLW
   14611 	bra.b		facc_finish
   14612 
   14613 facc_in_x:
   14614 	movq.l		&0xc,%d0			# twelve bytes
   14615 	bsr.w		restore				# fix An
   14616 
   14617 	mov.w		&0x0161,EXC_VOFF(%a6)		# set FSLW
   14618 	bra.b		facc_finish
   14619 
   14620 ################################################################
   14621 
   14622 facc_out_b:
   14623 	movq.l		&0x1,%d0			# one byte
   14624 	bsr.w		restore				# restore An
   14625 
   14626 	mov.w		&0x00a1,EXC_VOFF(%a6)		# set FSLW
   14627 	bra.b		facc_finish
   14628 
   14629 facc_out_w:
   14630 	movq.l		&0x2,%d0			# two bytes
   14631 	bsr.w		restore				# restore An
   14632 
   14633 	mov.w		&0x00c1,EXC_VOFF(%a6)		# set FSLW
   14634 	bra.b		facc_finish
   14635 
   14636 facc_out_l:
   14637 	movq.l		&0x4,%d0			# four bytes
   14638 	bsr.w		restore				# restore An
   14639 
   14640 	mov.w		&0x0081,EXC_VOFF(%a6)		# set FSLW
   14641 	bra.b		facc_finish
   14642 
   14643 facc_out_d:
   14644 	movq.l		&0x8,%d0			# eight bytes
   14645 	bsr.w		restore				# restore An
   14646 
   14647 	mov.w		&0x00e1,EXC_VOFF(%a6)		# set FSLW
   14648 	bra.b		facc_finish
   14649 
   14650 facc_out_x:
   14651 	mov.l		&0xc,%d0			# twelve bytes
   14652 	bsr.w		restore				# restore An
   14653 
   14654 	mov.w		&0x00e1,EXC_VOFF(%a6)		# set FSLW
   14655 
   14656 # here's where we actually create the access error frame from the
   14657 # current exception stack frame.
   14658 facc_finish:
   14659 	mov.l		USER_FPIAR(%a6),EXC_PC(%a6) # store current PC
   14660 
   14661 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   14662 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   14663 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   14664 
   14665 	unlk		%a6
   14666 
   14667 	mov.l		(%sp),-(%sp)		# store SR, hi(PC)
   14668 	mov.l		0x8(%sp),0x4(%sp)	# store lo(PC)
   14669 	mov.l		0xc(%sp),0x8(%sp)	# store EA
   14670 	mov.l		&0x00000001,0xc(%sp)	# store FSLW
   14671 	mov.w		0x6(%sp),0xc(%sp)	# fix FSLW (size)
   14672 	mov.w		&0x4008,0x6(%sp)	# store voff
   14673 
   14674 	btst		&0x5,(%sp)		# supervisor or user mode?
   14675 	beq.b		facc_out2		# user
   14676 	bset		&0x2,0xd(%sp)		# set supervisor TM bit
   14677 
   14678 facc_out2:
   14679 	bra.l		_real_access
   14680 
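#
# For illustration only, the frame that facc_finish leaves on the
# supervisor stack before branching to _real_access, written as a C
# struct.  The struct and its field names are ours; the offsets and
# contents follow the stores above (big-endian 68060 layout assumed).
#
#	#include <stdint.h>
#
#	struct access_error_frame {		/* offset from final %sp */
#		uint16_t sr;		/* 0x0: status register             */
#		uint32_t pc;		/* 0x2: PC, taken from USER_FPIAR   */
#		uint16_t voff;		/* 0x6: format/vector word, 0x4008  */
#		uint32_t ea;		/* 0x8: faulting effective address  */
#		uint32_t fslw;		/* 0xc: FSLW built from the word the
#					 *      facc_* stub put in EXC_VOFF */
#	} __attribute__((packed));
#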
   14681 ##################################################################
   14682 
   14683 # if the effective addressing mode was predecrement or postincrement,
   14684 # the emulation has already changed An to the correct post-
   14685 # instruction value. but since we're exiting to the access error
   14686 # handler, An must be returned to its pre-instruction value.
   14687 # we do that here.
   14688 restore:
   14689 	mov.b		EXC_OPWORD+0x1(%a6),%d1
   14690 	andi.b		&0x38,%d1		# extract opmode
   14691 	cmpi.b		%d1,&0x18		# postinc?
   14692 	beq.w		rest_inc
   14693 	cmpi.b		%d1,&0x20		# predec?
   14694 	beq.w		rest_dec
   14695 	rts
   14696 
   14697 rest_inc:
   14698 	mov.b		EXC_OPWORD+0x1(%a6),%d1
   14699 	andi.w		&0x0007,%d1		# fetch An
   14700 
   14701 	mov.w		(tbl_rest_inc.b,%pc,%d1.w*2),%d1
   14702 	jmp		(tbl_rest_inc.b,%pc,%d1.w*1)
   14703 
   14704 tbl_rest_inc:
   14705 	short		ri_a0 - tbl_rest_inc
   14706 	short		ri_a1 - tbl_rest_inc
   14707 	short		ri_a2 - tbl_rest_inc
   14708 	short		ri_a3 - tbl_rest_inc
   14709 	short		ri_a4 - tbl_rest_inc
   14710 	short		ri_a5 - tbl_rest_inc
   14711 	short		ri_a6 - tbl_rest_inc
   14712 	short		ri_a7 - tbl_rest_inc
   14713 
   14714 ri_a0:
   14715 	sub.l		%d0,EXC_DREGS+0x8(%a6)	# fix stacked a0
   14716 	rts
   14717 ri_a1:
   14718 	sub.l		%d0,EXC_DREGS+0xc(%a6)	# fix stacked a1
   14719 	rts
   14720 ri_a2:
   14721 	sub.l		%d0,%a2			# fix a2
   14722 	rts
   14723 ri_a3:
   14724 	sub.l		%d0,%a3			# fix a3
   14725 	rts
   14726 ri_a4:
   14727 	sub.l		%d0,%a4			# fix a4
   14728 	rts
   14729 ri_a5:
   14730 	sub.l		%d0,%a5			# fix a5
   14731 	rts
   14732 ri_a6:
   14733 	sub.l		%d0,(%a6)		# fix stacked a6
   14734 	rts
   14735 # if it's an fmove out instruction, we don't have to fix a7
   14736 # because we hadn't changed it yet. if it's an opclass two
   14737 # instruction (data moved in) and the exception occurred in supervisor
   14738 # mode, then a7 also wasn't updated. if it was user mode, then
   14739 # restore the correct a7, which is currently in the USP.
   14740 ri_a7:
   14741 	cmpi.b		EXC_VOFF(%a6),&0x30	# move in or out?
   14742 	bne.b		ri_a7_done		# out
   14743 
   14744 	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
   14745 	bne.b		ri_a7_done		# supervisor
   14746 	movc		%usp,%a0		# restore USP
   14747 	sub.l		%d0,%a0
   14748 	movc		%a0,%usp
   14749 ri_a7_done:
   14750 	rts
   14751 
   14752 # need to invert adjustment value if the <ea> was predec
   14753 rest_dec:
   14754 	neg.l		%d0
   14755 	bra.b		rest_inc
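
#
# A minimal C sketch (ours) of the fix-up that restore/rest_dec perform:
# the size in bytes of the failed access is what the facc_* stubs loaded
# into %d0, and only the (An)+ and -(An) addressing modes need any work.
# The special handling of the stacked a0/a1/a6 copies and of a7/USP in
# the code above is omitted here.
#
#	#include <stdint.h>
#
#	void restore_an(uint32_t *an, uint16_t opword, uint32_t size)
#	{
#		unsigned int mode = (opword >> 3) & 0x7; /* <ea> mode field */
#
#		if (mode == 0x3)		/* (An)+ : undo the increment */
#			*an -= size;
#		else if (mode == 0x4)		/* -(An)  : undo the decrement */
#			*an += size;
#		/* other modes never modified An */
#	}
#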
   14756