Home | History | Annotate | Line # | Download | only in dist
fpsp.s revision 1.2
      1 #
      2 # $NetBSD: fpsp.s,v 1.2 2001/09/16 16:34:30 wiz Exp $
      3 #
      4 
      5 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      6 # MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
      7 # M68000 Hi-Performance Microprocessor Division
      8 # M68060 Software Package Production Release
      9 #
     10 # M68060 Software Package Copyright (C) 1993, 1994, 1995, 1996 Motorola Inc.
     11 # All rights reserved.
     12 #
     13 # THE SOFTWARE is provided on an "AS IS" basis and without warranty.
     14 # To the maximum extent permitted by applicable law,
     15 # MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
     16 # INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS
     17 # FOR A PARTICULAR PURPOSE and any warranty against infringement with
     18 # regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
     19 # and any accompanying written materials.
     20 #
     21 # To the maximum extent permitted by applicable law,
     22 # IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
     23 # (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
     24 # BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
     25 # ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
     26 #
     27 # Motorola assumes no responsibility for the maintenance and support
     28 # of the SOFTWARE.
     29 #
     30 # You are hereby granted a copyright license to use, modify, and distribute the
     31 # SOFTWARE so long as this entire notice is retained without alteration
     32 # in any modified and/or redistributed versions, and that such modified
     33 # versions are clearly identified as such.
     34 # No licenses are granted by implication, estoppel or otherwise under any
     35 # patents or trademarks of Motorola, Inc.
     36 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     37 
     38 #
     39 # freal.s:
     40 #	This file is appended to the top of the 060FPSP package
     41 # and contains the entry points into the package. The user, in
     42 # effect, branches to one of the branch table entries located
     43 # after _060FPSP_TABLE.
     44 #	Also, subroutine stubs exist in this file (_fpsp_done for
     45 # example) that are referenced by the FPSP package itself in order
     46 # to call a given routine. The stub routine actually performs the
     47 # callout. The FPSP code does a "bsr" to the stub routine. This
     48 # extra layer of hierarchy adds a slight performance penalty but
     49 # it makes the FPSP code easier to read and more maintainable.
     50 #
     51 
     52 set	_off_bsun,	0x00		# callout slot read by _real_bsun
     53 set	_off_snan,	0x04		# callout slot read by _real_snan
     54 set	_off_operr,	0x08		# callout slot read by _real_operr
     55 set	_off_ovfl,	0x0c		# callout slot read by _real_ovfl
     56 set	_off_unfl,	0x10		# callout slot read by _real_unfl
     57 set	_off_dz,	0x14		# callout slot read by _real_dz
     58 set	_off_inex,	0x18		# callout slot read by _real_inex
     59 set	_off_fline,	0x1c		# callout slot read by _real_fline
     60 set	_off_fpu_dis,	0x20		# callout slot read by _real_fpu_disabled
     61 set	_off_trap,	0x24		# callout slot read by _real_trap
     62 set	_off_trace,	0x28		# callout slot read by _real_trace
     63 set	_off_access,	0x2c		# callout slot read by _real_access
     64 set	_off_done,	0x30		# callout slot read by _fpsp_done
     65 
     66 set	_off_imr,	0x40		# callout slot read by _imem_read
     67 set	_off_dmr,	0x44		# callout slot read by _dmem_read
     68 set	_off_dmw,	0x48		# callout slot read by _dmem_write
     69 set	_off_irw,	0x4c		# callout slot read by _imem_read_word
     70 set	_off_irl,	0x50		# callout slot read by _imem_read_long
     71 set	_off_drb,	0x54		# callout slot read by _dmem_read_byte
     72 set	_off_drw,	0x58		# callout slot read by _dmem_read_word
     73 set	_off_drl,	0x5c		# callout slot read by _dmem_read_long
     74 set	_off_dwb,	0x60		# callout slot read by _dmem_write_byte
     75 set	_off_dww,	0x64		# callout slot read by _dmem_write_word
     76 set	_off_dwl,	0x68		# callout slot read by _dmem_write_long
     77 
     78 _060FPSP_TABLE:
     79 
     80 ###############################################################
     81 
     82 # Here's the table of ENTRY POINTS for those linking the package.
     83 	bra.l		_fpsp_snan		# entry 0 (+0x00, assuming 6-byte bra.l + 2-byte pad)
     84 	short		0x0000			# pad entry to 8 bytes
     85 	bra.l		_fpsp_operr		# entry 1 (+0x08)
     86 	short		0x0000			# pad entry to 8 bytes
     87 	bra.l		_fpsp_ovfl		# entry 2 (+0x10)
     88 	short		0x0000			# pad entry to 8 bytes
     89 	bra.l		_fpsp_unfl		# entry 3 (+0x18)
     90 	short		0x0000			# pad entry to 8 bytes
     91 	bra.l		_fpsp_dz		# entry 4 (+0x20)
     92 	short		0x0000			# pad entry to 8 bytes
     93 	bra.l		_fpsp_inex		# entry 5 (+0x28)
     94 	short		0x0000			# pad entry to 8 bytes
     95 	bra.l		_fpsp_fline		# entry 6 (+0x30)
     96 	short		0x0000			# pad entry to 8 bytes
     97 	bra.l		_fpsp_unsupp		# entry 7 (+0x38)
     98 	short		0x0000			# pad entry to 8 bytes
     99 	bra.l		_fpsp_effadd		# entry 8 (+0x40)
    100 	short		0x0000			# pad entry to 8 bytes
    101 
    102 	space 		56			# reserve 56 bytes: 9 entries * 8 + 56 = 128 (0x80)
    103 
    104 ###############################################################
    105 	global		_fpsp_done		# stub: transfer to the routine named by callout slot _off_done
    106 _fpsp_done:
    107 	mov.l		%d0,-(%sp)		# push d0; d0 is used as scratch below
    108 	mov.l		(_060FPSP_TABLE-0x80+_off_done,%pc),%d0	# d0 = longword held in the slot
    109 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (_060FPSP_TABLE-0x80)+d0 as the target addr
    110 	mov.l		0x4(%sp),%d0		# reload the d0 value pushed above
    111 	rtd		&0x4			# pop target into pc and discard the saved-d0 slot
    112 
    113 	global		_real_ovfl		# stub: transfer to the routine named by callout slot _off_ovfl
    114 _real_ovfl:
    115 	mov.l		%d0,-(%sp)		# push d0; d0 is used as scratch below
    116 	mov.l		(_060FPSP_TABLE-0x80+_off_ovfl,%pc),%d0	# d0 = longword held in the slot
    117 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (_060FPSP_TABLE-0x80)+d0 as the target addr
    118 	mov.l		0x4(%sp),%d0		# reload the d0 value pushed above
    119 	rtd		&0x4			# pop target into pc and discard the saved-d0 slot
    120 
    121 	global		_real_unfl		# stub: transfer to the routine named by callout slot _off_unfl
    122 _real_unfl:
    123 	mov.l		%d0,-(%sp)		# push d0; d0 is used as scratch below
    124 	mov.l		(_060FPSP_TABLE-0x80+_off_unfl,%pc),%d0	# d0 = longword held in the slot
    125 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (_060FPSP_TABLE-0x80)+d0 as the target addr
    126 	mov.l		0x4(%sp),%d0		# reload the d0 value pushed above
    127 	rtd		&0x4			# pop target into pc and discard the saved-d0 slot
    128 
    129 	global		_real_inex		# stub: transfer to the routine named by callout slot _off_inex
    130 _real_inex:
    131 	mov.l		%d0,-(%sp)		# push d0; d0 is used as scratch below
    132 	mov.l		(_060FPSP_TABLE-0x80+_off_inex,%pc),%d0	# d0 = longword held in the slot
    133 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (_060FPSP_TABLE-0x80)+d0 as the target addr
    134 	mov.l		0x4(%sp),%d0		# reload the d0 value pushed above
    135 	rtd		&0x4			# pop target into pc and discard the saved-d0 slot
    136 
    137 	global		_real_bsun		# stub: transfer to the routine named by callout slot _off_bsun
    138 _real_bsun:
    139 	mov.l		%d0,-(%sp)		# push d0; d0 is used as scratch below
    140 	mov.l		(_060FPSP_TABLE-0x80+_off_bsun,%pc),%d0	# d0 = longword held in the slot
    141 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (_060FPSP_TABLE-0x80)+d0 as the target addr
    142 	mov.l		0x4(%sp),%d0		# reload the d0 value pushed above
    143 	rtd		&0x4			# pop target into pc and discard the saved-d0 slot
    144 
    145 	global		_real_operr		# stub: transfer to the routine named by callout slot _off_operr
    146 _real_operr:
    147 	mov.l		%d0,-(%sp)		# push d0; d0 is used as scratch below
    148 	mov.l		(_060FPSP_TABLE-0x80+_off_operr,%pc),%d0	# d0 = longword held in the slot
    149 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (_060FPSP_TABLE-0x80)+d0 as the target addr
    150 	mov.l		0x4(%sp),%d0		# reload the d0 value pushed above
    151 	rtd		&0x4			# pop target into pc and discard the saved-d0 slot
    152 
    153 	global		_real_snan		# stub: transfer to the routine named by callout slot _off_snan
    154 _real_snan:
    155 	mov.l		%d0,-(%sp)		# push d0; d0 is used as scratch below
    156 	mov.l		(_060FPSP_TABLE-0x80+_off_snan,%pc),%d0	# d0 = longword held in the slot
    157 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (_060FPSP_TABLE-0x80)+d0 as the target addr
    158 	mov.l		0x4(%sp),%d0		# reload the d0 value pushed above
    159 	rtd		&0x4			# pop target into pc and discard the saved-d0 slot
    160 
    161 	global		_real_dz		# stub: transfer to the routine named by callout slot _off_dz
    162 _real_dz:
    163 	mov.l		%d0,-(%sp)		# push d0; d0 is used as scratch below
    164 	mov.l		(_060FPSP_TABLE-0x80+_off_dz,%pc),%d0	# d0 = longword held in the slot
    165 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (_060FPSP_TABLE-0x80)+d0 as the target addr
    166 	mov.l		0x4(%sp),%d0		# reload the d0 value pushed above
    167 	rtd		&0x4			# pop target into pc and discard the saved-d0 slot
    168 
    169 	global		_real_fline		# stub: transfer to the routine named by callout slot _off_fline
    170 _real_fline:
    171 	mov.l		%d0,-(%sp)		# push d0; d0 is used as scratch below
    172 	mov.l		(_060FPSP_TABLE-0x80+_off_fline,%pc),%d0	# d0 = longword held in the slot
    173 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (_060FPSP_TABLE-0x80)+d0 as the target addr
    174 	mov.l		0x4(%sp),%d0		# reload the d0 value pushed above
    175 	rtd		&0x4			# pop target into pc and discard the saved-d0 slot
    176 
    177 	global		_real_fpu_disabled	# stub: transfer to the routine named by callout slot _off_fpu_dis
    178 _real_fpu_disabled:
    179 	mov.l		%d0,-(%sp)		# push d0; d0 is used as scratch below
    180 	mov.l		(_060FPSP_TABLE-0x80+_off_fpu_dis,%pc),%d0	# d0 = longword held in the slot
    181 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (_060FPSP_TABLE-0x80)+d0 as the target addr
    182 	mov.l		0x4(%sp),%d0		# reload the d0 value pushed above
    183 	rtd		&0x4			# pop target into pc and discard the saved-d0 slot
    184 
    185 	global		_real_trap		# stub: transfer to the routine named by callout slot _off_trap
    186 _real_trap:
    187 	mov.l		%d0,-(%sp)		# push d0; d0 is used as scratch below
    188 	mov.l		(_060FPSP_TABLE-0x80+_off_trap,%pc),%d0	# d0 = longword held in the slot
    189 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (_060FPSP_TABLE-0x80)+d0 as the target addr
    190 	mov.l		0x4(%sp),%d0		# reload the d0 value pushed above
    191 	rtd		&0x4			# pop target into pc and discard the saved-d0 slot
    192 
    193 	global		_real_trace		# stub: transfer to the routine named by callout slot _off_trace
    194 _real_trace:
    195 	mov.l		%d0,-(%sp)		# push d0; d0 is used as scratch below
    196 	mov.l		(_060FPSP_TABLE-0x80+_off_trace,%pc),%d0	# d0 = longword held in the slot
    197 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (_060FPSP_TABLE-0x80)+d0 as the target addr
    198 	mov.l		0x4(%sp),%d0		# reload the d0 value pushed above
    199 	rtd		&0x4			# pop target into pc and discard the saved-d0 slot
    200 
    201 	global		_real_access		# stub: transfer to the routine named by callout slot _off_access
    202 _real_access:
    203 	mov.l		%d0,-(%sp)		# push d0; d0 is used as scratch below
    204 	mov.l		(_060FPSP_TABLE-0x80+_off_access,%pc),%d0	# d0 = longword held in the slot
    205 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (_060FPSP_TABLE-0x80)+d0 as the target addr
    206 	mov.l		0x4(%sp),%d0		# reload the d0 value pushed above
    207 	rtd		&0x4			# pop target into pc and discard the saved-d0 slot
    208 
    209 #######################################
    210 
    211 	global		_imem_read		# stub: transfer to the routine named by callout slot _off_imr
    212 _imem_read:
    213 	mov.l		%d0,-(%sp)		# push d0; d0 is used as scratch below
    214 	mov.l		(_060FPSP_TABLE-0x80+_off_imr,%pc),%d0	# d0 = longword held in the slot
    215 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (_060FPSP_TABLE-0x80)+d0 as the target addr
    216 	mov.l		0x4(%sp),%d0		# reload the d0 value pushed above
    217 	rtd		&0x4			# pop target into pc and discard the saved-d0 slot
    218 
    219 	global		_dmem_read		# stub: transfer to the routine named by callout slot _off_dmr
    220 _dmem_read:
    221 	mov.l		%d0,-(%sp)		# push d0; d0 is used as scratch below
    222 	mov.l		(_060FPSP_TABLE-0x80+_off_dmr,%pc),%d0	# d0 = longword held in the slot
    223 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (_060FPSP_TABLE-0x80)+d0 as the target addr
    224 	mov.l		0x4(%sp),%d0		# reload the d0 value pushed above
    225 	rtd		&0x4			# pop target into pc and discard the saved-d0 slot
    226 
    227 	global		_dmem_write		# stub: transfer to the routine named by callout slot _off_dmw
    228 _dmem_write:
    229 	mov.l		%d0,-(%sp)		# push d0; d0 is used as scratch below
    230 	mov.l		(_060FPSP_TABLE-0x80+_off_dmw,%pc),%d0	# d0 = longword held in the slot
    231 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (_060FPSP_TABLE-0x80)+d0 as the target addr
    232 	mov.l		0x4(%sp),%d0		# reload the d0 value pushed above
    233 	rtd		&0x4			# pop target into pc and discard the saved-d0 slot
    234 
    235 	global		_imem_read_word		# stub: transfer to the routine named by callout slot _off_irw
    236 _imem_read_word:
    237 	mov.l		%d0,-(%sp)		# push d0; d0 is used as scratch below
    238 	mov.l		(_060FPSP_TABLE-0x80+_off_irw,%pc),%d0	# d0 = longword held in the slot
    239 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (_060FPSP_TABLE-0x80)+d0 as the target addr
    240 	mov.l		0x4(%sp),%d0		# reload the d0 value pushed above
    241 	rtd		&0x4			# pop target into pc and discard the saved-d0 slot
    242 
    243 	global		_imem_read_long		# stub: transfer to the routine named by callout slot _off_irl
    244 _imem_read_long:
    245 	mov.l		%d0,-(%sp)		# push d0; d0 is used as scratch below
    246 	mov.l		(_060FPSP_TABLE-0x80+_off_irl,%pc),%d0	# d0 = longword held in the slot
    247 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (_060FPSP_TABLE-0x80)+d0 as the target addr
    248 	mov.l		0x4(%sp),%d0		# reload the d0 value pushed above
    249 	rtd		&0x4			# pop target into pc and discard the saved-d0 slot
    250 
    251 	global		_dmem_read_byte		# stub: transfer to the routine named by callout slot _off_drb
    252 _dmem_read_byte:
    253 	mov.l		%d0,-(%sp)		# push d0; d0 is used as scratch below
    254 	mov.l		(_060FPSP_TABLE-0x80+_off_drb,%pc),%d0	# d0 = longword held in the slot
    255 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (_060FPSP_TABLE-0x80)+d0 as the target addr
    256 	mov.l		0x4(%sp),%d0		# reload the d0 value pushed above
    257 	rtd		&0x4			# pop target into pc and discard the saved-d0 slot
    258 
    259 	global		_dmem_read_word		# stub: transfer to the routine named by callout slot _off_drw
    260 _dmem_read_word:
    261 	mov.l		%d0,-(%sp)		# push d0; d0 is used as scratch below
    262 	mov.l		(_060FPSP_TABLE-0x80+_off_drw,%pc),%d0	# d0 = longword held in the slot
    263 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (_060FPSP_TABLE-0x80)+d0 as the target addr
    264 	mov.l		0x4(%sp),%d0		# reload the d0 value pushed above
    265 	rtd		&0x4			# pop target into pc and discard the saved-d0 slot
    266 
    267 	global		_dmem_read_long		# stub: transfer to the routine named by callout slot _off_drl
    268 _dmem_read_long:
    269 	mov.l		%d0,-(%sp)		# push d0; d0 is used as scratch below
    270 	mov.l		(_060FPSP_TABLE-0x80+_off_drl,%pc),%d0	# d0 = longword held in the slot
    271 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (_060FPSP_TABLE-0x80)+d0 as the target addr
    272 	mov.l		0x4(%sp),%d0		# reload the d0 value pushed above
    273 	rtd		&0x4			# pop target into pc and discard the saved-d0 slot
    274 
    275 	global		_dmem_write_byte	# stub: transfer to the routine named by callout slot _off_dwb
    276 _dmem_write_byte:
    277 	mov.l		%d0,-(%sp)		# push d0; d0 is used as scratch below
    278 	mov.l		(_060FPSP_TABLE-0x80+_off_dwb,%pc),%d0	# d0 = longword held in the slot
    279 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (_060FPSP_TABLE-0x80)+d0 as the target addr
    280 	mov.l		0x4(%sp),%d0		# reload the d0 value pushed above
    281 	rtd		&0x4			# pop target into pc and discard the saved-d0 slot
    282 
    283 	global		_dmem_write_word	# stub: transfer to the routine named by callout slot _off_dww
    284 _dmem_write_word:
    285 	mov.l		%d0,-(%sp)		# push d0; d0 is used as scratch below
    286 	mov.l		(_060FPSP_TABLE-0x80+_off_dww,%pc),%d0	# d0 = longword held in the slot
    287 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (_060FPSP_TABLE-0x80)+d0 as the target addr
    288 	mov.l		0x4(%sp),%d0		# reload the d0 value pushed above
    289 	rtd		&0x4			# pop target into pc and discard the saved-d0 slot
    290 
    291 	global		_dmem_write_long	# stub: transfer to the routine named by callout slot _off_dwl
    292 _dmem_write_long:
    293 	mov.l		%d0,-(%sp)		# push d0; d0 is used as scratch below
    294 	mov.l		(_060FPSP_TABLE-0x80+_off_dwl,%pc),%d0	# d0 = longword held in the slot
    295 	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push (_060FPSP_TABLE-0x80)+d0 as the target addr
    296 	mov.l		0x4(%sp),%d0		# reload the d0 value pushed above
    297 	rtd		&0x4			# pop target into pc and discard the saved-d0 slot
    298 
    299 #
    300 # This file contains a set of define statements for constants
    301 # in order to promote readability within the corecode itself.
    302 #
    303 
    304 set LOCAL_SIZE,		192			# stack frame size (bytes); used with link.w
    305 set LV,			-LOCAL_SIZE		# offset of frame bottom relative to a6
    306 
    307 set EXC_SR,		0x4			# stack status register
    308 set EXC_PC,		0x6			# stack pc
    309 set EXC_VOFF,		0xa			# stacked vector offset
    310 set EXC_EA,		0xc			# stacked <ea>
    311 
    312 set EXC_FP,		0x0			# frame pointer
    313 
    314 set EXC_AREGS,		-68			# offset of all address regs
    315 set EXC_DREGS,		-100			# offset of all data regs
    316 set EXC_FPREGS,		-36			# offset of all fp regs
    317 
    318 set EXC_A7,		EXC_AREGS+(7*4)		# offset of saved a7
    319 set OLD_A7,		EXC_AREGS+(6*4)		# extra copy of saved a7 (same offset as EXC_A6)
    320 set EXC_A6,		EXC_AREGS+(6*4)		# offset of saved a6
    321 set EXC_A5,		EXC_AREGS+(5*4)		# offset of saved a5
    322 set EXC_A4,		EXC_AREGS+(4*4)		# offset of saved a4
    323 set EXC_A3,		EXC_AREGS+(3*4)		# offset of saved a3
    324 set EXC_A2,		EXC_AREGS+(2*4)		# offset of saved a2
    325 set EXC_A1,		EXC_AREGS+(1*4)		# offset of saved a1
    326 set EXC_A0,		EXC_AREGS+(0*4)		# offset of saved a0
    327 set EXC_D7,		EXC_DREGS+(7*4)		# offset of saved d7
    328 set EXC_D6,		EXC_DREGS+(6*4)		# offset of saved d6
    329 set EXC_D5,		EXC_DREGS+(5*4)		# offset of saved d5
    330 set EXC_D4,		EXC_DREGS+(4*4)		# offset of saved d4
    331 set EXC_D3,		EXC_DREGS+(3*4)		# offset of saved d3
    332 set EXC_D2,		EXC_DREGS+(2*4)		# offset of saved d2
    333 set EXC_D1,		EXC_DREGS+(1*4)		# offset of saved d1
    334 set EXC_D0,		EXC_DREGS+(0*4)		# offset of saved d0
    335 
    336 set EXC_FP0, 		EXC_FPREGS+(0*12)	# offset of saved fp0
    337 set EXC_FP1, 		EXC_FPREGS+(1*12)	# offset of saved fp1
    338 set EXC_FP2, 		EXC_FPREGS+(2*12)	# offset of saved fp2 (not used)
    339 
    340 set FP_SCR1, 		LV+80			# fp scratch 1
    341 set FP_SCR1_EX, 	FP_SCR1+0		# exponent word
    342 set FP_SCR1_SGN,	FP_SCR1+2		# word following the exponent
    343 set FP_SCR1_HI, 	FP_SCR1+4		# high mantissa longword
    344 set FP_SCR1_LO, 	FP_SCR1+8		# low mantissa longword
    345 
    346 set FP_SCR0, 		LV+68			# fp scratch 0
    347 set FP_SCR0_EX, 	FP_SCR0+0		# exponent word
    348 set FP_SCR0_SGN,	FP_SCR0+2		# word following the exponent
    349 set FP_SCR0_HI, 	FP_SCR0+4		# high mantissa longword
    350 set FP_SCR0_LO, 	FP_SCR0+8		# low mantissa longword
    351 
    352 set FP_DST, 		LV+56			# fp destination operand
    353 set FP_DST_EX, 		FP_DST+0		# exponent word
    354 set FP_DST_SGN,		FP_DST+2		# word following the exponent
    355 set FP_DST_HI, 		FP_DST+4		# high mantissa longword
    356 set FP_DST_LO, 		FP_DST+8		# low mantissa longword
    357 
    358 set FP_SRC, 		LV+44			# fp source operand
    359 set FP_SRC_EX, 		FP_SRC+0		# exponent word
    360 set FP_SRC_SGN,		FP_SRC+2		# word following the exponent
    361 set FP_SRC_HI, 		FP_SRC+4		# high mantissa longword
    362 set FP_SRC_LO, 		FP_SRC+8		# low mantissa longword
    363 
    364 set USER_FPIAR,		LV+40			# FP instr address register
    365 
    366 set USER_FPSR,		LV+36			# FP status register
    367 set FPSR_CC,		USER_FPSR+0		# FPSR condition codes
    368 set FPSR_QBYTE,		USER_FPSR+1		# FPSR quotient byte
    369 set FPSR_EXCEPT,	USER_FPSR+2		# FPSR exception status byte
    370 set FPSR_AEXCEPT,	USER_FPSR+3		# FPSR accrued exception byte
    371 
    372 set USER_FPCR,		LV+32			# FP control register
    373 set FPCR_ENABLE,	USER_FPCR+2		# FPCR exception enable
    374 set FPCR_MODE,		USER_FPCR+3		# FPCR rounding mode control
    375 
    376 set L_SCR3,		LV+28			# integer scratch 3
    377 set L_SCR2,		LV+24			# integer scratch 2
    378 set L_SCR1,		LV+20			# integer scratch 1
    379 
    380 set STORE_FLG,		LV+19			# flag: operand store (ie. not fcmp/ftst)
    381 
    382 set EXC_TEMP2,		LV+24			# temporary space (same offset as L_SCR2)
    383 set EXC_TEMP,		LV+16			# temporary space
    384 
    385 set DTAG,		LV+15			# destination operand type
    386 set STAG, 		LV+14			# source operand type
    387 
    388 set SPCOND_FLG,		LV+10			# flag: special case (see below)
    389 
    390 set EXC_CC,		LV+8			# saved condition codes
    391 set EXC_EXTWPTR,	LV+4			# saved current PC (active)
    392 set EXC_EXTWORD,	LV+2			# saved extension word
    393 set EXC_CMDREG,		LV+2			# saved extension word (alias of EXC_EXTWORD)
    394 set EXC_OPWORD,		LV+0			# saved operation word
    395 
    396 ################################
    397 
    398 # Helpful macros
    399 
    400 set FTEMP,		0			# offsets within an
    401 set FTEMP_EX, 		0			# extended precision
    402 set FTEMP_SGN,		2			# value saved in memory.
    403 set FTEMP_HI, 		4			# high mantissa longword
    404 set FTEMP_LO, 		8			# low mantissa longword
    405 set FTEMP_GRS,		12			# offset just past the 12-byte value
    406 
    407 set LOCAL,		0			# offsets within an
    408 set LOCAL_EX, 		0			# extended precision
    409 set LOCAL_SGN,		2			# value saved in memory.
    410 set LOCAL_HI, 		4			# high mantissa longword
    411 set LOCAL_LO, 		8			# low mantissa longword
    412 set LOCAL_GRS,		12			# offset just past the 12-byte value
    413 
    414 set DST,		0			# offsets within an
    415 set DST_EX,		0			# extended precision
    416 set DST_HI,		4			# value saved in memory.
    417 set DST_LO,		8			# low mantissa longword
    418 
    419 set SRC,		0			# offsets within an
    420 set SRC_EX,		0			# extended precision
    421 set SRC_HI,		4			# value saved in memory.
    422 set SRC_LO,		8			# low mantissa longword
    423 
    424 set SGL_LO,		0x3f81			# min sgl prec exponent (EXT_BIAS-SGL_BIAS+1)
    425 set SGL_HI,		0x407e			# max sgl prec exponent (EXT_BIAS+SGL_BIAS)
    426 set DBL_LO,		0x3c01			# min dbl prec exponent (EXT_BIAS-DBL_BIAS+1)
    427 set DBL_HI,		0x43fe			# max dbl prec exponent (EXT_BIAS+DBL_BIAS)
    428 set EXT_LO,		0x0			# min ext prec exponent
    429 set EXT_HI,		0x7ffe			# max ext prec exponent
    430 
    431 set EXT_BIAS,		0x3fff			# extended precision bias
    432 set SGL_BIAS,		0x007f			# single precision bias
    433 set DBL_BIAS,		0x03ff			# double precision bias
    434 
    435 set NORM,		0x00			# operand type for STAG/DTAG
    436 set ZERO,		0x01			# operand type for STAG/DTAG
    437 set INF,		0x02			# operand type for STAG/DTAG
    438 set QNAN,		0x03			# operand type for STAG/DTAG
    439 set DENORM,		0x04			# operand type for STAG/DTAG
    440 set SNAN,		0x05			# operand type for STAG/DTAG
    441 set UNNORM,		0x06			# operand type for STAG/DTAG
    442 
    443 ##################
    444 # FPSR/FPCR bits #
    445 ##################
    446 set neg_bit,		0x3			# negative result (bit no. matching neg_bmask)
    447 set z_bit,		0x2			# zero result (bit no. matching z_bmask)
    448 set inf_bit,		0x1			# infinite result (bit no. matching inf_bmask)
    449 set nan_bit,		0x0			# NAN result (bit no. matching nan_bmask)
    450 
    451 set q_sn_bit,		0x7			# sign bit of quotient byte
    452 
    453 set bsun_bit,		7			# branch on unordered
    454 set snan_bit,		6			# signalling NAN
    455 set operr_bit,		5			# operand error
    456 set ovfl_bit,		4			# overflow
    457 set unfl_bit,		3			# underflow
    458 set dz_bit,		2			# divide by zero
    459 set inex2_bit,		1			# inexact result 2
    460 set inex1_bit,		0			# inexact result 1
    461 
    462 set aiop_bit,		7			# accrued invalid operation bit
    463 set aovfl_bit,		6			# accrued overflow bit
    464 set aunfl_bit,		5			# accrued underflow bit
    465 set adz_bit,		4			# accrued dz bit
    466 set ainex_bit,		3			# accrued inexact bit
    467 
    468 #############################
    469 # FPSR individual bit masks #
    470 #############################
    471 set neg_mask,		0x08000000		# negative bit mask (lw)
    472 set inf_mask,		0x02000000		# infinity bit mask (lw)
    473 set z_mask,		0x04000000		# zero bit mask (lw)
    474 set nan_mask,		0x01000000		# nan bit mask (lw)
    475 
    476 set neg_bmask,		0x08			# negative bit mask (byte)
    477 set inf_bmask,		0x02			# infinity bit mask (byte)
    478 set z_bmask,		0x04			# zero bit mask (byte)
    479 set nan_bmask,		0x01			# nan bit mask (byte)
    480 
    481 set bsun_mask,		0x00008000		# bsun exception mask
    482 set snan_mask,		0x00004000		# snan exception mask
    483 set operr_mask,		0x00002000		# operr exception mask
    484 set ovfl_mask,		0x00001000		# overflow exception mask
    485 set unfl_mask,		0x00000800		# underflow exception mask
    486 set dz_mask,		0x00000400		# dz exception mask
    487 set inex2_mask,		0x00000200		# inex2 exception mask
    488 set inex1_mask,		0x00000100		# inex1 exception mask
    489 
    490 set aiop_mask,		0x00000080		# accrued illegal operation
    491 set aovfl_mask,		0x00000040		# accrued overflow
    492 set aunfl_mask,		0x00000020		# accrued underflow
    493 set adz_mask,		0x00000010		# accrued divide by zero
    494 set ainex_mask,		0x00000008		# accrued inexact
    495 
    496 ######################################
    497 # FPSR combinations used in the FPSP #
    498 ######################################
    499 set dzinf_mask,		inf_mask+dz_mask+adz_mask
    500 set opnan_mask,		nan_mask+operr_mask+aiop_mask
    501 set nzi_mask,		0x01ffffff 		# AND mask: clears N, Z, and I (and bits 31-28)
    502 set unfinx_mask,	unfl_mask+inex2_mask+aunfl_mask+ainex_mask
    503 set unf2inx_mask,	unfl_mask+inex2_mask+ainex_mask	# unfinx_mask without aunfl_mask
    504 set ovfinx_mask,	ovfl_mask+inex2_mask+aovfl_mask+ainex_mask
    505 set inx1a_mask,		inex1_mask+ainex_mask
    506 set inx2a_mask,		inex2_mask+ainex_mask
    507 set snaniop_mask, 	nan_mask+snan_mask+aiop_mask
    508 set snaniop2_mask,	snan_mask+aiop_mask	# snaniop_mask without nan_mask
    509 set naniop_mask,	nan_mask+aiop_mask
    510 set neginf_mask,	neg_mask+inf_mask
    511 set infaiop_mask, 	inf_mask+aiop_mask
    512 set negz_mask,		neg_mask+z_mask
    513 set opaop_mask,		operr_mask+aiop_mask
    514 set unfl_inx_mask,	unfl_mask+aunfl_mask+ainex_mask	# note: no inex2_mask here
    515 set ovfl_inx_mask,	ovfl_mask+aovfl_mask+ainex_mask	# note: no inex2_mask here
    516 
    517 #########
    518 # misc. #
    519 #########
    520 set rnd_stky_bit,	29			# stky bit pos in longword
    521 
    522 set sign_bit,		0x7			# sign bit
    523 set signan_bit,		0x6			# signalling nan bit
    524 
    525 set sgl_thresh,		0x3f81			# minimum sgl exponent (same value as SGL_LO)
    526 set dbl_thresh,		0x3c01			# minimum dbl exponent (same value as DBL_LO)
    527 
    528 set x_mode,		0x0			# extended precision
    529 set s_mode,		0x4			# single precision
    530 set d_mode,		0x8			# double precision
    531 
    532 set rn_mode,		0x0			# round-to-nearest
    533 set rz_mode,		0x1			# round-to-zero
    534 set rm_mode,		0x2			# round-to-minus-infinity
    535 set rp_mode,		0x3			# round-to-plus-infinity
    536 
    537 set mantissalen,	64			# length of mantissa in bits
    538 
    539 set BYTE,		1			# len(byte) == 1 byte
    540 set WORD, 		2			# len(word) == 2 bytes
    541 set LONG, 		4			# len(longword) == 4 bytes
    542 
    543 set BSUN_VEC,		0xc0			# bsun    vector offset
    544 set INEX_VEC,		0xc4			# inexact vector offset
    545 set DZ_VEC,		0xc8			# dz      vector offset
    546 set UNFL_VEC,		0xcc			# unfl    vector offset
    547 set OPERR_VEC,		0xd0			# operr   vector offset
    548 set OVFL_VEC,		0xd4			# ovfl    vector offset
    549 set SNAN_VEC,		0xd8			# snan    vector offset
    550 
    551 ###########################
    552 # SPecial CONDition FLaGs #
    553 ###########################
    554 set ftrapcc_flg,	0x01			# flag bit: ftrapcc exception
    555 set fbsun_flg,		0x02			# flag bit: bsun exception
    556 set mia7_flg,		0x04			# flag bit: (a7)+ <ea>
    557 set mda7_flg,		0x08			# flag bit: -(a7) <ea>
    558 set fmovm_flg,		0x40			# flag bit: fmovm instruction
    559 set immed_flg,		0x80			# flag bit: &<data> <ea>
    560 
    561 set ftrapcc_bit,	0x0			# bit number of ftrapcc_flg
    562 set fbsun_bit,		0x1			# bit number of fbsun_flg
    563 set mia7_bit,		0x2			# bit number of mia7_flg
    564 set mda7_bit,		0x3			# bit number of mda7_flg
    565 set immed_bit,		0x7			# bit number of immed_flg
    566 
    567 ##################################
    568 # TRANSCENDENTAL "LAST-OP" FLAGS #
    569 ##################################
    570 set FMUL_OP,		0x0			# fmul instr performed last
    571 set FDIV_OP,		0x1			# fdiv performed last
    572 set FADD_OP,		0x2			# fadd performed last
    573 set FMOV_OP,		0x3			# fmov performed last
    574 
    575 #############
    576 # CONSTANTS #
    577 #############
    578 T1:	long		0x40C62D38,0xD3D64634	# 16381 LOG2 LEAD (two longwords; presumably an IEEE double)
    579 T2:	long		0x3D6F90AE,0xB1E75CC7	# 16381 LOG2 TRAIL (two longwords; presumably an IEEE double)
    580 
    581 PI:	long		0x40000000,0xC90FDAA2,0x2168C235,0x00000000	# four longwords; exponent word 0x4000
    582 PIBY2:	long		0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000	# same mantissa as PI; exponent word 0x3FFF
    583 
    584 TWOBYPI:
    585 	long		0x3FE45F30,0x6DC9C883	# two longwords; presumably 2/pi as an IEEE double
    586 
    587 #########################################################################
    588 # XDEF ****************************************************************	#
    589 #	_fpsp_ovfl(): 060FPSP entry point for FP Overflow exception.	#
    590 #									#
    591 #	This handler should be the first code executed upon taking the	#
    592 #	FP Overflow exception in an operating system.			#
    593 #									#
    594 # XREF ****************************************************************	#
    595 #	_imem_read_long() - read instruction longword			#
    596 #	fix_skewed_ops() - adjust src operand in fsave frame		#
    597 #	set_tag_x() - determine optype of src/dst operands		#
    598 #	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
    599 #	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
    600 #	load_fpn2() - load dst operand from FP regfile			#
    601 #	fout() - emulate an opclass 3 instruction			#
    602 #	tbl_unsupp - addr of table of emulation routines for opclass 0,2	#
    603 #	_fpsp_done() - "callout" for 060FPSP exit (all work done!)	#
    604 #	_real_ovfl() - "callout" for Overflow exception enabled code	#
    605 #	_real_inex() - "callout" for Inexact exception enabled code	#
    606 #	_real_trace() - "callout" for Trace exception code		#
    607 #									#
    608 # INPUT ***************************************************************	#
    609 #	- The system stack contains the FP Ovfl exception stack frame	#
    610 #	- The fsave frame contains the source operand			#
    611 # 									#
    612 # OUTPUT **************************************************************	#
    613 #	Overflow Exception enabled:					#
    614 #	- The system stack is unchanged					#
    615 #	- The fsave frame contains the adjusted src op for opclass 0,2	#
    616 #	Overflow Exception disabled:					#
    617 #	- The system stack is unchanged					#
    618 #	- The "exception present" flag in the fsave frame is cleared	#
    619 #									#
    620 # ALGORITHM ***********************************************************	#
    621 #	On the 060, if an FP overflow is present as the result of any	#
    622 # instruction, the 060 will take an overflow exception whether the 	#
    623 # exception is enabled or disabled in the FPCR. For the disabled case, 	#
    624 # This handler emulates the instruction to determine what the correct	#
    625 # default result should be for the operation. This default result is	#
    626 # then stored in either the FP regfile, data regfile, or memory. 	#
    627 # Finally, the handler exits through the "callout" _fpsp_done() 	#
    628 # denoting that no exceptional conditions exist within the machine.	#
    629 # 	If the exception is enabled, then this handler must create the	#
    630 # exceptional operand and place it in the fsave state frame, and store	#
    631 # the default result (only if the instruction is opclass 3). For 	#
    632 # exceptions enabled, this handler must exit through the "callout" 	#
    633 # _real_ovfl() so that the operating system enabled overflow handler	#
    634 # can handle this case.							#
    635 #	Two other conditions exist. First, if overflow was disabled 	#
    636 # but the inexact exception was enabled, this handler must exit 	#
    637 # through the "callout" _real_inex() regardless of whether the result	#
    638 # was inexact.								#
    639 #	Also, in the case of an opclass three instruction where 	#
    640 # overflow was disabled and the trace exception was enabled, this	#
    641 # handler must exit through the "callout" _real_trace().		#
    642 #									#
    643 #########################################################################
    644 
    645 	global		_fpsp_ovfl
    646 _fpsp_ovfl:
    647 
    648 #$#	sub.l		&24,%sp			# make room for src/dst
    649 
    650 	link.w		%a6,&-LOCAL_SIZE	# init stack frame
    651 
    652 	fsave		FP_SRC(%a6)		# grab the "busy" frame
    653 
    654  	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
    655 	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
    656  	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
    657 
    658 # the FPIAR holds the "current PC" of the faulting instruction
    659 	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6) # seed instruction ptr from saved FPIAR
    660 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
    661 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
    662 	bsr.l		_imem_read_long		# fetch the instruction words
    663 	mov.l		%d0,EXC_OPWORD(%a6)	# longword store fills EXC_OPWORD and EXC_CMDREG
    664 
    665 ##############################################################################
    666 
    667 	btst		&0x5,EXC_CMDREG(%a6)	# is instr an fmove out?
    668 	bne.w		fovfl_out		# yes; take the opclass 3 path
    669 
    670 
    671 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
    672 	bsr.l		fix_skewed_ops		# fix src op
    673 
    674 # since, I believe, only NORMs and DENORMs can come through here,
    675 # maybe we can avoid the subroutine call.
    676 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
    677 	bsr.l		set_tag_x		# tag the operand type
    678 	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM
    679 
    680 # bit five of the fp extension word separates the monadic and dyadic operations
    681 # that can pass through fpsp_ovfl(). remember that fcmp, ftst, and fsincos
    682 # will never take this exception.
    683 	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
    684 	beq.b		fovfl_extract		# monadic
    685 
    686 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
    687 	bsr.l		load_fpn2		# load dst into FP_DST
    688 
    689 	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
    690 	bsr.l		set_tag_x		# tag the operand type
    691 	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
    692 	bne.b		fovfl_op2_done		# no
    693 	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
    694 fovfl_op2_done:
    695 	mov.b		%d0,DTAG(%a6)		# save dst optype tag
    696 
    697 fovfl_extract:
    698 
    699 #$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
    700 #$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
    701 #$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
    702 #$#	mov.l		FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
    703 #$#	mov.l		FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
    704 #$#	mov.l		FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
    705 
    706 	clr.l		%d0			# zero d0 before the byte load below
    707 	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
    708 
    709 	mov.b		1+EXC_CMDREG(%a6),%d1	# low byte of the extension word
    710 	andi.w		&0x007f,%d1		# extract extension
    711 
    712 	andi.l		&0x00ff01ff,USER_FPSR(%a6) # clear cc byte + exc bits 15-9; keep qbyte, inex1, accrued
    713 
    714 	fmov.l		&0x0,%fpcr		# zero current control regs
    715 	fmov.l		&0x0,%fpsr
    716 
    717 	lea		FP_SRC(%a6),%a0		# a0 = ptr to src operand
    718 	lea		FP_DST(%a6),%a1		# a1 = ptr to dst operand
    719 
    720 # maybe we can make these entry points ONLY the OVFL entry points of each routine.
    721 	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
    722 	jsr		(tbl_unsupp.l,%pc,%d1.l*1) # call tbl_unsupp + fetched table entry
    723 
    724 # the operation has been emulated. the result is in fp0.
    725 # the EXOP, if an exception occurred, is in fp1.
    726 # we must save the default result regardless of whether
    727 # traps are enabled or disabled.
    728 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # d0 = dst reg field (same field as above)
    729 	bsr.l		store_fpreg		# store result to the FP regfile
    730 
    731 # the exceptional possibilities we have left ourselves with are ONLY overflow
    732 # and inexact. and, the inexact is such that overflow occurred and was disabled
    733 # but inexact was enabled.
    734 	btst		&ovfl_bit,FPCR_ENABLE(%a6) # is overflow enabled?
    735 	bne.b		fovfl_ovfl_on		# yes
    736 
    737 	btst		&inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
    738 	bne.b		fovfl_inex_on		# yes
    739 
    740 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
    741 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
    742 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
    743 
    744 	unlk		%a6			# tear down the stack frame
    745 #$#	add.l		&24,%sp
    746 	bra.l		_fpsp_done		# exit; neither overflow nor inexact is enabled
    747 
    748 # overflow is enabled AND overflow, of course, occurred. so, we have the EXOP
    749 # in fp1. now, simply jump to _real_ovfl()!
    750 fovfl_ovfl_on:
    751 	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack
    752 
    753 	mov.w		&0xe005,2+FP_SRC(%a6) 	# save exc status
    754 
    755 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
    756 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
    757 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
    758 
    759 	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
    760 
    761 	unlk		%a6			# tear down the stack frame
    762 
    763 	bra.l		_real_ovfl		# exit through the overflow callout
    764 
    765 # overflow occurred but is disabled. meanwhile, inexact is enabled. therefore,
    766 # we must jump to real_inex().
    767 fovfl_inex_on:
    768 
    769 	fmovm.x		&0x40,FP_SRC(%a6) 	# save EXOP (fp1) to stack
    770 
    771 	mov.b		&0xc4,1+EXC_VOFF(%a6)	# vector offset = 0xc4
    772 	mov.w		&0xe001,2+FP_SRC(%a6) 	# save exc status
    773 
    774 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
    775 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
    776 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
    777 
    778 	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
    779 
    780 	unlk		%a6			# tear down the stack frame
    781 
    782 	bra.l		_real_inex		# exit through the inexact callout
    783 
    784 ########################################################################
    785 fovfl_out:
    786 
    787 
    788 #$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
    789 #$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
    790 #$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
    791 
    792 # the src operand is definitely a NORM(!), so tag it as such
    793 	mov.b		&NORM,STAG(%a6)		# set src optype tag
    794 
    795 	clr.l		%d0			# zero d0 before the byte load below
    796 	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
    797 
    798 	and.l		&0xffff00ff,USER_FPSR(%a6) # clear only the exception status byte (bits 15-8)
    799 
    800 	fmov.l		&0x0,%fpcr		# zero current control regs
    801 	fmov.l		&0x0,%fpsr
    802 
    803 	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
    804 
    805 	bsr.l		fout			# emulate the opclass 3 instruction
    806 
    807 	btst		&ovfl_bit,FPCR_ENABLE(%a6) # is overflow enabled?
    808 	bne.w		fovfl_ovfl_on		# yes
    809 
    810 	btst		&inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
    811 	bne.w		fovfl_inex_on		# yes
    812 
    813 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
    814 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
    815 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
    816 
    817 	unlk		%a6			# tear down the stack frame
    818 #$#	add.l		&24,%sp
    819 
    820 	btst		&0x7,(%sp)		# is trace on?
    821 	beq.l		_fpsp_done		# no
    822 
    823 	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
    824 	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
    825 	bra.l		_real_trace		# exit through the trace callout
    826 
    827 #########################################################################
    828 # XDEF ****************************************************************	#
    829 #	_fpsp_unfl(): 060FPSP entry point for FP Underflow exception.	#
    830 #									#
    831 #	This handler should be the first code executed upon taking the	#
    832 #	FP Underflow exception in an operating system.			#
    833 #									#
    834 # XREF ****************************************************************	#
    835 #	_imem_read_long() - read instruction longword			#
    836 #	fix_skewed_ops() - adjust src operand in fsave frame		#
    837 #	set_tag_x() - determine optype of src/dst operands		#
    838 #	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
    839 #	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
    840 #	load_fpn2() - load dst operand from FP regfile			#
    841 #	fout() - emulate an opclass 3 instruction			#
    842 #	tbl_unsupp - add of table of emulation routines for opclass 0,2	#
    843 #	_fpsp_done() - "callout" for 060FPSP exit (all work done!)	#
    844 #	_real_unfl() - "callout" for Underflow exception enabled code	#
    845 #	_real_inex() - "callout" for Inexact exception enabled code	#
    846 #	_real_trace() - "callout" for Trace exception code		#
    847 #									#
    848 # INPUT ***************************************************************	#
    849 #	- The system stack contains the FP Unfl exception stack frame	#
    850 #	- The fsave frame contains the source operand			#
    851 # 									#
    852 # OUTPUT **************************************************************	#
    853 #	Underflow Exception enabled:					#
    854 #	- The system stack is unchanged					#
    855 #	- The fsave frame contains the adjusted src op for opclass 0,2	#
    856 #	Underflow Exception disabled:					#
    857 #	- The system stack is unchanged					#
    858 #	- The "exception present" flag in the fsave frame is cleared	#
    859 #									#
    860 # ALGORITHM ***********************************************************	#
    861 #	On the 060, if an FP underflow is present as the result of any	#
    862 # instruction, the 060 will take an underflow exception whether the 	#
    863 # exception is enabled or disabled in the FPCR. For the disabled case, 	#
    864 # This handler emulates the instruction to determine what the correct	#
    865 # default result should be for the operation. This default result is	#
    866 # then stored in either the FP regfile, data regfile, or memory. 	#
    867 # Finally, the handler exits through the "callout" _fpsp_done() 	#
    868 # denoting that no exceptional conditions exist within the machine.	#
    869 # 	If the exception is enabled, then this handler must create the	#
    870 # exceptional operand and place it in the fsave state frame, and store	#
    871 # the default result (only if the instruction is opclass 3). For 	#
    872 # exceptions enabled, this handler must exit through the "callout" 	#
    873 # _real_unfl() so that the operating system enabled underflow handler	#
    874 # can handle this case.							#
    875 #	Two other conditions exist. First, if underflow was disabled 	#
    876 # but the inexact exception was enabled and the result was inexact, 	#
    877 # this handler must exit through the "callout" _real_inex().		#
    878 #									#
    879 #	Also, in the case of an opclass three instruction where 	#
    880 # underflow was disabled and the trace exception was enabled, this	#
    881 # handler must exit through the "callout" _real_trace().		#
    882 #									#
    883 #########################################################################
    884 
    885 	global		_fpsp_unfl
    886 _fpsp_unfl:
    887 # entry: build a local frame, save FPU and integer state, fetch the faulting op
    888 #$#	sub.l		&24,%sp			# make room for src/dst
    889 
    890 	link.w		%a6,&-LOCAL_SIZE	# init stack frame
    891 
    892 	fsave		FP_SRC(%a6)		# grab the "busy" frame
    893 
    894  	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
    895 	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
    896  	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
    897 
    898 # the FPIAR holds the "current PC" of the faulting instruction
    899 	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
    900 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
    901 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
    902 	bsr.l		_imem_read_long		# fetch the instruction words
    903 	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD
    904 
    905 ##############################################################################
    906 
    907 	btst		&0x5,EXC_CMDREG(%a6)	# is instr an fmove out?
    908 	bne.w		funfl_out		# yes
    909 
    910 
    911 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
    912 	bsr.l		fix_skewed_ops		# fix src op
    913 
    914 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
    915 	bsr.l		set_tag_x		# tag the operand type
    916 	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM
    917 
    918 # bit five of the fp ext word separates the monadic and dyadic operations
    919 # that can pass through fpsp_unfl(). remember that fcmp, and ftst
    920 # will never take this exception.
    921 	btst		&0x5,1+EXC_CMDREG(%a6)	# is op monadic or dyadic?
    922 	beq.b		funfl_extract		# monadic
    923 
    924 # now, what's left that's not dyadic is fsincos. we can distinguish it
    925 # from all dyadics by the '0110xxx' pattern
    926 	btst		&0x4,1+EXC_CMDREG(%a6)	# is op an fsincos?
    927 	bne.b		funfl_extract		# yes
    928 
    929 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
    930 	bsr.l		load_fpn2		# load dst into FP_DST
    931 
    932 	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
    933 	bsr.l		set_tag_x		# tag the operand type
    934 	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
    935 	bne.b		funfl_op2_done		# no
    936 	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
    937 funfl_op2_done:
    938 	mov.b		%d0,DTAG(%a6)		# save dst optype tag
    939 
    940 funfl_extract:
    941 
    942 #$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
    943 #$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
    944 #$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
    945 #$#	mov.l		FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
    946 #$#	mov.l		FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
    947 #$#	mov.l		FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
    948 
    949 	clr.l		%d0
    950 	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
    951 
    952 	mov.b		1+EXC_CMDREG(%a6),%d1	# fetch low byte of command word
    953 	andi.w		&0x007f,%d1		# extract extension
    954 
    955 	andi.l		&0x00ff01ff,USER_FPSR(%a6) # clear bits 31-24 and 15-9 of saved FPSR
    956 
    957 	fmov.l		&0x0,%fpcr		# zero current control regs
    958 	fmov.l		&0x0,%fpsr
    959 
    960 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
    961 	lea		FP_DST(%a6),%a1		# pass: ptr to dst op
    962 
    963 # maybe we can make these entry points ONLY the OVFL entry points of each routine.
    964 	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
    965 	jsr		(tbl_unsupp.l,%pc,%d1.l*1) # table entry is an offset from tbl_unsupp
    966 
    967 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # extract dst reg number
    968 	bsr.l		store_fpreg		# store result to FP regfile
    969 
    970 # The `060 FPU multiplier hardware is such that if the result of a
    971 # multiply operation is the smallest possible normalized number
    972 # (0x00000000_80000000_00000000), then the machine will take an
    973 # underflow exception. Since this is incorrect, we need to check
    974 # if our emulation, after re-doing the operation, decided that
    975 # no underflow was called for. We do these checks only in
    976 # funfl_{unfl,inex}_on() because w/ both exceptions disabled, this
    977 # special case will simply exit gracefully with the correct result.
    978 
    979 # the exceptional possibilities we have left ourselves with are ONLY underflow
    980 # and inexact. and, the inexact is such that underflow occurred and was disabled
    981 # but inexact was enabled.
    982 	btst		&unfl_bit,FPCR_ENABLE(%a6) # is underflow enabled?
    983 	bne.b		funfl_unfl_on		# yes
    984 
    985 funfl_chkinex:
    986 	btst		&inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
    987 	bne.b		funfl_inex_on		# yes
    988 
    989 funfl_exit:
    990 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
    991 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
    992 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
    993 
    994 	unlk		%a6
    995 #$#	add.l		&24,%sp
    996 	bra.l		_fpsp_done
    997 
    998 # underflow is enabled AND (if the check below passes) underflow occurred. so,
    999 # we have the EXOP in fp1 (don't forget to save fp0). what to do now?
   1000 # well, we simply have to get to go to _real_unfl()!
   1001 funfl_unfl_on:
   1002 
   1003 # The `060 FPU multiplier hardware is such that if the result of a
   1004 # multiply operation is the smallest possible normalized number
   1005 # (0x00000000_80000000_00000000), then the machine will take an
   1006 # underflow exception. Since this is incorrect, we check here to see
   1007 # if our emulation, after re-doing the operation, decided that
   1008 # no underflow was called for.
   1009 	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did emulation set underflow?
   1010 	beq.w		funfl_chkinex		# no; go check inexact instead
   1011 
   1012 funfl_unfl_on2:
   1013 	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack
   1014 
   1015 	mov.w		&0xe003,2+FP_SRC(%a6) 	# save exc status
   1016 
   1017 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   1018 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1019 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1020 
   1021 	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
   1022 
   1023 	unlk		%a6
   1024 
   1025 	bra.l		_real_unfl
   1026 
   1027 # underflow occurred but is disabled. meanwhile, inexact is enabled. therefore,
   1028 # we must exit through the "callout" _real_inex().
   1029 funfl_inex_on:
   1030 
   1031 # The `060 FPU multiplier hardware is such that if the result of a
   1032 # multiply operation is the smallest possible normalized number
   1033 # (0x00000000_80000000_00000000), then the machine will take an
   1034 # underflow exception.
   1035 # But, whether bogus or not, if inexact is enabled AND it occurred,
   1036 # then we have to branch to real_inex.
   1037 
   1038 	btst		&inex2_bit,FPSR_EXCEPT(%a6) # did emulation set inexact?
   1039 	beq.w		funfl_exit		# no; plain exit
   1040 
   1041 funfl_inex_on2:
   1042 
   1043 	fmovm.x		&0x40,FP_SRC(%a6) 	# save EXOP to stack
   1044 
   1045 	mov.b		&0xc4,1+EXC_VOFF(%a6)	# vector offset = 0xc4
   1046 	mov.w		&0xe001,2+FP_SRC(%a6) 	# save exc status
   1047 
   1048 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   1049 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1050 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1051 
   1052 	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
   1053 
   1054 	unlk		%a6
   1055 
   1056 	bra.l		_real_inex
   1057 
   1058 #######################################################################
   1059 funfl_out:
   1060 # fmove out (opclass three) path: fout() emulates the move, then pick the exit.
   1061 
   1062 #$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
   1063 #$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
   1064 #$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
   1065 
   1066 # the src operand is definitely a NORM(!), so tag it as such
   1067 	mov.b		&NORM,STAG(%a6)		# set src optype tag
   1068 
   1069 	clr.l		%d0
   1070 	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
   1071 
   1072 	and.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field (bits 15-8)
   1073 
   1074 	fmov.l		&0x0,%fpcr		# zero current control regs
   1075 	fmov.l		&0x0,%fpsr
   1076 
   1077 	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
   1078 
   1079 	bsr.l		fout
   1080 
   1081 	btst		&unfl_bit,FPCR_ENABLE(%a6) # is underflow enabled?
   1082 	bne.w		funfl_unfl_on2		# yes
   1083 
   1084 	btst		&inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
   1085 	bne.w		funfl_inex_on2		# yes
   1086 
   1087 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   1088 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1089 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1090 
   1091 	unlk		%a6
   1092 #$#	add.l		&24,%sp
   1093 
   1094 	btst		&0x7,(%sp)		# is trace on?
   1095 	beq.l		_fpsp_done		# no
   1096 
   1097 	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
   1098 	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
   1099 	bra.l		_real_trace
   1100 
   1101 #########################################################################
   1102 # XDEF ****************************************************************	#
   1103 #	_fpsp_unsupp(): 060FPSP entry point for FP "Unimplemented	#
   1104 #		        Data Type" exception.				#
   1105 #									#
   1106 #	This handler should be the first code executed upon taking the	#
   1107 #	FP Unimplemented Data Type exception in an operating system.	#
   1108 #									#
   1109 # XREF ****************************************************************	#
   1110 #	_imem_read_{word,long}() - read instruction word/longword	#
   1111 #	fix_skewed_ops() - adjust src operand in fsave frame		#
   1112 #	set_tag_x() - determine optype of src/dst operands		#
   1113 #	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
   1114 #	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
   1115 #	load_fpn2() - load dst operand from FP regfile			#
   1116 #	load_fpn1() - load src operand from FP regfile			#
   1117 #	fout() - emulate an opclass 3 instruction			#
   1118 #	tbl_unsupp - add of table of emulation routines for opclass 0,2	#
   1119 #	_real_inex() - "callout" to operating system inexact handler	#
   1120 #	_fpsp_done() - "callout" for exit; work all done		#
   1121 #	_real_trace() - "callout" for Trace enabled exception		#
   1122 #	funimp_skew() - adjust fsave src ops to "incorrect" value	#
   1123 #	_real_snan() - "callout" for SNAN exception			#
   1124 #	_real_operr() - "callout" for OPERR exception			#
   1125 #	_real_ovfl() - "callout" for OVFL exception			#
   1126 #	_real_unfl() - "callout" for UNFL exception			#
   1127 #	get_packed() - fetch packed operand from memory			#
   1128 #									#
   1129 # INPUT ***************************************************************	#
   1130 #	- The system stack contains the "Unimp Data Type" stk frame	#
   1131 #	- The fsave frame contains the src op (for UNNORM/DENORM)	#
   1132 # 									#
   1133 # OUTPUT **************************************************************	#
   1134 #	If Inexact exception (opclass 3):				#
   1135 #	- The system stack is changed to an Inexact exception stk frame	#
   1136 #	If SNAN exception (opclass 3):					#
   1137 #	- The system stack is changed to an SNAN exception stk frame	#
   1138 #	If OPERR exception (opclass 3):					#
   1139 #	- The system stack is changed to an OPERR exception stk frame	#
   1140 #	If OVFL exception (opclass 3):					#
   1141 #	- The system stack is changed to an OVFL exception stk frame	#
   1142 #	If UNFL exception (opclass 3):					#
   1143 #	- The system stack is changed to an UNFL exception stack frame	#
   1144 #	If Trace exception enabled:					#
   1145 #	- The system stack is changed to a Trace exception stack frame	#
   1146 #	Else: (normal case)						#
   1147 #	- Correct result has been stored as appropriate			#
   1148 #									#
   1149 # ALGORITHM ***********************************************************	#
   1150 #	Two main instruction types can enter here: (1) DENORM or UNNORM	#
   1151 # unimplemented data types. These can be either opclass 0,2 or 3 	#
   1152 # instructions, and (2) PACKED unimplemented data format instructions	#
   1153 # also of opclasses 0,2, or 3.						#
   1154 #	For UNNORM/DENORM opclass 0 and 2, the handler fetches the src	#
   1155 # operand from the fsave state frame and the dst operand (if dyadic)	#
   1156 # from the FP register file. The instruction is then emulated by 	#
   1157 # choosing an emulation routine from a table of routines indexed by	#
   1158 # instruction type. Once the instruction has been emulated and result	#
   1159 # saved, then we check to see if any enabled exceptions resulted from	#
   1160 # instruction emulation. If none, then we exit through the "callout"	#
   1161 # _fpsp_done(). If there is an enabled FP exception, then we insert	#
   1162 # this exception into the FPU in the fsave state frame and then exit	#
   1163 # through _fpsp_done().							#
   1164 #	PACKED opclass 0 and 2 is similar in how the instruction is	#
   1165 # emulated and exceptions handled. The differences occur in how the	#
   1166 # handler loads the packed op (by calling get_packed() routine) and	#
   1167 # by the fact that a Trace exception could be pending for PACKED ops.	#
   1168 # If a Trace exception is pending, then the current exception stack	#
   1169 # frame is changed to a Trace exception stack frame and an exit is	#
   1170 # made through _real_trace().						#
   1171 #	For UNNORM/DENORM opclass 3, the actual move out to memory is	#
   1172 # performed by calling the routine fout(). If no exception should occur	#
   1173 # as the result of emulation, then an exit either occurs through	#
   1174 # _fpsp_done() or through _real_trace() if a Trace exception is pending	#
   1175 # (a Trace stack frame must be created here, too). If an FP exception	#
   1176 # should occur, then we must create an exception stack frame of that	#
   1177 # type and jump to either _real_snan(), _real_operr(), _real_inex(),	#
   1178 # _real_unfl(), or _real_ovfl() as appropriate. PACKED opclass 3 	#
   1179 # emulation is performed in a similar manner.				#
   1180 #									#
   1181 #########################################################################
   1182 
   1183 #
   1184 # (1) DENORM and UNNORM (unimplemented) data types:
   1185 #
   1186 #				post-instruction
   1187 #				*****************
   1188 #				*      EA	*
   1189 #	 pre-instruction	*		*
   1190 # 	*****************	*****************
   1191 #	* 0x0 *  0x0dc  *	* 0x3 *  0x0dc  *
   1192 #	*****************	*****************
   1193 #	*     Next	*	*     Next	*
   1194 #	*      PC	*	*      PC	*
   1195 #	*****************	*****************
   1196 #	*      SR	*	*      SR	*
   1197 #	*****************	*****************
   1198 #
   1199 # (2) PACKED format (unsupported) opclasses two and three:
   1200 #	*****************
   1201 #	*      EA	*
   1202 #	*		*
   1203 #	*****************
   1204 #	* 0x2 *  0x0dc	*
   1205 #	*****************
   1206 #	*     Next	*
   1207 #	*      PC	*
   1208 #	*****************
   1209 #	*      SR	*
   1210 #	*****************
   1211 #
   1212 	global		_fpsp_unsupp
   1213 _fpsp_unsupp:
   1214 # entry: build a local frame, save FPU and integer state, then decode the op
   1215 	link.w		%a6,&-LOCAL_SIZE	# init stack frame
   1216 
   1217 	fsave		FP_SRC(%a6)		# save fp state
   1218 
   1219  	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
   1220 	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
   1221  	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
   1222 
   1223 	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
   1224 	bne.b		fu_s			# supervisor
   1225 fu_u:
   1226 	mov.l		%usp,%a0		# fetch user stack pointer
   1227 	mov.l		%a0,EXC_A7(%a6)		# save on stack
   1228 	bra.b		fu_cont
   1229 # if the exception is an opclass zero or two unimplemented data type
   1230 # exception, then the a7' calculated here is wrong since it doesn't
   1231 # stack an ea. however, we don't need an a7' for this case anyways.
   1232 fu_s:
   1233 	lea		0x4+EXC_EA(%a6),%a0	# load old a7'
   1234 	mov.l		%a0,EXC_A7(%a6)		# save on stack
   1235 
   1236 fu_cont:
   1237 
   1238 # the FPIAR holds the "current PC" of the faulting instruction
   1239 # the FPIAR should be set correctly for ALL exceptions passing through
   1240 # this point.
   1241 	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
   1242 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   1243 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   1244 	bsr.l		_imem_read_long		# fetch the instruction words
   1245 	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD
   1246 
   1247 ############################
   1248 
   1249 	clr.b		SPCOND_FLG(%a6)		# clear special condition flag
   1250 
   1251 # Separate opclass three (fpn-to-mem) ops since they have a different
   1252 # stack frame and protocol.
   1253 	btst		&0x5,EXC_CMDREG(%a6)	# is it an fmove out?
   1254 	bne.w		fu_out			# yes
   1255 
   1256 # Separate packed opclass two instructions.
   1257 	bfextu		EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass,src fmt
   1258 	cmpi.b		%d0,&0x13
   1259 	beq.w		fu_in_pack
   1260 
   1261 
   1262 # I'm not sure at this point what FPSR bits are valid for this instruction.
   1263 # so, since the emulation routines re-create them anyways, zero exception field
   1264 	andi.l		&0x00ff00ff,USER_FPSR(%a6) # zero exception field
   1265 
   1266 	fmov.l		&0x0,%fpcr		# zero current control regs
   1267 	fmov.l		&0x0,%fpsr
   1268 
   1269 # Opclass two w/ memory-to-fpn operation will have an incorrect extended
   1270 # precision format if the src format was single or double and the
   1271 # source data type was an INF, NAN, DENORM, or UNNORM
   1272 	lea		FP_SRC(%a6),%a0		# pass ptr to input
   1273 	bsr.l		fix_skewed_ops
   1274 
   1275 # we don't know whether the src operand or the dst operand (or both) is the
   1276 # UNNORM or DENORM. call the function that tags the operand type. if the
   1277 # input is an UNNORM, then convert it to a NORM, DENORM, or ZERO.
   1278 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
   1279 	bsr.l		set_tag_x		# tag the operand type
   1280 	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
   1281 	bne.b		fu_op2			# no
   1282 	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
   1283 
   1284 fu_op2:
   1285 	mov.b		%d0,STAG(%a6)		# save src optype tag
   1286 
   1287 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dst reg number; only used by load_fpn2 below
   1288 
   1289 # bit five of the fp extension word separates the monadic and dyadic operations
   1290 # at this point
   1291 	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
   1292 	beq.b		fu_extract		# monadic
   1293 	cmpi.b		1+EXC_CMDREG(%a6),&0x3a	# is operation an ftst?
   1294 	beq.b		fu_extract		# yes, so it's monadic, too
   1295 
   1296 	bsr.l		load_fpn2		# load dst into FP_DST
   1297 
   1298 	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
   1299 	bsr.l		set_tag_x		# tag the operand type
   1300 	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
   1301 	bne.b		fu_op2_done		# no
   1302 	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
   1303 fu_op2_done:
   1304 	mov.b		%d0,DTAG(%a6)		# save dst optype tag
   1305 
   1306 fu_extract:
   1307 	clr.l		%d0
   1308 	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
   1309 
   1310 	bfextu		1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
   1311 
   1312 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
   1313 	lea		FP_DST(%a6),%a1		# pass: ptr to dst op
   1314 
   1315 	mov.l		(tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
   1316 	jsr		(tbl_unsupp.l,%pc,%d1.l*1) # table entry is an offset from tbl_unsupp
   1317 
   1318 #
   1319 # Exceptions in order of precedence:
   1320 # 	BSUN	: none
   1321 #	SNAN	: all dyadic ops
   1322 #	OPERR	: fsqrt(-NORM)
   1323 #	OVFL	: all except ftst,fcmp
   1324 #	UNFL	: all except ftst,fcmp
   1325 #	DZ	: fdiv
   1326 # 	INEX2	: all except ftst,fcmp
   1327 #	INEX1	: none (packed doesn't go through here)
   1328 #
   1329 
   1330 # we determine the highest priority exception(if any) set by the
   1331 # emulation routine that has also been enabled by the user.
   1332 	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
   1333 	bne.b		fu_in_ena		# some are enabled
   1334 
   1335 fu_in_cont:
   1336 # fcmp and ftst do not store any result.
   1337 	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch extension
   1338 	andi.b		&0x38,%d0		# extract bits 3-5
   1339 	cmpi.b		%d0,&0x38		# is instr fcmp or ftst?
   1340 	beq.b		fu_in_exit		# yes
   1341 
   1342 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # extract dst reg number
   1343 	bsr.l		store_fpreg		# store the result
   1344 
   1345 fu_in_exit:
   1346 
   1347 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1348 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1349 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1350 
   1351 	unlk		%a6
   1352 
   1353 	bra.l		_fpsp_done
   1354 
   1355 fu_in_ena:
   1356 	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
   1357 	bfffo		%d0{&24:&8},%d0		# find highest priority exception
   1358 	bne.b		fu_in_exc		# there is at least one set
   1359 
   1360 #
   1361 # No exceptions occurred that were also enabled. Now:
   1362 #
   1363 #   	if (OVFL && ovfl_disabled && inexact_enabled) {
   1364 #	    branch to _real_inex() (even if the result was exact!);
   1365 #     	} else {
   1366 #	    save the result in the proper fp reg (unless the op is fcmp or ftst);
   1367 #	    return;
   1368 #     	}
   1369 # (in the code, the first case goes to fu_in_exc_ovfl and the rest to fu_in_cont.)
   1370 	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
   1371 	beq.b		fu_in_cont		# no
   1372 
   1373 fu_in_ovflchk:
   1374 	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
   1375 	beq.b		fu_in_cont		# no
   1376 	bra.w		fu_in_exc_ovfl		# go insert overflow frame
   1377 
   1378 #
   1379 # An exception occurred and that exception was enabled:
   1380 #
   1381 #	shift enabled exception field into lo byte of d0;
   1382 #	if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
   1383 #	    ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
   1384 #		/*
   1385 #		 * this is the case where we must call _real_inex() now or else
   1386 #		 * there will be no other way to pass it the exceptional operand
   1387 #		 */
   1388 #		call _real_inex();
   1389 #	} else {
   1390 #		restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
   1391 #	}
   1392 #
   1393 fu_in_exc:
   1394 	subi.l		&24,%d0			# fix offset to be 0-7
   1395 	cmpi.b		%d0,&0x6		# is exception INEX? (6)
   1396 	bne.b		fu_in_exc_exit		# no
   1397 
   1398 # the enabled exception was inexact
   1399 	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
   1400 	bne.w		fu_in_exc_unfl		# yes
   1401 	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
   1402 	bne.w		fu_in_exc_ovfl		# yes
   1403 
   1404 # here, we insert the correct fsave status value into the fsave frame for the
   1405 # corresponding exception. the operand in the fsave frame should be the original
   1406 # src operand.
   1407 fu_in_exc_exit:
   1408 	mov.l		%d0,-(%sp)		# save d0
   1409 	bsr.l		funimp_skew		# skew sgl or dbl inputs
   1410 	mov.l		(%sp)+,%d0		# restore d0
   1411 
   1412 	mov.w		(tbl_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # create exc status
   1413 
   1414 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1415 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1416 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1417 
   1418 	frestore	FP_SRC(%a6)		# restore src op
   1419 
   1420 	unlk		%a6
   1421 
   1422 	bra.l		_fpsp_done
   1423 # fsave status words, indexed by the 0-7 exception number held in d0 above
   1424 tbl_except:
   1425 	short		0xe000,0xe006,0xe004,0xe005
   1426 	short		0xe003,0xe002,0xe001,0xe001
   1427 
   1428 fu_in_exc_unfl:
   1429 	mov.w		&0x4,%d0		# select tbl_except entry 4 (0xe003)
   1430 	bra.b		fu_in_exc_exit
   1431 fu_in_exc_ovfl:
   1432 	mov.w		&0x03,%d0		# select tbl_except entry 3 (0xe005)
   1433 	bra.b		fu_in_exc_exit
   1434 
   1435 # fix_skewed_ops(): if the input operand to this operation was opclass two and
   1436 # a single or double precision denorm, zero, inf, or nan, the operand at (a0)
   1437 # is "corrected" in place to the proper equivalent extended precision number.
   1438 # in: a0 = ptr to operand; EXC_CMDREG(%a6) = command word. d0 is scratch.
   1439 	global		fix_skewed_ops
   1440 fix_skewed_ops:
   1441 	bfextu		EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass,src fmt
   1442 	cmpi.b		%d0,&0x11		# is class = 2 & fmt = sgl?
   1443 	beq.b		fso_sgl			# yes
   1444 	cmpi.b		%d0,&0x15		# is class = 2 & fmt = dbl?
   1445 	beq.b		fso_dbl			# yes
   1446 	rts					# no
   1447 # sgl source: exponent $3f80 => skewed denorm/zero; $407f => skewed inf/nan
   1448 fso_sgl:
   1449 	mov.w		LOCAL_EX(%a0),%d0	# fetch src exponent
   1450 	andi.w		&0x7fff,%d0		# strip sign
   1451 	cmpi.w		%d0,&0x3f80		# is |exp| == $3f80?
   1452 	beq.b		fso_sgl_dnrm_zero	# yes
   1453 	cmpi.w		%d0,&0x407f		# no; is |exp| == $407f?
   1454 	beq.b		fso_infnan		# yes
   1455 	rts					# no
   1456 
   1457 fso_sgl_dnrm_zero:
   1458 	andi.l		&0x7fffffff,LOCAL_HI(%a0) # clear j-bit
   1459 	beq.b		fso_zero		# it's a skewed zero
   1460 fso_sgl_dnrm:
   1461 # here, we count on norm not to alter a0...
   1462 	bsr.l		norm			# normalize mantissa
   1463 	neg.w		%d0			# -shft amt
   1464 	addi.w		&0x3f81,%d0		# adjust new exponent
   1465 	andi.w		&0x8000,LOCAL_EX(%a0) 	# clear old exponent
   1466 	or.w		%d0,LOCAL_EX(%a0)	# insert new exponent
   1467 	rts
   1468 
   1469 fso_zero:
   1470 	andi.w		&0x8000,LOCAL_EX(%a0)	# clear bogus exponent
   1471 	rts
   1472 
   1473 fso_infnan:
   1474 	andi.b		&0x7f,LOCAL_HI(%a0) 	# clear j-bit
   1475 	ori.w		&0x7fff,LOCAL_EX(%a0)	# make exponent = $7fff
   1476 	rts
   1477 # dbl source: exponent $3c00 => skewed denorm/zero; $43ff => skewed inf/nan
   1478 fso_dbl:
   1479 	mov.w		LOCAL_EX(%a0),%d0	# fetch src exponent
   1480 	andi.w		&0x7fff,%d0		# strip sign
   1481 	cmpi.w		%d0,&0x3c00		# is |exp| == $3c00?
   1482 	beq.b		fso_dbl_dnrm_zero	# yes
   1483 	cmpi.w		%d0,&0x43ff		# no; is |exp| == $43ff?
   1484 	beq.b		fso_infnan		# yes
   1485 	rts					# no
   1486 
   1487 fso_dbl_dnrm_zero:
   1488 	andi.l		&0x7fffffff,LOCAL_HI(%a0) # clear j-bit
   1489 	bne.b		fso_dbl_dnrm		# it's a skewed denorm
   1490 	tst.l		LOCAL_LO(%a0)		# is it a zero?
   1491 	beq.b		fso_zero		# yes
   1492 fso_dbl_dnrm:
   1493 # here, we count on norm not to alter a0...
   1494 	bsr.l		norm			# normalize mantissa
   1495 	neg.w		%d0			# -shft amt
   1496 	addi.w		&0x3c01,%d0		# adjust new exponent
   1497 	andi.w		&0x8000,LOCAL_EX(%a0) 	# clear old exponent
   1498 	or.w		%d0,LOCAL_EX(%a0)	# insert new exponent
   1499 	rts
   1500 
   1501 #################################################################
   1502 
   1503 # fmove out took an unimplemented data type exception.
   1504 # the src operand is in FP_SRC. Call fout() to write out the result and
   1505 # to determine which exceptions, if any, to take.
   1506 fu_out:
   1507 
   1508 # Separate packed move outs from the UNNORM and DENORM move outs.
   1509 	bfextu		EXC_CMDREG(%a6){&3:&3},%d0 # extract 3-bit field at offset 3
   1510 	cmpi.b		%d0,&0x3
   1511 	beq.w		fu_out_pack
   1512 	cmpi.b		%d0,&0x7
   1513 	beq.w		fu_out_pack
   1514 
   1515 
   1516 # I'm not sure at this point what FPSR bits are valid for this instruction.
   1517 # so, since the emulation routines re-create them anyways, zero exception field.
   1518 # fmove out doesn't affect ccodes.
   1519 	and.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field
   1520 
   1521 	fmov.l		&0x0,%fpcr		# zero current control regs
   1522 	fmov.l		&0x0,%fpsr
   1523 
   1524 # the src can ONLY be a DENORM or an UNNORM! so, don't make any big subroutine
   1525 # call here. just figure out what it is...
   1526 	mov.w		FP_SRC_EX(%a6),%d0	# get exponent
   1527 	andi.w		&0x7fff,%d0		# strip sign
   1528 	beq.b		fu_out_denorm		# it's a DENORM
   1529 
   1530 	lea		FP_SRC(%a6),%a0
   1531 	bsr.l		unnorm_fix		# yes; fix it
   1532 
   1533 	mov.b		%d0,STAG(%a6)		# save src optype tag
   1534 
   1535 	bra.b		fu_out_cont
   1536 fu_out_denorm:
   1537 	mov.b		&DENORM,STAG(%a6)
   1538 fu_out_cont:
   1539 
   1540 	clr.l		%d0
   1541 	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
   1542 
   1543 	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
   1544 
   1545 	mov.l		(%a6),EXC_A6(%a6)	# in case a6 changes
   1546 	bsr.l		fout			# call fmove out routine
   1547 
   1548 # Exceptions in order of precedence:
   1549 # 	BSUN	: none
   1550 #	SNAN	: none
   1551 #	OPERR	: fmove.{b,w,l} out of large UNNORM
   1552 #	OVFL	: fmove.{s,d}
   1553 #	UNFL	: fmove.{s,d,x}
   1554 #	DZ	: none
   1555 # 	INEX2	: all
   1556 #	INEX1	: none (packed doesn't travel through here)
   1557 
   1558 # determine the highest priority exception(if any) set by the
   1559 # emulation routine that has also been enabled by the user.
   1560 	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
   1561 	bne.w		fu_out_ena		# some are enabled
   1562 
   1563 fu_out_done:
   1564 
   1565 	mov.l		EXC_A6(%a6),(%a6)	# in case a6 changed
   1566 
   1567 # on extended precision opclass three instructions using pre-decrement or
   1568 # post-increment addressing mode, the address register is not updated. if the
   1569 # address register was the stack pointer used from user mode, then let's update
   1570 # it here. if it was used from supervisor mode, then we have to handle this
   1571 # as a special case.
   1572 	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
   1573 	bne.b		fu_out_done_s		# supervisor
   1574 
   1575 	mov.l		EXC_A7(%a6),%a0		# restore a7
   1576 	mov.l		%a0,%usp
   1577 
   1578 fu_out_done_cont:
   1579 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1580 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1581 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1582 
   1583 	unlk		%a6
   1584 
   1585 	btst		&0x7,(%sp)		# is trace on?
   1586 	bne.b		fu_out_trace		# yes
   1587 
   1588 	bra.l		_fpsp_done
   1589 
   1590 # is the ea mode pre-decrement of the stack pointer from supervisor mode?
   1591 # ("fmov.x fpm,-(a7)") if so,
   1592 fu_out_done_s:
   1593 	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
   1594 	bne.b		fu_out_done_cont
   1595 
   1596 # the extended precision result is still in fp0. but, we need to save it
   1597 # somewhere on the stack until we can copy it to its final resting place.
   1598 # here, we're counting on the top of the stack to be the old place-holders
   1599 # for fp0/fp1 which have already been restored. that way, we can write
   1600 # over those destinations with the shifted stack frame.
   1601 	fmovm.x		&0x80,FP_SRC(%a6)	# put answer on stack
   1602 
   1603 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1604 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1605 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1606 
   1607 	mov.l		(%a6),%a6		# restore frame pointer
   1608 
   1609 	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
   1610 	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
   1611 
   1612 # now, copy the result to the proper place on the stack
   1613 	mov.l		LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
   1614 	mov.l		LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
   1615 	mov.l		LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
   1616 
   1617 	add.l		&LOCAL_SIZE-0x8,%sp
   1618 
   1619 	btst		&0x7,(%sp)
   1620 	bne.b		fu_out_trace
   1621 
   1622 	bra.l		_fpsp_done
   1623 
   1624 fu_out_ena:
   1625 	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
   1626 	bfffo		%d0{&24:&8},%d0		# find highest priority exception
   1627 	bne.b		fu_out_exc		# there is at least one set
   1628 
   1629 # no exceptions were set.
   1630 # if a disabled overflow occurred and inexact was enabled but the result
   1631 # was exact, then a branch to _real_inex() is made.
   1632 	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
   1633 	beq.w		fu_out_done		# no
   1634 
   1635 fu_out_ovflchk:
   1636 	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
   1637 	beq.w		fu_out_done		# no
   1638 	bra.w		fu_inex			# yes
   1639 
   1640 #
   1641 # The fp move out that took the "Unimplemented Data Type" exception was
   1642 # being traced. Since the stack frames are similar, get the "current" PC
   1643 # from FPIAR and put it in the trace stack frame then jump to _real_trace().
   1644 #
   1645 #		  UNSUPP FRAME		   TRACE FRAME
   1646 # 		*****************	*****************
   1647 #		*      EA	*	*    Current	*
   1648 #		*		*	*      PC	*
   1649 #		*****************	*****************
   1650 #		* 0x3 *  0x0dc	*	* 0x2 *  0x024	*
   1651 #		*****************	*****************
   1652 #		*     Next	*	*     Next	*
   1653 #		*      PC	*	*      PC	*
   1654 #		*****************	*****************
   1655 #		*      SR	*	*      SR	*
   1656 #		*****************	*****************
   1657 #
   1658 fu_out_trace:
   1659 	mov.w		&0x2024,0x6(%sp)	# frame word = 0x2024 (TRACE FRAME above)
   1660 	fmov.l		%fpiar,0x8(%sp)		# "current" PC = fpiar
   1661 	bra.l		_real_trace
   1662 
   1663 # an exception occurred and that exception was enabled.
   1664 fu_out_exc:
   1665 	subi.l		&24,%d0			# fix offset to be 0-7
   1666 
   1667 # we don't mess with the existing fsave frame. just re-insert it and
   1668 # jump to the "_real_{}()" handler...
   1669 	mov.w		(tbl_fu_out.b,%pc,%d0.w*2),%d0 # fetch handler offset from table
   1670 	jmp		(tbl_fu_out.b,%pc,%d0.w*1) # jump to tbl_fu_out + offset
   1671 
   1672 	swbeg		&0x8
   1673 tbl_fu_out:
   1674 	short		tbl_fu_out	- tbl_fu_out	# BSUN can't happen
   1675 	short		tbl_fu_out 	- tbl_fu_out	# SNAN can't happen
   1676 	short		fu_operr	- tbl_fu_out	# OPERR
   1677 	short		fu_ovfl 	- tbl_fu_out	# OVFL
   1678 	short		fu_unfl 	- tbl_fu_out	# UNFL
   1679 	short		tbl_fu_out	- tbl_fu_out	# DZ can't happen
   1680 	short		fu_inex 	- tbl_fu_out	# INEX2
   1681 	short		tbl_fu_out	- tbl_fu_out	# INEX1 won't make it here
   1682 
   1683 # for snan and operr, the src op is still in FP_SRC so just frestore it.
   1684 # (fu_ovfl, fu_unfl and fu_inex first overwrite FP_SRC with the EXOP.)
   1685 fu_snan:
   1686 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1687 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1688 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1689 
   1690 	mov.w		&0x30d8,EXC_VOFF(%a6)	# vector offset = 0xd8
   1691 	mov.w		&0xe006,2+FP_SRC(%a6)	# set fsave status
   1692 
   1693 	frestore	FP_SRC(%a6)		# restore src operand
   1694 
   1695 	unlk		%a6
   1696 
   1697 
   1698 	bra.l		_real_snan
   1699 
   1700 fu_operr:
   1701 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1702 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1703 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1704 
   1705 	mov.w		&0x30d0,EXC_VOFF(%a6)	# vector offset = 0xd0
   1706 	mov.w		&0xe004,2+FP_SRC(%a6)	# set fsave status
   1707 
   1708 	frestore	FP_SRC(%a6)		# restore src operand
   1709 
   1710 	unlk		%a6
   1711 
   1712 
   1713 	bra.l		_real_operr
   1714 
   1715 fu_ovfl:
   1716 	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack
   1717 
   1718 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1719 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1720 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1721 
   1722 	mov.w		&0x30d4,EXC_VOFF(%a6)	# vector offset = 0xd4
   1723 	mov.w		&0xe005,2+FP_SRC(%a6)	# set fsave status
   1724 
   1725 	frestore	FP_SRC(%a6)		# restore EXOP
   1726 
   1727 	unlk		%a6
   1728 
   1729 	bra.l		_real_ovfl
   1730 
   1731 # underflow can happen for extended precision. extended precision opclass
   1732 # three instruction exceptions don't update the stack pointer. so, if the
   1733 # exception occurred from user mode, then simply update a7 and exit normally.
   1734 # if the exception occurred from supervisor mode, check if the <ea> mode was -(sp) (see fu_unfl_s).
   1735 fu_unfl:
   1736 	mov.l		EXC_A6(%a6),(%a6)	# restore a6
   1737 
   1738 	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
   1739 	bne.w		fu_unfl_s		# supervisor
   1740 
   1741 	mov.l		EXC_A7(%a6),%a0		# restore a7 whether we need
   1742 	mov.l		%a0,%usp		# to or not...
   1743 
   1744 fu_unfl_cont:
   1745 	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack
   1746 
   1747 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1748 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1749 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1750 
   1751 	mov.w		&0x30cc,EXC_VOFF(%a6)	# vector offset = 0xcc
   1752 	mov.w		&0xe003,2+FP_SRC(%a6)	# set fsave status
   1753 
   1754 	frestore	FP_SRC(%a6)		# restore EXOP
   1755 
   1756 	unlk		%a6
   1757 
   1758 	bra.l		_real_unfl
   1759 
   1760 fu_unfl_s:
   1761 	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)?
   1762 	bne.b		fu_unfl_cont		# no
   1763 
   1764 # the extended precision result is still in fp0. but, we need to save it
   1765 # somewhere on the stack until we can copy it to its final resting place
   1766 # (where the exc frame is currently). make sure it's not at the top of the
   1767 # frame or it will get overwritten when the exc stack frame is shifted "down".
   1768 	fmovm.x		&0x80,FP_SRC(%a6)	# put answer on stack
   1769 	fmovm.x		&0x40,FP_DST(%a6)	# put EXOP on stack
   1770 
   1771 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1772 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1773 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1774 
   1775 	mov.w		&0x30cc,EXC_VOFF(%a6)	# vector offset = 0xcc
   1776 	mov.w		&0xe003,2+FP_DST(%a6)	# set fsave status (EXOP is in FP_DST here)
   1777 
   1778 	frestore	FP_DST(%a6)		# restore EXOP
   1779 
   1780 	mov.l		(%a6),%a6		# restore frame pointer
   1781 
   1782 	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) # copy frame 12 bytes lower
   1783 	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
   1784 	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
   1785 
   1786 # now, copy the result to the proper place on the stack
   1787 	mov.l		LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
   1788 	mov.l		LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
   1789 	mov.l		LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
   1790 
   1791 	add.l		&LOCAL_SIZE-0x8,%sp	# sp -> shifted exc frame
   1792 
   1793 	bra.l		_real_unfl
   1794 
   1795 # fmove in and out enter here.
   1796 fu_inex:
   1797 	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack
   1798 
   1799 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1800 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1801 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1802 
   1803 	mov.w		&0x30c4,EXC_VOFF(%a6)	# vector offset = 0xc4
   1804 	mov.w		&0xe001,2+FP_SRC(%a6)	# set fsave status
   1805 
   1806 	frestore	FP_SRC(%a6)		# restore EXOP
   1807 
   1808 	unlk		%a6
   1809 
   1810 
   1811 	bra.l		_real_inex
   1812 
   1813 #########################################################################
   1814 #########################################################################
   1815 fu_in_pack:
   1816 
   1817 
   1818 # I'm not sure at this point what FPSR bits are valid for this instruction.
   1819 # so, since the emulation routines re-create them anyways, zero exception field
   1820 	andi.l		&0x0ff00ff,USER_FPSR(%a6) # zero exception field (mask also clears bits 31-24)
   1821 
   1822 	fmov.l		&0x0,%fpcr		# zero current control regs
   1823 	fmov.l		&0x0,%fpsr
   1824 
   1825 	bsr.l		get_packed		# fetch packed src operand
   1826 
   1827 	lea		FP_SRC(%a6),%a0		# pass ptr to src
   1828 	bsr.l		set_tag_x		# set src optype tag
   1829 
   1830 	mov.b		%d0,STAG(%a6)		# save src optype tag
   1831 
   1832 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
   1833 
   1834 # bit five of the fp extension word separates the monadic and dyadic operations
   1835 # at this point
   1836 	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
   1837 	beq.b		fu_extract_p		# monadic
   1838 	cmpi.b		1+EXC_CMDREG(%a6),&0x3a	# is operation an ftst?
   1839 	beq.b		fu_extract_p		# yes, so it's monadic, too
   1840 
   1841 	bsr.l		load_fpn2		# load dst into FP_DST
   1842 
   1843 	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
   1844 	bsr.l		set_tag_x		# tag the operand type
   1845 	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
   1846 	bne.b		fu_op2_done_p		# no
   1847 	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
   1848 fu_op2_done_p:
   1849 	mov.b		%d0,DTAG(%a6)		# save dst optype tag
   1850 
   1851 fu_extract_p:
   1852 	clr.l		%d0
   1853 	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
   1854 
   1855 	bfextu		1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
   1856 
   1857 	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
   1858 	lea		FP_DST(%a6),%a1		# pass ptr to dst operand
   1859 
   1860 	mov.l		(tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
   1861 	jsr		(tbl_unsupp.l,%pc,%d1.l*1) # call tbl_unsupp + fetched offset
   1862 
   1863 #
   1864 # Exceptions in order of precedence:
   1865 # 	BSUN	: none
   1866 #	SNAN	: all dyadic ops
   1867 #	OPERR	: fsqrt(-NORM)
   1868 #	OVFL	: all except ftst,fcmp
   1869 #	UNFL	: all except ftst,fcmp
   1870 #	DZ	: fdiv
   1871 # 	INEX2	: all except ftst,fcmp
   1872 #	INEX1	: all
   1873 #
   1874 
   1875 # we determine the highest priority exception(if any) set by the
   1876 # emulation routine that has also been enabled by the user.
   1877 	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
   1878 	bne.w		fu_in_ena_p		# some are enabled
   1879 
   1880 fu_in_cont_p:
   1881 # fcmp and ftst do not store any result.
   1882 	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch extension
   1883 	andi.b		&0x38,%d0		# extract bits 3-5
   1884 	cmpi.b		%d0,&0x38		# is instr fcmp or ftst?
   1885 	beq.b		fu_in_exit_p		# yes
   1886 
   1887 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
   1888 	bsr.l		store_fpreg		# store the result
   1889 
   1890 fu_in_exit_p:
   1891 
   1892 	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
   1893 	bne.w		fu_in_exit_s_p		# supervisor
   1894 
   1895 	mov.l		EXC_A7(%a6),%a0		# update user a7
   1896 	mov.l		%a0,%usp
   1897 
   1898 fu_in_exit_cont_p:
   1899 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1900 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1901 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1902 
   1903 	unlk		%a6			# unravel stack frame
   1904 
   1905 	btst		&0x7,(%sp)		# is trace on?
   1906 	bne.w		fu_trace_p		# yes
   1907 
   1908 	bra.l		_fpsp_done		# exit to os
   1909 
   1910 # the exception occurred in supervisor mode. check to see if the
   1911 # addressing mode was (a7)+. if so, we'll need to shift the
   1912 # stack frame "up".
   1913 fu_in_exit_s_p:
   1914 	btst		&mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+
   1915 	beq.b		fu_in_exit_cont_p	# no
   1916 
   1917 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1918 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1919 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1920 
   1921 	unlk		%a6			# unravel stack frame
   1922 
   1923 # shift the stack frame "up". we don't really care about the <ea> field.
   1924 	mov.l		0x4(%sp),0x10(%sp)	# copy second longword 12 bytes up
   1925 	mov.l		0x0(%sp),0xc(%sp)	# copy first longword 12 bytes up
   1926 	add.l		&0xc,%sp		# sp -> shifted frame
   1927 
   1928 	btst		&0x7,(%sp)		# is trace on?
   1929 	bne.w		fu_trace_p		# yes
   1930 
   1931 	bra.l		_fpsp_done		# exit to os
   1932 
   1933 fu_in_ena_p:
   1934 	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled & set
   1935 	bfffo		%d0{&24:&8},%d0		# find highest priority exception
   1936 	bne.b		fu_in_exc_p		# at least one was set
   1937 
   1938 #
   1939 # No exceptions occurred that were also enabled. Now:
   1940 #
   1941 #   	if (OVFL && ovfl_disabled && inexact_enabled) {
   1942 #	    branch to _real_inex() (even if the result was exact!);
   1943 #     	} else {
   1944 #	    save the result in the proper fp reg (unless the op is fcmp or ftst);
   1945 #	    return;
   1946 #     	}
   1947 #
        # NOTE(review): the branch below goes to fu_in_exc_ovfl_p, which selects
        # entry 3 of tbl_except_p (0xe005) and leaves through fu_in_exc_exit_p;
        # it does not branch to _real_inex() directly -- confirm intended path.
   1948 	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
   1949 	beq.w		fu_in_cont_p		# no
   1950 
   1951 fu_in_ovflchk_p:
   1952 	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
   1953 	beq.w		fu_in_cont_p		# no
   1954 	bra.w		fu_in_exc_ovfl_p	# do _real_inex() now
   1955 
   1956 #
   1957 # An exception occurred and that exception was enabled:
   1958 #
   1959 #	shift enabled exception field into lo byte of d0;
   1960 #	if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
   1961 #	    ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
   1962 #		/*
   1963 #		 * this is the case where we must call _real_inex() now or else
   1964 #		 * there will be no other way to pass it the exceptional operand
   1965 #		 */
   1966 #		call _real_inex();
   1967 #	} else {
   1968 #		restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
   1969 #	}
   1970 #
   1971 fu_in_exc_p:
   1972 	subi.l		&24,%d0			# fix offset to be 0-7
   1973 	cmpi.b		%d0,&0x6		# is exception INEX? (6 or 7)
   1974 	blt.b		fu_in_exc_exit_p	# no
   1975 
   1976 # the enabled exception was inexact
   1977 	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
   1978 	bne.w		fu_in_exc_unfl_p	# yes
   1979 	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
   1980 	bne.w		fu_in_exc_ovfl_p	# yes
   1981 
   1982 # here, we insert the correct fsave status value into the fsave frame for the
   1983 # corresponding exception. the operand in the fsave frame should be the original
   1984 # src operand.
   1985 # as a reminder for future predicted pain and agony, we are passing in fsave the
   1986 # "non-skewed" operand for cases of sgl and dbl src INFs,NANs, and DENORMs.
   1987 # this is INCORRECT for enabled SNAN which would give to the user the skewed SNAN!!!
   1988 fu_in_exc_exit_p:
   1989 	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
   1990 	bne.w		fu_in_exc_exit_s_p	# supervisor
   1991 
   1992 	mov.l		EXC_A7(%a6),%a0		# update user a7
   1993 	mov.l		%a0,%usp
   1994 
   1995 fu_in_exc_exit_cont_p:
   1996 	mov.w		(tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6) # set fsave status
   1997 
   1998 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1999 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2000 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2001 
   2002 	frestore	FP_SRC(%a6)		# restore src op
   2003 
   2004 	unlk		%a6
   2005 
   2006 	btst		&0x7,(%sp)		# is trace enabled?
   2007 	bne.w		fu_trace_p		# yes
   2008 
   2009 	bra.l		_fpsp_done
   2010 
   2011 tbl_except_p:
   2012 	short		0xe000,0xe006,0xe004,0xe005	# BSUN,SNAN,OPERR,OVFL
   2013 	short		0xe003,0xe002,0xe001,0xe001	# UNFL,DZ,INEX2,INEX1
   2014 
   2015 fu_in_exc_ovfl_p:
   2016 	mov.w		&0x3,%d0		# tbl_except_p index 3 (OVFL)
   2017 	bra.w		fu_in_exc_exit_p
   2018 
   2019 fu_in_exc_unfl_p:
   2020 	mov.w		&0x4,%d0		# tbl_except_p index 4 (UNFL)
   2021 	bra.w		fu_in_exc_exit_p
   2022 
   2023 fu_in_exc_exit_s_p:
   2024 	btst		&mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+
   2025 	beq.b		fu_in_exc_exit_cont_p	# no
   2026 
   2027 	mov.w		(tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6) # set fsave status
   2028 
   2029 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   2030 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2031 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2032 
   2033 	frestore	FP_SRC(%a6)		# restore src op
   2034 
   2035 	unlk		%a6			# unravel stack frame
   2036 
   2037 # shift stack frame "up". who cares about <ea> field.
   2038 	mov.l		0x4(%sp),0x10(%sp)	# copy second longword 12 bytes up
   2039 	mov.l		0x0(%sp),0xc(%sp)	# copy first longword 12 bytes up
   2040 	add.l		&0xc,%sp		# sp -> shifted frame
   2041 
   2042 	btst		&0x7,(%sp)		# is trace on?
   2043 	bne.b		fu_trace_p		# yes
   2044 
   2045 	bra.l		_fpsp_done		# exit to os
   2046 
   2047 #
   2048 # The opclass two PACKED instruction that took an "Unimplemented Data Type"
   2049 # exception was being traced. Make the "current" PC the FPIAR and put it in the
   2050 # trace stack frame then jump to _real_trace().
   2051 #
   2052 #		  UNSUPP FRAME		   TRACE FRAME
   2053 #		*****************	*****************
   2054 #		*      EA	*	*    Current	*
   2055 #		*		*	*      PC	*
   2056 #		*****************	*****************
   2057 #		* 0x2 *	0x0dc	* 	* 0x2 *  0x024	*
   2058 #		*****************	*****************
   2059 #		*     Next	*	*     Next	*
   2060 #		*      PC	*      	*      PC	*
   2061 #		*****************	*****************
   2062 #		*      SR	*	*      SR	*
   2063 #		*****************	*****************
   2064 fu_trace_p:
   2065 	mov.w		&0x2024,0x6(%sp)	# frame word = 0x2024 (TRACE FRAME above)
   2066 	fmov.l		%fpiar,0x8(%sp)		# "current" PC = fpiar
   2067 
   2068 	bra.l		_real_trace
   2069 
   2070 #########################################################
   2071 #########################################################
   2072 fu_out_pack:
   2073 
   2074 
   2075 # I'm not sure at this point what FPSR bits are valid for this instruction.
   2076 # so, since the emulation routines re-create them anyways, zero exception field.
   2077 # fmove out doesn't affect ccodes.
   2078 	and.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field
   2079 
   2080 	fmov.l		&0x0,%fpcr		# zero current control regs
   2081 	fmov.l		&0x0,%fpsr
   2082 
   2083 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # extract 3-bit field {6:3}
   2084 	bsr.l		load_fpn1		# presumably loads that fp reg into FP_SRC -- confirm
   2085 
   2086 # unlike other opclass 3, unimplemented data type exceptions, packed must be
   2087 # able to detect all operand types.
   2088 	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
   2089 	bsr.l		set_tag_x		# tag the operand type
   2090 	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
   2091 	bne.b		fu_op2_p		# no
   2092 	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
   2093 
   2094 fu_op2_p:
   2095 	mov.b		%d0,STAG(%a6)		# save src optype tag
   2096 
   2097 	clr.l		%d0
   2098 	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
   2099 
   2100 	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
   2101 
   2102 	mov.l		(%a6),EXC_A6(%a6)	# in case a6 changes
   2103 	bsr.l		fout			# call fmove out routine
   2104 
   2105 # Exceptions in order of precedence:
   2106 # 	BSUN	: no
   2107 #	SNAN	: yes
   2108 #	OPERR	: if ((k_factor > +17) || (dec. exp exceeds 3 digits))
   2109 #	OVFL	: no
   2110 #	UNFL	: no
   2111 #	DZ	: no
   2112 # 	INEX2	: yes
   2113 #	INEX1	: no
   2114 
   2115 # determine the highest priority exception(if any) set by the
   2116 # emulation routine that has also been enabled by the user.
   2117 	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
   2118 	bne.w		fu_out_ena_p		# some are enabled
   2119 
   2120 fu_out_exit_p:
   2121 	mov.l		EXC_A6(%a6),(%a6)	# restore a6
   2122 
   2123 	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
   2124 	bne.b		fu_out_exit_s_p		# supervisor
   2125 
   2126 	mov.l		EXC_A7(%a6),%a0		# update user a7
   2127 	mov.l		%a0,%usp
   2128 
   2129 fu_out_exit_cont_p:
   2130 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   2131 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2132 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2133 
   2134 	unlk		%a6			# unravel stack frame
   2135 
   2136 	btst		&0x7,(%sp)		# is trace on?
   2137 	bne.w		fu_trace_p		# yes
   2138 
   2139 	bra.l		_fpsp_done		# exit to os
   2140 
   2141 # the exception occurred in supervisor mode. check to see if the
   2142 # addressing mode was -(a7). if so, we'll need to shift the
   2143 # stack frame "down".
   2144 fu_out_exit_s_p:
   2145 	btst		&mda7_bit,SPCOND_FLG(%a6) # was ea mode -(a7)
   2146 	beq.b		fu_out_exit_cont_p	# no
   2147 
   2148 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   2149 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2150 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2151 
   2152 	mov.l		(%a6),%a6		# restore frame pointer
   2153 
   2154 	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) # copy frame 12 bytes lower
   2155 	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
   2156 
   2157 # now, copy the result to the proper place on the stack
   2158 	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
   2159 	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
   2160 	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
   2161 
   2162 	add.l		&LOCAL_SIZE-0x8,%sp	# sp -> shifted exc frame
   2163 
   2164 	btst		&0x7,(%sp)		# is trace on?
   2165 	bne.w		fu_trace_p		# yes
   2166 
   2167 	bra.l		_fpsp_done
   2168 
   2169 fu_out_ena_p:
   2170 	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
   2171 	bfffo		%d0{&24:&8},%d0		# find highest priority exception
   2172 	beq.w		fu_out_exit_p		# none set; exit normally
   2173 
   2174 	mov.l		EXC_A6(%a6),(%a6)	# restore a6
   2175 
   2176 # an exception occurred and that exception was enabled.
   2177 # the only exception possible on packed move out are INEX, OPERR, and SNAN.
   2178 fu_out_exc_p:
   2179 	cmpi.b		%d0,&0x1a		# d0 = 24 + exception index; 0x1a = OPERR
   2180 	bgt.w		fu_inex_p2		# above OPERR: must be INEX
   2181 	beq.w		fu_operr_p		# OPERR; otherwise fall into SNAN
   2182 
   2183 fu_snan_p:
   2184 	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
   2185 	bne.b		fu_snan_s_p		# supervisor
   2186 
   2187 	mov.l		EXC_A7(%a6),%a0		# update user a7
   2188 	mov.l		%a0,%usp
   2189 	bra.w		fu_snan
   2190 
   2191 fu_snan_s_p:
   2192 	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)?
   2193 	bne.w		fu_snan			# no
   2194 
   2195 # the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
   2196 # the strategy is to move the exception frame "down" 12 bytes. then, we
   2197 # can store the default result where the exception frame was.
   2198 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   2199 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2200 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2201 
   2202 	mov.w		&0x30d8,EXC_VOFF(%a6)	# vector offset = 0xd8
   2203 	mov.w		&0xe006,2+FP_SRC(%a6) 	# set fsave status
   2204 
   2205 	frestore	FP_SRC(%a6)		# restore src operand
   2206 
   2207 	mov.l		(%a6),%a6		# restore frame pointer
   2208 
   2209 	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
   2210 	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
   2211 	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
   2212 
   2213 # now, we copy the default result to its proper location
   2214 	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
   2215 	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
   2216 	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
   2217 
   2218 	add.l		&LOCAL_SIZE-0x8,%sp	# sp -> shifted exc frame
   2219 
   2220 
   2221 	bra.l		_real_snan
   2222 
   2223 fu_operr_p:
   2224 	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
   2225 	bne.w		fu_operr_p_s		# supervisor
   2226 
   2227 	mov.l		EXC_A7(%a6),%a0		# update user a7
   2228 	mov.l		%a0,%usp
   2229 	bra.w		fu_operr
   2230 
   2231 fu_operr_p_s:
   2232 	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)?
   2233 	bne.w		fu_operr		# no
   2234 
   2235 # the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
   2236 # the strategy is to move the exception frame "down" 12 bytes. then, we
   2237 # can store the default result where the exception frame was.
   2238 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   2239 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2240 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2241 
   2242 	mov.w		&0x30d0,EXC_VOFF(%a6)	# vector offset = 0xd0
   2243 	mov.w		&0xe004,2+FP_SRC(%a6) 	# set fsave status
   2244 
   2245 	frestore	FP_SRC(%a6)		# restore src operand
   2246 
   2247 	mov.l		(%a6),%a6		# restore frame pointer
   2248 
   2249 	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
   2250 	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
   2251 	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
   2252 
   2253 # now, we copy the default result to its proper location
   2254 	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
   2255 	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
   2256 	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
   2257 
   2258 	add.l		&LOCAL_SIZE-0x8,%sp	# sp -> shifted exc frame
   2259 
   2260 
   2261 	bra.l		_real_operr
   2262 
   2263 fu_inex_p2:
   2264 	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
   2265 	bne.w		fu_inex_s_p2		# supervisor
   2266 
   2267 	mov.l		EXC_A7(%a6),%a0		# update user a7
   2268 	mov.l		%a0,%usp
   2269 	bra.w		fu_inex
   2270 
   2271 fu_inex_s_p2:
   2272 	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)?
   2273 	bne.w		fu_inex			# no
   2274 
   2275 # the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
   2276 # the strategy is to move the exception frame "down" 12 bytes. then, we
   2277 # can store the default result where the exception frame was.
   2278 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   2279 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2280 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2281 
   2282 	mov.w		&0x30c4,EXC_VOFF(%a6) 	# vector offset = 0xc4
   2283 	mov.w		&0xe001,2+FP_SRC(%a6) 	# set fsave status
   2284 
   2285 	frestore	FP_SRC(%a6)		# restore src operand
   2286 
   2287 	mov.l		(%a6),%a6		# restore frame pointer
   2288 
   2289 	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
   2290 	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
   2291 	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
   2292 
   2293 # now, we copy the default result to its proper location
   2294 	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
   2295 	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
   2296 	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
   2297 
   2298 	add.l		&LOCAL_SIZE-0x8,%sp	# sp -> shifted exc frame
   2299 
   2300 
   2301 	bra.l		_real_inex
   2302 
   2303 #########################################################################
   2304 
   2305 #
   2306 # if we're stuffing a source operand back into an fsave frame then we
   2307 # have to make sure that for single or double source operands that the
   2308 # format stuffed is as weird as the hardware usually makes it.
   2309 #
   2310 	global		funimp_skew
   2311 funimp_skew:
   2312 	bfextu		EXC_EXTWORD(%a6){&3:&3},%d0 # extract src specifier
   2313 	cmpi.b		%d0,&0x1		# was src sgl?
   2314 	beq.b		funimp_skew_sgl		# yes
   2315 	cmpi.b		%d0,&0x5		# was src dbl?
   2316 	beq.b		funimp_skew_dbl		# yes
   2317 	rts					# neither; FP_SRC left untouched
   2318 
   2319 funimp_skew_sgl:
   2320 	mov.w		FP_SRC_EX(%a6),%d0	# fetch DENORM exponent
   2321 	andi.w		&0x7fff,%d0		# strip sign
   2322 	beq.b		funimp_skew_sgl_not	# exponent is zero; nothing to do
   2323 	cmpi.w		%d0,&0x3f80
   2324 	bgt.b		funimp_skew_sgl_not	# exponent > 0x3f80; nothing to do
   2325 	neg.w		%d0			# make exponent negative
   2326 	addi.w		&0x3f81,%d0		# find amt to shift
   2327 	mov.l		FP_SRC_HI(%a6),%d1	# fetch DENORM hi(man)
   2328 	lsr.l		%d0,%d1			# shift it
   2329 	bset		&31,%d1			# set j-bit
   2330 	mov.l		%d1,FP_SRC_HI(%a6)	# insert new hi(man)
   2331 	andi.w		&0x8000,FP_SRC_EX(%a6)	# clear old exponent
   2332 	ori.w		&0x3f80,FP_SRC_EX(%a6)	# insert new "skewed" exponent
   2333 funimp_skew_sgl_not:
   2334 	rts
   2335 
   2336 funimp_skew_dbl:
   2337 	mov.w		FP_SRC_EX(%a6),%d0	# fetch DENORM exponent
   2338 	andi.w		&0x7fff,%d0		# strip sign
   2339 	beq.b		funimp_skew_dbl_not	# exponent is zero; nothing to do
   2340 	cmpi.w		%d0,&0x3c00
   2341 	bgt.b		funimp_skew_dbl_not	# exponent > 0x3c00; nothing to do
   2342 
   2343 	tst.b		FP_SRC_EX(%a6)		# make "internal format"
   2344 	smi.b		0x2+FP_SRC(%a6)		# byte at FP_SRC+2 = 0xff if sign set, else 0
   2345 	mov.w		%d0,FP_SRC_EX(%a6)	# insert exponent with cleared sign
   2346 	clr.l		%d0			# clear g,r,s
   2347 	lea		FP_SRC(%a6),%a0		# pass ptr to src op
   2348 	mov.w		&0x3c01,%d1		# pass denorm threshold
   2349 	bsr.l		dnrm_lp			# denorm it
   2350 	mov.w		&0x3c00,%d0		# new exponent
   2351 	tst.b		0x2+FP_SRC(%a6)		# is sign set?
   2352 	beq.b		fss_dbl_denorm_done	# no
   2353 	bset		&15,%d0			# set sign
   2354 fss_dbl_denorm_done:
   2355 	bset		&0x7,FP_SRC_HI(%a6)	# set j-bit
   2356 	mov.w		%d0,FP_SRC_EX(%a6)	# insert new exponent
   2357 funimp_skew_dbl_not:
   2358 	rts
   2359 
   2360 #########################################################################
   2361 	global		_mem_write2
   2362 _mem_write2:
   2363 	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
   2364 	beq.l		_dmem_write		# user; continue in _dmem_write
   2365 	mov.l		0x0(%a0),FP_DST_EX(%a6)	# supervisor: copy the 12 bytes at
   2366 	mov.l		0x4(%a0),FP_DST_HI(%a6)	# (a0) into FP_DST instead
   2367 	mov.l		0x8(%a0),FP_DST_LO(%a6)
   2368 	clr.l		%d1			# d1 = 0 (presumably the "no error" status -- confirm)
   2369 	rts
   2370 
   2371 #########################################################################
   2372 # XDEF ****************************************************************	#
   2373 #	_fpsp_effadd(): 060FPSP entry point for FP "Unimplemented	#
   2374 #		     	effective address" exception.			#
   2375 #									#
   2376 #	This handler should be the first code executed upon taking the	#
   2377 #	FP Unimplemented Effective Address exception in an operating	#
   2378 #	system.								#
   2379 #									#
   2380 # XREF ****************************************************************	#
   2381 #	_imem_read_long() - read instruction longword			#
   2382 #	fix_skewed_ops() - adjust src operand in fsave frame		#
   2383 #	set_tag_x() - determine optype of src/dst operands		#
   2384 #	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
   2385 #	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
   2386 #	load_fpn2() - load dst operand from FP regfile			#
   2387 #	tbl_unsupp - add of table of emulation routines for opclass 0,2	#
   2388 #	decbin() - convert packed data to FP binary data		#
   2389 #	_real_fpu_disabled() - "callout" for "FPU disabled" exception	#
   2390 #	_real_access() - "callout" for access error exception		#
   2391 #	_mem_read() - read extended immediate operand from memory	#
   2392 #	_fpsp_done() - "callout" for exit; work all done		#
   2393 #	_real_trace() - "callout" for Trace enabled exception		#
   2394 #	fmovm_dynamic() - emulate dynamic fmovm instruction		#
   2395 #	fmovm_ctrl() - emulate fmovm control instruction		#
   2396 #									#
   2397 # INPUT ***************************************************************	#
   2398 #	- The system stack contains the "Unimplemented <ea>" stk frame	#
   2399 # 									#
   2400 # OUTPUT **************************************************************	#
   2401 #	If access error:						#
   2402 #	- The system stack is changed to an access error stack frame	#
   2403 #	If FPU disabled:						#
   2404 #	- The system stack is changed to an FPU disabled stack frame	#
   2405 #	If Trace exception enabled:					#
   2406 #	- The system stack is changed to a Trace exception stack frame	#
   2407 #	Else: (normal case)						#
   2408 #	- None (correct result has been stored as appropriate)		#
   2409 #									#
   2410 # ALGORITHM ***********************************************************	#
   2411 #	This exception handles 3 types of operations:			#
   2412 # (1) FP Instructions using extended precision or packed immediate	#
   2413 #     addressing mode.							#
   2414 # (2) The "fmovm.x" instruction w/ dynamic register specification.	#
   2415 # (3) The "fmovm.l" instruction w/ 2 or 3 control registers.		#
   2416 #									#
   2417 #	For immediate data operations, the data is read in w/ a		#
   2418 # _imem_read() "callout", converted to FP binary (if packed), and used	#
   2419 # as the source operand to the instruction specified by the instruction	#
   2420 # word. If no FP exception should be reported as a result of the 	#
   2421 # emulation, then the result is stored to the destination register and	#
   2422 # the handler exits through _fpsp_done(). If an enabled exc has been	#
   2423 # signalled as a result of emulation, then an fsave state frame		#
   2424 # corresponding to the FP exception type must be entered into the 060	#
   2425 # FPU before exiting. In either the enabled or disabled cases, we 	#
   2426 # must also check if a Trace exception is pending, in which case, we	#
   2427 # must create a Trace exception stack frame from the current exception	#
   2428 # stack frame. If no Trace is pending, we simply exit through		#
   2429 # _fpsp_done().								#
   2430 #	For "fmovm.x", call the routine fmovm_dynamic() which will 	#
   2431 # decode and emulate the instruction. No FP exceptions can be pending	#
   2432 # as a result of this operation emulation. A Trace exception can be	#
   2433 # pending, though, which means the current stack frame must be changed	#
   2434 # to a Trace stack frame and an exit made through _real_trace().	#
   2435 # For the case of "fmovm.x Dn,-(a7)", where the offending instruction	#
   2436 # was executed from supervisor mode, this handler must store the FP	#
   2437 # register file values to the system stack by itself since		#
   2438 # fmovm_dynamic() can't handle this. A normal exit is made through	#
   2439 # _fpsp_done().								#
   2440 #	For "fmovm.l", fmovm_ctrl() is used to emulate the instruction.	#
   2441 # Again, a Trace exception may be pending and an exit made through	#
   2442 # _real_trace(). Else, a normal exit is made through _fpsp_done().	#
   2443 #									#
   2444 #	Before any of the above is attempted, it must be checked to	#
   2445 # see if the FPU is disabled. Since the "Unimp <ea>" exception is taken	#
   2446 # before the "FPU disabled" exception, but the "FPU disabled" exception	#
   2447 # has higher priority, we check the disabled bit in the PCR. If set,	#
   2448 # then we must create an 8 word "FPU disabled" exception stack frame	#
   2449 # from the current 4 word exception stack frame. This includes 		#
   2450 # reproducing the effective address of the instruction to put on the 	#
   2451 # new stack frame.							#
   2452 #									#
   2453 # 	In the process of all emulation work, if a _mem_read()		#
   2454 # "callout" returns a failing result indicating an access error, then	#
   2455 # we must create an access error stack frame from the current stack	#
   2456 # frame. This information includes a faulting address and a fault-	#
   2457 # status-longword. These are created within this handler.		#
   2458 #									#
   2459 #########################################################################
   2460 
   2461 	global		_fpsp_effadd
   2462 _fpsp_effadd:
   2463 
   2464 # This exception type takes priority over the "Line F Emulator"
   2465 # exception. Therefore, the FPU could be disabled when entering here.
   2466 # So, we must check to see if it's disabled and handle that case separately.
   2467 	mov.l		%d0,-(%sp)		# save d0
   2468 	movc		%pcr,%d0		# load processor configuration reg (PCR)
   2469 	btst		&0x1,%d0		# is FPU disabled?
   2470 	bne.w		iea_disabled		# yes; iea_disabled pops the saved d0
   2471 	mov.l		(%sp)+,%d0		# restore d0
   2472 
   2473 	link		%a6,&-LOCAL_SIZE	# init stack frame
   2474 
   2475 	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
   2476 	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
   2477 	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
   2478 
   2479 # PC of instruction that took the exception is the PC in the frame
   2480 	mov.l		EXC_PC(%a6),EXC_EXTWPTR(%a6)
   2481 
   2482 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   2483 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr past opword,extword
   2484 	bsr.l		_imem_read_long		# fetch the instruction words
   2485 	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD
   2486 
   2487 #########################################################################
   2488 
   2489 	tst.w		%d0			# is bit 15 of the extension word set (fmovm)?
   2490 	bmi.w		iea_fmovm		# yes
   2491 
   2492 #
   2493 # here, we will have:
   2494 # 	fabs	fdabs	fsabs		facos		fmod
   2495 #	fadd	fdadd	fsadd		fasin		frem
   2496 # 	fcmp				fatan		fscale
   2497 #	fdiv	fddiv	fsdiv		fatanh		fsin
   2498 #	fint				fcos		fsincos
   2499 #	fintrz				fcosh		fsinh
   2500 #	fmove	fdmove	fsmove		fetox		ftan
   2501 # 	fmul	fdmul	fsmul		fetoxm1		ftanh
   2502 #	fneg	fdneg	fsneg		fgetexp		ftentox
   2503 #	fsgldiv				fgetman		ftwotox
   2504 # 	fsglmul				flog10
   2505 # 	fsqrt				flog2
   2506 #	fsub	fdsub	fssub		flogn
   2507 #	ftst				flognp1
   2508 # which can all use f<op>.{x,p}
   2509 # so, now it's immediate data extended precision AND PACKED FORMAT!
   2510 #
   2511 iea_op:
   2512 	andi.l		&0x00ff00ff,USER_FPSR(%a6)	# zero bits 31-24 and 15-8 of saved FPSR
   2513 
   2514 	btst		&0xa,%d0		# is src fmt x or p?
   2515 	bne.b		iea_op_pack		# packed
   2516 
   2517 
   2518 	mov.l		EXC_EXTWPTR(%a6),%a0	# pass: ptr to #<data>
   2519 	lea		FP_SRC(%a6),%a1		# pass: ptr to super dst (FP_SRC)
   2520 	mov.l		&0xc,%d0		# pass: 12 bytes
   2521 	bsr.l		_imem_read		# read extended immediate
   2522 
   2523 	tst.l		%d1			# did ifetch fail?
   2524 	bne.w		iea_iacc		# yes
   2525 
   2526 	bra.b		iea_op_setsrc		# extended operand needs no conversion
   2527 
   2528 iea_op_pack:
   2529 
   2530 	mov.l		EXC_EXTWPTR(%a6),%a0	# pass: ptr to #<data>
   2531 	lea		FP_SRC(%a6),%a1		# pass: ptr to super dst
   2532 	mov.l		&0xc,%d0		# pass: 12 bytes
   2533 	bsr.l		_imem_read		# read packed operand
   2534 
   2535 	tst.l		%d1			# did ifetch fail?
   2536 	bne.w		iea_iacc		# yes
   2537 
   2538 # The packed operand is an INF or a NAN if the exponent field is all ones.
   2539 	bfextu		FP_SRC(%a6){&1:&15},%d0	# get exp
   2540 	cmpi.w		%d0,&0x7fff		# INF or NAN?
   2541 	beq.b		iea_op_setsrc		# operand is an INF or NAN
   2542 
   2543 # The packed operand is a zero if the mantissa is all zero, else it's
   2544 # a normal packed op.
   2545 	mov.b		3+FP_SRC(%a6),%d0	# get byte 4
   2546 	andi.b		&0x0f,%d0		# clear all but last nybble
   2547 	bne.b		iea_op_gp_not_spec	# not a zero
   2548 	tst.l		FP_SRC_HI(%a6)		# is lw 2 zero?
   2549 	bne.b		iea_op_gp_not_spec	# not a zero
   2550 	tst.l		FP_SRC_LO(%a6)		# is lw 3 zero?
   2551 	beq.b		iea_op_setsrc		# operand is a ZERO
   2552 iea_op_gp_not_spec:
   2553 	lea		FP_SRC(%a6),%a0		# pass: ptr to packed op
   2554 	bsr.l		decbin			# convert to extended
   2555 	fmovm.x		&0x80,FP_SRC(%a6)	# store fp0 to FP_SRC; make this the srcop
   2556 
   2557 iea_op_setsrc:
   2558 	addi.l		&0xc,EXC_EXTWPTR(%a6)	# skip the 12-byte immediate operand
   2559 
   2560 # FP_SRC now holds the src operand.
   2561 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
   2562 	bsr.l		set_tag_x		# tag the operand type
   2563 	mov.b		%d0,STAG(%a6)		# could be ANYTHING!!!
   2564 	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
   2565 	bne.b		iea_op_getdst		# no
   2566 	bsr.l		unnorm_fix		# yes; convert to NORM/DENORM/ZERO
   2567 	mov.b		%d0,STAG(%a6)		# set new optype tag
   2568 iea_op_getdst:
   2569 	clr.b		STORE_FLG(%a6)		# STORE_FLG = 0: result will be stored
   2570 
   2571 	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
   2572 	beq.b		iea_op_extract		# monadic
   2573 	btst		&0x4,1+EXC_CMDREG(%a6)	# is operation fsincos,ftst,fcmp?
   2574 	bne.b		iea_op_spec		# yes
   2575 
   2576 iea_op_loaddst:
   2577 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
   2578 	bsr.l		load_fpn2		# load dst operand
   2579 
   2580 	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
   2581 	bsr.l		set_tag_x		# tag the operand type
   2582 	mov.b		%d0,DTAG(%a6)		# could be ANYTHING!!!
   2583 	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
   2584 	bne.b		iea_op_extract		# no
   2585 	bsr.l		unnorm_fix		# yes; convert to NORM/DENORM/ZERO
   2586 	mov.b		%d0,DTAG(%a6)		# set new optype tag
   2587 	bra.b		iea_op_extract
   2588 
   2589 # the operation is fsincos, ftst, or fcmp. only fcmp is dyadic
   2590 iea_op_spec:
   2591 	btst		&0x3,1+EXC_CMDREG(%a6)	# is operation fsincos?
   2592 	beq.b		iea_op_extract		# yes
   2593 # now, we're left with ftst and fcmp. so, first let's tag them so that they don't
   2594 # store a result. then, only fcmp will branch back and pick up a dst operand.
   2595 	st		STORE_FLG(%a6)		# STORE_FLG = 0xff: don't store a final result
   2596 	btst		&0x1,1+EXC_CMDREG(%a6)	# is operation fcmp?
   2597 	beq.b		iea_op_loaddst		# yes
   2598 
   2599 iea_op_extract:
   2600 	clr.l		%d0
   2601 	mov.b		FPCR_MODE(%a6),%d0	# pass: rnd mode,prec
   2602 
   2603 	mov.b		1+EXC_CMDREG(%a6),%d1
   2604 	andi.w		&0x007f,%d1		# extract extension
   2605 
   2606 	fmov.l		&0x0,%fpcr		# zero FPCR for the emulation routine
   2607 	fmov.l		&0x0,%fpsr		# zero FPSR for the emulation routine
   2608 
   2609 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
   2610 	lea		FP_DST(%a6),%a1		# pass: ptr to dst op
   2611 
   2612 	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine offset from tbl_unsupp
   2613 	jsr		(tbl_unsupp.l,%pc,%d1.l*1)	# call routine at tbl_unsupp + offset
   2614 
   2615 #
   2616 # Exceptions in order of precedence:
   2617 #	BSUN	: none
   2618 #	SNAN	: all operations
   2619 #	OPERR	: all reg-reg or mem-reg operations that can normally operr
   2620 #	OVFL	: same as OPERR
   2621 #	UNFL	: same as OPERR
   2622 #	DZ	: same as OPERR
   2623 #	INEX2	: same as OPERR
   2624 #	INEX1	: all packed immediate operations
   2625 #
   2626 
   2627 # we determine the highest priority exception(if any) set by the
   2628 # emulation routine that has also been enabled by the user.
   2629 	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
   2630 	bne.b		iea_op_ena		# some are enabled
   2631 
   2632 # now, we save the result, unless, of course, the operation was ftst or fcmp.
   2633 # these don't save results.
   2634 iea_op_save:
   2635 	tst.b		STORE_FLG(%a6)		# is the "don't store" flag set?
   2636 	bne.b		iea_op_exit1		# yes (ftst/fcmp); skip store, exit w/o frestore
   2637 
   2638 iea_op_store:
   2639 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
   2640 	bsr.l		store_fpreg		# store the result
   2641 
   2642 iea_op_exit1:
   2643 	mov.l		EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
   2644 	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
   2645 
   2646 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   2647 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2648 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2649 
   2650 	unlk		%a6			# unravel the frame
   2651 
   2652 	btst		&0x7,(%sp)		# is trace on?
   2653 	bne.w		iea_op_trace		# yes
   2654 
   2655 	bra.l		_fpsp_done		# exit to os
   2656 
   2657 iea_op_ena:
   2658 	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled and set
   2659 	bfffo		%d0{&24:&8},%d0		# find highest priority exception
   2660 	bne.b		iea_op_exc		# at least one was set
   2661 
   2662 # no exception occurred. now, did a disabled, exact overflow occur with inexact
   2663 # enabled? if so, then we have to stuff an overflow frame into the FPU.
   2664 	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
   2665 	beq.b		iea_op_save		# no; finish as if no exceptions were enabled
   2666 
   2667 iea_op_ovfl:
   2668 	btst		&inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
   2669 	beq.b		iea_op_store		# no
   2670 	bra.b		iea_op_exc_ovfl		# yes
   2671 
   2672 # an enabled exception occurred. we have to insert the exception type back into
   2673 # the machine.
   2674 iea_op_exc:
   2675 	subi.l		&24,%d0			# fix offset to be 0-7
   2676 	cmpi.b		%d0,&0x6		# is exception INEX?
   2677 	bne.b		iea_op_exc_force	# no
   2678 
   2679 # the enabled exception was inexact. so, if it occurs with an overflow
   2680 # or underflow that was disabled, then we have to force an overflow or
   2681 # underflow frame.
   2682 	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
   2683 	bne.b		iea_op_exc_ovfl		# yes
   2684 	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
   2685 	bne.b		iea_op_exc_unfl		# yes
   2686 
   2687 iea_op_exc_force:
   2688 	mov.w		(tbl_iea_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # frame word for exception d0
   2689 	bra.b		iea_op_exit2		# exit with frestore
   2690 
   2691 tbl_iea_except:
   2692 	short		0xe002, 0xe006, 0xe004, 0xe005	# bsun, snan, operr, ovfl
   2693 	short		0xe003, 0xe002, 0xe001, 0xe001	# unfl, dz, inex2, inex1
   2694 
   2695 iea_op_exc_ovfl:
   2696 	mov.w		&0xe005,2+FP_SRC(%a6)	# same word as the ovfl table entry
   2697 	bra.b		iea_op_exit2
   2698 
   2699 iea_op_exc_unfl:
   2700 	mov.w		&0xe003,2+FP_SRC(%a6)	# same word as the unfl table entry
   2701 
   2702 iea_op_exit2:
   2703 	mov.l		EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
   2704 	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
   2705 
   2706 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   2707 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2708 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2709 
   2710 	frestore 	FP_SRC(%a6)		# restore exceptional state
   2711 
   2712 	unlk		%a6			# unravel the frame
   2713 
   2714 	btst		&0x7,(%sp)		# is trace on?
   2715 	bne.b		iea_op_trace		# yes
   2716 
   2717 	bra.l		_fpsp_done		# exit to os
   2718 
   2719 #
   2720 # The opclass two instruction that took an "Unimplemented Effective Address"
   2721 # exception was being traced. Make the "current" PC the FPIAR and put it in
   2722 # the trace stack frame then jump to _real_trace().
   2723 #
   2724 #		 UNIMP EA FRAME		   TRACE FRAME
   2725 #		*****************	*****************
   2726 #		* 0x0 *  0x0f0	*	*    Current	*
   2727 #		*****************	*      PC	*
   2728 #		*    Current	*	*****************
   2729 #		*      PC	*	* 0x2 *  0x024	*
   2730 #		*****************	*****************
   2731 #		*      SR	*	*     Next	*
   2732 #		*****************	*      PC	*
   2733 #					*****************
   2734 #					*      SR	*
   2735 #					*****************
   2736 iea_op_trace:
   2737 	mov.l		(%sp),-(%sp)		# shift stack frame "down"
   2738 	mov.w		0x8(%sp),0x4(%sp)	# move lo word of frame PC down as well
   2739 	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
   2740 	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
   2741 
   2742 	bra.l		_real_trace		# exit through the trace callout
   2743 
   2744 #########################################################################
   2745 iea_fmovm:
   2746 	btst		&14,%d0			# ctrl or data reg
   2747 	beq.w		iea_fmovm_ctrl		# ctrl regs
   2748 
   2749 iea_fmovm_data:
   2750 
   2751 	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode
   2752 	bne.b		iea_fmovm_data_s	# supervisor
   2753 
   2754 iea_fmovm_data_u:
   2755 	mov.l		%usp,%a0		# fetch user stack pointer
   2756 	mov.l		%a0,EXC_A7(%a6)		# store current a7
   2757 	bsr.l		fmovm_dynamic		# do dynamic fmovm
   2758 	mov.l		EXC_A7(%a6),%a0		# load possibly new a7
   2759 	mov.l		%a0,%usp		# update usp
   2760 	bra.w		iea_fmovm_exit
   2761 
   2762 iea_fmovm_data_s:
   2763 	clr.b		SPCOND_FLG(%a6)		# clear special-condition flag
   2764 	lea		0x2+EXC_VOFF(%a6),%a0	# a0 = address just past the exception frame
   2765 	mov.l		%a0,EXC_A7(%a6)		# store it as current a7
   2766 	bsr.l		fmovm_dynamic		# do dynamic fmovm
   2767 
   2768 	cmpi.b		SPCOND_FLG(%a6),&mda7_flg	# was mda7_flg condition flagged?
   2769 	beq.w		iea_fmovm_data_predec	# yes; handle predecrement case here
   2770 	cmpi.b		SPCOND_FLG(%a6),&mia7_flg	# was mia7_flg condition flagged?
   2771 	bne.w		iea_fmovm_exit		# no; normal exit
   2772 
   2773 # right now, d0 = the size.
   2774 # the data has been fetched from the supervisor stack, but we have not
   2775 # incremented the stack pointer by the appropriate number of bytes.
   2776 # do it here.
   2777 iea_fmovm_data_postinc:
   2778 	btst		&0x7,EXC_SR(%a6)	# is trace enabled?
   2779 	bne.b		iea_fmovm_data_pi_trace	# yes
   2780 
   2781 	mov.w		EXC_SR(%a6),(EXC_SR,%a6,%d0)	# copy SR to frame d0 bytes higher
   2782 	mov.l		EXC_EXTWPTR(%a6),(EXC_PC,%a6,%d0)	# Next PC into new frame
   2783 	mov.w		&0x00f0,(EXC_VOFF,%a6,%d0)	# stk fmt = 0x0; voff = 0x0f0
   2784 
   2785 	lea		(EXC_SR,%a6,%d0),%a0	# a0 = address of new frame
   2786 	mov.l		%a0,EXC_SR(%a6)		# stash it where the old frame began
   2787 
   2788 	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
   2789 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2790  	movm.l		EXC_DREGS(%a6),&0x0303 	# restore d0-d1/a0-a1
   2791 
   2792 	unlk		%a6
   2793 	mov.l		(%sp)+,%sp		# load stashed new-frame address into sp
   2794 	bra.l		_fpsp_done		# exit to os
   2795 
   2796 iea_fmovm_data_pi_trace:
   2797 	mov.w		EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)	# SR into trace frame (4 bytes larger)
   2798 	mov.l		EXC_EXTWPTR(%a6),(EXC_PC-0x4,%a6,%d0)	# Next PC
   2799 	mov.w		&0x2024,(EXC_VOFF-0x4,%a6,%d0)	# stk fmt = 0x2; voff = 0x024
   2800 	mov.l		EXC_PC(%a6),(EXC_VOFF+0x2-0x4,%a6,%d0)	# Current PC
   2801 
   2802 	lea		(EXC_SR-0x4,%a6,%d0),%a0	# a0 = address of new trace frame
   2803 	mov.l		%a0,EXC_SR(%a6)		# stash it where the old frame began
   2804 
   2805 	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
   2806 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2807  	movm.l		EXC_DREGS(%a6),&0x0303 	# restore d0-d1/a0-a1
   2808 
   2809 	unlk		%a6
   2810 	mov.l		(%sp)+,%sp		# load stashed trace-frame address into sp
   2811 	bra.l		_real_trace		# exit through the trace callout
   2812 
   2813 # right now, d0 = size and d1 = the strg.
   2814 iea_fmovm_data_predec:
   2815 	mov.b		%d1,EXC_VOFF(%a6)	# store strg
   2816 	mov.b		%d0,0x1+EXC_VOFF(%a6)	# store size
   2817 
   2818 	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
   2819 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2820  	movm.l		EXC_DREGS(%a6),&0x0303 	# restore d0-d1/a0-a1
   2821 
   2822 	mov.l		(%a6),-(%sp)		# make a copy of a6
   2823 	mov.l		%d0,-(%sp)		# save d0
   2824 	mov.l		%d1,-(%sp)		# save d1
   2825 	mov.l		EXC_EXTWPTR(%a6),-(%sp)	# make a copy of Next PC
   2826 
   2827 	clr.l		%d0
   2828 	mov.b		0x1+EXC_VOFF(%a6),%d0	# fetch size
   2829 	neg.l		%d0			# get negative of size
   2830 
   2831 	btst		&0x7,EXC_SR(%a6)	# is trace enabled?
   2832 	beq.b		iea_fmovm_data_p2	# no
   2833 
   2834 	mov.w		EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)	# SR into trace frame below the data
   2835 	mov.l		EXC_PC(%a6),(EXC_VOFF-0x2,%a6,%d0)	# Current PC
   2836 	mov.l		(%sp)+,(EXC_PC-0x4,%a6,%d0)	# Next PC (pop the copy)
   2837 	mov.w		&0x2024,(EXC_VOFF-0x4,%a6,%d0)	# stk fmt = 0x2; voff = 0x024
   2838 
   2839 	pea		(%a6,%d0)		# create final sp
   2840 	bra.b		iea_fmovm_data_p3
   2841 
   2842 iea_fmovm_data_p2:
   2843 	mov.w		EXC_SR(%a6),(EXC_SR,%a6,%d0)	# SR into frame below the data
   2844 	mov.l		(%sp)+,(EXC_PC,%a6,%d0)	# Next PC (pop the copy)
   2845 	mov.w		&0x00f0,(EXC_VOFF,%a6,%d0)	# stk fmt = 0x0; voff = 0x0f0
   2846 
   2847 	pea		(0x4,%a6,%d0)		# create final sp
   2848 
   2849 iea_fmovm_data_p3:
   2850 	clr.l		%d1
   2851 	mov.b		EXC_VOFF(%a6),%d1	# fetch strg
   2852 
   2853 	tst.b		%d1			# is strg bit 7 set?
   2854 	bpl.b		fm_1			# no
   2855 	fmovm.x		&0x80,(0x4+0x8,%a6,%d0)	# yes; store fp0
   2856 	addi.l		&0xc,%d0		# advance to next 12-byte slot
   2857 fm_1:
   2858 	lsl.b		&0x1,%d1		# next strg bit into bit 7
   2859 	bpl.b		fm_2			# clear; skip
   2860 	fmovm.x		&0x40,(0x4+0x8,%a6,%d0)	# store fp1
   2861 	addi.l		&0xc,%d0		# advance to next 12-byte slot
   2862 fm_2:
   2863 	lsl.b		&0x1,%d1		# next strg bit into bit 7
   2864 	bpl.b		fm_3			# clear; skip
   2865 	fmovm.x		&0x20,(0x4+0x8,%a6,%d0)	# store fp2
   2866 	addi.l		&0xc,%d0		# advance to next 12-byte slot
   2867 fm_3:
   2868 	lsl.b		&0x1,%d1		# next strg bit into bit 7
   2869 	bpl.b		fm_4			# clear; skip
   2870 	fmovm.x		&0x10,(0x4+0x8,%a6,%d0)	# store fp3
   2871 	addi.l		&0xc,%d0		# advance to next 12-byte slot
   2872 fm_4:
   2873 	lsl.b		&0x1,%d1		# next strg bit into bit 7
   2874 	bpl.b		fm_5			# clear; skip
   2875 	fmovm.x		&0x08,(0x4+0x8,%a6,%d0)	# store fp4
   2876 	addi.l		&0xc,%d0		# advance to next 12-byte slot
   2877 fm_5:
   2878 	lsl.b		&0x1,%d1		# next strg bit into bit 7
   2879 	bpl.b		fm_6			# clear; skip
   2880 	fmovm.x		&0x04,(0x4+0x8,%a6,%d0)	# store fp5
   2881 	addi.l		&0xc,%d0		# advance to next 12-byte slot
   2882 fm_6:
   2883 	lsl.b		&0x1,%d1		# next strg bit into bit 7
   2884 	bpl.b		fm_7			# clear; skip
   2885 	fmovm.x		&0x02,(0x4+0x8,%a6,%d0)	# store fp6
   2886 	addi.l		&0xc,%d0		# advance to next 12-byte slot
   2887 fm_7:
   2888 	lsl.b		&0x1,%d1		# last strg bit into bit 7
   2889 	bpl.b		fm_end			# clear; skip
   2890 	fmovm.x		&0x01,(0x4+0x8,%a6,%d0)	# store fp7
   2891 fm_end:
   2892 	mov.l		0x4(%sp),%d1		# restore d1
   2893 	mov.l		0x8(%sp),%d0		# restore d0
   2894 	mov.l		0xc(%sp),%a6		# restore a6 from its copy
   2895 	mov.l		(%sp)+,%sp		# switch to the final sp created above
   2896 
   2897 	btst		&0x7,(%sp)		# is trace enabled?
   2898 	beq.l		_fpsp_done		# no; exit to os
   2899 	bra.l		_real_trace		# yes; exit through the trace callout
   2900 
   2901 #########################################################################
   2902 iea_fmovm_ctrl:
   2903 
   2904 	bsr.l		fmovm_ctrl		# emulate the fmovm.l of ctrl regs
   2905 
   2906 iea_fmovm_exit:
   2907 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   2908 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2909 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2910 
   2911 	btst		&0x7,EXC_SR(%a6)	# is trace on?
   2912 	bne.b		iea_fmovm_trace		# yes
   2913 
   2914 	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set Next PC
   2915 
   2916 	unlk		%a6			# unravel the frame
   2917 
   2918 	bra.l		_fpsp_done		# exit to os
   2919 
   2920 #
   2921 # The control reg instruction that took an "Unimplemented Effective Address"
   2922 # exception was being traced. The "Current PC" for the trace frame is the
   2923 # PC stacked for Unimp EA. The "Next PC" is in EXC_EXTWPTR.
   2924 # After fixing the stack frame, jump to _real_trace().
   2925 #
   2926 #		 UNIMP EA FRAME		   TRACE FRAME
   2927 #		*****************	*****************
   2928 #		* 0x0 *  0x0f0	*	*    Current	*
   2929 #		*****************	*      PC	*
   2930 #		*    Current	*	*****************
   2931 #		*      PC	*	* 0x2 *  0x024	*
   2932 #		*****************	*****************
   2933 #		*      SR	*	*     Next	*
   2934 #		*****************	*      PC	*
   2935 #					*****************
   2936 #					*      SR	*
   2937 #					*****************
   2938 # this ain't a pretty solution, but it works:
   2939 # -restore a6 (not with unlk)
   2940 # -shift stack frame down over where old a6 used to be
   2941 # -add LOCAL_SIZE to stack pointer
   2942 iea_fmovm_trace:
   2943 	mov.l		(%a6),%a6		# restore frame pointer
   2944 	mov.w		EXC_SR+LOCAL_SIZE(%sp),0x0+LOCAL_SIZE(%sp)	# SR -> base of trace frame
   2945 	mov.l		EXC_PC+LOCAL_SIZE(%sp),0x8+LOCAL_SIZE(%sp)	# Current PC
   2946 	mov.l		EXC_EXTWPTR+LOCAL_SIZE(%sp),0x2+LOCAL_SIZE(%sp)	# Next PC
   2947 	mov.w		&0x2024,0x6+LOCAL_SIZE(%sp) # stk fmt = 0x2; voff = 0x024
   2948 	add.l		&LOCAL_SIZE,%sp		# clear stack frame
   2949 
   2950 	bra.l		_real_trace		# exit through the trace callout
   2951 
   2952 #########################################################################
   2953 # The FPU is disabled and so we should really have taken the "Line
   2954 # F Emulator" exception. So, here we create an 8-word stack frame
   2955 # from our 4-word stack frame. This means we must calculate the length
   2956 # of the faulting instruction to get the "next PC". This is trivial for
   2957 # immediate operands but requires some extra work for fmovm dynamic
   2958 # which can use most addressing modes.
   2959 iea_disabled:
   2960 	mov.l		(%sp)+,%d0		# restore d0
   2961 
   2962 	link		%a6,&-LOCAL_SIZE	# init stack frame
   2963 
   2964 	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
   2965 
   2966 # PC of instruction that took the exception is the PC in the frame
   2967 	mov.l		EXC_PC(%a6),EXC_EXTWPTR(%a6)
   2968 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   2969 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   2970 	bsr.l		_imem_read_long		# fetch the instruction words
   2971 	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD
   2972 
   2973 	tst.w		%d0			# is instr fmovm?
   2974 	bmi.b		iea_dis_fmovm		# yes
   2975 # instruction is using an extended precision immediate operand. therefore,
   2976 # the total instruction length is 16 bytes.
   2977 iea_dis_immed:
   2978 	mov.l		&0x10,%d0		# 16 bytes of instruction
   2979 	bra.b		iea_dis_cont
   2980 iea_dis_fmovm:
   2981 	btst		&0xe,%d0		# is instr fmovm ctrl
   2982 	bne.b		iea_dis_fmovm_data	# no
   2983 # the instruction is a fmovm.l with 2 or 3 registers.
   2984 	bfextu		%d0{&19:&3},%d1		# extract extension word bits 12-10
   2985 	mov.l		&0xc,%d0		# assume 12 bytes of instruction
   2986 	cmpi.b		%d1,&0x7		# move all regs?
   2987 	bne.b		iea_dis_cont		# no; length stays 12
   2988 	addq.l		&0x4,%d0		# yes; 16 bytes of instruction
   2989 	bra.b		iea_dis_cont
   2990 # the instruction is an fmovm.x dynamic which can use many addressing
   2991 # modes and thus can have several different total instruction lengths.
   2992 # call fmovm_calc_ea which will go through the ea calc process and,
   2993 # as a by-product, will tell us how long the instruction is.
   2994 iea_dis_fmovm_data:
   2995 	clr.l		%d0
   2996 	bsr.l		fmovm_calc_ea
   2997 	mov.l		EXC_EXTWPTR(%a6),%d0	# d0 = updated extension word pointer
   2998 	sub.l		EXC_PC(%a6),%d0		# d0 = instruction length in bytes
   2999 iea_dis_cont:
   3000 	mov.w		%d0,EXC_VOFF(%a6)	# store stack shift value
   3001 
   3002 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   3003 
   3004 	unlk		%a6
   3005 
   3006 # here, we actually create the 8-word frame from the 4-word frame,
   3007 # with the "next PC" as additional info.
   3008 # the <ea> field is left as undefined.
   3009 	subq.l		&0x8,%sp		# make room for new stack
   3010 	mov.l		%d0,-(%sp)		# save d0
   3011 	mov.w		0xc(%sp),0x4(%sp)	# move SR
   3012 	mov.l		0xe(%sp),0x6(%sp)	# move Current PC
   3013 	clr.l		%d0
   3014 	mov.w		0x12(%sp),%d0		# fetch instruction length stored in voff
   3015 	mov.l		0x6(%sp),0x10(%sp)	# move Current PC
   3016 	add.l		%d0,0x6(%sp)		# make Next PC
   3017 	mov.w		&0x402c,0xa(%sp)	# insert offset,frame format
   3018 	mov.l		(%sp)+,%d0		# restore d0
   3019 
   3020 	bra.l		_real_fpu_disabled	# exit through the "FPU disabled" callout
   3021 
   3022 ##########
   3023 
   3024 iea_iacc:
   3025 	movc		%pcr,%d0		# load proc cr
   3026 	btst		&0x1,%d0		# is FPU disabled?
   3027 	bne.b		iea_iacc_cont		# yes; skip the FP restores
   3028 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   3029 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1 from stack
   3030 iea_iacc_cont:
   3031 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   3032 
   3033 	unlk		%a6
   3034 
   3035 	subq.w		&0x8,%sp		# make stack frame bigger
   3036 	mov.l		0x8(%sp),(%sp)		# store SR,hi(PC)
   3037 	mov.w		0xc(%sp),0x4(%sp)	# store lo(PC)
   3038 	mov.w		&0x4008,0x6(%sp)	# store voff
   3039 	mov.l		0x2(%sp),0x8(%sp)	# store ea (copy of the stacked PC)
   3040 	mov.l		&0x09428001,0xc(%sp)	# store fslw
   3041 
   3042 iea_acc_done:
   3043 	btst		&0x5,(%sp)		# user or supervisor mode?
   3044 	beq.b		iea_acc_done2		# user
   3045 	bset		&0x2,0xd(%sp)		# set supervisor TM bit
   3046 
   3047 iea_acc_done2:
   3048 	bra.l		_real_access		# exit through the access error callout
   3049 
   3050 iea_dacc:
   3051 	lea		-LOCAL_SIZE(%a6),%sp	# reset sp to the base of the local frame
   3052 
   3053 	movc		%pcr,%d1		# load proc cr
   3054 	btst		&0x1,%d1		# is FPU disabled?
   3055 	bne.b		iea_dacc_cont		# yes; skip the FP restores
   3056 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1 from stack
   3057 	fmovm.l		LOCAL_SIZE+USER_FPCR(%sp),%fpcr,%fpsr,%fpiar # restore ctrl regs
   3058 iea_dacc_cont:
   3059 	mov.l		(%a6),%a6		# restore frame pointer
   3060 
   3061 	mov.l		0x4+LOCAL_SIZE(%sp),-0x8+0x4+LOCAL_SIZE(%sp)	# move SR,hi(PC) down 8 bytes
   3062 	mov.w		0x8+LOCAL_SIZE(%sp),-0x8+0x8+LOCAL_SIZE(%sp)	# move lo(PC) down 8 bytes
   3063 	mov.w		&0x4008,-0x8+0xa+LOCAL_SIZE(%sp)	# store voff (same value as iea_iacc)
   3064 	mov.l		%a0,-0x8+0xc+LOCAL_SIZE(%sp)	# store ea from a0
   3065 	mov.w		%d0,-0x8+0x10+LOCAL_SIZE(%sp)	# store fslw hi word from d0
   3066 	mov.w		&0x0001,-0x8+0x12+LOCAL_SIZE(%sp)	# store fslw lo word
   3067 
   3068 	movm.l		LOCAL_SIZE+EXC_DREGS(%sp),&0x0303 # restore d0-d1/a0-a1
   3069 	add.w		&LOCAL_SIZE-0x4,%sp	# sp = base of the new 8-byte-larger frame
   3070 
   3071 	bra.b		iea_acc_done		# set TM bit if supervisor, then exit
   3072 
   3073 #########################################################################
   3074 # XDEF ****************************************************************	#
   3075 #	_fpsp_operr(): 060FPSP entry point for FP Operr exception.	#
   3076 #									#
   3077 #	This handler should be the first code executed upon taking the	#
   3078 # 	FP Operand Error exception in an operating system.		#
   3079 #									#
   3080 # XREF ****************************************************************	#
   3081 #	_imem_read_long() - read instruction longword			#
   3082 #	fix_skewed_ops() - adjust src operand in fsave frame		#
   3083 #	_real_operr() - "callout" to operating system operr handler	#
   3084 #	_dmem_write_{byte,word,long}() - store data to mem (opclass 3)	#
   3085 #	store_dreg_{b,w,l}() - store data to data regfile (opclass 3)	#
   3086 #	facc_out_{b,w,l}() - store to memory took access error (opcl 3)	#
   3087 #									#
   3088 # INPUT ***************************************************************	#
   3089 #	- The system stack contains the FP Operr exception frame	#
   3090 #	- The fsave frame contains the source operand			#
   3091 # 									#
   3092 # OUTPUT **************************************************************	#
   3093 #	No access error:						#
   3094 #	- The system stack is unchanged					#
   3095 #	- The fsave frame contains the adjusted src op for opclass 0,2	#
   3096 #									#
   3097 # ALGORITHM ***********************************************************	#
   3098 #	In a system where the FP Operr exception is enabled, the goal	#
   3099 # is to get to the handler specified at _real_operr(). But, on the 060,	#
   3100 # for opclass zero and two instruction taking this exception, the 	#
   3101 # input operand in the fsave frame may be incorrect for some cases	#
   3102 # and needs to be corrected. This handler calls fix_skewed_ops() to	#
   3103 # do just this and then exits through _real_operr().			#
   3104 #	For opclass 3 instructions, the 060 doesn't store the default	#
   3105 # operr result out to memory or data register file as it should.	#
   3106 # This code must emulate the move out before finally exiting through	#
   3107 # _real_operr(). The move out, if to memory, is performed using		#
   3108 # _mem_write() "callout" routines that may return a failing result.	#
   3109 # In this special case, the handler must exit through facc_out() 	#
   3110 # which creates an access error stack frame from the current operr	#
   3111 # stack frame.								#
   3112 #									#
   3113 #########################################################################
   3114 
   3115 	global		_fpsp_operr
   3116 _fpsp_operr:
   3117 
   3118 	link.w		%a6,&-LOCAL_SIZE	# init stack frame
   3119 
   3120 	fsave		FP_SRC(%a6)		# grab the "busy" frame
   3121 
   3122  	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
   3123 	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
   3124  	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
   3125 
   3126 # the FPIAR holds the "current PC" of the faulting instruction
   3127 	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
   3128 
   3129 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   3130 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   3131 	bsr.l		_imem_read_long		# fetch the instruction words
   3132 	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD
   3133 
   3134 ##############################################################################
   3135 
   3136 	btst		&13,%d0			# is instr an fmove out?
   3137 	bne.b		foperr_out		# fmove out
   3138 
   3139 
   3140 # here, we simply see if the operand in the fsave frame needs to be "unskewed".
   3141 # this would be the case for opclass two operations with a source infinity or
   3142 # denorm operand in the sgl or dbl format. NANs also become skewed, but can't
   3143 # cause an operr so we don't need to check for them here.
   3144 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
   3145 	bsr.l		fix_skewed_ops		# fix src op
   3146 
   3147 foperr_exit:
   3148 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   3149 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   3150 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   3151 
   3152 	frestore	FP_SRC(%a6)		# put the fsave frame back into the FPU
   3153 
   3154 	unlk		%a6
   3155 	bra.l		_real_operr		# exit through the operr callout
   3156 
   3157 ########################################################################
   3158 
   3159 #
   3160 # the hardware does not save the default result to memory on enabled
   3161 # operand error exceptions. we do this here before passing control to
   3162 # the user operand error handler.
   3163 #
   3164 # byte, word, and long destination format operations can pass
   3165 # through here. we simply need to test the sign of the src
   3166 # operand and save the appropriate minimum or maximum integer value
   3167 # to the effective address as pointed to by the stacked effective address.
   3168 #
   3169 # although packed opclass three operations can take operand error
   3170 # exceptions, they won't pass through here since they are caught
   3171 # first by the unsupported data format exception handler. that handler
   3172 # sends them directly to _real_operr() if necessary.
   3173 #
   3174 foperr_out:
   3175 # build the default result in L_SCR1, then dispatch on the dst format
   3176 	mov.w		FP_SRC_EX(%a6),%d1	# fetch exponent
   3177 	andi.w		&0x7fff,%d1		# strip the sign bit
   3178 	cmpi.w		%d1,&0x7fff		# is exponent the maximum?
   3179 	bne.b		foperr_out_not_qnan	# no; use max/min integer
   3180 # the operand is either an infinity or a QNAN.
   3181 	tst.l		FP_SRC_LO(%a6)		# is lo mantissa non-zero?
   3182 	bne.b		foperr_out_qnan		# yes; operand is a NAN
   3183 	mov.l		FP_SRC_HI(%a6),%d1	# fetch hi mantissa
   3184 	andi.l		&0x7fffffff,%d1		# ignore bit 31 of hi mantissa
   3185 	beq.b		foperr_out_not_qnan	# rest is zero; operand is an infinity
   3186 foperr_out_qnan:
   3187 	mov.l		FP_SRC_HI(%a6),L_SCR1(%a6) # NAN: result is hi mantissa lword
   3188 	bra.b		foperr_out_jmp
   3189 
   3190 foperr_out_not_qnan:
   3191 	mov.l		&0x7fffffff,%d1		# start with largest positive lword
   3192 	tst.b		FP_SRC_EX(%a6)		# test sign of src operand
   3193 	bpl.b		foperr_out_not_qnan2	# positive; keep 0x7fffffff
   3194 	addq.l		&0x1,%d1		# negative; result is 0x80000000
   3195 foperr_out_not_qnan2:
   3196 	mov.l		%d1,L_SCR1(%a6)		# save the default result
   3197 
   3198 foperr_out_jmp:
   3199 	bfextu		%d0{&19:&3},%d0		# extract dst format field
   3200 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract <ea> mode,reg
   3201 	mov.w		(tbl_operr.b,%pc,%d0.w*2),%a0 # fetch offset for this format
   3202 	jmp		(tbl_operr.b,%pc,%a0)	# jump to the per-format routine
   3203 
   3204 tbl_operr:
   3205 	short		foperr_out_l - tbl_operr # long word integer
   3206 	short		tbl_operr    - tbl_operr # sgl prec shouldn't happen
   3207 	short		tbl_operr    - tbl_operr # ext prec shouldn't happen
   3208 	short		foperr_exit  - tbl_operr # packed won't enter here
   3209 	short		foperr_out_w - tbl_operr # word integer
   3210 	short		tbl_operr    - tbl_operr # dbl prec shouldn't happen
   3211 	short		foperr_out_b - tbl_operr # byte integer
   3212 	short		tbl_operr    - tbl_operr # packed won't enter here
   3213 
   3214 foperr_out_b:
   3215 	mov.b		L_SCR1(%a6),%d0		# load upper byte of default result
   3216 	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
   3217 	ble.b		foperr_out_b_save_dn	# yes
   3218 	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
   3219 	bsr.l		_dmem_write_byte	# write the default result
   3220 
   3221 	tst.l		%d1			# did dstore fail?
   3222 	bne.l		facc_out_b		# yes
   3223 
   3224 	bra.w		foperr_exit		# no; take the common exit
   3225 foperr_out_b_save_dn:
   3226 	andi.w		&0x0007,%d1		# keep only the register number
   3227 	bsr.l		store_dreg_b		# store result to regfile
   3228 	bra.w		foperr_exit		# take the common exit
   3229 
   3230 foperr_out_w:
   3231 	mov.w		L_SCR1(%a6),%d0		# load upper word of default result
   3232 	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
   3233 	ble.b		foperr_out_w_save_dn	# yes
   3234 	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
   3235 	bsr.l		_dmem_write_word	# write the default result
   3236 
   3237 	tst.l		%d1			# did dstore fail?
   3238 	bne.l		facc_out_w		# yes
   3239 
   3240 	bra.w		foperr_exit		# no; take the common exit
   3241 foperr_out_w_save_dn:
   3242 	andi.w		&0x0007,%d1		# keep only the register number
   3243 	bsr.l		store_dreg_w		# store result to regfile
   3244 	bra.w		foperr_exit		# take the common exit
   3245 
   3246 foperr_out_l:
   3247 	mov.l		L_SCR1(%a6),%d0		# load default result longword
   3248 	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
   3249 	ble.b		foperr_out_l_save_dn	# yes
   3250 	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
   3251 	bsr.l		_dmem_write_long	# write the default result
   3252 
   3253 	tst.l		%d1			# did dstore fail?
   3254 	bne.l		facc_out_l		# yes
   3255 
   3256 	bra.w		foperr_exit		# no; take the common exit
   3257 foperr_out_l_save_dn:
   3258 	andi.w		&0x0007,%d1		# keep only the register number
   3259 	bsr.l		store_dreg_l		# store result to regfile
   3260 	bra.w		foperr_exit		# take the common exit
   3261 
   3262 #########################################################################
   3263 # XDEF ****************************************************************	#
   3264 #	_fpsp_snan(): 060FPSP entry point for FP SNAN exception.	#
   3265 #									#
   3266 #	This handler should be the first code executed upon taking the	#
   3267 # 	FP Signalling NAN exception in an operating system.		#
   3268 #									#
   3269 # XREF ****************************************************************	#
   3270 #	_imem_read_long() - read instruction longword			#
   3271 #	fix_skewed_ops() - adjust src operand in fsave frame		#
   3272 #	_real_snan() - "callout" to operating system SNAN handler	#
   3273 #	_dmem_write_{byte,word,long}() - store data to mem (opclass 3)	#
   3274 #	store_dreg_{b,w,l}() - store data to data regfile (opclass 3)	#
   3275 #	facc_out_{b,w,l,d,x}() - store to mem took acc error (opcl 3)	#
   3276 #	_calc_ea_fout() - fix An if <ea> is -() or ()+; also get <ea>	#
   3277 #									#
   3278 # INPUT ***************************************************************	#
   3279 #	- The system stack contains the FP SNAN exception frame		#
   3280 #	- The fsave frame contains the source operand			#
   3281 # 									#
   3282 # OUTPUT **************************************************************	#
   3283 #	No access error:						#
   3284 #	- The system stack is unchanged					#
   3285 #	- The fsave frame contains the adjusted src op for opclass 0,2	#
   3286 #									#
   3287 # ALGORITHM ***********************************************************	#
   3288 #	In a system where the FP SNAN exception is enabled, the goal	#
   3289 # is to get to the handler specified at _real_snan(). But, on the 060,	#
   3290 # for opclass zero and two instructions taking this exception, the 	#
   3291 # input operand in the fsave frame may be incorrect for some cases	#
   3292 # and needs to be corrected. This handler calls fix_skewed_ops() to	#
   3293 # do just this and then exits through _real_snan().			#
   3294 #	For opclass 3 instructions, the 060 doesn't store the default	#
   3295 # SNAN result out to memory or data register file as it should.		#
   3296 # This code must emulate the move out before finally exiting through	#
   3297 # _real_snan(). The move out, if to memory, is performed using 		#
   3298 # _mem_write() "callout" routines that may return a failing result.	#
   3299 # In this special case, the handler must exit through facc_out() 	#
   3300 # which creates an access error stack frame from the current SNAN	#
   3301 # stack frame.								#
   3302 #	For the case of an extended precision opclass 3 instruction,	#
   3303 # if the effective addressing mode was -() or ()+, then the address	#
   3304 # register must get updated by calling _calc_ea_fout(). If the <ea>	#
   3305 # was -(a7) from supervisor mode, then the exception frame currently	#
   3306 # on the system stack must be carefully moved "down" to make room	#
   3307 # for the operand being moved.						#
   3308 #									#
   3309 #########################################################################
   3310 
   3311 	global		_fpsp_snan
   3312 _fpsp_snan:
   3313 # entry: save FPU and integer state, then fetch the faulting instruction
   3314 	link.w		%a6,&-LOCAL_SIZE	# init stack frame
   3315 
   3316 	fsave		FP_SRC(%a6)		# grab the "busy" frame
   3317 
   3318  	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
   3319 	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
   3320  	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
   3321 
   3322 # the FPIAR holds the "current PC" of the faulting instruction
   3323 	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6) # start at faulting instr
   3324 
   3325 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   3326 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   3327 	bsr.l		_imem_read_long		# fetch the instruction words
   3328 	mov.l		%d0,EXC_OPWORD(%a6)	# save the fetched longword
   3329 
   3330 ##############################################################################
   3331 
   3332 	btst		&13,%d0			# is instr an fmove out?
   3333 	bne.w		fsnan_out		# fmove out
   3334 
   3335 
   3336 # here, we simply see if the operand in the fsave frame needs to be "unskewed".
   3337 # this would be the case for opclass two operations with a source infinity or
   3338 # denorm operand in the sgl or dbl format. NANs also become skewed and must be
   3339 # fixed here.
   3340 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
   3341 	bsr.l		fix_skewed_ops		# fix src op
   3342 
   3343 fsnan_exit:
   3344 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   3345 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   3346 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   3347 
   3348 	frestore	FP_SRC(%a6)		# reload the fsave frame from the stack
   3349 
   3350 	unlk		%a6			# release the local stack frame
   3351 	bra.l		_real_snan		# continue in the OS SNAN handler callout
   3352 
   3353 ########################################################################
   3354 
   3355 #
   3356 # the hardware does not save the default result to memory on enabled
   3357 # snan exceptions. we do this here before passing control to
   3358 # the user snan handler.
   3359 #
   3360 # byte, word, long, sgl, dbl, ext, and packed destination format operations
   3361 # can pass through here. since packed format operations already were handled
   3362 # by fpsp_unsupp(), then we need to do nothing else for them here.
   3363 # for byte, word, and long, we store the upper bits of the SNAN mantissa
   3364 # with the SNAN bit set; for sgl, dbl, and ext, we build a NAN of that
   3365 # format with the SNAN bit set and store it to the destination.
   3366 #
   3367 fsnan_out:
   3368 # dispatch on the destination format of the fmove out
   3369 	bfextu		%d0{&19:&3},%d0		# extract dst format field
   3370 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract <ea> mode,reg
   3371 	mov.w		(tbl_snan.b,%pc,%d0.w*2),%a0 # fetch offset for this format
   3372 	jmp		(tbl_snan.b,%pc,%a0)	# jump to the per-format routine
   3373 
   3374 tbl_snan:
   3375 	short		fsnan_out_l - tbl_snan # long word integer
   3376 	short		fsnan_out_s - tbl_snan # sgl prec
   3377 	short		fsnan_out_x - tbl_snan # ext prec
   3378 	short		tbl_snan    - tbl_snan # packed needs no help
   3379 	short		fsnan_out_w - tbl_snan # word integer
   3380 	short		fsnan_out_d - tbl_snan # dbl prec
   3381 	short		fsnan_out_b - tbl_snan # byte integer
   3382 	short		tbl_snan    - tbl_snan # packed needs no help
   3383 
   3384 fsnan_out_b:
   3385 	mov.b		FP_SRC_HI(%a6),%d0	# load upper byte of SNAN
   3386 	bset		&6,%d0			# set SNAN bit
   3387 	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
   3388 	ble.b		fsnan_out_b_dn		# yes
   3389 	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
   3390 	bsr.l		_dmem_write_byte	# write the default result
   3391 
   3392 	tst.l		%d1			# did dstore fail?
   3393 	bne.l		facc_out_b		# yes
   3394 
   3395 	bra.w		fsnan_exit		# no; take the common exit
   3396 fsnan_out_b_dn:
   3397 	andi.w		&0x0007,%d1		# keep only the register number
   3398 	bsr.l		store_dreg_b		# store result to regfile
   3399 	bra.w		fsnan_exit		# take the common exit
   3400 
   3401 fsnan_out_w:
   3402 	mov.w		FP_SRC_HI(%a6),%d0	# load upper word of SNAN
   3403 	bset		&14,%d0			# set SNAN bit
   3404 	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
   3405 	ble.b		fsnan_out_w_dn		# yes
   3406 	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
   3407 	bsr.l		_dmem_write_word	# write the default result
   3408 
   3409 	tst.l		%d1			# did dstore fail?
   3410 	bne.l		facc_out_w		# yes
   3411 
   3412 	bra.w		fsnan_exit		# no; take the common exit
   3413 fsnan_out_w_dn:
   3414 	andi.w		&0x0007,%d1		# keep only the register number
   3415 	bsr.l		store_dreg_w		# store result to regfile
   3416 	bra.w		fsnan_exit		# take the common exit
   3417 
   3418 fsnan_out_l:
   3419 	mov.l		FP_SRC_HI(%a6),%d0	# load upper longword of SNAN
   3420 	bset		&30,%d0			# set SNAN bit
   3421 	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
   3422 	ble.b		fsnan_out_l_dn		# yes
   3423 	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
   3424 	bsr.l		_dmem_write_long	# write the default result
   3425 
   3426 	tst.l		%d1			# did dstore fail?
   3427 	bne.l		facc_out_l		# yes
   3428 
   3429 	bra.w		fsnan_exit		# no; take the common exit
   3430 fsnan_out_l_dn:
   3431 	andi.w		&0x0007,%d1		# keep only the register number
   3432 	bsr.l		store_dreg_l		# store result to regfile
   3433 	bra.w		fsnan_exit		# take the common exit
   3434 
   3435 fsnan_out_s:
   3436 	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
   3437 	ble.b		fsnan_out_d_dn		# yes
   3438 	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
   3439 	andi.l		&0x80000000,%d0		# keep sign
   3440 	ori.l		&0x7fc00000,%d0		# insert new exponent,SNAN bit
   3441 	mov.l		FP_SRC_HI(%a6),%d1	# load mantissa
   3442 	lsr.l		&0x8,%d1		# shift mantissa for sgl
   3443 	or.l		%d1,%d0			# create sgl SNAN
   3444 	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
   3445 	bsr.l		_dmem_write_long	# write the default result
   3446 
   3447 	tst.l		%d1			# did dstore fail?
   3448 	bne.l		facc_out_l		# yes
   3449 
   3450 	bra.w		fsnan_exit		# no; take the common exit
   3451 fsnan_out_d_dn:				# sgl result to a data reg (reached only from fsnan_out_s here)
   3452 	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
   3453 	andi.l		&0x80000000,%d0		# keep sign
   3454 	ori.l		&0x7fc00000,%d0		# insert new exponent,SNAN bit
   3455 	mov.l		%d1,-(%sp)		# save <ea> mode,reg while d1 is scratch
   3456 	mov.l		FP_SRC_HI(%a6),%d1	# load mantissa
   3457 	lsr.l		&0x8,%d1		# shift mantissa for sgl
   3458 	or.l		%d1,%d0			# create sgl SNAN
   3459 	mov.l		(%sp)+,%d1		# restore <ea> mode,reg
   3460 	andi.w		&0x0007,%d1		# keep only the register number
   3461 	bsr.l		store_dreg_l		# store result to regfile
   3462 	bra.w		fsnan_exit		# take the common exit
   3463 
   3464 fsnan_out_d:
   3465 	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
   3466 	andi.l		&0x80000000,%d0		# keep sign
   3467 	ori.l		&0x7ff80000,%d0		# insert new exponent,SNAN bit
   3468 	mov.l		FP_SRC_HI(%a6),%d1	# load hi mantissa
   3469 	mov.l		%d0,FP_SCR0_EX(%a6)	# store to temp space
   3470 	mov.l		&11,%d0			# load shift amt
   3471 	lsr.l		%d0,%d1			# hi mantissa >> 11
   3472 	or.l		%d1,FP_SCR0_EX(%a6)	# create dbl hi
   3473 	mov.l		FP_SRC_HI(%a6),%d1	# load hi mantissa
   3474 	andi.l		&0x000007ff,%d1		# keep the 11 bits shifted out above
   3475 	ror.l		%d0,%d1			# rotate them to the top of the lword
   3476 	mov.l		%d1,FP_SCR0_HI(%a6)	# store to temp space
   3477 	mov.l		FP_SRC_LO(%a6),%d1	# load lo mantissa
   3478 	lsr.l		%d0,%d1			# lo mantissa >> 11
   3479 	or.l		%d1,FP_SCR0_HI(%a6)	# create dbl lo
   3480 	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
   3481 	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
   3482 	movq.l		&0x8,%d0		# pass: size of 8 bytes
   3483 	bsr.l		_dmem_write		# write the default result
   3484 
   3485 	tst.l		%d1			# did dstore fail?
   3486 	bne.l		facc_out_d		# yes
   3487 
   3488 	bra.w		fsnan_exit		# no; take the common exit
   3489 
   3490 # for extended precision, if the addressing mode is pre-decrement or
   3491 # post-increment, then the address register did not get updated.
   3492 # in addition, for pre-decrement, the stacked <ea> is incorrect.
   3493 fsnan_out_x:
   3494 	clr.b		SPCOND_FLG(%a6)		# clear special case flag
   3495 
   3496 	mov.w		FP_SRC_EX(%a6),FP_SCR0_EX(%a6) # copy sign,exponent word
   3497 	clr.w		2+FP_SCR0(%a6)		# zero the word after it
   3498 	mov.l		FP_SRC_HI(%a6),%d0	# load hi mantissa
   3499 	bset		&30,%d0			# set SNAN bit
   3500 	mov.l		%d0,FP_SCR0_HI(%a6)	# store hi mantissa to temp space
   3501 	mov.l		FP_SRC_LO(%a6),FP_SCR0_LO(%a6) # copy lo mantissa
   3502 
   3503 	btst		&0x5,EXC_SR(%a6)	# supervisor mode exception?
   3504 	bne.b		fsnan_out_x_s		# yes
   3505 
   3506 	mov.l		%usp,%a0		# fetch user stack pointer
   3507 	mov.l		%a0,EXC_A7(%a6)		# save on stack for calc_ea()
   3508 	mov.l		(%a6),EXC_A6(%a6)	# likewise the a6 value stacked by link
   3509 
   3510 	bsr.l		_calc_ea_fout		# find the correct ea,update An
   3511 	mov.l		%a0,%a1			# pass: dst addr
   3512 	mov.l		%a0,EXC_EA(%a6)		# stack correct <ea>
   3513 
   3514 	mov.l		EXC_A7(%a6),%a0		# fetch a7 copy after calc_ea()
   3515 	mov.l		%a0,%usp		# restore user stack pointer
   3516 	mov.l		EXC_A6(%a6),(%a6)	# write a6 copy back to the link slot
   3517 
   3518 fsnan_out_x_save:
   3519 	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
   3520 	movq.l		&0xc,%d0		# pass: size of extended
   3521 	bsr.l		_dmem_write		# write the default result
   3522 
   3523 	tst.l		%d1			# did dstore fail?
   3524 	bne.l		facc_out_x		# yes
   3525 
   3526 	bra.w		fsnan_exit		# no; take the common exit
   3527 
   3528 fsnan_out_x_s:
   3529 	mov.l		(%a6),EXC_A6(%a6)	# copy the a6 value stacked by link
   3530 
   3531 	bsr.l		_calc_ea_fout		# find the correct ea,update An
   3532 	mov.l		%a0,%a1			# pass: dst addr
   3533 	mov.l		%a0,EXC_EA(%a6)		# stack correct <ea>
   3534 
   3535 	mov.l		EXC_A6(%a6),(%a6)	# write a6 copy back to the link slot
   3536 
   3537 	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
   3538 	bne.b		fsnan_out_x_save	# no
   3539 
   3540 # the operation was "fmove.x SNAN,-(a7)" from supervisor mode.
   3541 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   3542 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   3543 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   3544 
   3545 	frestore	FP_SRC(%a6)		# reload the fsave frame from the stack
   3546 
   3547 	mov.l		EXC_A6(%a6),%a6		# restore frame pointer
   3548 
   3549 	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) # move frame lword at EXC_SR down 12 bytes
   3550 	mov.l		LOCAL_SIZE+EXC_PC+0x2(%sp),LOCAL_SIZE+EXC_PC+0x2-0xc(%sp) # move frame lword at EXC_PC+2 down
   3551 	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp) # move frame <ea> down
   3552 
   3553 	mov.l		LOCAL_SIZE+FP_SCR0_EX(%sp),LOCAL_SIZE+EXC_SR(%sp) # store SNAN sign,exponent in vacated space
   3554 	mov.l		LOCAL_SIZE+FP_SCR0_HI(%sp),LOCAL_SIZE+EXC_PC+0x2(%sp) # store SNAN hi mantissa
   3555 	mov.l		LOCAL_SIZE+FP_SCR0_LO(%sp),LOCAL_SIZE+EXC_EA(%sp) # store SNAN lo mantissa
   3556 
   3557 	add.l		&LOCAL_SIZE-0x8,%sp	# discard locals; presumably sp -> moved frame
   3558 
   3559 	bra.l		_real_snan		# continue in the OS SNAN handler callout
   3560 
   3561 #########################################################################
   3562 # XDEF ****************************************************************	#
   3563 #	_fpsp_inex(): 060FPSP entry point for FP Inexact exception.	#
   3564 #									#
   3565 #	This handler should be the first code executed upon taking the	#
   3566 # 	FP Inexact exception in an operating system.			#
   3567 #									#
   3568 # XREF ****************************************************************	#
   3569 #	_imem_read_long() - read instruction longword			#
   3570 #	fix_skewed_ops() - adjust src operand in fsave frame		#
   3571 #	set_tag_x() - determine optype of src/dst operands		#
   3572 #	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
   3573 #	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
   3574 #	load_fpn2() - load dst operand from FP regfile			#
   3575 #	smovcr() - emulate an "fmovcr" instruction			#
   3576 #	fout() - emulate an opclass 3 instruction			#
   3577 #	tbl_unsupp - add of table of emulation routines for opclass 0,2	#
   3578 #	_real_inex() - "callout" to operating system inexact handler	#
   3579 #									#
   3580 # INPUT ***************************************************************	#
   3581 #	- The system stack contains the FP Inexact exception frame	#
   3582 #	- The fsave frame contains the source operand			#
   3583 # 									#
   3584 # OUTPUT **************************************************************	#
   3585 #	- The system stack is unchanged					#
   3586 #	- The fsave frame contains the adjusted src op for opclass 0,2	#
   3587 #									#
   3588 # ALGORITHM ***********************************************************	#
   3589 #	In a system where the FP Inexact exception is enabled, the goal	#
   3590 # is to get to the handler specified at _real_inex(). But, on the 060,	#
   3591 # for opclass zero and two instructions taking this exception, the 	#
   3592 # hardware doesn't store the correct result to the destination FP	#
   3593 # register as did the '040 and '881/2. This handler must emulate the 	#
   3594 # instruction in order to get this value and then store it to the 	#
   3595 # correct register before calling _real_inex().				#
   3596 #	For opclass 3 instructions, the 060 doesn't store the default	#
   3597 # inexact result out to memory or data register file as it should.	#
   3598 # This code must emulate the move out by calling fout() before finally	#
   3599 # exiting through _real_inex().						#
   3600 #									#
   3601 #########################################################################
   3602 
   3603 	global		_fpsp_inex
   3604 _fpsp_inex:
   3605 # entry: save FPU and integer state, then fetch the faulting instruction
   3606 	link.w		%a6,&-LOCAL_SIZE	# init stack frame
   3607 
   3608 	fsave		FP_SRC(%a6)		# grab the "busy" frame
   3609 
   3610  	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
   3611 	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
   3612  	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
   3613 
   3614 # the FPIAR holds the "current PC" of the faulting instruction
   3615 	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6) # start at faulting instr
   3616 
   3617 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   3618 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   3619 	bsr.l		_imem_read_long		# fetch the instruction words
   3620 	mov.l		%d0,EXC_OPWORD(%a6)	# save the fetched longword
   3621 
   3622 ##############################################################################
   3623 
   3624 	btst		&13,%d0			# is instr an fmove out?
   3625 	bne.w		finex_out		# fmove out
   3626 
   3627 
   3628 # the hardware, for "fabs" and "fneg" w/ a long source format, puts the
   3629 # longword integer directly into the upper longword of the mantissa along
   3630 # w/ an exponent value of 0x401e. we convert this to extended precision here.
   3631 	bfextu		%d0{&19:&3},%d0		# fetch instr size
   3632 	bne.b		finex_cont		# instr size is not long
   3633 	cmpi.w		FP_SRC_EX(%a6),&0x401e	# is exponent 0x401e?
   3634 	bne.b		finex_cont		# no
   3635 	fmov.l		&0x0,%fpcr		# zero FPCR for the conversion
   3636 	fmov.l		FP_SRC_HI(%a6),%fp0	# load integer src
   3637 	fmov.x		%fp0,FP_SRC(%a6)	# store integer as extended precision
   3638 	mov.w		&0xe001,0x2+FP_SRC(%a6)	# NOTE(review): meaning of 0xe001 not visible here
   3639 
   3640 finex_cont:
   3641 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
   3642 	bsr.l		fix_skewed_ops		# fix src op
   3643 
   3644 # Here, we zero the ccode and exception byte field since we're going to
   3645 # emulate the whole instruction. Notice, though, that we don't kill the
   3646 # INEX1 bit. This is because a packed op has long since been converted
   3647 # to extended before arriving here. Therefore, we need to retain the
   3648 # INEX1 bit from when the operand was first converted.
   3649 	andi.l		&0x00ff01ff,USER_FPSR(%a6) # zero ccode,exc bytes except INEX1
   3650 
   3651 	fmov.l		&0x0,%fpcr		# zero current control regs
   3652 	fmov.l		&0x0,%fpsr
   3653 
   3654 	bfextu		EXC_EXTWORD(%a6){&0:&6},%d1 # extract upper 6 of cmdreg
   3655 	cmpi.b		%d1,&0x17		# is op an fmovecr?
   3656 	beq.w		finex_fmovcr		# yes
   3657 
   3658 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
   3659 	bsr.l		set_tag_x		# tag the operand type
   3660 	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM
   3661 
   3662 # bits four and five of the fp extension word separate the monadic and dyadic
   3663 # operations that can pass through fpsp_inex(). remember that fcmp and ftst
   3664 # will never take this exception, but fsincos will.
   3665 	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
   3666 	beq.b		finex_extract		# monadic
   3667 
   3668 	btst		&0x4,1+EXC_CMDREG(%a6)	# is operation an fsincos?
   3669 	bne.b		finex_extract		# yes
   3670 
   3671 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
   3672 	bsr.l		load_fpn2		# load dst into FP_DST
   3673 
   3674 	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
   3675 	bsr.l		set_tag_x		# tag the operand type
   3676 	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
   3677 	bne.b		finex_op2_done		# no
   3678 	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
   3679 finex_op2_done:
   3680 	mov.b		%d0,DTAG(%a6)		# save dst optype tag
   3681 
   3682 finex_extract:
   3683 	clr.l		%d0			# clear upper bits of d0
   3684 	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
   3685 
   3686 	mov.b		1+EXC_CMDREG(%a6),%d1	# fetch low byte of cmdreg
   3687 	andi.w		&0x007f,%d1		# extract extension
   3688 
   3689 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
   3690 	lea		FP_DST(%a6),%a1		# pass: ptr to dst op
   3691 
   3692 	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
   3693 	jsr		(tbl_unsupp.l,%pc,%d1.l*1) # call the emulation routine
   3694 
   3695 # the operation has been emulated. the result is in fp0.
   3696 finex_save:
   3697 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # extract dst reg field
   3698 	bsr.l		store_fpreg		# store result to FP regfile
   3699 
   3700 finex_exit:
   3701 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   3702 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   3703 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   3704 
   3705 	frestore	FP_SRC(%a6)		# reload the fsave frame from the stack
   3706 
   3707 	unlk		%a6			# release the local stack frame
   3708 	bra.l		_real_inex		# continue in the OS inexact handler callout
   3709 
   3710 finex_fmovcr:
   3711 	clr.l		%d0			# clear upper bits of d0
   3712 	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec,mode
   3713 	mov.b		1+EXC_CMDREG(%a6),%d1	# fetch low byte of cmdreg
   3714 	andi.l		&0x0000007f,%d1		# pass rom offset
   3715 	bsr.l		smovcr			# emulate the fmovecr
   3716 	bra.b		finex_save		# store the result like any other op
   3717 
   3718 ########################################################################
   3719 
   3720 #
   3721 # the hardware does not save the default result to memory on enabled
   3722 # inexact exceptions. we do this here before passing control to
   3723 # the user inexact handler.
   3724 #
   3725 # byte, word, and long destination format operations can pass
   3726 # through here. so can double and single precision.
   3727 # although packed opclass three operations can take inexact
   3728 # exceptions, they won't pass through here since they are caught
   3729 # first by the unsupported data format exception handler. that handler
   3730 # sends them directly to _real_inex() if necessary.
   3731 #
   3732 finex_out:
   3733 # emulate the fmove out via fout(), then take the common exit
   3734 	mov.b		&NORM,STAG(%a6)		# src is a NORM
   3735 
   3736 	clr.l		%d0			# clear upper bits of d0
   3737 	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec,mode
   3738 
   3739 	andi.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field
   3740 
   3741 	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
   3742 
   3743 	bsr.l		fout			# store the default result
   3744 
   3745 	bra.b		finex_exit		# restore state; exit via _real_inex
   3746 
   3747 #########################################################################
   3748 # XDEF ****************************************************************	#
   3749 #	_fpsp_dz(): 060FPSP entry point for FP DZ exception.		#
   3750 #									#
   3751 #	This handler should be the first code executed upon taking	#
   3752 #	the FP DZ exception in an operating system.			#
   3753 #									#
   3754 # XREF ****************************************************************	#
   3755 #	_imem_read_long() - read instruction longword from memory	#
   3756 #	fix_skewed_ops() - adjust fsave operand				#
   3757 #	_real_dz() - "callout" exit point from FP DZ handler		#
   3758 #									#
   3759 # INPUT ***************************************************************	#
   3760 #	- The system stack contains the FP DZ exception stack.		#
   3761 #	- The fsave frame contains the source operand.			#
   3762 # 									#
   3763 # OUTPUT **************************************************************	#
   3764 #	- The system stack contains the FP DZ exception stack.		#
   3765 #	- The fsave frame contains the adjusted source operand.		#
   3766 #									#
   3767 # ALGORITHM ***********************************************************	#
   3768 #	In a system where the DZ exception is enabled, the goal is to	#
   3769 # get to the handler specified at _real_dz(). But, on the 060, when the	#
   3770 # exception is taken, the input operand in the fsave state frame may	#
   3771 # be incorrect for some cases and need to be adjusted. So, this package	#
   3772 # adjusts the operand using fix_skewed_ops() and then branches to	#
   3773 # _real_dz(). 								#
   3774 #									#
   3775 #########################################################################
   3776 
   3777 	global		_fpsp_dz
   3778 _fpsp_dz:
   3779 # entry: save FPU and integer state, then fetch the faulting instruction
   3780 	link.w		%a6,&-LOCAL_SIZE	# init stack frame
   3781 
   3782 	fsave		FP_SRC(%a6)		# grab the "busy" frame
   3783 
   3784  	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
   3785 	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
   3786  	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
   3787 
   3788 # the FPIAR holds the "current PC" of the faulting instruction
   3789 	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6) # start at faulting instr
   3790 
   3791 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   3792 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   3793 	bsr.l		_imem_read_long		# fetch the instruction words
   3794 	mov.l		%d0,EXC_OPWORD(%a6)	# save the fetched longword
   3795 
   3796 ##############################################################################
   3797 
   3798 
   3799 # here, we simply see if the operand in the fsave frame needs to be "unskewed".
   3800 # this would be the case for opclass two operations with a source zero
   3801 # in the sgl or dbl format.
   3802 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
   3803 	bsr.l		fix_skewed_ops		# fix src op
   3804 
   3805 fdz_exit:
   3806 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   3807 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   3808 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   3809 
   3810 	frestore	FP_SRC(%a6)		# reload the fsave frame from the stack
   3811 
   3812 	unlk		%a6			# release the local stack frame
   3813 	bra.l		_real_dz		# exit through the FP DZ callout
   3814 
   3815 #########################################################################
   3816 # XDEF ****************************************************************	#
   3817 #	_fpsp_fline(): 060FPSP entry point for "Line F emulator" exc.	#
   3818 #									#
   3819 #	This handler should be the first code executed upon taking the	#
   3820 #	"Line F Emulator" exception in an operating system.		#
   3821 #									#
   3822 # XREF ****************************************************************	#
   3823 #	_fpsp_unimp() - handle "FP Unimplemented" exceptions		#
   3824 #	_real_fpu_disabled() - handle "FPU disabled" exceptions		#
   3825 #	_real_fline() - handle "FLINE" exceptions			#
   3826 #	_imem_read_long() - read instruction longword			#
   3827 #									#
   3828 # INPUT ***************************************************************	#
   3829 #	- The system stack contains a "Line F Emulator" exception	#
   3830 #	  stack frame.							#
   3831 # 									#
   3832 # OUTPUT **************************************************************	#
   3833 #	- The system stack is unchanged					#
   3834 #									#
   3835 # ALGORITHM ***********************************************************	#
   3836 #	When a "Line F Emulator" exception occurs, there are 3 possible	#
   3837 # exception types, denoted by the exception stack frame format number:	#
   3838 #	(1) FPU unimplemented instruction (6 word stack frame)		#
   3839 #	(2) FPU disabled (8 word stack frame)				#
   3840 #	(3) Line F (4 word stack frame)					#
   3841 #									#
   3842 #	This module determines which and forks the flow off to the 	#
   3843 # appropriate "callout" (for "disabled" and "Line F") or to the		#
   3844 # correct emulation code (for "FPU unimplemented").			#
   3845 #	This code also must check for "fmovecr" instructions w/ a	#
   3846 # non-zero <ea> field. These may get flagged as "Line F" but should	#
   3847 # really be flagged as "FPU Unimplemented". (This is a "feature" on	#
   3848 # the '060.)								#
   3849 #									#
   3850 #########################################################################
   3851 
   3852 	global		_fpsp_fline
   3853 _fpsp_fline:
   3854 
   3855 # check to see if this exception is a "FP Unimplemented Instruction"
   3856 # exception. if so, branch directly to that handler's entry point.
   3857 	cmpi.w		0x6(%sp),&0x202c	# is stacked word at offset 6 0x202c?
   3858 	beq.l		_fpsp_unimp		# yes; FP unimplemented instruction
   3859 
   3860 # check to see if the FPU is disabled. if so, jump to the OS entry
   3861 # point for that condition.
   3862 	cmpi.w		0x6(%sp),&0x402c	# is stacked word at offset 6 0x402c?
   3863 	beq.l		_real_fpu_disabled	# yes; FPU disabled
   3864 
   3865 # the exception was an "F-Line Illegal" exception. we check to see
   3866 # if the F-Line instruction is an "fmovecr" w/ a non-zero <ea>. if
   3867 # so, convert the F-Line exception stack frame to an FP Unimplemented
   3868 # Instruction exception stack frame else branch to the OS entry
   3869 # point for the F-Line exception handler.
   3870 	link.w		%a6,&-LOCAL_SIZE	# init stack frame
   3871 
   3872 	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
   3873 
   3874 	mov.l		EXC_PC(%a6),EXC_EXTWPTR(%a6) # start at the stacked PC
   3875 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   3876 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   3877 	bsr.l		_imem_read_long		# fetch instruction words
   3878 
   3879 	bfextu		%d0{&0:&10},%d1		# is it an fmovecr?
   3880 	cmpi.w		%d1,&0x03c8		# upper 10 bits of opword = 0x3c8?
   3881 	bne.b		fline_fline		# no
   3882 
   3883 	bfextu		%d0{&16:&6},%d1		# is it an fmovecr?
   3884 	cmpi.b		%d1,&0x17		# upper 6 bits of ext word = 0x17?
   3885 	bne.b		fline_fline		# no
   3886 
   3887 # it's an fmovecr w/ a non-zero <ea> that has entered through
   3888 # the F-Line Illegal exception.
   3889 # so, we need to convert the F-Line exception stack frame into an
   3890 # FP Unimplemented Instruction stack frame and jump to that entry
   3891 # point.
   3892 #
   3893 # but, if the FPU is disabled, then we need to jump to the FPU disabled
   3894 # entry point.
   3895 	movc		%pcr,%d0		# read the processor config register
   3896 	btst		&0x1,%d0		# is PCR bit 1 set (FPU disabled)?
   3897 	beq.b		fline_fmovcr		# no; treat as FP unimplemented
   3898 
   3899 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   3900 
   3901 	unlk		%a6			# release the local stack frame
   3902 
   3903 	sub.l		&0x8,%sp		# make room for "Next PC", <ea>
   3904 	mov.w		0x8(%sp),(%sp)		# move stacked SR word down
   3905 	mov.l		0xa(%sp),0x2(%sp)	# move "Current PC"
   3906 	mov.w		&0x402c,0x6(%sp)	# write 0x402c (the FPU-disabled value tested at entry)
   3907 	mov.l		0x2(%sp),0xc(%sp)	# copy "Current PC" to offset 0xc
   3908 	addq.l		&0x4,0x2(%sp)		# set "Next PC"
   3909 
   3910 	bra.l		_real_fpu_disabled	# exit through the FPU disabled callout
   3911 
   3912 fline_fmovcr:
   3913 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   3914 
   3915 	unlk		%a6			# release the local stack frame
   3916 
   3917 	fmov.l		0x2(%sp),%fpiar		# set current PC
   3918 	addq.l		&0x4,0x2(%sp)		# set Next PC
   3919 
   3920 	mov.l		(%sp),-(%sp)		# grow frame 4 bytes; copy first lword down
   3921 	mov.l		0x8(%sp),0x4(%sp)	# move second frame lword down
   3922 	mov.b		&0x20,0x6(%sp)		# set high byte of word at offset 6 to 0x20
   3923 
   3924 	bra.l		_fpsp_unimp		# handle as FP unimplemented instruction
   3925 
   3926 fline_fline:
   3927 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   3928 
   3929 	unlk		%a6			# release the local stack frame
   3930 
   3931 	bra.l		_real_fline		# exit through the F-Line callout
   3932 
   3933 #########################################################################
   3934 # XDEF ****************************************************************	#
   3935 #	_fpsp_unimp(): 060FPSP entry point for FP "Unimplemented	#
   3936 #		       Instruction" exception.				#
   3937 #									#
   3938 #	This handler should be the first code executed upon taking the	#
   3939 #	FP Unimplemented Instruction exception in an operating system.	#
   3940 #									#
   3941 # XREF ****************************************************************	#
   3942 #	_imem_read_{word,long}() - read instruction word/longword	#
   3943 #	load_fop() - load src/dst ops from memory and/or FP regfile	#
   3944 #	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
   3945 #	tbl_trans - addr of table of emulation routines for trnscndls	#
   3946 #	_real_access() - "callout" for access error exception		#
   3947 #	_fpsp_done() - "callout" for exit; work all done		#
   3948 #	_real_trace() - "callout" for Trace enabled exception		#
   3949 #	smovcr() - emulate "fmovecr" instruction			#
   3950 #	funimp_skew() - adjust fsave src ops to "incorrect" value	#
   3951 #	_ftrapcc() - emulate an "ftrapcc" instruction			#
   3952 #	_fdbcc() - emulate an "fdbcc" instruction			#
   3953 #	_fscc() - emulate an "fscc" instruction				#
   3954 #	_real_trap() - "callout" for Trap exception			#
   3955 # 	_real_bsun() - "callout" for enabled Bsun exception		#
   3956 #									#
   3957 # INPUT ***************************************************************	#
   3958 #	- The system stack contains the "Unimplemented Instr" stk frame	#
   3959 # 									#
   3960 # OUTPUT **************************************************************	#
   3961 #	If access error:						#
   3962 #	- The system stack is changed to an access error stack frame	#
   3963 #	If Trace exception enabled:					#
   3964 #	- The system stack is changed to a Trace exception stack frame	#
   3965 #	Else: (normal case)						#
   3966 #	- Correct result has been stored as appropriate			#
   3967 #									#
   3968 # ALGORITHM ***********************************************************	#
   3969 #	There are two main cases of instructions that may enter here to	#
   3970 # be emulated: (1) the FPgen instructions, most of which were also	#
   3971 # unimplemented on the 040, and (2) "ftrapcc", "fscc", and "fdbcc".	#
   3972 #	For the first set, this handler calls the routine load_fop()	#
   3973 # to load the source and destination (for dyadic) operands to be used	#
   3974 # for instruction emulation. The correct emulation routine is then 	#
   3975 # chosen by decoding the instruction type and indexing into an 		#
   3976 # emulation subroutine index table. After emulation returns, this 	#
   3977 # handler checks to see if an exception should occur as a result of the #
   3978 # FP instruction emulation. If so, then an FP exception of the correct	#
   3979 # type is inserted into the FPU state frame using the "frestore"	#
   3980 # instruction before exiting through _fpsp_done(). In either the 	#
   3981 # exceptional or non-exceptional cases, we must check to see if the	#
   3982 # Trace exception is enabled. If so, then we must create a Trace	#
   3983 # exception frame from the current exception frame and exit through	#
   3984 # _real_trace().							#
   3985 # 	For "fdbcc", "ftrapcc", and "fscc", the emulation subroutines	#
   3986 # _fdbcc(), _ftrapcc(), and _fscc() respectively are used. All three	#
   3987 # may flag that a BSUN exception should be taken. If so, then the 	#
   3988 # current exception stack frame is converted into a BSUN exception 	#
   3989 # stack frame and an exit is made through _real_bsun(). If the		#
   3990 # instruction was "ftrapcc" and a Trap exception should result, a Trap	#
   3991 # exception stack frame is created from the current frame and an exit	#
   3992 # is made through _real_trap(). If a Trace exception is pending, then	#
   3993 # a Trace exception frame is created from the current frame and a jump	#
   3994 # is made to _real_trace(). Finally, if none of these conditions exist,	#
   3995 # then the handler exits through the callout _fpsp_done().		#
   3996 #									#
   3997 # 	In any of the above scenarios, if a _mem_read() or _mem_write()	#
   3998 # "callout" returns a failing value, then an access error stack frame	#
   3999 # is created from the current stack frame and an exit is made through	#
   4000 # _real_access().							#
   4001 #									#
   4002 #########################################################################
   4003 
   4004 #
   4005 # FP UNIMPLEMENTED INSTRUCTION STACK FRAME:
   4006 #
   4007 #	*****************
   4008 #	*		* => <ea> of fp unimp instr.
   4009 #	-      EA	-
   4010 #	*		*
   4011 #	*****************
   4012 #	* 0x2 *  0x02c	* => frame format and vector offset(vector #11)
   4013 #	*****************
   4014 #	*		*
   4015 #	-    Next PC	- => PC of instr to execute after exc handling
   4016 #	*		*
   4017 #	*****************
   4018 #	*      SR	* => SR at the time the exception was taken
   4019 #	*****************
   4020 #
   4021 # Note: the !NULL bit does not get set in the fsave frame when the
   4022 # machine encounters an fp unimp exception. Therefore, it must be set
   4023 # before leaving this handler.
   4024 #
   4025 	global		_fpsp_unimp
   4026 _fpsp_unimp:
   4027 
   4028 	link.w		%a6,&-LOCAL_SIZE	# init stack frame
   4029 
   4030 	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
   4031 	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
   4032 	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1
   4033 
   4034 	btst		&0x5,EXC_SR(%a6)	# user mode exception?
   4035 	bne.b		funimp_s		# no; supervisor mode
   4036 
   4037 # save the value of the user stack pointer onto the stack frame
   4038 funimp_u:
   4039 	mov.l		%usp,%a0		# fetch user stack pointer
   4040 	mov.l		%a0,EXC_A7(%a6)		# store in stack frame
   4041 	bra.b		funimp_cont
   4042 
   4043 # store the value of the supervisor stack pointer BEFORE the exc occurred.
   4044 # old_sp is address just above stacked effective address.
   4045 funimp_s:
   4046 	lea		4+EXC_EA(%a6),%a0	# load old a7'
   4047 	mov.l		%a0,EXC_A7(%a6)		# store a7'
   4048 	mov.l		%a0,OLD_A7(%a6)		# make a copy (used by funimp_gen_exit_a7_s)
   4049 
   4050 funimp_cont:
   4051 
   4052 # the FPIAR holds the "current PC" of the faulting instruction.
   4053 	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6) # instr ptr = saved FPIAR
   4054 
   4055 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   4056 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   4057 	bsr.l		_imem_read_long		# fetch the instruction words
   4058 	mov.l		%d0,EXC_OPWORD(%a6)	# save the fetched long on the frame
   4059 
   4060 ############################################################################
   4061 
   4062 	fmov.l		&0x0,%fpcr		# clear FPCR
   4063 	fmov.l		&0x0,%fpsr		# clear FPSR
   4064 
   4065 	clr.b		SPCOND_FLG(%a6)		# clear "special case" flag
   4066 
   4067 # Divide the fp instructions into 8 types based on the TYPE field in
   4068 # bits 6-8 of the opword(classes 6,7 are undefined).
   4069 # (for the '060, only two types  can take this exception)
   4070 #	bftst		%d0{&7:&3}		# test TYPE
   4071 	btst		&22,%d0			# d0 bit 22 = opword bit 6: type 0 or 1 ?
   4072 	bne.w		funimp_misc		# type 1
   4073 
   4074 #########################################
   4075 # TYPE == 0: General instructions	#
   4076 #########################################
   4077 funimp_gen:
   4078 
   4079 	clr.b		STORE_FLG(%a6)		# clear "store result" flag
   4080 
   4081 # clear the ccode byte and exception status byte
   4082 	andi.l		&0x00ff00ff,USER_FPSR(%a6)
   4083 
   4084 	bfextu		%d0{&16:&6},%d1		# extract upper 6 of cmdreg
   4085 	cmpi.b		%d1,&0x17		# is op an fmovecr?
   4086 	beq.w		funimp_fmovcr		# yes
   4087 
   4088 funimp_gen_op:
   4089 	bsr.l		_load_fop		# load
   4090 
   4091 	clr.l		%d0
   4092 	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode
   4093 
   4094 	mov.b		1+EXC_CMDREG(%a6),%d1	# fetch lo byte of cmdreg
   4095 	andi.w		&0x003f,%d1		# extract extension bits
   4096 	lsl.w		&0x3,%d1		# shift left 3 bits (8 table entries per op)
   4097 	or.b		STAG(%a6),%d1		# insert src optag bits
   4098 
   4099 	lea		FP_DST(%a6),%a1		# pass dst ptr in a1
   4100 	lea		FP_SRC(%a6),%a0		# pass src ptr in a0
   4101 
   4102 	mov.w		(tbl_trans.w,%pc,%d1.w*2),%d1 # fetch routine offset from tbl_trans
   4103 	jsr		(tbl_trans.w,%pc,%d1.w*1) # emulate
   4104 
   4105 funimp_fsave:
   4106 	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
   4107 	bne.w		funimp_ena		# some are enabled
   4108 
   4109 funimp_store:
   4110 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch Dn
   4111 	bsr.l		store_fpreg		# store result to fp regfile
   4112 
   4113 funimp_gen_exit:
   4114 	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
   4115 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   4116  	movm.l		EXC_DREGS(%a6),&0x0303 	# restore d0-d1/a0-a1
   4117 
   4118 funimp_gen_exit_cmp:
   4119 	cmpi.b		SPCOND_FLG(%a6),&mia7_flg # was the ea mode (sp)+ ?
   4120 	beq.b		funimp_gen_exit_a7	# yes
   4121 
   4122 	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the ea mode -(sp) ?
   4123 	beq.b		funimp_gen_exit_a7	# yes
   4124 
   4125 funimp_gen_exit_cont:
   4126 	unlk		%a6
   4127 
   4128 funimp_gen_exit_cont2:
   4129 	btst		&0x7,(%sp)		# is trace on?
   4130 	beq.l		_fpsp_done		# no
   4131 
   4132 # this catches a problem with the case where an exception will be re-inserted
   4133 # into the machine. the frestore has already been executed...so, the fmov.l
   4134 # alone of the control register would trigger an unwanted exception.
   4135 # until I feel like fixing this, we'll sidestep the exception.
   4136 	fsave		-(%sp)			# park FPU state so the fmov.l below is safe
   4137 	fmov.l		%fpiar,0x14(%sp)	# "Current PC" is in FPIAR
   4138 	frestore	(%sp)+			# put the parked FPU state back
   4139 	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x24
   4140 	bra.l		_real_trace		# exit through the trace callout
   4141 
   4142 funimp_gen_exit_a7:
   4143 	btst		&0x5,EXC_SR(%a6)	# supervisor or user mode?
   4144 	bne.b		funimp_gen_exit_a7_s	# supervisor
   4145 
   4146 	mov.l		%a0,-(%sp)		# save a0 (already restored for the user)
   4147 	mov.l		EXC_A7(%a6),%a0		# load a7 value kept on the frame
   4148 	mov.l		%a0,%usp		# write it to the user stack pointer
   4149 	mov.l		(%sp)+,%a0		# restore a0
   4150 	bra.b		funimp_gen_exit_cont
   4151 
   4152 # if the instruction was executed from supervisor mode and the addressing
   4153 # mode was (a7)+, then the stack frame for the rte must be shifted "up"
   4154 # "n" bytes where "n" is the size of the src operand type.
   4155 # f<op>.{b,w,l,s,d,x,p}
   4156 funimp_gen_exit_a7_s:
   4157 	mov.l		%d0,-(%sp)		# save d0
   4158 	mov.l		EXC_A7(%a6),%d0		# load new a7'
   4159 	sub.l		OLD_A7(%a6),%d0		# subtract old a7'
   4160 	mov.l		0x2+EXC_PC(%a6),(0x2+EXC_PC,%a6,%d0) # shift stack frame
   4161 	mov.l		EXC_SR(%a6),(EXC_SR,%a6,%d0) # shift stack frame
   4162 	mov.w		%d0,EXC_SR(%a6)		# store incr number (in the old SR slot)
   4163 	mov.l		(%sp)+,%d0		# restore d0
   4164 
   4165 	unlk		%a6
   4166 
   4167 	add.w		(%sp),%sp		# stack frame shifted; (sp) holds the incr stored above
   4168 	bra.b		funimp_gen_exit_cont2
   4169 
   4170 ######################
   4171 # fmovecr.x #ccc,fpn #
   4172 ######################
   4173 funimp_fmovcr:
   4174 	clr.l		%d0
   4175 	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode
   4176 	mov.b		1+EXC_CMDREG(%a6),%d1	# fetch lo byte of cmdreg
   4177 	andi.l		&0x0000007f,%d1		# pass rom offset in d1
   4178 	bsr.l		smovcr			# emulate the fmovecr
   4179 	bra.w		funimp_fsave		# rejoin the common exception/store path
   4180 
   4181 #########################################################################
   4182 
   4183 #
   4184 # the user has enabled some exceptions. we figure not to see this too
   4185 # often so that's why it gets lower priority.
   4186 #
   4187 funimp_ena:
   4188 
   4189 # was an exception set that was also enabled? (d0 = FPCR_ENABLE byte on entry)
   4190 	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled and set
   4191 	bfffo		%d0{&24:&8},%d0		# find highest priority exception
   4192 	bne.b		funimp_exc		# at least one was set
   4193 
   4194 # no exception that was enabled was set BUT if we got an exact overflow
   4195 # and overflow wasn't enabled but inexact was (yech!) then this is
   4196 # an inexact exception; otherwise, return to normal non-exception flow.
   4197 	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
   4198 	beq.w		funimp_store		# no; return to normal flow
   4199 
   4200 # the overflow w/ exact result happened but was inexact set in the FPCR?
   4201 funimp_ovfl:
   4202 	btst		&inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
   4203 	beq.w		funimp_store		# no; return to normal flow
   4204 	bra.b		funimp_exc_ovfl		# yes
   4205 
   4206 # some exception happened that was actually enabled.
   4207 # we'll insert this new exception into the FPU and then return.
   4208 funimp_exc:
   4209 	subi.l		&24,%d0			# fix offset to be 0-7
   4210 	cmpi.b		%d0,&0x6		# is exception INEX?
   4211 	bne.b		funimp_exc_force	# no
   4212 
   4213 # the enabled exception was inexact. so, if it occurs with an overflow
   4214 # or underflow that was disabled, then we have to force an overflow or
   4215 # underflow frame. the eventual overflow or underflow handler will see that
   4216 # it's actually an inexact and act appropriately. this is the only easy
   4217 # way to have the EXOP available for the enabled inexact handler when
   4218 # a disabled overflow or underflow has also happened.
   4219 	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
   4220 	bne.b		funimp_exc_ovfl		# yes
   4221 	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
   4222 	bne.b		funimp_exc_unfl		# yes
   4223 
   4224 # force the fsave exception status bits to signal an exception of the
   4225 # appropriate type. don't forget to "skew" the source operand in case we
   4226 # "unskewed" the one the hardware initially gave us.
   4227 funimp_exc_force:
   4228 	mov.l		%d0,-(%sp)		# save d0
   4229 	bsr.l		funimp_skew		# check for special case
   4230 	mov.l		(%sp)+,%d0		# restore d0
   4231 	mov.w		(tbl_funimp_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # status word for exception d0
   4232 	bra.b		funimp_gen_exit2	# exit with frestore
   4233 
   4234 tbl_funimp_except:
   4235 	short		0xe002, 0xe006, 0xe004, 0xe005 # status words for offsets 0-3
   4236 	short		0xe003, 0xe002, 0xe001, 0xe001 # status words for offsets 4-7 (6 = INEX)
   4237 
   4238 # insert an overflow frame
   4239 funimp_exc_ovfl:
   4240 	bsr.l		funimp_skew		# check for special case
   4241 	mov.w		&0xe005,2+FP_SRC(%a6)	# same status word as table offset 3
   4242 	bra.b		funimp_gen_exit2
   4243 
   4244 # insert an underflow frame
   4245 funimp_exc_unfl:
   4246 	bsr.l		funimp_skew		# check for special case
   4247 	mov.w		&0xe003,2+FP_SRC(%a6)	# same status word as table offset 4
   4248 
   4249 # this is the general exit point for an enabled exception that will be
   4250 # restored into the machine for the instruction just emulated.
   4251 funimp_gen_exit2:
   4252 	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
   4253 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   4254  	movm.l		EXC_DREGS(%a6),&0x0303 	# restore d0-d1/a0-a1
   4255 
   4256 	frestore	FP_SRC(%a6)		# insert exceptional status
   4257 
   4258 	bra.w		funimp_gen_exit_cmp	# rejoin the normal exit (a7 fixup, trace check)
   4259 
   4260 ############################################################################
   4261 
   4262 #
   4263 # TYPE == 1: FDB<cc>, FS<cc>, FTRAP<cc>
   4264 #
   4265 # These instructions were implemented on the '881/2 and '040 in hardware but
   4266 # are emulated in software on the '060.
   4267 #
   4268 funimp_misc:
   4269 	bfextu		%d0{&10:&3},%d1		# extract mode field
   4270 	cmpi.b		%d1,&0x1		# is it an fdb<cc>?
   4271 	beq.w		funimp_fdbcc		# yes
   4272 	cmpi.b		%d1,&0x7		# is the mode field 7?
   4273 	bne.w		funimp_fscc		# no; it is an fs<cc>
   4274 	bfextu		%d0{&13:&3},%d1		# extract low 3 bits of the opword
   4275 	cmpi.b		%d1,&0x2		# mode 7 with low field < 2?
   4276 	blt.w		funimp_fscc		# yes; it is an fs<cc> (else fall into ftrap<cc>)
   4277 
   4278 #########################
   4279 # ftrap<cc>		#
   4280 # ftrap<cc>.w #<data>	#
   4281 # ftrap<cc>.l #<data>	#
   4282 #########################
   4283 funimp_ftrapcc:
   4284 
   4285 	bsr.l		_ftrapcc		# FTRAP<cc>()
   4286 
   4287 	cmpi.b		SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring?
   4288 	beq.w		funimp_bsun		# yes
   4289 
   4290 	cmpi.b		SPCOND_FLG(%a6),&ftrapcc_flg # should a trap occur?
   4291 	bne.w		funimp_done		# no
   4292 
   4293 #	 FP UNIMP FRAME		   TRAP  FRAME
   4294 #	*****************	*****************
   4295 #	**    <EA>     **	**  Current PC **
   4296 #	*****************	*****************
   4297 #	* 0x2 *  0x02c	*	* 0x2 *  0x01c  *
   4298 #	*****************	*****************
   4299 #	**   Next PC   **	**   Next PC   **
   4300 #	*****************	*****************
   4301 #	*      SR	*	*      SR	*
   4302 #	*****************	*****************
   4303 #	    (6 words)		    (6 words)
   4304 #
   4305 # the ftrapcc instruction should take a trap. so, here we must create a
   4306 # trap stack frame from an unimplemented fp instruction stack frame and
   4307 # jump to the user supplied entry point for the trap exception
   4308 funimp_ftrapcc_tp:
   4309 	mov.l		USER_FPIAR(%a6),EXC_EA(%a6) # Address = Current PC
   4310 	mov.w		&0x201c,EXC_VOFF(%a6)	# Fmt = 0x2; Vector Offset = 0x01c
   4311 
   4312 	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
   4313 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   4314  	movm.l		EXC_DREGS(%a6),&0x0303 	# restore d0-d1/a0-a1
   4315 
   4316 	unlk		%a6
   4317 	bra.l		_real_trap		# exit through the trap callout
   4318 
   4319 #########################
   4320 # fdb<cc> Dn,<label>	#
   4321 #########################
   4322 funimp_fdbcc:
   4323 
   4324 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   4325 	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
   4326 	bsr.l		_imem_read_word		# read displacement
   4327 
   4328 	tst.l		%d1			# did ifetch fail?
   4329 	bne.w		funimp_iacc		# yes
   4330 
   4331 	ext.l		%d0			# sign extend displacement
   4332 
   4333 	bsr.l		_fdbcc			# FDB<cc>()
   4334 
   4335 	cmpi.b		SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring?
   4336 	beq.w		funimp_bsun		# yes
   4337 
   4338 	bra.w		funimp_done		# branch to finish
   4339 
   4340 #################
   4341 # fs<cc>.b <ea>	#
   4342 #################
   4343 funimp_fscc:
   4344 
   4345 	bsr.l		_fscc			# FS<cc>()
   4346 
   4347 # I am assuming here that an "fs<cc>.b -(An)" or "fs<cc>.b (An)+" instruction
   4348 # does not need to update "An" before taking a bsun exception.
   4349 	cmpi.b		SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring?
   4350 	beq.w		funimp_bsun		# yes
   4351 
   4352 	btst		&0x5,EXC_SR(%a6)	# no bsun; is it a user mode exception?
   4353 	bne.b		funimp_fscc_s		# no
   4354 
   4355 funimp_fscc_u:
   4356 	mov.l		EXC_A7(%a6),%a0		# yes; set new USP
   4357 	mov.l		%a0,%usp
   4358 	bra.w		funimp_done		# branch to finish
   4359 
   4360 # remember, I'm assuming that post-increment is bogus...(it IS!!!)
   4361 # so, the least significant WORD of the stacked effective address got
   4362 # overwritten by the "fs<cc> -(An)". We must shift the stack frame "down"
   4363 # so that the rte will work correctly without destroying the result.
   4364 # even though the operation size is byte, the stack ptr is decr by 2.
   4365 #
   4366 # remember, also, this instruction may be traced.
   4367 funimp_fscc_s:
   4368 	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was a7 modified?
   4369 	bne.w		funimp_done		# no
   4370 
   4371 	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
   4372 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   4373  	movm.l		EXC_DREGS(%a6),&0x0303 	# restore d0-d1/a0-a1
   4374 
   4375 	unlk		%a6
   4376 
   4377 	btst		&0x7,(%sp)		# is trace enabled?
   4378 	bne.b		funimp_fscc_s_trace	# yes
   4379 
   4380 	subq.l		&0x2,%sp		# frame starts 2 bytes lower
   4381 	mov.l		0x2(%sp),(%sp)		# shift SR,hi(PC) "down"
   4382 	mov.l		0x6(%sp),0x4(%sp)	# shift lo(PC),voff "down"
   4383 	bra.l		_fpsp_done		# exit; work all done
   4384 
   4385 funimp_fscc_s_trace:
   4386 	subq.l		&0x2,%sp		# frame starts 2 bytes lower
   4387 	mov.l		0x2(%sp),(%sp)		# shift SR,hi(PC) "down"
   4388 	mov.w		0x6(%sp),0x4(%sp)	# shift lo(PC)
   4389 	mov.w		&0x2024,0x6(%sp)	# fmt/voff = $2024
   4390 	fmov.l		%fpiar,0x8(%sp)		# insert "current PC"
   4391 
   4392 	bra.l		_real_trace		# exit through the trace callout
   4393 
   4394 #
   4395 # The ftrap<cc>, fs<cc>, or fdb<cc> is to take an enabled bsun. we must convert
   4396 # the fp unimplemented instruction exception stack frame into a bsun stack frame,
   4397 # restore a bsun exception into the machine, and branch to the user
   4398 # supplied bsun hook.
   4399 #
   4400 #	 FP UNIMP FRAME		   BSUN FRAME
   4401 #	*****************	*****************
   4402 #	**    <EA>     **	* 0x0 * 0x0c0	*
   4403 #	*****************	*****************
   4404 #	* 0x2 *  0x02c  *	** Current PC  **
   4405 #	*****************	*****************
   4406 #	**   Next PC   **	*      SR	*
   4407 #	*****************	*****************
   4408 #	*      SR	*	    (4 words)
   4409 #	*****************
   4410 #	    (6 words)
   4411 #
   4412 funimp_bsun:
   4413 	mov.w		&0x00c0,2+EXC_EA(%a6)	# Fmt = 0x0; Vector Offset = 0x0c0
   4414 	mov.l		USER_FPIAR(%a6),EXC_VOFF(%a6) # PC = Current PC
   4415 	mov.w		EXC_SR(%a6),2+EXC_PC(%a6) # shift SR "up"
   4416 
   4417 	mov.w		&0xe000,2+FP_SRC(%a6)	# bsun exception enabled
   4418 
   4419 	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
   4420 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   4421  	movm.l		EXC_DREGS(%a6),&0x0303 	# restore d0-d1/a0-a1
   4422 
   4423 	frestore	FP_SRC(%a6)		# restore bsun exception
   4424 
   4425 	unlk		%a6
   4426 
   4427 	addq.l		&0x4,%sp		# erase sludge (the 4-word bsun frame is 2 words shorter)
   4428 
   4429 	bra.l		_real_bsun		# branch to user bsun hook
   4430 
   4431 #
   4432 # all ftrapcc/fscc/fdbcc processing has been completed. unwind the stack frame
   4433 # and return.
   4434 #
   4435 # as usual, we have to check for trace mode being on here. since instructions
   4436 # modifying the supervisor stack frame don't pass through here, this is a
   4437 # relatively easy task.
   4438 #
   4439 funimp_done:
   4440 	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
   4441 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   4442  	movm.l		EXC_DREGS(%a6),&0x0303 	# restore d0-d1/a0-a1
   4443 
   4444 	unlk		%a6
   4445 
   4446 	btst		&0x7,(%sp)		# is trace enabled?
   4447 	bne.b		funimp_trace		# yes
   4448 
   4449 	bra.l		_fpsp_done		# no; exit, work all done
   4450 
   4451 #	 FP UNIMP FRAME		  TRACE  FRAME
   4452 #	*****************	*****************
   4453 #	**    <EA>     **	**  Current PC **
   4454 #	*****************	*****************
   4455 #	* 0x2 *  0x02c	*	* 0x2 *  0x024  *
   4456 #	*****************	*****************
   4457 #	**   Next PC   **	**   Next PC   **
   4458 #	*****************	*****************
   4459 #	*      SR	*	*      SR	*
   4460 #	*****************	*****************
   4461 #	    (6 words)		    (6 words)
   4462 #
   4463 # the ftrap<cc>, fs<cc>, or fdb<cc> instruction should take a trace trap. so, here we must create a
   4464 # trace stack frame from an unimplemented fp instruction stack frame and
   4465 # jump to the user supplied entry point for the trace exception
   4466 funimp_trace:
   4467 	fmov.l		%fpiar,0x8(%sp)		# current PC is in fpiar
   4468 	mov.b		&0x24,0x7(%sp)		# vector offset = 0x024
   4469 
   4470 	bra.l		_real_trace		# exit through the trace callout
   4471 
   4472 ################################################################
   4473 
   4474 	global		tbl_trans
   4475 	swbeg		&0x1c0
   4476 tbl_trans:
   4477 	short 		tbl_trans - tbl_trans	# $00-0 fmovecr all
   4478 	short 		tbl_trans - tbl_trans	# $00-1 fmovecr all
   4479 	short 		tbl_trans - tbl_trans	# $00-2 fmovecr all
   4480 	short 		tbl_trans - tbl_trans	# $00-3 fmovecr all
   4481 	short 		tbl_trans - tbl_trans	# $00-4 fmovecr all
   4482 	short 		tbl_trans - tbl_trans	# $00-5 fmovecr all
   4483 	short 		tbl_trans - tbl_trans	# $00-6 fmovecr all
   4484 	short 		tbl_trans - tbl_trans	# $00-7 fmovecr all
   4485 
   4486 	short 		tbl_trans - tbl_trans	# $01-0 fint norm
   4487 	short		tbl_trans - tbl_trans	# $01-1 fint zero
   4488 	short		tbl_trans - tbl_trans	# $01-2 fint inf
   4489 	short		tbl_trans - tbl_trans	# $01-3 fint qnan
   4490 	short		tbl_trans - tbl_trans	# $01-5 fint denorm
   4491 	short		tbl_trans - tbl_trans	# $01-4 fint snan
   4492 	short		tbl_trans - tbl_trans	# $01-6 fint unnorm
   4493 	short		tbl_trans - tbl_trans	# $01-7 ERROR
   4494 
   4495 	short		ssinh	 - tbl_trans	# $02-0 fsinh norm
   4496 	short		src_zero - tbl_trans	# $02-1 fsinh zero
   4497 	short		src_inf	 - tbl_trans	# $02-2 fsinh inf
   4498 	short		src_qnan - tbl_trans	# $02-3 fsinh qnan
   4499 	short		ssinhd	 - tbl_trans	# $02-5 fsinh denorm
   4500 	short		src_snan - tbl_trans	# $02-4 fsinh snan
   4501 	short		tbl_trans - tbl_trans	# $02-6 fsinh unnorm
   4502 	short		tbl_trans - tbl_trans	# $02-7 ERROR
   4503 
   4504 	short		tbl_trans - tbl_trans	# $03-0 fintrz norm
   4505 	short		tbl_trans - tbl_trans	# $03-1 fintrz zero
   4506 	short		tbl_trans - tbl_trans	# $03-2 fintrz inf
   4507 	short		tbl_trans - tbl_trans	# $03-3 fintrz qnan
   4508 	short		tbl_trans - tbl_trans	# $03-5 fintrz denorm
   4509 	short		tbl_trans - tbl_trans	# $03-4 fintrz snan
   4510 	short		tbl_trans - tbl_trans	# $03-6 fintrz unnorm
   4511 	short		tbl_trans - tbl_trans	# $03-7 ERROR
   4512 
   4513 	short		tbl_trans - tbl_trans	# $04-0 fsqrt norm
   4514 	short		tbl_trans - tbl_trans	# $04-1 fsqrt zero
   4515 	short		tbl_trans - tbl_trans	# $04-2 fsqrt inf
   4516 	short		tbl_trans - tbl_trans	# $04-3 fsqrt qnan
   4517 	short		tbl_trans - tbl_trans	# $04-5 fsqrt denorm
   4518 	short		tbl_trans - tbl_trans	# $04-4 fsqrt snan
   4519 	short		tbl_trans - tbl_trans	# $04-6 fsqrt unnorm
   4520 	short		tbl_trans - tbl_trans	# $04-7 ERROR
   4521 
   4522 	short		tbl_trans - tbl_trans	# $05-0 ERROR
   4523 	short		tbl_trans - tbl_trans	# $05-1 ERROR
   4524 	short		tbl_trans - tbl_trans	# $05-2 ERROR
   4525 	short		tbl_trans - tbl_trans	# $05-3 ERROR
   4526 	short		tbl_trans - tbl_trans	# $05-4 ERROR
   4527 	short		tbl_trans - tbl_trans	# $05-5 ERROR
   4528 	short		tbl_trans - tbl_trans	# $05-6 ERROR
   4529 	short		tbl_trans - tbl_trans	# $05-7 ERROR
   4530 
   4531 	short		slognp1	 - tbl_trans	# $06-0 flognp1 norm
   4532 	short		src_zero - tbl_trans	# $06-1 flognp1 zero
   4533 	short		sopr_inf - tbl_trans	# $06-2 flognp1 inf
   4534 	short		src_qnan - tbl_trans	# $06-3 flognp1 qnan
   4535 	short		slognp1d - tbl_trans	# $06-5 flognp1 denorm
   4536 	short		src_snan - tbl_trans	# $06-4 flognp1 snan
   4537 	short		tbl_trans - tbl_trans	# $06-6 flognp1 unnorm
   4538 	short		tbl_trans - tbl_trans	# $06-7 ERROR
   4539 
   4540 	short		tbl_trans - tbl_trans	# $07-0 ERROR
   4541 	short		tbl_trans - tbl_trans	# $07-1 ERROR
   4542 	short		tbl_trans - tbl_trans	# $07-2 ERROR
   4543 	short		tbl_trans - tbl_trans	# $07-3 ERROR
   4544 	short		tbl_trans - tbl_trans	# $07-4 ERROR
   4545 	short		tbl_trans - tbl_trans	# $07-5 ERROR
   4546 	short		tbl_trans - tbl_trans	# $07-6 ERROR
   4547 	short		tbl_trans - tbl_trans	# $07-7 ERROR
   4548 
   4549 	short		setoxm1	 - tbl_trans	# $08-0 fetoxm1 norm
   4550 	short		src_zero - tbl_trans	# $08-1 fetoxm1 zero
   4551 	short		setoxm1i - tbl_trans	# $08-2 fetoxm1 inf
   4552 	short		src_qnan - tbl_trans	# $08-3 fetoxm1 qnan
   4553 	short		setoxm1d - tbl_trans	# $08-5 fetoxm1 denorm
   4554 	short		src_snan - tbl_trans	# $08-4 fetoxm1 snan
   4555 	short		tbl_trans - tbl_trans	# $08-6 fetoxm1 unnorm
   4556 	short		tbl_trans - tbl_trans	# $08-7 ERROR
   4557 
   4558 	short		stanh	 - tbl_trans	# $09-0 ftanh norm
   4559 	short		src_zero - tbl_trans	# $09-1 ftanh zero
   4560 	short		src_one	 - tbl_trans	# $09-2 ftanh inf
   4561 	short		src_qnan - tbl_trans	# $09-3 ftanh qnan
   4562 	short		stanhd	 - tbl_trans	# $09-5 ftanh denorm
   4563 	short		src_snan - tbl_trans	# $09-4 ftanh snan
   4564 	short		tbl_trans - tbl_trans	# $09-6 ftanh unnorm
   4565 	short		tbl_trans - tbl_trans	# $09-7 ERROR
   4566 
   4567 	short		satan	 - tbl_trans	# $0a-0 fatan norm
   4568 	short		src_zero - tbl_trans	# $0a-1 fatan zero
   4569 	short		spi_2	 - tbl_trans	# $0a-2 fatan inf
   4570 	short		src_qnan - tbl_trans	# $0a-3 fatan qnan
   4571 	short		satand	 - tbl_trans	# $0a-5 fatan denorm
   4572 	short		src_snan - tbl_trans	# $0a-4 fatan snan
   4573 	short		tbl_trans - tbl_trans	# $0a-6 fatan unnorm
   4574 	short		tbl_trans - tbl_trans	# $0a-7 ERROR
   4575 
   4576 	short		tbl_trans - tbl_trans	# $0b-0 ERROR
   4577 	short		tbl_trans - tbl_trans	# $0b-1 ERROR
   4578 	short		tbl_trans - tbl_trans	# $0b-2 ERROR
   4579 	short		tbl_trans - tbl_trans	# $0b-3 ERROR
   4580 	short		tbl_trans - tbl_trans	# $0b-4 ERROR
   4581 	short		tbl_trans - tbl_trans	# $0b-5 ERROR
   4582 	short		tbl_trans - tbl_trans	# $0b-6 ERROR
   4583 	short		tbl_trans - tbl_trans	# $0b-7 ERROR
   4584 
   4585 	short		sasin	 - tbl_trans	# $0c-0 fasin norm
   4586 	short		src_zero - tbl_trans	# $0c-1 fasin zero
   4587 	short		t_operr	 - tbl_trans	# $0c-2 fasin inf
   4588 	short		src_qnan - tbl_trans	# $0c-3 fasin qnan
   4589 	short		sasind	 - tbl_trans	# $0c-5 fasin denorm
   4590 	short		src_snan - tbl_trans	# $0c-4 fasin snan
   4591 	short		tbl_trans - tbl_trans	# $0c-6 fasin unnorm
   4592 	short		tbl_trans - tbl_trans	# $0c-7 ERROR
   4593 
   4594 	short		satanh	 - tbl_trans	# $0d-0 fatanh norm
   4595 	short		src_zero - tbl_trans	# $0d-1 fatanh zero
   4596 	short		t_operr	 - tbl_trans	# $0d-2 fatanh inf
   4597 	short		src_qnan - tbl_trans	# $0d-3 fatanh qnan
   4598 	short		satanhd	 - tbl_trans	# $0d-5 fatanh denorm
   4599 	short		src_snan - tbl_trans	# $0d-4 fatanh snan
   4600 	short		tbl_trans - tbl_trans	# $0d-6 fatanh unnorm
   4601 	short		tbl_trans - tbl_trans	# $0d-7 ERROR
   4602 
   4603 	short		ssin	 - tbl_trans	# $0e-0 fsin norm
   4604 	short		src_zero - tbl_trans	# $0e-1 fsin zero
   4605 	short		t_operr	 - tbl_trans	# $0e-2 fsin inf
   4606 	short		src_qnan - tbl_trans	# $0e-3 fsin qnan
   4607 	short		ssind	 - tbl_trans	# $0e-5 fsin denorm
   4608 	short		src_snan - tbl_trans	# $0e-4 fsin snan
   4609 	short		tbl_trans - tbl_trans	# $0e-6 fsin unnorm
   4610 	short		tbl_trans - tbl_trans	# $0e-7 ERROR
   4611 
   4612 	short		stan	 - tbl_trans	# $0f-0 ftan norm
   4613 	short		src_zero - tbl_trans	# $0f-1 ftan zero
   4614 	short		t_operr	 - tbl_trans	# $0f-2 ftan inf
   4615 	short		src_qnan - tbl_trans	# $0f-3 ftan qnan
   4616 	short		stand	 - tbl_trans	# $0f-5 ftan denorm
   4617 	short		src_snan - tbl_trans	# $0f-4 ftan snan
   4618 	short		tbl_trans - tbl_trans	# $0f-6 ftan unnorm
   4619 	short		tbl_trans - tbl_trans	# $0f-7 ERROR
   4620 
   4621 	short		setox	 - tbl_trans	# $10-0 fetox norm
   4622 	short		ld_pone	 - tbl_trans	# $10-1 fetox zero
   4623 	short		szr_inf	 - tbl_trans	# $10-2 fetox inf
   4624 	short		src_qnan - tbl_trans	# $10-3 fetox qnan
   4625 	short		setoxd	 - tbl_trans	# $10-5 fetox denorm
   4626 	short		src_snan - tbl_trans	# $10-4 fetox snan
   4627 	short		tbl_trans - tbl_trans	# $10-6 fetox unnorm
   4628 	short		tbl_trans - tbl_trans	# $10-7 ERROR
   4629 
   4630 	short		stwotox	 - tbl_trans	# $11-0 ftwotox norm
   4631 	short		ld_pone	 - tbl_trans	# $11-1 ftwotox zero
   4632 	short		szr_inf	 - tbl_trans	# $11-2 ftwotox inf
   4633 	short		src_qnan - tbl_trans	# $11-3 ftwotox qnan
   4634 	short		stwotoxd - tbl_trans	# $11-5 ftwotox denorm
   4635 	short		src_snan - tbl_trans	# $11-4 ftwotox snan
   4636 	short		tbl_trans - tbl_trans	# $11-6 ftwotox unnorm
   4637 	short		tbl_trans - tbl_trans	# $11-7 ERROR
   4638 
   4639 	short		stentox	 - tbl_trans	# $12-0 ftentox norm
   4640 	short		ld_pone	 - tbl_trans	# $12-1 ftentox zero
   4641 	short		szr_inf	 - tbl_trans	# $12-2 ftentox inf
   4642 	short		src_qnan - tbl_trans	# $12-3 ftentox qnan
   4643 	short		stentoxd - tbl_trans	# $12-5 ftentox denorm
   4644 	short		src_snan - tbl_trans	# $12-4 ftentox snan
   4645 	short		tbl_trans - tbl_trans	# $12-6 ftentox unnorm
   4646 	short		tbl_trans - tbl_trans	# $12-7 ERROR
   4647 
   4648 	short		tbl_trans - tbl_trans	# $13-0 ERROR
   4649 	short		tbl_trans - tbl_trans	# $13-1 ERROR
   4650 	short		tbl_trans - tbl_trans	# $13-2 ERROR
   4651 	short		tbl_trans - tbl_trans	# $13-3 ERROR
   4652 	short		tbl_trans - tbl_trans	# $13-4 ERROR
   4653 	short		tbl_trans - tbl_trans	# $13-5 ERROR
   4654 	short		tbl_trans - tbl_trans	# $13-6 ERROR
   4655 	short		tbl_trans - tbl_trans	# $13-7 ERROR
   4656 
   4657 	short		slogn	 - tbl_trans	# $14-0 flogn norm
   4658 	short		t_dz2	 - tbl_trans	# $14-1 flogn zero
   4659 	short		sopr_inf - tbl_trans	# $14-2 flogn inf
   4660 	short		src_qnan - tbl_trans	# $14-3 flogn qnan
   4661 	short		slognd	 - tbl_trans	# $14-5 flogn denorm
   4662 	short		src_snan - tbl_trans	# $14-4 flogn snan
   4663 	short		tbl_trans - tbl_trans	# $14-6 flogn unnorm
   4664 	short		tbl_trans - tbl_trans	# $14-7 ERROR
   4665 
   4666 	short		slog10	 - tbl_trans	# $15-0 flog10 norm
   4667 	short		t_dz2	 - tbl_trans	# $15-1 flog10 zero
   4668 	short		sopr_inf - tbl_trans	# $15-2 flog10 inf
   4669 	short		src_qnan - tbl_trans	# $15-3 flog10 qnan
   4670 	short		slog10d	 - tbl_trans	# $15-5 flog10 denorm
   4671 	short		src_snan - tbl_trans	# $15-4 flog10 snan
   4672 	short		tbl_trans - tbl_trans	# $15-6 flog10 unnorm
   4673 	short		tbl_trans - tbl_trans	# $15-7 ERROR
   4674 
   4675 	short		slog2	 - tbl_trans	# $16-0 flog2 norm
   4676 	short		t_dz2	 - tbl_trans	# $16-1 flog2 zero
   4677 	short		sopr_inf - tbl_trans	# $16-2 flog2 inf
   4678 	short		src_qnan - tbl_trans	# $16-3 flog2 qnan
   4679 	short		slog2d	 - tbl_trans	# $16-5 flog2 denorm
   4680 	short		src_snan - tbl_trans	# $16-4 flog2 snan
   4681 	short		tbl_trans - tbl_trans	# $16-6 flog2 unnorm
   4682 	short		tbl_trans - tbl_trans	# $16-7 ERROR
   4683 
   4684 	short		tbl_trans - tbl_trans	# $17-0 ERROR
   4685 	short		tbl_trans - tbl_trans	# $17-1 ERROR
   4686 	short		tbl_trans - tbl_trans	# $17-2 ERROR
   4687 	short		tbl_trans - tbl_trans	# $17-3 ERROR
   4688 	short		tbl_trans - tbl_trans	# $17-4 ERROR
   4689 	short		tbl_trans - tbl_trans	# $17-5 ERROR
   4690 	short		tbl_trans - tbl_trans	# $17-6 ERROR
   4691 	short		tbl_trans - tbl_trans	# $17-7 ERROR
   4692 
   4693 	short		tbl_trans - tbl_trans	# $18-0 fabs norm
   4694 	short		tbl_trans - tbl_trans	# $18-1 fabs zero
   4695 	short		tbl_trans - tbl_trans	# $18-2 fabs inf
   4696 	short		tbl_trans - tbl_trans	# $18-3 fabs qnan
   4697 	short		tbl_trans - tbl_trans	# $18-5 fabs denorm
   4698 	short		tbl_trans - tbl_trans	# $18-4 fabs snan
   4699 	short		tbl_trans - tbl_trans	# $18-6 fabs unnorm
   4700 	short		tbl_trans - tbl_trans	# $18-7 ERROR
   4701 
   4702 	short		scosh	 - tbl_trans	# $19-0 fcosh norm
   4703 	short		ld_pone	 - tbl_trans	# $19-1 fcosh zero
   4704 	short		ld_pinf	 - tbl_trans	# $19-2 fcosh inf
   4705 	short		src_qnan - tbl_trans	# $19-3 fcosh qnan
   4706 	short		scoshd	 - tbl_trans	# $19-5 fcosh denorm
   4707 	short		src_snan - tbl_trans	# $19-4 fcosh snan
   4708 	short		tbl_trans - tbl_trans	# $19-6 fcosh unnorm
   4709 	short		tbl_trans - tbl_trans	# $19-7 ERROR
   4710 
   4711 	short		tbl_trans - tbl_trans	# $1a-0 fneg norm
   4712 	short		tbl_trans - tbl_trans	# $1a-1 fneg zero
   4713 	short		tbl_trans - tbl_trans	# $1a-2 fneg inf
   4714 	short		tbl_trans - tbl_trans	# $1a-3 fneg qnan
   4715 	short		tbl_trans - tbl_trans	# $1a-5 fneg denorm
   4716 	short		tbl_trans - tbl_trans	# $1a-4 fneg snan
   4717 	short		tbl_trans - tbl_trans	# $1a-6 fneg unnorm
   4718 	short		tbl_trans - tbl_trans	# $1a-7 ERROR
   4719 
   4720 	short		tbl_trans - tbl_trans	# $1b-0 ERROR
   4721 	short		tbl_trans - tbl_trans	# $1b-1 ERROR
   4722 	short		tbl_trans - tbl_trans	# $1b-2 ERROR
   4723 	short		tbl_trans - tbl_trans	# $1b-3 ERROR
   4724 	short		tbl_trans - tbl_trans	# $1b-4 ERROR
   4725 	short		tbl_trans - tbl_trans	# $1b-5 ERROR
   4726 	short		tbl_trans - tbl_trans	# $1b-6 ERROR
   4727 	short		tbl_trans - tbl_trans	# $1b-7 ERROR
   4728 
   4729 	short		sacos	 - tbl_trans	# $1c-0 facos norm
   4730 	short		ld_ppi2	 - tbl_trans	# $1c-1 facos zero
   4731 	short		t_operr	 - tbl_trans	# $1c-2 facos inf
   4732 	short		src_qnan - tbl_trans	# $1c-3 facos qnan
   4733 	short		sacosd	 - tbl_trans	# $1c-5 facos denorm
   4734 	short		src_snan - tbl_trans	# $1c-4 facos snan
   4735 	short		tbl_trans - tbl_trans	# $1c-6 facos unnorm
   4736 	short		tbl_trans - tbl_trans	# $1c-7 ERROR
   4737 
   4738 	short		scos	 - tbl_trans	# $1d-0 fcos norm
   4739 	short		ld_pone	 - tbl_trans	# $1d-1 fcos zero
   4740 	short		t_operr	 - tbl_trans	# $1d-2 fcos inf
   4741 	short		src_qnan - tbl_trans	# $1d-3 fcos qnan
   4742 	short		scosd	 - tbl_trans	# $1d-5 fcos denorm
   4743 	short		src_snan - tbl_trans	# $1d-4 fcos snan
   4744 	short		tbl_trans - tbl_trans	# $1d-6 fcos unnorm
   4745 	short		tbl_trans - tbl_trans	# $1d-7 ERROR
   4746 
   4747 	short		sgetexp	 - tbl_trans	# $1e-0 fgetexp norm
   4748 	short		src_zero - tbl_trans	# $1e-1 fgetexp zero
   4749 	short		t_operr	 - tbl_trans	# $1e-2 fgetexp inf
   4750 	short		src_qnan - tbl_trans	# $1e-3 fgetexp qnan
   4751 	short		sgetexpd - tbl_trans	# $1e-5 fgetexp denorm
   4752 	short		src_snan - tbl_trans	# $1e-4 fgetexp snan
   4753 	short		tbl_trans - tbl_trans	# $1e-6 fgetexp unnorm
   4754 	short		tbl_trans - tbl_trans	# $1e-7 ERROR
   4755 
   4756 	short		sgetman	 - tbl_trans	# $1f-0 fgetman norm
   4757 	short		src_zero - tbl_trans	# $1f-1 fgetman zero
   4758 	short		t_operr	 - tbl_trans	# $1f-2 fgetman inf
   4759 	short		src_qnan - tbl_trans	# $1f-3 fgetman qnan
   4760 	short		sgetmand - tbl_trans	# $1f-5 fgetman denorm
   4761 	short		src_snan - tbl_trans	# $1f-4 fgetman snan
   4762 	short		tbl_trans - tbl_trans	# $1f-6 fgetman unnorm
   4763 	short		tbl_trans - tbl_trans	# $1f-7 ERROR
   4764 
   4765 	short		tbl_trans - tbl_trans	# $20-0 fdiv norm
   4766 	short		tbl_trans - tbl_trans	# $20-1 fdiv zero
   4767 	short		tbl_trans - tbl_trans	# $20-2 fdiv inf
   4768 	short		tbl_trans - tbl_trans	# $20-3 fdiv qnan
   4769 	short		tbl_trans - tbl_trans	# $20-5 fdiv denorm
   4770 	short		tbl_trans - tbl_trans	# $20-4 fdiv snan
   4771 	short		tbl_trans - tbl_trans	# $20-6 fdiv unnorm
   4772 	short		tbl_trans - tbl_trans	# $20-7 ERROR
   4773 
   4774 	short		smod_snorm - tbl_trans	# $21-0 fmod norm
   4775 	short		smod_szero - tbl_trans	# $21-1 fmod zero
   4776 	short		smod_sinf - tbl_trans	# $21-2 fmod inf
   4777 	short		sop_sqnan - tbl_trans	# $21-3 fmod qnan
   4778 	short		smod_sdnrm - tbl_trans	# $21-5 fmod denorm
   4779 	short		sop_ssnan - tbl_trans	# $21-4 fmod snan
   4780 	short		tbl_trans - tbl_trans	# $21-6 fmod unnorm
   4781 	short		tbl_trans - tbl_trans	# $21-7 ERROR
   4782 
   4783 	short		tbl_trans - tbl_trans	# $22-0 fadd norm
   4784 	short		tbl_trans - tbl_trans	# $22-1 fadd zero
   4785 	short		tbl_trans - tbl_trans	# $22-2 fadd inf
   4786 	short		tbl_trans - tbl_trans	# $22-3 fadd qnan
   4787 	short		tbl_trans - tbl_trans	# $22-5 fadd denorm
   4788 	short		tbl_trans - tbl_trans	# $22-4 fadd snan
   4789 	short		tbl_trans - tbl_trans	# $22-6 fadd unnorm
   4790 	short		tbl_trans - tbl_trans	# $22-7 ERROR
   4791 
   4792 	short		tbl_trans - tbl_trans	# $23-0 fmul norm
   4793 	short		tbl_trans - tbl_trans	# $23-1 fmul zero
   4794 	short		tbl_trans - tbl_trans	# $23-2 fmul inf
   4795 	short		tbl_trans - tbl_trans	# $23-3 fmul qnan
   4796 	short		tbl_trans - tbl_trans	# $23-5 fmul denorm
   4797 	short		tbl_trans - tbl_trans	# $23-4 fmul snan
   4798 	short		tbl_trans - tbl_trans	# $23-6 fmul unnorm
   4799 	short		tbl_trans - tbl_trans	# $23-7 ERROR
   4800 
   4801 	short		tbl_trans - tbl_trans	# $24-0 fsgldiv norm
   4802 	short		tbl_trans - tbl_trans	# $24-1 fsgldiv zero
   4803 	short		tbl_trans - tbl_trans	# $24-2 fsgldiv inf
   4804 	short		tbl_trans - tbl_trans	# $24-3 fsgldiv qnan
   4805 	short		tbl_trans - tbl_trans	# $24-5 fsgldiv denorm
   4806 	short		tbl_trans - tbl_trans	# $24-4 fsgldiv snan
   4807 	short		tbl_trans - tbl_trans	# $24-6 fsgldiv unnorm
   4808 	short		tbl_trans - tbl_trans	# $24-7 ERROR
   4809 
   4810 	short		srem_snorm - tbl_trans	# $25-0 frem norm
   4811 	short		srem_szero - tbl_trans	# $25-1 frem zero
   4812 	short		srem_sinf - tbl_trans	# $25-2 frem inf
   4813 	short		sop_sqnan - tbl_trans	# $25-3 frem qnan
   4814 	short		srem_sdnrm - tbl_trans	# $25-5 frem denorm
   4815 	short		sop_ssnan - tbl_trans	# $25-4 frem snan
   4816 	short		tbl_trans - tbl_trans	# $25-6 frem unnorm
   4817 	short		tbl_trans - tbl_trans	# $25-7 ERROR
   4818 
   4819 	short		sscale_snorm - tbl_trans # $26-0 fscale norm
   4820 	short		sscale_szero - tbl_trans # $26-1 fscale zero
   4821 	short		sscale_sinf - tbl_trans	# $26-2 fscale inf
   4822 	short		sop_sqnan - tbl_trans	# $26-3 fscale qnan
   4823 	short		sscale_sdnrm - tbl_trans # $26-5 fscale denorm
   4824 	short		sop_ssnan - tbl_trans	# $26-4 fscale snan
   4825 	short		tbl_trans - tbl_trans	# $26-6 fscale unnorm
   4826 	short		tbl_trans - tbl_trans	# $26-7 ERROR
   4827 
   4828 	short		tbl_trans - tbl_trans	# $27-0 fsglmul norm
   4829 	short		tbl_trans - tbl_trans	# $27-1 fsglmul zero
   4830 	short		tbl_trans - tbl_trans	# $27-2 fsglmul inf
   4831 	short		tbl_trans - tbl_trans	# $27-3 fsglmul qnan
   4832 	short		tbl_trans - tbl_trans	# $27-5 fsglmul denorm
   4833 	short		tbl_trans - tbl_trans	# $27-4 fsglmul snan
   4834 	short		tbl_trans - tbl_trans	# $27-6 fsglmul unnorm
   4835 	short		tbl_trans - tbl_trans	# $27-7 ERROR
   4836 
   4837 	short		tbl_trans - tbl_trans	# $28-0 fsub norm
   4838 	short		tbl_trans - tbl_trans	# $28-1 fsub zero
   4839 	short		tbl_trans - tbl_trans	# $28-2 fsub inf
   4840 	short		tbl_trans - tbl_trans	# $28-3 fsub qnan
   4841 	short		tbl_trans - tbl_trans	# $28-5 fsub denorm
   4842 	short		tbl_trans - tbl_trans	# $28-4 fsub snan
   4843 	short		tbl_trans - tbl_trans	# $28-6 fsub unnorm
   4844 	short		tbl_trans - tbl_trans	# $28-7 ERROR
   4845 
   4846 	short		tbl_trans - tbl_trans	# $29-0 ERROR
   4847 	short		tbl_trans - tbl_trans	# $29-1 ERROR
   4848 	short		tbl_trans - tbl_trans	# $29-2 ERROR
   4849 	short		tbl_trans - tbl_trans	# $29-3 ERROR
   4850 	short		tbl_trans - tbl_trans	# $29-4 ERROR
   4851 	short		tbl_trans - tbl_trans	# $29-5 ERROR
   4852 	short		tbl_trans - tbl_trans	# $29-6 ERROR
   4853 	short		tbl_trans - tbl_trans	# $29-7 ERROR
   4854 
   4855 	short		tbl_trans - tbl_trans	# $2a-0 ERROR
   4856 	short		tbl_trans - tbl_trans	# $2a-1 ERROR
   4857 	short		tbl_trans - tbl_trans	# $2a-2 ERROR
   4858 	short		tbl_trans - tbl_trans	# $2a-3 ERROR
   4859 	short		tbl_trans - tbl_trans	# $2a-4 ERROR
   4860 	short		tbl_trans - tbl_trans	# $2a-5 ERROR
   4861 	short		tbl_trans - tbl_trans	# $2a-6 ERROR
   4862 	short		tbl_trans - tbl_trans	# $2a-7 ERROR
   4863 
   4864 	short		tbl_trans - tbl_trans	# $2b-0 ERROR
   4865 	short		tbl_trans - tbl_trans	# $2b-1 ERROR
   4866 	short		tbl_trans - tbl_trans	# $2b-2 ERROR
   4867 	short		tbl_trans - tbl_trans	# $2b-3 ERROR
   4868 	short		tbl_trans - tbl_trans	# $2b-4 ERROR
   4869 	short		tbl_trans - tbl_trans	# $2b-5 ERROR
   4870 	short		tbl_trans - tbl_trans	# $2b-6 ERROR
   4871 	short		tbl_trans - tbl_trans	# $2b-7 ERROR
   4872 
   4873 	short		tbl_trans - tbl_trans	# $2c-0 ERROR
   4874 	short		tbl_trans - tbl_trans	# $2c-1 ERROR
   4875 	short		tbl_trans - tbl_trans	# $2c-2 ERROR
   4876 	short		tbl_trans - tbl_trans	# $2c-3 ERROR
   4877 	short		tbl_trans - tbl_trans	# $2c-4 ERROR
   4878 	short		tbl_trans - tbl_trans	# $2c-5 ERROR
   4879 	short		tbl_trans - tbl_trans	# $2c-6 ERROR
   4880 	short		tbl_trans - tbl_trans	# $2c-7 ERROR
   4881 
   4882 	short		tbl_trans - tbl_trans	# $2d-0 ERROR
   4883 	short		tbl_trans - tbl_trans	# $2d-1 ERROR
   4884 	short		tbl_trans - tbl_trans	# $2d-2 ERROR
   4885 	short		tbl_trans - tbl_trans	# $2d-3 ERROR
   4886 	short		tbl_trans - tbl_trans	# $2d-4 ERROR
   4887 	short		tbl_trans - tbl_trans	# $2d-5 ERROR
   4888 	short		tbl_trans - tbl_trans	# $2d-6 ERROR
   4889 	short		tbl_trans - tbl_trans	# $2d-7 ERROR
   4890 
   4891 	short		tbl_trans - tbl_trans	# $2e-0 ERROR
   4892 	short		tbl_trans - tbl_trans	# $2e-1 ERROR
   4893 	short		tbl_trans - tbl_trans	# $2e-2 ERROR
   4894 	short		tbl_trans - tbl_trans	# $2e-3 ERROR
   4895 	short		tbl_trans - tbl_trans	# $2e-4 ERROR
   4896 	short		tbl_trans - tbl_trans	# $2e-5 ERROR
   4897 	short		tbl_trans - tbl_trans	# $2e-6 ERROR
   4898 	short		tbl_trans - tbl_trans	# $2e-7 ERROR
   4899 
   4900 	short		tbl_trans - tbl_trans	# $2f-0 ERROR
   4901 	short		tbl_trans - tbl_trans	# $2f-1 ERROR
   4902 	short		tbl_trans - tbl_trans	# $2f-2 ERROR
   4903 	short		tbl_trans - tbl_trans	# $2f-3 ERROR
   4904 	short		tbl_trans - tbl_trans	# $2f-4 ERROR
   4905 	short		tbl_trans - tbl_trans	# $2f-5 ERROR
   4906 	short		tbl_trans - tbl_trans	# $2f-6 ERROR
   4907 	short		tbl_trans - tbl_trans	# $2f-7 ERROR
   4908 
   4909 	short		ssincos	 - tbl_trans	# $30-0 fsincos norm
   4910 	short		ssincosz - tbl_trans	# $30-1 fsincos zero
   4911 	short		ssincosi - tbl_trans	# $30-2 fsincos inf
   4912 	short		ssincosqnan - tbl_trans	# $30-3 fsincos qnan
   4913 	short		ssincosd - tbl_trans	# $30-5 fsincos denorm
   4914 	short		ssincossnan - tbl_trans	# $30-4 fsincos snan
   4915 	short		tbl_trans - tbl_trans	# $30-6 fsincos unnorm
   4916 	short		tbl_trans - tbl_trans	# $30-7 ERROR
   4917 
   4918 	short		ssincos	 - tbl_trans	# $31-0 fsincos norm
   4919 	short		ssincosz - tbl_trans	# $31-1 fsincos zero
   4920 	short		ssincosi - tbl_trans	# $31-2 fsincos inf
   4921 	short		ssincosqnan - tbl_trans	# $31-3 fsincos qnan
   4922 	short		ssincosd - tbl_trans	# $31-5 fsincos denorm
   4923 	short		ssincossnan - tbl_trans	# $31-4 fsincos snan
   4924 	short		tbl_trans - tbl_trans	# $31-6 fsincos unnorm
   4925 	short		tbl_trans - tbl_trans	# $31-7 ERROR
   4926 
   4927 	short		ssincos	 - tbl_trans	# $32-0 fsincos norm
   4928 	short		ssincosz - tbl_trans	# $32-1 fsincos zero
   4929 	short		ssincosi - tbl_trans	# $32-2 fsincos inf
   4930 	short		ssincosqnan - tbl_trans	# $32-3 fsincos qnan
   4931 	short		ssincosd - tbl_trans	# $32-5 fsincos denorm
   4932 	short		ssincossnan - tbl_trans	# $32-4 fsincos snan
   4933 	short		tbl_trans - tbl_trans	# $32-6 fsincos unnorm
   4934 	short		tbl_trans - tbl_trans	# $32-7 ERROR
   4935 
   4936 	short		ssincos	 - tbl_trans	# $33-0 fsincos norm
   4937 	short		ssincosz - tbl_trans	# $33-1 fsincos zero
   4938 	short		ssincosi - tbl_trans	# $33-2 fsincos inf
   4939 	short		ssincosqnan - tbl_trans	# $33-3 fsincos qnan
   4940 	short		ssincosd - tbl_trans	# $33-5 fsincos denorm
   4941 	short		ssincossnan - tbl_trans	# $33-4 fsincos snan
   4942 	short		tbl_trans - tbl_trans	# $33-6 fsincos unnorm
   4943 	short		tbl_trans - tbl_trans	# $33-7 ERROR
   4944 
   4945 	short		ssincos	 - tbl_trans	# $34-0 fsincos norm
   4946 	short		ssincosz - tbl_trans	# $34-1 fsincos zero
   4947 	short		ssincosi - tbl_trans	# $34-2 fsincos inf
   4948 	short		ssincosqnan - tbl_trans	# $34-3 fsincos qnan
   4949 	short		ssincosd - tbl_trans	# $34-5 fsincos denorm
   4950 	short		ssincossnan - tbl_trans	# $34-4 fsincos snan
   4951 	short		tbl_trans - tbl_trans	# $34-6 fsincos unnorm
   4952 	short		tbl_trans - tbl_trans	# $34-7 ERROR
   4953 
   4954 	short		ssincos	 - tbl_trans	# $35-0 fsincos norm
   4955 	short		ssincosz - tbl_trans	# $35-1 fsincos zero
   4956 	short		ssincosi - tbl_trans	# $35-2 fsincos inf
   4957 	short		ssincosqnan - tbl_trans	# $35-3 fsincos qnan
   4958 	short		ssincosd - tbl_trans	# $35-5 fsincos denorm
   4959 	short		ssincossnan - tbl_trans	# $35-4 fsincos snan
   4960 	short		tbl_trans - tbl_trans	# $35-6 fsincos unnorm
   4961 	short		tbl_trans - tbl_trans	# $35-7 ERROR
   4962 
   4963 	short		ssincos	 - tbl_trans	# $36-0 fsincos norm
   4964 	short		ssincosz - tbl_trans	# $36-1 fsincos zero
   4965 	short		ssincosi - tbl_trans	# $36-2 fsincos inf
   4966 	short		ssincosqnan - tbl_trans	# $36-3 fsincos qnan
   4967 	short		ssincosd - tbl_trans	# $36-5 fsincos denorm
   4968 	short		ssincossnan - tbl_trans	# $36-4 fsincos snan
   4969 	short		tbl_trans - tbl_trans	# $36-6 fsincos unnorm
   4970 	short		tbl_trans - tbl_trans	# $36-7 ERROR
   4971 
   4972 	short		ssincos	 - tbl_trans	# $37-0 fsincos norm
   4973 	short		ssincosz - tbl_trans	# $37-1 fsincos zero
   4974 	short		ssincosi - tbl_trans	# $37-2 fsincos inf
   4975 	short		ssincosqnan - tbl_trans	# $37-3 fsincos qnan
   4976 	short		ssincosd - tbl_trans	# $37-5 fsincos denorm
   4977 	short		ssincossnan - tbl_trans	# $37-4 fsincos snan
   4978 	short		tbl_trans - tbl_trans	# $37-6 fsincos unnorm
   4979 	short		tbl_trans - tbl_trans	# $37-7 ERROR
   4980 
   4981 ##########
   4982 
   4983 # funimp_iacc: the instruction fetch of the displacement word during
   4984 # fdbcc emulation failed. Restore the saved registers, rebuild the current
   4985 # exception frame as an access error frame, and branch to _real_access().
   4986 funimp_iacc:
   4987 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   4988 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   4989 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   4990 
   4991 	mov.l		USER_FPIAR(%a6),EXC_PC(%a6) # frame PC := saved FPIAR value
   4992 
   4993 	unlk		%a6			# drop the a6 frame; (%sp) is used as SR below
   4994 
   4995 	mov.l		(%sp),-(%sp)		# grow frame by 4 bytes: copy SR,hi(PC) down
   4996 	mov.w		0x8(%sp),0x4(%sp)	# move lo(PC) down next to hi(PC)
   4997 	mov.w		&0x4008,0x6(%sp)	# store format/vector-offset word 0x4008
   4998 	mov.l		0x2(%sp),0x8(%sp)	# store EA := copy of the frame PC
   4999 	mov.l		&0x09428001,0xc(%sp)	# store FSLW (user-mode value)
   5000 
   5001 	btst		&0x5,(%sp)		# bit 5 of SR high byte: user or supervisor mode?
   5002 	beq.b		funimp_iacc_end		# clear: user mode, FSLW is already correct
   5003 	bset		&0x2,0xd(%sp)		# supervisor: set TM bit in the FSLW byte at 0xd
   5004 
   5005 funimp_iacc_end:
   5006 	bra.l		_real_access		# continue with the rebuilt frame on the stack
   5007 
   5008 #########################################################################
   5009 # ssin():     computes the sine of a normalized input			#
   5010 # ssind():    computes the sine of a denormalized input			#
   5011 # scos():     computes the cosine of a normalized input			#
   5012 # scosd():    computes the cosine of a denormalized input		#
   5013 # ssincos():  computes the sine and cosine of a normalized input	#
   5014 # ssincosd(): computes the sine and cosine of a denormalized input	#
   5015 #									#
   5016 # INPUT *************************************************************** #
   5017 #	a0 = pointer to extended precision input			#
   5018 #	d0 = round precision,mode					#
   5019 #									#
   5020 # OUTPUT ************************************************************** #
   5021 #	fp0 = sin(X) or cos(X) 						#
   5022 #									#
   5023 #    For ssincos(X):							#
   5024 #	fp0 = sin(X)							#
   5025 #	fp1 = cos(X)							#
   5026 #									#
   5027 # ACCURACY and MONOTONICITY ******************************************* #
   5028 #	The returned result is within 1 ulp in 64 significant bits,	#
   5029 #	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
   5030 #	rounded to double precision. The result is provably monotonic	#
   5031 #	in double precision.						#
   5032 #									#
   5033 # ALGORITHM ***********************************************************	#
   5034 #									#
   5035 #	SIN and COS:							#
   5036 #	1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1.	#
   5037 #									#
   5038 #	2. If |X| >= 15Pi or |X| < 2**(-40), go to 7.			#
   5039 #									#
   5040 #	3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let	#
   5041 #		k = N mod 4, so in particular, k = 0,1,2,or 3.		#
   5042 #		Overwrite k by k := k + AdjN.				#
   5043 #									#
   5044 #	4. If k is even, go to 6.					#
   5045 #									#
   5046 #	5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j. 		#
   5047 #		Return sgn*cos(r) where cos(r) is approximated by an 	#
   5048 #		even polynomial in r, 1 + r*r*(B1+s*(B2+ ... + s*B8)),	#
   5049 #		s = r*r.						#
   5050 #		Exit.							#
   5051 #									#
   5052 #	6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r)	#
   5053 #		where sin(r) is approximated by an odd polynomial in r	#
   5054 #		r + r*s*(A1+s*(A2+ ... + s*A7)),	s = r*r.	#
   5055 #		Exit.							#
   5056 #									#
   5057 #	7. If |X| > 1, go to 9.						#
   5058 #									#
   5059 #	8. (|X|<2**(-40)) If SIN is invoked, return X; 			#
   5060 #		otherwise return 1.					#
   5061 #									#
   5062 #	9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, 		#
   5063 #		go back to 3.						#
   5064 #									#
   5065 #	SINCOS:								#
   5066 #	1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.			#
   5067 #									#
   5068 #	2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let	#
   5069 #		k = N mod 4, so in particular, k = 0,1,2,or 3.		#
   5070 #									#
   5071 #	3. If k is even, go to 5.					#
   5072 #									#
   5073 #	4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), ie.	#
   5074 #		j1 exclusive or with the l.s.b. of k.			#
   5075 #		sgn1 := (-1)**j1, sgn2 := (-1)**j2.			#
   5076 #		SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where	#
   5077 #		sin(r) and cos(r) are computed as odd and even 		#
   5078 #		polynomials in r, respectively. Exit			#
   5079 #									#
   5080 #	5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1.			#
   5081 #		SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where	#
   5082 #		sin(r) and cos(r) are computed as odd and even 		#
   5083 #		polynomials in r, respectively. Exit			#
   5084 #									#
   5085 #	6. If |X| > 1, go to 8.						#
   5086 #									#
   5087 #	7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit.		#
   5088 #									#
   5089 #	8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, 		#
   5090 #		go back to 2.						#
   5091 #									#
   5092 #########################################################################
   5093 
   5094 SINA7:	long		0xBD6AAA77,0xCCC994F5	# sin coeff A7, double (read with fmov.d)
   5095 SINA6:	long		0x3DE61209,0x7AAE8DA1	# sin coeff A6, double (read with fmov.d)
   5096 SINA5:	long		0xBE5AE645,0x2A118AE4	# sin coeff A5, double (read with fadd.d)
   5097 SINA4:	long		0x3EC71DE3,0xA5341531	# sin coeff A4, double (read with fadd.d)
   5098 SINA3:	long		0xBF2A01A0,0x1A018B59,0x00000000,0x00000000	# A3, read as double; last two longs are zero fill
   5099 SINA2:	long		0x3FF80000,0x88888888,0x888859AF,0x00000000	# A2, extended (read with fadd.x) + zero long
   5100 SINA1:	long		0xBFFC0000,0xAAAAAAAA,0xAAAAAA99,0x00000000	# A1, extended (read with fadd.x) + zero long
   5101 
   5102 COSB8:	long		0x3D2AC4D0,0xD6011EE3	# cos coeff B8, double (read with fmov.d)
   5103 COSB7:	long		0xBDA9396F,0x9F45AC19	# cos coeff B7, double (read with fmov.d)
   5104 COSB6:	long		0x3E21EED9,0x0612C972	# cos coeff B6, double (read with fadd.d)
   5105 COSB5:	long		0xBE927E4F,0xB79D9FCF	# cos coeff B5, double (read with fadd.d)
   5106 COSB4:	long		0x3EFA01A0,0x1A01D423,0x00000000,0x00000000	# B4, read as double; last two longs are zero fill
   5107 COSB3:	long		0xBFF50000,0xB60B60B6,0x0B61D438,0x00000000	# B3, extended (read with fadd.x) + zero long
   5108 COSB2:	long		0x3FFA0000,0xAAAAAAAA,0xAAAAAB5E	# B2, extended (read with fadd.x), no fill long
   5109 COSB1:	long		0xBF000000	# B1 = -0.5, single (read with fadd.s)
   5110 
   5111 	set		INARG,FP_SCR0		# same scratch slot as X and RPRIME
   5112 
   5113 	set		X,FP_SCR0		# saved input; later reused for R' (sin) or S' (cos)
   5114 #	set		XDCARE,X+2
   5115 	set		XFRAC,X+4		# byte offset 4 within X
   5116 
   5117 	set		RPRIME,FP_SCR0		# sincos: sign-adjusted R (shares the slot with X)
   5118 	set		SPRIME,FP_SCR1
   5119 
   5120 	set		POSNEG1,L_SCR1		# +1.0 or -1.0 as a single-precision bit pattern
   5121 	set		TWOTO63,L_SCR1		# shares L_SCR1 with POSNEG1
   5122 
   5123 	set		ENDFLAG,L_SCR2		# shares L_SCR2 with INT
   5124 	set		INT,L_SCR2		# N, the integer nearest-multiple count of PI/2
   5125 
   5126 	set		ADJN,L_SCR3		# 0 = sin entry, 1 = cos entry, 4 = sincos entry
   5127 
   5128 ############################################
   5129 	global		ssin
   5130 ssin:
   5131 	mov.l		&0,ADJN(%a6)		# ADJN = 0: sine was requested
   5132 	bra.b		SINBGN			# join the code shared with scos
   5133 
   5134 ############################################
   5135 	global		scos
   5136 scos:
   5137 	mov.l		&1,ADJN(%a6)		# ADJN = 1: cosine was requested; falls through to SINBGN
   5138 
   5139 ############################################
   5140 SINBGN:
   5141 #--LOAD X, KEEP A COPY, THEN CHECK IF |X| IS TOO SMALL OR TOO LARGE
   5142 
   5143 	fmov.x		(%a0),%fp0		# LOAD INPUT
   5144 	fmov.x		%fp0,X(%a6)		# save input at X
   5145 
   5146 # "COMPACTIFY" X: d1 = (sign-stripped exponent word << 16) | high 16 bits of mantissa
   5147 	mov.l		(%a0),%d1		# put exp in hi word
   5148 	mov.w		4(%a0),%d1		# fetch hi(man)
   5149 	and.l		&0x7FFFFFFF,%d1		# strip sign
   5150 
   5151 	cmpi.l		%d1,&0x3FD78000		# is |X| >= 2**(-40)?
   5152 	bge.b		SOK1			# yes; go check the upper bound
   5153 	bra.w		SINSM			# no; input is very small
   5154 
   5155 SOK1:
   5156 	cmp.l		%d1,&0x4004BC7E		# is |X| < 15 PI?
   5157 	blt.b		SINMAIN			# yes; usual case, table-driven reduction
   5158 	bra.w		SREDUCEX		# no; input is very large
   5159 
   5160 #--THIS IS THE USUAL CASE, 2**(-40) <= |X| < 15 PI.
   5161 #--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
   5162 SINMAIN:
   5163 	fmov.x		%fp0,%fp1		# work on a copy; fp0 keeps X
   5164 	fmul.d		TWOBYPI(%pc),%fp1 	# X*2/PI
   5165 
   5166 	lea		PITBL+0x200(%pc),%a1 	# TABLE OF N*PI/2, N = -32,...,32; +0x200 = 32 entries in
   5167 
   5168 	fmov.l		%fp1,INT(%a6)		# CONVERT TO INTEGER: INT = N
   5169 
   5170 	mov.l		INT(%a6),%d1		# make a copy of N
   5171 	asl.l		&4,%d1			# N *= 16 (table index is scaled by 16 bytes)
   5172 	add.l		%d1,%a1			# tbl_addr = a1 + (N*16)
   5173 
   5174 # A1 IS THE ADDRESS OF N*PIBY2
   5175 # ...WHICH IS IN TWO PIECES: Y1 (READ AS EXTENDED) THEN Y2 (READ AS SINGLE)
   5176 	fsub.x		(%a1)+,%fp0 		# X-Y1; a1 advances to Y2
   5177 	fsub.s		(%a1),%fp0 		# fp0 = R = (X-Y1)-Y2
   5178 
   5179 SINCONT:
   5180 #--continuation from REDUCEX
   5181 
   5182 #--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED
   5183 	mov.l		INT(%a6),%d1		# d1 = N
   5184 	add.l		ADJN(%a6),%d1		# d1 = N+ADJN; SEE IF D1 IS ODD OR EVEN
   5185 	ror.l		&1,%d1			# bit 0 moves to bit 31: D1 WAS ODD IFF D1 IS NOW NEGATIVE
   5186 	cmp.l		%d1,&0
   5187 	blt.w		COSPOLY			# odd: cosine polynomial; even: fall into SINPOLY
   5188 
   5189 #--LET J BE THE LEAST SIG. BIT OF D1 (AS ROTATED IN SINCONT), LET SGN := (-1)**J.
   5190 #--THEN WE RETURN SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY
   5191 #--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE
   5192 #--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS
   5193 #--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))])
   5194 #--WHERE T=S*S.
   5195 #--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION
   5196 #--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT.
   5197 SINPOLY:
   5198 	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3
   5199 
   5200 	fmov.x		%fp0,X(%a6)		# X IS R
   5201 	fmul.x		%fp0,%fp0		# FP0 IS S
   5202 
   5203 	fmov.d		SINA7(%pc),%fp3		# fp3 = A7 (odd-index chain)
   5204 	fmov.d		SINA6(%pc),%fp2		# fp2 = A6 (even-index chain)
   5205 
   5206 	fmov.x		%fp0,%fp1
   5207 	fmul.x		%fp1,%fp1		# FP1 IS T
   5208 
   5209 	ror.l		&1,%d1			# second rotate: J moves into bit 31
   5210 	and.l		&0x80000000,%d1		# keep only that bit
   5211 # ...LEAST SIG. BIT OF D1 IS NOW IN THE SIGN POSITION
   5212 	eor.l		%d1,X(%a6)		# X IS NOW R'= SGN*R
   5213 
   5214 	fmul.x		%fp1,%fp3		# TA7
   5215 	fmul.x		%fp1,%fp2		# TA6
   5216 
   5217 	fadd.d		SINA5(%pc),%fp3		# A5+TA7
   5218 	fadd.d		SINA4(%pc),%fp2		# A4+TA6
   5219 
   5220 	fmul.x		%fp1,%fp3		# T(A5+TA7)
   5221 	fmul.x		%fp1,%fp2		# T(A4+TA6)
   5222 
   5223 	fadd.d		SINA3(%pc),%fp3		# A3+T(A5+TA7)
   5224 	fadd.x		SINA2(%pc),%fp2		# A2+T(A4+TA6)
   5225 
   5226 	fmul.x		%fp3,%fp1		# T(A3+T(A5+TA7))
   5227 
   5228 	fmul.x		%fp0,%fp2		# S(A2+T(A4+TA6))
   5229 	fadd.x		SINA1(%pc),%fp1		# A1+T(A3+T(A5+TA7))
   5230 	fmul.x		X(%a6),%fp0		# R'*S
   5231 
   5232 	fadd.x		%fp2,%fp1		# [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))]
   5233 
   5234 	fmul.x		%fp1,%fp0		# SIN(R')-R'
   5235 
   5236 	fmovm.x		(%sp)+,&0x30		# restore fp2/fp3
   5237 
   5238 	fmov.l		%d0,%fpcr		# restore users round mode,prec
   5239 	fadd.x		X(%a6),%fp0		# last inst - possible exception set; fp0 = R' + (SIN(R')-R')
   5240 	bra		t_inx2
   5241 
   5242 #--LET J BE THE LEAST SIG. BIT OF D1 (AS ROTATED IN SINCONT), LET SGN := (-1)**J.
   5243 #--THEN WE RETURN SGN*COS(R). SGN*COS(R) IS COMPUTED BY
   5244 #--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE
   5245 #--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS
   5246 #--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))])
   5247 #--WHERE T=S*S.
   5248 #--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION
   5249 #--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2
   5250 #--AND IS THEREFORE STORED AS SINGLE PRECISION.
   5251 COSPOLY:
   5252 	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3
   5253 
   5254 	fmul.x		%fp0,%fp0		# FP0 IS S
   5255 
   5256 	fmov.d		COSB8(%pc),%fp2		# fp2 = B8 (even-index chain)
   5257 	fmov.d		COSB7(%pc),%fp3		# fp3 = B7 (odd-index chain)
   5258 
   5259 	fmov.x		%fp0,%fp1
   5260 	fmul.x		%fp1,%fp1		# FP1 IS T
   5261 
   5262 	fmov.x		%fp0,X(%a6)		# X IS S
   5263 	ror.l		&1,%d1			# second rotate: J moves into bit 31
   5264 	and.l		&0x80000000,%d1		# keep only that bit
   5265 # ...LEAST SIG. BIT OF D1 IS NOW IN THE SIGN POSITION
   5266 
   5267 	fmul.x		%fp1,%fp2		# TB8
   5268 
   5269 	eor.l		%d1,X(%a6)		# X IS NOW S'= SGN*S
   5270 	and.l		&0x80000000,%d1		# same mask as above; d1 was not modified in between
   5271 
   5272 	fmul.x		%fp1,%fp3		# TB7
   5273 
   5274 	or.l		&0x3F800000,%d1		# D1 IS SGN (+1.0 OR -1.0) IN SINGLE
   5275 	mov.l		%d1,POSNEG1(%a6)	# keep SGN for the final add
   5276 
   5277 	fadd.d		COSB6(%pc),%fp2		# B6+TB8
   5278 	fadd.d		COSB5(%pc),%fp3		# B5+TB7
   5279 
   5280 	fmul.x		%fp1,%fp2		# T(B6+TB8)
   5281 	fmul.x		%fp1,%fp3		# T(B5+TB7)
   5282 
   5283 	fadd.d		COSB4(%pc),%fp2		# B4+T(B6+TB8)
   5284 	fadd.x		COSB3(%pc),%fp3		# B3+T(B5+TB7)
   5285 
   5286 	fmul.x		%fp1,%fp2		# T(B4+T(B6+TB8))
   5287 	fmul.x		%fp3,%fp1		# T(B3+T(B5+TB7))
   5288 
   5289 	fadd.x		COSB2(%pc),%fp2		# B2+T(B4+T(B6+TB8))
   5290 	fadd.s		COSB1(%pc),%fp1		# B1+T(B3+T(B5+TB7))
   5291 
   5292 	fmul.x		%fp2,%fp0		# S(B2+T(B4+T(B6+TB8)))
   5293 
   5294 	fadd.x		%fp1,%fp0		# [B1+T(B3+T(B5+TB7))]+[S(B2+T(B4+T(B6+TB8)))]
   5295 
   5296 	fmul.x		X(%a6),%fp0		# multiply by S'
   5297 
   5298 	fmovm.x		(%sp)+,&0x30		# restore fp2/fp3
   5299 
   5300 	fmov.l		%d0,%fpcr		# restore users round mode,prec
   5301 	fadd.s		POSNEG1(%a6),%fp0	# last inst - possible exception set; adds SGN
   5302 	bra		t_inx2
   5303 
   5304 ##############################################
   5305 
   5306 # SINe: Big OR Small?
   5307 #--IF |X| > 1 (COMPACT FORM 0x3FFF8000), WE USE THE GENERAL ARGUMENT REDUCTION.
   5308 #--OTHERWISE |X| IS TREATED AS TINY (< 2**(-40)): RETURN X OR 1.
   5309 SINBORS:
   5310 	cmp.l		%d1,&0x3FFF8000		# is compact |X| > 1.0?
   5311 	bgt.l		SREDUCEX		# yes; general argument reduction
   5312 
   5313 SINSM:
   5314 	mov.l		ADJN(%a6),%d1		# d1 = ADJN (0 for the sin entry, 1 for cos)
   5315 	cmp.l		%d1,&0
   5316 	bgt.b		COSTINY			# ADJN > 0: return 1; else fall into SINTINY
   5317 
   5318 # SINTINY: sin(X) = X for tiny X. here, the operation may underflow iff the
   5319 # precision is sgl or dbl. extended denorms are handled through another entry point.
   5320 SINTINY:
   5321 #	mov.w		&0x0000,XDCARE(%a6)	# JUST IN CASE
   5322 
   5323 	fmov.l		%d0,%fpcr		# restore users round mode,prec
   5324 	mov.b		&FMOV_OP,%d1		# last inst is MOVE; d1 carries that code to t_catch
   5325 	fmov.x		X(%a6),%fp0		# last inst - possible exception set; fp0 = X
   5326 	bra		t_catch
   5327 
   5328 COSTINY:
   5329 	fmov.s		&0x3F800000,%fp0	# fp0 = 1.0
   5330 	fmov.l		%d0,%fpcr		# restore users round mode,prec
   5331 	fadd.s 		&0x80800000,%fp0	# last inst - possible exception set; adds -2**(-126) so the sum is inexact
   5332 	bra		t_pinx2
   5333 
   5334 ################################################
   5335 	global		ssind
   5336 #--SIN(X) = X FOR DENORMALIZED X
   5337 ssind:
   5338 	bra		t_extdnrm		# no computation here; t_extdnrm produces the result
   5339 
   5340 ############################################
   5341 	global		scosd
   5342 #--COS(X) = 1 FOR DENORMALIZED X
   5343 scosd:
   5344 	fmov.s		&0x3F800000,%fp0	# fp0 = 1.0
   5345 	bra		t_pinx2			# same exit as COSTINY
   5346 
   5347 ##################################################
   5348 
   5349 	global		ssincos
   5350 ssincos:
   5351 #--SET ADJN TO 4 (ssin USES 0, scos USES 1)
   5352 	mov.l		&4,ADJN(%a6)
   5353 
   5354 	fmov.x		(%a0),%fp0		# LOAD INPUT
   5355 	fmov.x		%fp0,X(%a6)		# save input at X
   5356 
   5357 	mov.l		(%a0),%d1		# put exp in hi word
   5358 	mov.w		4(%a0),%d1		# fetch hi(man)
   5359 	and.l		&0x7FFFFFFF,%d1		# strip sign: d1 IS COMPACTIFIED |X|
   5360 
   5361 	cmp.l		%d1,&0x3FD78000		# |X| >= 2**(-40)?
   5362 	bge.b		SCOK1			# yes; go check the upper bound
   5363 	bra.w		SCSM			# no; input is very small
   5364 
   5365 SCOK1:
   5366 	cmp.l		%d1,&0x4004BC7E		# |X| < 15 PI?
   5367 	blt.b		SCMAIN			# yes; usual case, table-driven reduction
   5368 	bra.w		SREDUCEX		# no; input is very large
   5369 
   5370 
   5371 #--THIS IS THE USUAL CASE, |X| <= 15 PI.
   5372 #--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
   5373 SCMAIN:
   5374 	fmov.x		%fp0,%fp1
   5375 
   5376 	fmul.d		TWOBYPI(%pc),%fp1	# X*2/PI
   5377 
   5378 	lea		PITBL+0x200(%pc),%a1	# TABLE OF N*PI/2, N = -32,...,32
   5379 
   5380 	fmov.l		%fp1,INT(%a6)		# CONVERT TO INTEGER
   5381 
   5382 	mov.l		INT(%a6),%d1
   5383 	asl.l		&4,%d1
   5384 	add.l		%d1,%a1			# ADDRESS OF N*PIBY2, IN Y1, Y2
   5385 
   5386 	fsub.x		(%a1)+,%fp0		# X-Y1
   5387 	fsub.s		(%a1),%fp0		# FP0 IS R = (X-Y1)-Y2
   5388 
   5389 SCCONT:
   5390 #--continuation point from REDUCEX
   5391 
   5392 	mov.l		INT(%a6),%d1
   5393 	ror.l		&1,%d1
   5394 	cmp.l		%d1,&0			# D0 < 0 IFF N IS ODD
   5395 	bge.w		NEVEN
   5396 
   5397 SNODD:
   5398 #--REGISTERS SAVED SO FAR: D0, A0, FP2.
   5399 	fmovm.x		&0x04,-(%sp)		# save fp2
   5400 
   5401 	fmov.x		%fp0,RPRIME(%a6)
   5402 	fmul.x		%fp0,%fp0		# FP0 IS S = R*R
   5403 	fmov.d		SINA7(%pc),%fp1		# A7
   5404 	fmov.d		COSB8(%pc),%fp2		# B8
   5405 	fmul.x		%fp0,%fp1		# SA7
   5406 	fmul.x		%fp0,%fp2		# SB8
   5407 
   5408 	mov.l		%d2,-(%sp)
   5409 	mov.l		%d1,%d2
   5410 	ror.l		&1,%d2
   5411 	and.l		&0x80000000,%d2
   5412 	eor.l		%d1,%d2
   5413 	and.l		&0x80000000,%d2
   5414 
   5415 	fadd.d		SINA6(%pc),%fp1		# A6+SA7
   5416 	fadd.d		COSB7(%pc),%fp2		# B7+SB8
   5417 
   5418 	fmul.x		%fp0,%fp1		# S(A6+SA7)
   5419 	eor.l		%d2,RPRIME(%a6)
   5420 	mov.l		(%sp)+,%d2
   5421 	fmul.x		%fp0,%fp2		# S(B7+SB8)
   5422 	ror.l		&1,%d1
   5423 	and.l		&0x80000000,%d1
   5424 	mov.l		&0x3F800000,POSNEG1(%a6)
   5425 	eor.l		%d1,POSNEG1(%a6)
   5426 
   5427 	fadd.d		SINA5(%pc),%fp1		# A5+S(A6+SA7)
   5428 	fadd.d		COSB6(%pc),%fp2		# B6+S(B7+SB8)
   5429 
   5430 	fmul.x		%fp0,%fp1		# S(A5+S(A6+SA7))
   5431 	fmul.x		%fp0,%fp2		# S(B6+S(B7+SB8))
   5432 	fmov.x		%fp0,SPRIME(%a6)
   5433 
   5434 	fadd.d		SINA4(%pc),%fp1		# A4+S(A5+S(A6+SA7))
   5435 	eor.l		%d1,SPRIME(%a6)
   5436 	fadd.d		COSB5(%pc),%fp2		# B5+S(B6+S(B7+SB8))
   5437 
   5438 	fmul.x		%fp0,%fp1		# S(A4+...)
   5439 	fmul.x		%fp0,%fp2		# S(B5+...)
   5440 
   5441 	fadd.d		SINA3(%pc),%fp1		# A3+S(A4+...)
   5442 	fadd.d		COSB4(%pc),%fp2		# B4+S(B5+...)
   5443 
   5444 	fmul.x		%fp0,%fp1		# S(A3+...)
   5445 	fmul.x		%fp0,%fp2		# S(B4+...)
   5446 
   5447 	fadd.x		SINA2(%pc),%fp1		# A2+S(A3+...)
   5448 	fadd.x		COSB3(%pc),%fp2		# B3+S(B4+...)
   5449 
   5450 	fmul.x		%fp0,%fp1		# S(A2+...)
   5451 	fmul.x		%fp0,%fp2		# S(B3+...)
   5452 
   5453 	fadd.x		SINA1(%pc),%fp1		# A1+S(A2+...)
   5454 	fadd.x		COSB2(%pc),%fp2		# B2+S(B3+...)
   5455 
   5456 	fmul.x		%fp0,%fp1		# S(A1+...)
   5457 	fmul.x		%fp2,%fp0		# S(B2+...)
   5458 
   5459 	fmul.x		RPRIME(%a6),%fp1	# R'S(A1+...)
   5460 	fadd.s		COSB1(%pc),%fp0		# B1+S(B2...)
   5461 	fmul.x		SPRIME(%a6),%fp0	# S'(B1+S(B2+...))
   5462 
   5463 	fmovm.x		(%sp)+,&0x20		# restore fp2
   5464 
   5465 	fmov.l		%d0,%fpcr
   5466 	fadd.x		RPRIME(%a6),%fp1	# COS(X)
   5467 	bsr		sto_cos			# store cosine result
   5468 	fadd.s		POSNEG1(%a6),%fp0	# SIN(X)
   5469 	bra		t_inx2
   5470 
   5471 NEVEN:
   5472 #--REGISTERS SAVED SO FAR: FP2.
   5473 	fmovm.x		&0x04,-(%sp)		# save fp2
   5474 
   5475 	fmov.x		%fp0,RPRIME(%a6)
   5476 	fmul.x		%fp0,%fp0		# FP0 IS S = R*R
   5477 
   5478 	fmov.d		COSB8(%pc),%fp1		# B8
   5479 	fmov.d		SINA7(%pc),%fp2		# A7
   5480 
   5481 	fmul.x		%fp0,%fp1		# SB8
   5482 	fmov.x		%fp0,SPRIME(%a6)
   5483 	fmul.x		%fp0,%fp2		# SA7
   5484 
   5485 	ror.l		&1,%d1
   5486 	and.l		&0x80000000,%d1
   5487 
   5488 	fadd.d		COSB7(%pc),%fp1		# B7+SB8
   5489 	fadd.d		SINA6(%pc),%fp2		# A6+SA7
   5490 
   5491 	eor.l		%d1,RPRIME(%a6)
   5492 	eor.l		%d1,SPRIME(%a6)
   5493 
   5494 	fmul.x		%fp0,%fp1		# S(B7+SB8)
   5495 
   5496 	or.l		&0x3F800000,%d1
   5497 	mov.l		%d1,POSNEG1(%a6)
   5498 
   5499 	fmul.x		%fp0,%fp2		# S(A6+SA7)
   5500 
   5501 	fadd.d		COSB6(%pc),%fp1		# B6+S(B7+SB8)
   5502 	fadd.d		SINA5(%pc),%fp2		# A5+S(A6+SA7)
   5503 
   5504 	fmul.x		%fp0,%fp1		# S(B6+S(B7+SB8))
   5505 	fmul.x		%fp0,%fp2		# S(A5+S(A6+SA7))
   5506 
   5507 	fadd.d		COSB5(%pc),%fp1		# B5+S(B6+S(B7+SB8))
   5508 	fadd.d		SINA4(%pc),%fp2		# A4+S(A5+S(A6+SA7))
   5509 
   5510 	fmul.x		%fp0,%fp1		# S(B5+...)
   5511 	fmul.x		%fp0,%fp2		# S(A4+...)
   5512 
   5513 	fadd.d		COSB4(%pc),%fp1		# B4+S(B5+...)
   5514 	fadd.d		SINA3(%pc),%fp2		# A3+S(A4+...)
   5515 
   5516 	fmul.x		%fp0,%fp1		# S(B4+...)
   5517 	fmul.x		%fp0,%fp2		# S(A3+...)
   5518 
   5519 	fadd.x		COSB3(%pc),%fp1		# B3+S(B4+...)
   5520 	fadd.x		SINA2(%pc),%fp2		# A2+S(A3+...)
   5521 
   5522 	fmul.x		%fp0,%fp1		# S(B3+...)
   5523 	fmul.x		%fp0,%fp2		# S(A2+...)
   5524 
   5525 	fadd.x		COSB2(%pc),%fp1		# B2+S(B3+...)
   5526 	fadd.x		SINA1(%pc),%fp2		# A1+S(A2+...)
   5527 
   5528 	fmul.x		%fp0,%fp1		# S(B2+...)
   5529 	fmul.x		%fp2,%fp0		# s(a1+...)
   5530 
   5531 
   5532 	fadd.s		COSB1(%pc),%fp1		# B1+S(B2...)
   5533 	fmul.x		RPRIME(%a6),%fp0	# R'S(A1+...)
   5534 	fmul.x		SPRIME(%a6),%fp1	# S'(B1+S(B2+...))
   5535 
   5536 	fmovm.x		(%sp)+,&0x20		# restore fp2
   5537 
   5538 	fmov.l		%d0,%fpcr
   5539 	fadd.s		POSNEG1(%a6),%fp1	# COS(X)
   5540 	bsr		sto_cos			# store cosine result
   5541 	fadd.x		RPRIME(%a6),%fp0	# SIN(X)
   5542 	bra		t_inx2
   5543 
   5544 ################################################
   5545 
   5546 SCBORS:
        # sincos: big or small?  d1 is compared with 0x3FFF8000, the compact
        # form of 1.0; larger arguments take the general reduction, everything
        # else falls through to the tiny-argument case below.
   5547 	cmp.l		%d1,&0x3FFF8000
   5548 	bgt.w		SREDUCEX
   5549 
   5550 ################################################
   5551 
        # tiny argument: cos(X) is 1.0 minus 2**(-126) (single 0x00800000),
        # subtracted under the user's fpcr; sin(X) is X itself, reloaded from
        # X(%a6) as the final MOVE.
   5552 SCSM:
   5553 #	mov.w		&0x0000,XDCARE(%a6)
   5554 	fmov.s		&0x3F800000,%fp1	# fp1 = 1.0
   5555 
   5556 	fmov.l		%d0,%fpcr		# restore users round mode,prec
   5557 	fsub.s		&0x00800000,%fp1	# cosine result, rounded per user mode
   5558 	bsr		sto_cos			# store cosine result
   5559 	fmov.l		%fpcr,%d0		# d0 must have fpcr,too
   5560 	mov.b		&FMOV_OP,%d1		# last inst is MOVE
   5561 	fmov.x		X(%a6),%fp0		# sine result = X
   5562 	bra		t_catch
   5563 
   5564 ##############################################
   5565 
   5566 	global		ssincosd
   5567 #--SIN AND COS OF X FOR DENORMALIZED X
        #--The cosine is 1.0, handed to sto_cos in fp1; the sine path then
        #--continues at t_extdnrm. d0 is preserved across the sto_cos call.
        #--NOTE(review): sto_cos and t_extdnrm are defined outside this view;
        #--sto_cos presumably clobbers d0, hence the save/restore -- confirm.
   5568 ssincosd:
   5569 	mov.l		%d0,-(%sp)		# save d0
   5570 	fmov.s		&0x3F800000,%fp1	# fp1 = 1.0
   5571 	bsr		sto_cos			# store cosine result
   5572 	mov.l		(%sp)+,%d0		# restore d0
   5573 	bra		t_extdnrm
   5574 
   5575 ############################################
   5576 
   5577 #--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
   5578 #--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
   5579 #--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
        #--On entry fp0 = X and d1 = compact form of |X|. fp2-fp5 and d2 are
        #--saved here and restored at SRESTORE. On exit fp0 holds the reduced
        #--argument and INT(%a6) the integer quotient N; control returns to
        #--SINCONT (ADJN < 4) or SCCONT (ADJN >= 4).
   5580 SREDUCEX:
   5581 	fmovm.x		&0x3c,-(%sp)		# save {fp2-fp5}
   5582 	mov.l		%d2,-(%sp)		# save d2
   5583 	fmov.s		&0x00000000,%fp1	# fp1 = 0
   5584 
   5585 #--If compact form of abs(arg) in d1=$7ffeffff, argument is so large that
   5586 #--there is a danger of unwanted overflow in first LOOP iteration.  In this
   5587 #--case, reduce argument by one remainder step to make subsequent reduction
   5588 #--safe.
   5589 	cmp.l		%d1,&0x7ffeffff		# is arg dangerously large?
   5590 	bne.b		SLOOP			# no
   5591 
   5592 # yes; create 2**16383*PI/2
   5593 	mov.w		&0x7ffe,FP_SCR0_EX(%a6)
   5594 	mov.l		&0xc90fdaa2,FP_SCR0_HI(%a6)
   5595 	clr.l		FP_SCR0_LO(%a6)
   5596 
   5597 # create low half of 2**16383*PI/2 at FP_SCR1
   5598 	mov.w		&0x7fdc,FP_SCR1_EX(%a6)
   5599 	mov.l		&0x85a308d3,FP_SCR1_HI(%a6)
   5600 	clr.l		FP_SCR1_LO(%a6)
   5601 
   5602 	ftest.x		%fp0			# test sign of argument
   5603 	fblt.w		sred_neg
   5604 
        # the constants are built positive; for a positive argument their sign
        # bits are set so that the fadd's below subtract them.
   5605 	or.b		&0x80,FP_SCR0_EX(%a6)	# positive arg
   5606 	or.b		&0x80,FP_SCR1_EX(%a6)
   5607 sred_neg:
   5608 	fadd.x		FP_SCR0(%a6),%fp0	# high part of reduction is exact
   5609 	fmov.x		%fp0,%fp1		# save high result in fp1
   5610 	fadd.x		FP_SCR1(%a6),%fp0	# low part of reduction
   5611 	fsub.x		%fp0,%fp1		# determine low component of result
   5612 	fadd.x		FP_SCR1(%a6),%fp1	# fp0/fp1 are reduced argument.
   5613 
   5614 #--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
   5615 #--integer quotient will be stored in N
   5616 #--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
   5617 SLOOP:
   5618 	fmov.x		%fp0,INARG(%a6)		# +-2**K * F, 1 <= F < 2
   5619 	mov.w		INARG(%a6),%d1		# sign/exponent word of R
   5620 	mov.l		%d1,%a1			# save a copy of D1
   5621 	and.l		&0x00007FFF,%d1		# drop the sign bit
   5622 	sub.l		&0x00003FFF,%d1		# d1 = K
   5623 	cmp.l		%d1,&28
   5624 	ble.b		SLASTLOOP		# K <= 28: this is the final pass
   5625 SCONTLOOP:
   5626 	sub.l		&27,%d1			# d1 = L := K-27
   5627 	mov.b		&0,ENDFLAG(%a6)
   5628 	bra.b		SWORK
   5629 SLASTLOOP:
   5630 	clr.l		%d1			# d1 = L := 0
   5631 	mov.b		&1,ENDFLAG(%a6)
   5632 
   5633 SWORK:
   5634 #--FIND THE REMAINDER OF (R,r) W.R.T.	2**L * (PI/2). L IS SO CHOSEN
   5635 #--THAT	INT( X * (2/PI) / 2**(L) ) < 2**29.
   5636 
   5637 #--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
   5638 #--2**L * (PIby2_1), 2**L * (PIby2_2)
   5639 
   5640 	mov.l		&0x00003FFE,%d2		# BIASED EXP OF 2/PI
   5641 	sub.l		%d1,%d2			# BIASED EXP OF 2**(-L)*(2/PI)
   5642 
   5643 	mov.l		&0xA2F9836E,FP_SCR0_HI(%a6)
   5644 	mov.l		&0x4E44152A,FP_SCR0_LO(%a6)
   5645 	mov.w		%d2,FP_SCR0_EX(%a6)	# FP_SCR0 = 2**(-L)*(2/PI)
   5646 
   5647 	fmov.x		%fp0,%fp2
   5648 	fmul.x		FP_SCR0(%a6),%fp2	# fp2 = X * 2**(-L)*(2/PI)
   5649 
   5650 #--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
   5651 #--FLOATING POINT FORMAT, THE TWO FMOVE'S	FMOVE.L FP <--> N
   5652 #--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
   5653 #--(SIGN(INARG)*2**63	+	FP2) - SIGN(INARG)*2**63 WILL GIVE
   5654 #--US THE DESIRED VALUE IN FLOATING POINT.
   5655 	mov.l		%a1,%d2			# saved sign/exponent word
   5656 	swap		%d2			# move it to the high word
   5657 	and.l		&0x80000000,%d2		# keep only the sign bit
   5658 	or.l		&0x5F000000,%d2		# d2 = SIGN(INARG)*2**63 IN SGL
   5659 	mov.l		%d2,TWOTO63(%a6)
   5660 	fadd.s		TWOTO63(%a6),%fp2	# THE FRACTIONAL PART OF FP2 IS ROUNDED
   5661 	fsub.s		TWOTO63(%a6),%fp2	# fp2 = N
   5662 #	fint.x		%fp2
   5663 
   5664 #--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
   5665 	mov.l		%d1,%d2			# d2 = L
   5666 
   5667 	add.l		&0x00003FFF,%d2		# BIASED EXP OF 2**L * (PI/2)
   5668 	mov.w		%d2,FP_SCR0_EX(%a6)
   5669 	mov.l		&0xC90FDAA2,FP_SCR0_HI(%a6)
   5670 	clr.l		FP_SCR0_LO(%a6)		# FP_SCR0 = 2**(L) * Piby2_1
   5671 
   5672 	add.l		&0x00003FDD,%d1		# biased exp of 2**L * Piby2_2
   5673 	mov.w		%d1,FP_SCR1_EX(%a6)
   5674 	mov.l		&0x85A308D3,FP_SCR1_HI(%a6)
   5675 	clr.l		FP_SCR1_LO(%a6)		# FP_SCR1 = 2**(L) * Piby2_2
   5676 
   5677 	mov.b		ENDFLAG(%a6),%d1	# d1 now carries the last-pass flag
   5678 
   5679 #--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
   5680 #--P2 = 2**(L) * Piby2_2
   5681 	fmov.x		%fp2,%fp4		# fp4 = N
   5682 	fmul.x		FP_SCR0(%a6),%fp4	# fp4 = W = N*P1
   5683 	fmov.x		%fp2,%fp5		# fp5 = N
   5684 	fmul.x		FP_SCR1(%a6),%fp5	# fp5 = w = N*P2
   5685 	fmov.x		%fp4,%fp3		# fp3 = W = N*P1
   5686 
   5687 #--we want P+p = W+w  but  |p| <= half ulp of P
   5688 #--Then, we need to compute  A := R-P   and  a := r-p
   5689 	fadd.x		%fp5,%fp3		# fp3 = P
   5690 	fsub.x		%fp3,%fp4		# fp4 = W-P
   5691 
   5692 	fsub.x		%fp3,%fp0		# fp0 = A := R - P
   5693 	fadd.x		%fp5,%fp4		# fp4 = p = (W-P)+w
   5694 
   5695 	fmov.x		%fp0,%fp3		# fp3 = A
   5696 	fsub.x		%fp4,%fp1		# fp1 = a := r - p
   5697 
   5698 #--Now we need to normalize (A,a) to  "new (R,r)" where R+r = A+a but
   5699 #--|r| <= half ulp of R.
   5700 	fadd.x		%fp1,%fp0		# fp0 = R := A+a
   5701 #--No need to calculate r if this is the last loop
   5702 	cmp.b		%d1,&0
   5703 	bgt.w		SRESTORE
   5704 
   5705 #--Need to calculate r
   5706 	fsub.x		%fp0,%fp3		# fp3 = A-R
   5707 	fadd.x		%fp3,%fp1		# fp1 = r := (A-R)+a
   5708 	bra.w		SLOOP
   5709 
   5710 SRESTORE:
   5711 	fmov.l		%fp2,INT(%a6)		# store N from the final pass as an integer
   5712 	mov.l		(%sp)+,%d2		# restore d2
   5713 	fmovm.x		(%sp)+,&0x3c		# restore {fp2-fp5}
   5714 
   5715 	mov.l		ADJN(%a6),%d1
   5716 	cmp.l		%d1,&4			# ADJN = 4 was set by ssincos
   5717 
   5718 	blt.w		SINCONT			# ADJN < 4: resume at SINCONT
   5719 	bra.w		SCCONT			# otherwise resume the sincos path
   5720 
   5721 #########################################################################
   5722 # stan():  computes the tangent of a normalized input			#
   5723 # stand(): computes the tangent of a denormalized input			#
   5724 #									#
   5725 # INPUT *************************************************************** #
   5726 #	a0 = pointer to extended precision input			#
   5727 #	d0 = round precision,mode					#
   5728 #									#
   5729 # OUTPUT ************************************************************** #
   5730 #	fp0 = tan(X)							#
   5731 #									#
   5732 # ACCURACY and MONOTONICITY ******************************************* #
   5733 #	The returned result is within 3 ulp in 64 significant bit, i.e. #
   5734 #	within 0.5001 ulp to 53 bits if the result is subsequently	#
   5735 #	rounded to double precision. The result is provably monotonic	#
   5736 #	in double precision.						#
   5737 #									#
   5738 # ALGORITHM *********************************************************** #
   5739 #									#
   5740 #	1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.			#
   5741 #									#
   5742 #	2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let	#
   5743 #		k = N mod 2, so in particular, k = 0 or 1.		#
   5744 #									#
   5745 #	3. If k is odd, go to 5.					#
   5746 #									#
   5747 #	4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a	#
   5748 #		rational function U/V where				#
   5749 #		U = r + r*s*(P1 + s*(P2 + s*P3)), and			#
   5750 #		V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))),  s = r*r.	#
   5751 #		Exit.							#
   5752 #									#
   5753 #	5. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by #
   5754 #		a rational function U/V where				#
   5755 #		U = r + r*s*(P1 + s*(P2 + s*P3)), and			#
   5756 #		V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r,	#
   5757 #		-Cot(r) = -V/U. Exit.					#
   5758 #									#
   5759 #	6. If |X| > 1, go to 8.						#
   5760 #									#
   5761 #	7. (|X|<2**(-40)) Tan(X) = X. Exit.				#
   5762 #									#
   5763 #	8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back 	#
   5764 #		to 2.							#
   5765 #									#
   5766 #########################################################################
   5767 
   5768 TANQ4:						# Q4: 8-byte value, loaded with fmov.d
   5769 	long		0x3EA0B759,0xF50F8688
   5770 TANP3:						# P3: 8-byte value, loaded with fmov.d
   5771 	long		0xBEF2BAA5,0xA8924F04
   5772 
        # Q3 occupies 16 bytes but is read with fadd.d, i.e. only the first
        # two longs are used; the trailing zero longs are padding.
   5773 TANQ3:
   5774 	long		0xBF346F59,0xB39BA65F,0x00000000,0x00000000
   5775 
        # P2, Q2, P1, Q1 are extended-precision values (read with fadd.x).
   5776 TANP2:
   5777 	long		0x3FF60000,0xE073D3FC,0x199C4A00,0x00000000
   5778 
   5779 TANQ2:
   5780 	long		0x3FF90000,0xD23CD684,0x15D95FA1,0x00000000
   5781 
   5782 TANP1:
   5783 	long		0xBFFC0000,0x8895A6C5,0xFB423BCA,0x00000000
   5784 
   5785 TANQ1:
   5786 	long		0xBFFD0000,0xEEF57E0D,0xA84BC8CE,0x00000000
   5787 
        # 1/(2*PI): same mantissa as the 2/PI constant built in the reduction
        # loops, with the exponent two lower. Not referenced in this view.
   5788 INVTWOPI:
   5789 	long		0x3FFC0000,0xA2F9836E,0x4E44152A,0x00000000
   5790 
        # 2*PI as a leading/trailing pair. Not referenced in this view.
        # NOTE(review): TWOPI2's mantissa ends in ...D4 whereas the Piby2_2
        # constant used by the reduction loops ends in ...D3 -- confirm intent.
   5791 TWOPI1:
   5792 	long		0x40010000,0xC90FDAA2,0x00000000,0x00000000
   5793 TWOPI2:
   5794 	long		0x3FDF0000,0x85A308D4,0x00000000,0x00000000
   5795 
   5796 #--N*PI/2, -32 <= N <= 32, IN A LEADING TERM IN EXT. AND TRAILING
   5797 #--TERM IN SGL. NOTE THAT PI IS 64-BIT LONG, THUS N*PI/2 IS AT
   5798 #--MOST 69 BITS LONG.
        #--Layout: 65 entries of 16 bytes each -- three longs of extended
        #--precision leading term followed by one long of single precision
        #--trailing term. The N = 0 entry is at PITBL+0x200; users index the
        #--table from there with N*16.
   5799 #	global		PITBL
   5800 PITBL:
   5801 	long		0xC0040000,0xC90FDAA2,0x2168C235,0x21800000
   5802 	long		0xC0040000,0xC2C75BCD,0x105D7C23,0xA0D00000
   5803 	long		0xC0040000,0xBC7EDCF7,0xFF523611,0xA1E80000
   5804 	long		0xC0040000,0xB6365E22,0xEE46F000,0x21480000
   5805 	long		0xC0040000,0xAFEDDF4D,0xDD3BA9EE,0xA1200000
   5806 	long		0xC0040000,0xA9A56078,0xCC3063DD,0x21FC0000
   5807 	long		0xC0040000,0xA35CE1A3,0xBB251DCB,0x21100000
   5808 	long		0xC0040000,0x9D1462CE,0xAA19D7B9,0xA1580000
   5809 	long		0xC0040000,0x96CBE3F9,0x990E91A8,0x21E00000
   5810 	long		0xC0040000,0x90836524,0x88034B96,0x20B00000
   5811 	long		0xC0040000,0x8A3AE64F,0x76F80584,0xA1880000
   5812 	long		0xC0040000,0x83F2677A,0x65ECBF73,0x21C40000
   5813 	long		0xC0030000,0xFB53D14A,0xA9C2F2C2,0x20000000
   5814 	long		0xC0030000,0xEEC2D3A0,0x87AC669F,0x21380000
   5815 	long		0xC0030000,0xE231D5F6,0x6595DA7B,0xA1300000
   5816 	long		0xC0030000,0xD5A0D84C,0x437F4E58,0x9FC00000
   5817 	long		0xC0030000,0xC90FDAA2,0x2168C235,0x21000000
   5818 	long		0xC0030000,0xBC7EDCF7,0xFF523611,0xA1680000
   5819 	long		0xC0030000,0xAFEDDF4D,0xDD3BA9EE,0xA0A00000
   5820 	long		0xC0030000,0xA35CE1A3,0xBB251DCB,0x20900000
   5821 	long		0xC0030000,0x96CBE3F9,0x990E91A8,0x21600000
   5822 	long		0xC0030000,0x8A3AE64F,0x76F80584,0xA1080000
   5823 	long		0xC0020000,0xFB53D14A,0xA9C2F2C2,0x1F800000
   5824 	long		0xC0020000,0xE231D5F6,0x6595DA7B,0xA0B00000
   5825 	long		0xC0020000,0xC90FDAA2,0x2168C235,0x20800000
   5826 	long		0xC0020000,0xAFEDDF4D,0xDD3BA9EE,0xA0200000
   5827 	long		0xC0020000,0x96CBE3F9,0x990E91A8,0x20E00000
   5828 	long		0xC0010000,0xFB53D14A,0xA9C2F2C2,0x1F000000
   5829 	long		0xC0010000,0xC90FDAA2,0x2168C235,0x20000000
   5830 	long		0xC0010000,0x96CBE3F9,0x990E91A8,0x20600000
   5831 	long		0xC0000000,0xC90FDAA2,0x2168C235,0x1F800000
   5832 	long		0xBFFF0000,0xC90FDAA2,0x2168C235,0x1F000000
   5833 	long		0x00000000,0x00000000,0x00000000,0x00000000
   5834 	long		0x3FFF0000,0xC90FDAA2,0x2168C235,0x9F000000
   5835 	long		0x40000000,0xC90FDAA2,0x2168C235,0x9F800000
   5836 	long		0x40010000,0x96CBE3F9,0x990E91A8,0xA0600000
   5837 	long		0x40010000,0xC90FDAA2,0x2168C235,0xA0000000
   5838 	long		0x40010000,0xFB53D14A,0xA9C2F2C2,0x9F000000
   5839 	long		0x40020000,0x96CBE3F9,0x990E91A8,0xA0E00000
   5840 	long		0x40020000,0xAFEDDF4D,0xDD3BA9EE,0x20200000
   5841 	long		0x40020000,0xC90FDAA2,0x2168C235,0xA0800000
   5842 	long		0x40020000,0xE231D5F6,0x6595DA7B,0x20B00000
   5843 	long		0x40020000,0xFB53D14A,0xA9C2F2C2,0x9F800000
   5844 	long		0x40030000,0x8A3AE64F,0x76F80584,0x21080000
   5845 	long		0x40030000,0x96CBE3F9,0x990E91A8,0xA1600000
   5846 	long		0x40030000,0xA35CE1A3,0xBB251DCB,0xA0900000
   5847 	long		0x40030000,0xAFEDDF4D,0xDD3BA9EE,0x20A00000
   5848 	long		0x40030000,0xBC7EDCF7,0xFF523611,0x21680000
   5849 	long		0x40030000,0xC90FDAA2,0x2168C235,0xA1000000
   5850 	long		0x40030000,0xD5A0D84C,0x437F4E58,0x1FC00000
   5851 	long		0x40030000,0xE231D5F6,0x6595DA7B,0x21300000
   5852 	long		0x40030000,0xEEC2D3A0,0x87AC669F,0xA1380000
   5853 	long		0x40030000,0xFB53D14A,0xA9C2F2C2,0xA0000000
   5854 	long		0x40040000,0x83F2677A,0x65ECBF73,0xA1C40000
   5855 	long		0x40040000,0x8A3AE64F,0x76F80584,0x21880000
   5856 	long		0x40040000,0x90836524,0x88034B96,0xA0B00000
   5857 	long		0x40040000,0x96CBE3F9,0x990E91A8,0xA1E00000
   5858 	long		0x40040000,0x9D1462CE,0xAA19D7B9,0x21580000
   5859 	long		0x40040000,0xA35CE1A3,0xBB251DCB,0xA1100000
   5860 	long		0x40040000,0xA9A56078,0xCC3063DD,0xA1FC0000
   5861 	long		0x40040000,0xAFEDDF4D,0xDD3BA9EE,0x21200000
   5862 	long		0x40040000,0xB6365E22,0xEE46F000,0xA1480000
   5863 	long		0x40040000,0xBC7EDCF7,0xFF523611,0x21E80000
   5864 	long		0x40040000,0xC2C75BCD,0x105D7C23,0x20D00000
   5865 	long		0x40040000,0xC90FDAA2,0x2168C235,0xA1800000
   5866 
   5867 	set		INARG,FP_SCR0		# scratch copy of R in the reduction loops
   5868 
        # TWOTO63 and INT share the same scratch long (L_SCR1): TWOTO63 is
        # only live while N is being rounded inside a loop pass, INT is written
        # after the loop finishes.
   5869 	set		TWOTO63,L_SCR1
   5870 	set		INT,L_SCR1
   5871 	set		ENDFLAG,L_SCR2		# byte flag: 1 on the last reduction pass
   5872 
   5873 	global		stan
        #--stan: tangent of the extended precision operand at (a0); d0 holds
        #--the user's fpcr value, written back to %fpcr just before the final
        #--divide. The result is left in fp0 and control passes to t_inx2.
        #--Dispatch on the compact form of |X| in d1:
        #--	|X| <  2**(-40)			-> TANSM
        #--	2**(-40) <= |X| < 15 PI		-> TANMAIN (table reduction)
        #--	otherwise			-> REDUCEX (general reduction)
   5874 stan:
   5875 	fmov.x		(%a0),%fp0		# LOAD INPUT
   5876 
   5877 	mov.l		(%a0),%d1		# sign/exponent word into high word of d1
   5878 	mov.w		4(%a0),%d1		# top 16 mantissa bits into low word
   5879 	and.l		&0x7FFFFFFF,%d1		# d1 = compact form of |X|
   5880 
   5881 	cmp.l		%d1,&0x3FD78000		# |X| >= 2**(-40)?
   5882 	bge.b		TANOK1
   5883 	bra.w		TANSM
   5884 TANOK1:
   5885 	cmp.l		%d1,&0x4004BC7E		# |X| < 15 PI?
   5886 	blt.b		TANMAIN
   5887 	bra.w		REDUCEX
   5888 
   5889 TANMAIN:
   5890 #--THIS IS THE USUAL CASE, 2**(-40) <= |X| < 15 PI.
   5891 #--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
   5892 	fmov.x		%fp0,%fp1
   5893 	fmul.d		TWOBYPI(%pc),%fp1	# X*2/PI
   5894 
   5895 	lea.l		PITBL+0x200(%pc),%a1	# TABLE OF N*PI/2, N = -32,...,32
   5896 
   5897 	fmov.l		%fp1,%d1		# CONVERT TO INTEGER
   5898 
   5899 	asl.l		&4,%d1			# N * 16 = byte offset of table entry
   5900 	add.l		%d1,%a1			# ADDRESS N*PIBY2 IN Y1, Y2
   5901 
   5902 	fsub.x		(%a1)+,%fp0		# X-Y1
   5903 
   5904 	fsub.s		(%a1),%fp0		# FP0 IS R = (X-Y1)-Y2
   5905 
   5906 	ror.l		&5,%d1			# bit 0 of N (now bit 4) -> bit 31
   5907 	and.l		&0x80000000,%d1		# N WAS ODD IFF D1 < 0
   5908 
        #--here fp0 = R and the sign of d1 tells whether N is odd.
   5909 TANCONT:
   5910 	fmovm.x		&0x0c,-(%sp)		# save fp2,fp3
   5911 
   5912 	cmp.l		%d1,&0
   5913 	blt.w		NODD
   5914 
        #--N even: tan(X) = U/V with fp0 -> U, fp1 -> V.
   5915 	fmov.x		%fp0,%fp1
   5916 	fmul.x		%fp1,%fp1		# S = R*R
   5917 
   5918 	fmov.d		TANQ4(%pc),%fp3
   5919 	fmov.d		TANP3(%pc),%fp2
   5920 
   5921 	fmul.x		%fp1,%fp3		# SQ4
   5922 	fmul.x		%fp1,%fp2		# SP3
   5923 
   5924 	fadd.d		TANQ3(%pc),%fp3		# Q3+SQ4
   5925 	fadd.x		TANP2(%pc),%fp2		# P2+SP3
   5926 
   5927 	fmul.x		%fp1,%fp3		# S(Q3+SQ4)
   5928 	fmul.x		%fp1,%fp2		# S(P2+SP3)
   5929 
   5930 	fadd.x		TANQ2(%pc),%fp3		# Q2+S(Q3+SQ4)
   5931 	fadd.x		TANP1(%pc),%fp2		# P1+S(P2+SP3)
   5932 
   5933 	fmul.x		%fp1,%fp3		# S(Q2+S(Q3+SQ4))
   5934 	fmul.x		%fp1,%fp2		# S(P1+S(P2+SP3))
   5935 
   5936 	fadd.x		TANQ1(%pc),%fp3		# Q1+S(Q2+S(Q3+SQ4))
   5937 	fmul.x		%fp0,%fp2		# RS(P1+S(P2+SP3))
   5938 
   5939 	fmul.x		%fp3,%fp1		# S(Q1+S(Q2+S(Q3+SQ4)))
   5940 
   5941 	fadd.x		%fp2,%fp0		# R+RS(P1+S(P2+SP3))
   5942 
   5943 	fadd.s		&0x3F800000,%fp1	# 1+S(Q1+...)
   5944 
   5945 	fmovm.x		(%sp)+,&0x30		# restore fp2,fp3
   5946 
   5947 	fmov.l		%d0,%fpcr		# restore users round mode,prec
   5948 	fdiv.x		%fp1,%fp0		# last inst - possible exception set
   5949 	bra		t_inx2
   5950 
        #--N odd: tan(X) = -cot(R) = V/(-U) with fp1 -> U, fp0 -> V.
   5951 NODD:
   5952 	fmov.x		%fp0,%fp1
   5953 	fmul.x		%fp0,%fp0		# S = R*R
   5954 
   5955 	fmov.d		TANQ4(%pc),%fp3
   5956 	fmov.d		TANP3(%pc),%fp2
   5957 
   5958 	fmul.x		%fp0,%fp3		# SQ4
   5959 	fmul.x		%fp0,%fp2		# SP3
   5960 
   5961 	fadd.d		TANQ3(%pc),%fp3		# Q3+SQ4
   5962 	fadd.x		TANP2(%pc),%fp2		# P2+SP3
   5963 
   5964 	fmul.x		%fp0,%fp3		# S(Q3+SQ4)
   5965 	fmul.x		%fp0,%fp2		# S(P2+SP3)
   5966 
   5967 	fadd.x		TANQ2(%pc),%fp3		# Q2+S(Q3+SQ4)
   5968 	fadd.x		TANP1(%pc),%fp2		# P1+S(P2+SP3)
   5969 
   5970 	fmul.x		%fp0,%fp3		# S(Q2+S(Q3+SQ4))
   5971 	fmul.x		%fp0,%fp2		# S(P1+S(P2+SP3))
   5972 
   5973 	fadd.x		TANQ1(%pc),%fp3		# Q1+S(Q2+S(Q3+SQ4))
   5974 	fmul.x		%fp1,%fp2		# RS(P1+S(P2+SP3))
   5975 
   5976 	fmul.x		%fp3,%fp0		# S(Q1+S(Q2+S(Q3+SQ4)))
   5977 
   5978 	fadd.x		%fp2,%fp1		# R+RS(P1+S(P2+SP3))
   5979 	fadd.s		&0x3F800000,%fp0	# 1+S(Q1+...)
   5980 
   5981 	fmovm.x		(%sp)+,&0x30		# restore fp2,fp3
   5982 
   5983 	fmov.x		%fp1,-(%sp)		# push U onto the stack ...
   5984 	eor.l		&0x80000000,(%sp)	# ... and flip its sign bit: -U
   5985 
   5986 	fmov.l		%d0,%fpcr		# restore users round mode,prec
   5987 	fdiv.x		(%sp)+,%fp0		# last inst - possible exception set
   5988 	bra		t_inx2
   5989 
   5990 TANBORS:
   5991 #--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
   5992 #--IF |X| < 2**(-40), RETURN X.
        #--The test compares d1 with 0x3FFF8000, the compact form of 1.0.
   5993 	cmp.l		%d1,&0x3FFF8000
   5994 	bgt.b		REDUCEX
   5995 
        # tan(X) = X for tiny X: X is pushed and reloaded under the user's
        # fpcr so that the final MOVE sets any exception status.
   5996 TANSM:
   5997 	fmov.x		%fp0,-(%sp)
   5998 	fmov.l		%d0,%fpcr		# restore users round mode,prec
   5999 	mov.b		&FMOV_OP,%d1		# last inst is MOVE
   6000 	fmov.x		(%sp)+,%fp0		# last inst - possible exception set
   6001 	bra		t_catch
   6002 
   6003 	global		stand
   6004 #--TAN(X) = X FOR DENORMALIZED X
        #--No arithmetic is done here; control goes straight to t_extdnrm.
        #--NOTE(review): t_extdnrm is defined outside this view; it presumably
        #--delivers the denormalized operand as the result -- confirm.
   6005 stand:
   6006 	bra		t_extdnrm
   6007 
   6008 #--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
   6009 #--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
   6010 #--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
        #--On entry fp0 = X and d1 = compact form of |X|. fp2-fp5 and d2 are
        #--saved here and restored at RESTORE. On exit fp0 holds the reduced
        #--argument, INT(%a6) the integer quotient N, and bit 31 of d1 is
        #--bit 0 of N; control returns to TANCONT.
   6011 REDUCEX:
   6012 	fmovm.x		&0x3c,-(%sp)		# save {fp2-fp5}
   6013 	mov.l		%d2,-(%sp)		# save d2
   6014 	fmov.s		&0x00000000,%fp1	# fp1 = 0
   6015 
   6016 #--If compact form of abs(arg) in d1=$7ffeffff, argument is so large that
   6017 #--there is a danger of unwanted overflow in first LOOP iteration.  In this
   6018 #--case, reduce argument by one remainder step to make subsequent reduction
   6019 #--safe.
   6020 	cmp.l		%d1,&0x7ffeffff		# is arg dangerously large?
   6021 	bne.b		LOOP			# no
   6022 
   6023 # yes; create 2**16383*PI/2
   6024 	mov.w		&0x7ffe,FP_SCR0_EX(%a6)
   6025 	mov.l		&0xc90fdaa2,FP_SCR0_HI(%a6)
   6026 	clr.l		FP_SCR0_LO(%a6)
   6027 
   6028 # create low half of 2**16383*PI/2 at FP_SCR1
   6029 	mov.w		&0x7fdc,FP_SCR1_EX(%a6)
   6030 	mov.l		&0x85a308d3,FP_SCR1_HI(%a6)
   6031 	clr.l		FP_SCR1_LO(%a6)
   6032 
   6033 	ftest.x		%fp0			# test sign of argument
   6034 	fblt.w		red_neg
   6035 
        # the constants are built positive; for a positive argument their sign
        # bits are set so that the fadd's below subtract them.
   6036 	or.b		&0x80,FP_SCR0_EX(%a6)	# positive arg
   6037 	or.b		&0x80,FP_SCR1_EX(%a6)
   6038 red_neg:
   6039 	fadd.x		FP_SCR0(%a6),%fp0	# high part of reduction is exact
   6040 	fmov.x		%fp0,%fp1		# save high result in fp1
   6041 	fadd.x		FP_SCR1(%a6),%fp0	# low part of reduction
   6042 	fsub.x		%fp0,%fp1		# determine low component of result
   6043 	fadd.x		FP_SCR1(%a6),%fp1	# fp0/fp1 are reduced argument.
   6044 
   6045 #--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
   6046 #--integer quotient will be stored in N
   6047 #--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
   6048 LOOP:
   6049 	fmov.x		%fp0,INARG(%a6)		# +-2**K * F, 1 <= F < 2
   6050 	mov.w		INARG(%a6),%d1		# sign/exponent word of R
   6051 	mov.l		%d1,%a1			# save a copy of D1
   6052 	and.l		&0x00007FFF,%d1		# drop the sign bit
   6053 	sub.l		&0x00003FFF,%d1		# d1 = K
   6054 	cmp.l		%d1,&28
   6055 	ble.b		LASTLOOP		# K <= 28: this is the final pass
   6056 CONTLOOP:
   6057 	sub.l		&27,%d1			# d1 = L := K-27
   6058 	mov.b		&0,ENDFLAG(%a6)
   6059 	bra.b		WORK
   6060 LASTLOOP:
   6061 	clr.l		%d1			# d1 = L := 0
   6062 	mov.b		&1,ENDFLAG(%a6)
   6063 
   6064 WORK:
   6065 #--FIND THE REMAINDER OF (R,r) W.R.T.	2**L * (PI/2). L IS SO CHOSEN
   6066 #--THAT	INT( X * (2/PI) / 2**(L) ) < 2**29.
   6067 
   6068 #--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
   6069 #--2**L * (PIby2_1), 2**L * (PIby2_2)
   6070 
   6071 	mov.l		&0x00003FFE,%d2		# BIASED EXP OF 2/PI
   6072 	sub.l		%d1,%d2			# BIASED EXP OF 2**(-L)*(2/PI)
   6073 
   6074 	mov.l		&0xA2F9836E,FP_SCR0_HI(%a6)
   6075 	mov.l		&0x4E44152A,FP_SCR0_LO(%a6)
   6076 	mov.w		%d2,FP_SCR0_EX(%a6)	# FP_SCR0 = 2**(-L)*(2/PI)
   6077 
   6078 	fmov.x		%fp0,%fp2
   6079 	fmul.x		FP_SCR0(%a6),%fp2	# fp2 = X * 2**(-L)*(2/PI)
   6080 
   6081 #--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
   6082 #--FLOATING POINT FORMAT, THE TWO FMOVE'S	FMOVE.L FP <--> N
   6083 #--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
   6084 #--(SIGN(INARG)*2**63	+	FP2) - SIGN(INARG)*2**63 WILL GIVE
   6085 #--US THE DESIRED VALUE IN FLOATING POINT.
   6086 	mov.l		%a1,%d2			# saved sign/exponent word
   6087 	swap		%d2			# move it to the high word
   6088 	and.l		&0x80000000,%d2		# keep only the sign bit
   6089 	or.l		&0x5F000000,%d2		# d2 = SIGN(INARG)*2**63 IN SGL
   6090 	mov.l		%d2,TWOTO63(%a6)
   6091 	fadd.s		TWOTO63(%a6),%fp2	# THE FRACTIONAL PART OF FP2 IS ROUNDED
   6092 	fsub.s		TWOTO63(%a6),%fp2	# fp2 = N
   6093 #	fintrz.x	%fp2,%fp2
   6094 
   6095 #--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
   6096 	mov.l		%d1,%d2			# d2 = L
   6097 
   6098 	add.l		&0x00003FFF,%d2		# BIASED EXP OF 2**L * (PI/2)
   6099 	mov.w		%d2,FP_SCR0_EX(%a6)
   6100 	mov.l		&0xC90FDAA2,FP_SCR0_HI(%a6)
   6101 	clr.l		FP_SCR0_LO(%a6)		# FP_SCR0 = 2**(L) * Piby2_1
   6102 
   6103 	add.l		&0x00003FDD,%d1		# biased exp of 2**L * Piby2_2
   6104 	mov.w		%d1,FP_SCR1_EX(%a6)
   6105 	mov.l		&0x85A308D3,FP_SCR1_HI(%a6)
   6106 	clr.l		FP_SCR1_LO(%a6)		# FP_SCR1 = 2**(L) * Piby2_2
   6107 
   6108 	mov.b		ENDFLAG(%a6),%d1	# d1 now carries the last-pass flag
   6109 
   6110 #--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
   6111 #--P2 = 2**(L) * Piby2_2
   6112 	fmov.x		%fp2,%fp4		# fp4 = N
   6113 	fmul.x		FP_SCR0(%a6),%fp4	# fp4 = W = N*P1
   6114 	fmov.x		%fp2,%fp5		# fp5 = N
   6115 	fmul.x		FP_SCR1(%a6),%fp5	# fp5 = w = N*P2
   6116 	fmov.x		%fp4,%fp3		# fp3 = W = N*P1
   6117 
   6118 #--we want P+p = W+w  but  |p| <= half ulp of P
   6119 #--Then, we need to compute  A := R-P   and  a := r-p
   6120 	fadd.x		%fp5,%fp3		# fp3 = P
   6121 	fsub.x		%fp3,%fp4		# fp4 = W-P
   6122 
   6123 	fsub.x		%fp3,%fp0		# fp0 = A := R - P
   6124 	fadd.x		%fp5,%fp4		# fp4 = p = (W-P)+w
   6125 
   6126 	fmov.x		%fp0,%fp3		# fp3 = A
   6127 	fsub.x		%fp4,%fp1		# fp1 = a := r - p
   6128 
   6129 #--Now we need to normalize (A,a) to  "new (R,r)" where R+r = A+a but
   6130 #--|r| <= half ulp of R.
   6131 	fadd.x		%fp1,%fp0		# fp0 = R := A+a
   6132 #--No need to calculate r if this is the last loop
   6133 	cmp.b		%d1,&0
   6134 	bgt.w		RESTORE
   6135 
   6136 #--Need to calculate r
   6137 	fsub.x		%fp0,%fp3		# fp3 = A-R
   6138 	fadd.x		%fp3,%fp1		# fp1 = r := (A-R)+a
   6139 	bra.w		LOOP
   6140 
   6141 RESTORE:
   6142 	fmov.l		%fp2,INT(%a6)		# store N from the final pass as an integer
   6143 	mov.l		(%sp)+,%d2		# restore d2
   6144 	fmovm.x		(%sp)+,&0x3c		# restore {fp2-fp5}
   6145 
   6146 	mov.l		INT(%a6),%d1
   6147 	ror.l		&1,%d1			# bit 0 of N -> bit 31: d1 < 0 iff N is odd
   6148 
   6149 	bra.w		TANCONT
   6150 
   6151 #########################################################################
   6152 # satan():  computes the arctangent of a normalized number		#
   6153 # satand(): computes the arctangent of a denormalized number		#
   6154 #									#
   6155 # INPUT	*************************************************************** #
   6156 #	a0 = pointer to extended precision input			#
   6157 #	d0 = round precision,mode					#
   6158 #									#
   6159 # OUTPUT ************************************************************** #
   6160 #	fp0 = arctan(X)							#
   6161 #									#
   6162 # ACCURACY and MONOTONICITY ******************************************* #
   6163 #	The returned result is within 2 ulps in	64 significant bit,	#
   6164 #	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
   6165 #	rounded to double precision. The result is provably monotonic	#
   6166 #	in double precision. 						#
   6167 #									#
   6168 # ALGORITHM *********************************************************** #
   6169 #	Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5.		#
   6170 #									#
   6171 #	Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x. 			#
   6172 #		Note that k = -4, -3,..., or 3.				#
   6173 #		Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5 	#
   6174 #		significant bits of X with a bit-1 attached at the 6-th	#
   6175 #		bit position. Define u to be u = (X-F) / (1 + X*F).	#
   6176 #									#
   6177 #	Step 3. Approximate arctan(u) by a polynomial poly.		#
   6178 #									#
   6179 #	Step 4. Return arctan(F) + poly, arctan(F) is fetched from a 	#
   6180 #		table of values calculated beforehand. Exit.		#
   6181 #									#
   6182 #	Step 5. If |X| >= 16, go to Step 7.				#
   6183 #									#
   6184 #	Step 6. Approximate arctan(X) by an odd polynomial in X. Exit.	#
   6185 #									#
   6186 #	Step 7. Define X' = -1/X. Approximate arctan(X') by an odd 	#
   6187 #		polynomial in X'.					#
   6188 #		Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit.		#
   6189 #									#
   6190 #########################################################################
   6191 
   6192 ATANA3:	long		0xBFF6687E,0x314987D8
   6193 ATANA2:	long		0x4002AC69,0x34A26DB3
   6194 ATANA1:	long		0xBFC2476F,0x4E1DA28E
        # NOTE(review): the ATANA*, ATANB* and ATANC* entries are 8 bytes each;
        # they are presumably double-precision polynomial coefficients for the
        # arctangent algorithm steps described above. Their users are outside
        # this view -- confirm.
   6195 
   6196 ATANB6:	long		0x3FB34444,0x7F876989
   6197 ATANB5:	long		0xBFB744EE,0x7FAF45DB
   6198 ATANB4:	long		0x3FBC71C6,0x46940220
   6199 ATANB3:	long		0xBFC24924,0x921872F9
   6200 ATANB2:	long		0x3FC99999,0x99998FA9
   6201 ATANB1:	long		0xBFD55555,0x55555555
   6202 
   6203 ATANC5:	long		0xBFB70BF3,0x98539E6A
   6204 ATANC4:	long		0x3FBC7187,0x962D1D7D
   6205 ATANC3:	long		0xBFC24924,0x827107B8
   6206 ATANC2:	long		0x3FC99999,0x9996263E
   6207 ATANC1:	long		0xBFD55555,0x55555536
   6208 
        # +PI/2 and -PI/2 in extended precision (same mantissa as the leading
        # term of the N = 1 entry of PITBL).
   6209 PPIBY2:	long		0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
   6210 NPIBY2:	long		0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000
   6211 
        # +/- extended values with biased exponent 1 and only the explicit
        # integer bit of the mantissa set.
   6212 PTINY:	long		0x00010000,0x80000000,0x00000000,0x00000000
   6213 NTINY:	long		0x80010000,0x80000000,0x00000000,0x00000000
   6214 
   6215 ATANTBL:	# ATAN(|F|) in extended precision: 128 entries of 16 bytes, ordered by (K+4)*16 + 4 fraction bits of F
   6216 	long		0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000
   6217 	long		0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000
   6218 	long		0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000
   6219 	long		0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000
   6220 	long		0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000
   6221 	long		0x3FFB0000,0xAB98E943,0x62765619,0x00000000
   6222 	long		0x3FFB0000,0xB389E502,0xF9C59862,0x00000000
   6223 	long		0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000
   6224 	long		0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000
   6225 	long		0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000
   6226 	long		0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000
   6227 	long		0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000
   6228 	long		0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000
   6229 	long		0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000
   6230 	long		0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000
   6231 	long		0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000
   6232 	long		0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000
   6233 	long		0x3FFC0000,0x8B232A08,0x304282D8,0x00000000
   6234 	long		0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000
   6235 	long		0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000
   6236 	long		0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000
   6237 	long		0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000
   6238 	long		0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000
   6239 	long		0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000
   6240 	long		0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000
   6241 	long		0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000
   6242 	long		0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000
   6243 	long		0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000
   6244 	long		0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000
   6245 	long		0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000
   6246 	long		0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000
   6247 	long		0x3FFC0000,0xF7170A28,0xECC06666,0x00000000
   6248 	long		0x3FFD0000,0x812FD288,0x332DAD32,0x00000000
   6249 	long		0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000
   6250 	long		0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000
   6251 	long		0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000
   6252 	long		0x3FFD0000,0x9EB68949,0x3889A227,0x00000000
   6253 	long		0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000
   6254 	long		0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000
   6255 	long		0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000
   6256 	long		0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000
   6257 	long		0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000
   6258 	long		0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000
   6259 	long		0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000
   6260 	long		0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000
   6261 	long		0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000
   6262 	long		0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000
   6263 	long		0x3FFD0000,0xEA2D764F,0x64315989,0x00000000
   6264 	long		0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000
   6265 	long		0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000
   6266 	long		0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000
   6267 	long		0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000
   6268 	long		0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000
   6269 	long		0x3FFE0000,0x97731420,0x365E538C,0x00000000
   6270 	long		0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000
   6271 	long		0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000
   6272 	long		0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000
   6273 	long		0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000
   6274 	long		0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000
   6275 	long		0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000
   6276 	long		0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000
   6277 	long		0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000
   6278 	long		0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000
   6279 	long		0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000
   6280 	long		0x3FFE0000,0xCD000549,0xADEC7159,0x00000000
   6281 	long		0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000
   6282 	long		0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000
   6283 	long		0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000
   6284 	long		0x3FFE0000,0xE8771129,0xC4353259,0x00000000
   6285 	long		0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000
   6286 	long		0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000
   6287 	long		0x3FFE0000,0xF919039D,0x758B8D41,0x00000000
   6288 	long		0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000
   6289 	long		0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000
   6290 	long		0x3FFF0000,0x83889E35,0x49D108E1,0x00000000
   6291 	long		0x3FFF0000,0x859CFA76,0x511D724B,0x00000000
   6292 	long		0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000
   6293 	long		0x3FFF0000,0x89732FD1,0x9557641B,0x00000000
   6294 	long		0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000
   6295 	long		0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000
   6296 	long		0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000
   6297 	long		0x3FFF0000,0x922DA7D7,0x91888487,0x00000000
   6298 	long		0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000
   6299 	long		0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000
   6300 	long		0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000
   6301 	long		0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000
   6302 	long		0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000
   6303 	long		0x3FFF0000,0x9F100575,0x006CC571,0x00000000
   6304 	long		0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000
   6305 	long		0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000
   6306 	long		0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000
   6307 	long		0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000
   6308 	long		0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000
   6309 	long		0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000
   6310 	long		0x3FFF0000,0xA83A5153,0x0956168F,0x00000000
   6311 	long		0x3FFF0000,0xA93A2007,0x7539546E,0x00000000
   6312 	long		0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000
   6313 	long		0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000
   6314 	long		0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000
   6315 	long		0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000
   6316 	long		0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000
   6317 	long		0x3FFF0000,0xB1846515,0x0F71496A,0x00000000
   6318 	long		0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000
   6319 	long		0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000
   6320 	long		0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000
   6321 	long		0x3FFF0000,0xB525529D,0x562246BD,0x00000000
   6322 	long		0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000
   6323 	long		0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000
   6324 	long		0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000
   6325 	long		0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000
   6326 	long		0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000
   6327 	long		0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000
   6328 	long		0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000
   6329 	long		0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000
   6330 	long		0x3FFF0000,0xBB471285,0x7637E17D,0x00000000
   6331 	long		0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000
   6332 	long		0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000
   6333 	long		0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000
   6334 	long		0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000
   6335 	long		0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000
   6336 	long		0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000
   6337 	long		0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000
   6338 	long		0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000
   6339 	long		0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000
   6340 	long		0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000
   6341 	long		0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000
   6342 	long		0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000
   6343 	long		0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000
   6344 
   6345 	set		X,FP_SCR0		# scratch copy of X (later overwritten with F or X')
   6346 	set		XDCARE,X+2		# NOTE(review): not referenced by satan; presumably the unused word -- confirm
   6347 	set		XFRAC,X+4		# upper 32 bits of X's fraction
   6348 	set		XFRACLO,X+8		# lower 32 bits of X's fraction
   6349 
   6350 	set		ATANF,FP_SCR1		# scratch for SIGN(F)*ATAN(|F|): sign/exponent longword
   6351 	set		ATANFHI,ATANF+4		# second longword of ATANF
   6352 	set		ATANFLO,ATANF+8		# third longword of ATANF
   6353 
   6354 	global		satan
   6355 #--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT A NAN
   6356 satan:
   6357 	fmov.x		(%a0),%fp0		# LOAD INPUT
   6358 
   6359 	mov.l		(%a0),%d1		# d1 high word = sign and exponent
   6360 	mov.w		4(%a0),%d1		# d1 low word = upper 16 fraction bits
   6361 	fmov.x		%fp0,X(%a6)		# keep a copy of X in scratch
   6362 	and.l		&0x7FFFFFFF,%d1		# drop sign: d1 = |X| in compact form
   6363 
   6364 	cmp.l		%d1,&0x3FFB8000		# |X| >= 1/16?
   6365 	bge.b		ATANOK1
   6366 	bra.w		ATANSM			# no: small-argument polynomial
   6367 
   6368 ATANOK1:
   6369 	cmp.l		%d1,&0x4002FFFF		# |X| < 16 ?
   6370 	ble.b		ATANMAIN		# yes: table-driven main path
   6371 	bra.w		ATANBIG			# no: large-argument path
   6372 
   6373 #--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE
   6374 #--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ).
   6375 #--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN
   6376 #--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE
   6377 #--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS
   6378 #--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR
   6379 #--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO
   6380 #--FETCH F AND SAVING OF REGISTERS CAN BE ALL HIDDEN UNDER THE
   6381 #--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL
   6382 #--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATES
   6383 #--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION
   6384 #--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION
   6385 #--WILL INVOLVE A VERY LONG POLYNOMIAL.
   6386 
   6387 #--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS
   6388 #--WE CHOOSE F TO BE +-2^K * 1.BBBB1
   6389 #--THAT IS IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, THE
   6390 #--SIXTH BIT IS SET TO BE 1. SINCE K = -4, -3, ..., 3, THERE
   6391 #--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS
   6392 #-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|).
   6393 
   6394 ATANMAIN:
   6395 
   6396 	and.l		&0xF8000000,XFRAC(%a6)	# FIRST 5 BITS
   6397 	or.l		&0x04000000,XFRAC(%a6)	# SET 6-TH BIT TO 1
   6398 	mov.l		&0x00000000,XFRACLO(%a6) # LOCATION OF X IS NOW F
   6399 
   6400 	fmov.x		%fp0,%fp1		# FP1 IS X
   6401 	fmul.x		X(%a6),%fp1		# FP1 IS X*F, NOTE THAT X*F > 0
   6402 	fsub.x		X(%a6),%fp0		# FP0 IS X-F
   6403 	fadd.s		&0x3F800000,%fp1	# FP1 IS 1 + X*F
   6404 	fdiv.x		%fp1,%fp0		# FP0 IS U = (X-F)/(1+X*F)
   6405 
   6406 #--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|)
   6407 #--CREATE ATAN(F) AND STORE IT IN ATANF, AND
   6408 #--SAVE REGISTER FP2.
   6409 
   6410 	mov.l		%d2,-(%sp)		# SAVE d2 TEMPORARILY
   6411 	mov.l		%d1,%d2			# THE EXP AND 16 BITS OF X
   6412 	and.l		&0x00007800,%d1		# 4 VARYING BITS OF F'S FRACTION
   6413 	and.l		&0x7FFF0000,%d2		# EXPONENT OF F
   6414 	sub.l		&0x3FFB0000,%d2		# K+4
   6415 	asr.l		&1,%d2			# K+4 now sits just above the 4 fraction bits
   6416 	add.l		%d2,%d1			# THE 7 BITS IDENTIFYING F
   6417 	asr.l		&7,%d1			# INDEX INTO TBL OF ATAN(|F|), SCALED BY 16 BYTES
   6418 	lea		ATANTBL(%pc),%a1	# base of the ATAN(|F|) table
   6419 	add.l		%d1,%a1			# ADDRESS OF ATAN(|F|)
   6420 	mov.l		(%a1)+,ATANF(%a6)
   6421 	mov.l		(%a1)+,ATANFHI(%a6)
   6422 	mov.l		(%a1)+,ATANFLO(%a6)	# ATANF IS NOW ATAN(|F|)
   6423 	mov.l		X(%a6),%d1		# LOAD SIGN AND EXPO. AGAIN
   6424 	and.l		&0x80000000,%d1		# SIGN(F)
   6425 	or.l		%d1,ATANF(%a6)		# ATANF IS NOW SIGN(F)*ATAN(|F|)
   6426 	mov.l		(%sp)+,%d2		# RESTORE d2
   6427 
   6428 #--THAT'S ALL I HAVE TO DO FOR NOW,
   6429 #--BUT ALAS, THE DIVIDE IS STILL CRANKING!
   6430 
   6431 #--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS
   6432 #--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U
   6433 #--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT.
   6434 #--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3))
   6435 #--WHAT WE HAVE HERE IS MERELY	A1 = A3, A2 = A1/A3, A3 = A2/A3.
   6436 #--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT
   6437 #--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED
   6438 
   6439 	fmovm.x		&0x04,-(%sp)		# save fp2
   6440 
   6441 	fmov.x		%fp0,%fp1
   6442 	fmul.x		%fp1,%fp1		# FP1 IS V = U*U
   6443 	fmov.d		ATANA3(%pc),%fp2
   6444 	fadd.x		%fp1,%fp2		# A3+V
   6445 	fmul.x		%fp1,%fp2		# V*(A3+V)
   6446 	fmul.x		%fp0,%fp1		# U*V
   6447 	fadd.d		ATANA2(%pc),%fp2	# A2+V*(A3+V)
   6448 	fmul.d		ATANA1(%pc),%fp1	# A1*U*V
   6449 	fmul.x		%fp2,%fp1		# A1*U*V*(A2+V*(A3+V))
   6450 	fadd.x		%fp1,%fp0		# ATAN(U), FP1 RELEASED
   6451 
   6452 	fmovm.x 	(%sp)+,&0x20		# restore fp2
   6453 
   6454 	fmov.l		%d0,%fpcr		# restore users rnd mode,prec
   6455 	fadd.x		ATANF(%a6),%fp0		# ATAN(X)
   6456 	bra		t_inx2
   6457 
   6458 ATANBORS:				# NOTE(review): no branch in satan targets this label -- confirm unused
   6459 #--|X| IS IN d1 IN COMPACT FORM. FP1, d0 SAVED.
   6460 #--FP0 IS X AND |X| <= 1/16 OR |X| >= 16.
   6461 	cmp.l		%d1,&0x3FFF8000
   6462 	bgt.w		ATANBIG			# I.E. |X| >= 16
   6463 
   6464 ATANSM:
   6465 #--|X| < 1/16
   6466 #--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE
   6467 #--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6)))))
   6468 #--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6))] )
   6469 #--WHERE Y = X*X, AND Z = Y*Y.
   6470 
   6471 	cmp.l		%d1,&0x3FD78000		# |X| < 2^(-40)?
   6472 	blt.w		ATANTINY
   6473 
   6474 #--COMPUTE POLYNOMIAL
   6475 	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3
   6476 
   6477 	fmul.x		%fp0,%fp0		# FP0 IS Y = X*X
   6478 
   6479 	fmov.x		%fp0,%fp1
   6480 	fmul.x		%fp1,%fp1		# FP1 IS Z = Y*Y
   6481 
   6482 	fmov.d		ATANB6(%pc),%fp2
   6483 	fmov.d		ATANB5(%pc),%fp3
   6484 
   6485 	fmul.x		%fp1,%fp2		# Z*B6
   6486 	fmul.x		%fp1,%fp3		# Z*B5
   6487 
   6488 	fadd.d		ATANB4(%pc),%fp2	# B4+Z*B6
   6489 	fadd.d		ATANB3(%pc),%fp3	# B3+Z*B5
   6490 
   6491 	fmul.x		%fp1,%fp2		# Z*(B4+Z*B6)
   6492 	fmul.x		%fp3,%fp1		# Z*(B3+Z*B5)
   6493 
   6494 	fadd.d		ATANB2(%pc),%fp2	# B2+Z*(B4+Z*B6)
   6495 	fadd.d		ATANB1(%pc),%fp1	# B1+Z*(B3+Z*B5)
   6496 
   6497 	fmul.x		%fp0,%fp2		# Y*(B2+Z*(B4+Z*B6))
   6498 	fmul.x		X(%a6),%fp0		# X*Y
   6499 
   6500 	fadd.x		%fp2,%fp1		# [B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]
   6501 
   6502 	fmul.x		%fp1,%fp0		# X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))])
   6503 
   6504 	fmovm.x		(%sp)+,&0x30		# restore fp2/fp3
   6505 
   6506 	fmov.l		%d0,%fpcr		# restore users rnd mode,prec
   6507 	fadd.x		X(%a6),%fp0		# X + X*Y*(...) = ATAN(X)
   6508 	bra		t_inx2
   6509 
   6510 ATANTINY:
   6511 #--|X| < 2^(-40), ATAN(X) = X; RETURN X MOVED UNDER THE USER'S RND MODE,PREC
   6512 
   6513 	fmov.l		%d0,%fpcr		# restore users rnd mode,prec
   6514 	mov.b		&FMOV_OP,%d1		# last inst is MOVE
   6515 	fmov.x		X(%a6),%fp0		# last inst - possible exception set
   6516 
   6517 	bra		t_catch			# d1 = FMOV_OP on entry; presumably tells t_catch the last op -- confirm
   6518 
   6519 ATANBIG:
   6520 #--IF |X| > 2^(100), RETURN	SIGN(X)*(PI/2 - TINY). OTHERWISE,
   6521 #--RETURN SIGN(X)*PI/2 + ATAN(-1/X).
   6522 	cmp.l		%d1,&0x40638000		# |X| > 2^(100)?
   6523 	bgt.w		ATANHUGE
   6524 
   6525 #--APPROXIMATE ATAN(-1/X) BY
   6526 #--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X'
   6527 #--THIS CAN BE RE-WRITTEN AS
   6528 #--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y.
   6529 
   6530 	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3
   6531 
   6532 	fmov.s		&0xBF800000,%fp1	# LOAD -1
   6533 	fdiv.x		%fp0,%fp1		# FP1 IS -1/X
   6534 
   6535 #--DIVIDE IS STILL CRANKING
   6536 
   6537 	fmov.x		%fp1,%fp0		# FP0 IS X'
   6538 	fmul.x		%fp0,%fp0		# FP0 IS Y = X'*X'
   6539 	fmov.x		%fp1,X(%a6)		# X IS REALLY X'
   6540 
   6541 	fmov.x		%fp0,%fp1
   6542 	fmul.x		%fp1,%fp1		# FP1 IS Z = Y*Y
   6543 
   6544 	fmov.d		ATANC5(%pc),%fp3
   6545 	fmov.d		ATANC4(%pc),%fp2
   6546 
   6547 	fmul.x		%fp1,%fp3		# Z*C5
   6548 	fmul.x		%fp1,%fp2		# Z*C4
   6549 
   6550 	fadd.d		ATANC3(%pc),%fp3	# C3+Z*C5
   6551 	fadd.d		ATANC2(%pc),%fp2	# C2+Z*C4
   6552 
   6553 	fmul.x		%fp3,%fp1		# Z*(C3+Z*C5), FP3 RELEASED
   6554 	fmul.x		%fp0,%fp2		# Y*(C2+Z*C4)
   6555 
   6556 	fadd.d		ATANC1(%pc),%fp1	# C1+Z*(C3+Z*C5)
   6557 	fmul.x		X(%a6),%fp0		# X'*Y
   6558 
   6559 	fadd.x		%fp2,%fp1		# [Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)]
   6560 
   6561 	fmul.x		%fp1,%fp0		# X'*Y*([C1+Z*(C3+Z*C5)]
   6562 #					...	+[Y*(C2+Z*C4)])
   6563 	fadd.x		X(%a6),%fp0		# X' + X'*Y*(...) = ATAN(X')
   6564 
   6565 	fmovm.x		(%sp)+,&0x30		# restore fp2/fp3
   6566 
   6567 	fmov.l		%d0,%fpcr		# restore users rnd mode,prec
   6568 	tst.b		(%a0)			# sign of the original X
   6569 	bpl.b		pos_big
   6570 
   6571 neg_big:
   6572 	fadd.x		NPIBY2(%pc),%fp0	# -PI/2 + ATAN(X')
   6573 	bra		t_minx2
   6574 
   6575 pos_big:
   6576 	fadd.x		PPIBY2(%pc),%fp0	# +PI/2 + ATAN(X')
   6577 	bra		t_pinx2
   6578 
   6579 ATANHUGE:
   6580 #--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY
   6581 	tst.b		(%a0)			# sign of X
   6582 	bpl.b		pos_huge
   6583 
   6584 neg_huge:
   6585 	fmov.x		NPIBY2(%pc),%fp0	# -PI/2
   6586 	fmov.l		%d0,%fpcr		# restore users rnd mode,prec
   6587 	fadd.x		PTINY(%pc),%fp0		# -PI/2 + TINY
   6588 	bra		t_minx2
   6589 
   6590 pos_huge:
   6591 	fmov.x		PPIBY2(%pc),%fp0	# +PI/2
   6592 	fmov.l		%d0,%fpcr		# restore users rnd mode,prec
   6593 	fadd.x		NTINY(%pc),%fp0		# +PI/2 - TINY
   6594 	bra		t_pinx2
   6595 
   6596 	global		satand
   6597 #--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT
   6598 satand:
   6599 	bra		t_extdnrm		# no computation here; t_extdnrm is not defined in this section
   6600 
   6601 #########################################################################
   6602 # sasin():  computes the inverse sine of a normalized input		#
   6603 # sasind(): computes the inverse sine of a denormalized input		#
   6604 #									#
   6605 # INPUT ***************************************************************	#
   6606 #	a0 = pointer to extended precision input			#
   6607 #	d0 = round precision,mode					#
   6608 #									#
   6609 # OUTPUT **************************************************************	#
   6610 #	fp0 = arcsin(X)							#
   6611 #									#
   6612 # ACCURACY and MONOTONICITY *******************************************	#
   6613 #	The returned result is within 3 ulps in	64 significant bit,	#
   6614 #	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
   6615 #	rounded to double precision. The result is provably monotonic	#
   6616 #	in double precision.						#
   6617 #									#
   6618 # ALGORITHM ***********************************************************	#
   6619 #									#
   6620 #	ASIN								#
   6621 #	1. If |X| >= 1, go to 3.					#
   6622 #									#
   6623 #	2. (|X| < 1) Calculate asin(X) by				#
   6624 #		z := sqrt( [1-X][1+X] )					#
   6625 #		asin(X) = atan( x / z ).				#
   6626 #		Exit.							#
   6627 #									#
   6628 #	3. If |X| > 1, go to 5.						#
   6629 #									#
   6630 #	4. (|X| = 1) sgn := sign(X), return asin(X) := sgn * Pi/2. Exit.#
   6631 #									#
   6632 #	5. (|X| > 1) Generate an invalid operation by 0 * infinity.	#
   6633 #		Exit.							#
   6634 #									#
   6635 #########################################################################
   6636 
   6637 	global		sasin
   6638 sasin:
   6639 	fmov.x		(%a0),%fp0		# LOAD INPUT
   6640 
   6641 	mov.l		(%a0),%d1		# pack exp w/ upper 16 fraction
   6642 	mov.w		4(%a0),%d1
   6643 	and.l		&0x7FFFFFFF,%d1		# drop sign: d1 = |X| in compact form
   6644 	cmp.l		%d1,&0x3FFF8000		# |X| >= 1?
   6645 	bge.b		ASINBIG
   6646 
   6647 # This catch is added here for the '060 QSP. Originally, the call to
   6648 # satan() would handle this case by causing the exception which would
   6649 # not be caught until gen_except(). Now, with the exceptions being
   6650 # detected inside of satan(), the exception would have been handled there
   6651 # instead of inside sasin() as expected.
   6652 	cmp.l		%d1,&0x3FD78000		# |X| < 2^(-40)?
   6653 	blt.w		ASINTINY
   6654 
   6655 #--THIS IS THE USUAL CASE, |X| < 1
   6656 #--ASIN(X) = ATAN( X / SQRT( (1-X)(1+X) ) )
   6657 
   6658 ASINMAIN:
   6659 	fmov.s		&0x3F800000,%fp1	# 1.0
   6660 	fsub.x		%fp0,%fp1		# 1-X
   6661 	fmovm.x		&0x4,-(%sp)		#  {fp2}
   6662 	fmov.s		&0x3F800000,%fp2	# 1.0
   6663 	fadd.x		%fp0,%fp2		# 1+X
   6664 	fmul.x		%fp2,%fp1		# (1+X)(1-X)
   6665 	fmovm.x		(%sp)+,&0x20		#  {fp2}
   6666 	fsqrt.x		%fp1			# SQRT([1-X][1+X])
   6667 	fdiv.x		%fp1,%fp0		# X/SQRT([1-X][1+X])
   6668 	fmovm.x		&0x01,-(%sp)		# save X/SQRT(...)
   6669 	lea		(%sp),%a0		# pass ptr to X/SQRT(...)
   6670 	bsr		satan
   6671 	add.l		&0xc,%sp		# clear X/SQRT(...) from stack
   6672 	bra		t_inx2
   6673 
   6674 ASINBIG:
   6675 	fabs.x		%fp0			# |X|
   6676 	fcmp.s		%fp0,&0x3F800000	# compare |X| with 1.0
   6677 	fbgt		t_operr			# cause an operr exception
   6678 
   6679 #--|X| = 1, ASIN(X) = +- PI/2.
   6680 ASINONE:
   6681 	fmov.x		PIBY2(%pc),%fp0
   6682 	mov.l		(%a0),%d1
   6683 	and.l		&0x80000000,%d1		# SIGN BIT OF X
   6684 	or.l		&0x3F800000,%d1		# +-1 IN SGL FORMAT
   6685 	mov.l		%d1,-(%sp)		# push SIGN(X) IN SGL-FMT
   6686 	fmov.l		%d0,%fpcr		# restore users rnd mode,prec
   6687 	fmul.s		(%sp)+,%fp0		# PIBY2 * (+-1); pops the pushed longword
   6688 	bra		t_inx2
   6689 
   6690 #--|X| < 2^(-40), ASIN(X) = X
   6691 ASINTINY:
   6692 	fmov.l		%d0,%fpcr		# restore users rnd mode,prec
   6693 	mov.b		&FMOV_OP,%d1		# last inst is MOVE
   6694 	fmov.x		(%a0),%fp0		# last inst - possible exception
   6695 	bra		t_catch
   6696 
   6697 	global		sasind
   6698 #--ASIN(X) = X FOR DENORMALIZED X
   6699 sasind:
   6700 	bra		t_extdnrm		# no computation here; t_extdnrm is not defined in this section
   6701 
   6702 #########################################################################
   6703 # sacos():  computes the inverse cosine of a normalized input		#
   6704 # sacosd(): computes the inverse cosine of a denormalized input		#
   6705 #									#
   6706 # INPUT ***************************************************************	#
   6707 #	a0 = pointer to extended precision input			#
   6708 #	d0 = round precision,mode					#
   6709 #									#
   6710 # OUTPUT ************************************************************** #
   6711 #	fp0 = arccos(X)							#
   6712 #									#
   6713 # ACCURACY and MONOTONICITY *******************************************	#
   6714 #	The returned result is within 3 ulps in	64 significant bit,	#
   6715 #	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
   6716 #	rounded to double precision. The result is provably monotonic	#
   6717 #	in double precision.						#
   6718 #									#
   6719 # ALGORITHM *********************************************************** #
   6720 #									#
   6721 #	ACOS								#
   6722 #	1. If |X| >= 1, go to 3.					#
   6723 #									#
   6724 #	2. (|X| < 1) Calculate acos(X) by				#
   6725 #		z := (1-X) / (1+X)					#
   6726 #		acos(X) = 2 * atan( sqrt(z) ).				#
   6727 #		Exit.							#
   6728 #									#
   6729 #	3. If |X| > 1, go to 5.						#
   6730 #									#
   6731 #	4. (|X| = 1) If X > 0, return 0. Otherwise, return Pi. Exit.	#
   6732 #									#
   6733 #	5. (|X| > 1) Generate an invalid operation by 0 * infinity.	#
   6734 #		Exit.							#
   6735 #									#
   6736 #########################################################################
   6737 
   6738 	global		sacos
   6739 sacos:
   6740 	fmov.x		(%a0),%fp0		# LOAD INPUT
   6741 
   6742 	mov.l		(%a0),%d1		# pack exp w/ upper 16 fraction
   6743 	mov.w		4(%a0),%d1
   6744 	and.l		&0x7FFFFFFF,%d1		# drop sign: d1 = |X| in compact form
   6745 	cmp.l		%d1,&0x3FFF8000		# |X| >= 1?
   6746 	bge.b		ACOSBIG
   6747 
   6748 #--THIS IS THE USUAL CASE, |X| < 1
   6749 #--ACOS(X) = 2 * ATAN(	SQRT( (1-X)/(1+X) ) )
   6750 
   6751 ACOSMAIN:
   6752 	fmov.s		&0x3F800000,%fp1	# 1.0
   6753 	fadd.x		%fp0,%fp1		# 1+X
   6754 	fneg.x		%fp0			# -X
   6755 	fadd.s		&0x3F800000,%fp0	# 1-X
   6756 	fdiv.x		%fp1,%fp0		# (1-X)/(1+X)
   6757 	fsqrt.x		%fp0			# SQRT((1-X)/(1+X))
   6758 	mov.l		%d0,-(%sp)		# save original users fpcr
   6759 	clr.l		%d0			# satan loads fpcr from d0; pass zero
   6760 	fmovm.x		&0x01,-(%sp)		# save SQRT(...) to stack
   6761 	lea		(%sp),%a0		# pass ptr to sqrt
   6762 	bsr		satan			# ATAN(SQRT([1-X]/[1+X]))
   6763 	add.l		&0xc,%sp		# clear SQRT(...) from stack
   6764 
   6765 	fmov.l		(%sp)+,%fpcr		# restore users round prec,mode
   6766 	fadd.x		%fp0,%fp0		# 2 * ATAN( STUFF )
   6767 	bra		t_pinx2
   6768 
   6769 ACOSBIG:
   6770 	fabs.x		%fp0			# |X|
   6771 	fcmp.s		%fp0,&0x3F800000	# compare |X| with 1.0
   6772 	fbgt		t_operr			# cause an operr exception
   6773 
   6774 #--|X| = 1, ACOS(X) = 0 OR PI
   6775 	tst.b		(%a0)			# is X positive or negative?
   6776 	bpl.b		ACOSP1
   6777 
   6778 #--X = -1
   6779 #Returns PI and inexact exception
   6780 ACOSM1:
   6781 	fmov.x		PI(%pc),%fp0		# load PI
   6782 	fmov.l		%d0,%fpcr		# load round mode,prec
   6783 	fadd.s		&0x00800000,%fp0	# add a small value
   6784 	bra		t_pinx2
   6785 
   6786 #--X = +1
   6787 ACOSP1:
   6788 	bra		ld_pzero		# answer is positive zero
   6788 
   6789 	global		sacosd
   6790 #--ACOS(X) = PI/2 FOR DENORMALIZED X
   6791 sacosd:
   6792 	fmov.l		%d0,%fpcr		# load user's rnd mode/prec
   6793 	fmov.x		PIBY2(%pc),%fp0		# load PIBY2 (constant not defined in this section)
   6794 	bra		t_pinx2
   6795 
   6796 #########################################################################
   6797 # setox():    computes the exponential for a normalized input		#
   6798 # setoxd():   computes the exponential for a denormalized input		#
   6799 # setoxm1():  computes the exponential minus 1 for a normalized input	#
   6800 # setoxm1d(): computes the exponential minus 1 for a denormalized input	#
   6801 #									#
   6802 # INPUT	*************************************************************** #
   6803 #	a0 = pointer to extended precision input			#
   6804 #	d0 = round precision,mode					#
   6805 #									#
   6806 # OUTPUT ************************************************************** #
   6807 #	fp0 = exp(X) or exp(X)-1					#
   6808 #									#
   6809 # ACCURACY and MONOTONICITY ******************************************* #
   6810 #	The returned result is within 0.85 ulps in 64 significant bit, 	#
   6811 #	i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
   6812 #	rounded to double precision. The result is provably monotonic 	#
   6813 #	in double precision.						#
   6814 #									#
   6815 # ALGORITHM and IMPLEMENTATION **************************************** #
   6816 #									#
   6817 #	setoxd								#
   6818 #	------								#
   6819 #	Step 1.	Set ans := 1.0						#
   6820 #									#
   6821 #	Step 2.	Return	ans := ans + sign(X)*2^(-126). Exit.		#
   6822 #	Notes:	This will always generate one exception -- inexact.	#
   6823 #									#
   6824 #									#
   6825 #	setox								#
   6826 #	-----								#
   6827 #									#
   6828 #	Step 1.	Filter out extreme cases of input argument.		#
   6829 #		1.1	If |X| >= 2^(-65), go to Step 1.3.		#
   6830 #		1.2	Go to Step 7.					#
   6831 #		1.3	If |X| < 16380 log(2), go to Step 2.		#
   6832 #		1.4	Go to Step 8.					#
   6833 #	Notes:	The usual case should take the branches 1.1 -> 1.3 -> 2.#
   6834 #		To avoid the use of floating-point comparisons, a	#
   6835 #		compact representation of |X| is used. This format is a	#
   6836 #		32-bit integer, the upper (more significant) 16 bits 	#
   6837 #		are the sign and biased exponent field of |X|; the 	#
   6838 #		lower 16 bits are the 16 most significant fraction	#
   6839 #		(including the explicit bit) bits of |X|. Consequently,	#
   6840 #		the comparisons in Steps 1.1 and 1.3 can be performed	#
   6841 #		by integer comparison. Note also that the constant	#
   6842 #		16380 log(2) used in Step 1.3 is also in the compact	#
   6843 #		form. Thus taking the branch to Step 2 guarantees 	#
   6844 #		|X| < 16380 log(2). There is no harm to have a small	#
   6845 #		number of cases where |X| is less than,	but close to,	#
   6846 #		16380 log(2) and the branch to Step 8 is taken.		#
   6847 #									#
   6848 #	Step 2.	Calculate N = round-to-nearest-int( X * 64/log2 ).	#
   6849 #		2.1	Set AdjFlag := 0 (indicates the branch 1.3 -> 2 #
   6850 #			was taken)					#
   6851 #		2.2	N := round-to-nearest-integer( X * 64/log2 ).	#
   6852 #		2.3	Calculate	J = N mod 64; so J = 0,1,2,..., #
   6853 #			or 63.						#
   6854 #		2.4	Calculate	M = (N - J)/64; so N = 64M + J.	#
   6855 #		2.5	Calculate the address of the stored value of 	#
   6856 #			2^(J/64).					#
   6857 #		2.6	Create the value Scale = 2^M.			#
   6858 #	Notes:	The calculation in 2.2 is really performed by		#
   6859 #			Z := X * constant				#
   6860 #			N := round-to-nearest-integer(Z)		#
   6861 #		where							#
   6862 #			constant := single-precision( 64/log 2 ).	#
   6863 #									#
   6864 #		Using a single-precision constant avoids memory 	#
   6865 #		access. Another effect of using a single-precision	#
   6866 #		"constant" is that the calculated value Z is 		#
   6867 #									#
   6868 #			Z = X*(64/log2)*(1+eps), |eps| <= 2^(-24).	#
   6869 #									#
   6870 #		This error has to be considered later in Steps 3 and 4.	#
   6871 #									#
   6872 #	Step 3.	Calculate X - N*log2/64.				#
   6873 #		3.1	R := X + N*L1, 					#
   6874 #				where L1 := single-precision(-log2/64).	#
   6875 #		3.2	R := R + N*L2, 					#
   6876 #				L2 := extended-precision(-log2/64 - L1).#
   6877 #	Notes:	a) The way L1 and L2 are chosen ensures L1+L2 		#
   6878 #		approximate the value -log2/64 to 88 bits of accuracy.	#
   6879 #		b) N*L1 is exact because N is no longer than 22 bits	#
   6880 #		and L1 is no longer than 24 bits.			#
   6881 #		c) The calculation X+N*L1 is also exact due to 		#
   6882 #		cancellation. Thus, R is practically X+N(L1+L2) to full	#
   6883 #		64 bits. 						#
   6884 #		d) It is important to estimate how large can |R| be	#
   6885 #		after Step 3.2.						#
   6886 #									#
   6887 #		N = rnd-to-int( X*64/log2 (1+eps) ), |eps|<=2^(-24)	#
   6888 #		X*64/log2 (1+eps)	=	N + f,	|f| <= 0.5	#
   6889 #		X*64/log2 - N	=	f - eps*X 64/log2		#
   6890 #		X - N*log2/64	=	f*log2/64 - eps*X		#
   6891 #									#
   6892 #									#
   6893 #		Now |X| <= 16446 log2, thus				#
   6894 #									#
   6895 #			|X - N*log2/64| <= (0.5 + 16446/2^(18))*log2/64	#
   6896 #					<= 0.57 log2/64.		#
   6897 #		 This bound will be used in Step 4.			#
   6898 #									#
   6899 #	Step 4.	Approximate exp(R)-1 by a polynomial			#
   6900 #		p = R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5))))	#
   6901 #	Notes:	a) In order to reduce memory access, the coefficients 	#
   6902 #		are made as "short" as possible: A1 (which is 1/2), A4	#
   6903 #		and A5 are single precision; A2 and A3 are double	#
   6904 #		precision. 						#
   6905 #		b) Even with the restrictions above, 			#
   6906 #		   |p - (exp(R)-1)| < 2^(-68.8) for all |R| <= 0.0062.	#
   6907 #		Note that 0.0062 is slightly bigger than 0.57 log2/64.	#
   6908 #		c) To fully utilize the pipeline, p is separated into	#
   6909 #		two independent pieces of roughly equal complexities	#
   6910 #			p = [ R + R*S*(A2 + S*A4) ]	+		#
   6911 #				[ S*(A1 + S*(A3 + S*A5)) ]		#
   6912 #		where S = R*R.						#
   6913 #									#
   6914 #	Step 5.	Compute 2^(J/64)*exp(R) = 2^(J/64)*(1+p) by		#
   6915 #				ans := T + ( T*p + t)			#
   6916 #		where T and t are the stored values for 2^(J/64).	#
   6917 #	Notes:	2^(J/64) is stored as T and t where T+t approximates	#
   6918 #		2^(J/64) to roughly 85 bits; T is in extended precision	#
   6919 #		and t is in single precision. Note also that T is 	#
   6920 #		rounded to 62 bits so that the last two bits of T are 	#
   6921 #		zero. The reason for such a special form is that T-1, 	#
   6922 #		T-2, and T-8 will all be exact --- a property that will	#
   6923 #		give much more accurate computation of the function 	#
   6924 #		EXPM1.							#
   6925 #									#
   6926 #	Step 6.	Reconstruction of exp(X)				#
   6927 #			exp(X) = 2^M * 2^(J/64) * exp(R).		#
   6928 #		6.1	If AdjFlag = 0, go to 6.3			#
   6929 #		6.2	ans := ans * AdjScale				#
   6930 #		6.3	Restore the user FPCR				#
   6931 #		6.4	Return ans := ans * Scale. Exit.		#
   6932 #	Notes:	If AdjFlag = 0, we have X = Mlog2 + Jlog2/64 + R,	#
   6933 #		|M| <= 16380, and Scale = 2^M. Moreover, exp(X) will	#
   6934 #		neither overflow nor underflow. If AdjFlag = 1, that	#
   6935 #		means that						#
   6936 #			X = (M1+M)log2 + Jlog2/64 + R, |M1+M| >= 16380.	#
   6937 #		Hence, exp(X) may overflow or underflow or neither.	#
   6938 #		When that is the case, AdjScale = 2^(M1) where M1 is	#
   6939 #		approximately M. Thus 6.2 will never cause 		#
   6940 #		over/underflow. Possible exception in 6.4 is overflow	#
   6941 #		or underflow. The inexact exception is not generated in	#
   6942 #		6.4. Although one can argue that the inexact flag	#
   6943 #		should always be raised, to simulate that exception 	#
   6944 #		costs more than the flag is worth in practical uses.	#
   6945 #									#
   6946 #	Step 7.	Return 1 + X.						#
   6947 #		7.1	ans := X					#
   6948 #		7.2	Restore user FPCR.				#
   6949 #		7.3	Return ans := 1 + ans. Exit			#
   6950 #	Notes:	For non-zero X, the inexact exception will always be	#
   6951 #		raised by 7.3. That is the only exception raised by 7.3.#
   6952 #		Note also that we use the FMOVEM instruction to move X	#
   6953 #		in Step 7.1 to avoid unnecessary trapping. (Although	#
   6954 #		the FMOVEM may not seem relevant since X is normalized,	#
   6955 #		the precaution will be useful in the library version of	#
   6956 #		this code where the separate entry for denormalized 	#
   6957 #		inputs will be done away with.)				#
   6958 #									#
   6959 #	Step 8.	Handle exp(X) where |X| >= 16380log2.			#
   6960 #		8.1	If |X| > 16480 log2, go to Step 9.		#
   6961 #		(mimic 2.2 - 2.6)					#
   6962 #		8.2	N := round-to-integer( X * 64/log2 )		#
   6963 #		8.3	Calculate J = N mod 64, J = 0,1,...,63		#
   6964 #		8.4	K := (N-J)/64, M1 := truncate(K/2), M = K-M1, 	#
   6965 #			AdjFlag := 1.					#
   6966 #		8.5	Calculate the address of the stored value 	#
   6967 #			2^(J/64).					#
   6968 #		8.6	Create the values Scale = 2^M, AdjScale = 2^M1.	#
   6969 #		8.7	Go to Step 3.					#
   6970 #	Notes:	Refer to notes for 2.2 - 2.6.				#
   6971 #									#
   6972 #	Step 9.	Handle exp(X), |X| > 16480 log2.			#
   6973 #		9.1	If X < 0, go to 9.3				#
   6974 #		9.2	ans := Huge, go to 9.4				#
   6975 #		9.3	ans := Tiny.					#
   6976 #		9.4	Restore user FPCR.				#
   6977 #		9.5	Return ans := ans * ans. Exit.			#
   6978 #	Notes:	Exp(X) will surely overflow or underflow, depending on	#
   6979 #		X's sign. "Huge" and "Tiny" are respectively large/tiny	#
   6980 #		extended-precision numbers whose square over/underflow	#
   6981 #		with an inexact result. Thus, 9.5 always raises the	#
   6982 #		inexact together with either overflow or underflow.	#
   6983 #									#
   6984 #	setoxm1d							#
   6985 #	--------							#
   6986 #									#
   6987 #	Step 1.	Set ans := 0						#
   6988 #									#
   6989 #	Step 2.	Return	ans := X + ans. Exit.				#
   6990 #	Notes:	This will return X with the appropriate rounding	#
   6991 #		 precision prescribed by the user FPCR.			#
   6992 #									#
   6993 #	setoxm1								#
   6994 #	-------								#
   6995 #									#
   6996 #	Step 1.	Check |X|						#
   6997 #		1.1	If |X| >= 1/4, go to Step 1.3.			#
   6998 #		1.2	Go to Step 7.					#
   6999 #		1.3	If |X| < 70 log(2), go to Step 2.		#
   7000 #		1.4	Go to Step 10.					#
   7001 #	Notes:	The usual case should take the branches 1.1 -> 1.3 -> 2.#
   7002 #		However, it is conceivable |X| can be small very often	#
   7003 #		because EXPM1 is intended to evaluate exp(X)-1 		#
   7004 #		accurately when |X| is small. For further details on 	#
   7005 #		the comparisons, see the notes on Step 1 of setox.	#
   7006 #									#
   7007 #	Step 2.	Calculate N = round-to-nearest-int( X * 64/log2 ).	#
   7008 #		2.1	N := round-to-nearest-integer( X * 64/log2 ).	#
   7009 #		2.2	Calculate	J = N mod 64; so J = 0,1,2,..., #
   7010 #			or 63.						#
   7011 #		2.3	Calculate	M = (N - J)/64; so N = 64M + J.	#
   7012 #		2.4	Calculate the address of the stored value of 	#
   7013 #			2^(J/64).					#
   7014 #		2.5	Create the values Sc = 2^M and 			#
   7015 #			OnebySc := -2^(-M).				#
   7016 #	Notes:	See the notes on Step 2 of setox.			#
   7017 #									#
   7018 #	Step 3.	Calculate X - N*log2/64.				#
   7019 #		3.1	R := X + N*L1, 					#
   7020 #				where L1 := single-precision(-log2/64).	#
   7021 #		3.2	R := R + N*L2, 					#
   7022 #				L2 := extended-precision(-log2/64 - L1).#
   7023 #	Notes:	Applying the analysis of Step 3 of setox in this case	#
   7024 #		shows that |R| <= 0.0055 (note that |X| <= 70 log2 in	#
   7025 #		this case).						#
   7026 #									#
   7027 #	Step 4.	Approximate exp(R)-1 by a polynomial			#
   7028 #			p = R+R*R*(A1+R*(A2+R*(A3+R*(A4+R*(A5+R*A6)))))	#
   7029 #	Notes:	a) In order to reduce memory access, the coefficients 	#
   7030 #		are made as "short" as possible: A1 (which is 1/2), A5 	#
   7031 #		and A6 are single precision; A2, A3 and A4 are double 	#
   7032 #		precision. 						#
   7033 #		b) Even with the restriction above,			#
   7034 #			|p - (exp(R)-1)| <	|R| * 2^(-72.7)		#
   7035 #		for all |R| <= 0.0055.					#
   7036 #		c) To fully utilize the pipeline, p is separated into	#
   7037 #		two independent pieces of roughly equal complexity	#
   7038 #			p = [ R*S*(A2 + S*(A4 + S*A6)) ]	+	#
   7039 #				[ R + S*(A1 + S*(A3 + S*A5)) ]		#
   7040 #		where S = R*R.						#
   7041 #									#
   7042 #	Step 5.	Compute 2^(J/64)*p by					#
   7043 #				p := T*p				#
   7044 #		where T and t are the stored values for 2^(J/64).	#
   7045 #	Notes:	2^(J/64) is stored as T and t where T+t approximates	#
   7046 #		2^(J/64) to roughly 85 bits; T is in extended precision	#
   7047 #		and t is in single precision. Note also that T is 	#
   7048 #		rounded to 62 bits so that the last two bits of T are 	#
   7049 #		zero. The reason for such a special form is that T-1, 	#
   7050 #		T-2, and T-8 will all be exact --- a property that will	#
   7051 #		be exploited in Step 6 below. The total relative error	#
   7052 #		in p is no bigger than 2^(-67.7) compared to the final	#
   7053 #		result.							#
   7054 #									#
   7055 #	Step 6.	Reconstruction of exp(X)-1				#
   7056 #			exp(X)-1 = 2^M * ( 2^(J/64) + p - 2^(-M) ).	#
   7057 #		6.1	If M <= 63, go to Step 6.3.			#
   7058 #		6.2	ans := T + (p + (t + OnebySc)). Go to 6.6	#
   7059 #		6.3	If M >= -3, go to 6.5.				#
   7060 #		6.4	ans := (T + (p + t)) + OnebySc. Go to 6.6	#
   7061 #		6.5	ans := (T + OnebySc) + (p + t).			#
   7062 #		6.6	Restore user FPCR.				#
   7063 #		6.7	Return ans := Sc * ans. Exit.			#
   7064 #	Notes:	The various arrangements of the expressions give 	#
   7065 #		accurate evaluations.					#
   7066 #									#
   7067 #	Step 7.	exp(X)-1 for |X| < 1/4.					#
   7068 #		7.1	If |X| >= 2^(-65), go to Step 9.		#
   7069 #		7.2	Go to Step 8.					#
   7070 #									#
   7071 #	Step 8.	Calculate exp(X)-1, |X| < 2^(-65).			#
   7072 #		8.1	If |X| < 2^(-16312), goto 8.3			#
   7073 #		8.2	Restore FPCR; return ans := X - 2^(-16382).	#
   7074 #			Exit.						#
   7075 #		8.3	X := X * 2^(140).				#
   7076 #		8.4	Restore FPCR; ans := ans - 2^(-16382).		#
   7077 #		 Return ans := ans*2^(-140). Exit			#
   7078 #	Notes:	The idea is to return "X - tiny" under the user		#
   7079 #		precision and rounding modes. To avoid unnecessary	#
   7080 #		inefficiency, we stay away from denormalized numbers 	#
   7081 #		the best we can. For |X| >= 2^(-16312), the 		#
   7082 #		straightforward 8.2 generates the inexact exception as	#
   7083 #		the case warrants.					#
   7084 #									#
   7085 #	Step 9.	Calculate exp(X)-1, |X| < 1/4, by a polynomial		#
   7086 #			p = X + X*X*(B1 + X*(B2 + ... + X*B12))		#
   7087 #	Notes:	a) In order to reduce memory access, the coefficients	#
   7088 #		are made as "short" as possible: B1 (which is 1/2), B9	#
   7089 #		to B12 are single precision; B3 to B8 are double 	#
   7090 #		precision; and B2 is double extended.			#
   7091 #		b) Even with the restriction above,			#
   7092 #			|p - (exp(X)-1)| < |X| 2^(-70.6)		#
   7093 #		for all |X| <= 0.251.					#
   7094 #		Note that 0.251 is slightly bigger than 1/4.		#
   7095 #		c) To fully preserve accuracy, the polynomial is 	#
   7096 #		computed as						#
   7097 #			X + ( S*B1 +	Q ) where S = X*X and		#
   7098 #			Q	=	X*S*(B2 + X*(B3 + ... + X*B12))	#
   7099 #		d) To fully utilize the pipeline, Q is separated into	#
   7100 #		two independent pieces of roughly equal complexity	#
   7101 #			Q = [ X*S*(B2 + S*(B4 + ... + S*B12)) ] +	#
   7102 #				[ S*S*(B3 + S*(B5 + ... + S*B11)) ]	#
   7103 #									#
   7104 #	Step 10. Calculate exp(X)-1 for |X| >= 70 log 2.		#
   7105 #		10.1 If X >= 70log2 , exp(X) - 1 = exp(X) for all 	#
   7106 #		practical purposes. Therefore, go to Step 1 of setox.	#
   7107 #		10.2 If X <= -70log2, exp(X) - 1 = -1 for all practical	#
   7108 #		purposes. 						#
   7109 #		ans := -1 						#
   7110 #		Restore user FPCR					#
   7111 #		Return ans := ans + 2^(-126). Exit.			#
   7112 #	Notes:	10.2 will always create an inexact and return -1 + tiny	#
   7113 #		in the user rounding precision and mode.		#
   7114 #									#
   7115 #########################################################################
   7116 
   7117 L2:	long		0x3FDC0000,0x82E30865,0x4361C4C6,0x00000000	# extended-precision L2; L1+L2 = -log2/64
   7118 
   7119 EEXPA3:	long		0x3FA55555,0x55554CC1	# setox coefficient A3 (double, used by fadd.d)
   7120 EEXPA2:	long		0x3FC55555,0x55554A54	# setox coefficient A2 (double, used by fadd.d)
   7121 
   7122 EM1A4:	long		0x3F811111,0x11174385	# setoxm1 coefficient A4 (double)
   7123 EM1A3:	long		0x3FA55555,0x55554F5A	# setoxm1 coefficient A3 (double)
   7124 
   7125 EM1A2:	long		0x3FC55555,0x55555555,0x00000000,0x00000000	# setoxm1 coefficient A2; read as a double by fadd.d
   7126 
   7127 EM1B8:	long		0x3EC71DE3,0xA5774682	# setoxm1 |X|<1/4 coefficient B8 (double)
   7128 EM1B7:	long		0x3EFA01A0,0x19D7CB68	# B7 (double)
   7129 
   7130 EM1B6:	long		0x3F2A01A0,0x1A019DF3	# B6 (double)
   7131 EM1B5:	long		0x3F56C16C,0x16C170E2	# B5 (double)
   7132 
   7133 EM1B4:	long		0x3F811111,0x11111111	# B4 (double)
   7134 EM1B3:	long		0x3FA55555,0x55555555	# B3 (double)
   7135 
   7136 EM1B2:	long		0x3FFC0000,0xAAAAAAAA,0xAAAAAAAB	# B2 (extended precision, read by fadd.x)
   7137 	long		0x00000000	# fourth long following the B2 extended value
   7138 
   7139 TWO140:	long		0x48B00000,0x00000000	# 2^(140) as a double (used by fmul.d in EM12TINY)
   7140 TWON140:
   7141 	long		0x37300000,0x00000000	# 2^(-140) as a double (used by fmul.d in EM12TINY)
   7142 
   7143 EEXPTBL:
        #--Table of 2^(J/64), J = 0,1,...,63, one 16-byte row per J (the code
        #--indexes it with J shifted left by 4). In each row the first three
        #--longs are T, read with fmov.x / fmul.x / fadd.x, and the fourth long
        #--is t, read with fadd.s / fmov.s; T+t approximates 2^(J/64).
   7144 	long		0x3FFF0000,0x80000000,0x00000000,0x00000000
   7145 	long		0x3FFF0000,0x8164D1F3,0xBC030774,0x9F841A9B
   7146 	long		0x3FFF0000,0x82CD8698,0xAC2BA1D8,0x9FC1D5B9
   7147 	long		0x3FFF0000,0x843A28C3,0xACDE4048,0xA0728369
   7148 	long		0x3FFF0000,0x85AAC367,0xCC487B14,0x1FC5C95C
   7149 	long		0x3FFF0000,0x871F6196,0x9E8D1010,0x1EE85C9F
   7150 	long		0x3FFF0000,0x88980E80,0x92DA8528,0x9FA20729
   7151 	long		0x3FFF0000,0x8A14D575,0x496EFD9C,0xA07BF9AF
   7152 	long		0x3FFF0000,0x8B95C1E3,0xEA8BD6E8,0xA0020DCF
   7153 	long		0x3FFF0000,0x8D1ADF5B,0x7E5BA9E4,0x205A63DA
   7154 	long		0x3FFF0000,0x8EA4398B,0x45CD53C0,0x1EB70051
   7155 	long		0x3FFF0000,0x9031DC43,0x1466B1DC,0x1F6EB029
   7156 	long		0x3FFF0000,0x91C3D373,0xAB11C338,0xA0781494
   7157 	long		0x3FFF0000,0x935A2B2F,0x13E6E92C,0x9EB319B0
   7158 	long		0x3FFF0000,0x94F4EFA8,0xFEF70960,0x2017457D
   7159 	long		0x3FFF0000,0x96942D37,0x20185A00,0x1F11D537
   7160 	long		0x3FFF0000,0x9837F051,0x8DB8A970,0x9FB952DD
   7161 	long		0x3FFF0000,0x99E04593,0x20B7FA64,0x1FE43087
   7162 	long		0x3FFF0000,0x9B8D39B9,0xD54E5538,0x1FA2A818
   7163 	long		0x3FFF0000,0x9D3ED9A7,0x2CFFB750,0x1FDE494D
   7164 	long		0x3FFF0000,0x9EF53260,0x91A111AC,0x20504890
   7165 	long		0x3FFF0000,0xA0B0510F,0xB9714FC4,0xA073691C
   7166 	long		0x3FFF0000,0xA2704303,0x0C496818,0x1F9B7A05
   7167 	long		0x3FFF0000,0xA43515AE,0x09E680A0,0xA0797126
   7168 	long		0x3FFF0000,0xA5FED6A9,0xB15138EC,0xA071A140
   7169 	long		0x3FFF0000,0xA7CD93B4,0xE9653568,0x204F62DA
   7170 	long		0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x1F283C4A
   7171 	long		0x3FFF0000,0xAB7A39B5,0xA93ED338,0x9F9A7FDC
   7172 	long		0x3FFF0000,0xAD583EEA,0x42A14AC8,0xA05B3FAC
   7173 	long		0x3FFF0000,0xAF3B78AD,0x690A4374,0x1FDF2610
   7174 	long		0x3FFF0000,0xB123F581,0xD2AC2590,0x9F705F90
   7175 	long		0x3FFF0000,0xB311C412,0xA9112488,0x201F678A
   7176 	long		0x3FFF0000,0xB504F333,0xF9DE6484,0x1F32FB13
   7177 	long		0x3FFF0000,0xB6FD91E3,0x28D17790,0x20038B30
   7178 	long		0x3FFF0000,0xB8FBAF47,0x62FB9EE8,0x200DC3CC
   7179 	long		0x3FFF0000,0xBAFF5AB2,0x133E45FC,0x9F8B2AE6
   7180 	long		0x3FFF0000,0xBD08A39F,0x580C36C0,0xA02BBF70
   7181 	long		0x3FFF0000,0xBF1799B6,0x7A731084,0xA00BF518
   7182 	long		0x3FFF0000,0xC12C4CCA,0x66709458,0xA041DD41
   7183 	long		0x3FFF0000,0xC346CCDA,0x24976408,0x9FDF137B
   7184 	long		0x3FFF0000,0xC5672A11,0x5506DADC,0x201F1568
   7185 	long		0x3FFF0000,0xC78D74C8,0xABB9B15C,0x1FC13A2E
   7186 	long		0x3FFF0000,0xC9B9BD86,0x6E2F27A4,0xA03F8F03
   7187 	long		0x3FFF0000,0xCBEC14FE,0xF2727C5C,0x1FF4907D
   7188 	long		0x3FFF0000,0xCE248C15,0x1F8480E4,0x9E6E53E4
   7189 	long		0x3FFF0000,0xD06333DA,0xEF2B2594,0x1FD6D45C
   7190 	long		0x3FFF0000,0xD2A81D91,0xF12AE45C,0xA076EDB9
   7191 	long		0x3FFF0000,0xD4F35AAB,0xCFEDFA20,0x9FA6DE21
   7192 	long		0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x1EE69A2F
   7193 	long		0x3FFF0000,0xD99D15C2,0x78AFD7B4,0x207F439F
   7194 	long		0x3FFF0000,0xDBFBB797,0xDAF23754,0x201EC207
   7195 	long		0x3FFF0000,0xDE60F482,0x5E0E9124,0x9E8BE175
   7196 	long		0x3FFF0000,0xE0CCDEEC,0x2A94E110,0x20032C4B
   7197 	long		0x3FFF0000,0xE33F8972,0xBE8A5A50,0x2004DFF5
   7198 	long		0x3FFF0000,0xE5B906E7,0x7C8348A8,0x1E72F47A
   7199 	long		0x3FFF0000,0xE8396A50,0x3C4BDC68,0x1F722F22
   7200 	long		0x3FFF0000,0xEAC0C6E7,0xDD243930,0xA017E945
   7201 	long		0x3FFF0000,0xED4F301E,0xD9942B84,0x1F401A5B
   7202 	long		0x3FFF0000,0xEFE4B99B,0xDCDAF5CC,0x9FB9A9E3
   7203 	long		0x3FFF0000,0xF281773C,0x59FFB138,0x20744C05
   7204 	long		0x3FFF0000,0xF5257D15,0x2486CC2C,0x1F773A19
   7205 	long		0x3FFF0000,0xF7D0DF73,0x0AD13BB8,0x1FFE90D5
   7206 	long		0x3FFF0000,0xFA83B2DB,0x722A033C,0xA041ED22
   7207 	long		0x3FFF0000,0xFD3E0C0C,0xF486C174,0x1F853F3A
   7208 
   7209 	set		ADJFLAG,L_SCR2		# setox: non-zero when the ADJSCALE multiply is needed
   7210 	set		SCALE,FP_SCR0		# setox: 2^(M) in extended precision
   7211 	set		ADJSCALE,FP_SCR1	# setox: 2^(M1) in extended precision
   7212 	set		SC,FP_SCR0		# setoxm1: 2^(M); same scratch slot as SCALE
   7213 	set		ONEBYSC,FP_SCR1		# setoxm1: -2^(-M); same scratch slot as ADJSCALE
   7214 
   7215 	global		setox
   7216 setox:
   7217 #--entry point for EXP(X), here X is finite, non-zero, and not NaN's
        #--a0 points to the extended-precision input X. d0 holds the value that
        #--the exit paths below move into %fpcr ("restore user FPCR").
   7218 
   7219 #--Step 1.
   7220 	mov.l		(%a0),%d1		# load sign/exponent longword of X
   7221 	and.l		&0x7FFF0000,%d1		# drop the sign: biased expo. of X in upper word
   7222 	cmp.l		%d1,&0x3FBE0000		# compare with biased expo. of 2^(-65)
   7223 	bge.b		EXPC1			# |X| >= 2^(-65): normal case
   7224 	bra		EXPSM			# |X| < 2^(-65): Step 7, return 1+X
   7225 
   7226 EXPC1:
   7227 #--The case |X| >= 2^(-65)
        #--The upper word of d1 already holds the exponent of X. This label is
        #--also the target of the branch in EM1BIG taken for positive X.
   7228 	mov.w		4(%a0),%d1		# low word := top 16 bits of the significand
   7229 	cmp.l		%d1,&0x400CB167		# 16380 log2 trunc. 16 bits
   7230 	blt.b		EXPMAIN			# |X| < 16380 log2: normal case
   7231 	bra		EEXPBIG			# otherwise Step 8
   7232 
   7233 EXPMAIN:
   7234 #--Step 2.
   7235 #--This is the normal branch:	2^(-65) <= |X| < 16380 log2.
        #--Produces N (in fp0), J (as a table offset in a1) and the biased
        #--exponent of 2^(M) (in d1) for EXPCONT1, with ADJFLAG cleared.
   7236 	fmov.x		(%a0),%fp0		# load input from (a0)
   7237 
   7238 	fmov.x		%fp0,%fp1		# fp1 keeps X for Step 3
   7239 	fmul.s		&0x42B8AA3B,%fp0	# 64/log2 * X
   7240 	fmovm.x		&0xc,-(%sp)		# save {%fp2/%fp3} on the stack
   7241 	mov.l		&0,ADJFLAG(%a6)		# AdjFlag := 0 (no ADJSCALE multiply in Step 6)
   7242 	fmov.l		%fp0,%d1		# N = int( X * 64/log2 )
   7243 	lea		EEXPTBL(%pc),%a1	# a1 = base of the 2^(J/64) table
   7244 	fmov.l		%d1,%fp0		# convert to floating-format
   7245 
   7246 	mov.l		%d1,L_SCR1(%a6)		# save N temporarily
   7247 	and.l		&0x3F,%d1		# d1 is J = N mod 64
   7248 	lsl.l		&4,%d1			# J*16: byte offset of table entry J
   7249 	add.l		%d1,%a1			# address of 2^(J/64)
   7250 	mov.l		L_SCR1(%a6),%d1		# reload N
   7251 	asr.l		&6,%d1			# d1 is M = (N-J)/64
   7252 	add.w		&0x3FFF,%d1		# biased expo. of 2^(M)
   7253 	mov.w		L2(%pc),L_SCR1(%a6)	# prefetch L2, no need in CB
   7254 
   7255 EXPCONT1:
   7256 #--Step 3.
   7257 #--fp2,fp3 saved on the stack. fp0 is N, fp1 is X,
   7258 #--a1 points to 2^(J/64), d1 is biased expo. of 2^(M)
        #--Entered by fall-through from EXPMAIN and by branch from EEXPBIG.
   7259 	fmov.x		%fp0,%fp2
   7260 	fmul.s		&0xBC317218,%fp0	# N * L1, L1 = lead(-log2/64)
   7261 	fmul.x		L2(%pc),%fp2		# N * L2, L1+L2 = -log2/64
   7262 	fadd.x		%fp1,%fp0		# X + N*L1
   7263 	fadd.x		%fp2,%fp0		# fp0 is R, reduced arg.
   7264 
   7265 #--Step 4.
   7266 #--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
   7267 #-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5))))
   7268 #--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
   7269 #--[R+R*S*(A2+S*A4)] + [S*(A1+S*(A3+S*A5))]
   7270 
   7271 	fmov.x		%fp0,%fp1
   7272 	fmul.x		%fp1,%fp1		# fp1 IS S = R*R
   7273 
   7274 	fmov.s		&0x3AB60B70,%fp2	# fp2 IS A5
   7275 
   7276 	fmul.x		%fp1,%fp2		# fp2 IS S*A5
   7277 	fmov.x		%fp1,%fp3
   7278 	fmul.s		&0x3C088895,%fp3	# fp3 IS S*A4
   7279 
   7280 	fadd.d		EEXPA3(%pc),%fp2	# fp2 IS A3+S*A5
   7281 	fadd.d		EEXPA2(%pc),%fp3	# fp3 IS A2+S*A4
   7282 
   7283 	fmul.x		%fp1,%fp2		# fp2 IS S*(A3+S*A5)
        #--The next three integer moves build SCALE between the FP operations:
        #--sign/exponent word from d1, significand 0x80000000_00000000 (i.e. 1.0).
   7284 	mov.w		%d1,SCALE(%a6)		# SCALE is 2^(M) in extended
   7285 	mov.l		&0x80000000,SCALE+4(%a6)
   7286 	clr.l		SCALE+8(%a6)
   7287 
   7288 	fmul.x		%fp1,%fp3		# fp3 IS S*(A2+S*A4)
   7289 
   7290 	fadd.s		&0x3F000000,%fp2	# fp2 IS A1+S*(A3+S*A5)
   7291 	fmul.x		%fp0,%fp3		# fp3 IS R*S*(A2+S*A4)
   7292 
   7293 	fmul.x		%fp1,%fp2		# fp2 IS S*(A1+S*(A3+S*A5))
   7294 	fadd.x		%fp3,%fp0		# fp0 IS R+R*S*(A2+S*A4),
   7295 
   7296 	fmov.x		(%a1)+,%fp1		# fp1 is lead. pt. of 2^(J/64); a1 now at t
   7297 	fadd.x		%fp2,%fp0		# fp0 is EXP(R) - 1
   7298 
   7299 #--Step 5
   7300 #--final reconstruction process
   7301 #--EXP(X) = 2^M * ( 2^(J/64) + 2^(J/64)*(EXP(R)-1) )
   7302 
   7303 	fmul.x		%fp1,%fp0		# 2^(J/64)*(Exp(R)-1)
   7304 	fmovm.x		(%sp)+,&0x30		# {%fp2/%fp3} restored
   7305 	fadd.s		(%a1),%fp0		# add t, the single-precision tail of 2^(J/64)
   7306 
   7307 	fadd.x		%fp1,%fp0		# 2^(J/64) + 2^(J/64)*...
   7308 	mov.l		ADJFLAG(%a6),%d1
   7309 
   7310 #--Step 6
   7311 	tst.l		%d1			# AdjFlag = 0 ?
   7312 	beq.b		NORMAL			# yes: skip the ADJSCALE multiply
   7313 ADJUST:
   7314 	fmul.x		ADJSCALE(%a6),%fp0	# ans := ans * 2^(M1)
   7315 NORMAL:
   7316 	fmov.l		%d0,%fpcr		# restore user FPCR
   7317 	mov.b		&FMUL_OP,%d1		# last inst is MUL
   7318 	fmul.x		SCALE(%a6),%fp0		# multiply 2^(M)
   7319 	bra		t_catch			# exit through t_catch (defined elsewhere)
   7320 
   7321 EXPSM:
   7322 #--Step 7
        #--|X| < 2^(-65): the result is 1+X evaluated under the user FPCR.
   7323 	fmovm.x		(%a0),&0x80		# load X into fp0 with fmovm (see Step 7 notes)
   7324 	fmov.l		%d0,%fpcr		# restore user FPCR
   7325 	fadd.s		&0x3F800000,%fp0	# 1+X in user mode
   7326 	bra		t_pinx2			# exit through t_pinx2 (defined elsewhere)
   7327 
   7328 EEXPBIG:
   7329 #--Step 8
        #--|X| >= 16380 log2. d1 still holds the exponent and top significand
        #--word of |X| from EXPC1.
   7330 	cmp.l		%d1,&0x400CB27C		# 16480 log2
   7331 	bgt.b		EXP2BIG			# beyond that: Step 9
   7332 #--Steps 8.2 -- 8.6
   7333 	fmov.x		(%a0),%fp0		# load input from (a0)
   7334 
   7335 	fmov.x		%fp0,%fp1		# fp1 keeps X for Step 3
   7336 	fmul.s		&0x42B8AA3B,%fp0	# 64/log2 * X
   7337 	fmovm.x		&0xc,-(%sp)		# save {%fp2/%fp3} on the stack
   7338 	mov.l		&1,ADJFLAG(%a6)		# AdjFlag := 1 (Step 6 multiplies by ADJSCALE)
   7339 	fmov.l		%fp0,%d1		# N = int( X * 64/log2 )
   7340 	lea		EEXPTBL(%pc),%a1	# a1 = base of the 2^(J/64) table
   7341 	fmov.l		%d1,%fp0		# convert to floating-format
   7342 	mov.l		%d1,L_SCR1(%a6)		# save N temporarily
   7343 	and.l		&0x3F,%d1		# d1 is J = N mod 64
   7344 	lsl.l		&4,%d1			# J*16: byte offset of table entry J
   7345 	add.l		%d1,%a1			# address of 2^(J/64)
   7346 	mov.l		L_SCR1(%a6),%d1		# reload N
   7347 	asr.l		&6,%d1			# d1 is K = (N-J)/64
   7348 	mov.l		%d1,L_SCR1(%a6)		# save K temporarily
   7349 	asr.l		&1,%d1			# d1 is M1 = K shifted right by one
   7350 	sub.l		%d1,L_SCR1(%a6)		# L_SCR1 is M = K - M1
   7351 	add.w		&0x3FFF,%d1		# biased expo. of 2^(M1)
   7352 	mov.w		%d1,ADJSCALE(%a6)	# ADJSCALE := 2^(M1)
   7353 	mov.l		&0x80000000,ADJSCALE+4(%a6)
   7354 	clr.l		ADJSCALE+8(%a6)
   7355 	mov.l		L_SCR1(%a6),%d1		# d1 is M
   7356 	add.w		&0x3FFF,%d1		# biased expo. of 2^(M)
   7357 	bra.w		EXPCONT1		# go back to Step 3
   7358 
   7359 EXP2BIG:
   7360 #--Step 9
        #--|X| > 16480 log2: the result surely underflows (X < 0) or overflows.
   7361 	tst.b		(%a0)			# is X positive or negative?
   7362 	bmi		t_unfl2			# negative: exit through t_unfl2
   7363 	bra		t_ovfl2			# positive: exit through t_ovfl2
   7364 
   7365 	global		setoxd
   7366 setoxd:
   7367 #--entry point for EXP(X), X is denormalized
        #--Returns 1 + sign(X)*2^(-126), added under the FPCR value taken from d0.
   7368 	mov.l		(%a0),-(%sp)		# push sign/exponent longword of X
   7369 	andi.l		&0x80000000,(%sp)	# keep only the sign bit
   7370 	ori.l		&0x00800000,(%sp)	# sign(X)*2^(-126)
   7371 
   7372 	fmov.s		&0x3F800000,%fp0	# fp0 is 1.0
   7373 
   7374 	fmov.l		%d0,%fpcr		# restore user FPCR
   7375 	fadd.s		(%sp)+,%fp0		# 1 + sign(X)*2^(-126); pops the temporary
   7376 	bra		t_pinx2			# exit through t_pinx2 (defined elsewhere)
   7377 
   7378 	global		setoxm1
   7379 setoxm1:
   7380 #--entry point for EXPM1(X), here X is finite, non-zero, non-NaN
        #--a0 points to the extended-precision input X. d0 holds the value that
        #--the exit paths below move into %fpcr.
   7381 
   7382 #--Step 1.
   7383 #--Step 1.1
   7384 	mov.l		(%a0),%d1		# load sign/exponent longword of X
   7385 	and.l		&0x7FFF0000,%d1		# drop the sign: biased expo. of X in upper word
   7386 	cmp.l		%d1,&0x3FFD0000		# 1/4
   7387 	bge.b		EM1CON1			# |X| >= 1/4
   7388 	bra		EM1SM			# |X| < 1/4: Step 7
   7389 
   7390 EM1CON1:
   7391 #--Step 1.3
   7392 #--The case |X| >= 1/4
        #--The upper word of d1 holds the biased exponent of X (sign removed).
   7393 	mov.w		4(%a0),%d1		# low word := top 16 bits of the significand
   7394 	cmp.l		%d1,&0x4004C215		# 70log2 rounded up to 16 bits
   7395 	ble.b		EM1MAIN			# 1/4 <= |X| <= 70log2
   7396 	bra		EM1BIG			# |X| > 70log2: Step 10
   7397 
   7398 EM1MAIN:
   7399 #--Step 2.
   7400 #--This is the case:	1/4 <= |X| <= 70 log2.
   7401 	fmov.x		(%a0),%fp0		# load input from (a0)
   7402 
   7403 	fmov.x		%fp0,%fp1		# fp1 keeps X for Step 3
   7404 	fmul.s		&0x42B8AA3B,%fp0	# 64/log2 * X
   7405 	fmovm.x		&0xc,-(%sp)		# save {%fp2/%fp3} on the stack
   7406 	fmov.l		%fp0,%d1		# N = int( X * 64/log2 )
   7407 	lea		EEXPTBL(%pc),%a1	# a1 = base of the 2^(J/64) table
   7408 	fmov.l		%d1,%fp0		# convert to floating-format
   7409 
   7410 	mov.l		%d1,L_SCR1(%a6)		# save N temporarily
   7411 	and.l		&0x3F,%d1		# d1 is J = N mod 64
   7412 	lsl.l		&4,%d1			# J*16: byte offset of table entry J
   7413 	add.l		%d1,%a1			# address of 2^(J/64)
   7414 	mov.l		L_SCR1(%a6),%d1		# reload N
   7415 	asr.l		&6,%d1			# d1 is M = (N-J)/64
   7416 	mov.l		%d1,L_SCR1(%a6)		# save a copy of M
   7417 
   7418 #--Step 3.
   7419 #--fp2,fp3 saved on the stack. fp0 is N, fp1 is X,
   7420 #--a1 points to 2^(J/64), d1 and L_SCR1 both contain M
   7421 	fmov.x		%fp0,%fp2
   7422 	fmul.s		&0xBC317218,%fp0	# N * L1, L1 = lead(-log2/64)
   7423 	fmul.x		L2(%pc),%fp2		# N * L2, L1+L2 = -log2/64
   7424 	fadd.x		%fp1,%fp0		# X + N*L1
   7425 	fadd.x		%fp2,%fp0		# fp0 is R, reduced arg.
   7426 	add.w		&0x3FFF,%d1		# d1 is biased expo. of 2^M
   7427 
   7428 #--Step 4.
   7429 #--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
   7430 #-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*(A5 + R*A6)))))
   7431 #--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
   7432 #--[R*S*(A2+S*(A4+S*A6))] + [R+S*(A1+S*(A3+S*A5))]
   7433 
   7434 	fmov.x		%fp0,%fp1
   7435 	fmul.x		%fp1,%fp1		# fp1 IS S = R*R
   7436 
   7437 	fmov.s		&0x3950097B,%fp2	# fp2 IS a6
   7438 
   7439 	fmul.x		%fp1,%fp2		# fp2 IS S*A6
   7440 	fmov.x		%fp1,%fp3
   7441 	fmul.s		&0x3AB60B6A,%fp3	# fp3 IS S*A5
   7442 
   7443 	fadd.d		EM1A4(%pc),%fp2		# fp2 IS A4+S*A6
   7444 	fadd.d		EM1A3(%pc),%fp3		# fp3 IS A3+S*A5
   7445 	mov.w		%d1,SC(%a6)		# SC is 2^(M) in extended
   7446 	mov.l		&0x80000000,SC+4(%a6)
   7447 	clr.l		SC+8(%a6)
   7448 
   7449 	fmul.x		%fp1,%fp2		# fp2 IS S*(A4+S*A6)
   7450 	mov.l		L_SCR1(%a6),%d1		# d1 is	M
   7451 	neg.w		%d1			# d1 is -M
   7452 	fmul.x		%fp1,%fp3		# fp3 IS S*(A3+S*A5)
   7453 	add.w		&0x3FFF,%d1		# biased expo. of 2^(-M)
   7454 	fadd.d		EM1A2(%pc),%fp2		# fp2 IS A2+S*(A4+S*A6)
   7455 	fadd.s		&0x3F000000,%fp3	# fp3 IS A1+S*(A3+S*A5)
   7456 
   7457 	fmul.x		%fp1,%fp2		# fp2 IS S*(A2+S*(A4+S*A6))
   7458 	or.w		&0x8000,%d1		# signed/expo. of -2^(-M)
   7459 	mov.w		%d1,ONEBYSC(%a6)	# OnebySc is -2^(-M)
   7460 	mov.l		&0x80000000,ONEBYSC+4(%a6)
   7461 	clr.l		ONEBYSC+8(%a6)
   7462 	fmul.x		%fp3,%fp1		# fp1 IS S*(A1+S*(A3+S*A5))
   7463 
   7464 	fmul.x		%fp0,%fp2		# fp2 IS R*S*(A2+S*(A4+S*A6))
   7465 	fadd.x		%fp1,%fp0		# fp0 IS R+S*(A1+S*(A3+S*A5))
   7466 
   7467 	fadd.x		%fp2,%fp0		# fp0 IS EXP(R)-1
   7468 
   7469 	fmovm.x		(%sp)+,&0x30		# {%fp2/%fp3} restored
   7470 
   7471 #--Step 5
   7472 #--Compute 2^(J/64)*p
   7473 
   7474 	fmul.x		(%a1),%fp0		# p := T*(Exp(R)-1), T = lead. pt. of 2^(J/64)
   7475 
   7476 #--Step 6
   7477 #--Step 6.1
   7478 	mov.l		L_SCR1(%a6),%d1		# retrieve M
   7479 	cmp.l		%d1,&63
   7480 	ble.b		MLE63			# M <= 63: go to Step 6.3
   7481 #--Step 6.2	M >= 64
   7482 	fmov.s		12(%a1),%fp1		# fp1 is t
   7483 	fadd.x		ONEBYSC(%a6),%fp1	# fp1 is t+OnebySc
   7484 	fadd.x		%fp1,%fp0		# p+(t+OnebySc), fp1 released
   7485 	fadd.x		(%a1),%fp0		# T+(p+(t+OnebySc))
   7486 	bra		EM1SCALE
   7487 MLE63:
   7488 #--Step 6.3	M <= 63
        #--d1 holds M; fp0 holds p; a1 points to T, with t at offset 12.
   7489 	cmp.l		%d1,&-3
   7490 	bge.b		MGEN3			# M >= -3: go to Step 6.5
   7491 MLTN3:
   7492 #--Step 6.4	M <= -4
   7493 	fadd.s		12(%a1),%fp0		# p+t
   7494 	fadd.x		(%a1),%fp0		# T+(p+t)
   7495 	fadd.x		ONEBYSC(%a6),%fp0	# OnebySc + (T+(p+t))
   7496 	bra		EM1SCALE
   7497 MGEN3:
   7498 #--Step 6.5	-3 <= M <= 63
        #--fp0 holds p; a1 points to T. Falls through into EM1SCALE.
   7499 	fmov.x		(%a1)+,%fp1		# fp1 is T; a1 now points to t
   7500 	fadd.s		(%a1),%fp0		# fp0 is p+t
   7501 	fadd.x		ONEBYSC(%a6),%fp1	# fp1 is T+OnebySc
   7502 	fadd.x		%fp1,%fp0		# (T+OnebySc)+(p+t)
   7503 
   7504 EM1SCALE:
   7505 #--Step 6.6
        #--Common tail of Steps 6.2, 6.4 and 6.5: scale the sum by Sc = 2^(M).
   7506 	fmov.l		%d0,%fpcr		# restore user FPCR
   7507 	fmul.x		SC(%a6),%fp0		# Step 6.7: ans := Sc * ans
   7508 	bra		t_inx2			# exit through t_inx2 (defined elsewhere)
   7509 
   7510 EM1SM:
   7511 #--Step 7	|X| < 1/4.
        #--d1 still holds the biased exponent of X in its upper word (from setoxm1).
   7512 	cmp.l		%d1,&0x3FBE0000		# 2^(-65)
   7513 	bge.b		EM1POLY			# |X| >= 2^(-65): Step 9; else fall into Step 8
   7514 
   7515 EM1TINY:
   7516 #--Step 8	|X| < 2^(-65)
        #--NOTE(review): with the 0x3FFF exponent bias, 0x0033 is the exponent of
        #--2^(-16332), while the comments say 2^(-16312) -- confirm the intent.
   7517 	cmp.l		%d1,&0x00330000		# 2^(-16312)
   7518 	blt.b		EM12TINY		# smaller still: scaled path, Step 8.3
   7519 #--Step 8.2
   7520 	mov.l		&0x80010000,SC(%a6)	# SC is -2^(-16382)
   7521 	mov.l		&0x80000000,SC+4(%a6)
   7522 	clr.l		SC+8(%a6)
   7523 	fmov.x		(%a0),%fp0		# fp0 is X
   7524 	fmov.l		%d0,%fpcr		# restore user FPCR
   7525 	mov.b		&FADD_OP,%d1		# last inst is ADD
   7526 	fadd.x		SC(%a6),%fp0		# ans := X - 2^(-16382)
   7527 	bra		t_catch			# exit through t_catch (defined elsewhere)
   7528 
   7529 EM12TINY:
   7530 #--Step 8.3
        #--Scale X up by 2^(140), subtract 2^(-16382), then scale back down by
        #--2^(-140) under the FPCR value taken from d0.
   7531 	fmov.x		(%a0),%fp0		# fp0 is X
   7532 	fmul.d		TWO140(%pc),%fp0	# X * 2^(140)
   7533 	mov.l		&0x80010000,SC(%a6)	# SC is -2^(-16382)
   7534 	mov.l		&0x80000000,SC+4(%a6)
   7535 	clr.l		SC+8(%a6)
   7536 	fadd.x		SC(%a6),%fp0		# X*2^(140) - 2^(-16382)
   7537 	fmov.l		%d0,%fpcr		# restore user FPCR
   7538 	mov.b		&FMUL_OP,%d1		# last inst is MUL
   7539 	fmul.d		TWON140(%pc),%fp0	# multiply by 2^(-140)
   7540 	bra		t_catch			# exit through t_catch (defined elsewhere)
   7541 
   7542 EM1POLY:
   7543 #--Step 9	exp(X)-1 by a simple polynomial
        #--Two interleaved chains in S = X*X: fp1 carries the even-index
        #--coefficients (B12 down to B2), fp2 the odd-index ones (B11 down to B3).
   7544 	fmov.x		(%a0),%fp0		# fp0 is X
   7545 	fmul.x		%fp0,%fp0		# fp0 is S := X*X
   7546 	fmovm.x		&0xc,-(%sp)		# save {%fp2/%fp3} on the stack
   7547 	fmov.s		&0x2F30CAA8,%fp1	# fp1 is B12
   7548 	fmul.x		%fp0,%fp1		# fp1 is S*B12
   7549 	fmov.s		&0x310F8290,%fp2	# fp2 is B11
   7550 	fadd.s		&0x32D73220,%fp1	# fp1 is B10+S*B12
   7551 
   7552 	fmul.x		%fp0,%fp2		# fp2 is S*B11
   7553 	fmul.x		%fp0,%fp1		# fp1 is S*(B10 + ...
   7554 
   7555 	fadd.s		&0x3493F281,%fp2	# fp2 is B9+S*...
   7556 	fadd.d		EM1B8(%pc),%fp1		# fp1 is B8+S*...
   7557 
   7558 	fmul.x		%fp0,%fp2		# fp2 is S*(B9+...
   7559 	fmul.x		%fp0,%fp1		# fp1 is S*(B8+...
   7560 
   7561 	fadd.d		EM1B7(%pc),%fp2		# fp2 is B7+S*...
   7562 	fadd.d		EM1B6(%pc),%fp1		# fp1 is B6+S*...
   7563 
   7564 	fmul.x		%fp0,%fp2		# fp2 is S*(B7+...
   7565 	fmul.x		%fp0,%fp1		# fp1 is S*(B6+...
   7566 
   7567 	fadd.d		EM1B5(%pc),%fp2		# fp2 is B5+S*...
   7568 	fadd.d		EM1B4(%pc),%fp1		# fp1 is B4+S*...
   7569 
   7570 	fmul.x		%fp0,%fp2		# fp2 is S*(B5+...
   7571 	fmul.x		%fp0,%fp1		# fp1 is S*(B4+...
   7572 
   7573 	fadd.d		EM1B3(%pc),%fp2		# fp2 is B3+S*...
   7574 	fadd.x		EM1B2(%pc),%fp1		# fp1 is B2+S*...
   7575 
   7576 	fmul.x		%fp0,%fp2		# fp2 is S*(B3+...
   7577 	fmul.x		%fp0,%fp1		# fp1 is S*(B2+...
   7578 
   7579 	fmul.x		%fp0,%fp2		# fp2 is S*S*(B3+...)
   7580 	fmul.x		(%a0),%fp1		# fp1 is X*S*(B2...
   7581 
   7582 	fmul.s		&0x3F000000,%fp0	# fp0 is S*B1
   7583 	fadd.x		%fp2,%fp1		# fp1 is Q
   7584 
   7585 	fmovm.x		(%sp)+,&0x30		# {%fp2/%fp3} restored
   7586 
   7587 	fadd.x		%fp1,%fp0		# fp0 is S*B1+Q
   7588 
   7589 	fmov.l		%d0,%fpcr		# restore user FPCR
   7590 	fadd.x		(%a0),%fp0		# ans := X + (S*B1+Q)
   7591 	bra		t_inx2			# exit through t_inx2 (defined elsewhere)
   7592 
   7593 EM1BIG:
   7594 #--Step 10	|X| > 70 log2
   7595 	mov.l		(%a0),%d1		# sign/exponent longword of X
   7596 	cmp.l		%d1,&0			# signed compare against zero
   7597 	bgt.w		EXPC1			# X > 0: exp(X)-1 handled as exp(X) in setox
   7598 #--Step 10.2
   7599 	fmov.s		&0xBF800000,%fp0	# fp0 is -1
   7600 	fmov.l		%d0,%fpcr		# restore user FPCR
   7601 	fadd.s		&0x00800000,%fp0	# -1 + 2^(-126)
   7602 	bra		t_minx2			# exit through t_minx2 (defined elsewhere)
   7603 
   7604 	global		setoxm1d
   7605 setoxm1d:
   7606 #--entry point for EXPM1(X), here X is denormalized
   7607 #--Step 0.
        #--All work is delegated to t_extdnrm (defined elsewhere in the file).
   7608 	bra		t_extdnrm
   7609 
   7610 #########################################################################
   7611 # sgetexp():  returns the exponent portion of the input argument.	#
   7612 #	      The exponent bias is removed and the exponent value is	#
   7613 #	      returned as an extended precision number in fp0.		#
   7614 # sgetexpd(): handles denormalized numbers. 				#
   7615 #									#
   7616 # sgetman():  extracts the mantissa of the input argument. The 		#
   7617 #	      mantissa is converted to an extended precision number w/ 	#
   7618 #	      an exponent of $3fff and is returned in fp0. The range of #
   7619 #	      the result is [1.0 - 2.0).				#
   7620 # sgetmand(): handles denormalized numbers.				#
   7621 #									#
   7622 # INPUT *************************************************************** #
   7623 #	a0  = pointer to extended precision input			#
   7624 #									#
   7625 # OUTPUT ************************************************************** #
   7626 #	fp0 = exponent(X) or mantissa(X)				#
   7627 #									#
   7628 #########################################################################
   7629 
   7630 	global		sgetexp
   7631 sgetexp:
        #--Return the unbiased exponent of the operand at (a0) in fp0.
   7632 	mov.w		SRC_EX(%a0),%d0		# get the exponent
   7633 	bclr		&0xf,%d0		# clear the sign bit
   7634 	subi.w		&0x3fff,%d0		# subtract off the bias
   7635 	fmov.w		%d0,%fp0		# return exp in fp0
        #--NOTE(review): the blt below appears to rely on the condition codes
        #--left by subi.w, i.e. on fmov.w not altering them -- confirm.
   7636 	blt.b		sgetexpn		# it's negative
   7637 	rts
   7638 
   7639 sgetexpn:
   7640 	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
   7641 	rts
   7642 
   7643 	global		sgetexpd
   7644 sgetexpd:
        #--Denormalized-input variant of sgetexp; always sets the 'N' ccode bit.
        #--presumably norm (defined elsewhere) returns its shift count in d0 --
        #--TODO confirm against norm.
   7645 	bsr.l		norm			# normalize
   7646 	neg.w		%d0			# new exp = -(shft amt)
   7647 	subi.w		&0x3fff,%d0		# subtract off the bias
   7648 	fmov.w		%d0,%fp0		# return exp in fp0
   7649 	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
   7650 	rts
   7651 
   7652 	global		sgetman
   7653 sgetman:
        #--Return the operand at (a0) in fp0 with its exponent replaced by $3fff
        #--and its sign kept.
   7654 	mov.w		SRC_EX(%a0),%d0		# get the exp
   7655 	ori.w		&0x7fff,%d0		# set all 15 exponent bits, keep the sign bit
   7656 	bclr		&0xe,%d0		# clear bit 14: exponent is now $3fff
   7657 
   7658 # here, we build the result in a tmp location so as not to disturb the input
   7659 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # copy to tmp loc
   7660 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # copy to tmp loc
   7661 	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
   7662 	fmov.x		FP_SCR0(%a6),%fp0	# put new value back in fp0
        #--NOTE(review): the bmi below appears to rely on the N flag left by the
        #--mov.w of d0 above, i.e. on fmov.x not altering it -- confirm.
   7663 	bmi.b		sgetmann		# it's negative
   7664 	rts
   7665 
   7666 sgetmann:
   7667 	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
   7668 	rts
   7669 
   7670 #
   7671 # For denormalized numbers, shift the mantissa until the j-bit = 1,
   7672 # then load the exponent with +/- $3fff.
   7673 #
   7674 	global		sgetmand
   7675 sgetmand:
   7676 	bsr.l		norm			# normalize the mantissa (norm is defined elsewhere)
   7677 	bra.b		sgetman			# then build the result as for a normalized input
   7678 
   7679 #########################################################################
   7680 # scosh():  computes the hyperbolic cosine of a normalized input	#
   7681 # scoshd(): computes the hyperbolic cosine of a denormalized input	#
   7682 #									#
   7683 # INPUT ***************************************************************	#
   7684 #	a0 = pointer to extended precision input			#
   7685 #	d0 = round precision,mode					#
   7686 #									#
   7687 # OUTPUT **************************************************************	#
   7688 #	fp0 = cosh(X)							#
   7689 #									#
   7690 # ACCURACY and MONOTONICITY *******************************************	#
   7691 #	The returned result is within 3 ulps in 64 significant bit, 	#
   7692 #	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
   7693 #	rounded to double precision. The result is provably monotonic 	#
   7694 #	in double precision.						#
   7695 #									#
   7696 # ALGORITHM ***********************************************************	#
   7697 #									#
   7698 #	COSH								#
   7699 #	1. If |X| > 16380 log2, go to 3.				#
   7700 #									#
   7701 #	2. (|X| <= 16380 log2) Cosh(X) is obtained by the formulae	#
   7702 #		Y = |X|, z = exp(Y), and				#
   7703 #		cosh(X) = (1/2)*( z + 1/z ).				#
   7704 #		Exit.							#
   7705 #									#
   7706 #	3. (|X| > 16380 log2). If |X| > 16480 log2, go to 5.		#
   7707 #									#
   7708 #	4. (16380 log2 < |X| <= 16480 log2)				#
   7709 #		cosh(X) = exp(|X|)/2.					#
   7710 #		However, invoking exp(|X|) may cause premature 		#
   7711 #		overflow. Thus, we calculate cosh(X) as follows:	#
   7712 #		Y	:= |X|						#
   7713 #		Fact	:=	2**(16380)				#
   7714 #		Y'	:= Y - 16381 log2				#
   7715 #		cosh(X) := Fact * exp(Y').				#
   7716 #		Exit.							#
   7717 #									#
   7718 #	5. (|X| > 16480 log2) cosh(X) must overflow. Return		#
   7719 #		Huge*Huge to generate overflow and an infinity with	#
   7720 #		the appropriate sign. Huge is the largest finite number	#
   7721 #		in extended format. Exit.				#
   7722 #									#
   7723 #########################################################################
   7724 
   7725 TWO16380:
   7726 	long		0x7FFB0000,0x80000000,0x00000000,0x00000000	# 2^(16380) in extended precision (biased expo. 0x7FFB)
   7727 
   7728 	global		scosh
   7729 scosh:
        #--cosh(X) for normalized X. a0 = pointer to X, d0 = round precision,mode.
   7730 	fmov.x		(%a0),%fp0		# LOAD INPUT
   7731 
   7732 	mov.l		(%a0),%d1		# sign/exponent longword of X
   7733 	mov.w		4(%a0),%d1		# low word := top 16 bits of the significand
   7734 	and.l		&0x7FFFFFFF,%d1		# drop the sign: d1 describes |X|
   7735 	cmp.l		%d1,&0x400CB167		# 16380 log2 (same constant as in setox)
   7736 	bgt.b		COSHBIG
   7737 
   7738 #--THIS IS THE USUAL CASE, |X| < 16380 LOG2
   7739 #--COSH(X) = (1/2) * ( EXP(X) + 1/EXP(X) )
   7740 
   7741 	fabs.x		%fp0			# |X|
   7742 
   7743 	mov.l		%d0,-(%sp)		# save the caller's d0 across the setox call
   7744 	clr.l		%d0			# call setox with d0 = 0
   7745 	fmovm.x		&0x01,-(%sp)		# save |X| to stack
   7746 	lea		(%sp),%a0		# pass ptr to |X|
   7747 	bsr		setox			# FP0 IS EXP(|X|)
   7748 	add.l		&0xc,%sp		# erase |X| from stack
   7749 	fmul.s		&0x3F000000,%fp0	# (1/2)EXP(|X|)
   7750 	mov.l		(%sp)+,%d0		# recover the caller's d0
   7751 
   7752 	fmov.s		&0x3E800000,%fp1	# (1/4)
   7753 	fdiv.x		%fp0,%fp1		# 1/(2 EXP(|X|))
   7754 
   7755 	fmov.l		%d0,%fpcr		# final add runs under the caller's precision/mode
   7756 	mov.b		&FADD_OP,%d1		# last inst is ADD
   7757 	fadd.x		%fp1,%fp0		# (1/2)EXP(|X|) + 1/(2 EXP(|X|))
   7758 	bra		t_catch			# exit through t_catch (defined elsewhere)
   7759 
   7760 COSHBIG:
   7761 	cmp.l		%d1,&0x400CB2B3
   7762 	bgt.b		COSHHUGE
   7763 
   7764 	fabs.x		%fp0
   7765 	fsub.d		T1(%pc),%fp0		# (|X|-16381LOG2_LEAD)
   7766 	fsub.d		T2(%pc),%fp0		# |X| - 16381 LOG2, ACCURATE
   7767 
   7768 	mov.l		%d0,-(%sp)
   7769 	clr.l		%d0
   7770 	fmovm.x		&0x01,-(%sp)		# save fp0 to stack
   7771 	lea		(%sp),%a0		# pass ptr to fp0
   7772 	bsr		setox
   7773 	add.l		&0xc,%sp		# clear fp0 from stack
   7774 	mov.l		(%sp)+,%d0
   7775 
   7776 	fmov.l		%d0,%fpcr
   7777 	mov.b		&FMUL_OP,%d1		# last inst is MUL
   7778 	fmul.x		TWO16380(%pc),%fp0
   7779 	bra		t_catch
   7780 
   7781 COSHHUGE:
   7782 	bra		t_ovfl2
   7783 
   7784 	global		scoshd
   7785 #--COSH(X) = 1 FOR DENORMALIZED X
   7786 scoshd:
   7787 	fmov.s		&0x3F800000,%fp0	# 1.0 in single format
   7788 
   7789 	fmov.l		%d0,%fpcr		# load fpcr from d0 before the final add
   7790 	fadd.s		&0x00800000,%fp0	# 1 + 2^(-126); presumably added only to raise inexact - confirm
   7791 	bra		t_pinx2			# exit handler defined elsewhere in the file
   7792 
   7793 #########################################################################
   7794 # ssinh():  computes the hyperbolic sine of a normalized input		#
   7795 # ssinhd(): computes the hyperbolic sine of a denormalized input	#
   7796 #									#
   7797 # INPUT *************************************************************** #
   7798 #	a0 = pointer to extended precision input			#
   7799 #	d0 = round precision,mode					#
   7800 #									#
   7801 # OUTPUT ************************************************************** #
   7802 #	fp0 = sinh(X)							#
   7803 #									#
   7804 # ACCURACY and MONOTONICITY *******************************************	#
   7805 #	The returned result is within 3 ulps in 64 significant bits,	#
   7806 #	i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
   7807 #	rounded to double precision. The result is provably monotonic	#
   7808 #	in double precision.						#
   7809 #									#
   7810 # ALGORITHM *********************************************************** #
   7811 #									#
   7812 #       SINH								#
   7813 #       1. If |X| > 16380 log2, go to 3.				#
   7814 #									#
   7815 #       2. (|X| <= 16380 log2) Sinh(X) is obtained by the formula	#
   7816 #               y = |X|, sgn = sign(X), and z = expm1(Y),		#
   7817 #               sinh(X) = sgn*(1/2)*( z + z/(1+z) ).			#
   7818 #          Exit.							#
   7819 #									#
   7820 #       3. If |X| > 16480 log2, go to 5.				#
   7821 #									#
   7822 #       4. (16380 log2 < |X| <= 16480 log2)				#
   7823 #               sinh(X) = sign(X) * exp(|X|)/2.				#
   7824 #          However, invoking exp(|X|) may cause premature overflow.	#
   7825 #          Thus, we calculate sinh(X) as follows:			#
   7826 #             Y       := |X|						#
   7827 #             sgn     := sign(X)					#
   7828 #             sgnFact := sgn * 2**(16380)				#
   7829 #             Y'      := Y - 16381 log2					#
   7830 #             sinh(X) := sgnFact * exp(Y').				#
   7831 #          Exit.							#
   7832 #									#
   7833 #       5. (|X| > 16480 log2) sinh(X) must overflow. Return		#
   7834 #          sign(X)*Huge*Huge to generate overflow and an infinity with	#
   7835 #          the appropriate sign. Huge is the largest finite number in	#
   7836 #          extended format. Exit.					#
   7837 #									#
   7838 #########################################################################
   7839 
   7840 	global		ssinh
   7841 ssinh:
   7842 	fmov.x		(%a0),%fp0		# LOAD INPUT
   7843 
   7844 	mov.l		(%a0),%d1		# upper word of d1 = sign/exponent of X
   7845 	mov.w		4(%a0),%d1		# lower word of d1 = top 16 mantissa bits
   7846 	mov.l		%d1,%a1			# save (compacted) operand
   7847 	and.l		&0x7FFFFFFF,%d1		# drop the sign: compacted |X|
   7848 	cmp.l		%d1,&0x400CB167		# is |X| > 16380 LOG2?
   7849 	bgt.b		SINHBIG			# yes
   7850 
   7851 #--THIS IS THE USUAL CASE, |X| < 16380 LOG2
   7852 #--Y = |X|, Z = EXPM1(Y), SINH(X) = SIGN(X)*(1/2)*( Z + Z/(1+Z) )
   7853 
   7854 	fabs.x		%fp0			# Y = |X|
   7855 
   7856 	movm.l		&0x8040,-(%sp)		# {a1/d0}
   7857 	fmovm.x		&0x01,-(%sp)		# save Y on stack
   7858 	lea		(%sp),%a0		# pass ptr to Y
   7859 	clr.l		%d0			# call setoxm1 with d0 = 0
   7860 	bsr		setoxm1			# FP0 IS Z = EXPM1(Y)
   7861 	add.l		&0xc,%sp		# clear Y from stack
   7862 	fmov.l		&0,%fpcr		# zero fpcr for the intermediate operations
   7863 	movm.l		(%sp)+,&0x0201		# {a1/d0}
   7864 
   7865 	fmov.x		%fp0,%fp1		# FP1 = Z
   7866 	fadd.s		&0x3F800000,%fp1	# 1+Z
   7867 	fmov.x		%fp0,-(%sp)		# push Z for the add below
   7868 	fdiv.x		%fp1,%fp0		# Z/(1+Z)
   7869 	mov.l		%a1,%d1			# compacted operand saved at entry
   7870 	and.l		&0x80000000,%d1		# keep only SIGN(X)
   7871 	or.l		&0x3F000000,%d1		# SIGN(X)*(1/2) in single format
   7872 	fadd.x		(%sp)+,%fp0		# Z + Z/(1+Z); pops Z
   7873 	mov.l		%d1,-(%sp)		# push SIGN(X)*(1/2) as the final multiplier
   7874 
   7875 	fmov.l		%d0,%fpcr		# load fpcr from d0 just before the final operation
   7876 	mov.b		&FMUL_OP,%d1		# last inst is MUL
   7877 	fmul.s		(%sp)+,%fp0		# last fp inst - possible exceptions set
   7878 	bra		t_catch
   7879 
   7880 SINHBIG:
   7881 	cmp.l		%d1,&0x400CB2B3		# is |X| > 16480 LOG2?
   7882 	bgt		t_ovfl			# yes, the result must overflow
   7883 	fabs.x		%fp0			# Y = |X|
   7884 	fsub.d		T1(%pc),%fp0		# (|X|-16381LOG2_LEAD)
   7885 	mov.l		&0,-(%sp)		# build sgnFact on the stack: low mantissa longword
   7886 	mov.l		&0x80000000,-(%sp)	# high mantissa longword (1.0)
   7887 	mov.l		%a1,%d1			# compacted operand saved at entry
   7888 	and.l		&0x80000000,%d1		# keep only SIGN(X)
   7889 	or.l		&0x7FFB0000,%d1		# sign | exponent of 2^16380
   7890 	mov.l		%d1,-(%sp)		# EXTENDED FMT
   7891 	fsub.d		T2(%pc),%fp0		# |X| - 16381 LOG2, ACCURATE
   7892 
   7893 	mov.l		%d0,-(%sp)		# keep d0; it is written to fpcr below
   7894 	clr.l		%d0			# call setox with d0 = 0
   7895 	fmovm.x		&0x01,-(%sp)		# save fp0 on stack
   7896 	lea		(%sp),%a0		# pass ptr to fp0
   7897 	bsr		setox			# FP0 IS EXP(|X| - 16381 LOG2)
   7898 	add.l		&0xc,%sp		# clear fp0 from stack
   7899 
   7900 	mov.l		(%sp)+,%d0		# recover the saved d0
   7901 	fmov.l		%d0,%fpcr		# load fpcr from d0 just before the final operation
   7902 	mov.b		&FMUL_OP,%d1		# last inst is MUL
   7903 	fmul.x		(%sp)+,%fp0		# sgnFact * EXP(Y'), pops sgnFact - possible exception
   7904 	bra		t_catch
   7905 
   7906 	global		ssinhd
   7907 #--SINH(X) = X FOR DENORMALIZED X
   7908 ssinhd:
   7909 	bra		t_extdnrm		# all work is done by t_extdnrm (defined elsewhere in the file)
   7910 
   7911 #########################################################################
   7912 # stanh():  computes the hyperbolic tangent of a normalized input	#
   7913 # stanhd(): computes the hyperbolic tangent of a denormalized input	#
   7914 #									#
   7915 # INPUT ***************************************************************	#
   7916 #	a0 = pointer to extended precision input			#
   7917 #	d0 = round precision,mode					#
   7918 #									#
   7919 # OUTPUT **************************************************************	#
   7920 #	fp0 = tanh(X)							#
   7921 #									#
   7922 # ACCURACY and MONOTONICITY *******************************************	#
   7923 #	The returned result is within 3 ulps in 64 significant bits,	#
   7924 #	i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
   7925 #	rounded to double precision. The result is provably monotonic	#
   7926 #	in double precision.						#
   7927 #									#
   7928 # ALGORITHM ***********************************************************	#
   7929 #									#
   7930 #	TANH								#
   7931 #	1. If |X| >= (5/2) log2 or |X| <= 2**(-40), go to 3.		#
   7932 #									#
   7933 #	2. (2**(-40) < |X| < (5/2) log2) Calculate tanh(X) by		#
   7934 #		sgn := sign(X), y := 2|X|, z := expm1(Y), and		#
   7935 #		tanh(X) = sgn*( z/(2+z) ).				#
   7936 #		Exit.							#
   7937 #									#
   7938 #	3. (|X| <= 2**(-40) or |X| >= (5/2) log2). If |X| < 1,		#
   7939 #		go to 7.						#
   7940 #									#
   7941 #	4. (|X| >= (5/2) log2) If |X| >= 50 log2, go to 6.		#
   7942 #									#
   7943 #	5. ((5/2) log2 <= |X| < 50 log2) Calculate tanh(X) by		#
   7944 #		sgn := sign(X), y := 2|X|, z := exp(Y),			#
   7945 #		tanh(X) = sgn - [ sgn*2/(1+z) ].			#
   7946 #		Exit.							#
   7947 #									#
   7948 #	6. (|X| >= 50 log2) Tanh(X) = +-1 (round to nearest). Thus, we	#
   7949 #		calculate Tanh(X) by					#
   7950 #		sgn := sign(X), Tiny := 2**(-126),			#
   7951 #		tanh(X) := sgn - sgn*Tiny.				#
   7952 #		Exit.							#
   7953 #									#
   7954 #	7. (|X| < 2**(-40)). Tanh(X) = X.	Exit.			#
   7955 #									#
   7956 #########################################################################
   7957 
   7958 	set		X,FP_SCR0		# scratch copy of the input, later reused for Y = 2|X|
   7959 	set		XFRAC,X+4		# high mantissa longword of X
   7960 
   7961 	set		SGN,L_SCR3		# holds SIGN(X) (bit 31 only)
   7962 
   7963 	set		V,FP_SCR0		# signed Z+2; shares FP_SCR0 with X
   7964 
   7965 	global		stanh
   7966 stanh:
   7967 	fmov.x		(%a0),%fp0		# LOAD INPUT
   7968 
   7969 	fmov.x		%fp0,X(%a6)		# keep a copy of X in scratch
   7970 	mov.l		(%a0),%d1		# upper word of d1 = sign/exponent of X
   7971 	mov.w		4(%a0),%d1		# lower word of d1 = top 16 mantissa bits
   7972 	mov.l		%d1,X(%a6)		# first longword of the copy now holds compacted X
   7973 	and.l		&0x7FFFFFFF,%d1		# drop the sign: compacted |X|
   7974 	cmp.l		%d1, &0x3fd78000	# is |X| < 2^(-40)?
   7975 	blt.w		TANHBORS		# yes
   7976 	cmp.l		%d1, &0x3fffddce	# is |X| > (5/2)LOG2?
   7977 	bgt.w		TANHBORS		# yes
   7978 
   7979 #--THIS IS THE USUAL CASE
   7980 #--Y = 2|X|, Z = EXPM1(Y), TANH(X) = SIGN(X) * Z / (Z+2).
   7981 
   7982 	mov.l		X(%a6),%d1		# compacted X
   7983 	mov.l		%d1,SGN(%a6)		# masked down to the sign bit below
   7984 	and.l		&0x7FFF0000,%d1		# exponent only: clears sign and the mantissa copy
   7985 	add.l		&0x00010000,%d1		# EXPONENT OF 2|X|
   7986 	mov.l		%d1,X(%a6)		# scratch now holds Y = 2|X|
   7987 	and.l		&0x80000000,SGN(%a6)	# SGN = SIGN(X)
   7988 	fmov.x		X(%a6),%fp0		# FP0 IS Y = 2|X|
   7989 
   7990 	mov.l		%d0,-(%sp)		# keep d0; it is written to fpcr below
   7991 	clr.l		%d0			# call setoxm1 with d0 = 0
   7992 	fmovm.x		&0x1,-(%sp)		# save Y on stack
   7993 	lea		(%sp),%a0		# pass ptr to Y
   7994 	bsr		setoxm1			# FP0 IS Z = EXPM1(Y)
   7995 	add.l		&0xc,%sp		# clear Y from stack
   7996 	mov.l		(%sp)+,%d0		# recover the saved d0
   7997 
   7998 	fmov.x		%fp0,%fp1		# FP1 = Z
   7999 	fadd.s		&0x40000000,%fp1	# Z+2
   8000 	mov.l		SGN(%a6),%d1		# SIGN(X)
   8001 	fmov.x		%fp1,V(%a6)		# V = Z+2
   8002 	eor.l		%d1,V(%a6)		# V = SIGN(X)*(Z+2)
   8003 
   8004 	fmov.l		%d0,%fpcr		# restore users round prec,mode
   8005 	fdiv.x		V(%a6),%fp0		# Z / (SIGN(X)*(Z+2))
   8006 	bra		t_inx2
   8007 
   8008 TANHBORS:
   8009 	cmp.l		%d1,&0x3FFF8000		# is |X| < 1?
   8010 	blt.w		TANHSM			# yes, so |X| < 2^(-40): TANH(X) = X
   8011 
   8012 	cmp.l		%d1,&0x40048AA1		# is |X| > 50 LOG2?
   8013 	bgt.w		TANHHUGE		# yes
   8014 
   8015 #-- (5/2) LOG2 < |X| < 50 LOG2,
   8016 #--TANH(X) = 1 - (2/[EXP(2X)+1]). LET Y = 2|X|, SGN = SIGN(X),
   8017 #--TANH(X) = SGN -	SGN*2/[EXP(Y)+1].
   8018 
   8019 	mov.l		X(%a6),%d1		# compacted X
   8020 	mov.l		%d1,SGN(%a6)		# masked down to the sign bit below
   8021 	and.l		&0x7FFF0000,%d1		# exponent only: clears sign and the mantissa copy
   8022 	add.l		&0x00010000,%d1		# EXPO OF 2|X|
   8023 	mov.l		%d1,X(%a6)		# Y = 2|X|
   8024 	and.l		&0x80000000,SGN(%a6)	# SGN = SIGN(X)
   8025 	mov.l		SGN(%a6),%d1		# d1 is loaded from SGN again after the setox call
   8026 	fmov.x		X(%a6),%fp0		# Y = 2|X|
   8027 
   8028 	mov.l		%d0,-(%sp)		# keep d0; it is written to fpcr below
   8029 	clr.l		%d0			# call setox with d0 = 0
   8030 	fmovm.x		&0x01,-(%sp)		# save Y on stack
   8031 	lea		(%sp),%a0		# pass ptr to Y
   8032 	bsr		setox			# FP0 IS EXP(Y)
   8033 	add.l		&0xc,%sp		# clear Y from stack
   8034 	mov.l		(%sp)+,%d0		# recover the saved d0
   8035 	mov.l		SGN(%a6),%d1		# SIGN(X)
   8036 	fadd.s		&0x3F800000,%fp0	# EXP(Y)+1
   8037 
   8038 	eor.l		&0xC0000000,%d1		# -SIGN(X)*2
   8039 	fmov.s		%d1,%fp1		# -SIGN(X)*2 IN SGL FMT
   8040 	fdiv.x		%fp0,%fp1		# -SIGN(X)2 / [EXP(Y)+1 ]
   8041 
   8042 	mov.l		SGN(%a6),%d1		# SIGN(X)
   8043 	or.l		&0x3F800000,%d1		# SGN
   8044 	fmov.s		%d1,%fp0		# SGN IN SGL FMT
   8045 
   8046 	fmov.l		%d0,%fpcr		# restore users round prec,mode
   8047 	mov.b		&FADD_OP,%d1		# last inst is ADD
   8048 	fadd.x		%fp1,%fp0		# SGN - SGN*2/[EXP(Y)+1]
   8049 	bra		t_inx2
   8050 
   8051 TANHSM:
   8052 	fmov.l		%d0,%fpcr		# restore users round prec,mode
   8053 	mov.b		&FMOV_OP,%d1		# last inst is MOVE
   8054 	fmov.x		X(%a6),%fp0		# last inst - possible exception set
   8055 	bra		t_catch
   8056 
   8057 #---RETURN SGN(X) - SGN(X)EPS
   8058 TANHHUGE:
   8059 	mov.l		X(%a6),%d1		# compacted X
   8060 	and.l		&0x80000000,%d1		# keep only SIGN(X)
   8061 	or.l		&0x3F800000,%d1		# SIGN(X)*1.0 in single format
   8062 	fmov.s		%d1,%fp0		# FP0 = SGN
   8063 	and.l		&0x80000000,%d1		# back to the bare sign bit
   8064 	eor.l		&0x80800000,%d1		# -SIGN(X)*EPS
   8065 
   8066 	fmov.l		%d0,%fpcr		# restore users round prec,mode
   8067 	fadd.s		%d1,%fp0		# SGN - SGN*EPS, EPS = 2^(-126)
   8068 	bra		t_inx2
   8069 
   8070 	global		stanhd
   8071 #--TANH(X) = X FOR DENORMALIZED X
   8072 stanhd:
   8073 	bra		t_extdnrm		# all work is done by t_extdnrm (defined elsewhere in the file)
   8074 
   8075 #########################################################################
   8076 # slogn():    computes the natural logarithm of a normalized input	#
   8077 # slognd():   computes the natural logarithm of a denormalized input	#
   8078 # slognp1():  computes the log(1+X) of a normalized input		#
   8079 # slognp1d(): computes the log(1+X) of a denormalized input		#
   8080 #									#
   8081 # INPUT ***************************************************************	#
   8082 #	a0 = pointer to extended precision input			#
   8083 #	d0 = round precision,mode					#
   8084 #									#
   8085 # OUTPUT **************************************************************	#
   8086 #	fp0 = log(X) or log(1+X)					#
   8087 #									#
   8088 # ACCURACY and MONOTONICITY *******************************************	#
   8089 #	The returned result is within 2 ulps in 64 significant bits,	#
   8090 #	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
   8091 #	rounded to double precision. The result is provably monotonic	#
   8092 #	in double precision.						#
   8093 #									#
   8094 # ALGORITHM ***********************************************************	#
   8095 #	LOGN:								#
   8096 #	Step 1. If |X-1| < 1/16, approximate log(X) by an odd 		#
   8097 #		polynomial in u, where u = 2(X-1)/(X+1). Otherwise, 	#
   8098 #		move on to Step 2.					#
   8099 #									#
   8100 #	Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first	#
   8101 #		seven significant bits of Y plus 2**(-7), i.e. 		#
   8102 #		F = 1.xxxxxx1 in base 2 where the six "x" match those 	#
   8103 #		of Y. Note that |Y-F| <= 2**(-7).			#
   8104 #									#
   8105 #	Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a 		#
   8106 #		polynomial in u, log(1+u) = poly.			#
   8107 #									#
   8108 #	Step 4. Reconstruct 						#
   8109 #		log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u)	#
   8110 #		by k*log(2) + (log(F) + poly). The values of log(F) are	#
   8111 #		calculated beforehand and stored in the program.	#
   8112 #									#
   8113 #	lognp1:								#
   8114 #	Step 1: If |X| < 1/16, approximate log(1+X) by an odd 		#
   8115 #		polynomial in u where u = 2X/(2+X). Otherwise, move on	#
   8116 #		to Step 2.						#
   8117 #									#
   8118 #	Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done	#
   8119 #		in Step 2 of the algorithm for LOGN and compute 	#
   8120 #		log(1+X) as k*log(2) + log(F) + poly where poly 	#
   8121 #		approximates log(1+u), u = (Y-F)/F. 			#
   8122 #									#
   8123 #	Implementation Notes:						#
   8124 #	Note 1. There are 64 different possible values for F, thus 64 	#
   8125 #		log(F)'s need to be tabulated. Moreover, the values of	#
   8126 #		1/F are also tabulated so that the division in (Y-F)/F	#
   8127 #		can be performed by a multiplication.			#
   8128 #									#
   8129 #	Note 2. In Step 2 of lognp1, in order to preserve accuracy, 	#
   8130 #		the value Y-F has to be calculated carefully when 	#
   8131 #		1/2 <= 1+X <= 3/2. 					#
   8132 #									#
   8133 #	Note 3. To fully exploit the pipeline, polynomials are usually 	#
   8134 #		separated into two parts evaluated independently before	#
   8135 #		being added up.						#
   8136 #									#
   8137 #########################################################################
   8138 LOGOF2:
   8139 	long		0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000	# LOG(2) in extended format; multiplies K at LP1CONT1
   8140 
   8141 one:
   8142 	long		0x3F800000		# 1.0 in single format
   8143 zero:
   8144 	long		0x00000000		# 0.0 in single format
   8145 infty:
   8146 	long		0x7F800000		# +infinity in single format
   8147 negone:
   8148 	long		0xBF800000		# -1.0 in single format
   8149 
   8150 LOGA6:
   8151 	long		0x3FC2499A,0xB5E4040B	# A6..A1: double-format coefficients of the LOG(1+U) polynomial at LP1CONT1
   8152 LOGA5:
   8153 	long		0xBFC555B5,0x848CB7DB	# A5
   8154 
   8155 LOGA4:
   8156 	long		0x3FC99999,0x987D8730	# A4
   8157 LOGA3:
   8158 	long		0xBFCFFFFF,0xFF6F7E97	# A3
   8159 
   8160 LOGA2:
   8161 	long		0x3FD55555,0x555555A4	# A2
   8162 LOGA1:
   8163 	long		0xBFE00000,0x00000008	# A1
   8164 
   8165 LOGB5:
   8166 	long		0x3F175496,0xADD7DAD6	# B5..B1: double-format coefficients of the odd polynomial in U at LP1CONT2
   8167 LOGB4:
   8168 	long		0x3F3C71C2,0xFE80C7E0	# B4
   8169 
   8170 LOGB3:
   8171 	long		0x3F624924,0x928BCCFF	# B3
   8172 LOGB2:
   8173 	long		0x3F899999,0x999995EC	# B2
   8174 
   8175 LOGB1:
   8176 	long		0x3FB55555,0x55555555	# B1
   8177 TWO:
   8178 	long		0x40000000,0x00000000	# 2.0 in double format
   8179 
   8180 LTHOLD:
   8181 	long		0x3f990000,0x80000000,0x00000000,0x00000000	# 2^(-102), extended: slognp1 returns X itself when |X| is not above this
   8182 
   8183 LOGTBL:						# 64 pairs of extended values, 32 bytes per pair, indexed by the 6 bits of Y after the leading 1
   8184 	long		0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000	# first of each pair: 1/F
   8185 	long		0x3FF70000,0xFF015358,0x833C47E2,0x00000000	# second of each pair (offset 16): LOG(F)
   8186 	long		0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000
   8187 	long		0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000
   8188 	long		0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000
   8189 	long		0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000
   8190 	long		0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000
   8191 	long		0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000
   8192 	long		0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000
   8193 	long		0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000
   8194 	long		0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000
   8195 	long		0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000
   8196 	long		0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000
   8197 	long		0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000
   8198 	long		0x3FFE0000,0xE525982A,0xF70C880E,0x00000000
   8199 	long		0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000
   8200 	long		0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000
   8201 	long		0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000
   8202 	long		0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000
   8203 	long		0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000
   8204 	long		0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000
   8205 	long		0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000
   8206 	long		0x3FFE0000,0xD901B203,0x6406C80E,0x00000000
   8207 	long		0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000
   8208 	long		0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000
   8209 	long		0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000
   8210 	long		0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000
   8211 	long		0x3FFC0000,0xC3FD0329,0x06488481,0x00000000
   8212 	long		0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000
   8213 	long		0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000
   8214 	long		0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000
   8215 	long		0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000
   8216 	long		0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000
   8217 	long		0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000
   8218 	long		0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000
   8219 	long		0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000
   8220 	long		0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000
   8221 	long		0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000
   8222 	long		0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000
   8223 	long		0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000
   8224 	long		0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000
   8225 	long		0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000
   8226 	long		0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000
   8227 	long		0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000
   8228 	long		0x3FFE0000,0xBD691047,0x07661AA3,0x00000000
   8229 	long		0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000
   8230 	long		0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000
   8231 	long		0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000
   8232 	long		0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000
   8233 	long		0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000
   8234 	long		0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000
   8235 	long		0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000
   8236 	long		0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000
   8237 	long		0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000
   8238 	long		0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000
   8239 	long		0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000
   8240 	long		0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000
   8241 	long		0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000
   8242 	long		0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000
   8243 	long		0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000
   8244 	long		0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000
   8245 	long		0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000
   8246 	long		0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000
   8247 	long		0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000
   8248 	long		0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000
   8249 	long		0x3FFD0000,0xD2420487,0x2DD85160,0x00000000
   8250 	long		0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000
   8251 	long		0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000
   8252 	long		0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000
   8253 	long		0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000
   8254 	long		0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000
   8255 	long		0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000
   8256 	long		0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000
   8257 	long		0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000
   8258 	long		0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000
   8259 	long		0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000
   8260 	long		0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000
   8261 	long		0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000
   8262 	long		0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000
   8263 	long		0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000
   8264 	long		0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000
   8265 	long		0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000
   8266 	long		0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000
   8267 	long		0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000
   8268 	long		0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000
   8269 	long		0x3FFE0000,0x825EFCED,0x49369330,0x00000000
   8270 	long		0x3FFE0000,0x9868C809,0x868C8098,0x00000000
   8271 	long		0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000
   8272 	long		0x3FFE0000,0x97012E02,0x5C04B809,0x00000000
   8273 	long		0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000
   8274 	long		0x3FFE0000,0x95A02568,0x095A0257,0x00000000
   8275 	long		0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000
   8276 	long		0x3FFE0000,0x94458094,0x45809446,0x00000000
   8277 	long		0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000
   8278 	long		0x3FFE0000,0x92F11384,0x0497889C,0x00000000
   8279 	long		0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000
   8280 	long		0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000
   8281 	long		0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000
   8282 	long		0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000
   8283 	long		0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000
   8284 	long		0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000
   8285 	long		0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000
   8286 	long		0x3FFE0000,0x8DDA5202,0x37694809,0x00000000
   8287 	long		0x3FFE0000,0x9723A1B7,0x20134203,0x00000000
   8288 	long		0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000
   8289 	long		0x3FFE0000,0x995899C8,0x90EB8990,0x00000000
   8290 	long		0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000
   8291 	long		0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000
   8292 	long		0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000
   8293 	long		0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000
   8294 	long		0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000
   8295 	long		0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000
   8296 	long		0x3FFE0000,0x87F78087,0xF78087F8,0x00000000
   8297 	long		0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000
   8298 	long		0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000
   8299 	long		0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000
   8300 	long		0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000
   8301 	long		0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000
   8302 	long		0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000
   8303 	long		0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000
   8304 	long		0x3FFE0000,0x83993052,0x3FBE3368,0x00000000
   8305 	long		0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000
   8306 	long		0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000
   8307 	long		0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000
   8308 	long		0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000
   8309 	long		0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000
   8310 	long		0x3FFE0000,0x80808080,0x80808081,0x00000000
   8311 	long		0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000
   8312 
   8313 	set		ADJK,L_SCR1		# exponent adjustment: input is 2^(ADJK)*FP0
   8314 
   8315 	set		X,FP_SCR0		# working copy of the operand
   8316 	set		XDCARE,X+2		# the 16 bits between the exponent word and the mantissa
   8317 	set		XFRAC,X+4		# high mantissa longword of X
   8318 
   8319 	set		F,FP_SCR1		# F = first 7 bits of Y with a 1 attached
   8320 	set		FFRAC,F+4		# high mantissa longword of F
   8321 
   8322 	set		KLOG2,FP_SCR0		# K*LOG2; shares FP_SCR0 with X
   8323 
   8324 	set		SAVEU,FP_SCR0		# U on the near-1 path; shares FP_SCR0 with X
   8325 
   8326 	global		slogn
   8327 #--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S
   8328 slogn:
   8329 	fmov.x		(%a0),%fp0		# LOAD INPUT
   8330 	mov.l		&0x00000000,ADJK(%a6)	# normalized input: no exponent adjustment
   8331 
   8332 LOGBGN:
   8333 #--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS
   8334 #--A FINITE, NON-ZERO, NORMALIZED NUMBER.
   8335 
   8336 	mov.l		(%a0),%d1		# upper word of d1 = sign/exponent of X
   8337 	mov.w		4(%a0),%d1		# lower word of d1 = top 16 mantissa bits (compacted X)
   8338 
   8339 	mov.l		(%a0),X(%a6)		# copy the 12-byte operand into scratch X
   8340 	mov.l		4(%a0),X+4(%a6)
   8341 	mov.l		8(%a0),X+8(%a6)
   8342 
   8343 	cmp.l		%d1,&0			# CHECK IF X IS NEGATIVE
   8344 	blt.w		LOGNEG			# LOG OF NEGATIVE ARGUMENT IS INVALID
   8345 # X IS POSITIVE, CHECK IF X IS NEAR 1
   8346 	cmp.l		%d1,&0x3ffef07d 	# IS X < EXP(-1/16) (ABOUT 15/16)?
   8347 	blt.b		LOGMAIN			# YES
   8348 	cmp.l		%d1,&0x3fff8841 	# IS X > EXP(1/16) (ABOUT 17/16)?
   8349 	ble.w		LOGNEAR1		# NO
   8350 
   8351 LOGMAIN:
   8352 #--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1
   8353 
   8354 #--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
   8355 #--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
   8356 #--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
   8357 #--			 = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
   8358 #--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
   8359 #--LOG(1+U) CAN BE VERY EFFICIENT.
   8360 #--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
   8361 #--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.
   8362 
   8363 #--GET K, Y, F, AND ADDRESS OF 1/F.
   8364 	asr.l		&8,%d1			# immediate shift counts stop at 8, hence two shifts
   8365 	asr.l		&8,%d1			# SHIFTED 16 BITS, BIASED EXPO. OF X
   8366 	sub.l		&0x3FFF,%d1		# THIS IS K
   8367 	add.l		ADJK(%a6),%d1		# ADJUST K, ORIGINAL INPUT MAY BE  DENORM.
   8368 	lea		LOGTBL(%pc),%a0		# BASE ADDRESS OF 1/F AND LOG(F)
   8369 	fmov.l		%d1,%fp1		# CONVERT K TO FLOATING-POINT FORMAT
   8370 
   8371 #--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
   8372 	mov.l		&0x3FFF0000,X(%a6)	# X IS NOW Y, I.E. 2^(-K)*X
   8373 	mov.l		XFRAC(%a6),FFRAC(%a6)	# start F from the high mantissa of Y
   8374 	and.l		&0xFE000000,FFRAC(%a6)	# FIRST 7 BITS OF Y
   8375 	or.l		&0x01000000,FFRAC(%a6)	# GET F: ATTACH A 1 AT THE EIGHTH BIT
   8376 	mov.l		FFRAC(%a6),%d1	# READY TO GET ADDRESS OF 1/F
   8377 	and.l		&0x7E000000,%d1		# the 6 bits after the leading 1 select the table pair
   8378 	asr.l		&8,%d1
   8379 	asr.l		&8,%d1
   8380 	asr.l		&4,%d1			# SHIFTED 20, D1 IS THE DISPLACEMENT (32 BYTES PER PAIR)
   8381 	add.l		%d1,%a0			# A0 IS THE ADDRESS FOR 1/F
   8382 
   8383 	fmov.x		X(%a6),%fp0		# FP0 IS Y
   8384 	mov.l		&0x3fff0000,F(%a6)	# F gets the same exponent word as Y
   8385 	clr.l		F+8(%a6)		# low mantissa longword of F is zero
   8386 	fsub.x		F(%a6),%fp0		# Y-F
   8387 	fmovm.x		&0xc,-(%sp)		# SAVE FP2-3 WHILE FP0 IS NOT READY
   8388 #--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
   8389 #--REGISTERS SAVED: FPCR, FP1, FP2
   8390 
   8391 LP1CONT1:
   8392 #--A RE-ENTRY POINT FOR LOGNP1
   8393 	fmul.x		(%a0),%fp0		# FP0 IS U = (Y-F)/F
   8394 	fmul.x		LOGOF2(%pc),%fp1	# GET K*LOG2 WHILE FP0 IS NOT READY
   8395 	fmov.x		%fp0,%fp2
   8396 	fmul.x		%fp2,%fp2		# FP2 IS V=U*U
   8397 	fmov.x		%fp1,KLOG2(%a6)		# PUT K*LOG2 IN MEMORY, FREE FP1
   8398 
   8399 #--LOG(1+U) IS APPROXIMATED BY
   8400 #--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS
   8401 #--[U + V*(A1+V*(A3+V*A5))]  +  [U*V*(A2+V*(A4+V*A6))]
   8402 
   8403 	fmov.x		%fp2,%fp3		# FP3 IS V, kept as the multiplier
   8404 	fmov.x		%fp2,%fp1		# FP1 IS V, start of the even-coefficient half
   8405 
   8406 	fmul.d		LOGA6(%pc),%fp1		# V*A6
   8407 	fmul.d		LOGA5(%pc),%fp2		# V*A5
   8408 
   8409 	fadd.d		LOGA4(%pc),%fp1		# A4+V*A6
   8410 	fadd.d		LOGA3(%pc),%fp2		# A3+V*A5
   8411 
   8412 	fmul.x		%fp3,%fp1		# V*(A4+V*A6)
   8413 	fmul.x		%fp3,%fp2		# V*(A3+V*A5)
   8414 
   8415 	fadd.d		LOGA2(%pc),%fp1		# A2+V*(A4+V*A6)
   8416 	fadd.d		LOGA1(%pc),%fp2		# A1+V*(A3+V*A5)
   8417 
   8418 	fmul.x		%fp3,%fp1		# V*(A2+V*(A4+V*A6))
   8419 	add.l		&16,%a0			# ADDRESS OF LOG(F)
   8420 	fmul.x		%fp3,%fp2		# V*(A1+V*(A3+V*A5))
   8421 
   8422 	fmul.x		%fp0,%fp1		# U*V*(A2+V*(A4+V*A6))
   8423 	fadd.x		%fp2,%fp0		# U+V*(A1+V*(A3+V*A5))
   8424 
   8425 	fadd.x		(%a0),%fp1		# LOG(F)+U*V*(A2+V*(A4+V*A6))
   8426 	fmovm.x		(%sp)+,&0x30		# RESTORE FP2-3
   8427 	fadd.x		%fp1,%fp0		# FP0 IS LOG(F) + LOG(1+U)
   8428 
   8429 	fmov.l		%d0,%fpcr		# load fpcr from d0 just before the final add
   8430 	fadd.x		KLOG2(%a6),%fp0		# FINAL ADD
   8431 	bra		t_inx2
   8432 
   8433 
   8434 LOGNEAR1:
   8435 
   8436 # if the input is exactly equal to one, then exit through ld_pzero.
   8437 # if these 2 lines weren't here, the correct answer would be returned
   8438 # but the INEX2 bit would be set.
   8439 	fcmp.b		%fp0,&0x1		# is it equal to one?
   8440 	fbeq.l		ld_pzero		# yes
   8441 
   8442 #--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.
   8443 	fmov.x		%fp0,%fp1
   8444 	fsub.s		one(%pc),%fp1		# FP1 IS X-1
   8445 	fadd.s		one(%pc),%fp0		# FP0 IS X+1
   8446 	fadd.x		%fp1,%fp1		# FP1 IS 2(X-1)
   8447 #--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
   8448 #--IN U, U = 2(X-1)/(X+1) = FP1/FP0
   8449 
   8450 LP1CONT2:
   8451 #--THIS IS A RE-ENTRY POINT FOR LOGNP1
   8452 	fdiv.x		%fp0,%fp1		# FP1 IS U
   8453 	fmovm.x		&0xc,-(%sp)		# SAVE FP2-3
   8454 #--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3
   8455 #--LET V=U*U, W=V*V, CALCULATE
   8456 #--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY
   8457 #--U + U*V*(  [B1 + W*(B3 + W*B5)]  +  [V*(B2 + W*B4)]  )
   8458 	fmov.x		%fp1,%fp0
   8459 	fmul.x		%fp0,%fp0		# FP0 IS V
   8460 	fmov.x		%fp1,SAVEU(%a6)		# STORE U IN MEMORY, FREE FP1
   8461 	fmov.x		%fp0,%fp1
   8462 	fmul.x		%fp1,%fp1		# FP1 IS W
   8463 
   8464 	fmov.d		LOGB5(%pc),%fp3		# B5
   8465 	fmov.d		LOGB4(%pc),%fp2		# B4
   8466 
   8467 	fmul.x		%fp1,%fp3		# W*B5
   8468 	fmul.x		%fp1,%fp2		# W*B4
   8469 
   8470 	fadd.d		LOGB3(%pc),%fp3		# B3+W*B5
   8471 	fadd.d		LOGB2(%pc),%fp2		# B2+W*B4
   8472 
   8473 	fmul.x		%fp3,%fp1		# W*(B3+W*B5), FP3 RELEASED
   8474 
   8475 	fmul.x		%fp0,%fp2		# V*(B2+W*B4)
   8476 
   8477 	fadd.d		LOGB1(%pc),%fp1		# B1+W*(B3+W*B5)
   8478 	fmul.x		SAVEU(%a6),%fp0		# FP0 IS U*V
   8479 
   8480 	fadd.x		%fp2,%fp1		# B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED
   8481 	fmovm.x		(%sp)+,&0x30		# FP2-3 RESTORED
   8482 
   8483 	fmul.x		%fp1,%fp0		# U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )
   8484 
   8485 	fmov.l		%d0,%fpcr		# load fpcr from d0 just before the final add
   8486 	fadd.x		SAVEU(%a6),%fp0		# U + U*V*( ... ), final add
   8487 	bra		t_inx2
   8488 
   8489 #--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID
   8490 LOGNEG:
   8491 	bra		t_operr
   8492 
   8493 	global		slognd
   8494 slognd:
   8495 #--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT
   8496 
   8497 	mov.l		&-100,ADJK(%a6)		# INPUT = 2^(ADJK) * FP0 (overwritten with -k below)
   8498 
   8499 #----normalize the input value by left shifting k bits (k to be determined
   8500 #----below), adjusting exponent and storing -k to  ADJK
   8501 #----the value TWOTO100 is no longer needed.
   8502 #----Note that this code assumes the denormalized input is NON-ZERO.
   8503 
   8504 	movm.l		&0x3f00,-(%sp)		# save some registers  {d2-d7}
   8505 	mov.l		(%a0),%d3		# D3 is exponent of smallest norm. #
   8506 	mov.l		4(%a0),%d4
   8507 	mov.l		8(%a0),%d5		# (D4,D5) is (Hi_X,Lo_X)
   8508 	clr.l		%d2			# D2 used for holding K
   8509 
   8510 	tst.l		%d4			# is the high mantissa longword zero?
   8511 	bne.b		Hi_not0			# no
   8512 
   8513 Hi_0:
   8514 	mov.l		%d5,%d4			# high longword is zero: move Lo_X up 32 bits
   8515 	clr.l		%d5
   8516 	mov.l		&32,%d2			# k starts at 32 for that move
   8517 	clr.l		%d6
   8518 	bfffo		%d4{&0:&32},%d6		# find first 1; D6 = its bit offset
   8519 	lsl.l		%d6,%d4			# shift the first 1 up to bit 31
   8520 	add.l		%d6,%d2			# (D3,D4,D5) is normalized
   8521 
   8522 	mov.l		%d3,X(%a6)		# store the normalized operand in scratch X
   8523 	mov.l		%d4,XFRAC(%a6)
   8524 	mov.l		%d5,XFRAC+4(%a6)
   8525 	neg.l		%d2
   8526 	mov.l		%d2,ADJK(%a6)		# ADJK = -k
   8527 	fmov.x		X(%a6),%fp0		# FP0 = normalized value
   8528 	movm.l		(%sp)+,&0xfc		# restore registers {d2-d7}
   8529 	lea		X(%a6),%a0		# LOGBGN reads the operand through a0
   8530 	bra.w		LOGBGN			# begin regular log(X)
   8531 
   8532 Hi_not0:
   8533 	clr.l		%d6
   8534 	bfffo		%d4{&0:&32},%d6		# find first 1
   8535 	mov.l		%d6,%d2			# get k
   8536 	lsl.l		%d6,%d4			# shift Hi_X left by k
   8537 	mov.l		%d5,%d7			# a copy of D5
   8538 	lsl.l		%d6,%d5			# shift Lo_X left by k
   8539 	neg.l		%d6
   8540 	add.l		&32,%d6			# D6 = 32 - k
   8541 	lsr.l		%d6,%d7			# top k bits of the old Lo_X
   8542 	or.l		%d7,%d4			# (D3,D4,D5) normalized
   8543 
   8544 	mov.l		%d3,X(%a6)		# store the normalized operand in scratch X
   8545 	mov.l		%d4,XFRAC(%a6)
   8546 	mov.l		%d5,XFRAC+4(%a6)
   8547 	neg.l		%d2
   8548 	mov.l		%d2,ADJK(%a6)		# ADJK = -k
   8549 	fmov.x		X(%a6),%fp0		# FP0 = normalized value
   8550 	movm.l		(%sp)+,&0xfc		# restore registers {d2-d7}
   8551 	lea		X(%a6),%a0		# LOGBGN reads the operand through a0
   8552 	bra.w		LOGBGN			# begin regular log(X)
   8553 
   8554 	global		slognp1
   8555 #--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
   8556 slognp1:
   8557 	fmov.x		(%a0),%fp0		# LOAD INPUT
   8558 	fabs.x		%fp0			# test magnitude
   8559 	fcmp.x		%fp0,LTHOLD(%pc)	# compare with min threshold
   8560 	fbgt.w		LP1REAL			# if greater, continue
   8561 	fmov.l		%d0,%fpcr		# |X| at or below threshold: load rnd prec,mode from d0
   8562 	mov.b		&FMOV_OP,%d1		# last inst is MOVE
   8563 	fmov.x		(%a0),%fp0		# return signed argument
   8564 	bra		t_catch			# exit through t_catch (defined elsewhere)
   8565 
   8566 LP1REAL:
   8567 	fmov.x		(%a0),%fp0		# LOAD INPUT
   8568 	mov.l		&0x00000000,ADJK(%a6)	# ADJK = 0
   8569 	fmov.x		%fp0,%fp1		# FP1 IS INPUT Z
   8570 	fadd.s		one(%pc),%fp0		# X := ROUND(1+Z)
   8571 	fmov.x		%fp0,X(%a6)		# store X so it can be inspected as integers
   8572 	mov.w		XFRAC(%a6),XDCARE(%a6)	# copy top 16 mantissa bits next to sign/exp word
   8573 	mov.l		X(%a6),%d1		# D1 = X in compact form
   8574 	cmp.l		%d1,&0			# X <= 0 ?
   8575 	ble.w		LP1NEG0			# LOG OF ZERO OR -VE
   8576 	cmp.l		%d1,&0x3ffe8000 	# IS BOUNDS [1/2,3/2]?
   8577 	blt.w		LOGMAIN			# X < 1/2: use regular log(X)
   8578 	cmp.l		%d1,&0x3fffc000		# compare with 3/2
   8579 	bgt.w		LOGMAIN			# X > 3/2: use regular log(X)
   8580 #--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
   8581 #--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
   8582 #--SIMPLY INVOKE LOG(X) FOR LOG(1+Z). (THE TWO BRANCHES ABOVE DO THAT.)
   8583 
   8584 LP1NEAR1:
   8585 #--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
   8586 	cmp.l		%d1,&0x3ffef07d		# lower bound, compact form
   8587 	blt.w		LP1CARE			# below it: table-driven path
   8588 	cmp.l		%d1,&0x3fff8841		# upper bound, compact form
   8589 	bgt.w		LP1CARE			# above it: table-driven path
   8590 
   8591 LP1ONE16:
   8592 #--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
   8593 #--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
   8594 	fadd.x		%fp1,%fp1		# FP1 IS 2Z
   8595 	fadd.s		one(%pc),%fp0		# FP0 IS 1+X
   8596 #--U = FP1/FP0
   8597 	bra.w		LP1CONT2		# continue in LP1CONT2 (defined elsewhere)
   8598 
   8599 LP1CARE:
   8600 #--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
   8601 #--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
   8602 #--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
   8603 #--THERE ARE ONLY TWO CASES.
   8604 #--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
   8605 #--CASE 2: 1+Z > 1, THEN K = 0  AND Y-F = (1-F) + Z
   8606 #--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
   8607 #--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED.
   8608 #--ON ENTRY: D1 IS X IN COMPACT FORM, FP1 IS Z.
   8609 	mov.l		XFRAC(%a6),FFRAC(%a6)	# start F from the upper mantissa long of X
   8610 	and.l		&0xFE000000,FFRAC(%a6)	# keep the top 7 mantissa bits
   8611 	or.l		&0x01000000,FFRAC(%a6)	# F OBTAINED
   8612 	cmp.l		%d1,&0x3FFF8000		# SEE IF 1+Z > 1
   8613 	bge.b		KISZERO			# X >= 1: K = 0
   8614 
   8615 KISNEG1:
   8616 	fmov.s		TWO(%pc),%fp0		# FP0 = 2
   8617 	mov.l		&0x3fff0000,F(%a6)	# F gets the exponent word of 1.0
   8618 	clr.l		F+8(%a6)		# low mantissa long of F is zero
   8619 	fsub.x		F(%a6),%fp0		# 2-F
   8620 	mov.l		FFRAC(%a6),%d1		# D1 = upper mantissa long of F
   8621 	and.l		&0x7E000000,%d1		# keep the 6 bits below the leading 1
   8622 	asr.l		&8,%d1
   8623 	asr.l		&8,%d1
   8624 	asr.l		&4,%d1			# D1 CONTAINS DISPLACEMENT FOR 1/F (6-bit index * 32)
   8625 	fadd.x		%fp1,%fp1		# GET 2Z
   8626 	fmovm.x		&0xc,-(%sp)		# SAVE FP2  {%fp2/%fp3}
   8627 	fadd.x		%fp1,%fp0		# FP0 IS Y-F = (2-F)+2Z
   8628 	lea		LOGTBL(%pc),%a0		# A0 IS ADDRESS OF 1/F
   8629 	add.l		%d1,%a0			# add the displacement
   8630 	fmov.s		negone(%pc),%fp1	# FP1 IS K = -1
   8631 	bra.w		LP1CONT1		# continue in LP1CONT1 (defined elsewhere)
   8632 
   8633 KISZERO:
   8634 	fmov.s		one(%pc),%fp0		# FP0 = 1
   8635 	mov.l		&0x3fff0000,F(%a6)	# F gets the exponent word of 1.0
   8636 	clr.l		F+8(%a6)		# low mantissa long of F is zero
   8637 	fsub.x		F(%a6),%fp0		# 1-F
   8638 	mov.l		FFRAC(%a6),%d1		# D1 = upper mantissa long of F
   8639 	and.l		&0x7E000000,%d1		# keep the 6 bits below the leading 1
   8640 	asr.l		&8,%d1
   8641 	asr.l		&8,%d1
   8642 	asr.l		&4,%d1			# D1 = displacement for 1/F (6-bit index * 32)
   8643 	fadd.x		%fp1,%fp0		# FP0 IS Y-F
   8644 	fmovm.x		&0xc,-(%sp)		# FP2 SAVED {%fp2/%fp3}
   8645 	lea		LOGTBL(%pc),%a0
   8646 	add.l		%d1,%a0			# A0 IS ADDRESS OF 1/F
   8647 	fmov.s		zero(%pc),%fp1		# FP1 IS K = 0
   8648 	bra.w		LP1CONT1		# continue in LP1CONT1 (defined elsewhere)
   8649 
   8650 LP1NEG0:
   8651 #--D1 IS X = ROUND(1+Z) IN COMPACT FORM, AND X <= 0. D0 HOLDS THE RND PREC,MODE.
   8652 	cmp.l		%d1,&0			# X < 0 ?
   8653 	blt.b		LP1NEG			# yes; log of a negative number
   8654 LP1ZERO:
   8655 	fmov.s		negone(%pc),%fp0	# X = 0: FP0 = -1 before the divide-by-zero exit
   8656 
   8657 	fmov.l		%d0,%fpcr		# load rnd prec,mode from d0
   8658 	bra		t_dz			# exit through t_dz (defined elsewhere)
   8659 
   8660 LP1NEG:
   8661 	fmov.s		zero(%pc),%fp0		# FP0 = 0 before the operand-error exit
   8662 
   8663 	fmov.l		%d0,%fpcr		# load rnd prec,mode from d0
   8664 	bra		t_operr			# exit through t_operr (defined elsewhere)
   8665 
   8666 	global		slognp1d
   8667 #--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
   8668 #--Simply return the denorm: no computation is done here.
   8669 slognp1d:
   8670 	bra		t_extdnrm		# hand the operand to t_extdnrm (defined elsewhere)
   8671 
   8672 #########################################################################
   8673 # satanh():  computes the inverse hyperbolic tangent of a norm input	#
   8674 # satanhd(): computes the inverse hyperbolic tangent of a denorm input	#
   8675 #									#
   8676 # INPUT ***************************************************************	#
   8677 #	a0 = pointer to extended precision input			#
   8678 #	d0 = round precision,mode					#
   8679 #									#
   8680 # OUTPUT **************************************************************	#
   8681 #	fp0 = arctanh(X)						#
   8682 #									#
   8683 # ACCURACY and MONOTONICITY *******************************************	#
   8684 #	The returned result is within 3 ulps in	64 significant bit,	#
   8685 #	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
   8686 #	rounded to double precision. The result is provably monotonic	#
   8687 #	in double precision.						#
   8688 #									#
   8689 # ALGORITHM ***********************************************************	#
   8690 #									#
   8691 #	ATANH								#
   8692 #	1. If |X| >= 1, go to 3.					#
   8693 #									#
   8694 #	2. (|X| < 1) Calculate atanh(X) by				#
   8695 #		sgn := sign(X)						#
   8696 #		y := |X|						#
   8697 #		z := 2y/(1-y)						#
   8698 #		atanh(X) := sgn * (1/2) * logp1(z)			#
   8699 #		Exit.							#
   8700 #									#
   8701 #	3. If |X| > 1, go to 5.						#
   8702 #									#
   8703 #	4. (|X| = 1) Generate infinity with an appropriate sign and	#
   8704 #		divide-by-zero by					#
   8705 #		sgn := sign(X)						#
   8706 #		atanh(X) := sgn / (+0).					#
   8707 #		Exit.							#
   8708 #									#
   8709 #	5. (|X| > 1) Generate an invalid operation by 0 * infinity.	#
   8710 #		Exit.							#
   8711 #									#
   8712 #########################################################################
   8713 
   8714 	global		satanh
   8715 satanh:
   8716 	mov.l		(%a0),%d1		# D1 hi word = sign/exponent of X
   8717 	mov.w		4(%a0),%d1		# D1 lo word = top 16 mantissa bits (compact form)
   8718 	and.l		&0x7FFFFFFF,%d1		# drop the sign: compact |X|
   8719 	cmp.l		%d1,&0x3FFF8000		# |X| >= 1 ?
   8720 	bge.b		ATANHBIG		# yes; |X| = 1 or |X| > 1
   8721 
   8722 #--THIS IS THE USUAL CASE, |X| < 1
   8723 #--Y = |X|, Z = 2Y/(1-Y), ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z).
   8724 
   8725 	fabs.x		(%a0),%fp0		# Y = |X|
   8726 	fmov.x		%fp0,%fp1
   8727 	fneg.x		%fp1			# -Y
   8728 	fadd.x		%fp0,%fp0		# 2Y
   8729 	fadd.s		&0x3F800000,%fp1	# 1-Y
   8730 	fdiv.x		%fp1,%fp0		# 2Y/(1-Y)
   8731 	mov.l		(%a0),%d1		# fetch sign/exponent long of X again
   8732 	and.l		&0x80000000,%d1		# keep only the sign bit
   8733 	or.l		&0x3F000000,%d1		# SIGN(X)*HALF
   8734 	mov.l		%d1,-(%sp)		# push it (single) for the final multiply
   8735 
   8736 	mov.l		%d0,-(%sp)		# save rnd prec,mode
   8737 	clr.l		%d0			# pass ext prec,RN
   8738 	fmovm.x		&0x01,-(%sp)		# save Z on stack
   8739 	lea		(%sp),%a0		# pass ptr to Z
   8740 	bsr		slognp1			# LOG1P(Z)
   8741 	add.l		&0xc,%sp		# clear Z from stack
   8742 
   8743 	mov.l		(%sp)+,%d0		# fetch old prec,mode
   8744 	fmov.l		%d0,%fpcr		# load it
   8745 	mov.b		&FMUL_OP,%d1		# last inst is MUL
   8746 	fmul.s		(%sp)+,%fp0		# FP0 = SIGN(X)*HALF * LOG1P(Z); pops the single
   8747 	bra		t_catch			# exit through t_catch (defined elsewhere)
   8748 
   8749 ATANHBIG:
   8750 	fabs.x		(%a0),%fp0		# |X|
   8751 	fcmp.s		%fp0,&0x3F800000	# compare |X| with 1.0
   8752 	fbgt		t_operr			# |X| > 1: invalid operation
   8753 	bra		t_dz			# |X| = 1: divide-by-zero
   8754 
   8755 	global		satanhd
   8756 #--ATANH(X) = X FOR DENORMALIZED X; NO COMPUTATION IS DONE HERE
   8757 satanhd:
   8758 	bra		t_extdnrm		# hand the operand to t_extdnrm (defined elsewhere)
   8759 
   8760 #########################################################################
   8761 # slog10():  computes the base-10 logarithm of a normalized input	#
   8762 # slog10d(): computes the base-10 logarithm of a denormalized input	#
   8763 # slog2():   computes the base-2 logarithm of a normalized input	#
   8764 # slog2d():  computes the base-2 logarithm of a denormalized input	#
   8765 #									#
   8766 # INPUT *************************************************************** #
   8767 #	a0 = pointer to extended precision input			#
   8768 #	d0 = round precision,mode					#
   8769 #									#
   8770 # OUTPUT **************************************************************	#
   8771 #	fp0 = log_10(X) or log_2(X)					#
   8772 #									#
   8773 # ACCURACY and MONOTONICITY *******************************************	#
   8774 #	The returned result is within 1.7 ulps in 64 significant bit,	#
   8775 #	i.e. within 0.5003 ulp to 53 bits if the result is subsequently	#
   8776 #	rounded to double precision. The result is provably monotonic	#
   8777 #	in double precision.						#
   8778 #									#
   8779 # ALGORITHM ***********************************************************	#
   8780 #									#
   8781 #       slog10d:							#
   8782 #									#
   8783 #       Step 0.	If X < 0, create a NaN and raise the invalid operation	#
   8784 #               flag. Otherwise, save FPCR in D1; set FpCR to default.	#
   8785 #       Notes:  Default means round-to-nearest mode, no floating-point	#
   8786 #               traps, and precision control = double extended.		#
   8787 #									#
   8788 #       Step 1. Call slognd to obtain Y = log(X), the natural log of X.	#
   8789 #       Notes:  Even if X is denormalized, log(X) is always normalized.	#
   8790 #									#
   8791 #       Step 2.  Compute log_10(X) = log(X) * (1/log(10)).		#
   8792 #            2.1 Restore the user FPCR					#
   8793 #            2.2 Return ans := Y * INV_L10.				#
   8794 #									#
   8795 #       slog10: 							#
   8796 #									#
   8797 #       Step 0. If X < 0, create a NaN and raise the invalid operation	#
   8798 #               flag. Otherwise, save FPCR in D1; set FpCR to default.	#
   8799 #       Notes:  Default means round-to-nearest mode, no floating-point	#
   8800 #               traps, and precision control = double extended.		#
   8801 #									#
   8802 #       Step 1. Call sLogN to obtain Y = log(X), the natural log of X.	#
   8803 #									#
   8804 #       Step 2.   Compute log_10(X) = log(X) * (1/log(10)).		#
   8805 #            2.1  Restore the user FPCR					#
   8806 #            2.2  Return ans := Y * INV_L10.				#
   8807 #									#
   8808 #       sLog2d:								#
   8809 #									#
   8810 #       Step 0. If X < 0, create a NaN and raise the invalid operation	#
   8811 #               flag. Otherwise, save FPCR in D1; set FpCR to default.	#
   8812 #       Notes:  Default means round-to-nearest mode, no floating-point	#
   8813 #               traps, and precision control = double extended.		#
   8814 #									#
   8815 #       Step 1. Call slognd to obtain Y = log(X), the natural log of X.	#
   8816 #       Notes:  Even if X is denormalized, log(X) is always normalized.	#
   8817 #									#
   8818 #       Step 2.   Compute log_2(X) = log(X) * (1/log(2)).		#
   8819 #            2.1  Restore the user FPCR					#
   8820 #            2.2  Return ans := Y * INV_L2.				#
   8821 #									#
   8822 #       sLog2:								#
   8823 #									#
   8824 #       Step 0. If X < 0, create a NaN and raise the invalid operation	#
   8825 #               flag. Otherwise, save FPCR in D1; set FpCR to default.	#
   8826 #       Notes:  Default means round-to-nearest mode, no floating-point	#
   8827 #               traps, and precision control = double extended.		#
   8828 #									#
   8829 #       Step 1. If X is not an integer power of two, i.e., X != 2^k,	#
   8830 #               go to Step 3.						#
   8831 #									#
   8832 #       Step 2.   Return k.						#
   8833 #            2.1  Get integer k, X = 2^k.				#
   8834 #            2.2  Restore the user FPCR.				#
   8835 #            2.3  Return ans := convert-to-double-extended(k).		#
   8836 #									#
   8837 #       Step 3. Call sLogN to obtain Y = log(X), the natural log of X.	#
   8838 #									#
   8839 #       Step 4.   Compute log_2(X) = log(X) * (1/log(2)).		#
   8840 #            4.1  Restore the user FPCR					#
   8841 #            4.2  Return ans := Y * INV_L2.				#
   8842 #									#
   8843 #########################################################################
   8844 
   8845 INV_L10:				# 1/log(10), extended precision; multiplier used by slog10/slog10d
   8846 	long		0x3FFD0000,0xDE5BD8A9,0x37287195,0x00000000
   8847 
   8848 INV_L2:					# 1/log(2), extended precision; multiplier used by slog2/slog2d
   8849 	long		0x3FFF0000,0xB8AA3B29,0x5C17F0BC,0x00000000
   8850 
   8851 	global		slog10
   8852 #--entry point for Log10(X), X is normalized
   8853 slog10:
   8854 	fmov.b		&0x1,%fp0		# FP0 = 1
   8855 	fcmp.x		%fp0,(%a0)		# if operand == 1,
   8856 	fbeq.l		ld_pzero		# return an EXACT zero
   8857 
   8858 	mov.l		(%a0),%d1		# sign/exponent long; sets N if X is negative
   8859 	blt.w		invalid			# X < 0: operand error
   8860 	mov.l		%d0,-(%sp)		# save rnd prec,mode
   8861 	clr.l		%d0			# pass ext prec,RN
   8862 	bsr		slogn			# log(X), X normal.
   8863 	fmov.l		(%sp)+,%fpcr		# restore rnd prec,mode straight into FPCR
   8864 	fmul.x		INV_L10(%pc),%fp0	# log_10(X) = log(X) * (1/log(10))
   8865 	bra		t_inx2			# exit through t_inx2 (defined elsewhere)
   8866 
   8867 	global		slog10d
   8868 #--entry point for Log10(X), X is denormalized
   8869 slog10d:
   8870 	mov.l		(%a0),%d1		# sign/exponent long; sets N if X is negative
   8871 	blt.w		invalid			# X < 0: operand error
   8872 	mov.l		%d0,-(%sp)		# save rnd prec,mode
   8873 	clr.l		%d0			# pass ext prec,RN
   8874 	bsr		slognd			# log(X), X denorm.
   8875 	fmov.l		(%sp)+,%fpcr		# restore rnd prec,mode straight into FPCR
   8876 	fmul.x		INV_L10(%pc),%fp0	# log_10(X) = log(X) * (1/log(10))
   8877 	bra		t_minx2			# exit through t_minx2 (defined elsewhere)
   8878 
   8879 	global		slog2
   8880 #--entry point for Log2(X), X is normalized
   8881 slog2:
   8882 	mov.l		(%a0),%d1		# sign/exponent long; sets N if X is negative
   8883 	blt.w		invalid			# X < 0: operand error
   8884 
   8885 	mov.l		8(%a0),%d1		# low mantissa long
   8886 	bne.b		continue		# X is not 2^k
   8887 
   8888 	mov.l		4(%a0),%d1		# high mantissa long
   8889 	and.l		&0x7FFFFFFF,%d1		# ignore the top (integer) bit
   8890 	bne.b		continue		# other mantissa bits set: X is not 2^k
   8891 
   8892 #--X = 2^k.
   8893 	mov.w		(%a0),%d1		# sign/exponent word
   8894 	and.l		&0x00007FFF,%d1		# biased exponent
   8895 	sub.l		&0x3FFF,%d1		# k = exponent - bias
   8896 	beq.l		ld_pzero		# k = 0 (X = 1): return zero
   8897 	fmov.l		%d0,%fpcr		# load rnd prec,mode from d0
   8898 	fmov.l		%d1,%fp0		# FP0 = k, converted from integer
   8899 	bra		t_inx2			# exit through t_inx2 (defined elsewhere)
   8900 
   8901 continue:
   8902 	mov.l		%d0,-(%sp)		# save rnd prec,mode
   8903 	clr.l		%d0			# pass ext prec,RN
   8904 	bsr		slogn			# log(X), X normal.
   8905 	fmov.l		(%sp)+,%fpcr		# restore rnd prec,mode straight into FPCR
   8906 	fmul.x		INV_L2(%pc),%fp0	# log_2(X) = log(X) * (1/log(2))
   8907 	bra		t_inx2			# exit through t_inx2 (defined elsewhere)
   8908 
   8909 invalid:				# shared X < 0 exit for slog10, slog10d, slog2 and slog2d
   8910 	bra		t_operr
   8911 
   8912 	global		slog2d
   8913 #--entry point for Log2(X), X is denormalized
   8914 slog2d:
   8915 	mov.l		(%a0),%d1		# sign/exponent long; sets N if X is negative
   8916 	blt.w		invalid			# X < 0: operand error
   8917 	mov.l		%d0,-(%sp)		# save rnd prec,mode
   8918 	clr.l		%d0			# pass ext prec,RN
   8919 	bsr		slognd			# log(X), X denorm.
   8920 	fmov.l		(%sp)+,%fpcr		# restore rnd prec,mode straight into FPCR
   8921 	fmul.x		INV_L2(%pc),%fp0	# log_2(X) = log(X) * (1/log(2))
   8922 	bra		t_minx2			# exit through t_minx2 (defined elsewhere)
   8923 
   8924 #########################################################################
   8925 # stwotox():  computes 2**X for a normalized input			#
   8926 # stwotoxd(): computes 2**X for a denormalized input			#
   8927 # stentox():  computes 10**X for a normalized input			#
   8928 # stentoxd(): computes 10**X for a denormalized input			#
   8929 #									#
   8930 # INPUT ***************************************************************	#
   8931 #	a0 = pointer to extended precision input			#
   8932 #	d0 = round precision,mode					#
   8933 #									#
   8934 # OUTPUT **************************************************************	#
   8935 #	fp0 = 2**X or 10**X						#
   8936 #									#
   8937 # ACCURACY and MONOTONICITY *******************************************	#
   8938 #	The returned result is within 2 ulps in 64 significant bit, 	#
   8939 #	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
   8940 #	rounded to double precision. The result is provably monotonic	#
   8941 #	in double precision.						#
   8942 #									#
   8943 # ALGORITHM ***********************************************************	#
   8944 #									#
   8945 #	twotox								#
   8946 #	1. If |X| > 16480, go to ExpBig.				#
   8947 #									#
   8948 #	2. If |X| < 2**(-70), go to ExpSm.				#
   8949 #									#
   8950 #	3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore	#
   8951 #		decompose N as						#
   8952 #		 N = 64(M + M') + j,  j = 0,1,2,...,63.			#
   8953 #									#
   8954 #	4. Overwrite r := r * log2. Then				#
   8955 #		2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).		#
   8956 #		Go to expr to compute that expression.			#
   8957 #									#
   8958 #	tentox								#
   8959 #	1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig.	#
   8960 #									#
   8961 #	2. If |X| < 2**(-70), go to ExpSm.				#
   8962 #									#
   8963 #	3. Set y := X*log_2(10)*64 (base 2 log of 10). Set		#
   8964 #		N := round-to-int(y). Decompose N as			#
   8965 #		 N = 64(M + M') + j,  j = 0,1,2,...,63.			#
   8966 #									#
   8967 #	4. Define r as							#
   8968 #		r := ((X - N*L1)-N*L2) * L10				#
   8969 #		where L1, L2 are the leading and trailing parts of 	#
   8970 #		log_10(2)/64 and L10 is the natural log of 10. Then	#
   8971 #		10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).		#
   8972 #		Go to expr to compute that expression.			#
   8973 #									#
   8974 #	expr								#
   8975 #	1. Fetch 2**(j/64) from table as Fact1 and Fact2.		#
   8976 #									#
   8977 #	2. Overwrite Fact1 and Fact2 by					#
   8978 #		Fact1 := 2**(M) * Fact1					#
   8979 #		Fact2 := 2**(M) * Fact2					#
   8980 #		Thus Fact1 + Fact2 = 2**(M) * 2**(j/64).		#
   8981 #									#
   8982 #	3. Calculate P where 1 + P approximates exp(r):			#
   8983 #		P = r + r*r*(A1+r*(A2+...+r*A5)).			#
   8984 #									#
   8985 #	4. Let AdjFact := 2**(M'). Return				#
   8986 #		AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ).		#
   8987 #		Exit.							#
   8988 #									#
   8989 #	ExpBig								#
   8990 #	1. Generate overflow by Huge * Huge if X > 0; otherwise, 	#
   8991 #	        generate underflow by Tiny * Tiny.			#
   8992 #									#
   8993 #	ExpSm								#
   8994 #	1. Return 1 + X.						#
   8995 #									#
   8996 #########################################################################
   8997 
   8998 L2TEN64:				# double precision; read with fmul.d in TENMAIN
   8999 	long		0x406A934F,0x0979A371	# 64LOG10/LOG2
   9000 L10TWO1:				# double precision; leading part, read with fmul.d in TENMAIN
   9001 	long		0x3F734413,0x509F8000	# LOG2/64LOG10
   9002 
   9003 L10TWO2:				# extended precision; trailing part of LOG2/64LOG10
   9004 	long		0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000
   9005 
   9006 LOG10:	long		0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000	# natural log of 10, extended
   9007 
   9008 LOG2:	long		0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000	# natural log of 2, extended
   9009 #--EXPA1..EXPA5: coefficients A1..A5 of the exp(r) polynomial in expr, read as doubles
   9010 EXPA5:	long		0x3F56C16D,0x6F7BD0B2	# A5
   9011 EXPA4:	long		0x3F811112,0x302C712C	# A4
   9012 EXPA3:	long		0x3FA55555,0x55554CC1	# A3
   9013 EXPA2:	long		0x3FC55555,0x55554A54	# A2
   9014 EXPA1:	long		0x3FE00000,0x00000000,0x00000000,0x00000000	# A1 = 0.5; expr reads only the first 8 bytes (fadd.d)
   9015 
   9016 TEXPTBL:				# table of 2^(J/64), J = 0..63; 16 bytes per entry (index = J << 4)
   9017 	long		0x3FFF0000,0x80000000,0x00000000,0x3F738000	# J=0; longs 0-2: leading part (extended), copied to FACT1
   9018 	long		0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA	# long 3: hi word -> FACT2 exponent word, lo word -> FACT2HI
   9019 	long		0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9
   9020 	long		0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9
   9021 	long		0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA
   9022 	long		0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C
   9023 	long		0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1
   9024 	long		0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA
   9025 	long		0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373
   9026 	long		0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670
   9027 	long		0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700
   9028 	long		0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0
   9029 	long		0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D
   9030 	long		0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319
   9031 	long		0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B
   9032 	long		0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5
   9033 	long		0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A
   9034 	long		0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B
   9035 	long		0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF
   9036 	long		0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA
   9037 	long		0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD
   9038 	long		0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E
   9039 	long		0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B
   9040 	long		0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB
   9041 	long		0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB
   9042 	long		0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274
   9043 	long		0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C
   9044 	long		0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00
   9045 	long		0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301
   9046 	long		0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367
   9047 	long		0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F
   9048 	long		0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C
   9049 	long		0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB
   9050 	long		0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB
   9051 	long		0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C
   9052 	long		0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA
   9053 	long		0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD
   9054 	long		0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51
   9055 	long		0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A
   9056 	long		0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2
   9057 	long		0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB
   9058 	long		0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17
   9059 	long		0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C
   9060 	long		0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8
   9061 	long		0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53
   9062 	long		0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE
   9063 	long		0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124
   9064 	long		0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243
   9065 	long		0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A
   9066 	long		0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61
   9067 	long		0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610
   9068 	long		0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1
   9069 	long		0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12
   9070 	long		0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE
   9071 	long		0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4
   9072 	long		0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F
   9073 	long		0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A
   9074 	long		0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A
   9075 	long		0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC
   9076 	long		0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F
   9077 	long		0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A
   9078 	long		0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795
   9079 	long		0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B
   9080 	long		0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581	# J=63
   9081 
   9082 	set		INT,L_SCR1		# N as a 32-bit integer
   9083 
   9084 	set		X,FP_SCR0		# copy of the input operand
   9085 	set		XDCARE,X+2		# word following the sign/exponent word
   9086 	set		XFRAC,X+4		# upper mantissa long
   9087 
   9088 	set		ADJFACT,FP_SCR0		# 2^(M'); shares FP_SCR0 with X and FACT1
   9089 
   9090 	set		FACT1,FP_SCR0		# leading part of 2^(J/64), later scaled by 2^M
   9091 	set		FACT1HI,FACT1+4		# its upper mantissa long
   9092 	set		FACT1LOW,FACT1+8	# its lower mantissa long
   9093 
   9094 	set		FACT2,FP_SCR1		# trailing part of 2^(J/64), later scaled by 2^M
   9095 	set		FACT2HI,FACT2+4		# its upper mantissa long
   9096 	set		FACT2LOW,FACT2+8	# its lower mantissa long
   9097 
   9098 	global		stwotox
   9099 #--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
   9100 stwotox:
   9101 	fmovm.x		(%a0),&0x80		# LOAD INPUT
   9102 
   9103 	mov.l		(%a0),%d1		# D1 hi word = sign/exponent of X
   9104 	mov.w		4(%a0),%d1		# D1 lo word = top 16 mantissa bits (compact form)
   9105 	fmov.x		%fp0,X(%a6)		# keep a copy of X; TEXPBIG reads its sign
   9106 	and.l		&0x7FFFFFFF,%d1		# drop the sign: compact |X|
   9107 
   9108 	cmp.l		%d1,&0x3FB98000		# |X| >= 2**(-70)?
   9109 	bge.b		TWOOK1			# yes; go check the upper bound
   9110 	bra.w		EXPBORS			# no; |X| is tiny
   9111 
   9112 TWOOK1:
   9113 	cmp.l		%d1,&0x400D80C0		# |X| > 16480?
   9114 	ble.b		TWOMAIN			# no; usual case
   9115 	bra.w		EXPBORS			# yes; |X| is huge
   9116 
   9117 TWOMAIN:
   9118 #--USUAL CASE, 2^(-70) <= |X| <= 16480
   9119 
   9120 	fmov.x		%fp0,%fp1
   9121 	fmul.s		&0x42800000,%fp1	# 64 * X
   9122 	fmov.l		%fp1,INT(%a6)		# N = ROUND-TO-INT(64 X)
   9123 	mov.l		%d2,-(%sp)		# save d2; expr restores it
   9124 	lea		TEXPTBL(%pc),%a1	# LOAD ADDRESS OF TABLE OF 2^(J/64)
   9125 	fmov.l		INT(%a6),%fp1		# N --> FLOATING FMT
   9126 	mov.l		INT(%a6),%d1		# D1 = N
   9127 	mov.l		%d1,%d2			# d2 = N
   9128 	and.l		&0x3F,%d1		# D1 IS J
   9129 	asl.l		&4,%d1			# DISPLACEMENT FOR 2^(J/64)
   9130 	add.l		%d1,%a1			# ADDRESS FOR 2^(J/64)
   9131 	asr.l		&6,%d2			# d2 IS L, N = 64L + J
   9132 	mov.l		%d2,%d1
   9133 	asr.l		&1,%d1			# D1 IS M
   9134 	sub.l		%d1,%d2			# d2 IS M', N = 64(M+M') + J
   9135 	add.l		&0x3FFF,%d2		# d2 = biased exponent of 2^(M')
   9136 
   9137 #--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
   9138 #--D1 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
   9139 #--D2 IS THE BIASED EXPONENT OF ADJFACT = 2^(M').
   9140 #--SAVED ON THE STACK SO FAR: D2. FP2/FP3 ARE SAVED NEXT.
   9141 
   9142 	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3
   9143 
   9144 	fmul.s		&0x3C800000,%fp1	# (1/64)*N
   9145 	mov.l		(%a1)+,FACT1(%a6)	# FACT1 = leading part: sign/exponent long
   9146 	mov.l		(%a1)+,FACT1HI(%a6)	# upper mantissa long
   9147 	mov.l		(%a1)+,FACT1LOW(%a6)	# lower mantissa long
   9148 	mov.w		(%a1)+,FACT2(%a6)	# FACT2 exponent word = hi word of 4th table long
   9149 
   9150 	fsub.x		%fp1,%fp0		# X - (1/64)*INT(64 X)
   9151 
   9152 	mov.w		(%a1)+,FACT2HI(%a6)	# FACT2 top 16 mantissa bits = lo word of 4th table long
   9153 	clr.w		FACT2HI+2(%a6)		# remaining FACT2 mantissa bits are zero
   9154 	clr.l		FACT2LOW(%a6)
   9155 	add.w		%d1,FACT1(%a6)		# add M to FACT1 exponent: FACT1 *= 2^M
   9156 	fmul.x		LOG2(%pc),%fp0		# FP0 IS R
   9157 	add.w		%d1,FACT2(%a6)		# add M to FACT2 exponent: FACT2 *= 2^M
   9158 
   9159 	bra.w		expr			# evaluate exp(R) and reconstruct
   9160 
   9161 EXPBORS:
   9162 #--REACHED FROM stwotox/stentox: D1 IS COMPACT |X|, FP0 IS X, X(%a6) HOLDS X
   9163 	cmp.l		%d1,&0x3FFF8000		# |X| > 1 ?
   9164 	bgt.b		TEXPBIG			# yes; out of range on the large side
   9165 
   9166 #--|X| IS SMALL, RETURN 1 + X
   9167 
   9168 	fmov.l		%d0,%fpcr		# restore users round prec,mode
   9169 	fadd.s		&0x3F800000,%fp0	# RETURN 1 + X
   9170 	bra		t_pinx2			# exit through t_pinx2 (defined elsewhere)
   9171 
   9172 TEXPBIG:
   9173 #--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW
   9174 #--THE SIGN OF X IS READ BACK FROM THE COPY STORED IN X(%a6)
   9175 	mov.l		X(%a6),%d1		# sign/exponent long of X
   9176 	cmp.l		%d1,&0			# X negative?
   9177 	blt.b		EXPNEG			# yes; underflow
   9178 
   9179 	bra		t_ovfl2			# t_ovfl expects positive value
   9180 
   9181 EXPNEG:
   9182 	bra		t_unfl2			# t_unfl expects positive value
   9183 
   9184 	global		stwotoxd
   9185 stwotoxd:
   9186 #--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT
   9187 #--THE RESULT IS 1.0 PLUS A TINY SINGLE-PRECISION VALUE CARRYING THE SIGN OF X
   9188 	fmov.l		%d0,%fpcr		# set user's rounding mode/precision
   9189 	fmov.s		&0x3F800000,%fp0	# RETURN 1 + X
   9190 	mov.l		(%a0),%d1		# sign/exponent long of X (exponent field is 0 for a denorm)
   9191 	or.l		&0x00800001,%d1		# single: sign of X, biased exponent 1, fraction lsb set
   9192 	fadd.s		%d1,%fp0		# 1 + tiny, rounded in the user's mode
   9193 	bra		t_pinx2			# exit through t_pinx2 (defined elsewhere)
   9194 
   9195 	global		stentox
   9196 #--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
   9197 stentox:
   9198 	fmovm.x		(%a0),&0x80		# LOAD INPUT
   9199 
   9200 	mov.l		(%a0),%d1		# D1 hi word = sign/exponent of X
   9201 	mov.w		4(%a0),%d1		# D1 lo word = top 16 mantissa bits (compact form)
   9202 	fmov.x		%fp0,X(%a6)		# keep a copy of X; TEXPBIG reads its sign
   9203 	and.l		&0x7FFFFFFF,%d1		# drop the sign: compact |X|
   9204 
   9205 	cmp.l		%d1,&0x3FB98000		# |X| >= 2**(-70)?
   9206 	bge.b		TENOK1			# yes; go check the upper bound
   9207 	bra.w		EXPBORS			# no; |X| is tiny
   9208 
   9209 TENOK1:
   9210 	cmp.l		%d1,&0x400B9B07		# |X| <= 16480*log2/log10 ?
   9211 	ble.b		TENMAIN			# yes; usual case
   9212 	bra.w		EXPBORS			# no; |X| is huge
   9213 
   9214 TENMAIN:
   9215 #--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10
   9216 
   9217 	fmov.x		%fp0,%fp1
   9218 	fmul.d		L2TEN64(%pc),%fp1	# X*64*LOG10/LOG2
   9219 	fmov.l		%fp1,INT(%a6)		# N=INT(X*64*LOG10/LOG2)
   9220 	mov.l		%d2,-(%sp)		# save d2; expr restores it
   9221 	lea		TEXPTBL(%pc),%a1	# LOAD ADDRESS OF TABLE OF 2^(J/64)
   9222 	fmov.l		INT(%a6),%fp1		# N --> FLOATING FMT
   9223 	mov.l		INT(%a6),%d1		# D1 = N
   9224 	mov.l		%d1,%d2			# d2 = N
   9225 	and.l		&0x3F,%d1		# D1 IS J
   9226 	asl.l		&4,%d1			# DISPLACEMENT FOR 2^(J/64)
   9227 	add.l		%d1,%a1			# ADDRESS FOR 2^(J/64)
   9228 	asr.l		&6,%d2			# d2 IS L, N = 64L + J
   9229 	mov.l		%d2,%d1
   9230 	asr.l		&1,%d1			# D1 IS M
   9231 	sub.l		%d1,%d2			# d2 IS M', N = 64(M+M') + J
   9232 	add.l		&0x3FFF,%d2		# d2 = biased exponent of 2^(M')
   9233 
   9234 #--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
   9235 #--D1 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
   9236 #--D2 IS THE BIASED EXPONENT OF ADJFACT = 2^(M').
   9237 #--SAVED ON THE STACK SO FAR: D2. FP2/FP3 ARE SAVED NEXT.
   9238 	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3
   9239 
   9240 	fmov.x		%fp1,%fp2		# FP2 = N as well
   9241 
   9242 	fmul.d		L10TWO1(%pc),%fp1	# N*(LOG2/64LOG10)_LEAD
   9243 	mov.l		(%a1)+,FACT1(%a6)	# FACT1 = leading part: sign/exponent long
   9244 
   9245 	fmul.x		L10TWO2(%pc),%fp2	# N*(LOG2/64LOG10)_TRAIL
   9246 
   9247 	mov.l		(%a1)+,FACT1HI(%a6)	# upper mantissa long
   9248 	mov.l		(%a1)+,FACT1LOW(%a6)	# lower mantissa long
   9249 	fsub.x		%fp1,%fp0		# X - N L_LEAD
   9250 	mov.w		(%a1)+,FACT2(%a6)	# FACT2 exponent word = hi word of 4th table long
   9251 
   9252 	fsub.x		%fp2,%fp0		# X - N L_TRAIL
   9253 
   9254 	mov.w		(%a1)+,FACT2HI(%a6)	# FACT2 top 16 mantissa bits = lo word of 4th table long
   9255 	clr.w		FACT2HI+2(%a6)		# remaining FACT2 mantissa bits are zero
   9256 	clr.l		FACT2LOW(%a6)
   9257 
   9258 	fmul.x		LOG10(%pc),%fp0		# FP0 IS R
   9259 	add.w		%d1,FACT1(%a6)		# add M to FACT1 exponent: FACT1 *= 2^M
   9260 	add.w		%d1,FACT2(%a6)		# add M to FACT2 exponent: FACT2 *= 2^M; falls through to expr
   9261 
   9262 expr:
   9263 #--ON ENTRY THE STACK HOLDS THE SAVED D2 AND, ABOVE IT, FP2/FP3.
   9264 #--D2 HOLDS THE BIASED EXPONENT OF 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64).
   9265 #--FP0 IS R. THE FOLLOWING CODE COMPUTES
   9266 #--	2**(M'+M) * 2**(J/64) * EXP(R)
   9267 
   9268 	fmov.x		%fp0,%fp1
   9269 	fmul.x		%fp1,%fp1		# FP1 IS S = R*R
   9270 
   9271 	fmov.d		EXPA5(%pc),%fp2		# FP2 IS A5
   9272 	fmov.d		EXPA4(%pc),%fp3		# FP3 IS A4
   9273 
   9274 	fmul.x		%fp1,%fp2		# FP2 IS S*A5
   9275 	fmul.x		%fp1,%fp3		# FP3 IS S*A4
   9276 
   9277 	fadd.d		EXPA3(%pc),%fp2		# FP2 IS A3+S*A5
   9278 	fadd.d		EXPA2(%pc),%fp3		# FP3 IS A2+S*A4
   9279 
   9280 	fmul.x		%fp1,%fp2		# FP2 IS S*(A3+S*A5)
   9281 	fmul.x		%fp1,%fp3		# FP3 IS S*(A2+S*A4)
   9282 
   9283 	fadd.d		EXPA1(%pc),%fp2		# FP2 IS A1+S*(A3+S*A5)
   9284 	fmul.x		%fp0,%fp3		# FP3 IS R*S*(A2+S*A4)
   9285 
   9286 	fmul.x		%fp1,%fp2		# FP2 IS S*(A1+S*(A3+S*A5))
   9287 	fadd.x		%fp3,%fp0		# FP0 IS R+R*S*(A2+S*A4)
   9288 	fadd.x		%fp2,%fp0		# FP0 IS EXP(R) - 1
   9289 
   9290 	fmovm.x		(%sp)+,&0x30		# restore fp2/fp3
   9291 
   9292 #--FINAL RECONSTRUCTION PROCESS
   9293 #--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1)  -  (1 OR 0)
   9294 
   9295 	fmul.x		FACT1(%a6),%fp0		# FACT1 * P
   9296 	fadd.x		FACT2(%a6),%fp0		# (FACT1 * P) + FACT2
   9297 	fadd.x		FACT1(%a6),%fp0		# FACT1 + ((FACT1 * P) + FACT2)
   9298 
   9299 	fmov.l		%d0,%fpcr		# restore users round prec,mode
   9300 	mov.w		%d2,ADJFACT(%a6)	# INSERT EXPONENT
   9301 	mov.l		(%sp)+,%d2		# restore caller's d2
   9302 	mov.l		&0x80000000,ADJFACT+4(%a6)	# mantissa = 1.0, so ADJFACT = 2^(M')
   9303 	clr.l		ADJFACT+8(%a6)
   9304 	mov.b		&FMUL_OP,%d1		# last inst is MUL
   9305 	fmul.x		ADJFACT(%a6),%fp0	# FINAL ADJUSTMENT
   9306 	bra		t_catch			# exit through t_catch (defined elsewhere)
   9307 
   9308 	global		stentoxd
   9309 stentoxd:
   9310 #--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT
   9311 #--THE RESULT IS 1.0 PLUS A TINY SINGLE-PRECISION VALUE CARRYING THE SIGN OF X
   9312 	fmov.l		%d0,%fpcr		# set user's rounding mode/precision
   9313 	fmov.s		&0x3F800000,%fp0	# RETURN 1 + X
   9314 	mov.l		(%a0),%d1		# sign/exponent long of X (exponent field is 0 for a denorm)
   9315 	or.l		&0x00800001,%d1		# single: sign of X, biased exponent 1, fraction lsb set
   9316 	fadd.s		%d1,%fp0		# 1 + tiny, rounded in the user's mode
   9317 	bra		t_pinx2			# exit through t_pinx2 (defined elsewhere)
   9318 
   9319 #########################################################################
   9320 # smovcr(): returns the ROM constant at the offset specified in d1	#
   9321 #	    rounded to the mode and precision specified in d0. 		#
   9322 #									#
   9323 # INPUT	***************************************************************	#
   9324 # 	d0 = rnd prec,mode						#
   9325 #	d1 = ROM offset							#
   9326 #									#
   9327 # OUTPUT **************************************************************	#
   9328 #	fp0 = the ROM constant rounded to the user's rounding mode,prec	#
   9329 #									#
   9330 #########################################################################
   9331 
   9332 	global		smovcr
   9333 smovcr:
   9334 	mov.l		%d1,-(%sp)		# save rom offset for a sec
   9335 
   9336 	lsr.b		&0x4,%d0		# shift ctrl bits to lo
   9337 	mov.l		%d0,%d1			# make a copy
   9338 	andi.w		&0x3,%d1		# extract rnd mode
   9339 	andi.w		&0xc,%d0		# extract rnd prec
   9340 	swap		%d0			# put rnd prec in hi
   9341 	mov.w		%d1,%d0			# put rnd mode in lo
   9342 
   9343 	mov.l		(%sp)+,%d1		# get rom offset
   9344 
   9345 #
   9346 # check range of offset (byte compares; each test relies on the ones before it)
   9347 #
   9348 	tst.b		%d1			# if zero, offset is to pi
   9349 	beq.b		pi_tbl			# it is pi
   9350 	cmpi.b		%d1,&0x0a		# check range $01 - $0a
   9351 	ble.b		z_val			# if in this range, return zero
   9352 	cmpi.b		%d1,&0x0e		# check range $0b - $0e
   9353 	ble.b		sm_tbl			# valid constants in this range
   9354 	cmpi.b		%d1,&0x2f		# check range $0f - $2f
   9355 	ble.b		z_val			# if in this range, return zero
   9356 	cmpi.b		%d1,&0x3f		# check range $30 - $3f
   9357 	ble.b		bg_tbl			# valid constants in this range
   9358 # any other offset falls through and also returns zero
   9359 z_val:
   9360 	bra.l		ld_pzero		# return a zero
   9361 
   9362 #
   9363 # the answer is PI rounded to the proper precision.
   9364 #
   9365 # fetch a pointer to the answer table relating to the proper rounding
   9366 # precision.
   9367 #
   9368 pi_tbl:					# pick the PI table that matches the rounding mode in d0
   9369 	tst.b		%d0			# is rmode RN?
   9370 	bne.b		pi_not_rn		# no
   9371 pi_rn:
   9372 	lea.l		PIRN(%pc),%a0		# yes; load PI RN table addr
   9373 	bra.w		set_finx		# PI is always treated as inexact
   9374 pi_not_rn:
   9375 	cmpi.b		%d0,&rp_mode		# is rmode RP?
   9376 	beq.b		pi_rp			# yes
   9377 pi_rzrm:
   9378 	lea.l		PIRZRM(%pc),%a0		# no; load PI RZ,RM table addr
   9379 	bra.b		set_finx
   9380 pi_rp:
   9381 	lea.l		PIRP(%pc),%a0		# load PI RP table addr
   9382 	bra.b		set_finx
   9383 
   9384 #
   9385 # the answer is one of:
   9386 #	$0B	log10(2)	(inexact)
   9387 #	$0C	e		(inexact)
   9388 #	$0D	log2(e)		(inexact)
   9389 #	$0E	log10(e)	(exact)
   9390 #
   9391 # fetch a pointer to the answer table relating to the proper rounding
   9392 # precision.
   9393 #
   9394 sm_tbl:
   9395 	subi.b		&0xb,%d1		# make offset in 0-4 range
   9396 	tst.b		%d0			# is rmode RN?
   9397 	bne.b		sm_not_rn		# no
   9398 sm_rn:
   9399 	lea.l		SMALRN(%pc),%a0		# yes; load RN table addr
   9400 sm_tbl_cont:
   9401 	cmpi.b		%d1,&0x2		# is result log10(e)?
   9402 	ble.b		set_finx		# no; answer is inexact
   9403 	bra.b		no_finx			# yes; answer is exact
   9404 sm_not_rn:
   9405 	cmpi.b		%d0,&rp_mode		# is rmode RP?
   9406 	beq.b		sm_rp			# yes
   9407 sm_rzrm:
   9408 	lea.l		SMALRZRM(%pc),%a0	# no; load RZ,RM table addr
   9409 	bra.b		sm_tbl_cont
   9410 sm_rp:
   9411 	lea.l		SMALRP(%pc),%a0		# load RP table addr
   9412 	bra.b		sm_tbl_cont
   9413 
   9414 #
   9415 # the answer is one of:
   9416 #	$30	ln(2)		(inexact)
   9417 #	$31	ln(10)		(inexact)
   9418 #	$32	10^0		(exact)
   9419 #	$33	10^1		(exact)
   9420 #	$34	10^2		(exact)
   9421 #	$35	10^4		(exact)
   9422 #	$36	10^8		(exact)
   9423 #	$37	10^16		(exact)
   9424 #	$38	10^32		(inexact)
   9425 #	$39	10^64		(inexact)
   9426 #	$3A	10^128		(inexact)
   9427 #	$3B	10^256		(inexact)
   9428 #	$3C	10^512		(inexact)
   9429 #	$3D	10^1024		(inexact)
   9430 #	$3E	10^2048		(inexact)
   9431 #	$3F	10^4096		(inexact)
   9432 #
   9433 # fetch a pointer to the answer table relating to the proper rounding
   9434 # precision.
   9435 #
   9436 bg_tbl:
   9437 	subi.b		&0x30,%d1		# make offset in 0-f range
   9438 	tst.b		%d0			# is rmode RN?
   9439 	bne.b		bg_not_rn		# no
   9440 bg_rn:
   9441 	lea.l		BIGRN(%pc),%a0		# yes; load RN table addr
   9442 bg_tbl_cont:
   9443 	cmpi.b		%d1,&0x1		# is offset <= $31?
   9444 	ble.b		set_finx		# yes; answer is inexact
   9445 	cmpi.b		%d1,&0x7		# is $32 <= offset <= $37?
   9446 	ble.b		no_finx			# yes; answer is exact
   9447 	bra.b		set_finx		# no; answer is inexact
   9448 bg_not_rn:
   9449 	cmpi.b		%d0,&rp_mode		# is rmode RP?
   9450 	beq.b		bg_rp			# yes
   9451 bg_rzrm:
   9452 	lea.l		BIGRZRM(%pc),%a0	# no; load RZ,RM table addr
   9453 	bra.b		bg_tbl_cont
   9454 bg_rp:
   9455 	lea.l		BIGRP(%pc),%a0		# load RP table addr
   9456 	bra.b		bg_tbl_cont
   9457 
   9458 # answer is inexact, so set INEX2 and AINEX in the user's FPSR.
   9459 set_finx:
   9460 	ori.l		&inx2a_mask,USER_FPSR(%a6) # set INEX2/AINEX
   9461 no_finx:
   9462 	mulu.w		&0xc,%d1		# offset points into tables
   9463 	swap		%d0			# put rnd prec in lo word
   9464 	tst.b		%d0			# is precision extended?
   9465 
   9466 	bne.b		not_ext			# if xprec, do not call round
   9467 
   9468 # Precision is extended
   9469 	fmovm.x		(%a0,%d1.w),&0x80	# return result in fp0
   9470 	rts
   9471 
   9472 # Precision is single or double
   9473 not_ext:
   9474 	swap		%d0			# rnd prec in upper word
   9475 
   9476 # call round() to round the answer to the proper precision.
   9477 # exponents out of range for single or double DO NOT cause underflow
   9478 # or overflow.
   9479 	mov.w		0x0(%a0,%d1.w),FP_SCR1_EX(%a6) # load first word
   9480 	mov.l		0x4(%a0,%d1.w),FP_SCR1_HI(%a6) # load second word
   9481 	mov.l		0x8(%a0,%d1.w),FP_SCR1_LO(%a6) # load third word
   9482 	mov.l		%d0,%d1
   9483 	clr.l		%d0			# clear g,r,s
   9484 	lea		FP_SCR1(%a6),%a0	# pass ptr to answer
   9485 	clr.w		LOCAL_SGN(%a0)		# sign always positive
   9486 	bsr.l		_round			# round the mantissa
   9487 
   9488 	fmovm.x		(%a0),&0x80		# return rounded result in fp0
   9489 	rts
   9490 
   9491 	align		0x4
   9492 
        # Constant tables read by smovcr. Every entry is three longwords
        # (12 bytes), matching the "mulu.w &0xc" index scaling in smovcr:
        # exponent in the upper word of the first longword, then the hi and
        # lo mantissa longwords. Each constant group has one table per
        # rounding-mode case: RN, RZ/RM, and RP.
   9493 PIRN:	long		0x40000000,0xc90fdaa2,0x2168c235	# pi
   9494 PIRZRM:	long		0x40000000,0xc90fdaa2,0x2168c234	# pi
   9495 PIRP:	long		0x40000000,0xc90fdaa2,0x2168c235	# pi
   9496 
        # SMAL* tables: index 0-3 corresponds to ROM offsets $0B-$0E.
        # NOTE(review): the trailing 0.0 entry is not selected by the range
        # checks visible in smovcr ($0F is routed to z_val).
   9497 SMALRN:	long		0x3ffd0000,0x9a209a84,0xfbcff798	# log10(2)
   9498 	long		0x40000000,0xadf85458,0xa2bb4a9a	# e
   9499 	long		0x3fff0000,0xb8aa3b29,0x5c17f0bc	# log2(e)
   9500 	long		0x3ffd0000,0xde5bd8a9,0x37287195	# log10(e)
   9501 	long		0x00000000,0x00000000,0x00000000	# 0.0
   9502 
   9503 SMALRZRM:
   9504 	long		0x3ffd0000,0x9a209a84,0xfbcff798	# log10(2)
   9505 	long		0x40000000,0xadf85458,0xa2bb4a9a	# e
   9506 	long		0x3fff0000,0xb8aa3b29,0x5c17f0bb	# log2(e)
   9507 	long		0x3ffd0000,0xde5bd8a9,0x37287195	# log10(e)
   9508 	long		0x00000000,0x00000000,0x00000000	# 0.0
   9509 
   9510 SMALRP:	long		0x3ffd0000,0x9a209a84,0xfbcff799	# log10(2)
   9511 	long		0x40000000,0xadf85458,0xa2bb4a9b	# e
   9512 	long		0x3fff0000,0xb8aa3b29,0x5c17f0bc	# log2(e)
   9513 	long		0x3ffd0000,0xde5bd8a9,0x37287195	# log10(e)
   9514 	long		0x00000000,0x00000000,0x00000000	# 0.0
   9515 
        # BIG* tables: index 0-15 corresponds to ROM offsets $30-$3F.
   9516 BIGRN:	long		0x3ffe0000,0xb17217f7,0xd1cf79ac	# ln(2)
   9517 	long		0x40000000,0x935d8ddd,0xaaa8ac17	# ln(10)
   9518 
   9519 	long		0x3fff0000,0x80000000,0x00000000	# 10 ^ 0
   9520 	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
   9521 	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
   9522 	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
   9523 	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
   9524 	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
   9525 	long		0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32
   9526 	long		0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64
   9527 	long		0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
   9528 	long		0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
   9529 	long		0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
   9530 	long		0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024
   9531 	long		0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
   9532 	long		0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096
   9533 
   9534 BIGRZRM:
   9535 	long		0x3ffe0000,0xb17217f7,0xd1cf79ab	# ln(2)
   9536 	long		0x40000000,0x935d8ddd,0xaaa8ac16	# ln(10)
   9537 
   9538 	long		0x3fff0000,0x80000000,0x00000000	# 10 ^ 0
   9539 	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
   9540 	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
   9541 	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
   9542 	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
   9543 	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
   9544 	long		0x40690000,0x9DC5ADA8,0x2B70B59D	# 10 ^ 32
   9545 	long		0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64
   9546 	long		0x41A80000,0x93BA47C9,0x80E98CDF	# 10 ^ 128
   9547 	long		0x43510000,0xAA7EEBFB,0x9DF9DE8D	# 10 ^ 256
   9548 	long		0x46A30000,0xE319A0AE,0xA60E91C6	# 10 ^ 512
   9549 	long		0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024
   9550 	long		0x5A920000,0x9E8B3B5D,0xC53D5DE4	# 10 ^ 2048
   9551 	long		0x75250000,0xC4605202,0x8A20979A	# 10 ^ 4096
   9552 
   9553 BIGRP:
   9554 	long		0x3ffe0000,0xb17217f7,0xd1cf79ac	# ln(2)
   9555 	long		0x40000000,0x935d8ddd,0xaaa8ac17	# ln(10)
   9556 
   9557 	long		0x3fff0000,0x80000000,0x00000000	# 10 ^ 0
   9558 	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
   9559 	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
   9560 	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
   9561 	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
   9562 	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
   9563 	long		0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32
   9564 	long		0x40D30000,0xC2781F49,0xFFCFA6D6	# 10 ^ 64
   9565 	long		0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
   9566 	long		0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
   9567 	long		0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
   9568 	long		0x4D480000,0xC9767586,0x81750C18	# 10 ^ 1024
   9569 	long		0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
   9570 	long		0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096
   9571 
   9572 #########################################################################
   9573 # sscale(): computes the destination operand scaled by the source	#
   9574 #	    operand. If the absolute value of the source operand is 	#
   9575 #	    >= 2^14, an overflow or underflow is returned.		#
   9576 #									#
   9577 # INPUT *************************************************************** #
   9578 #	a0  = pointer to double-extended source operand X		#
   9579 #	a1  = pointer to double-extended destination operand Y		#
   9580 #									#
   9581 # OUTPUT ************************************************************** #
   9582 #	fp0 =  scale(X,Y)						#
   9583 #									#
   9584 #########################################################################
   9585 
   9586 set	SIGN,		L_SCR1
   9587 
   9588 	global		sscale
   9589 sscale:
        # Scale the dst operand at (a1) by 2^int(src), src being the operand
        # at (a0). d0 holds the ctrl bits on entry; they stay on the stack
        # until one of the exit paths below pops them.
   9590 	mov.l		%d0,-(%sp)		# store off ctrl bits for now
   9591 
   9592 	mov.w		DST_EX(%a1),%d1		# get dst exponent
   9593 	smi.b		SIGN(%a6)		# use SIGN to hold dst sign
   9594 	andi.l		&0x00007fff,%d1		# strip sign from dst exp
   9595 
   9596 	mov.w		SRC_EX(%a0),%d0		# check src bounds
   9597 	andi.w		&0x7fff,%d0		# clr src sign bit
   9598 	cmpi.w		%d0,&0x3fff		# is |src| < 1 (exp < bias)?
   9599 	blt.w		src_small		# yes
   9600 	cmpi.w		%d0,&0x400c		# no; is |src| >= 2^14?
   9601 	bgt.w		src_out			# yes
   9602 
   9603 #
   9604 # Source is within 2^14 range.
   9605 #
   9606 src_ok:
   9607 	fintrz.x	SRC(%a0),%fp0		# calc int of src
   9608 	fmov.l		%fp0,%d0		# int src to d0
   9609 # don't want any accrued bits from the fintrz showing up later since
   9610 # we may need to read the fpsr for the last fp op in t_catch2().
   9611 	fmov.l		&0x0,%fpsr
   9612 
   9613 	tst.b		DST_HI(%a1)		# is dst denormalized?
   9614 	bmi.b		sok_norm		# no; mantissa msb is set
   9615 
   9616 # the dst is a DENORM. normalize the DENORM and add the adjustment to
   9617 # the src value. then, jump to the norm part of the routine.
   9618 sok_dnrm:
   9619 	mov.l		%d0,-(%sp)		# save src for now
   9620 
   9621 	mov.w		DST_EX(%a1),FP_SCR0_EX(%a6) # make a copy
   9622 	mov.l		DST_HI(%a1),FP_SCR0_HI(%a6)
   9623 	mov.l		DST_LO(%a1),FP_SCR0_LO(%a6)
   9624 
   9625 	lea		FP_SCR0(%a6),%a0	# pass ptr to DENORM
   9626 	bsr.l		norm			# normalize the DENORM
   9627 	neg.l		%d0			# negate the value norm left in d0
   9628 	add.l		(%sp)+,%d0		# add adjustment to src
   9629 
   9630 	fmovm.x		FP_SCR0(%a6),&0x80	# load normalized DENORM
   9631 
   9632 	cmpi.w		%d0,&-0x3fff		# is the shft amt really low?
   9633 	bge.b		sok_norm2		# thank goodness no
   9634 
   9635 # the multiply factor that we're trying to create should be a denorm
   9636 # for the multiply to work. therefore, we're going to actually do a
   9637 # multiply with a denorm which will cause an unimplemented data type
   9638 # exception to be put into the machine which will be caught and corrected
   9639 # later. we don't do this with the DENORMs above because this method
   9640 # is slower. but, don't fret, I don't see it being used much either.
   9641 	fmov.l		(%sp)+,%fpcr		# restore user fpcr
   9642 	mov.l		&0x80000000,%d1		# load normalized mantissa
   9643 	subi.l		&-0x3fff,%d0		# how many should we shift?
   9644 	neg.l		%d0			# make it positive
   9645 	cmpi.b		%d0,&0x20		# is it >= 32?
   9646 	bge.b		sok_dnrm_32		# yes
        # the three pushes below build the multiplier on the stack from the
        # highest address down: lo mantissa, hi mantissa, then exponent.
   9647 	lsr.l		%d0,%d1			# no; bit stays in upper lw
   9648 	clr.l		-(%sp)			# insert zero low mantissa
   9649 	mov.l		%d1,-(%sp)		# insert new high mantissa
   9650 	clr.l		-(%sp)			# make zero exponent
   9651 	bra.b		sok_norm_cont
   9652 sok_dnrm_32:
   9653 	subi.b		&0x20,%d0		# get shift count
   9654 	lsr.l		%d0,%d1			# make low mantissa longword
   9655 	mov.l		%d1,-(%sp)		# insert new low mantissa
   9656 	clr.l		-(%sp)			# insert zero high mantissa
   9657 	clr.l		-(%sp)			# make zero exponent
   9658 	bra.b		sok_norm_cont
   9659 
   9660 # the src will force the dst to a DENORM value or worse. so, let's
   9661 # create an fp multiply that will create the result.
   9662 sok_norm:
   9663 	fmovm.x		DST(%a1),&0x80		# load fp0 with normalized dst
   9664 sok_norm2:
   9665 	fmov.l		(%sp)+,%fpcr		# restore user fpcr
   9666 
        # build the multiplier 2^(int src) on the stack: exponent longword
        # ends up at the lowest address, followed by hi and lo mantissa.
   9667 	addi.w		&0x3fff,%d0		# turn src amt into exp value
   9668 	swap		%d0			# put exponent in high word
   9669 	clr.l		-(%sp)			# insert zero lo mantissa
   9670 	mov.l		&0x80000000,-(%sp)	# insert new high mantissa
   9671 	mov.l		%d0,-(%sp)		# insert new exponent
   9672 
   9673 sok_norm_cont:
   9674 	fmov.l		%fpcr,%d0		# d0 needs fpcr for t_catch2
   9675 	mov.b		&FMUL_OP,%d1		# last inst is MUL
   9676 	fmul.x		(%sp)+,%fp0		# do the multiply
   9677 	bra		t_catch2		# catch any exceptions
   9678 
   9679 #
   9680 # Source is outside of 2^14 range.  Test the sign and branch
   9681 # to the appropriate exception handler.
   9682 #
   9683 src_out:
   9684 	mov.l		(%sp)+,%d0		# restore ctrl bits
   9685 	exg		%a0,%a1			# swap src,dst ptrs
   9686 	tst.b		SRC_EX(%a1)		# is src negative? (a1 = src now)
   9687 	bmi		t_unfl			# yes; underflow
   9688 	bra		t_ovfl_sc		# no; overflow
   9689 
   9690 #
   9691 # The source input is below 1, so we check for denormalized numbers
   9692 # and set unfl.
   9693 #
   9694 src_small:
   9695 	tst.b		DST_HI(%a1)		# is dst denormalized?
   9696 	bpl.b		ssmall_done		# yes
   9697 
   9698 	mov.l		(%sp)+,%d0		# pop ctrl bits
   9699 	fmov.l		%d0,%fpcr		# no; load control bits
   9700 	mov.b		&FMOV_OP,%d1		# last inst is MOVE
   9701 	fmov.x		DST(%a1),%fp0		# simply return dest
   9702 	bra		t_catch2
   9703 ssmall_done:
   9704 	mov.l		(%sp)+,%d0		# load control bits into d0
   9705 	mov.l		%a1,%a0			# pass ptr to dst
   9706 	bra		t_resdnrm
   9707 
   9708 #########################################################################
   9709 # smod(): computes the fp MOD of the input values X,Y.			#
   9710 # srem(): computes the fp (IEEE) REM of the input values X,Y.		#
   9711 #									#
   9712 # INPUT *************************************************************** #
   9713 #	a0 = pointer to extended precision input Y (source operand)	#
   9714 #	a1 = pointer to extended precision input X (dest operand)	#
   9715 #	d0 = round precision,mode					#
   9716 #									#
   9717 # 	The input operands X and Y can be either normalized or 		#
   9718 #	denormalized.							#
   9719 #									#
   9720 # OUTPUT ************************************************************** #
   9721 #      fp0 = FREM(X,Y) or FMOD(X,Y)					#
   9722 #									#
   9723 # ALGORITHM *********************************************************** #
   9724 #									#
   9725 #       Step 1.  Save and strip signs of X and Y: signX := sign(X),	#
   9726 #                signY := sign(Y), X := |X|, Y := |Y|, 			#
   9727 #                signQ := signX EOR signY. Record whether MOD or REM	#
   9728 #                is requested.						#
   9729 #									#
   9730 #       Step 2.  Set L := expo(X)-expo(Y), k := 0, Q := 0.		#
   9731 #                If (L < 0) then					#
   9732 #                   R := X, go to Step 4.				#
   9733 #                else							#
   9734 #                   R := 2^(-L)X, j := L.				#
   9735 #                endif							#
   9736 #									#
   9737 #       Step 3.  Perform MOD(X,Y)					#
   9738 #            3.1 If R = Y, go to Step 9.				#
   9739 #            3.2 If R > Y, then { R := R - Y, Q := Q + 1}		#
   9740 #            3.3 If j = 0, go to Step 4.				#
   9741 #            3.4 k := k + 1, j := j - 1, Q := 2Q, R := 2R. Go to	#
   9742 #                Step 3.1.						#
   9743 #									#
   9744 #       Step 4.  At this point, R = X - QY = MOD(X,Y). Set		#
   9745 #                Last_Subtract := false (used in Step 7 below). If	#
   9746 #                MOD is requested, go to Step 6. 			#
   9747 #									#
   9748 #       Step 5.  R = MOD(X,Y), but REM(X,Y) is requested.		#
   9749 #            5.1 If R < Y/2, then R = MOD(X,Y) = REM(X,Y). Go to	#
   9750 #                Step 6.						#
   9751 #            5.2 If R > Y/2, then { set Last_Subtract := true,		#
   9752 #                Q := Q + 1, Y := signY*Y }. Go to Step 6.		#
   9753 #            5.3 This is the tricky case of R = Y/2. If Q is odd,	#
   9754 #                then { Q := Q + 1, signX := -signX }.			#
   9755 #									#
   9756 #       Step 6.  R := signX*R.						#
   9757 #									#
   9758 #       Step 7.  If Last_Subtract = true, R := R - Y.			#
   9759 #									#
   9760 #       Step 8.  Return signQ, last 7 bits of Q, and R as required.	#
   9761 #									#
   9762 #       Step 9.  At this point, R = 2^(-j)*X - Q Y = Y. Thus,		#
   9763 #                X = 2^(j)*(Q+1)Y. set Q := 2^(j)*(Q+1),		#
   9764 #                R := 0. Return signQ, last 7 bits of Q, and R.		#
   9765 #									#
   9766 #########################################################################
   9767 
   9768 	set		Mod_Flag,L_SCR3
   9769 	set		Sc_Flag,L_SCR3+1
        # Mod_Flag (byte): 0 for smod, 1 for srem.
        # Sc_Flag  (byte): non-zero when the result in fp0 still has to be
        #                  multiplied by Scale before returning.
   9770 
   9771 	set		SignY,L_SCR2
   9772 	set		SignX,L_SCR2+2
   9773 	set		SignQ,L_SCR3+2
        # SignY/SignX (words): sign/exponent words of Y and X as fetched.
        # SignQ (word): $8000 if sign(X) != sign(Y), else 0.
   9774 
   9775 	set		Y,FP_SCR0
   9776 	set		Y_Hi,Y+4
   9777 	set		Y_Lo,Y+8
   9778 
   9779 	set		R,FP_SCR1
   9780 	set		R_Hi,R+4
   9781 	set		R_Lo,R+8
   9782 
        # Multiplier applied by "fmul.x Scale(%pc),%fp0" when Sc_Flag is set.
        # It has exponent word $0001 and mantissa $80000000_00000000.
        # NOTE(review): presumably this removes the extra $3FFE of exponent
        # bias that Do_Scale leaves in R -- confirm.
   9783 Scale:
   9784 	long		0x00010000,0x80000000,0x00000000,0x00000000
   9785 
   9786 	global		smod
        # smod and srem differ only in Mod_Flag; both clear the quotient
        # byte, push the ctrl bits from d0, and continue at Mod_Rem.
   9787 smod:
   9788 	clr.b		FPSR_QBYTE(%a6)		# start with a zero quotient byte
   9789 	mov.l		%d0,-(%sp)		# save ctrl bits
   9790 	clr.b		Mod_Flag(%a6)		# Mod_Flag = 0: MOD requested
   9791 	bra.b		Mod_Rem
   9792 
   9793 	global		srem
   9794 srem:
   9795 	clr.b		FPSR_QBYTE(%a6)		# start with a zero quotient byte
   9796 	mov.l		%d0,-(%sp)		# save ctrl bits
   9797 	mov.b		&0x1,Mod_Flag(%a6)	# Mod_Flag = 1: REM requested
   9798 
   9799 Mod_Rem:
        # Step 1: fetch Y from the operand at (a0) and X from the operand at
        # (a1), record their signs and sign(Q), and normalize both so that
        # (D3,D4,D5) = |Y| and (D0,D1,D2) = |X| with bias $7FFD exponents.
   9800 #..Save sign of X and Y
   9801 	movm.l		&0x3f00,-(%sp)		# save data registers d2-d7
   9802 	mov.w		SRC_EX(%a0),%d3		# sign/exponent word of Y
   9803 	mov.w		%d3,SignY(%a6)
   9804 	and.l		&0x00007FFF,%d3		# Y := |Y|
   9805 
   9806 #
   9807 	mov.l		SRC_HI(%a0),%d4
   9808 	mov.l		SRC_LO(%a0),%d5		# (D3,D4,D5) is |Y|
   9809 
   9810 	tst.l		%d3			# zero exponent (Y denormalized)?
   9811 	bne.b		Y_Normal		# no
   9812 
   9813 	mov.l		&0x00003FFE,%d3		# $3FFD + 1
   9814 	tst.l		%d4			# any bits in hi(Y)?
   9815 	bne.b		HiY_not0		# yes
   9816 
   9817 HiY_0:
        # hi(Y) is zero: shift the mantissa up by a whole longword first.
   9818 	mov.l		%d5,%d4
   9819 	clr.l		%d5
   9820 	sub.l		&32,%d3
   9821 	clr.l		%d6
   9822 	bfffo		%d4{&0:&32},%d6		# d6 = # of leading zero bits
   9823 	lsl.l		%d6,%d4
   9824 	sub.l		%d6,%d3			# (D3,D4,D5) is normalized
   9825 #	                                        ...with bias $7FFD
   9826 	bra.b		Chk_X
   9827 
   9828 HiY_not0:
        # shift the 64-bit mantissa (D4,D5) left by the leading-zero count.
   9829 	clr.l		%d6
   9830 	bfffo		%d4{&0:&32},%d6		# d6 = # of leading zero bits
   9831 	sub.l		%d6,%d3
   9832 	lsl.l		%d6,%d4
   9833 	mov.l		%d5,%d7			# a copy of D5
   9834 	lsl.l		%d6,%d5
   9835 	neg.l		%d6
   9836 	add.l		&32,%d6			# d6 = 32 - shift count
   9837 	lsr.l		%d6,%d7			# bits carried from lo into hi
   9838 	or.l		%d7,%d4			# (D3,D4,D5) normalized
   9839 #                                       ...with bias $7FFD
   9840 	bra.b		Chk_X
   9841 
   9842 Y_Normal:
   9843 	add.l		&0x00003FFE,%d3		# (D3,D4,D5) normalized
   9844 #                                       ...with bias $7FFD
   9845 
   9846 Chk_X:
   9847 	mov.w		DST_EX(%a1),%d0		# sign/exponent word of X
   9848 	mov.w		%d0,SignX(%a6)
   9849 	mov.w		SignY(%a6),%d1
   9850 	eor.l		%d0,%d1			# bit 15 = sign(X) xor sign(Y)
   9851 	and.l		&0x00008000,%d1
   9852 	mov.w		%d1,SignQ(%a6)		# sign(Q) obtained
   9853 	and.l		&0x00007FFF,%d0		# X := |X|
   9854 	mov.l		DST_HI(%a1),%d1
   9855 	mov.l		DST_LO(%a1),%d2		# (D0,D1,D2) is |X|
   9856 	tst.l		%d0			# zero exponent (X denormalized)?
   9857 	bne.b		X_Normal		# no
   9858 	mov.l		&0x00003FFE,%d0		# $3FFD + 1
   9859 	tst.l		%d1			# any bits in hi(X)?
   9860 	bne.b		HiX_not0		# yes
   9861 
   9862 HiX_0:
        # hi(X) is zero: shift the mantissa up by a whole longword first.
   9863 	mov.l		%d2,%d1
   9864 	clr.l		%d2
   9865 	sub.l		&32,%d0
   9866 	clr.l		%d6
   9867 	bfffo		%d1{&0:&32},%d6		# d6 = # of leading zero bits
   9868 	lsl.l		%d6,%d1
   9869 	sub.l		%d6,%d0			# (D0,D1,D2) is normalized
   9870 #                                       ...with bias $7FFD
   9871 	bra.b		Init
   9872 
   9873 HiX_not0:
        # shift the 64-bit mantissa (D1,D2) left by the leading-zero count.
   9874 	clr.l		%d6
   9875 	bfffo		%d1{&0:&32},%d6		# d6 = # of leading zero bits
   9876 	sub.l		%d6,%d0
   9877 	lsl.l		%d6,%d1
   9878 	mov.l		%d2,%d7			# a copy of D2
   9879 	lsl.l		%d6,%d2
   9880 	neg.l		%d6
   9881 	add.l		&32,%d6			# d6 = 32 - shift count
   9882 	lsr.l		%d6,%d7			# bits carried from lo into hi
   9883 	or.l		%d7,%d1			# (D0,D1,D2) normalized
   9884 #                                       ...with bias $7FFD
   9885 	bra.b		Init
   9886 
   9887 X_Normal:
   9888 	add.l		&0x00003FFE,%d0		# (D0,D1,D2) normalized
   9889 #                                       ...with bias $7FFD
   9890 
   9891 Init:
        # Steps 2 and 3: shift-and-subtract division on the mantissas.
        # D0 = j (iterations left), D3 = Q, (D6,D1,D2) = R with D6 acting as
        # the carry out of the 64-bit mantissa, (D4,D5) = mantissa of Y.
   9892 #
   9893 	mov.l		%d3,L_SCR1(%a6)		# save biased exp(Y)
   9894 	mov.l		%d0,-(%sp)		# save biased exp(X)
   9895 	sub.l		%d3,%d0			# L := expo(X)-expo(Y)
   9896 
   9897 	clr.l		%d6			# D6 := carry <- 0
   9898 	clr.l		%d3			# D3 is Q
   9899 	mov.l		&0,%a1			# A1 is k; j+k=L, Q=0
   9900 
   9901 #..(Carry,D1,D2) is R
   9902 	tst.l		%d0			# is L >= 0?
   9903 	bge.b		Mod_Loop_pre		# yes; run the loop
   9904 
   9905 #..expo(X) < expo(Y). Thus X = mod(X,Y)
   9906 #
   9907 	mov.l		(%sp)+,%d0		# restore d0
   9908 	bra.w		Get_Mod
   9909 
   9910 Mod_Loop_pre:
   9911 	addq.l		&0x4,%sp		# erase exp(X)
   9912 #..At this point  R = 2^(-L)X; Q = 0; k = 0; and  k+j = L
   9913 Mod_Loop:
   9914 	tst.l		%d6			# test carry bit
   9915 	bgt.b		R_GT_Y			# carry set: R exceeds Y
   9916 
   9917 #..At this point carry = 0, R = (D1,D2), Y = (D4,D5)
   9918 	cmp.l		%d1,%d4			# compare hi(R) and hi(Y)
   9919 	bne.b		R_NE_Y
   9920 	cmp.l		%d2,%d5			# compare lo(R) and lo(Y)
   9921 	bne.b		R_NE_Y
   9922 
   9923 #..At this point, R = Y
   9924 	bra.w		Rem_is_0
   9925 
   9926 R_NE_Y:
   9927 #..use the borrow of the previous compare
        # (the bne that got us here does not alter the condition codes.)
   9928 	bcs.b		R_LT_Y			# borrow is set iff R < Y
   9929 
   9930 R_GT_Y:
   9931 #..If Carry is set, then Y < (Carry,D1,D2) < 2Y. Otherwise, Carry = 0
   9932 #..and Y < (D1,D2) < 2Y. Either way, perform R - Y
   9933 	sub.l		%d5,%d2			# lo(R) - lo(Y)
   9934 	subx.l		%d4,%d1			# hi(R) - hi(Y)
   9935 	clr.l		%d6			# clear carry
   9936 	addq.l		&1,%d3			# Q := Q + 1
   9937 
   9938 R_LT_Y:
   9939 #..At this point, Carry=0, R < Y. R = 2^(k-L)X - QY; k+j = L; j >= 0.
   9940 	tst.l		%d0			# see if j = 0.
   9941 	beq.b		PostLoop
   9942 
   9943 	add.l		%d3,%d3			# Q := 2Q
   9944 	add.l		%d2,%d2			# lo(R) = 2lo(R)
   9945 	roxl.l		&1,%d1			# hi(R) = 2hi(R) + carry
   9946 	scs		%d6			# set Carry if 2(R) overflows
   9947 	addq.l		&1,%a1			# k := k+1
   9948 	subq.l		&1,%d0			# j := j - 1
   9949 #..At this point, R=(Carry,D1,D2) = 2^(k-L)X - QY, j+k=L, j >= 0, R < 2Y.
   9950 
   9951 	bra.b		Mod_Loop
   9952 
   9953 PostLoop:
   9954 #..k = L, j = 0, Carry = 0, R = (D1,D2) = X - QY, R < Y.
   9955 
   9956 #..normalize R.
   9957 	mov.l		L_SCR1(%a6),%d0		# new biased expo of R
   9958 	tst.l		%d1			# any bits in hi(R)?
   9959 	bne.b		HiR_not0		# yes
   9960 
   9961 HiR_0:
        # hi(R) is zero: shift the mantissa up by a whole longword first.
   9962 	mov.l		%d2,%d1
   9963 	clr.l		%d2
   9964 	sub.l		&32,%d0
   9965 	clr.l		%d6
   9966 	bfffo		%d1{&0:&32},%d6		# d6 = # of leading zero bits
   9967 	lsl.l		%d6,%d1
   9968 	sub.l		%d6,%d0			# (D0,D1,D2) is normalized
   9969 #                                       ...with bias $7FFD
   9970 	bra.b		Get_Mod
   9971 
   9972 HiR_not0:
   9973 	clr.l		%d6
   9974 	bfffo		%d1{&0:&32},%d6		# d6 = # of leading zero bits
   9975 	bmi.b		Get_Mod			# already normalized
   9976 	sub.l		%d6,%d0
   9977 	lsl.l		%d6,%d1
   9978 	mov.l		%d2,%d7			# a copy of D2
   9979 	lsl.l		%d6,%d2
   9980 	neg.l		%d6
   9981 	add.l		&32,%d6			# d6 = 32 - shift count
   9982 	lsr.l		%d6,%d7			# bits carried from lo into hi
   9983 	or.l		%d7,%d1			# (D0,D1,D2) normalized
   9984 
   9985 #
        # Step 4: store R = (D0,D1,D2) and Y = (L_SCR1,D4,D5) as extended
        # operands and load R into fp0. Exponents here carry bias $7FFD.
        # If expo(R) >= $41FE the extra $3FFE is subtracted now (No_Scale);
        # otherwise it is left in place and Sc_Flag is set so that the
        # result gets multiplied by Scale later (Do_Scale).
   9986 Get_Mod:
   9987 	cmp.l		%d0,&0x000041FE
   9988 	bge.b		No_Scale
   9989 Do_Scale:
   9990 	mov.w		%d0,R(%a6)		# exponent of R, bias untouched
   9991 	mov.l		%d1,R_Hi(%a6)
   9992 	mov.l		%d2,R_Lo(%a6)
   9993 	mov.l		L_SCR1(%a6),%d6
   9994 	mov.w		%d6,Y(%a6)		# exponent of Y, bias untouched
   9995 	mov.l		%d4,Y_Hi(%a6)
   9996 	mov.l		%d5,Y_Lo(%a6)
   9997 	fmov.x		R(%a6),%fp0		# no exception
   9998 	mov.b		&1,Sc_Flag(%a6)		# result must be scaled later
   9999 	bra.b		ModOrRem
   10000 No_Scale:
   10001 	mov.l		%d1,R_Hi(%a6)
   10002 	mov.l		%d2,R_Lo(%a6)
   10003 	sub.l		&0x3FFE,%d0		# remove the extra bias from R
   10004 	mov.w		%d0,R(%a6)
   10005 	mov.l		L_SCR1(%a6),%d6
   10006 	sub.l		&0x3FFE,%d6		# remove the extra bias from Y
   10007 	mov.l		%d6,L_SCR1(%a6)
   10008 	fmov.x		R(%a6),%fp0
   10009 	mov.w		%d6,Y(%a6)
   10010 	mov.l		%d4,Y_Hi(%a6)
   10011 	mov.l		%d5,Y_Lo(%a6)
   10012 	clr.b		Sc_Flag(%a6)		# no scaling needed later
   10013 
   10014 #
   10015 ModOrRem:
         # Steps 5-8. fp0 = R = MOD(X,Y). For REM, compare R against Y/2
         # (exponent first, then mantissa) and subtract Y once if R > Y/2.
   10016 	tst.b		Mod_Flag(%a6)		# is REM requested?
   10017 	beq.b		Fix_Sign		# no; MOD is done
   10018 
   10019 	mov.l		L_SCR1(%a6),%d6		# new biased expo(Y)
   10020 	subq.l		&1,%d6			# biased expo(Y/2)
   10021 	cmp.l		%d0,%d6			# expo(R) vs expo(Y/2)
   10022 	blt.b		Fix_Sign		# R < Y/2
   10023 	bgt.b		Last_Sub		# R > Y/2
   10024 
         # exponents are equal: compare the mantissas.
   10025 	cmp.l		%d1,%d4
   10026 	bne.b		Not_EQ
   10027 	cmp.l		%d2,%d5
   10028 	bne.b		Not_EQ
   10029 	bra.w		Tie_Case		# R = Y/2 exactly
   10030 
   10031 Not_EQ:
   10032 	bcs.b		Fix_Sign		# borrow set: R < Y/2
   10033 
   10034 Last_Sub:
   10035 #
   10036 	fsub.x		Y(%a6),%fp0		# no exceptions
   10037 	addq.l		&1,%d3			# Q := Q + 1
   10038 
   10039 #
   10040 Fix_Sign:
   10041 #..Get sign of X
   10042 	mov.w		SignX(%a6),%d6		# N flag = sign bit of SignX
   10043 	bge.b		Get_Q			# sign clear; leave fp0 as is
   10044 	fneg.x		%fp0
   10045 
   10046 #..Get Q
   10047 #
   10048 Get_Q:
   10049 	clr.l		%d6
   10050 	mov.w		SignQ(%a6),%d6		# D6 is sign(Q)
   10051 	mov.l		&8,%d7
   10052 	lsr.l		%d7,%d6			# move sign from bit 15 to bit 7
   10053 	and.l		&0x0000007F,%d3		# 7 bits of Q
   10054 	or.l		%d6,%d3			# sign and bits of Q
   10055 #	swap		%d3
   10056 #	fmov.l		%fpsr,%d6
   10057 #	and.l		&0xFF00FFFF,%d6
   10058 #	or.l		%d3,%d6
   10059 #	fmov.l		%d6,%fpsr		# put Q in fpsr
   10060 	mov.b		%d3,FPSR_QBYTE(%a6)	# put Q in fpsr
   10061 
   10062 #
   10063 Restore:
         # Common exit: undo the pushes made at Mod_Rem and at smod/srem
         # entry, load the ctrl bits into fpcr, and finish with one last fp
         # instruction whose exceptions t_catch2 handles.
   10064 	movm.l		(%sp)+,&0xfc		#  {%d2-%d7}
   10065 	mov.l		(%sp)+,%d0		# ctrl bits pushed at entry
   10066 	fmov.l		%d0,%fpcr
   10067 	tst.b		Sc_Flag(%a6)		# does fp0 still need scaling?
   10068 	beq.b		Finish			# no
   10069 	mov.b		&FMUL_OP,%d1		# last inst is MUL
   10070 	fmul.x		Scale(%pc),%fp0		# may cause underflow
   10071 	bra		t_catch2
   10072 # the '040 package did this apparently to see if the dst operand for the
   10073 # preceding fmul was a denorm. but, it better not have been since the
   10074 # algorithm just got done playing with fp0 and expected no exceptions
   10075 # as a result. trust me...
   10076 #	bra		t_avoid_unsupp		# check for denorm as a
   10077 #						;result of the scaling
   10078 
   10079 Finish:
   10080 	mov.b		&FMOV_OP,%d1		# last inst is MOVE
   10081 	fmov.x		%fp0,%fp0		# capture exceptions & round
   10082 	bra		t_catch2
   10083 
   10084 Rem_is_0:
   10085 #..R = 2^(-j)X - Q Y = Y, thus R = 0 and quotient = 2^j (Q+1)
   10086 	addq.l		&1,%d3			# Q := Q + 1
   10087 	cmp.l		%d0,&8			# D0 is j
   10088 	bge.b		Q_Big			# j >= 8: low 7 bits of Q are 0
   10089 
   10090 	lsl.l		%d0,%d3			# Q := 2^j * Q
   10091 	bra.b		Set_R_0
   10092 
   10093 Q_Big:
   10094 	clr.l		%d3
   10095 
   10096 Set_R_0:
   10097 	fmov.s		&0x00000000,%fp0	# R := +0 (Fix_Sign sets sign)
   10098 	clr.b		Sc_Flag(%a6)		# zero needs no scaling
   10099 	bra.w		Fix_Sign
   10100 
         # Step 5.3: R = Y/2 exactly. An even Q is left alone; an odd Q is
         # bumped to Q+1 and the sign recorded in SignX is flipped.
   10101 Tie_Case:
   10102 #..Check parity of Q
   10103 	mov.l		%d3,%d6
   10104 	and.l		&0x00000001,%d6
   10105 	tst.l		%d6
   10106 	beq.w		Fix_Sign		# Q is even
   10107 
   10108 #..Q is odd, Q := Q + 1, signX := -signX
   10109 	addq.l		&1,%d3
   10110 	mov.w		SignX(%a6),%d6
   10111 	eor.l		&0x00008000,%d6		# toggle the sign bit
   10112 	mov.w		%d6,SignX(%a6)
   10113 	bra.w		Fix_Sign
   10114 
   10115 qnan:	long		0x7fff0000, 0xffffffff, 0xffffffff	# default NAN loaded by t_operr
   10116 
   10117 #########################################################################
   10118 # XDEF ****************************************************************	#
   10119 #	t_dz(): Handle DZ exception during transcendental emulation.	#
   10120 #	        Sets N bit according to sign of source operand.		#
   10121 #	t_dz2(): Handle DZ exception during transcendental emulation.	#
   10122 #		 Sets N bit always.					#
   10123 #									#
   10124 # XREF ****************************************************************	#
   10125 #	None								#
   10126 #									#
   10127 # INPUT ***************************************************************	#
   10128 #	a0 = pointer to source operand					#
   10129 # 									#
   10130 # OUTPUT **************************************************************	#
   10131 #	fp0 = default result						#
   10132 #									#
   10133 # ALGORITHM ***********************************************************	#
   10134 #	- Store properly signed INF into fp0.				#
   10135 #	- Set FPSR exception status dz bit, ccode inf bit, and 		#
   10136 #	  accrued dz bit.						#
   10137 #									#
   10138 #########################################################################
   10139 
   10140 	global		t_dz
   10141 t_dz:
         # return +INF or -INF in fp0 according to the sign of the operand at
         # (a0); t_dz2 may also be entered directly to force -INF.
   10142 	tst.b		SRC_EX(%a0) 		# is src negative?
   10143 	bmi.b		t_dz2			# yes
   10144 
   10145 dz_pinf:
   10146 	fmov.s		&0x7f800000,%fp0	# return +INF in fp0
   10147 	ori.l		&dzinf_mask,USER_FPSR(%a6) # set I/DZ/ADZ
   10148 	rts
   10149 
   10150 	global		t_dz2
   10151 t_dz2:
   10152 	fmov.s		&0xff800000,%fp0	# return -INF in fp0
   10153 	ori.l		&dzinf_mask+neg_mask,USER_FPSR(%a6) # set N/I/DZ/ADZ
   10154 	rts
   10155 
   10156 #################################################################
   10157 # OPERR exception:						#
   10158 #	- set FPSR exception status operr bit, condition code 	#
   10159 #	  nan bit; Store default NAN into fp0			#
   10160 #################################################################
   10161 	global		t_operr
   10162 t_operr:
         # flag the operand error in the user's FPSR image and hand back the
         # qnan constant defined in this file as the result.
   10163 	ori.l		&opnan_mask,USER_FPSR(%a6) # set NaN/OPERR/AIOP
   10164 	fmovm.x		qnan(%pc),&0x80		# return default NAN in fp0
   10165 	rts
   10166 
   10167 #################################################################
   10168 # Extended DENORM:						#
   10169 # 	- For all functions that have a denormalized input and	#
   10170 #	  that f(x)=x, this is the entry point.			#
   10171 #	- we only return the EXOP here if either underflow or	#
   10172 #	  inexact is enabled.					#
   10173 #################################################################
   10174 
   10175 # Entry point for scale w/ extended denorm. The function does
   10176 # NOT set INEX2/AUNFL/AINEX.
   10177 	global		t_resdnrm
   10178 t_resdnrm:
         # in: a0 = ptr to the denormalized operand, d0 = rnd prec,mode.
   10179 	ori.l		&unfl_mask,USER_FPSR(%a6) # set UNFL
   10180 	bra.b		xdnrm_con
   10181 
   10182 	global		t_extdnrm
   10183 t_extdnrm:
         # same inputs as t_resdnrm; differs only in the FPSR bits that are set.
   10184 	ori.l		&unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX
   10185 
   10186 xdnrm_con:
   10187 	mov.l		%a0,%a1			# make copy of src ptr
   10188 	mov.l		%d0,%d1			# make copy of rnd prec,mode
   10189 	andi.b		&0xc0,%d1		# extended precision?
   10190 	bne.b		xdnrm_sd		# no
   10191 
   10192 # result precision is extended.
   10193 	tst.b		LOCAL_EX(%a0)		# is denorm negative?
   10194 	bpl.b		xdnrm_exit		# no
   10195 
   10196 	bset		&neg_bit,FPSR_CC(%a6)	# yes; set 'N' ccode bit
   10197 	bra.b		xdnrm_exit
   10198 
   10199 # result precision is single or double
   10200 xdnrm_sd:
   10201 	mov.l		%a1,-(%sp)		# keep src ptr across unf_sub
   10202 	tst.b		LOCAL_EX(%a0)		# is denorm pos or neg?
   10203 	smi.b		%d1			# set d1 accordingly
   10204 	bsr.l		unf_sub
   10205 	mov.l		(%sp)+,%a1		# a1 = original src ptr again
   10206 xdnrm_exit:
   10207 	fmovm.x		(%a0),&0x80		# return default result in fp0
   10208 
   10209 	mov.b		FPCR_ENABLE(%a6),%d0
   10210 	andi.b		&0x0a,%d0		# is UNFL or INEX enabled?
   10211 	bne.b		xdnrm_ena		# yes
   10212 	rts
   10213 
   10214 ################
   10215 # unfl enabled #
   10216 ################
   10217 # we have a DENORM that needs to be converted into an EXOP.
   10218 # so, normalize the mantissa, add 0x6000 to the new exponent,
   10219 # and return the result in fp1.
   10220 xdnrm_ena:
         # a1 still points at the original denorm; work on a copy in FP_SCR0.
   10221 	mov.w		LOCAL_EX(%a1),FP_SCR0_EX(%a6)
   10222 	mov.l		LOCAL_HI(%a1),FP_SCR0_HI(%a6)
   10223 	mov.l		LOCAL_LO(%a1),FP_SCR0_LO(%a6)
   10224 
   10225 	lea		FP_SCR0(%a6),%a0
   10226 	bsr.l		norm			# normalize mantissa
   10227 	addi.l		&0x6000,%d0		# add extra bias
   10228 	andi.w		&0x8000,FP_SCR0_EX(%a6)	# keep old sign
   10229 	or.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
   10230 
   10231 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   10232 	rts
   10233 
   10234 #################################################################
   10235 # UNFL exception:						#
   10236 # 	- This routine is for cases where even an EXOP isn't	#
   10237 #  	  large enough to hold the range of this result.	#
   10238 #	  In such a case, the EXOP equals zero.			#
   10239 #  	- Return the default result to the proper precision 	#
   10240 #	  with the sign of this result being the same as that	#
   10241 #	  of the src operand.					#
   10242 # 	- t_unfl2() is provided to force the result sign to 	#
   10243 #	  positive which is the desired result for fetox().	#
   10244 #################################################################
   10245 	global		t_unfl
   10246 t_unfl:
         # in: a0 = ptr to the operand whose sign the result takes. d1.b is
         # set to $ff for negative or $00 for positive before calling unf_sub.
   10247 	ori.l		&unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX
   10248 
   10249 	tst.b		(%a0)			# is result pos or neg?
   10250 	smi.b		%d1			# set d1 accordingly
   10251 	bsr.l		unf_sub			# calc default unfl result
   10252 	fmovm.x		(%a0),&0x80		# return default result in fp0
   10253 
   10254 	fmov.s		&0x00000000,%fp1	# return EXOP in fp1
   10255 	rts
   10256 
   10257 # t_unfl2 ALWAYS tells unf_sub to create a positive result
   10258 	global		t_unfl2
   10259 t_unfl2:
   10260 	ori.l		&unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX
   10261 
   10262 	sf.b		%d1			# set d0 to represent positive
   10263 	bsr.l		unf_sub			# calc default unfl result
   10264 	fmovm.x		(%a0),&0x80		# return default result in fp0
   10265 
   10266 	fmov.s		&0x0000000,%fp1		# return EXOP in fp1
   10267 	rts
   10268 
   10269 #################################################################
   10270 # OVFL exception:						#
   10271 # 	- This routine is for cases where even an EXOP isn't	#
   10272 #  	  large enough to hold the range of this result.	#
   10273 # 	- Return the default result to the proper precision 	#
   10274 #	  with the sign of this result being the same as that 	#
   10275 #	  of the src operand.					#
   10276 # 	- t_ovfl2() is provided to force the result sign to 	#
   10277 #	  positive which is the desired result for fcosh().	#
   10278 # 	- t_ovfl_sc() is provided for scale() which only sets 	#
   10279 #	  the inexact bits if the number is inexact for the 	#
   10280 #	  precision indicated.					#
   10281 #################################################################
   10282 
   10283 	global		t_ovfl_sc
   10284 t_ovfl_sc:
   10285 	ori.l		&ovfl_inx_mask,USER_FPSR(%a6) # set OVFL/AOVFL/AINEX
   10286 
   10287 	mov.b		%d0,%d1			# fetch rnd mode/prec
   10288 	andi.b		&0xc0,%d1		# extract rnd prec
   10289 	beq.b		ovfl_work		# prec is extended
   10290 
   10291 	tst.b		LOCAL_HI(%a0)		# is dst a DENORM (mantissa msb clear)?
   10292 	bmi.b		ovfl_sc_norm		# no
   10293 
   10294 # dst op is a DENORM. we have to normalize the mantissa to see if the
   10295 # result would be inexact for the given precision. make a copy of the
   10296 # dst so we don't screw up the version passed to us.
   10297 	mov.w		LOCAL_EX(%a0),FP_SCR0_EX(%a6)
   10298 	mov.l		LOCAL_HI(%a0),FP_SCR0_HI(%a6)
   10299 	mov.l		LOCAL_LO(%a0),FP_SCR0_LO(%a6)
   10300 	lea		FP_SCR0(%a6),%a0	# pass ptr to FP_SCR0
   10301 	movm.l		&0xc080,-(%sp)		# save d0-d1/a0
   10302 	bsr.l		norm			# normalize mantissa
   10303 	movm.l		(%sp)+,&0x0103		# restore d0-d1/a0
   10304 
   10305 ovfl_sc_norm:
   10306 	cmpi.b		%d1,&0x40		# is prec sgl?
   10307 	bne.b		ovfl_sc_dbl		# no; dbl
   10308 ovfl_sc_sgl:
   10309 	tst.l		LOCAL_LO(%a0)		# is lo lw of sgl set?
   10310 	bne.b		ovfl_sc_inx		# yes
   10311 	tst.b		3+LOCAL_HI(%a0)		# is lo byte of hi lw set?
   10312 	bne.b		ovfl_sc_inx		# yes
   10313 	bra.b		ovfl_work		# don't set INEX2
   10314 ovfl_sc_dbl:
   10315 	mov.l		LOCAL_LO(%a0),%d1	# are any of lo 11 bits of
   10316 	andi.l		&0x7ff,%d1		# dbl mantissa set?
   10317 	beq.b		ovfl_work		# no; don't set INEX2
   10318 ovfl_sc_inx:
   10319 	ori.l		&inex2_mask,USER_FPSR(%a6) # set INEX2
   10320 	bra.b		ovfl_work		# continue
   10321 
   10322 	global		t_ovfl
   10323 t_ovfl:
   10324 	ori.l		&ovfinx_mask,USER_FPSR(%a6) # set OVFL/INEX2/AOVFL/AINEX
   10325 
   10326 ovfl_work:
   10327 	tst.b		LOCAL_EX(%a0)		# what is the sign?
   10328 	smi.b		%d1			# set d1 accordingly
   10329 	bsr.l		ovf_res			# calc default ovfl result; d0 = ccodes
   10330 	mov.b		%d0,FPSR_CC(%a6)	# insert new ccodes
   10331 	fmovm.x		(%a0),&0x80		# return default result in fp0
   10332 
   10333 	fmov.s		&0x00000000,%fp1	# return EXOP (zero) in fp1
   10334 	rts
   10335 
   10336 # t_ovfl2 ALWAYS tells ovf_res to create a positive result
   10337 	global		t_ovfl2
   10338 t_ovfl2:
   10339 	ori.l		&ovfinx_mask,USER_FPSR(%a6) # set OVFL/INEX2/AOVFL/AINEX
   10340 
   10341 	sf.b		%d1			# clear sign flag for positive
   10342 	bsr.l		ovf_res			# calc default ovfl result; d0 = ccodes
   10343 	mov.b		%d0,FPSR_CC(%a6)	# insert new ccodes
   10344 	fmovm.x		(%a0),&0x80		# return default result in fp0
   10345 
   10346 	fmov.s		&0x00000000,%fp1	# return EXOP (zero) in fp1
   10347 	rts
   10348 
   10349 #################################################################
   10350 # t_catch(): 							#
   10351 #	- the last operation of a transcendental emulation	#
   10352 # 	  routine may have caused an underflow or overflow. 	#
   10353 # 	  we find out if this occurred by doing an fsave and 	#
   10354 #	  checking the exception bit. if one did occur, then we	#
   10355 #	  jump to fgen_except() which creates the default	#
   10356 #	  result and EXOP for us.				#
   10357 #################################################################
   10358 	global		t_catch
   10359 t_catch:
   10360 
   10361 	fsave		-(%sp)			# push FPU state frame
   10362 	tst.b		0x2(%sp)		# exception bit set in the frame?
   10363 	bmi.b		catch			# yes; unfl or ovfl occurred
   10364 	add.l		&0xc,%sp		# no; pop the 0xc-byte frame, fall into t_inx2
   10365 
   10366 #################################################################
   10367 # INEX2 exception:						#
   10368 #	- The inex2 and ainex bits are set.			#
   10369 #################################################################
   10370 	global		t_inx2
   10371 t_inx2:
   10372 	fblt.w		t_minx2			# FP ccodes: less than -> negative path
   10373 	fbeq.w		inx2_zero		# FP ccodes: equal -> zero path
   10374 
   10375 	global		t_pinx2
   10376 t_pinx2:
   10377 	ori.w		&inx2a_mask,2+USER_FPSR(%a6) # set INEX2/AINEX
   10378 	rts
   10379 
   10380 	global		t_minx2
   10381 t_minx2:
   10382 	ori.l		&inx2a_mask+neg_mask,USER_FPSR(%a6) # set N/INEX2/AINEX
   10383 	rts
   10384 
   10385 inx2_zero:
   10386 	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
   10387 	ori.w		&inx2a_mask,2+USER_FPSR(%a6) # set INEX2/AINEX
   10388 	rts
   10389 
   10390 # an underflow or overflow exception occurred.
   10391 # we must set INEX/AINEX since the fmul/fdiv/fmov emulation may not!
   10392 catch:
   10393 	ori.w		&inx2a_mask,FPSR_EXCEPT(%a6)	# set INEX2/AINEX
   10394 catch2:
   10395 	bsr.l		fgen_except		# create default result and EXOP
   10396 	add.l		&0xc,%sp		# pop the 0xc-byte fsave frame
   10397 	rts
   10398 
   10399 	global		t_catch2
   10400 t_catch2:
   10401 
   10402 	fsave		-(%sp)			# push FPU state frame
   10403 
   10404 	tst.b		0x2(%sp)		# exception bit set in the frame?
   10405 	bmi.b		catch2			# yes; INEX2/AINEX are not forced here
   10406 	add.l		&0xc,%sp		# no; pop the 0xc-byte frame
   10407 
   10408 	fmov.l		%fpsr,%d0		# fetch resulting status
   10409 	or.l		%d0,USER_FPSR(%a6)	# merge it into the user's FPSR
   10410 
   10411 	rts
   10412 
   10413 #########################################################################
   10414 
   10415 #########################################################################
   10416 # unf_sub(): underflow default result calculation for transcendentals	#
   10417 #									#
   10418 # INPUT:								#
   10419 # 	d0   : rnd mode,precision					#
   10420 # 	d1.b : sign bit of result ('11111111 = (-) ; '00000000 = (+))	#
   10421 # OUTPUT:								#
   10422 #	a0   : points to result (in instruction memory)			#
   10423 #########################################################################
   10424 unf_sub:
   10425 	ori.l		&unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX
   10426 
   10427 	andi.w		&0x10,%d1		# keep sign bit in 4th spot
   10428 
   10429 	lsr.b		&0x4,%d0		# shift rnd prec,mode to lo bits
   10430 	andi.b		&0xf,%d0		# keep only the 4 prec/mode bits
   10431 	or.b		%d1,%d0			# concat {sgn,prec,mode}
   10432 
   10433 	mov.l		%d0,%d1			# make a copy
   10434 	lsl.b		&0x1,%d1		# d1 = index*2; the *8 scale below gives 16-byte entries
   10435 
   10436 	mov.b		(tbl_unf_cc.b,%pc,%d0.w*1),FPSR_CC(%a6) # insert ccode bits
   10437 	lea		(tbl_unf_result.b,%pc,%d1.w*8),%a0 # grab result ptr
   10438 	rts
   10439 
   10440 tbl_unf_cc:
   10441 	byte		0x4, 0x4, 0x4, 0x0	# (+) ext: 0x4 where tbl_unf_result entry is ZERO
   10442 	byte		0x4, 0x4, 0x4, 0x0	# (+) sgl
   10443 	byte		0x4, 0x4, 0x4, 0x0	# (+) dbl
   10444 	byte		0x0, 0x0, 0x0, 0x0	# filler for the 4th prec index
   10445 	byte		0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4	# (-) ext: 0x8 added for negative results
   10446 	byte		0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4	# (-) sgl
   10447 	byte		0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4	# (-) dbl
   10448 
   10449 tbl_unf_result:
   10450 	long		0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
   10451 	long		0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
   10452 	long		0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
   10453 	long		0x00000000, 0x00000000, 0x00000001, 0x0 # MIN; ext
   10454 
   10455 	long		0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
   10456 	long		0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
   10457 	long		0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
   10458 	long		0x3f810000, 0x00000100, 0x00000000, 0x0 # MIN; sgl
   10459 
   10460 	long		0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
   10461 	long		0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
   10462 	long		0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
   10463 	long		0x3c010000, 0x00000000, 0x00000800, 0x0 # MIN; dbl
   10464 
   10465 	long		0x0,0x0,0x0,0x0		# filler for the 4th prec index
   10466 	long		0x0,0x0,0x0,0x0		# filler
   10467 	long		0x0,0x0,0x0,0x0		# filler
   10468 	long		0x0,0x0,0x0,0x0		# filler
   10469 
   10470 	long		0x80000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
   10471 	long		0x80000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
   10472 	long		0x80000000, 0x00000000, 0x00000001, 0x0 # MIN; ext
   10473 	long		0x80000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
   10474 
   10475 	long		0xbf810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
   10476 	long		0xbf810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
   10477 	long		0xbf810000, 0x00000100, 0x00000000, 0x0 # MIN; sgl
   10478 	long		0xbf810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
   10479 
   10480 	long		0xbc010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
   10481 	long		0xbc010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
   10482 	long		0xbc010000, 0x00000000, 0x00000800, 0x0 # MIN; dbl
   10483 	long		0xbc010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
   10484 
   10485 ############################################################
   10486 
   10487 #########################################################################
   10488 # src_zero(): Return signed zero according to sign of src operand.	#
   10489 #########################################################################
   10490 	global		src_zero
   10491 src_zero:
   10492 	tst.b		SRC_EX(%a0)		# get sign of src operand
   10493 	bmi.b		ld_mzero		# if neg, load neg zero
   10494 
   10495 #
   10496 # ld_pzero(): return a positive zero. src_zero() falls through to here
   10497 # when the src operand is not negative.
   10498 	global		ld_pzero
   10499 ld_pzero:
   10500 	fmov.s		&0x00000000,%fp0	# load +0
   10501 	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
   10502 	rts
   10503 
   10504 # ld_mzero(): return a negative zero.
   10505 	global		ld_mzero
   10506 ld_mzero:
   10507 	fmov.s		&0x80000000,%fp0	# load -0 (sgl image, sign bit only)
   10508 	mov.b		&neg_bmask+z_bmask,FPSR_CC(%a6) # set 'N','Z' ccode bits
   10509 	rts
   10510 
   10511 #########################################################################
   10512 # dst_zero(): Return signed zero according to sign of dst operand.	#
   10513 #########################################################################
   10514 	global		dst_zero
   10515 dst_zero:
   10516 	tst.b		DST_EX(%a1) 		# get sign of dst operand (ptr in a1)
   10517 	bmi.b		ld_mzero		# if neg, load neg zero
   10518 	bra.b		ld_pzero		# load positive zero
   10519 
   10520 #########################################################################
   10521 # src_inf(): Return signed inf according to sign of src operand.	#
   10522 #########################################################################
   10523 	global		src_inf
   10524 src_inf:
   10525 	tst.b		SRC_EX(%a0) 		# get sign of src operand
   10526 	bmi.b		ld_minf			# if negative branch
   10527 
   10528 #
   10529 # ld_pinf(): return a positive infinity. src_inf() falls through to here
   10530 # when the src operand is not negative.
   10531 	global		ld_pinf
   10532 ld_pinf:
   10533 	fmov.s		&0x7f800000,%fp0	# load +INF
   10534 	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'INF' ccode bit
   10535 	rts
   10536 
   10537 #
   10538 # ld_minf():return a negative infinity.
   10539 #
   10540 	global		ld_minf
   10541 ld_minf:
   10542 	fmov.s		&0xff800000,%fp0	# load -INF (sgl image)
   10543 	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
   10544 	rts
   10545 
   10546 #########################################################################
   10547 # dst_inf(): Return signed inf according to sign of dst operand.	#
   10548 #########################################################################
   10549 	global		dst_inf
   10550 dst_inf:
   10551 	tst.b		DST_EX(%a1) 		# get sign of dst operand (ptr in a1)
   10552 	bmi.b		ld_minf			# if negative branch
   10553 	bra.b		ld_pinf			# else load positive infinity
   10554 
   10555 	global		szr_inf
   10556 #################################################################
   10557 # szr_inf(): Return +ZERO for a negative src operand or		#
   10558 #	            +INF for a positive src operand.		#
   10559 #	     Routine used for fetox, ftwotox, and ftentox.	#
   10560 #################################################################
   10561 szr_inf:
   10562 	tst.b		SRC_EX(%a0)		# check sign of source
   10563 	bmi.b		ld_pzero		# negative src: return +ZERO
   10564 	bra.b		ld_pinf			# positive src: return +INF
   10565 
   10566 #########################################################################
   10567 # sopr_inf(): Return +INF for a positive src operand or			#
   10568 #	      jump to operand error routine for a negative src operand.	#
   10569 #	      Routine used for flogn, flognp1, flog10, and flog2.	#
   10570 #########################################################################
   10571 	global		sopr_inf
   10572 sopr_inf:
   10573 	tst.b		SRC_EX(%a0)		# check sign of source
   10574 	bmi.w		t_operr			# negative src: operand error
   10575 	bra.b		ld_pinf			# positive src: return +INF
   10576 
   10577 #################################################################
   10578 # setoxm1i(): Return minus one for a negative src operand or	#
   10579 #	      positive infinity for a positive src operand.	#
   10580 #	      Routine used for fetoxm1.				#
   10581 #################################################################
   10582 	global		setoxm1i
   10583 setoxm1i:
   10584 	tst.b		SRC_EX(%a0)		# check sign of source
   10585 	bmi.b		ld_mone			# negative src: return -1
   10586 	bra.b		ld_pinf			# positive src: return +INF
   10587 
   10588 #########################################################################
   10589 # src_one(): Return signed one according to sign of src operand.	#
   10590 #########################################################################
   10591 	global		src_one
   10592 src_one:
   10593 	tst.b		SRC_EX(%a0) 		# check sign of source
   10594 	bmi.b		ld_mone			# negative src: return -1
   10595 
   10596 #
   10597 # ld_pone(): return positive one. src_one() falls through to here
   10598 # when the src operand is not negative.
   10599 	global		ld_pone
   10600 ld_pone:
   10601 	fmov.s		&0x3f800000,%fp0	# load +1
   10602 	clr.b		FPSR_CC(%a6)		# no ccode bits set
   10603 	rts
   10604 
   10605 #
   10606 # ld_mone(): return negative one.
   10607 #
   10608 	global		ld_mone
   10609 ld_mone:
   10610 	fmov.s		&0xbf800000,%fp0	# load -1 (sgl image)
   10611 	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
   10612 	rts
   10613 
   10614 ppiby2:	long		0x3fff0000, 0xc90fdaa2, 0x2168c235	# +pi/2, loaded by ld_ppi2 with fmov.x
   10615 mpiby2:	long		0xbfff0000, 0xc90fdaa2, 0x2168c235	# -pi/2, loaded by ld_mpi2 with fmov.x
   10616 
   10617 #################################################################
   10618 # spi_2(): Return signed PI/2 according to sign of src operand.	#
   10619 #################################################################
   10620 	global		spi_2
   10621 spi_2:
   10622 	tst.b		SRC_EX(%a0) 		# check sign of source
   10623 	bmi.b		ld_mpi2			# negative src: return -PI/2
   10624 
   10625 #
   10626 # ld_ppi2(): return positive PI/2. spi_2() falls through to here
   10627 # when the src operand is not negative.
   10628 	global		ld_ppi2
   10629 ld_ppi2:
   10630 	fmov.l		%d0,%fpcr		# load d0 into fpcr before the move
   10631 	fmov.x		ppiby2(%pc),%fp0	# load +pi/2
   10632 	bra.w		t_pinx2			# set INEX2
   10633 
   10634 #
   10635 # ld_mpi2(): return negative PI/2.
   10636 #
   10637 	global		ld_mpi2
   10638 ld_mpi2:
   10639 	fmov.l		%d0,%fpcr		# load d0 into fpcr before the move
   10640 	fmov.x		mpiby2(%pc),%fp0	# load -pi/2
   10641 	bra.w		t_minx2			# set N/INEX2/AINEX
   10642 
   10643 ####################################################
   10644 # The following routines give support for fsincos. #
   10645 ####################################################
   10646 
   10647 #
   10648 # ssincosz(): When the src operand is ZERO, store a one in the
   10649 # 	      cosine register and return a ZERO in fp0 w/ the same sign
   10650 #	      as the src operand.
   10651 #
   10652 	global		ssincosz
   10653 ssincosz:
   10654 	fmov.s		&0x3f800000,%fp1	# cosine result = +1
   10655 	tst.b		SRC_EX(%a0)		# test sign
   10656 	bpl.b		sincoszp		# src is positive
   10657 	fmov.s		&0x80000000,%fp0	# return sin result in fp0
   10658 	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set 'N','Z' ccode bits
   10659 	bra.b		sto_cos			# store cosine result
   10660 sincoszp:
   10661 	fmov.s		&0x00000000,%fp0	# return sin result in fp0
   10662 	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
   10663 	bra.b		sto_cos			# store cosine result
   10664 
   10665 #
   10666 # ssincosi(): When the src operand is INF, store a QNAN in the cosine
   10667 #	      register and jump to the operand error routine (taken for
   10668 #	      either sign of the src operand).
   10669 #
   10670 	global		ssincosi
   10671 ssincosi:
   10672 	fmov.x		qnan(%pc),%fp1		# load NAN
   10673 	bsr.l		sto_cos			# store cosine result
   10674 	bra.w		t_operr			# unconditional: operand error
   10675 
   10676 #
   10677 # ssincosqnan(): When the src operand is a QNAN, store the QNAN in the cosine
   10678 # 		 register and branch to the src QNAN routine.
   10679 #
   10680 	global		ssincosqnan
   10681 ssincosqnan:
   10682 	fmov.x		LOCAL_EX(%a0),%fp1	# fp1 = src QNAN
   10683 	bsr.l		sto_cos			# store it as the cosine result
   10684 	bra.w		src_qnan		# sine result: return the src QNAN
   10685 
   10686 #
   10687 # ssincossnan(): When the src operand is an SNAN, store the SNAN w/ the SNAN bit set
   10688 #		 in the cosine register and branch to the src SNAN routine.
   10689 #
   10690 	global		ssincossnan
   10691 ssincossnan:
   10692 	fmov.x		LOCAL_EX(%a0),%fp1	# fp1 = src SNAN
   10693 	bsr.l		sto_cos			# store it as the cosine result
   10694 	bra.w		src_snan		# sine result: return the src SNAN
   10695 
   10696 ########################################################################
   10697 
   10698 #########################################################################
   10699 # sto_cos(): store fp1 to the fpreg designated by the CMDREG dst field.	#
   10700 #	     fp1 holds the result of the cosine portion of ssincos().	#
   10701 #	     the value in fp1 will not take any exceptions when moved.	#
   10702 # INPUT:								#
   10703 #	fp1 : fp value to store						#
   10704 # MODIFIED:								#
   10705 #	d0								#
   10706 #########################################################################
   10707 	global		sto_cos
   10708 sto_cos:
   10709 	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch byte 1 of CMDREG
   10710 	andi.w		&0x7,%d0		# keep the 3-bit dst fpreg number
   10711 	mov.w		(tbl_sto_cos.b,%pc,%d0.w*2),%d0	# d0 = handler offset from table
   10712 	jmp		(tbl_sto_cos.b,%pc,%d0.w*1)	# jump to sto_cos_N
   10713 
   10714 tbl_sto_cos:
   10715 	short		sto_cos_0 - tbl_sto_cos
   10716 	short		sto_cos_1 - tbl_sto_cos
   10717 	short		sto_cos_2 - tbl_sto_cos
   10718 	short		sto_cos_3 - tbl_sto_cos
   10719 	short		sto_cos_4 - tbl_sto_cos
   10720 	short		sto_cos_5 - tbl_sto_cos
   10721 	short		sto_cos_6 - tbl_sto_cos
   10722 	short		sto_cos_7 - tbl_sto_cos
   10723 
   10724 sto_cos_0:
   10725 	fmovm.x		&0x40,EXC_FP0(%a6)	# dst fp0: write fp1 to the EXC_FP0 slot
   10726 	rts
   10727 sto_cos_1:
   10728 	fmovm.x		&0x40,EXC_FP1(%a6)	# dst fp1: write fp1 to the EXC_FP1 slot
   10729 	rts
   10730 sto_cos_2:
   10731 	fmov.x 		%fp1,%fp2		# dst fp2..fp7: plain register move
   10732 	rts
   10733 sto_cos_3:
   10734 	fmov.x		%fp1,%fp3
   10735 	rts
   10736 sto_cos_4:
   10737 	fmov.x		%fp1,%fp4
   10738 	rts
   10739 sto_cos_5:
   10740 	fmov.x		%fp1,%fp5
   10741 	rts
   10742 sto_cos_6:
   10743 	fmov.x		%fp1,%fp6
   10744 	rts
   10745 sto_cos_7:
   10746 	fmov.x		%fp1,%fp7
   10747 	rts
   10748 
   10749 ##################################################################
   10750 	global		smod_sdnrm
   10751 	global		smod_snorm
   10752 smod_sdnrm:
   10753 smod_snorm:
   10754 	mov.b		DTAG(%a6),%d1		# d1 = dst operand type tag
   10755 	beq.l		smod			# tag == 0: go compute in smod
   10756 	cmpi.b		%d1,&ZERO		# dst is ZERO?
   10757 	beq.w		smod_zro		# yes; signed zero result
   10758 	cmpi.b		%d1,&INF		# dst is INF?
   10759 	beq.l		t_operr			# yes; operand error
   10760 	cmpi.b		%d1,&DENORM		# dst is DENORM?
   10761 	beq.l		smod			# yes; go compute in smod
   10762 	cmpi.b		%d1,&SNAN		# dst is SNAN?
   10763 	beq.l		dst_snan		# yes; return the dst SNAN
   10764 	bra.l		dst_qnan		# otherwise return the dst QNAN
   10765 
   10766 	global		smod_szero
   10767 smod_szero:
   10768 	mov.b		DTAG(%a6),%d1		# d1 = dst operand type tag
   10769 	beq.l		t_operr			# tag == 0: operand error
   10770 	cmpi.b		%d1,&ZERO		# dst is ZERO?
   10771 	beq.l		t_operr			# yes; operand error
   10772 	cmpi.b		%d1,&INF		# dst is INF?
   10773 	beq.l		t_operr			# yes; operand error
   10774 	cmpi.b		%d1,&DENORM		# dst is DENORM?
   10775 	beq.l		t_operr			# yes; operand error
   10776 	cmpi.b		%d1,&QNAN		# dst is QNAN?
   10777 	beq.l		dst_qnan		# yes; return the dst QNAN
   10778 	bra.l		dst_snan		# otherwise return the dst SNAN
   10779 
   10780 	global		smod_sinf
   10781 smod_sinf:
   10782 	mov.b		DTAG(%a6),%d1		# d1 = dst operand type tag
   10783 	beq.l		smod_fpn		# tag == 0: result is the dst itself
   10784 	cmpi.b		%d1,&ZERO		# dst is ZERO?
   10785 	beq.l		smod_zro		# yes; signed zero result
   10786 	cmpi.b		%d1,&INF		# dst is INF?
   10787 	beq.l		t_operr			# yes; operand error
   10788 	cmpi.b		%d1,&DENORM		# dst is DENORM?
   10789 	beq.l		smod_fpn		# yes; result is the dst itself
   10790 	cmpi.b		%d1,&QNAN		# dst is QNAN?
   10791 	beq.l		dst_qnan		# yes; return the dst QNAN
   10792 	bra.l		dst_snan		# otherwise return the dst SNAN
   10793 
   10794 smod_zro:
   10795 srem_zro:
   10796 	mov.b		SRC_EX(%a0),%d1		# get src sign
   10797 	mov.b		DST_EX(%a1),%d0		# get dst sign
   10798 	eor.b		%d0,%d1			# get qbyte sign
   10799 	andi.b		&0x80,%d1		# isolate the sign bit
   10800 	mov.b		%d1,FPSR_QBYTE(%a6)	# quotient byte = sign bit only
   10801 	tst.b		%d0			# is dst negative?
   10802 	bpl.w		ld_pzero		# no; return +0
   10803 	bra.w		ld_mzero		# yes; return -0
   10804 
   10805 smod_fpn:
   10806 srem_fpn:
   10807 	clr.b		FPSR_QBYTE(%a6)		# cleared here, rewritten below
   10808 	mov.l		%d0,-(%sp)		# save d0; it is reused as scratch
   10809 	mov.b		SRC_EX(%a0),%d1		# get src sign
   10810 	mov.b		DST_EX(%a1),%d0		# get dst sign
   10811 	eor.b		%d0,%d1			# get qbyte sign
   10812 	andi.b		&0x80,%d1		# isolate the sign bit
   10813 	mov.b		%d1,FPSR_QBYTE(%a6)	# quotient byte = sign bit only
   10814 	cmpi.b		DTAG(%a6),&DENORM	# is dst a DENORM?
   10815 	bne.b		smod_nrm		# no
   10816 	lea		DST(%a1),%a0		# yes; a0 = ptr to dst operand
   10817 	mov.l		(%sp)+,%d0		# restore saved d0
   10818 	bra		t_resdnrm		# let t_resdnrm build the result
   10819 smod_nrm:
   10820 	fmov.l		(%sp)+,%fpcr		# pop saved d0 straight into fpcr
   10821 	fmov.x		DST(%a1),%fp0		# return the dst operand in fp0
   10822 	tst.b		DST_EX(%a1)		# is dst negative?
   10823 	bmi.b		smod_nrm_neg		# yes
   10824 	rts
   10825 
   10826 smod_nrm_neg:
   10827 	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode
   10828 	rts
   10829 
   10830 #########################################################################
   10831 	global		srem_snorm
   10832 	global		srem_sdnrm
   10833 srem_sdnrm:
   10834 srem_snorm:
   10835 	mov.b		DTAG(%a6),%d1		# d1 = dst operand type tag
   10836 	beq.l		srem			# tag == 0: go compute in srem
   10837 	cmpi.b		%d1,&ZERO		# dst is ZERO?
   10838 	beq.w		srem_zro		# yes; signed zero result
   10839 	cmpi.b		%d1,&INF		# dst is INF?
   10840 	beq.l		t_operr			# yes; operand error
   10841 	cmpi.b		%d1,&DENORM		# dst is DENORM?
   10842 	beq.l		srem			# yes; go compute in srem
   10843 	cmpi.b		%d1,&QNAN		# dst is QNAN?
   10844 	beq.l		dst_qnan		# yes; return the dst QNAN
   10845 	bra.l		dst_snan		# otherwise return the dst SNAN
   10846 
   10847 	global		srem_szero
   10848 srem_szero:
   10849 	mov.b		DTAG(%a6),%d1		# d1 = dst operand type tag
   10850 	beq.l		t_operr			# tag == 0: operand error
   10851 	cmpi.b		%d1,&ZERO		# dst is ZERO?
   10852 	beq.l		t_operr			# yes; operand error
   10853 	cmpi.b		%d1,&INF		# dst is INF?
   10854 	beq.l		t_operr			# yes; operand error
   10855 	cmpi.b		%d1,&DENORM		# dst is DENORM?
   10856 	beq.l		t_operr			# yes; operand error
   10857 	cmpi.b		%d1,&QNAN		# dst is QNAN?
   10858 	beq.l		dst_qnan		# yes; return the dst QNAN
   10859 	bra.l		dst_snan		# otherwise return the dst SNAN
   10860 
   10861 	global		srem_sinf
   10862 srem_sinf:
   10863 	mov.b		DTAG(%a6),%d1		# d1 = dst operand type tag
   10864 	beq.w		srem_fpn		# tag == 0: result is the dst itself
   10865 	cmpi.b		%d1,&ZERO		# dst is ZERO?
   10866 	beq.w		srem_zro		# yes; signed zero result
   10867 	cmpi.b		%d1,&INF		# dst is INF?
   10868 	beq.l		t_operr			# yes; operand error
   10869 	cmpi.b		%d1,&DENORM		# dst is DENORM?
   10870 	beq.l		srem_fpn		# yes; result is the dst itself
   10871 	cmpi.b		%d1,&QNAN		# dst is QNAN?
   10872 	beq.l		dst_qnan		# yes; return the dst QNAN
   10873 	bra.l		dst_snan		# otherwise return the dst SNAN
   10874 
   10875 #########################################################################
   10876 	global		sscale_snorm
   10877 	global		sscale_sdnrm
   10878 sscale_snorm:
   10879 sscale_sdnrm:
   10880 	mov.b		DTAG(%a6),%d1		# d1 = dst operand type tag
   10881 	beq.l		sscale			# tag == 0: go compute in sscale
   10882 	cmpi.b		%d1,&ZERO		# dst is ZERO?
   10883 	beq.l		dst_zero		# yes; return zero w/ dst sign
   10884 	cmpi.b		%d1,&INF		# dst is INF?
   10885 	beq.l		dst_inf			# yes; return inf w/ dst sign
   10886 	cmpi.b		%d1,&DENORM		# dst is DENORM?
   10887 	beq.l		sscale			# yes; go compute in sscale
   10888 	cmpi.b		%d1,&QNAN		# dst is QNAN?
   10889 	beq.l		dst_qnan		# yes; return the dst QNAN
   10890 	bra.l		dst_snan		# otherwise return the dst SNAN
   10891 
   10892 	global		sscale_szero
   10893 sscale_szero:
   10894 	mov.b		DTAG(%a6),%d1		# d1 = dst operand type tag
   10895 	beq.l		sscale			# tag == 0: go compute in sscale
   10896 	cmpi.b		%d1,&ZERO		# dst is ZERO?
   10897 	beq.l		dst_zero		# yes; return zero w/ dst sign
   10898 	cmpi.b		%d1,&INF		# dst is INF?
   10899 	beq.l		dst_inf			# yes; return inf w/ dst sign
   10900 	cmpi.b		%d1,&DENORM		# dst is DENORM?
   10901 	beq.l		sscale			# yes; go compute in sscale
   10902 	cmpi.b		%d1,&QNAN		# dst is QNAN?
   10903 	beq.l		dst_qnan		# yes; return the dst QNAN
   10904 	bra.l		dst_snan		# otherwise return the dst SNAN
   10905 
   10906 	global		sscale_sinf
   10907 sscale_sinf:
   10908 	mov.b		DTAG(%a6),%d1		# d1 = dst operand type tag
   10909 	beq.l		t_operr			# tag == 0: operand error
   10910 	cmpi.b		%d1,&QNAN		# dst is QNAN?
   10911 	beq.l		dst_qnan		# yes; return the dst QNAN
   10912 	cmpi.b		%d1,&SNAN		# dst is SNAN?
   10913 	beq.l		dst_snan		# yes; return the dst SNAN
   10914 	bra.l		t_operr			# every other dst type: operand error
   10915 
   10916 ########################################################################
   10917 
   10918 #
   10919 # sop_sqnan(): The src op for frem/fmod/fscale was a QNAN.
   10920 #
   10921 	global		sop_sqnan
   10922 sop_sqnan:
   10923 	mov.b		DTAG(%a6),%d1		# d1 = dst operand type tag
   10924 	cmpi.b		%d1,&QNAN		# dst is QNAN?
   10925 	beq.b		dst_qnan		# yes; a dst NAN is returned in preference to src
   10926 	cmpi.b		%d1,&SNAN		# dst is SNAN?
   10927 	beq.b		dst_snan		# yes; return the dst SNAN
   10928 	bra.b		src_qnan		# dst is not a NAN; return the src QNAN
   10929 
   10930 #
   10931 # sop_ssnan(): The src op for frem/fmod/fscale was an SNAN.
   10932 #
   10933 	global		sop_ssnan
   10934 sop_ssnan:
   10935 	mov.b		DTAG(%a6),%d1		# d1 = dst operand type tag
   10936 	cmpi.b		%d1,&QNAN		# dst is QNAN?
   10937 	beq.b		dst_qnan_src_snan	# yes; still flag the src SNAN
   10938 	cmpi.b		%d1,&SNAN		# dst is SNAN?
   10939 	beq.b		dst_snan		# yes; return the dst SNAN
   10940 	bra.b		src_snan		# dst is not a NAN; return the src SNAN
   10941 
   10942 dst_qnan_src_snan:
   10943 	ori.l		&snaniop_mask,USER_FPSR(%a6) # set NAN/SNAN/AIOP
   10944 	bra.b		dst_qnan		# result itself is the dst QNAN
   10945 
   10946 #
   10947 # dst_snan(): Return the dst SNAN w/ the SNAN bit set.
   10948 #
   10949 	global		dst_snan
   10950 dst_snan:
   10951 	fmov.x		DST(%a1),%fp0		# the fmove sets the SNAN bit
   10952 	fmov.l		%fpsr,%d0		# catch resulting status
   10953 	or.l		%d0,USER_FPSR(%a6)	# store status
   10954 	rts
   10955 
   10956 #
   10957 # dst_qnan(): Return the dst QNAN.
   10958 #
   10959 	global		dst_qnan
   10960 dst_qnan:
   10961 	fmov.x		DST(%a1),%fp0		# return the non-signalling nan
   10962 	tst.b		DST_EX(%a1)		# set ccodes according to QNAN sign
   10963 	bmi.b		dst_qnan_m		# negative QNAN
   10964 dst_qnan_p:
   10965 	mov.b		&nan_bmask,FPSR_CC(%a6)	# set 'NAN' ccode bit
   10966 	rts
   10967 dst_qnan_m:
   10968 	mov.b		&neg_bmask+nan_bmask,FPSR_CC(%a6)	# set 'N','NAN' ccode bits
   10969 	rts
   10970 
   10971 #
   10972 # src_snan(): Return the src SNAN w/ the SNAN bit set.
   10973 #
   10974 	global		src_snan
   10975 src_snan:
   10976 	fmov.x		SRC(%a0),%fp0		# the fmove sets the SNAN bit
   10977 	fmov.l		%fpsr,%d0		# catch resulting status
   10978 	or.l		%d0,USER_FPSR(%a6)	# merge status into the user's FPSR
   10979 	rts
   10980 
   10981 #
   10982 # src_qnan(): Return the src QNAN.
   10983 #
   10984 	global		src_qnan
   10985 src_qnan:
   10986 	fmov.x		SRC(%a0),%fp0		# return the non-signalling nan
   10987 	tst.b		SRC_EX(%a0)		# set ccodes according to QNAN sign
   10988 	bmi.b		dst_qnan_m
   10989 src_qnan_p:
   10990 	mov.b		&nan_bmask,FPSR_CC(%a6)
   10991 	rts
   10992 src_qnan_m:
   10993 	mov.b		&neg_bmask+nan_bmask,FPSR_CC(%a6)
   10994 	rts
   10995 
   10996 #
   10997 # fkern2.s:
   10998 #	These entry points are used by the exception handler
   10999 # routines where an instruction is selected by an index into
   11000 # a large jump table corresponding to a given instruction which
   11001 # has been decoded. Flow continues here where we now decode
   11002 # further according to the source operand type.
   11003 #
   11004 
   11005 	global		fsinh
   11006 fsinh:
   11007 	mov.b		STAG(%a6),%d1		# d1 = src operand type tag
   11008 	beq.l		ssinh			# tag == 0: go compute in ssinh
   11009 	cmpi.b		%d1,&ZERO		# src is ZERO?
   11010 	beq.l		src_zero		# yes; return zero w/ src sign
   11011 	cmpi.b		%d1,&INF		# src is INF?
   11012 	beq.l		src_inf			# yes; return inf w/ src sign
   11013 	cmpi.b		%d1,&DENORM		# src is DENORM?
   11014 	beq.l		ssinhd			# yes; branch to ssinhd
   11015 	cmpi.b		%d1,&QNAN		# src is QNAN?
   11016 	beq.l		src_qnan		# yes; return the src QNAN
   11017 	bra.l		src_snan		# otherwise return the src SNAN
   11018 
   11019 	global		flognp1
   11020 flognp1:
   11021 	mov.b		STAG(%a6),%d1		# d1 = src operand type tag
   11022 	beq.l		slognp1			# tag == 0: go compute in slognp1
   11023 	cmpi.b		%d1,&ZERO		# src is ZERO?
   11024 	beq.l		src_zero		# yes; return zero w/ src sign
   11025 	cmpi.b		%d1,&INF		# src is INF?
   11026 	beq.l		sopr_inf		# yes; +INF, or operr if src is negative
   11027 	cmpi.b		%d1,&DENORM		# src is DENORM?
   11028 	beq.l		slognp1d		# yes; branch to slognp1d
   11029 	cmpi.b		%d1,&QNAN		# src is QNAN?
   11030 	beq.l		src_qnan		# yes; return the src QNAN
   11031 	bra.l		src_snan		# otherwise return the src SNAN
   11032 
   11033 	global		fetoxm1
   11034 fetoxm1:
   11035 	mov.b		STAG(%a6),%d1		# d1 = src operand type tag
   11036 	beq.l		setoxm1			# tag == 0: go compute in setoxm1
   11037 	cmpi.b		%d1,&ZERO		# src is ZERO?
   11038 	beq.l		src_zero		# yes; return zero w/ src sign
   11039 	cmpi.b		%d1,&INF		# src is INF?
   11040 	beq.l		setoxm1i		# yes; -1 or +INF by src sign
   11041 	cmpi.b		%d1,&DENORM		# src is DENORM?
   11042 	beq.l		setoxm1d		# yes; branch to setoxm1d
   11043 	cmpi.b		%d1,&QNAN		# src is QNAN?
   11044 	beq.l		src_qnan		# yes; return the src QNAN
   11045 	bra.l		src_snan		# otherwise return the src SNAN
   11046 
   11047 	global		ftanh
   11048 ftanh:
   11049 	mov.b		STAG(%a6),%d1		# d1 = src operand type tag
   11050 	beq.l		stanh			# tag == 0: go compute in stanh
   11051 	cmpi.b		%d1,&ZERO		# src is ZERO?
   11052 	beq.l		src_zero		# yes; return zero w/ src sign
   11053 	cmpi.b		%d1,&INF		# src is INF?
   11054 	beq.l		src_one			# yes; return one w/ src sign
   11055 	cmpi.b		%d1,&DENORM		# src is DENORM?
   11056 	beq.l		stanhd			# yes; branch to stanhd
   11057 	cmpi.b		%d1,&QNAN		# src is QNAN?
   11058 	beq.l		src_qnan		# yes; return the src QNAN
   11059 	bra.l		src_snan		# otherwise return the src SNAN
   11060 
   11061 	global		fatan
   11062 fatan:
   11063 	mov.b		STAG(%a6),%d1		# d1 = src operand type tag
   11064 	beq.l		satan			# tag == 0: go compute in satan
   11065 	cmpi.b		%d1,&ZERO		# src is ZERO?
   11066 	beq.l		src_zero		# yes; return zero w/ src sign
   11067 	cmpi.b		%d1,&INF		# src is INF?
   11068 	beq.l		spi_2			# yes; return PI/2 w/ src sign
   11069 	cmpi.b		%d1,&DENORM		# src is DENORM?
   11070 	beq.l		satand			# yes; branch to satand
   11071 	cmpi.b		%d1,&QNAN		# src is QNAN?
   11072 	beq.l		src_qnan		# yes; return the src QNAN
   11073 	bra.l		src_snan		# otherwise return the src SNAN
   11074 
   11075 	global		fasin
   11076 fasin:
   11077 	mov.b		STAG(%a6),%d1		# d1 = src operand type tag
   11078 	beq.l		sasin			# tag == 0: go compute in sasin
   11079 	cmpi.b		%d1,&ZERO		# src is ZERO?
   11080 	beq.l		src_zero		# yes; return zero w/ src sign
   11081 	cmpi.b		%d1,&INF		# src is INF?
   11082 	beq.l		t_operr			# yes; operand error
   11083 	cmpi.b		%d1,&DENORM		# src is DENORM?
   11084 	beq.l		sasind			# yes; branch to sasind
   11085 	cmpi.b		%d1,&QNAN		# src is QNAN?
   11086 	beq.l		src_qnan		# yes; return the src QNAN
   11087 	bra.l		src_snan		# otherwise return the src SNAN
   11088 
   11089 	global		fatanh
   11090 fatanh:
   11091 	mov.b		STAG(%a6),%d1		# d1 = src operand type tag
   11092 	beq.l		satanh			# tag == 0: go compute in satanh
   11093 	cmpi.b		%d1,&ZERO		# src is ZERO?
   11094 	beq.l		src_zero		# yes; return zero w/ src sign
   11095 	cmpi.b		%d1,&INF		# src is INF?
   11096 	beq.l		t_operr			# yes; operand error
   11097 	cmpi.b		%d1,&DENORM		# src is DENORM?
   11098 	beq.l		satanhd			# yes; branch to satanhd
   11099 	cmpi.b		%d1,&QNAN		# src is QNAN?
   11100 	beq.l		src_qnan		# yes; return the src QNAN
   11101 	bra.l		src_snan		# otherwise return the src SNAN
   11102 
   11103 	global		fsine
   11104 fsine:
   11105 	mov.b		STAG(%a6),%d1		# d1 = src operand type tag
   11106 	beq.l		ssin			# tag == 0: go compute in ssin
   11107 	cmpi.b		%d1,&ZERO		# src is ZERO?
   11108 	beq.l		src_zero		# yes; return zero w/ src sign
   11109 	cmpi.b		%d1,&INF		# src is INF?
   11110 	beq.l		t_operr			# yes; operand error
   11111 	cmpi.b		%d1,&DENORM		# src is DENORM?
   11112 	beq.l		ssind			# yes; branch to ssind
   11113 	cmpi.b		%d1,&QNAN		# src is QNAN?
   11114 	beq.l		src_qnan		# yes; return the src QNAN
   11115 	bra.l		src_snan		# otherwise return the src SNAN
   11116 
   11117 	global		ftan
   11118 ftan:
   11119 	mov.b		STAG(%a6),%d1		# d1 = src operand type tag
   11120 	beq.l		stan			# tag == 0: go compute in stan
   11121 	cmpi.b		%d1,&ZERO		# src is ZERO?
   11122 	beq.l		src_zero		# yes; return zero w/ src sign
   11123 	cmpi.b		%d1,&INF		# src is INF?
   11124 	beq.l		t_operr			# yes; operand error
   11125 	cmpi.b		%d1,&DENORM		# src is DENORM?
   11126 	beq.l		stand			# yes; branch to stand
   11127 	cmpi.b		%d1,&QNAN		# src is QNAN?
   11128 	beq.l		src_qnan		# yes; return the src QNAN
   11129 	bra.l		src_snan		# otherwise return the src SNAN
   11130 
   11131 	global		fetox
   11132 fetox:
   11133 	mov.b		STAG(%a6),%d1		# d1 = src operand type tag
   11134 	beq.l		setox			# tag == 0: go compute in setox
   11135 	cmpi.b		%d1,&ZERO		# src is ZERO?
   11136 	beq.l		ld_pone			# yes; return +1
   11137 	cmpi.b		%d1,&INF		# src is INF?
   11138 	beq.l		szr_inf			# yes; +ZERO or +INF by src sign
   11139 	cmpi.b		%d1,&DENORM		# src is DENORM?
   11140 	beq.l		setoxd			# yes; branch to setoxd
   11141 	cmpi.b		%d1,&QNAN		# src is QNAN?
   11142 	beq.l		src_qnan		# yes; return the src QNAN
   11143 	bra.l		src_snan		# otherwise return the src SNAN
   11144 
   11145 	global		ftwotox
   11146 ftwotox:
   11147 	mov.b		STAG(%a6),%d1		# d1 = src operand type tag
   11148 	beq.l		stwotox			# tag == 0: go compute in stwotox
   11149 	cmpi.b		%d1,&ZERO		# src is ZERO?
   11150 	beq.l		ld_pone			# yes; return +1
   11151 	cmpi.b		%d1,&INF		# src is INF?
   11152 	beq.l		szr_inf			# yes; +ZERO or +INF by src sign
   11153 	cmpi.b		%d1,&DENORM		# src is DENORM?
   11154 	beq.l		stwotoxd		# yes; branch to stwotoxd
   11155 	cmpi.b		%d1,&QNAN		# src is QNAN?
   11156 	beq.l		src_qnan		# yes; return the src QNAN
   11157 	bra.l		src_snan		# otherwise return the src SNAN
   11158 
   11159 	global		ftentox
   11160 ftentox:
   11161 	mov.b		STAG(%a6),%d1		# d1 = src operand type tag
   11162 	beq.l		stentox			# tag == 0: go compute in stentox
   11163 	cmpi.b		%d1,&ZERO		# src is ZERO?
   11164 	beq.l		ld_pone			# yes; return +1
   11165 	cmpi.b		%d1,&INF		# src is INF?
   11166 	beq.l		szr_inf			# yes; +ZERO or +INF by src sign
   11167 	cmpi.b		%d1,&DENORM		# src is DENORM?
   11168 	beq.l		stentoxd		# yes; branch to stentoxd
   11169 	cmpi.b		%d1,&QNAN		# src is QNAN?
   11170 	beq.l		src_qnan		# yes; return the src QNAN
   11171 	bra.l		src_snan		# otherwise return the src SNAN
   11172 
   11173 	global		flogn
   11174 flogn:
   11175 	mov.b		STAG(%a6),%d1		# d1 = src operand type tag
   11176 	beq.l		slogn			# tag == 0: go compute in slogn
   11177 	cmpi.b		%d1,&ZERO		# src is ZERO?
   11178 	beq.l		t_dz2			# yes; branch to t_dz2
   11179 	cmpi.b		%d1,&INF		# src is INF?
   11180 	beq.l		sopr_inf		# yes; +INF, or operr if src is negative
   11181 	cmpi.b		%d1,&DENORM		# src is DENORM?
   11182 	beq.l		slognd			# yes; branch to slognd
   11183 	cmpi.b		%d1,&QNAN		# src is QNAN?
   11184 	beq.l		src_qnan		# yes; return the src QNAN
   11185 	bra.l		src_snan		# otherwise return the src SNAN
   11186 
   11187 	global		flog10
   11188 flog10:
   11189 	mov.b		STAG(%a6),%d1		# d1 = src operand type tag
   11190 	beq.l		slog10			# tag == 0: go compute in slog10
   11191 	cmpi.b		%d1,&ZERO		# src is ZERO?
   11192 	beq.l		t_dz2			# yes; branch to t_dz2
   11193 	cmpi.b		%d1,&INF		# src is INF?
   11194 	beq.l		sopr_inf		# yes; +INF, or operr if src is negative
   11195 	cmpi.b		%d1,&DENORM		# src is DENORM?
   11196 	beq.l		slog10d			# yes; branch to slog10d
   11197 	cmpi.b		%d1,&QNAN		# src is QNAN?
   11198 	beq.l		src_qnan		# yes; return the src QNAN
   11199 	bra.l		src_snan		# otherwise return the src SNAN
   11200 
   11201 	global		flog2
   11202 flog2:
   11203 	mov.b		STAG(%a6),%d1		# d1 = src operand type tag
   11204 	beq.l		slog2			# tag == 0: go compute in slog2
   11205 	cmpi.b		%d1,&ZERO		# src is ZERO?
   11206 	beq.l		t_dz2			# yes; branch to t_dz2
   11207 	cmpi.b		%d1,&INF		# src is INF?
   11208 	beq.l		sopr_inf		# yes; +INF, or operr if src is negative
   11209 	cmpi.b		%d1,&DENORM		# src is DENORM?
   11210 	beq.l		slog2d			# yes; branch to slog2d
   11211 	cmpi.b		%d1,&QNAN		# src is QNAN?
   11212 	beq.l		src_qnan		# yes; return the src QNAN
   11213 	bra.l		src_snan		# otherwise return the src SNAN
   11214 
   11215 	global		fcosh
   11216 fcosh:
   11217 	mov.b		STAG(%a6),%d1		# d1 = src operand type tag
   11218 	beq.l		scosh			# tag == 0: go compute in scosh
   11219 	cmpi.b		%d1,&ZERO		# src is ZERO?
   11220 	beq.l		ld_pone			# yes; return +1
   11221 	cmpi.b		%d1,&INF		# src is INF?
   11222 	beq.l		ld_pinf			# yes; return +INF for either sign
   11223 	cmpi.b		%d1,&DENORM		# src is DENORM?
   11224 	beq.l		scoshd			# yes; branch to scoshd
   11225 	cmpi.b		%d1,&QNAN		# src is QNAN?
   11226 	beq.l		src_qnan		# yes; return the src QNAN
   11227 	bra.l		src_snan		# otherwise return the src SNAN
   11228 
   11229 	global		facos			# reached via tbl_unsupp slot 0x1c
   11230 facos:
   11231 	mov.b		STAG(%a6),%d1		# d1 = src operand tag; Z is set if tag == 0
   11232 	beq.l		sacos			# tag 0 (NORM)
   11233 	cmpi.b		%d1,&ZERO
   11234 	beq.l		ld_ppi2			# src is a ZERO
   11235 	cmpi.b		%d1,&INF
   11236 	beq.l		t_operr			# src is an INF
   11237 	cmpi.b		%d1,&DENORM
   11238 	beq.l		sacosd			# src is a DENORM
   11239 	cmpi.b		%d1,&QNAN
   11240 	beq.l		src_qnan		# src is a QNAN
   11241 	bra.l		src_snan		# no tag matched: handled as SNAN
   11242 
   11243 	global		fcos			# reached via tbl_unsupp slot 0x1d
   11244 fcos:
   11245 	mov.b		STAG(%a6),%d1		# d1 = src operand tag; Z is set if tag == 0
   11246 	beq.l		scos			# tag 0 (NORM)
   11247 	cmpi.b		%d1,&ZERO
   11248 	beq.l		ld_pone			# src is a ZERO
   11249 	cmpi.b		%d1,&INF
   11250 	beq.l		t_operr			# src is an INF
   11251 	cmpi.b		%d1,&DENORM
   11252 	beq.l		scosd			# src is a DENORM
   11253 	cmpi.b		%d1,&QNAN
   11254 	beq.l		src_qnan		# src is a QNAN
   11255 	bra.l		src_snan		# no tag matched: handled as SNAN
   11256 
   11257 	global		fgetexp			# reached via tbl_unsupp slot 0x1e
   11258 fgetexp:
   11259 	mov.b		STAG(%a6),%d1		# d1 = src operand tag; Z is set if tag == 0
   11260 	beq.l		sgetexp			# tag 0 (NORM)
   11261 	cmpi.b		%d1,&ZERO
   11262 	beq.l		src_zero		# src is a ZERO
   11263 	cmpi.b		%d1,&INF
   11264 	beq.l		t_operr			# src is an INF
   11265 	cmpi.b		%d1,&DENORM
   11266 	beq.l		sgetexpd		# src is a DENORM
   11267 	cmpi.b		%d1,&QNAN
   11268 	beq.l		src_qnan		# src is a QNAN
   11269 	bra.l		src_snan		# no tag matched: handled as SNAN
   11270 
   11271 	global		fgetman			# reached via tbl_unsupp slot 0x1f
   11272 fgetman:
   11273 	mov.b		STAG(%a6),%d1		# d1 = src operand tag; Z is set if tag == 0
   11274 	beq.l		sgetman			# tag 0 (NORM)
   11275 	cmpi.b		%d1,&ZERO
   11276 	beq.l		src_zero		# src is a ZERO
   11277 	cmpi.b		%d1,&INF
   11278 	beq.l		t_operr			# src is an INF
   11279 	cmpi.b		%d1,&DENORM
   11280 	beq.l		sgetmand		# src is a DENORM
   11281 	cmpi.b		%d1,&QNAN
   11282 	beq.l		src_qnan		# src is a QNAN
   11283 	bra.l		src_snan		# no tag matched: handled as SNAN
   11284 
   11285 	global		fsincos			# reached via tbl_unsupp slots 0x30-0x37
   11286 fsincos:
   11287 	mov.b		STAG(%a6),%d1		# d1 = src operand tag; Z is set if tag == 0
   11288 	beq.l		ssincos			# tag 0 (NORM)
   11289 	cmpi.b		%d1,&ZERO
   11290 	beq.l		ssincosz		# src is a ZERO
   11291 	cmpi.b		%d1,&INF
   11292 	beq.l		ssincosi		# src is an INF
   11293 	cmpi.b		%d1,&DENORM
   11294 	beq.l		ssincosd		# src is a DENORM
   11295 	cmpi.b		%d1,&QNAN
   11296 	beq.l		ssincosqnan		# src is a QNAN
   11297 	bra.l		ssincossnan		# no tag matched: handled as SNAN
   11298 
   11299 	global		fmod			# reached via tbl_unsupp slot 0x21
   11300 fmod:
   11301 	mov.b		STAG(%a6),%d1		# d1 = src operand tag; only the src tag is examined here
   11302 	beq.l		smod_snorm		# tag 0 (NORM)
   11303 	cmpi.b		%d1,&ZERO
   11304 	beq.l		smod_szero		# src is a ZERO
   11305 	cmpi.b		%d1,&INF
   11306 	beq.l		smod_sinf		# src is an INF
   11307 	cmpi.b		%d1,&DENORM
   11308 	beq.l		smod_sdnrm		# src is a DENORM
   11309 	cmpi.b		%d1,&QNAN
   11310 	beq.l		sop_sqnan		# src is a QNAN
   11311 	bra.l		sop_ssnan		# no tag matched: handled as SNAN
   11312 
   11313 	global		frem			# reached via tbl_unsupp slot 0x25
   11314 frem:
   11315 	mov.b		STAG(%a6),%d1		# d1 = src operand tag; only the src tag is examined here
   11316 	beq.l		srem_snorm		# tag 0 (NORM)
   11317 	cmpi.b		%d1,&ZERO
   11318 	beq.l		srem_szero		# src is a ZERO
   11319 	cmpi.b		%d1,&INF
   11320 	beq.l		srem_sinf		# src is an INF
   11321 	cmpi.b		%d1,&DENORM
   11322 	beq.l		srem_sdnrm		# src is a DENORM
   11323 	cmpi.b		%d1,&QNAN
   11324 	beq.l		sop_sqnan		# src is a QNAN
   11325 	bra.l		sop_ssnan		# no tag matched: handled as SNAN
   11326 
   11327 	global		fscale			# reached via tbl_unsupp slot 0x26
   11328 fscale:
   11329 	mov.b		STAG(%a6),%d1		# d1 = src operand tag; only the src tag is examined here
   11330 	beq.l		sscale_snorm		# tag 0 (NORM)
   11331 	cmpi.b		%d1,&ZERO
   11332 	beq.l		sscale_szero		# src is a ZERO
   11333 	cmpi.b		%d1,&INF
   11334 	beq.l		sscale_sinf		# src is an INF
   11335 	cmpi.b		%d1,&DENORM
   11336 	beq.l		sscale_sdnrm		# src is a DENORM
   11337 	cmpi.b		%d1,&QNAN
   11338 	beq.l		sop_sqnan		# src is a QNAN
   11339 	bra.l		sop_ssnan		# no tag matched: handled as SNAN
   11340 
   11341 #########################################################################
   11342 # XDEF ****************************************************************	#
   11343 # 	fgen_except(): catch an exception during transcendental 	#
   11344 #		       emulation					#
   11345 #									#
   11346 # XREF ****************************************************************	#
   11347 #	fmul() - emulate a multiply instruction				#
   11348 #	fadd() - emulate an add instruction				#
   11349 #	fin() - emulate an fmove instruction				#
   11350 #									#
   11351 # INPUT ***************************************************************	#
   11352 #	fp0 = destination operand					#
   11353 #	d1  = type of instruction that took exception			#
   11354 #	fsave frame = source operand					#
   11355 #	d0  = not modified here; passed on to fmul()/fadd()/fin()	#
   11356 # OUTPUT **************************************************************	#
   11357 #	fp0 = result							#
   11358 #	fp1 = EXOP							#
   11359 #									#
   11360 # ALGORITHM ***********************************************************	#
   11361 # 	An exception occurred on the last instruction of the 		#
   11362 # transcendental emulation. hopefully, this won't be happening much 	#
   11363 # because it will be VERY slow.						#
   11364 # 	The only exceptions capable of passing through here are		#
   11365 # Overflow, Underflow, and Unsupported Data Type.			#
   11366 #									#
   11367 #########################################################################
   11368 
   11369 	global		fgen_except
   11370 fgen_except:
   11371 	cmpi.b		0x3(%sp),&0x7		# is exception UNSUPP?
   11372 	beq.b		fge_unsupp		# yes
   11373 
   11374 	mov.b		&NORM,STAG(%a6)		# no; tag the src operand as NORM
   11375 
   11376 fge_cont:
   11377 	mov.b		&NORM,DTAG(%a6)		# dst operand is always tagged NORM
   11378 
   11379 # ok, I have a problem with putting the dst op at FP_DST. the emulation
   11380 # routines aren't supposed to alter the operands but we've just squashed
   11381 # FP_DST here...
   11382 
   11383 # 8/17/93 - this turns out to be more of a "cleanliness" standpoint
   11384 # than a potential bug. to begin with, only the dyadic functions
   11385 # frem,fmod, and fscale would get the dst trashed here. But, for
   11386 # the 060SP, the FP_DST is never used again anyways.
   11387 	fmovm.x		&0x80,FP_DST(%a6)	# dst op is in fp0
   11388 
   11389 	lea		0x4(%sp),%a0		# pass: ptr to src op
   11390 	lea		FP_DST(%a6),%a1		# pass: ptr to dst op
   11391 
   11392 	cmpi.b		%d1,&FMOV_OP		# d1 = type of instruction that took the exception
   11393 	beq.b		fge_fin			# it was an "fmov"
   11394 	cmpi.b		%d1,&FADD_OP
   11395 	beq.b		fge_fadd		# it was an "fadd"
   11396 fge_fmul:
   11397 	bsr.l		fmul			# neither fmov nor fadd: emulate as "fmul"
   11398 	rts
   11399 fge_fadd:
   11400 	bsr.l		fadd
   11401 	rts
   11402 fge_fin:
   11403 	bsr.l		fin
   11404 	rts
   11405 
   11406 fge_unsupp:
   11407 	mov.b		&DENORM,STAG(%a6)	# UNSUPP: tag the src operand as DENORM
   11408 	bra.b		fge_cont
   11409 
   11410 #
   11411 # This table holds the offsets of the emulation routines for each individual
   11412 # math operation relative to the address of this table. Included are
   11413 # routines like fadd/fmul/fabs as well as the transcendentals.
   11414 # The location within the table is determined by the extension bits of the
   11415 # operation longword.
   11416 #
   11417 
   11418 	swbeg		&109			# 109 longword entries: slots 0x00-0x6c
   11419 tbl_unsupp:
   11420 	long		fin	 	- tbl_unsupp	# 00: fmove
   11421 	long		fint	 	- tbl_unsupp	# 01: fint
   11422 	long		fsinh	 	- tbl_unsupp	# 02: fsinh
   11423 	long		fintrz	 	- tbl_unsupp	# 03: fintrz
   11424 	long		fsqrt	 	- tbl_unsupp	# 04: fsqrt
   11425 	long		tbl_unsupp	- tbl_unsupp	# 05: no routine (offset 0)
   11426 	long		flognp1		- tbl_unsupp	# 06: flognp1
   11427 	long		tbl_unsupp	- tbl_unsupp	# 07: no routine (offset 0)
   11428 	long		fetoxm1		- tbl_unsupp	# 08: fetoxm1
   11429 	long		ftanh		- tbl_unsupp	# 09: ftanh
   11430 	long		fatan		- tbl_unsupp	# 0a: fatan
   11431 	long		tbl_unsupp	- tbl_unsupp	# 0b: no routine (offset 0)
   11432 	long		fasin		- tbl_unsupp	# 0c: fasin
   11433 	long		fatanh		- tbl_unsupp	# 0d: fatanh
   11434 	long		fsine		- tbl_unsupp	# 0e: fsin
   11435 	long		ftan		- tbl_unsupp	# 0f: ftan
   11436 	long		fetox		- tbl_unsupp	# 10: fetox
   11437 	long		ftwotox		- tbl_unsupp	# 11: ftwotox
   11438 	long		ftentox		- tbl_unsupp	# 12: ftentox
   11439 	long		tbl_unsupp	- tbl_unsupp	# 13: no routine (offset 0)
   11440 	long		flogn		- tbl_unsupp	# 14: flogn
   11441 	long		flog10		- tbl_unsupp	# 15: flog10
   11442 	long		flog2		- tbl_unsupp	# 16: flog2
   11443 	long		tbl_unsupp	- tbl_unsupp	# 17: no routine (offset 0)
   11444 	long		fabs		- tbl_unsupp 	# 18: fabs
   11445 	long		fcosh		- tbl_unsupp	# 19: fcosh
   11446 	long		fneg		- tbl_unsupp 	# 1a: fneg
   11447 	long		tbl_unsupp	- tbl_unsupp	# 1b: no routine (offset 0)
   11448 	long		facos		- tbl_unsupp	# 1c: facos
   11449 	long		fcos		- tbl_unsupp	# 1d: fcos
   11450 	long		fgetexp		- tbl_unsupp	# 1e: fgetexp
   11451 	long		fgetman		- tbl_unsupp	# 1f: fgetman
   11452 	long		fdiv		- tbl_unsupp 	# 20: fdiv
   11453 	long		fmod		- tbl_unsupp	# 21: fmod
   11454 	long		fadd		- tbl_unsupp 	# 22: fadd
   11455 	long		fmul		- tbl_unsupp 	# 23: fmul
   11456 	long		fsgldiv		- tbl_unsupp 	# 24: fsgldiv
   11457 	long		frem		- tbl_unsupp	# 25: frem
   11458 	long		fscale		- tbl_unsupp	# 26: fscale
   11459 	long		fsglmul		- tbl_unsupp 	# 27: fsglmul
   11460 	long		fsub		- tbl_unsupp 	# 28: fsub
   11461 	long		tbl_unsupp	- tbl_unsupp	# 29: no routine (offset 0)
   11462 	long		tbl_unsupp	- tbl_unsupp	# 2a: no routine (offset 0)
   11463 	long		tbl_unsupp	- tbl_unsupp	# 2b: no routine (offset 0)
   11464 	long		tbl_unsupp	- tbl_unsupp	# 2c: no routine (offset 0)
   11465 	long		tbl_unsupp	- tbl_unsupp	# 2d: no routine (offset 0)
   11466 	long		tbl_unsupp	- tbl_unsupp	# 2e: no routine (offset 0)
   11467 	long		tbl_unsupp	- tbl_unsupp	# 2f: no routine (offset 0)
   11468 	long		fsincos		- tbl_unsupp	# 30: fsincos
   11469 	long		fsincos		- tbl_unsupp	# 31: fsincos
   11470 	long		fsincos		- tbl_unsupp	# 32: fsincos
   11471 	long		fsincos		- tbl_unsupp	# 33: fsincos
   11472 	long		fsincos		- tbl_unsupp	# 34: fsincos
   11473 	long		fsincos		- tbl_unsupp	# 35: fsincos
   11474 	long		fsincos		- tbl_unsupp	# 36: fsincos
   11475 	long		fsincos		- tbl_unsupp	# 37: fsincos
   11476 	long		fcmp		- tbl_unsupp 	# 38: fcmp
   11477 	long		tbl_unsupp	- tbl_unsupp	# 39: no routine (offset 0)
   11478 	long		ftst		- tbl_unsupp 	# 3a: ftst
   11479 	long		tbl_unsupp	- tbl_unsupp	# 3b: no routine (offset 0)
   11480 	long		tbl_unsupp	- tbl_unsupp	# 3c: no routine (offset 0)
   11481 	long		tbl_unsupp	- tbl_unsupp	# 3d: no routine (offset 0)
   11482 	long		tbl_unsupp	- tbl_unsupp	# 3e: no routine (offset 0)
   11483 	long		tbl_unsupp	- tbl_unsupp	# 3f: no routine (offset 0)
   11484 	long		fsin		- tbl_unsupp 	# 40: fsmove
   11485 	long		fssqrt		- tbl_unsupp 	# 41: fssqrt
   11486 	long		tbl_unsupp	- tbl_unsupp	# 42: no routine (offset 0)
   11487 	long		tbl_unsupp	- tbl_unsupp	# 43: no routine (offset 0)
   11488 	long		fdin		- tbl_unsupp	# 44: fdmove
   11489 	long		fdsqrt		- tbl_unsupp 	# 45: fdsqrt
   11490 	long		tbl_unsupp	- tbl_unsupp	# 46: no routine (offset 0)
   11491 	long		tbl_unsupp	- tbl_unsupp	# 47: no routine (offset 0)
   11492 	long		tbl_unsupp	- tbl_unsupp	# 48: no routine (offset 0)
   11493 	long		tbl_unsupp	- tbl_unsupp	# 49: no routine (offset 0)
   11494 	long		tbl_unsupp	- tbl_unsupp	# 4a: no routine (offset 0)
   11495 	long		tbl_unsupp	- tbl_unsupp	# 4b: no routine (offset 0)
   11496 	long		tbl_unsupp	- tbl_unsupp	# 4c: no routine (offset 0)
   11497 	long		tbl_unsupp	- tbl_unsupp	# 4d: no routine (offset 0)
   11498 	long		tbl_unsupp	- tbl_unsupp	# 4e: no routine (offset 0)
   11499 	long		tbl_unsupp	- tbl_unsupp	# 4f: no routine (offset 0)
   11500 	long		tbl_unsupp	- tbl_unsupp	# 50: no routine (offset 0)
   11501 	long		tbl_unsupp	- tbl_unsupp	# 51: no routine (offset 0)
   11502 	long		tbl_unsupp	- tbl_unsupp	# 52: no routine (offset 0)
   11503 	long		tbl_unsupp	- tbl_unsupp	# 53: no routine (offset 0)
   11504 	long		tbl_unsupp	- tbl_unsupp	# 54: no routine (offset 0)
   11505 	long		tbl_unsupp	- tbl_unsupp	# 55: no routine (offset 0)
   11506 	long		tbl_unsupp	- tbl_unsupp	# 56: no routine (offset 0)
   11507 	long		tbl_unsupp	- tbl_unsupp	# 57: no routine (offset 0)
   11508 	long		fsabs		- tbl_unsupp 	# 58: fsabs
   11509 	long		tbl_unsupp	- tbl_unsupp	# 59: no routine (offset 0)
   11510 	long		fsneg		- tbl_unsupp 	# 5a: fsneg
   11511 	long		tbl_unsupp	- tbl_unsupp	# 5b: no routine (offset 0)
   11512 	long		fdabs		- tbl_unsupp	# 5c: fdabs
   11513 	long		tbl_unsupp	- tbl_unsupp	# 5d: no routine (offset 0)
   11514 	long		fdneg		- tbl_unsupp 	# 5e: fdneg
   11515 	long		tbl_unsupp	- tbl_unsupp	# 5f: no routine (offset 0)
   11516 	long		fsdiv		- tbl_unsupp	# 60: fsdiv
   11517 	long		tbl_unsupp	- tbl_unsupp	# 61: no routine (offset 0)
   11518 	long		fsadd		- tbl_unsupp	# 62: fsadd
   11519 	long		fsmul		- tbl_unsupp	# 63: fsmul
   11520 	long		fddiv		- tbl_unsupp 	# 64: fddiv
   11521 	long		tbl_unsupp	- tbl_unsupp	# 65: no routine (offset 0)
   11522 	long		fdadd		- tbl_unsupp	# 66: fdadd
   11523 	long		fdmul		- tbl_unsupp 	# 67: fdmul
   11524 	long		fssub		- tbl_unsupp	# 68: fssub
   11525 	long		tbl_unsupp	- tbl_unsupp	# 69: no routine (offset 0)
   11526 	long		tbl_unsupp	- tbl_unsupp	# 6a: no routine (offset 0)
   11527 	long		tbl_unsupp	- tbl_unsupp	# 6b: no routine (offset 0)
   11528 	long		fdsub		- tbl_unsupp 	# 6c: fdsub
   11529 
   11530 #########################################################################
   11531 # XDEF ****************************************************************	#
   11532 # 	fmul(): emulates the fmul instruction				#
   11533 #	fsmul(): emulates the fsmul instruction				#
   11534 #	fdmul(): emulates the fdmul instruction				#
   11535 #									#
   11536 # XREF ****************************************************************	#
   11537 #	scale_to_zero_src() - scale src exponent to zero		#
   11538 #	scale_to_zero_dst() - scale dst exponent to zero		#
   11539 #	unf_res() - return default underflow result			#
   11540 #	ovf_res() - return default overflow result			#
   11541 # 	res_qnan() - return QNAN result					#
   11542 # 	res_snan() - return SNAN result					#
   11543 #									#
   11544 # INPUT ***************************************************************	#
   11545 #	a0 = pointer to extended precision source operand		#
   11546 #	a1 = pointer to extended precision destination operand		#
   11547 #	d0  rnd prec,mode						#
   11548 #									#
   11549 # OUTPUT **************************************************************	#
   11550 #	fp0 = result							#
   11551 #	fp1 = EXOP (if exception occurred)				#
   11552 #									#
   11553 # ALGORITHM ***********************************************************	#
   11554 #	Handle NANs, infinities, and zeroes as special cases. Divide	#
   11555 # norms/denorms into ext/sgl/dbl precision.				#
   11556 #	For norms/denorms, scale the exponents such that a multiply	#
   11557 # instruction won't cause an exception. Use the regular fmul to		#
   11558 # compute a result. Check if the regular operands would have taken	#
   11559 # an exception. If so, return the default overflow/underflow result	#
   11560 # and return the EXOP if exceptions are enabled. Else, scale the 	#
   11561 # result operand to the proper exponent.				#
   11562 #									#
   11563 #########################################################################
   11564 
   11565 	align 		0x10
   11566 tbl_fmul_ovfl:					# fmul indexes this by rnd prec (ext, sgl, dbl)
   11567 	long		0x3fff - 0x7ffe		# ext_max
   11568 	long		0x3fff - 0x407e		# sgl_max
   11569 	long		0x3fff - 0x43fe		# dbl_max
   11570 tbl_fmul_unfl:					# same indexing; compared against the summed scale factor
   11571 	long		0x3fff + 0x0001		# ext_unfl
   11572 	long		0x3fff - 0x3f80		# sgl_unfl
   11573 	long		0x3fff - 0x3c00		# dbl_unfl
   11574 
   11575 	global		fsmul
   11576 fsmul:
   11577 	andi.b		&0x30,%d0		# clear rnd prec (only the rnd mode bits survive)
   11578 	ori.b		&s_mode*0x10,%d0	# insert sgl prec
   11579 	bra.b		fmul			# continue in the common multiply path
   11580 
   11581 	global		fdmul
   11582 fdmul:
   11583 	andi.b		&0x30,%d0		# clear rnd prec (only the rnd mode bits survive)
   11584 	ori.b		&d_mode*0x10,%d0	# insert dbl prec
   11585 # fdmul falls through into fmul with the adjusted rnd info in d0
   11586 	global		fmul
   11587 fmul:
   11588 	mov.l		%d0,L_SCR3(%a6)		# store rnd info
   11589 
   11590 	clr.w		%d1
   11591 	mov.b		DTAG(%a6),%d1
   11592 	lsl.b		&0x3,%d1		# dst tag moves to bits 3 and up
   11593 	or.b		STAG(%a6),%d1		# combine tags: d1 = (DTAG << 3) | STAG
   11594 	bne.w		fmul_not_norm		# optimize on non-norm input
   11595 
   11596 fmul_norm:
   11597 	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)	# FP_SCR1 = copy of dst operand
   11598 	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
   11599 	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
   11600 
   11601 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# FP_SCR0 = copy of src operand
   11602 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   11603 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   11604 
   11605 	bsr.l		scale_to_zero_src	# scale src exponent
   11606 	mov.l		%d0,-(%sp)		# save scale factor 1
   11607 
   11608 	bsr.l		scale_to_zero_dst	# scale dst exponent
   11609 
   11610 	add.l		%d0,(%sp)		# SCALE_FACTOR = scale1 + scale2
   11611 
   11612 	mov.w		2+L_SCR3(%a6),%d1	# fetch precision
   11613 	lsr.b		&0x6,%d1		# shift to lo bits
   11614 	mov.l		(%sp)+,%d0		# load S.F.
   11615 	cmp.l		%d0,(tbl_fmul_ovfl.w,%pc,%d1.w*4) # would result ovfl?
   11616 	beq.w		fmul_may_ovfl		# result may rnd to overflow
   11617 	blt.w		fmul_ovfl		# result will overflow
   11618 
   11619 	cmp.l		%d0,(tbl_fmul_unfl.w,%pc,%d1.w*4) # would result unfl?
   11620 	beq.w		fmul_may_unfl		# result may rnd to no unfl
   11621 	bgt.w		fmul_unfl		# result will underflow
   11622 
   11623 #
   11624 # NORMAL:
   11625 # - the result of the multiply operation will neither overflow nor underflow.
   11626 # - do the multiply to the proper precision and rounding mode.
   11627 # - scale the result exponent using the scale factor. if both operands were
   11628 # normalized then we really don't need to go through this scaling. but for now,
   11629 # this will do.
   11630 #
   11631 fmul_normal:
   11632 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
   11633 
   11634 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   11635 	fmov.l		&0x0,%fpsr		# clear FPSR
   11636 
   11637 	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
   11638 
   11639 	fmov.l		%fpsr,%d1		# save status
   11640 	fmov.l		&0x0,%fpcr		# clear FPCR
   11641 
   11642 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   11643 
   11644 # fmul_may_ovfl and fmul_may_unfl also branch here with the result in fp0
   11645 # and the scale factor still in d0.
   11644 fmul_normal_exit:
   11645 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
   11646 	mov.l		%d2,-(%sp)		# save d2
   11647 	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
   11648 	mov.l		%d1,%d2			# make a copy
   11649 	andi.l		&0x7fff,%d1		# strip sign
   11650 	andi.w		&0x8000,%d2		# keep old sign
   11651 	sub.l		%d0,%d1			# apply scale factor: exp = exp - S.F.
   11652 	or.w		%d2,%d1			# concat old sign,new exp
   11653 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
   11654 	mov.l		(%sp)+,%d2		# restore d2
   11655 	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
   11656 	rts
   11657 
   11658 #
   11659 # OVERFLOW:
   11660 # - the result of the multiply operation is an overflow.
   11661 # - do the multiply to the proper precision and rounding mode in order to
   11662 # set the inexact bits.
   11663 # - calculate the default result and return it in fp0.
   11664 # - if overflow or inexact is enabled, we need a multiply result rounded to
   11665 # extended precision. if the original operation was extended, then we have this
   11666 # result. if the original operation was single or double, we have to do another
   11667 # multiply using extended precision and the correct rounding mode. the result
   11668 # of this operation then has its exponent scaled by -0x6000 to create the
   11669 # exceptional operand.
   11670 #
   11671 fmul_ovfl:
   11672 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
   11673 
   11674 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   11675 	fmov.l		&0x0,%fpsr		# clear FPSR
   11676 
   11677 	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
   11678 
   11679 	fmov.l		%fpsr,%d1		# save status
   11680 	fmov.l		&0x0,%fpcr		# clear FPCR
   11681 
   11682 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   11683 
   11684 # save setting this until now because this is where fmul_may_ovfl may jump in
   11685 fmul_ovfl_tst:
   11686 	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
   11687 
   11688 	mov.b		FPCR_ENABLE(%a6),%d1
   11689 	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
   11690 	bne.b		fmul_ovfl_ena		# yes
   11691 
   11692 # calculate the default result
   11693 # (fmul_ovfl_ena_cont also branches here once the EXOP is in fp1)
   11693 fmul_ovfl_dis:
   11694 	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
   11695 	sne		%d1			# set sign param accordingly
   11696 	mov.l		L_SCR3(%a6),%d0		# pass rnd prec,mode
   11697 	bsr.l		ovf_res			# calculate default result
   11698 	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
   11699 	fmovm.x		(%a0),&0x80		# return default result in fp0
   11700 	rts
   11701 
   11702 #
   11703 # OVFL is enabled; Create EXOP:
   11704 # - if precision is extended, then we have the EXOP. simply bias the exponent
   11705 # with an extra -0x6000. if the precision is single or double, we need to
   11706 # calculate a result rounded to extended precision.
   11707 #
   11708 fmul_ovfl_ena:
   11709 	mov.l		L_SCR3(%a6),%d1
   11710 	andi.b		&0xc0,%d1		# test the rnd prec
   11711 	bne.b		fmul_ovfl_ena_sd	# it's sgl or dbl
   11712 
   11713 fmul_ovfl_ena_cont:
   11714 	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack
   11715 
   11716 	mov.l		%d2,-(%sp)		# save d2
   11717 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
   11718 	mov.w		%d1,%d2			# make a copy
   11719 	andi.l		&0x7fff,%d1		# strip sign
   11720 	sub.l		%d0,%d1			# apply scale factor: exp = exp - S.F.
   11721 	subi.l		&0x6000,%d1		# subtract bias
   11722 	andi.w		&0x7fff,%d1		# clear sign bit
   11723 	andi.w		&0x8000,%d2		# keep old sign
   11724 	or.w		%d2,%d1			# concat old sign,new exp
   11725 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
   11726 	mov.l		(%sp)+,%d2		# restore d2
   11727 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   11728 	bra.b		fmul_ovfl_dis		# default result still has to go in fp0
   11729 
   11730 fmul_ovfl_ena_sd:
   11731 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
   11732 
   11733 	mov.l		L_SCR3(%a6),%d1
   11734 	andi.b		&0x30,%d1		# keep rnd mode only
   11735 	fmov.l		%d1,%fpcr		# set FPCR
   11736 
   11737 	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
   11738 
   11739 	fmov.l		&0x0,%fpcr		# clear FPCR
   11740 	bra.b		fmul_ovfl_ena_cont	# fp0 now holds the product redone with rnd mode only
   11741 
   11742 #
   11743 # may OVERFLOW:
   11744 # - the result of the multiply operation MAY overflow.
   11745 # - do the multiply to the proper precision and rounding mode in order to
   11746 # set the inexact bits.
   11747 # - calculate the default result and return it in fp0.
   11748 #
   11749 fmul_may_ovfl:
   11750 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
   11751 
   11752 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   11753 	fmov.l		&0x0,%fpsr		# clear FPSR
   11754 
   11755 	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
   11756 
   11757 	fmov.l		%fpsr,%d1		# save status
   11758 	fmov.l		&0x0,%fpcr		# clear FPCR
   11759 
   11760 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   11761 
   11762 	fabs.x		%fp0,%fp1		# fp1 = |result|; fp0 keeps the signed result
   11763 	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
   11764 	fbge.w		fmul_ovfl_tst		# yes; overflow has occurred
   11765 
   11766 # no, it didn't overflow; we have correct result
   11767 	bra.w		fmul_normal_exit
   11768 
   11769 #
   11770 # UNDERFLOW:
   11771 # - the result of the multiply operation is an underflow.
   11772 # - do the multiply to the proper precision and rounding mode in order to
   11773 # set the inexact bits.
   11774 # - calculate the default result and return it in fp0.
   11775 # - if underflow or inexact is enabled, we need a multiply result rounded to
   11776 # extended precision. if the original operation was extended, then we have this
   11777 # result. if the original operation was single or double, we have to do another
   11778 # multiply using extended precision and the correct rounding mode. the result
   11779 # of this operation then has its exponent scaled by +0x6000 to create the
   11780 # exceptional operand.
   11781 #
   11782 fmul_unfl:
   11783 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
   11784 
   11785 # for fun, let's use only extended precision, round to zero. then, let
   11786 # the unf_res() routine figure out all the rest.
   11787 # will we get the correct answer.
   11788 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
   11789 
   11790 	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
   11791 	fmov.l		&0x0,%fpsr		# clear FPSR
   11792 
   11793 	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
   11794 
   11795 	fmov.l		%fpsr,%d1		# save status
   11796 	fmov.l		&0x0,%fpcr		# clear FPCR
   11797 
   11798 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   11799 
   11800 	mov.b		FPCR_ENABLE(%a6),%d1
   11801 	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
   11802 	bne.b		fmul_unfl_ena		# yes
   11803 
   11804 # (fmul_unfl_ena_cont also branches here once the EXOP is in fp1)
   11804 fmul_unfl_dis:
   11805 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
   11806 
   11807 	lea		FP_SCR0(%a6),%a0	# pass: result addr
   11808 	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
   11809 	bsr.l		unf_res			# calculate default result
   11810 	or.b		%d0,FPSR_CC(%a6)	# unf_res2 may have set 'Z'
   11811 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
   11812 	rts
   11813 
   11814 #
   11815 # UNFL is enabled.
   11816 #
   11817 fmul_unfl_ena:
   11818 	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1 (fp0 keeps the RZ product)
   11819 
   11820 	mov.l		L_SCR3(%a6),%d1
   11821 	andi.b		&0xc0,%d1		# is precision extended?
   11822 	bne.b		fmul_unfl_ena_sd	# no, sgl or dbl
   11823 
   11824 # if the rnd mode is anything but RZ, then we have to re-do the above
   11825 # multiplication because we used RZ for all.
   11826 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   11827 
   11828 fmul_unfl_ena_cont:
   11829 	fmov.l		&0x0,%fpsr		# clear FPSR
   11830 
   11831 	fmul.x		FP_SCR0(%a6),%fp1	# execute multiply
   11832 
   11833 	fmov.l		&0x0,%fpcr		# clear FPCR
   11834 
   11835 	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
   11836 	mov.l		%d2,-(%sp)		# save d2
   11837 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
   11838 	mov.l		%d1,%d2			# make a copy
   11839 	andi.l		&0x7fff,%d1		# strip sign
   11840 	andi.w		&0x8000,%d2		# keep old sign
   11841 	sub.l		%d0,%d1			# apply scale factor: exp = exp - S.F.
   11842 	addi.l		&0x6000,%d1		# add bias
   11843 	andi.w		&0x7fff,%d1		# clear sign bit
   11844 	or.w		%d2,%d1			# concat old sign,new exp
   11845 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
   11846 	mov.l		(%sp)+,%d2		# restore d2
   11847 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   11848 	bra.w		fmul_unfl_dis		# default result still has to go in fp0
   11849 
   11850 fmul_unfl_ena_sd:
   11851 	mov.l		L_SCR3(%a6),%d1
   11852 	andi.b		&0x30,%d1		# use only rnd mode
   11853 	fmov.l		%d1,%fpcr		# set FPCR
   11854 
   11855 	bra.b		fmul_unfl_ena_cont
   11856 
   11857 # MAY UNDERFLOW:
   11858 # -use the correct rounding mode and precision. this code favors operations
   11859 # that do not underflow.
   11860 fmul_may_unfl:
   11861 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
   11862 
   11863 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   11864 	fmov.l		&0x0,%fpsr		# clear FPSR
   11865 
   11866 	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
   11867 
   11868 	fmov.l		%fpsr,%d1		# save status
   11869 	fmov.l		&0x0,%fpcr		# clear FPCR
   11870 
   11871 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   11872 
   11873 	fabs.x		%fp0,%fp1		# fp1 = |result|; fp0 keeps the signed result
   11874 	fcmp.b		%fp1,&0x2		# compare |result| with 2.b
   11875 	fbgt.w		fmul_normal_exit	# |result| > 2; no underflow occurred
   11876 	fblt.w		fmul_unfl		# |result| < 2; underflow occurred
   11877 
   11878 #
   11879 # we still don't know if underflow occurred. result is ~ equal to 2. but,
   11880 # we don't know if the result was an underflow that rounded up to a 2 or
   11881 # a normalized number that rounded down to a 2. so, redo the entire operation
   11882 # using RZ as the rounding mode to see what the pre-rounded result is.
   11883 # this case should be relatively rare.
   11884 #
   11885 	fmovm.x		FP_SCR1(%a6),&0x40	# load dst operand
   11886 
   11887 	mov.l		L_SCR3(%a6),%d1
   11888 	andi.b		&0xc0,%d1		# keep rnd prec
   11889 	ori.b		&rz_mode*0x10,%d1	# insert RZ
   11890 
   11891 	fmov.l		%d1,%fpcr		# set FPCR
   11892 	fmov.l		&0x0,%fpsr		# clear FPSR
   11893 
   11894 	fmul.x		FP_SCR0(%a6),%fp1	# execute multiply
   11895 
   11896 	fmov.l		&0x0,%fpcr		# clear FPCR
   11897 	fabs.x		%fp1			# make absolute value
   11898 	fcmp.b		%fp1,&0x2		# is |result| < 2.b?
   11899 	fbge.w		fmul_normal_exit	# no; no underflow occurred
   11900 	bra.w		fmul_unfl		# yes, underflow occurred
   11901 
   11902 ################################################################################
   11903 
   11904 #
   11905 # Multiply: inputs are not both normalized; what are they?
   11906 #
   11907 fmul_not_norm:
   11908 	mov.w		(tbl_fmul_op.b,%pc,%d1.w*2),%d1	# d1 = table[(DTAG << 3) | STAG]
   11909 	jmp		(tbl_fmul_op.b,%pc,%d1.w)	# offsets are relative to tbl_fmul_op
   11910 
   11911 	swbeg		&48
   11912 tbl_fmul_op:					# rows: dst tag; columns: src tag ("DST x SRC")
   11913 	short		fmul_norm	- tbl_fmul_op # NORM x NORM
   11914 	short		fmul_zero	- tbl_fmul_op # NORM x ZERO
   11915 	short		fmul_inf_src	- tbl_fmul_op # NORM x INF
   11916 	short		fmul_res_qnan	- tbl_fmul_op # NORM x QNAN
   11917 	short		fmul_norm	- tbl_fmul_op # NORM x DENORM
   11918 	short		fmul_res_snan	- tbl_fmul_op # NORM x SNAN
   11919 	short		tbl_fmul_op	- tbl_fmul_op #
   11920 	short		tbl_fmul_op	- tbl_fmul_op #
   11921 
   11922 	short		fmul_zero	- tbl_fmul_op # ZERO x NORM
   11923 	short		fmul_zero	- tbl_fmul_op # ZERO x ZERO
   11924 	short		fmul_res_operr	- tbl_fmul_op # ZERO x INF
   11925 	short		fmul_res_qnan	- tbl_fmul_op # ZERO x QNAN
   11926 	short		fmul_zero	- tbl_fmul_op # ZERO x DENORM
   11927 	short		fmul_res_snan	- tbl_fmul_op # ZERO x SNAN
   11928 	short		tbl_fmul_op	- tbl_fmul_op #
   11929 	short		tbl_fmul_op	- tbl_fmul_op #
   11930 
   11931 	short		fmul_inf_dst	- tbl_fmul_op # INF x NORM
   11932 	short		fmul_res_operr	- tbl_fmul_op # INF x ZERO
   11933 	short		fmul_inf_dst	- tbl_fmul_op # INF x INF
   11934 	short		fmul_res_qnan	- tbl_fmul_op # INF x QNAN
   11935 	short		fmul_inf_dst	- tbl_fmul_op # INF x DENORM
   11936 	short		fmul_res_snan	- tbl_fmul_op # INF x SNAN
   11937 	short		tbl_fmul_op	- tbl_fmul_op #
   11938 	short		tbl_fmul_op	- tbl_fmul_op #
   11939 
   11940 	short		fmul_res_qnan	- tbl_fmul_op # QNAN x NORM
   11941 	short		fmul_res_qnan	- tbl_fmul_op # QNAN x ZERO
   11942 	short		fmul_res_qnan	- tbl_fmul_op # QNAN x INF
   11943 	short		fmul_res_qnan	- tbl_fmul_op # QNAN x QNAN
   11944 	short		fmul_res_qnan	- tbl_fmul_op # QNAN x DENORM
   11945 	short		fmul_res_snan	- tbl_fmul_op # QNAN x SNAN
   11946 	short		tbl_fmul_op	- tbl_fmul_op #
   11947 	short		tbl_fmul_op	- tbl_fmul_op #
   11948 
   11949 	short		fmul_norm	- tbl_fmul_op # DENORM x NORM
   11950 	short		fmul_zero	- tbl_fmul_op # DENORM x ZERO
   11951 	short		fmul_inf_src	- tbl_fmul_op # DENORM x INF
   11952 	short		fmul_res_qnan	- tbl_fmul_op # DENORM x QNAN
   11953 	short		fmul_norm	- tbl_fmul_op # DENORM x DENORM
   11954 	short		fmul_res_snan	- tbl_fmul_op # DENORM x SNAN
   11955 	short		tbl_fmul_op	- tbl_fmul_op #
   11956 	short		tbl_fmul_op	- tbl_fmul_op #
   11957 
   11958 	short		fmul_res_snan	- tbl_fmul_op # SNAN x NORM
   11959 	short		fmul_res_snan	- tbl_fmul_op # SNAN x ZERO
   11960 	short		fmul_res_snan	- tbl_fmul_op # SNAN x INF
   11961 	short		fmul_res_snan	- tbl_fmul_op # SNAN x QNAN
   11962 	short		fmul_res_snan	- tbl_fmul_op # SNAN x DENORM
   11963 	short		fmul_res_snan	- tbl_fmul_op # SNAN x SNAN
   11964 	short		tbl_fmul_op	- tbl_fmul_op #
   11965 	short		tbl_fmul_op	- tbl_fmul_op #
   11966 
   11967 fmul_res_operr:
   11968 	bra.l		res_operr		# long-branch stub so tbl_fmul_op can hold a short offset
   11969 fmul_res_snan:
   11970 	bra.l		res_snan		# long-branch stub for the SNAN cases
   11971 fmul_res_qnan:
   11972 	bra.l		res_qnan		# long-branch stub for the QNAN cases
   11973 
   11974 #
   11975 # Multiply: (Zero x Zero) || (Zero x norm) || (Zero x denorm)
   11976 #
   11977 	global		fmul_zero		# global for fsglmul
   11978 fmul_zero:
   11979 	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
   11980 	mov.b		DST_EX(%a1),%d1
   11981 	eor.b		%d0,%d1			# bit 7 of d1 = src sign ^ dst sign
   11982 	bpl.b		fmul_zero_p		# result ZERO is pos.
   11983 fmul_zero_n:
   11984 	fmov.s		&0x80000000,%fp0	# load -ZERO
   11985 	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
   11986 	rts
   11987 fmul_zero_p:
   11988 	fmov.s		&0x00000000,%fp0	# load +ZERO
   11989 	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
   11990 	rts
   11991 
   11992 #
   11993 # Multiply: (inf x inf) || (inf x norm) || (inf x denorm)
   11994 #
   11995 # Note: The j-bit for an infinity is a don't-care. However, to be
   11996 # strictly compatible w/ the 68881/882, we make sure to return an
   11997 # INF w/ the j-bit set if the input INF j-bit was set. Destination
   11998 # INFs take priority.
   11999 #
   12000 	global		fmul_inf_dst		# global for fsglmul
   12001 fmul_inf_dst:
   12002 	fmovm.x		DST(%a1),&0x80		# return INF result in fp0
   12003 	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
   12004 	mov.b		DST_EX(%a1),%d1
   12005 	eor.b		%d0,%d1			# bit 7 of d1 = src sign ^ dst sign
   12006 	bpl.b		fmul_inf_dst_p		# result INF is pos.
   12007 fmul_inf_dst_n:				# also entered from fmul_inf_src
   12008 	fabs.x		%fp0			# clear result sign
   12009 	fneg.x		%fp0			# set result sign
   12010 	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
   12011 	rts
   12012 fmul_inf_dst_p:				# also entered from fmul_inf_src
   12013 	fabs.x		%fp0			# clear result sign
   12014 	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
   12015 	rts
   12016 
   12017 	global		fmul_inf_src		# global for fsglmul
   12018 fmul_inf_src:
   12019 	fmovm.x		SRC(%a0),&0x80		# return INF result in fp0
   12020 	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
   12021 	mov.b		DST_EX(%a1),%d1
   12022 	eor.b		%d0,%d1			# bit 7 of d1 = src sign ^ dst sign
   12023 	bpl.b		fmul_inf_dst_p		# result INF is pos.
   12024 	bra.b		fmul_inf_dst_n		# result INF is neg.; share the dst-INF exit code
   12025 
   12026 #########################################################################
   12027 # XDEF ****************************************************************	#
   12028 #	fin(): emulates the fmove instruction				#
   12029 #	fsin(): emulates the fsmove instruction				#
   12030 #	fdin(): emulates the fdmove instruction				#
   12031 #									#
   12032 # XREF ****************************************************************	#
   12033 #	norm() - normalize mantissa for EXOP on denorm			#
   12034 #	scale_to_zero_src() - scale src exponent to zero		#
   12035 #	ovf_res() - return default overflow result			#
   12036 # 	unf_res() - return default underflow result			#
   12037 #	res_qnan_1op() - return QNAN result				#
   12038 #	res_snan_1op() - return SNAN result				#
   12039 #									#
   12040 # INPUT ***************************************************************	#
   12041 #	a0 = pointer to extended precision source operand		#
   12042 #	d0 = round prec/mode						#
   12043 # 									#
   12044 # OUTPUT **************************************************************	#
   12045 #	fp0 = result							#
   12046 #	fp1 = EXOP (if exception occurred)				#
   12047 #									#
   12048 # ALGORITHM ***********************************************************	#
   12049 # 	Handle NANs, infinities, and zeroes as special cases. Divide	#
   12050 # norms into extended, single, and double precision.			#
   12051 # 	Norms can be emulated w/ a regular fmove instruction. For	#
   12052 # sgl/dbl, must scale exponent and perform an "fmove". Check to see	#
   12053 # if the result would have overflowed/underflowed. If so, use unf_res()	#
   12054 # or ovf_res() to return the default result. Also return EXOP if	#
   12055 # exception is enabled. If no exception, return the default result.	#
   12056 #	Unnorms don't pass through here.				#
   12057 #									#
   12058 #########################################################################
   12059 
   12060 	global		fsin
   12061 fsin:
         # fsmove entry: force single precision into d0, keep the rounding mode.
   12062 	andi.b		&0x30,%d0		# keep rnd mode; clear rnd prec
   12063 	ori.b		&s_mode*0x10,%d0	# insert sgl precision
   12064 	bra.b		fin
   12065 
   12066 	global		fdin
   12067 fdin:
         # fdmove entry: force double precision into d0, then fall into fin.
   12068 	andi.b		&0x30,%d0		# keep rnd mode; clear rnd prec
   12069 	ori.b		&d_mode*0x10,%d0	# insert dbl precision
   12070 
   12071 	global		fin
   12072 fin:
         # fmove entry: a0 -> extended source operand, d0 = rnd prec/mode.
         # A zero source tag falls through to fin_norm below.
   12073 	mov.l		%d0,L_SCR3(%a6)		# store rnd info
   12074 
   12075 	mov.b		STAG(%a6),%d1		# fetch src optype tag
   12076 	bne.w		fin_not_norm		# optimize on non-norm input
   12077 
   12078 #
   12079 # FP MOVE IN: NORM input (reached by fall-through when the src tag is zero).
   12080 #
   12081 fin_norm:
   12082 	andi.b		&0xc0,%d0		# is precision extended?
   12083 	bne.w		fin_not_ext		# no, so go handle dbl or sgl
   12084 
   12085 #
   12086 # precision selected is extended. so...we cannot get an underflow
   12087 # or overflow because of rounding to the correct precision. so...
   12088 # skip the scaling and unscaling; only the 'N' ccode needs to be derived.
   12089 #
   12090 	tst.b		SRC_EX(%a0)		# is the operand negative?
   12091 	bpl.b		fin_norm_done		# no
   12092 	bset		&neg_bit,FPSR_CC(%a6)	# yes, so set 'N' ccode bit
   12093 fin_norm_done:
   12094 	fmovm.x		SRC(%a0),&0x80		# return result in fp0
   12095 	rts
   12096 
   12097 #
   12098 # for an extended precision DENORM, the UNFL exception bit is set
   12099 # the accrued bit is NOT set in this instance(no inexactness!)
   12100 #
   12101 fin_denorm:
   12102 	andi.b		&0xc0,%d0		# is precision extended?
   12103 	bne.w		fin_not_ext		# no, so go handle dbl or sgl
   12104 
   12105 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
   12106 	tst.b		SRC_EX(%a0)		# is the operand negative?
   12107 	bpl.b		fin_denorm_done		# no
   12108 	bset		&neg_bit,FPSR_CC(%a6)	# yes, so set 'N' ccode bit
   12109 fin_denorm_done:
   12110 	fmovm.x		SRC(%a0),&0x80		# return result in fp0
   12111 	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
   12112 	bne.b		fin_denorm_unfl_ena	# yes; an EXOP must also be built
   12113 	rts
   12114 
   12115 #
   12116 # the input is an extended DENORM and underflow is enabled in the FPCR.
   12117 # normalize the mantissa and add the bias of 0x6000 to the resulting negative
   12118 # exponent and insert back into the operand. the EXOP is returned in fp1;
   12119 # fp0 (loaded by fin_denorm) is left untouched.
   12120 fin_denorm_unfl_ena:
   12121 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy src to FP_SCR0
   12122 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   12123 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   12124 	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
   12125 	bsr.l		norm			# normalize result
   12126 	neg.w		%d0			# new exponent = -(shft val)
   12127 	addi.w		&0x6000,%d0		# add new bias to exponent
   12128 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
   12129 	andi.w		&0x8000,%d1		# keep old sign
   12130 	andi.w		&0x7fff,%d0		# clear sign position
   12131 	or.w		%d1,%d0			# concat new exp,old sign
   12132 	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
   12133 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   12134 	rts
   12135 
   12136 #
   12137 # operand is to be rounded to single or double precision.
   12138 # d0.b holds only the precision bits here (masked with 0xc0 by fin_norm/fin_denorm).
   12139 fin_not_ext:
   12140 	cmpi.b		%d0,&s_mode*0x10 	# separate sgl/dbl prec
   12141 	bne.b		fin_dbl
   12142 
   12143 #
   12144 # operand is to be rounded to single precision
   12145 #
   12146 fin_sgl:
   12147 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy src to FP_SCR0
   12148 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   12149 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   12150 	bsr.l		scale_to_zero_src	# calculate scale factor
   12151 
   12152 	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
   12153 	bge.w		fin_sd_unfl		# yes; go handle underflow
   12154 	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
   12155 	beq.w		fin_sd_may_ovfl		# maybe; go check
   12156 	blt.w		fin_sd_ovfl		# yes; go handle overflow
         # otherwise fall through into fin_sd_normal
   12157 
   12158 #
   12159 # operand will NOT overflow or underflow when moved into the fp reg file.
   12160 # on entry: FP_SCR0 = scaled operand, d0 = scale factor, L_SCR3 = rnd prec/mode.
   12161 fin_sd_normal:
   12162 	fmov.l		&0x0,%fpsr		# clear FPSR
   12163 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   12164 
   12165 	fmov.x		FP_SCR0(%a6),%fp0	# perform move
   12166 
   12167 	fmov.l		%fpsr,%d1		# save FPSR
   12168 	fmov.l		&0x0,%fpcr		# clear FPCR
   12169 
   12170 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   12171 
         # common exit (also used by fin_sd_may_ovfl): rebuild the exponent of
         # the rounded result in fp0 from the scale factor in d0.
   12172 fin_sd_normal_exit:
   12173 	mov.l		%d2,-(%sp)		# save d2
   12174 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
   12175 	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
   12176 	mov.w		%d1,%d2			# make a copy
   12177 	andi.l		&0x7fff,%d1		# strip sign
   12178 	sub.l		%d0,%d1			# subtract scale factor
   12179 	andi.w		&0x8000,%d2		# keep old sign
   12180 	or.w		%d1,%d2			# concat old sign,new exponent
   12181 	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
   12182 	mov.l		(%sp)+,%d2		# restore d2
   12183 	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
   12184 	rts
   12185 
   12186 #
   12187 # operand is to be rounded to double precision.
   12188 # same flow as fin_sgl, but with the double precision exponent thresholds.
   12189 fin_dbl:
   12190 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy src to FP_SCR0
   12191 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   12192 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   12193 	bsr.l		scale_to_zero_src	# calculate scale factor
   12194 
   12195 	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
   12196 	bge.w		fin_sd_unfl		# yes; go handle underflow
   12197 	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
   12198 	beq.w		fin_sd_may_ovfl		# maybe; go check
   12199 	blt.w		fin_sd_ovfl		# yes; go handle overflow
   12200 	bra.w		fin_sd_normal		# no; go handle normalized op
   12201 
   12202 #
   12203 # operand WILL underflow when moved in to the fp register file.
   12204 # on entry: FP_SCR0 = scaled operand, d0 = scale factor.
   12205 fin_sd_unfl:
   12206 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
   12207 
   12208 	tst.b		FP_SCR0_EX(%a6)		# is operand negative?
   12209 	bpl.b		fin_sd_unfl_tst		# no
   12210 	bset		&neg_bit,FPSR_CC(%a6)	# set 'N' ccode bit
   12211 
   12212 # if underflow or inexact is enabled, then go calculate the EXOP first.
   12213 fin_sd_unfl_tst:
   12214 	mov.b		FPCR_ENABLE(%a6),%d1
   12215 	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
   12216 	bne.b		fin_sd_unfl_ena		# yes
   12217 
         # default result path; fin_sd_unfl_ena branches back here after
         # placing the EXOP in fp1.
   12218 fin_sd_unfl_dis:
   12219 	lea		FP_SCR0(%a6),%a0	# pass: result addr
   12220 	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
   12221 	bsr.l		unf_res			# calculate default result
   12222 	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
   12223 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
   12224 	rts
   12225 
   12226 #
   12227 # operand will underflow AND underflow or inexact is enabled.
   12228 # therefore, we must return the result rounded to extended precision.
   12229 # the EXOP is built in FP_SCR1 so that FP_SCR0 stays intact for fin_sd_unfl_dis.
   12230 fin_sd_unfl_ena:
   12231 	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)	# copy mantissa to FP_SCR1
   12232 	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
   12233 	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent
   12234 
   12235 	mov.l		%d2,-(%sp)		# save d2
   12236 	mov.w		%d1,%d2			# make a copy
   12237 	andi.l		&0x7fff,%d1		# strip sign
   12238 	sub.l		%d0,%d1			# subtract scale factor
   12239 	andi.w		&0x8000,%d2		# extract old sign
   12240 	addi.l		&0x6000,%d1		# add new bias
   12241 	andi.w		&0x7fff,%d1		# keep exponent within 15 bits
   12242 	or.w		%d1,%d2			# concat old sign,new exp
   12243 	mov.w		%d2,FP_SCR1_EX(%a6)	# insert new exponent
   12244 	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
   12245 	mov.l		(%sp)+,%d2		# restore d2
   12246 	bra.b		fin_sd_unfl_dis		# now create the default result
   12247 
   12248 #
   12249 # operand WILL overflow.
   12250 # the move is still executed so that INEX2 and N are captured from the FPSR.
   12251 fin_sd_ovfl:
   12252 	fmov.l		&0x0,%fpsr		# clear FPSR
   12253 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   12254 
   12255 	fmov.x		FP_SCR0(%a6),%fp0	# perform move
   12256 
   12257 	fmov.l		&0x0,%fpcr		# clear FPCR
   12258 	fmov.l		%fpsr,%d1		# save FPSR
   12259 
   12260 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   12261 
         # also entered from fin_sd_may_ovfl once an overflow is confirmed.
   12262 fin_sd_ovfl_tst:
   12263 	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
   12264 
   12265 	mov.b		FPCR_ENABLE(%a6),%d1
   12266 	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
   12267 	bne.b		fin_sd_ovfl_ena		# yes
   12268 
   12269 #
   12270 # OVFL is not enabled; therefore, we must create the default result by
   12271 # calling ovf_res(). fin_sd_ovfl_ena also ends up here after building the EXOP.
   12272 #
   12273 fin_sd_ovfl_dis:
   12274 	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
   12275 	sne		%d1			# set sign param accordingly
   12276 	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
   12277 	bsr.l		ovf_res			# calculate default result
   12278 	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
   12279 	fmovm.x		(%a0),&0x80		# return default result in fp0
   12280 	rts
   12281 
   12282 #
   12283 # OVFL or INEX is enabled.
   12284 # the INEX2 bit has already been updated by the round to the correct precision.
   12285 # now, build the EXOP from the operand in FP_SCR0 (and don't alter the FPSR).
   12286 #
   12287 fin_sd_ovfl_ena:
   12288 	mov.l		%d2,-(%sp)		# save d2
   12289 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
   12290 	mov.l		%d1,%d2			# make a copy
   12291 	andi.l		&0x7fff,%d1		# strip sign
   12292 	andi.w		&0x8000,%d2		# keep old sign
   12293 	sub.l		%d0,%d1			# subtract scale factor
   12294 	sub.l		&0x6000,%d1		# subtract bias
   12295 	andi.w		&0x7fff,%d1		# keep exponent within 15 bits
   12296 	or.w		%d2,%d1			# concat old sign,new exp
   12297 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
   12298 	mov.l		(%sp)+,%d2		# restore d2
   12299 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   12300 	bra.b		fin_sd_ovfl_dis		# now create the default result
   12301 
   12302 #
   12303 # the move in MAY overflow. so...
   12304 # do the move on the scaled operand and see whether rounding carried
   12305 # the magnitude of the result up to 2.
   12305 fin_sd_may_ovfl:
   12306 	fmov.l		&0x0,%fpsr		# clear FPSR
   12307 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   12308 
   12309 	fmov.x		FP_SCR0(%a6),%fp0	# perform the move
   12310 
   12311 	fmov.l		%fpsr,%d1		# save status
   12312 	fmov.l		&0x0,%fpcr		# clear FPCR
   12313 
   12314 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   12315 
   12316 	fabs.x		%fp0,%fp1		# make a copy of result
   12317 	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
   12318 	fbge.w		fin_sd_ovfl_tst		# yes; overflow has occurred
   12319 
   12320 # no, it didn't overflow; we have correct result
   12321 	bra.w		fin_sd_normal_exit
   12322 
   12323 ##########################################################################
   12324 
   12325 #
   12326 # operand is not a NORM: check its optype (in d1) and branch accordingly
   12327 #
   12328 fin_not_norm:
   12329 	cmpi.b		%d1,&DENORM		# weed out DENORM
   12330 	beq.w		fin_denorm
   12331 	cmpi.b		%d1,&SNAN		# weed out SNANs
   12332 	beq.l		res_snan_1op
   12333 	cmpi.b		%d1,&QNAN		# weed out QNANs
   12334 	beq.l		res_qnan_1op
   12335 
   12336 #
   12337 # do the fmove in; at this point, only possible ops are ZERO and INF.
   12338 # use fmov to determine ccodes.
   12339 # prec:mode should be zero at this point but it won't affect answer anyways.
   12340 #
   12341 	fmov.x		SRC(%a0),%fp0		# do fmove in
   12342 	fmov.l		%fpsr,%d0		# no exceptions possible
   12343 	rol.l		&0x8,%d0		# put ccodes in lo byte
   12344 	mov.b		%d0,FPSR_CC(%a6)	# insert correct ccodes
   12345 	rts
   12346 
   12347 #########################################################################
   12348 # XDEF ****************************************************************	#
   12349 # 	fdiv(): emulates the fdiv instruction				#
   12350 #	fsdiv(): emulates the fsdiv instruction				#
   12351 #	fddiv(): emulates the fddiv instruction				#
   12352 #									#
   12353 # XREF ****************************************************************	#
   12354 #	scale_to_zero_src() - scale src exponent to zero		#
   12355 #	scale_to_zero_dst() - scale dst exponent to zero		#
   12356 #	unf_res() - return default underflow result			#
   12357 #	ovf_res() - return default overflow result			#
   12358 # 	res_qnan() - return QNAN result					#
   12359 # 	res_snan() - return SNAN result					#
   12360 #									#
   12361 # INPUT ***************************************************************	#
   12362 #	a0 = pointer to extended precision source operand		#
   12363 #	a1 = pointer to extended precision destination operand		#
   12364 #	d0  rnd prec,mode						#
   12365 #									#
   12366 # OUTPUT **************************************************************	#
   12367 #	fp0 = result							#
   12368 #	fp1 = EXOP (if exception occurred)				#
   12369 #									#
   12370 # ALGORITHM ***********************************************************	#
   12371 #	Handle NANs, infinities, and zeroes as special cases. Divide	#
   12372 # norms/denorms into ext/sgl/dbl precision.				#
   12373 #	For norms/denorms, scale the exponents such that a divide	#
   12374 # instruction won't cause an exception. Use the regular fdiv to		#
   12375 # compute a result. Check if the regular operands would have taken	#
   12376 # an exception. If so, return the default overflow/underflow result	#
   12377 # and return the EXOP if exceptions are enabled. Else, scale the 	#
   12378 # result operand to the proper exponent.				#
   12379 #									#
   12380 #########################################################################
   12381 
   12382 	align		0x10
         # scale factor thresholds used by fdiv_norm, indexed by rounding
         # precision (0 = ext, 1 = sgl, 2 = dbl).
   12383 tbl_fdiv_unfl:
   12384 	long		0x3fff - 0x0000		# ext underflow threshold
   12385 	long		0x3fff - 0x3f81		# sgl underflow threshold
   12386 	long		0x3fff - 0x3c01		# dbl underflow threshold
   12387 
   12388 tbl_fdiv_ovfl:
   12389 	long		0x3fff - 0x7ffe		# ext overflow exponent
   12390 	long		0x3fff - 0x407e		# sgl overflow exponent
   12391 	long		0x3fff - 0x43fe		# dbl overflow exponent
   12392 
   12393 	global		fsdiv
   12394 fsdiv:
         # fsdiv entry: force single precision into d0, keep the rounding mode.
   12395 	andi.b		&0x30,%d0		# keep rnd mode; clear rnd prec
   12396 	ori.b		&s_mode*0x10,%d0	# insert sgl prec
   12397 	bra.b		fdiv
   12398 
   12399 	global		fddiv
   12400 fddiv:
         # fddiv entry: force double precision into d0, then fall into fdiv.
   12401 	andi.b		&0x30,%d0		# keep rnd mode; clear rnd prec
   12402 	ori.b		&d_mode*0x10,%d0	# insert dbl prec
   12403 
   12404 	global		fdiv
   12405 fdiv:
         # fdiv entry: a0 -> src operand, a1 -> dst operand, d0 = rnd prec/mode.
   12406 	mov.l		%d0,L_SCR3(%a6)		# store rnd info
   12407 
   12408 	clr.w		%d1
   12409 	mov.b		DTAG(%a6),%d1		# fetch dst optype tag
   12410 	lsl.b		&0x3,%d1		# d1 = dst tag * 8
   12411 	or.b		STAG(%a6),%d1		# combine dst and src tags
   12412 
   12413 	bne.w		fdiv_not_norm		# optimize on non-norm input
   12414 
   12415 #
   12416 # DIVIDE: NORMs and DENORMs ONLY!
   12417 #
   12418 fdiv_norm:
   12419 	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)	# copy dst to FP_SCR1
   12420 	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
   12421 	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
   12422 
   12423 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy src to FP_SCR0
   12424 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   12425 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   12426 
   12427 	bsr.l		scale_to_zero_src	# scale src exponent
   12428 	mov.l		%d0,-(%sp)		# save scale factor 1
   12429 
   12430 	bsr.l		scale_to_zero_dst	# scale dst exponent
   12431 
   12432 	neg.l		(%sp)			# SCALE FACTOR = scale2 - scale1
   12433 	add.l		%d0,(%sp)
   12434 
   12435 	mov.w		2+L_SCR3(%a6),%d1	# fetch precision
   12436 	lsr.b		&0x6,%d1		# shift to lo bits
   12437 	mov.l		(%sp)+,%d0		# load S.F.
   12438 	cmp.l		%d0,(tbl_fdiv_ovfl.b,%pc,%d1.w*4) # will result overflow?
   12439 	ble.w		fdiv_may_ovfl		# result may overflow; go check
   12440 
   12441 	cmp.l		%d0,(tbl_fdiv_unfl.w,%pc,%d1.w*4) # will result underflow?
   12442 	beq.w		fdiv_may_unfl		# maybe
   12443 	bgt.w		fdiv_unfl		# yes; go handle underflow
         # otherwise fall through into fdiv_normal
   12444 
   12445 fdiv_normal:
         # no overflow/underflow possible: divide the scaled operands with the
         # user's rounding precision/mode, then rebuild the exponent.
   12446 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
   12447 
   12448 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   12449 	fmov.l		&0x0,%fpsr		# clear FPSR
   12450 
   12451 	fdiv.x		FP_SCR0(%a6),%fp0	# perform divide
   12452 
   12453 	fmov.l		%fpsr,%d1		# save FPSR
   12454 	fmov.l		&0x0,%fpcr		# clear FPCR
   12455 
   12456 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   12457 
         # common exit: fp0 = scaled result, d0 = scale factor.
   12458 fdiv_normal_exit:
   12459 	fmovm.x		&0x80,FP_SCR0(%a6)	# store result on stack
   12460 	mov.l		%d2,-(%sp)		# store d2
   12461 	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
   12462 	mov.l		%d1,%d2			# make a copy
   12463 	andi.l		&0x7fff,%d1		# strip sign
   12464 	andi.w		&0x8000,%d2		# keep old sign
   12465 	sub.l		%d0,%d1			# subtract scale factor
   12466 	or.w		%d2,%d1			# concat old sign,new exp
   12467 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
   12468 	mov.l		(%sp)+,%d2		# restore d2
   12469 	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
   12470 	rts
   12471 
   12472 tbl_fdiv_ovfl2:				# exponent limits, indexed by precision
   12473 	long		0x7fff			# ext
   12474 	long		0x407f			# sgl
   12475 	long		0x43ff			# dbl
   12476 
   12477 fdiv_no_ovfl:
   12478 	mov.l		(%sp)+,%d0		# restore scale factor
   12479 	bra.b		fdiv_normal_exit
   12480 
         # the divide MAY overflow: do it, then compare the unscaled result
         # exponent against tbl_fdiv_ovfl2. d1.w is used below as the precision
         # index and is not written here, so the FPSR is read through d0.
   12481 fdiv_may_ovfl:
   12482 	mov.l		%d0,-(%sp)		# save scale factor
   12483 
   12484 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
   12485 
   12486 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   12487 	fmov.l		&0x0,%fpsr		# clear FPSR
   12488 
   12489 	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide
   12490 
   12491 	fmov.l		%fpsr,%d0		# save status
   12492 	fmov.l		&0x0,%fpcr		# clear FPCR
   12493 
   12494 	or.l		%d0,USER_FPSR(%a6)	# save INEX,N
   12495 
   12496 	fmovm.x		&0x01,-(%sp)		# save result to stack
   12497 	mov.w		(%sp),%d0		# fetch new exponent
   12498 	add.l		&0xc,%sp		# clear result from stack
   12499 	andi.l		&0x7fff,%d0		# strip sign
   12500 	sub.l		(%sp),%d0		# subtract scale factor
   12501 	cmp.l		%d0,(tbl_fdiv_ovfl2.b,%pc,%d1.w*4) # below the limit?
   12502 	blt.b		fdiv_no_ovfl		# yes; no overflow occurred
   12503 	mov.l		(%sp)+,%d0		# restore scale factor; fall into fdiv_ovfl_tst
   12504 
   12505 fdiv_ovfl_tst:
         # overflow confirmed: flag it, then return either just the default
         # result or the EXOP (via fdiv_ovfl_ena) plus the default result.
   12506 	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
   12507 
   12508 	mov.b		FPCR_ENABLE(%a6),%d1
   12509 	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
   12510 	bne.b		fdiv_ovfl_ena		# yes
   12511 
   12512 fdiv_ovfl_dis:
   12513 	btst		&neg_bit,FPSR_CC(%a6) 	# is result negative?
   12514 	sne		%d1			# set sign param accordingly
   12515 	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
   12516 	bsr.l		ovf_res			# calculate default result
   12517 	or.b		%d0,FPSR_CC(%a6)	# set INF if applicable
   12518 	fmovm.x		(%a0),&0x80		# return default result in fp0
   12519 	rts
   12520 
   12521 fdiv_ovfl_ena:
         # OVFL or INEX is enabled: build the EXOP in fp1. for sgl/dbl the
         # divide is first redone with the precision bits cleared.
   12522 	mov.l		L_SCR3(%a6),%d1
   12523 	andi.b		&0xc0,%d1		# is precision extended?
   12524 	bne.b		fdiv_ovfl_ena_sd	# no, do sgl or dbl
   12525 
   12526 fdiv_ovfl_ena_cont:
   12527 	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack
   12528 
   12529 	mov.l		%d2,-(%sp)		# save d2
   12530 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
   12531 	mov.w		%d1,%d2			# make a copy
   12532 	andi.l		&0x7fff,%d1		# strip sign
   12533 	sub.l		%d0,%d1			# subtract scale factor
   12534 	subi.l		&0x6000,%d1		# subtract bias
   12535 	andi.w		&0x7fff,%d1		# clear sign bit
   12536 	andi.w		&0x8000,%d2		# keep old sign
   12537 	or.w		%d2,%d1			# concat old sign,new exp
   12538 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
   12539 	mov.l		(%sp)+,%d2		# restore d2
   12540 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   12541 	bra.b		fdiv_ovfl_dis		# now create the default result
   12542 
   12543 fdiv_ovfl_ena_sd:
   12544 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
   12545 
   12546 	mov.l		L_SCR3(%a6),%d1
   12547 	andi.b		&0x30,%d1		# keep rnd mode
   12548 	fmov.l		%d1,%fpcr		# set FPCR
   12549 
   12550 	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide
   12551 
   12552 	fmov.l		&0x0,%fpcr		# clear FPCR
   12553 	bra.b		fdiv_ovfl_ena_cont
   12554 
   12555 fdiv_unfl:
         # the divide WILL underflow: flag UNFL, divide using round-to-zero to
         # collect INEX2/N, then return the default result (and the EXOP when
         # UNFL or INEX is enabled).
   12556 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
   12557 
   12558 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
   12559 
   12560 	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
   12561 	fmov.l		&0x0,%fpsr		# clear FPSR
   12562 
   12563 	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide
   12564 
   12565 	fmov.l		%fpsr,%d1		# save status
   12566 	fmov.l		&0x0,%fpcr		# clear FPCR
   12567 
   12568 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   12569 
   12570 	mov.b		FPCR_ENABLE(%a6),%d1
   12571 	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
   12572 	bne.b		fdiv_unfl_ena		# yes
   12573 
   12574 fdiv_unfl_dis:
   12575 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
   12576 
   12577 	lea		FP_SCR0(%a6),%a0	# pass: result addr
   12578 	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
   12579 	bsr.l		unf_res			# calculate default result
   12580 	or.b		%d0,FPSR_CC(%a6)	# 'Z' may have been set
   12581 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
   12582 	rts
   12583 
   12584 #
   12585 # UNFL or INEX is enabled: redo the divide into fp1 to produce the EXOP.
   12586 #
   12587 fdiv_unfl_ena:
   12588 	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
   12589 
   12590 	mov.l		L_SCR3(%a6),%d1
   12591 	andi.b		&0xc0,%d1		# is precision extended?
   12592 	bne.b		fdiv_unfl_ena_sd	# no, sgl or dbl
   12593 
   12594 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   12595 
   12596 fdiv_unfl_ena_cont:
   12597 	fmov.l		&0x0,%fpsr		# clear FPSR
   12598 
   12599 	fdiv.x		FP_SCR0(%a6),%fp1	# execute divide
   12600 
   12601 	fmov.l		&0x0,%fpcr		# clear FPCR
   12602 
   12603 	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
   12604 	mov.l		%d2,-(%sp)		# save d2
   12605 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
   12606 	mov.l		%d1,%d2			# make a copy
   12607 	andi.l		&0x7fff,%d1		# strip sign
   12608 	andi.w		&0x8000,%d2		# keep old sign
   12609 	sub.l		%d0,%d1			# subtract scale factor
   12610 	addi.l		&0x6000,%d1		# add bias
   12611 	andi.w		&0x7fff,%d1		# keep exponent within 15 bits
   12612 	or.w		%d2,%d1			# concat old sign,new exp
   12613 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exp
   12614 	mov.l		(%sp)+,%d2		# restore d2
   12615 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   12616 	bra.w		fdiv_unfl_dis		# now create the default result
   12617 
   12618 fdiv_unfl_ena_sd:
   12619 	mov.l		L_SCR3(%a6),%d1
   12620 	andi.b		&0x30,%d1		# use only rnd mode
   12621 	fmov.l		%d1,%fpcr		# set FPCR
   12622 
   12623 	bra.b		fdiv_unfl_ena_cont
   12624 
   12625 #
   12626 # the divide operation MAY underflow:
   12627 #
   12628 fdiv_may_unfl:
   12629 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
   12630 
   12631 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   12632 	fmov.l		&0x0,%fpsr		# clear FPSR
   12633 
   12634 	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide
   12635 
   12636 	fmov.l		%fpsr,%d1		# save status
   12637 	fmov.l		&0x0,%fpcr		# clear FPCR
   12638 
   12639 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   12640 
   12641 	fabs.x		%fp0,%fp1		# make a copy of result
   12642 	fcmp.b		%fp1,&0x1		# compare |result| with 1.b
   12643 	fbgt.w		fdiv_normal_exit	# |result| > 1; no underflow occurred
   12644 	fblt.w		fdiv_unfl		# |result| < 1; underflow occurred
   12645 
   12646 #
   12647 # we still don't know if underflow occurred. result is ~ equal to 1. but,
   12648 # we don't know if the result was an underflow that rounded up to a 1
   12649 # or a normalized number that rounded down to a 1. so, redo the entire
   12650 # operation using RZ as the rounding mode to see what the pre-rounded
   12651 # result is. this case should be relatively rare.
   12652 #
   12653 	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
   12654 
   12655 	mov.l		L_SCR3(%a6),%d1
   12656 	andi.b		&0xc0,%d1		# keep rnd prec
   12657 	ori.b		&rz_mode*0x10,%d1	# insert RZ
   12658 
   12659 	fmov.l		%d1,%fpcr		# set FPCR
   12660 	fmov.l		&0x0,%fpsr		# clear FPSR
   12661 
   12662 	fdiv.x		FP_SCR0(%a6),%fp1	# execute divide
   12663 
   12664 	fmov.l		&0x0,%fpcr		# clear FPCR
   12665 	fabs.x		%fp1			# make absolute value
   12666 	fcmp.b		%fp1,&0x1		# is |result| < 1.b?
   12667 	fbge.w		fdiv_normal_exit	# no; no underflow occurred
   12668 	bra.w		fdiv_unfl		# yes; underflow occurred
   12669 
   12670 ############################################################################
   12671 
   12672 #
   12673 # Divide: inputs are not both normalized; what are they?
   12674 # d1 = (dst tag * 8) + src tag, as built at fdiv. it indexes a table of
   12675 # 16-bit offsets relative to tbl_fdiv_op. table comments read "dst / src".
   12675 fdiv_not_norm:
   12676 	mov.w		(tbl_fdiv_op.b,%pc,%d1.w*2),%d1	# fetch handler offset
   12677 	jmp		(tbl_fdiv_op.b,%pc,%d1.w*1)	# jump to handler
   12678 
   12679 	swbeg		&48
   12680 tbl_fdiv_op:
   12681 	short		fdiv_norm	- tbl_fdiv_op # NORM / NORM
   12682 	short		fdiv_inf_load	- tbl_fdiv_op # NORM / ZERO
   12683 	short		fdiv_zero_load	- tbl_fdiv_op # NORM / INF
   12684 	short		fdiv_res_qnan	- tbl_fdiv_op # NORM / QNAN
   12685 	short		fdiv_norm	- tbl_fdiv_op # NORM / DENORM
   12686 	short		fdiv_res_snan	- tbl_fdiv_op # NORM / SNAN
   12687 	short		tbl_fdiv_op	- tbl_fdiv_op # (unused slot)
   12688 	short		tbl_fdiv_op	- tbl_fdiv_op # (unused slot)
   12689 
   12690 	short		fdiv_zero_load	- tbl_fdiv_op # ZERO / NORM
   12691 	short		fdiv_res_operr	- tbl_fdiv_op # ZERO / ZERO
   12692 	short		fdiv_zero_load	- tbl_fdiv_op # ZERO / INF
   12693 	short		fdiv_res_qnan	- tbl_fdiv_op # ZERO / QNAN
   12694 	short		fdiv_zero_load	- tbl_fdiv_op # ZERO / DENORM
   12695 	short		fdiv_res_snan	- tbl_fdiv_op # ZERO / SNAN
   12696 	short		tbl_fdiv_op	- tbl_fdiv_op # (unused slot)
   12697 	short		tbl_fdiv_op	- tbl_fdiv_op # (unused slot)
   12698 
   12699 	short		fdiv_inf_dst	- tbl_fdiv_op # INF / NORM
   12700 	short		fdiv_inf_dst	- tbl_fdiv_op # INF / ZERO
   12701 	short		fdiv_res_operr	- tbl_fdiv_op # INF / INF
   12702 	short		fdiv_res_qnan	- tbl_fdiv_op # INF / QNAN
   12703 	short		fdiv_inf_dst	- tbl_fdiv_op # INF / DENORM
   12704 	short		fdiv_res_snan	- tbl_fdiv_op # INF / SNAN
   12705 	short		tbl_fdiv_op	- tbl_fdiv_op # (unused slot)
   12706 	short		tbl_fdiv_op	- tbl_fdiv_op # (unused slot)
   12707 
   12708 	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / NORM
   12709 	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / ZERO
   12710 	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / INF
   12711 	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / QNAN
   12712 	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / DENORM
   12713 	short		fdiv_res_snan	- tbl_fdiv_op # QNAN / SNAN
   12714 	short		tbl_fdiv_op	- tbl_fdiv_op # (unused slot)
   12715 	short		tbl_fdiv_op	- tbl_fdiv_op # (unused slot)
   12716 
   12717 	short		fdiv_norm	- tbl_fdiv_op # DENORM / NORM
   12718 	short		fdiv_inf_load	- tbl_fdiv_op # DENORM / ZERO
   12719 	short		fdiv_zero_load	- tbl_fdiv_op # DENORM / INF
   12720 	short		fdiv_res_qnan	- tbl_fdiv_op # DENORM / QNAN
   12721 	short		fdiv_norm	- tbl_fdiv_op # DENORM / DENORM
   12722 	short		fdiv_res_snan	- tbl_fdiv_op # DENORM / SNAN
   12723 	short		tbl_fdiv_op	- tbl_fdiv_op # (unused slot)
   12724 	short		tbl_fdiv_op	- tbl_fdiv_op # (unused slot)
   12725 
   12726 	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / NORM
   12727 	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / ZERO
   12728 	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / INF
   12729 	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / QNAN
   12730 	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / DENORM
   12731 	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / SNAN
   12732 	short		tbl_fdiv_op	- tbl_fdiv_op # (unused slot)
   12733 	short		tbl_fdiv_op	- tbl_fdiv_op # (unused slot)
   12734 
   12735 fdiv_res_qnan:				# table targets that forward with long branches
   12736 	bra.l		res_qnan		# go return the QNAN result
   12737 fdiv_res_snan:
   12738 	bra.l		res_snan		# go return the SNAN result
   12739 fdiv_res_operr:
   12740 	bra.l		res_operr		# go return the operand error result
   12741 
   12742 	global		fdiv_zero_load		# global for fsgldiv
   12743 fdiv_zero_load:
         # the quotient is a ZERO: return +0 or -0 in fp0 with matching ccodes.
   12744 	mov.b		SRC_EX(%a0),%d0		# result sign is exclusive
   12745 	mov.b		DST_EX(%a1),%d1		# or of input signs.
   12746 	eor.b		%d0,%d1			# msb of d1.b = src sign ^ dst sign
   12747 	bpl.b		fdiv_zero_load_p	# result is positive
   12748 	fmov.s		&0x80000000,%fp0	# load a -ZERO
   12749 	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set Z/N
   12750 	rts
   12751 fdiv_zero_load_p:
   12752 	fmov.s		&0x00000000,%fp0	# load a +ZERO
   12753 	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
   12754 	rts
   12755 
   12756 #
   12757 # The destination was In Range and the source was a ZERO. The result,
   12758 # therefore, is an INF w/ the proper sign.
   12759 # So, determine the sign and return a new INF (w/ the j-bit cleared).
   12760 # The divide-by-zero and accrued divide-by-zero bits are set as well.
   12761 	global		fdiv_inf_load		# global for fsgldiv
   12762 fdiv_inf_load:
   12763 	ori.w		&dz_mask+adz_mask,2+USER_FPSR(%a6) # set DZ/ADZ
   12764 	mov.b		SRC_EX(%a0),%d0		# load both signs
   12765 	mov.b		DST_EX(%a1),%d1
   12766 	eor.b		%d0,%d1			# msb of d1.b = src sign ^ dst sign
   12767 	bpl.b		fdiv_inf_load_p		# result is positive
   12768 	fmov.s		&0xff800000,%fp0	# make result -INF
   12769 	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
   12770 	rts
   12771 fdiv_inf_load_p:
   12772 	fmov.s		&0x7f800000,%fp0	# make result +INF
   12773 	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
   12774 	rts
   12775 
   12776 #
   12777 # The destination was an INF w/ an In Range or ZERO source, the result is
   12778 # an INF w/ the proper sign.
   12779 # The 68881/882 returns the destination INF w/ the new sign(if the j-bit of the
   12780 # dst INF is set, then the j-bit of the result INF is also set).
   12781 #
   12782 	global		fdiv_inf_dst		# global for fsgldiv
   12783 fdiv_inf_dst:
   12784 	mov.b		DST_EX(%a1),%d0		# load both signs
   12785 	mov.b		SRC_EX(%a0),%d1
   12786 	eor.b		%d0,%d1			# msb of d1.b = dst sign ^ src sign
   12787 	bpl.b		fdiv_inf_dst_p		# result is positive
   12788 
   12789 	fmovm.x		DST(%a1),&0x80		# return result in fp0
   12790 	fabs.x		%fp0			# clear sign bit
   12791 	fneg.x		%fp0			# set sign bit
   12792 	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/NEG
   12793 	rts
   12794 
   12795 fdiv_inf_dst_p:
   12796 	fmovm.x		DST(%a1),&0x80		# return result in fp0
   12797 	fabs.x		%fp0			# return positive INF
   12798 	mov.b		&inf_bmask,FPSR_CC(%a6) # set INF
   12799 	rts
   12800 
   12801 #########################################################################
   12802 # XDEF ****************************************************************	#
   12803 #	fneg(): emulates the fneg instruction				#
   12804 #	fsneg(): emulates the fsneg instruction				#
   12805 #	fdneg(): emulates the fdneg instruction				#
   12806 #									#
   12807 # XREF ****************************************************************	#
   12808 # 	norm() - normalize a denorm to provide EXOP			#
   12809 #	scale_to_zero_src() - scale sgl/dbl source exponent		#
   12810 #	ovf_res() - return default overflow result			#
   12811 #	unf_res() - return default underflow result			#
   12812 # 	res_qnan_1op() - return QNAN result				#
   12813 #	res_snan_1op() - return SNAN result				#
   12814 #									#
   12815 # INPUT ***************************************************************	#
   12816 #	a0 = pointer to extended precision source operand		#
   12817 #	d0 = rnd prec,mode						#
   12818 #									#
   12819 # OUTPUT **************************************************************	#
   12820 #	fp0 = result							#
   12821 #	fp1 = EXOP (if exception occurred)				#
   12822 #									#
   12823 # ALGORITHM ***********************************************************	#
   12824 #	Handle NANs, zeroes, and infinities as special cases. Separate	#
   12825 # norms/denorms into ext/sgl/dbl precisions. Extended precision can be	#
   12826 # emulated by simply setting sign bit. Sgl/dbl operands must be scaled	#
   12827 # and an actual fneg performed to see if overflow/underflow would have	#
   12828 # occurred. If so, return default underflow/overflow result. Else,	#
   12829 # scale the result exponent and return result. FPSR gets set based on	#
   12830 # the result value.							#
   12831 #									#
   12832 #########################################################################
   12833 
   12834 	global		fsneg
   12835 fsneg:
   12836 	andi.b		&0x30,%d0		# clear rnd prec; only the rnd mode bits remain
   12837 	ori.b		&s_mode*0x10,%d0	# insert sgl precision
   12838 	bra.b		fneg			# continue in the common fneg code
   12839 
   12840 	global		fdneg
   12841 fdneg:
   12842 	andi.b		&0x30,%d0		# clear rnd prec; only the rnd mode bits remain
   12843 	ori.b		&d_mode*0x10,%d0	# insert dbl prec; falls through into fneg
   12844 
   12845 	global		fneg
   12846 fneg:
   12847 	mov.l		%d0,L_SCR3(%a6)		# store rnd info (reloaded into FPCR later)
   12848 	mov.b		STAG(%a6),%d1		# fetch source operand tag; zero ==> NORM
   12849 	bne.w		fneg_not_norm		# optimize on non-norm input
   12850 
   12851 #
   12852 # NEGATE SIGN : norms and denorms ONLY!
   12853 #
   12854 fneg_norm:
   12855 	andi.b		&0xc0,%d0		# is precision extended? (isolate prec bits)
   12856 	bne.w		fneg_not_ext		# no; go handle sgl or dbl
   12857 
   12858 #
   12859 # precision selected is extended. so...we can not get an underflow
   12860 # or overflow because of rounding to the correct precision. so...
   12861 # skip the scaling and unscaling...
   12862 #
   12863 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(mantissa) unchanged
   12864 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(mantissa) unchanged
   12865 	mov.w		SRC_EX(%a0),%d0		# fetch {sgn,exp}
   12866 	eori.w		&0x8000,%d0		# negate sign
   12867 	bpl.b		fneg_norm_load		# sign is positive
   12868 	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
   12869 fneg_norm_load:
   12870 	mov.w		%d0,FP_SCR0_EX(%a6)	# insert negated {sgn,exp}
   12871 	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
   12872 	rts
   12873 
   12874 #
   12875 # for an extended precision DENORM, the UNFL exception bit is set
   12876 # the accrued bit is NOT set in this instance(no inexactness!)
   12877 #
   12878 fneg_denorm:
   12879 	andi.b		&0xc0,%d0		# is precision extended?
   12880 	bne.b		fneg_not_ext		# no; go handle sgl or dbl
   12881 
   12882 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
   12883 
   12884 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(mantissa) unchanged
   12885 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(mantissa) unchanged
   12886 	mov.w		SRC_EX(%a0),%d0		# fetch {sgn,exp}
   12887 	eori.w		&0x8000,%d0		# negate sign
   12888 	bpl.b		fneg_denorm_done	# result is positive; ccodes untouched
   12889 	mov.b		&neg_bmask,FPSR_CC(%a6)	# result is negative; set 'N' ccode bit
   12890 fneg_denorm_done:
   12891 	mov.w		%d0,FP_SCR0_EX(%a6)	# insert negated {sgn,exp}
   12892 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
   12893 
   12894 	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
   12895 	bne.b		fneg_ext_unfl_ena	# yes; an EXOP must also be returned
   12896 	rts
   12897 
   12898 #
   12899 # the input is an extended DENORM and underflow is enabled in the FPCR.
   12900 # normalize the mantissa and add the bias of 0x6000 to the resulting negative
   12901 # exponent and insert back into the operand.
   12902 #
   12903 fneg_ext_unfl_ena:
   12904 	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
   12905 	bsr.l		norm			# normalize result
   12906 	neg.w		%d0			# new exponent = -(shft val)
   12907 	addi.w		&0x6000,%d0		# add new bias to exponent
   12908 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
   12909 	andi.w		&0x8000,%d1	 	# keep old sign
   12910 	andi.w		&0x7fff,%d0		# clear sign position
   12911 	or.w		%d1,%d0			# concat old sign, new exponent
   12912 	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
   12913 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   12914 	rts
   12915 
   12916 #
   12917 # operand is either single or double
   12918 #
   12919 fneg_not_ext:
   12920 	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
   12921 	bne.b		fneg_dbl		# not sgl; must be dbl
   12922 
   12923 #
   12924 # operand is to be rounded to single precision
   12925 #
   12926 fneg_sgl:
   12927 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6) # copy operand to FP_SCR0
   12928 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   12929 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   12930 	bsr.l		scale_to_zero_src	# calculate scale factor
   12931 
   12932 	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
   12933 	bge.w		fneg_sd_unfl		# yes; go handle underflow
   12934 	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
   12935 	beq.w		fneg_sd_may_ovfl	# maybe; go check
   12936 	blt.w		fneg_sd_ovfl		# yes; go handle overflow
   12937 
   12938 #
   12939 # operand will NOT overflow or underflow when moved in to the fp reg file
   12940 #
   12941 fneg_sd_normal:
   12942 	fmov.l		&0x0,%fpsr		# clear FPSR
   12943 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   12944 
   12945 	fneg.x		FP_SCR0(%a6),%fp0	# perform negation
   12946 
   12947 	fmov.l		%fpsr,%d1		# save FPSR
   12948 	fmov.l		&0x0,%fpcr		# clear FPCR
   12949 
   12950 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   12951 
   12952 fneg_sd_normal_exit:
   12953 	mov.l		%d2,-(%sp)		# save d2
   12954 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
   12955 	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
   12956 	mov.w		%d1,%d2			# make a copy
   12957 	andi.l		&0x7fff,%d1		# strip sign
   12958 	sub.l		%d0,%d1			# subtract scale factor (d0 still holds it)
   12959 	andi.w		&0x8000,%d2		# keep old sign
   12960 	or.w		%d1,%d2			# concat old sign,new exp
   12961 	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
   12962 	mov.l		(%sp)+,%d2		# restore d2
   12963 	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
   12964 	rts
   12965 
   12966 #
   12967 # operand is to be rounded to double precision
   12968 #
   12969 fneg_dbl:
   12970 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6) # copy operand to FP_SCR0
   12971 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   12972 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   12973 	bsr.l		scale_to_zero_src	# calculate scale factor
   12974 
   12975 	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
   12976 	bge.b		fneg_sd_unfl		# yes; go handle underflow
   12977 	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
   12978 	beq.w		fneg_sd_may_ovfl	# maybe; go check
   12979 	blt.w		fneg_sd_ovfl		# yes; go handle overflow
   12980 	bra.w		fneg_sd_normal		# no; go handle normalized op
   12981 
   12982 #
   12983 # operand WILL underflow when moved in to the fp register file
   12984 #
   12985 fneg_sd_unfl:
   12986 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
   12987 
   12988 	eori.b		&0x80,FP_SCR0_EX(%a6)	# negate sign (in the FP_SCR0 copy)
   12989 	bpl.b		fneg_sd_unfl_tst	# result is positive
   12990 	bset		&neg_bit,FPSR_CC(%a6)	# set 'N' ccode bit
   12991 
   12992 # if underflow or inexact is enabled, go calculate EXOP first.
   12993 fneg_sd_unfl_tst:
   12994 	mov.b		FPCR_ENABLE(%a6),%d1
   12995 	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
   12996 	bne.b		fneg_sd_unfl_ena	# yes
   12997 
   12998 fneg_sd_unfl_dis:
   12999 	lea		FP_SCR0(%a6),%a0	# pass: result addr
   13000 	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
   13001 	bsr.l		unf_res			# calculate default result
   13002 	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
   13003 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
   13004 	rts
   13005 
   13006 #
   13007 # operand will underflow AND underflow is enabled.
   13008 # therefore, we must return the result rounded to extended precision.
   13009 # the EXOP is built in FP_SCR1 so that FP_SCR0 stays intact for unf_res().
   13010 fneg_sd_unfl_ena:
   13011 	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6) # copy mantissa to FP_SCR1
   13012 	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
   13013 	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent
   13014 
   13015 	mov.l		%d2,-(%sp)		# save d2
   13016 	mov.l		%d1,%d2			# make a copy
   13017 	andi.l		&0x7fff,%d1		# strip sign
   13018 	andi.w		&0x8000,%d2		# keep old sign
   13019 	sub.l		%d0,%d1			# subtract scale factor
   13020 	addi.l		&0x6000,%d1		# add new bias
   13021 	andi.w		&0x7fff,%d1		# keep exponent within 15 bits
   13022 	or.w		%d2,%d1			# concat new sign,new exp
   13023 	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
   13024 	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
   13025 	mov.l		(%sp)+,%d2		# restore d2
   13026 	bra.b		fneg_sd_unfl_dis	# now go create the default result
   13027 
   13028 #
   13029 # operand WILL overflow.
   13030 #
   13031 fneg_sd_ovfl:
   13032 	fmov.l		&0x0,%fpsr		# clear FPSR
   13033 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   13034 
   13035 	fneg.x		FP_SCR0(%a6),%fp0	# perform negation
   13036 
   13037 	fmov.l		&0x0,%fpcr		# clear FPCR
   13038 	fmov.l		%fpsr,%d1		# save FPSR
   13039 
   13040 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   13041 
   13042 fneg_sd_ovfl_tst:
   13043 	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
   13044 
   13045 	mov.b		FPCR_ENABLE(%a6),%d1
   13046 	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
   13047 	bne.b		fneg_sd_ovfl_ena	# yes
   13048 
   13049 #
   13050 # OVFL is not enabled; therefore, we must create the default result by
   13051 # calling ovf_res(). the sign passed in d1 comes from the 'N' ccode bit.
   13052 #
   13053 fneg_sd_ovfl_dis:
   13054 	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
   13055 	sne		%d1			# set sign param accordingly
   13056 	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
   13057 	bsr.l		ovf_res			# calculate default result
   13058 	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
   13059 	fmovm.x		(%a0),&0x80		# return default result in fp0
   13060 	rts
   13061 
   13062 #
   13063 # OVFL is enabled.
   13064 # the INEX2 bit has already been updated by the round to the correct precision.
   13065 # now, round to extended(and don't alter the FPSR).
   13066 #
   13067 fneg_sd_ovfl_ena:
   13068 	mov.l		%d2,-(%sp)		# save d2
   13069 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
   13070 	mov.l		%d1,%d2			# make a copy
   13071 	andi.l		&0x7fff,%d1		# strip sign
   13072 	andi.w		&0x8000,%d2		# keep old sign
   13073 	sub.l		%d0,%d1			# add scale factor
   13074 	subi.l		&0x6000,%d1		# subtract bias
   13075 	andi.w		&0x7fff,%d1
   13076 	or.w		%d2,%d1			# concat sign,exp
   13077 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
   13078 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   13079 	mov.l		(%sp)+,%d2		# restore d2
   13080 	bra.b		fneg_sd_ovfl_dis
   13081 
   13082 #
   13083 # the move in MAY overflow. so...
   13084 #
   13085 fneg_sd_may_ovfl:
   13086 	fmov.l		&0x0,%fpsr		# clear FPSR
   13087 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   13088 
   13089 	fneg.x		FP_SCR0(%a6),%fp0	# perform negation
   13090 
   13091 	fmov.l		%fpsr,%d1		# save status
   13092 	fmov.l		&0x0,%fpcr		# clear FPCR
   13093 
   13094 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   13095 
   13096 	fabs.x		%fp0,%fp1		# fp1 = |result|
   13097 	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
   13098 	fbge.w		fneg_sd_ovfl_tst	# yes; overflow has occurred
   13099 
   13100 # no, it didn't overflow; we have correct result
   13101 	bra.w		fneg_sd_normal_exit
   13102 
   13103 ##########################################################################
   13104 
   13105 #
   13106 # input is not normalized; what is it? (d1 = source operand tag)
   13107 #
   13108 fneg_not_norm:
   13109 	cmpi.b		%d1,&DENORM		# weed out DENORM
   13110 	beq.w		fneg_denorm
   13111 	cmpi.b		%d1,&SNAN		# weed out SNAN
   13112 	beq.l		res_snan_1op
   13113 	cmpi.b		%d1,&QNAN		# weed out QNAN
   13114 	beq.l		res_qnan_1op
   13115 
   13116 #
   13117 # do the fneg; at this point, only possible ops are ZERO and INF.
   13118 # use fneg to determine ccodes.
   13119 # prec:mode should be zero at this point but it won't affect answer anyways.
   13120 #
   13121 	fneg.x		SRC_EX(%a0),%fp0	# do fneg
   13122 	fmov.l		%fpsr,%d0		# fetch the resulting FPSR
   13123 	rol.l		&0x8,%d0		# put ccodes in lo byte
   13124 	mov.b		%d0,FPSR_CC(%a6)	# insert correct ccodes
   13125 	rts
   13126 
   13127 #########################################################################
   13128 # XDEF ****************************************************************	#
   13129 # 	ftst(): emulates the ftst instruction				#
   13130 #									#
   13131 # XREF ****************************************************************	#
   13132 # 	res{s,q}nan_1op() - set NAN result for monadic instruction	#
   13133 #									#
   13134 # INPUT ***************************************************************	#
   13135 # 	a0 = pointer to extended precision source operand		#
   13136 #									#
   13137 # OUTPUT **************************************************************	#
   13138 #	none								#
   13139 #									#
   13140 # ALGORITHM ***********************************************************	#
   13141 # 	Check the source operand tag (STAG) and set the FPSR according	#
   13142 # to the operand type and sign.						#
   13143 #									#
   13144 #########################################################################
   13145 
   13146 	global		ftst
   13147 ftst:
   13148 	mov.b		STAG(%a6),%d1		# fetch source operand tag; zero ==> NORM
   13149 	bne.b		ftst_not_norm		# optimize on non-norm input
   13150 
   13151 #
   13152 # Norm:
   13153 #
   13154 ftst_norm:
   13155 	tst.b		SRC_EX(%a0)		# is operand negative?
   13156 	bmi.b		ftst_norm_m		# yes
   13157 	rts					# positive: FPSR_CC is left untouched
   13158 ftst_norm_m:
   13159 	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
   13160 	rts
   13161 
   13162 #
   13163 # input is not normalized; what is it?
   13164 #
   13165 ftst_not_norm:
   13166 	cmpi.b		%d1,&ZERO		# weed out ZERO
   13167 	beq.b		ftst_zero
   13168 	cmpi.b		%d1,&INF		# weed out INF
   13169 	beq.b		ftst_inf
   13170 	cmpi.b		%d1,&SNAN		# weed out SNAN
   13171 	beq.l		res_snan_1op
   13172 	cmpi.b		%d1,&QNAN		# weed out QNAN
   13173 	beq.l		res_qnan_1op
   13174 
   13175 #
   13176 # Denorm: (reached by falling through all of the tag checks above)
   13177 #
   13178 ftst_denorm:
   13179 	tst.b		SRC_EX(%a0)		# is operand negative?
   13180 	bmi.b		ftst_denorm_m		# yes
   13181 	rts					# positive: FPSR_CC is left untouched
   13182 ftst_denorm_m:
   13183 	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
   13184 	rts
   13185 
   13186 #
   13187 # Infinity:
   13188 #
   13189 ftst_inf:
   13190 	tst.b		SRC_EX(%a0)		# is operand negative?
   13191 	bmi.b		ftst_inf_m		# yes
   13192 ftst_inf_p:
   13193 	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
   13194 	rts
   13195 ftst_inf_m:
   13196 	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'I','N' ccode bits
   13197 	rts
   13198 
   13199 #
   13200 # Zero:
   13201 #
   13202 ftst_zero:
   13203 	tst.b		SRC_EX(%a0)		# is operand negative?
   13204 	bmi.b		ftst_zero_m		# yes
   13205 ftst_zero_p:
   13206 	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
   13207 	rts
   13208 ftst_zero_m:
   13209 	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set 'Z','N' ccode bits
   13210 	rts
   13211 
   13212 #########################################################################
   13213 # XDEF ****************************************************************	#
   13214 #	fint(): emulates the fint instruction				#
   13215 #									#
   13216 # XREF ****************************************************************	#
   13217 #	res_{s,q}nan_1op() - set NAN result for monadic operation	#
   13218 #									#
   13219 # INPUT ***************************************************************	#
   13220 #	a0 = pointer to extended precision source operand		#
   13221 #	d0 = round precision/mode					#
   13222 #									#
   13223 # OUTPUT **************************************************************	#
   13224 #	fp0 = result							#
   13225 #									#
   13226 # ALGORITHM ***********************************************************	#
   13227 # 	Separate according to operand type. Unnorms don't pass through 	#
   13228 # here. For norms, load the rounding mode/prec, execute a "fint", then 	#
   13229 # store the resulting FPSR bits.					#
   13230 # 	For denorms, force the j-bit to a one and do the same as for	#
   13231 # norms. Denorms are so low that the answer will either be a zero or a 	#
   13232 # one.									#
   13233 # 	For zeroes/infs/NANs, return the same while setting the FPSR	#
   13234 # as appropriate.							#
   13235 #									#
   13236 #########################################################################
   13237 
   13238 	global		fint
   13239 fint:
   13240 	mov.b		STAG(%a6),%d1		# fetch source operand tag; zero ==> NORM
   13241 	bne.b		fint_not_norm		# optimize on non-norm input
   13242 
   13243 #
   13244 # Norm: (also entered from fint_denorm with a0 pointing at FP_SCR0)
   13245 #
   13246 fint_norm:
   13247 	andi.b		&0x30,%d0		# set prec = ext
   13248 
   13249 	fmov.l		%d0,%fpcr		# set FPCR
   13250 	fmov.l		&0x0,%fpsr		# clear FPSR
   13251 
   13252 	fint.x 		SRC(%a0),%fp0		# execute fint
   13253 
   13254 	fmov.l		&0x0,%fpcr		# clear FPCR
   13255 	fmov.l		%fpsr,%d0		# save FPSR
   13256 	or.l		%d0,USER_FPSR(%a6)	# set exception bits
   13257 
   13258 	rts
   13259 
   13260 #
   13261 # input is not normalized; what is it?
   13262 #
   13263 fint_not_norm:
   13264 	cmpi.b		%d1,&ZERO		# weed out ZERO
   13265 	beq.b		fint_zero
   13266 	cmpi.b		%d1,&INF		# weed out INF
   13267 	beq.b		fint_inf
   13268 	cmpi.b		%d1,&DENORM		# weed out DENORM
   13269 	beq.b		fint_denorm
   13270 	cmpi.b		%d1,&SNAN		# weed out SNAN
   13271 	beq.l		res_snan_1op
   13272 	bra.l		res_qnan_1op		# anything left is treated as a QNAN
   13273 
   13274 #
   13275 # Denorm:
   13276 #
   13277 # for DENORMs, the result will be either (+/-)ZERO or (+/-)1.
   13278 # also, the INEX2 and AINEX exception bits will be set.
   13279 # so, we could either set these manually or force the DENORM
   13280 # to a very small NORM and ship it to the NORM routine.
   13281 # I do the latter.
   13282 #
   13283 fint_denorm:
   13284 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
   13285 	mov.b		&0x80,FP_SCR0_HI(%a6)	# force DENORM ==> small NORM (only the top mantissa byte is written)
   13286 	lea		FP_SCR0(%a6),%a0	# operand is now the copy in FP_SCR0
   13287 	bra.b		fint_norm
   13288 
   13289 #
   13290 # Zero:
   13291 #
   13292 fint_zero:
   13293 	tst.b		SRC_EX(%a0)		# is ZERO negative?
   13294 	bmi.b		fint_zero_m		# yes
   13295 fint_zero_p:
   13296 	fmov.s		&0x00000000,%fp0	# return +ZERO in fp0
   13297 	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
   13298 	rts
   13299 fint_zero_m:
   13300 	fmov.s		&0x80000000,%fp0	# return -ZERO in fp0
   13301 	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
   13302 	rts
   13303 
   13304 #
   13305 # Infinity:
   13306 #
   13307 fint_inf:
   13308 	fmovm.x		SRC(%a0),&0x80		# return result in fp0
   13309 	tst.b		SRC_EX(%a0)		# is INF negative?
   13310 	bmi.b		fint_inf_m		# yes
   13311 fint_inf_p:
   13312 	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
   13313 	rts
   13314 fint_inf_m:
   13315 	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
   13316 	rts
   13317 
   13318 #########################################################################
   13319 # XDEF ****************************************************************	#
   13320 #	fintrz(): emulates the fintrz instruction			#
   13321 #									#
   13322 # XREF ****************************************************************	#
   13323 #	res_{s,q}nan_1op() - set NAN result for monadic operation	#
   13324 #									#
   13325 # INPUT ***************************************************************	#
   13326 #	a0 = pointer to extended precision source operand		#
   13327 #	d0 = round precision/mode					#
   13328 #									#
   13329 # OUTPUT **************************************************************	#
   13330 # 	fp0 = result							#
   13331 #									#
   13332 # ALGORITHM ***********************************************************	#
   13333 #	Separate according to operand type. Unnorms don't pass through	#
   13334 # here. For norms, load the rounding mode/prec, execute a "fintrz", 	#
   13335 # then store the resulting FPSR bits.					#
   13336 # 	For denorms, force the j-bit to a one and do the same as for	#
   13337 # norms. Denorms are so low that the answer will either be a zero or a	#
   13338 # one.									#
   13339 # 	For zeroes/infs/NANs, return the same while setting the FPSR	#
   13340 # as appropriate.							#
   13341 #									#
   13342 #########################################################################
   13343 
   13344 	global		fintrz
   13345 fintrz:
   13346 	mov.b		STAG(%a6),%d1		# fetch source operand tag; zero ==> NORM
   13347 	bne.b		fintrz_not_norm		# optimize on non-norm input
   13348 
   13349 #
   13350 # Norm: (unlike fint_norm, the FPCR is not loaded here; d0 is not used as input)
   13351 #
   13352 fintrz_norm:
   13353 	fmov.l		&0x0,%fpsr		# clear FPSR
   13354 
   13355 	fintrz.x	SRC(%a0),%fp0		# execute fintrz
   13356 
   13357 	fmov.l		%fpsr,%d0		# save FPSR
   13358 	or.l		%d0,USER_FPSR(%a6)	# set exception bits
   13359 
   13360 	rts
   13361 
   13362 #
   13363 # input is not normalized; what is it?
   13364 #
   13365 fintrz_not_norm:
   13366 	cmpi.b		%d1,&ZERO		# weed out ZERO
   13367 	beq.b		fintrz_zero
   13368 	cmpi.b		%d1,&INF		# weed out INF
   13369 	beq.b		fintrz_inf
   13370 	cmpi.b		%d1,&DENORM		# weed out DENORM
   13371 	beq.b		fintrz_denorm
   13372 	cmpi.b		%d1,&SNAN		# weed out SNAN
   13373 	beq.l		res_snan_1op
   13374 	bra.l		res_qnan_1op		# anything left is treated as a QNAN
   13375 
   13376 #
   13377 # Denorm:
   13378 #
   13379 # for DENORMs, the result will be (+/-)ZERO.
   13380 # also, the INEX2 and AINEX exception bits will be set.
   13381 # so, we could either set these manually or force the DENORM
   13382 # to a very small NORM and ship it to the NORM routine.
   13383 # I do the latter.
   13384 #
   13385 fintrz_denorm:
   13386 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
   13387 	mov.b		&0x80,FP_SCR0_HI(%a6)	# force DENORM ==> small NORM (only the top mantissa byte is written)
   13388 	lea		FP_SCR0(%a6),%a0	# operand is now the copy in FP_SCR0
   13389 	bra.b		fintrz_norm
   13390 
   13391 #
   13392 # Zero:
   13393 #
   13394 fintrz_zero:
   13395 	tst.b		SRC_EX(%a0)		# is ZERO negative?
   13396 	bmi.b		fintrz_zero_m		# yes
   13397 fintrz_zero_p:
   13398 	fmov.s		&0x00000000,%fp0	# return +ZERO in fp0
   13399 	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
   13400 	rts
   13401 fintrz_zero_m:
   13402 	fmov.s		&0x80000000,%fp0	# return -ZERO in fp0
   13403 	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
   13404 	rts
   13405 
   13406 #
   13407 # Infinity:
   13408 #
   13409 fintrz_inf:
   13410 	fmovm.x		SRC(%a0),&0x80		# return result in fp0
   13411 	tst.b		SRC_EX(%a0)		# is INF negative?
   13412 	bmi.b		fintrz_inf_m		# yes
   13413 fintrz_inf_p:
   13414 	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
   13415 	rts
   13416 fintrz_inf_m:
   13417 	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
   13418 	rts
   13419 
   13420 #########################################################################
   13421 # XDEF ****************************************************************	#
   13422 #	fabs():  emulates the fabs instruction				#
   13423 #	fsabs(): emulates the fsabs instruction				#
   13424 #	fdabs(): emulates the fdabs instruction				#
   13425 #									#
   13426 # XREF **************************************************************** #
   13427 #	norm() - normalize denorm mantissa to provide EXOP		#
   13428 #	scale_to_zero_src() - make exponent. = 0; get scale factor	#
   13429 #	unf_res() - calculate underflow result				#
   13430 #	ovf_res() - calculate overflow result				#
   13431 #	res_{s,q}nan_1op() - set NAN result for monadic operation	#
   13432 #									#
   13433 # INPUT *************************************************************** #
   13434 #	a0 = pointer to extended precision source operand		#
   13435 #	d0 = rnd precision/mode						#
   13436 #									#
   13437 # OUTPUT ************************************************************** #
   13438 #	fp0 = result							#
   13439 #	fp1 = EXOP (if exception occurred)				#
   13440 #									#
   13441 # ALGORITHM ***********************************************************	#
   13442 #	Handle NANs, infinities, and zeroes as special cases. Divide	#
   13443 # norms into extended, single, and double precision. 			#
   13444 # 	Simply clear sign for extended precision norm. Ext prec denorm	#
   13445 # gets an EXOP created for it since it's an underflow.			#
   13446 #	Double and single precision can overflow and underflow. First,	#
   13447 # scale the operand such that the exponent is zero. Perform an "fabs"	#
   13448 # using the correct rnd mode/prec. Check to see if the original 	#
   13449 # exponent would take an exception. If so, use unf_res() or ovf_res()	#
   13450 # to calculate the default result. Also, create the EXOP for the	#
   13451 # exceptional case. If no exception should occur, insert the correct 	#
   13452 # result exponent and return.						#
   13453 # 	Unnorms don't pass through here.				#
   13454 #									#
   13455 #########################################################################
   13456 
   13457 	global		fsabs
   13458 fsabs:
   13459 	andi.b		&0x30,%d0		# clear rnd prec; only the rnd mode bits remain
   13460 	ori.b		&s_mode*0x10,%d0	# insert sgl precision
   13461 	bra.b		fabs			# continue in the common fabs code
   13462 
   13463 	global		fdabs
   13464 fdabs:
   13465 	andi.b		&0x30,%d0		# clear rnd prec; only the rnd mode bits remain
   13466 	ori.b		&d_mode*0x10,%d0	# insert dbl precision; falls through into fabs
   13467 
   13468 	global		fabs
   13469 fabs:
   13470 	mov.l		%d0,L_SCR3(%a6)		# store rnd info (reloaded into FPCR later)
   13471 	mov.b		STAG(%a6),%d1		# fetch source operand tag; zero ==> NORM
   13472 	bne.w		fabs_not_norm		# optimize on non-norm input
   13473 
   13474 #
   13475 # ABSOLUTE VALUE: norms and denorms ONLY!
   13476 #
   13477 fabs_norm:
   13478 	andi.b		&0xc0,%d0		# is precision extended? (isolate prec bits)
   13479 	bne.b		fabs_not_ext		# no; go handle sgl or dbl
   13480 
   13481 #
   13482 # precision selected is extended. so...we can not get an underflow
   13483 # or overflow because of rounding to the correct precision. so...
   13484 # skip the scaling and unscaling...
   13485 #
   13486 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(mantissa) unchanged
   13487 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(mantissa) unchanged
   13488 	mov.w		SRC_EX(%a0),%d1		# fetch {sgn,exp}
   13489 	bclr		&15,%d1			# force absolute value
   13490 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert exponent
   13491 	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
   13492 	rts
   13493 
   13494 #
   13495 # for an extended precision DENORM, the UNFL exception bit is set
   13496 # the accrued bit is NOT set in this instance(no inexactness!)
   13497 #
   13498 fabs_denorm:
   13499 	andi.b		&0xc0,%d0		# is precision extended?
   13500 	bne.b		fabs_not_ext		# no; go handle sgl or dbl
   13501 
   13502 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
   13503 
   13504 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(mantissa) unchanged
   13505 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(mantissa) unchanged
   13506 	mov.w		SRC_EX(%a0),%d0		# fetch {sgn,exp}
   13507 	bclr		&15,%d0			# clear sign
   13508 	mov.w		%d0,FP_SCR0_EX(%a6)	# insert exponent
   13509 
   13510 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
   13511 
   13512 	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
   13513 	bne.b		fabs_ext_unfl_ena	# yes; an EXOP must also be returned
   13514 	rts
   13515 
   13516 #
   13517 # the input is an extended DENORM and underflow is enabled in the FPCR.
   13518 # normalize the mantissa and add the bias of 0x6000 to the resulting negative
   13519 # exponent and insert back into the operand.
   13520 #
   13521 fabs_ext_unfl_ena:
   13522 	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
   13523 	bsr.l		norm			# normalize result
   13524 	neg.w		%d0			# new exponent = -(shft val)
   13525 	addi.w		&0x6000,%d0		# add new bias to exponent
   13526 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
   13527 	andi.w		&0x8000,%d1		# keep old sign
   13528 	andi.w		&0x7fff,%d0		# clear sign position
   13529 	or.w		%d1,%d0			# concat old sign, new exponent
   13530 	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
   13531 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   13532 	rts
   13533 
   13534 #
   13535 # operand is either single or double
   13536 #
   13537 fabs_not_ext:
   13538 	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
   13539 	bne.b		fabs_dbl		# not sgl; must be dbl
   13540 
   13541 #
   13542 # operand is to be rounded to single precision
   13543 #
   13544 fabs_sgl:
   13545 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6) # copy operand to FP_SCR0
   13546 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   13547 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   13548 	bsr.l		scale_to_zero_src	# calculate scale factor
   13549 
   13550 	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
   13551 	bge.w		fabs_sd_unfl		# yes; go handle underflow
   13552 	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
   13553 	beq.w		fabs_sd_may_ovfl	# maybe; go check
   13554 	blt.w		fabs_sd_ovfl		# yes; go handle overflow
   13555 
   13556 #
   13557 # operand will NOT overflow or underflow when moved in to the fp reg file
   13558 #
   13559 fabs_sd_normal:
   13560 	fmov.l		&0x0,%fpsr		# clear FPSR
   13561 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   13562 
   13563 	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute
   13564 
   13565 	fmov.l		%fpsr,%d1		# save FPSR
   13566 	fmov.l		&0x0,%fpcr		# clear FPCR
   13567 
   13568 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   13569 
   13570 fabs_sd_normal_exit:
   13571 	mov.l		%d2,-(%sp)		# save d2
   13572 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
   13573 	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
   13574 	mov.l		%d1,%d2			# make a copy
   13575 	andi.l		&0x7fff,%d1		# strip sign
   13576 	sub.l		%d0,%d1			# subtract scale factor (d0 still holds it)
   13577 	andi.w		&0x8000,%d2		# keep old sign
   13578 	or.w		%d1,%d2			# concat old sign,new exp
   13579 	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
   13580 	mov.l		(%sp)+,%d2		# restore d2
   13581 	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
   13582 	rts
   13583 
   13584 #
   13585 # operand is to be rounded to double precision
   13586 #
   13587 fabs_dbl:
   13588 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6) # copy operand to FP_SCR0
   13589 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   13590 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   13591 	bsr.l		scale_to_zero_src	# calculate scale factor
   13592 
   13593 	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
   13594 	bge.b		fabs_sd_unfl		# yes; go handle underflow
   13595 	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
   13596 	beq.w		fabs_sd_may_ovfl	# maybe; go check
   13597 	blt.w		fabs_sd_ovfl		# yes; go handle overflow
   13598 	bra.w		fabs_sd_normal		# no; go handle normalized op
   13599 
   13600 #
   13601 # operand WILL underflow when moved in to the fp register file
   13602 #
   13603 fabs_sd_unfl:
   13604 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
   13605 
   13606 	bclr		&0x7,FP_SCR0_EX(%a6)	# force absolute value (in the FP_SCR0 copy)
   13607 
   13608 # if underflow or inexact is enabled, go calculate EXOP first.
   13609 	mov.b		FPCR_ENABLE(%a6),%d1
   13610 	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
   13611 	bne.b		fabs_sd_unfl_ena	# yes
   13612 
   13613 fabs_sd_unfl_dis:
   13614 	lea		FP_SCR0(%a6),%a0	# pass: result addr
   13615 	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
   13616 	bsr.l		unf_res			# calculate default result
   13617 	or.b		%d0,FPSR_CC(%a6)	# set possible 'Z' ccode
   13618 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
   13619 	rts
   13620 
   13621 #
   13622 # operand will underflow AND underflow is enabled.
   13623 # therefore, we must return the result rounded to extended precision.
   13624 # the EXOP is built in FP_SCR1 so that FP_SCR0 stays intact for unf_res().
   13625 fabs_sd_unfl_ena:
   13626 	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6) # copy mantissa to FP_SCR1
   13627 	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
   13628 	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent
   13629 
   13630 	mov.l		%d2,-(%sp)		# save d2
   13631 	mov.l		%d1,%d2			# make a copy
   13632 	andi.l		&0x7fff,%d1		# strip sign
   13633 	andi.w		&0x8000,%d2		# keep old sign
   13634 	sub.l		%d0,%d1			# subtract scale factor
   13635 	addi.l		&0x6000,%d1		# add new bias
   13636 	andi.w		&0x7fff,%d1		# keep exponent within 15 bits
   13637 	or.w		%d2,%d1			# concat new sign,new exp
   13638 	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
   13639 	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
   13640 	mov.l		(%sp)+,%d2		# restore d2
   13641 	bra.b		fabs_sd_unfl_dis	# now go create the default result
   13642 
   13643 #
   13644 # operand WILL overflow.
   13645 #
   13646 fabs_sd_ovfl:
   13647 	fmov.l		&0x0,%fpsr		# clear FPSR
   13648 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   13649 
   13650 	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute
   13651 
   13652 	fmov.l		&0x0,%fpcr		# clear FPCR
   13653 	fmov.l		%fpsr,%d1		# save FPSR
   13654 
   13655 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   13656 
   13657 fabs_sd_ovfl_tst:
   13658 	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
   13659 
   13660 	mov.b		FPCR_ENABLE(%a6),%d1
   13661 	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
   13662 	bne.b		fabs_sd_ovfl_ena	# yes
   13663 
   13664 #
   13665 # OVFL is not enabled; therefore, we must create the default result by
   13666 # calling ovf_res(). the sign passed in d1 comes from the 'N' ccode bit.
   13667 #
   13668 fabs_sd_ovfl_dis:
   13669 	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
   13670 	sne		%d1			# set sign param accordingly
   13671 	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
   13672 	bsr.l		ovf_res			# calculate default result
   13673 	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
   13674 	fmovm.x		(%a0),&0x80		# return default result in fp0
   13675 	rts
   13676 
   13677 #
   13678 # OVFL is enabled.
   13679 # the INEX2 bit has already been updated by the round to the correct precision.
   13680 # now, round to extended(and don't alter the FPSR).
   13681 #
   13682 fabs_sd_ovfl_ena:
   13683 	mov.l		%d2,-(%sp)		# save d2
   13684 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
   13685 	mov.l		%d1,%d2			# make a copy
   13686 	andi.l		&0x7fff,%d1		# strip sign
   13687 	andi.w		&0x8000,%d2		# keep old sign
   13688 	sub.l		%d0,%d1			# subtract scale factor
   13689 	subi.l		&0x6000,%d1		# subtract bias
   13690 	andi.w		&0x7fff,%d1		# keep only the 15 exponent bits
   13691 	or.w		%d2,%d1			# concat old sign,new exp
   13692 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
   13693 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   13694 	mov.l		(%sp)+,%d2		# restore d2
   13695 	bra.b		fabs_sd_ovfl_dis	# still build the default result for fp0
   13696 
   13697 #
   13698 # the operand MAY overflow once rounded. so perform the op and inspect the result.
   13699 #
   13700 fabs_sd_may_ovfl:
   13701 	fmov.l		&0x0,%fpsr		# clear FPSR
   13702 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   13703 
   13704 	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute
   13705 
   13706 	fmov.l		%fpsr,%d1		# save status
   13707 	fmov.l		&0x0,%fpcr		# clear FPCR
   13708 
   13709 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   13710 
   13711 	fabs.x		%fp0,%fp1		# make a copy of result
   13712 	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
   13713 	fbge.w		fabs_sd_ovfl_tst	# yes; overflow has occurred
   13714 
   13715 # no, it didn't overflow; we have correct result
   13716 	bra.w		fabs_sd_normal_exit
   13717 
   13718 ##########################################################################
   13719 
   13720 #
   13721 # input is not normalized; what is it?
   13722 #
   13723 fabs_not_norm:
   13724 	cmpi.b		%d1,&DENORM		# weed out DENORM
   13725 	beq.w		fabs_denorm
   13726 	cmpi.b		%d1,&SNAN		# weed out SNAN
   13727 	beq.l		res_snan_1op
   13728 	cmpi.b		%d1,&QNAN		# weed out QNAN
   13729 	beq.l		res_qnan_1op
   13730 
   13731 	fabs.x		SRC(%a0),%fp0		# force absolute value
   13732 
   13733 	cmpi.b		%d1,&INF		# weed out INF
   13734 	beq.b		fabs_inf
   13735 fabs_zero:					# reached by fall-through for every remaining tag
   13736 	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
   13737 	rts
   13738 fabs_inf:
   13739 	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
   13740 	rts
   13741 
   13742 #########################################################################
   13743 # XDEF ****************************************************************	#
   13744 # 	fcmp(): fp compare op routine					#
   13745 #									#
   13746 # XREF ****************************************************************	#
   13747 # 	res_qnan() - return QNAN result					#
   13748 #	res_snan() - return SNAN result					#
   13749 #									#
   13750 # INPUT ***************************************************************	#
   13751 #	a0 = pointer to extended precision source operand		#
   13752 #	a1 = pointer to extended precision destination operand		#
   13753 #	d0 = round prec/mode						#
   13754 #									#
   13755 # OUTPUT ************************************************************** #
   13756 #	None								#
   13757 #									#
   13758 # ALGORITHM ***********************************************************	#
   13759 # 	Handle NANs and denorms as special cases. For everything else,	#
   13760 # just use the actual fcmp instruction to produce the correct condition	#
   13761 # codes.								#
   13762 #									#
   13763 #########################################################################
   13764 
   13765 	global		fcmp
   13766 fcmp:
   13767 	clr.w		%d1			# clear low word; d1 becomes the table index
   13768 	mov.b		DTAG(%a6),%d1		# fetch dst tag
   13769 	lsl.b		&0x3,%d1		# dst tag * 8 (eight table slots per dst tag)
   13770 	or.b		STAG(%a6),%d1		# combine with src tag
   13771 	bne.b		fcmp_not_norm		# optimize on non-norm input
   13772 
   13773 #
   13774 # COMPARE FP OPs : NORMs, ZEROs, INFs, and "corrected" DENORMs
   13775 #
   13776 fcmp_norm:
   13777 	fmovm.x		DST(%a1),&0x80		# load dst op
   13778 
   13779 	fcmp.x 		%fp0,SRC(%a0)		# do compare
   13780 
   13781 	fmov.l		%fpsr,%d0		# save FPSR
   13782 	rol.l		&0x8,%d0		# extract ccode bits (top byte -> low byte)
   13783 	mov.b		%d0,FPSR_CC(%a6)	# set ccode bits(no exc bits are set)
   13784 
   13785 	rts
   13786 
   13787 #
   13788 # fcmp: inputs are not both normalized; what are they?
   13789 #
   13790 fcmp_not_norm:
   13791 	mov.w		(tbl_fcmp_op.b,%pc,%d1.w*2),%d1	# fetch 16-bit handler offset for this tag pair
   13792 	jmp		(tbl_fcmp_op.b,%pc,%d1.w*1)	# offset is relative to tbl_fcmp_op
   13793 
   13794 	swbeg		&48
   13795 tbl_fcmp_op:
   13796 	short		fcmp_norm	- tbl_fcmp_op # NORM - NORM
   13797 	short		fcmp_norm	- tbl_fcmp_op # NORM - ZERO
   13798 	short		fcmp_norm	- tbl_fcmp_op # NORM - INF
   13799 	short		fcmp_res_qnan	- tbl_fcmp_op # NORM - QNAN
   13800 	short		fcmp_nrm_dnrm 	- tbl_fcmp_op # NORM - DENORM
   13801 	short		fcmp_res_snan	- tbl_fcmp_op # NORM - SNAN
   13802 	short		tbl_fcmp_op	- tbl_fcmp_op # filler: offset 0
   13803 	short		tbl_fcmp_op	- tbl_fcmp_op # filler: offset 0
   13804 
   13805 	short		fcmp_norm	- tbl_fcmp_op # ZERO - NORM
   13806 	short		fcmp_norm	- tbl_fcmp_op # ZERO - ZERO
   13807 	short		fcmp_norm	- tbl_fcmp_op # ZERO - INF
   13808 	short		fcmp_res_qnan	- tbl_fcmp_op # ZERO - QNAN
   13809 	short		fcmp_dnrm_s	- tbl_fcmp_op # ZERO - DENORM
   13810 	short		fcmp_res_snan	- tbl_fcmp_op # ZERO - SNAN
   13811 	short		tbl_fcmp_op	- tbl_fcmp_op # filler: offset 0
   13812 	short		tbl_fcmp_op	- tbl_fcmp_op # filler: offset 0
   13813 
   13814 	short		fcmp_norm	- tbl_fcmp_op # INF - NORM
   13815 	short		fcmp_norm	- tbl_fcmp_op # INF - ZERO
   13816 	short		fcmp_norm	- tbl_fcmp_op # INF - INF
   13817 	short		fcmp_res_qnan	- tbl_fcmp_op # INF - QNAN
   13818 	short		fcmp_dnrm_s	- tbl_fcmp_op # INF - DENORM
   13819 	short		fcmp_res_snan	- tbl_fcmp_op # INF - SNAN
   13820 	short		tbl_fcmp_op	- tbl_fcmp_op # filler: offset 0
   13821 	short		tbl_fcmp_op	- tbl_fcmp_op # filler: offset 0
   13822 
   13823 	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - NORM
   13824 	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - ZERO
   13825 	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - INF
   13826 	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - QNAN
   13827 	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - DENORM
   13828 	short		fcmp_res_snan	- tbl_fcmp_op # QNAN - SNAN
   13829 	short		tbl_fcmp_op	- tbl_fcmp_op # filler: offset 0
   13830 	short		tbl_fcmp_op	- tbl_fcmp_op # filler: offset 0
   13831 
   13832 	short		fcmp_dnrm_nrm	- tbl_fcmp_op # DENORM - NORM
   13833 	short		fcmp_dnrm_d	- tbl_fcmp_op # DENORM - ZERO
   13834 	short		fcmp_dnrm_d	- tbl_fcmp_op # DENORM - INF
   13835 	short		fcmp_res_qnan	- tbl_fcmp_op # DENORM - QNAN
   13836 	short		fcmp_dnrm_sd	- tbl_fcmp_op # DENORM - DENORM
   13837 	short		fcmp_res_snan	- tbl_fcmp_op # DENORM - SNAN
   13838 	short		tbl_fcmp_op	- tbl_fcmp_op # filler: offset 0
   13839 	short		tbl_fcmp_op	- tbl_fcmp_op # filler: offset 0
   13840 
   13841 	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - NORM
   13842 	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - ZERO
   13843 	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - INF
   13844 	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - QNAN
   13845 	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - DENORM
   13846 	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - SNAN
   13847 	short		tbl_fcmp_op	- tbl_fcmp_op # filler: offset 0
   13848 	short		tbl_fcmp_op	- tbl_fcmp_op # filler: offset 0
   13849 
   13850 # unlike all other functions for QNAN and SNAN, fcmp does NOT set the
   13851 # 'N' bit for a negative QNAN or SNAN input so we must squelch it here.
   13852 fcmp_res_qnan:
   13853 	bsr.l		res_qnan		# build the standard QNAN result
   13854 	andi.b		&0xf7,FPSR_CC(%a6)	# squelch 'N': clear bit 3 of the ccode byte
   13855 	rts
   13856 fcmp_res_snan:
   13857 	bsr.l		res_snan		# build the standard SNAN result
   13858 	andi.b		&0xf7,FPSR_CC(%a6)	# squelch 'N': clear bit 3 of the ccode byte
   13859 	rts
   13860 
   13861 #
   13862 # DENORMs are a little more difficult.
   13863 # If you have 2 DENORMs, then you can just force the j-bit to a one
   13864 # and use the fcmp_norm routine.
   13865 # If you have a DENORM and an INF or ZERO, just force the DENORM's j-bit to a one
   13866 # and use the fcmp_norm routine.
   13867 # If you have a DENORM and a NORM with opposite signs, then use fcmp_norm, also.
   13868 # But with a DENORM and a NORM of the same sign, the neg bit is set if the
   13869 # (1) signs are (+) and the DENORM is the dst or
   13870 # (2) signs are (-) and the DENORM is the src
   13871 #
   13872 
   13873 fcmp_dnrm_s:
   13874 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy src {sgn,exp} to scratch
   13875 	mov.l		SRC_HI(%a0),%d0		# fetch src mantissa (hi)
   13876 	bset		&31,%d0			# DENORM src; make into small norm
   13877 	mov.l		%d0,FP_SCR0_HI(%a6)	# store patched mantissa (hi)
   13878 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)	# copy src mantissa (lo)
   13879 	lea		FP_SCR0(%a6),%a0	# src now points at the patched copy
   13880 	bra.w		fcmp_norm		# do the ordinary compare
   13881 
   13882 fcmp_dnrm_d:
   13883 	mov.l		DST_EX(%a1),FP_SCR0_EX(%a6)
   13884 	mov.l		DST_HI(%a1),%d0
   13885 	bset		&31,%d0			# DENORM src; make into small norm
   13886 	mov.l		%d0,FP_SCR0_HI(%a6)
   13887 	mov.l		DST_LO(%a1),FP_SCR0_LO(%a6)
   13888 	lea		FP_SCR0(%a6),%a1
   13889 	bra.w		fcmp_norm
   13890 
   13891 fcmp_dnrm_sd:
   13892 	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)	# copy dst {sgn,exp} to FP_SCR1
   13893 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy src {sgn,exp} to FP_SCR0
   13894 	mov.l		DST_HI(%a1),%d0
   13895 	bset		&31,%d0			# DENORM dst; make into small norm
   13896 	mov.l		%d0,FP_SCR1_HI(%a6)
   13897 	mov.l		SRC_HI(%a0),%d0
   13898 	bset		&31,%d0			# DENORM src; make into small norm
   13899 	mov.l		%d0,FP_SCR0_HI(%a6)
   13900 	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
   13901 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   13902 	lea		FP_SCR1(%a6),%a1	# dst now points at its patched copy
   13903 	lea		FP_SCR0(%a6),%a0	# src now points at its patched copy
   13904 	bra.w		fcmp_norm		# do the ordinary compare
   13905 
   13906 fcmp_nrm_dnrm:
   13907 	mov.b		SRC_EX(%a0),%d0		# determine if like signs
   13908 	mov.b		DST_EX(%a1),%d1
   13909 	eor.b		%d0,%d1			# bit 7 set if signs differ
   13910 	bmi.w		fcmp_dnrm_s		# unlike signs; patch src, use fcmp_norm
   13911 
   13912 # signs are the same, so must determine the answer ourselves.
   13913 	tst.b		%d0			# is src op negative?
   13914 	bmi.b		fcmp_nrm_dnrm_m		# yes
   13915 	rts					# no; FPSR_CC is left untouched
   13916 fcmp_nrm_dnrm_m:
   13917 	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
   13918 	rts
   13919 
   13920 fcmp_dnrm_nrm:
   13921 	mov.b		SRC_EX(%a0),%d0		# determine if like signs
   13922 	mov.b		DST_EX(%a1),%d1
   13923 	eor.b		%d0,%d1			# bit 7 set if signs differ
   13924 	bmi.w		fcmp_dnrm_d		# unlike signs; patch dst, use fcmp_norm
   13925 
   13926 # signs are the same, so must determine the answer ourselves.
   13927 	tst.b		%d0			# is src op negative?
   13928 	bpl.b		fcmp_dnrm_nrm_m		# no
   13929 	rts					# yes; FPSR_CC is left untouched
   13930 fcmp_dnrm_nrm_m:
   13931 	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
   13932 	rts
   13933 
   13934 #########################################################################
   13935 # XDEF ****************************************************************	#
   13936 # 	fsglmul(): emulates the fsglmul instruction			#
   13937 #									#
   13938 # XREF ****************************************************************	#
   13939 #	scale_to_zero_src() - scale src exponent to zero		#
   13940 #	scale_to_zero_dst() - scale dst exponent to zero		#
   13941 #	unf_res4() - return default underflow result for sglop		#
   13942 #	ovf_res() - return default overflow result			#
   13943 # 	res_qnan() - return QNAN result					#
   13944 # 	res_snan() - return SNAN result					#
   13945 #									#
   13946 # INPUT ***************************************************************	#
   13947 #	a0 = pointer to extended precision source operand		#
   13948 #	a1 = pointer to extended precision destination operand		#
   13949 #	d0 = rnd prec,mode						#
   13950 #									#
   13951 # OUTPUT **************************************************************	#
   13952 #	fp0 = result							#
   13953 #	fp1 = EXOP (if exception occurred)				#
   13954 #									#
   13955 # ALGORITHM ***********************************************************	#
   13956 #	Handle NANs, infinities, and zeroes as special cases. Divide	#
   13957 # norms/denorms into ext/sgl/dbl precision.				#
   13958 #	For norms/denorms, scale the exponents such that a multiply	#
   13959 # instruction won't cause an exception. Use the regular fsglmul to	#
   13960 # compute a result. Check if the regular operands would have taken	#
   13961 # an exception. If so, return the default overflow/underflow result	#
   13962 # and return the EXOP if exceptions are enabled. Else, scale the 	#
   13963 # result operand to the proper exponent.				#
   13964 #									#
   13965 #########################################################################
   13966 
   13967 	global		fsglmul
   13968 fsglmul:
   13969 	mov.l		%d0,L_SCR3(%a6)		# store rnd info
   13970 
   13971 	clr.w		%d1			# clear low word; d1 becomes the table index
   13972 	mov.b		DTAG(%a6),%d1		# fetch dst tag
   13973 	lsl.b		&0x3,%d1		# dst tag * 8 (eight table slots per dst tag)
   13974 	or.b		STAG(%a6),%d1		# combine with src tag
   13975 
   13976 	bne.w		fsglmul_not_norm	# optimize on non-norm input
   13977 
   13978 fsglmul_norm:
   13979 	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)	# copy dst operand into FP_SCR1
   13980 	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
   13981 	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
   13982 
   13983 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy src operand into FP_SCR0
   13984 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   13985 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   13986 
   13987 	bsr.l		scale_to_zero_src	# scale src exponent
   13988 	mov.l		%d0,-(%sp)		# save scale factor 1
   13989 
   13990 	bsr.l		scale_to_zero_dst	# scale dst exponent
   13991 
   13992 	add.l		(%sp)+,%d0		# SCALE_FACTOR = scale1 + scale2
   13993 
   13994 	cmpi.l		%d0,&0x3fff-0x7ffe 	# would result ovfl?
   13995 	beq.w		fsglmul_may_ovfl	# result may rnd to overflow
   13996 	blt.w		fsglmul_ovfl		# result will overflow
   13997 
   13998 	cmpi.l		%d0,&0x3fff+0x0001 	# would result unfl?
   13999 	beq.w		fsglmul_may_unfl	# result may rnd to no unfl
   14000 	bgt.w		fsglmul_unfl		# result will underflow
   14001 
   14002 fsglmul_normal:
   14003 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
   14004 
   14005 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   14006 	fmov.l		&0x0,%fpsr		# clear FPSR
   14007 
   14008 	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply
   14009 
   14010 	fmov.l		%fpsr,%d1		# save status
   14011 	fmov.l		&0x0,%fpcr		# clear FPCR
   14012 
   14013 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N; falls into fsglmul_normal_exit
   14014 
   14015 fsglmul_normal_exit:
   14016 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
   14017 	mov.l		%d2,-(%sp)		# save d2
   14018 	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
   14019 	mov.l		%d1,%d2			# make a copy
   14020 	andi.l		&0x7fff,%d1		# strip sign
   14021 	andi.w		&0x8000,%d2		# keep old sign
   14022 	sub.l		%d0,%d1			# subtract scale factor
   14023 	or.w		%d2,%d1			# concat old sign,new exp
   14024 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
   14025 	mov.l		(%sp)+,%d2		# restore d2
   14026 	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
   14027 	rts
   14028 
   14029 fsglmul_ovfl:
   14030 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
   14031 
   14032 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   14033 	fmov.l		&0x0,%fpsr		# clear FPSR
   14034 
   14035 	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply
   14036 
   14037 	fmov.l		%fpsr,%d1		# save status
   14038 	fmov.l		&0x0,%fpcr		# clear FPCR
   14039 
   14040 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N; falls into fsglmul_ovfl_tst
   14041 
   14042 fsglmul_ovfl_tst:
   14043 
   14044 # save setting this until now because this is where fsglmul_may_ovfl may jump in
   14045 	or.l		&ovfl_inx_mask, USER_FPSR(%a6) # set ovfl/aovfl/ainex
   14046 
   14047 	mov.b		FPCR_ENABLE(%a6),%d1	# fetch exception enable byte
   14048 	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
   14049 	bne.b		fsglmul_ovfl_ena	# yes; otherwise fall into fsglmul_ovfl_dis
   14050 
   14051 fsglmul_ovfl_dis:
   14052 	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
   14053 	sne		%d1			# sign param: 0xff if negative, else 0x00
   14054 	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
   14055 	andi.b		&0x30,%d0		# force prec = ext (keep only the rnd mode bits)
   14056 	bsr.l		ovf_res			# calculate default result
   14057 	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable (ccodes come back in d0)
   14058 	fmovm.x		(%a0),&0x80		# return default result (addressed by a0) in fp0
   14059 	rts
   14060 
   14061 fsglmul_ovfl_ena:
   14062 	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack
   14063 
   14064 	mov.l		%d2,-(%sp)		# save d2
   14065 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
   14066 	mov.l		%d1,%d2			# make a copy
   14067 	andi.l		&0x7fff,%d1		# strip sign
   14068 	sub.l		%d0,%d1			# subtract scale factor
   14069 	subi.l		&0x6000,%d1		# subtract bias
   14070 	andi.w		&0x7fff,%d1		# keep only the 15 exponent bits
   14071 	andi.w		&0x8000,%d2		# keep old sign
   14072 	or.w		%d2,%d1			# concat old sign,new exp
   14073 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
   14074 	mov.l		(%sp)+,%d2		# restore d2
   14075 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   14076 	bra.b		fsglmul_ovfl_dis	# still build the default result for fp0
   14077 
   14078 fsglmul_may_ovfl:
   14079 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
   14080 
   14081 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   14082 	fmov.l		&0x0,%fpsr		# clear FPSR
   14083 
   14084 	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply
   14085 
   14086 	fmov.l		%fpsr,%d1		# save status
   14087 	fmov.l		&0x0,%fpcr		# clear FPCR
   14088 
   14089 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   14090 
   14091 	fabs.x		%fp0,%fp1		# make a copy of result
   14092 	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
   14093 	fbge.w		fsglmul_ovfl_tst	# yes; overflow has occurred
   14094 
   14095 # no, it didn't overflow; we have correct result
   14096 	bra.w		fsglmul_normal_exit
   14097 
   14098 fsglmul_unfl:
   14099 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
   14100 
   14101 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
   14102 
   14103 	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR: round-to-zero only, no saved prec
   14104 	fmov.l		&0x0,%fpsr		# clear FPSR
   14105 
   14106 	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply
   14107 
   14108 	fmov.l		%fpsr,%d1		# save status
   14109 	fmov.l		&0x0,%fpcr		# clear FPCR
   14110 
   14111 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   14112 
   14113 	mov.b		FPCR_ENABLE(%a6),%d1	# fetch exception enable byte
   14114 	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
   14115 	bne.b		fsglmul_unfl_ena	# yes; otherwise fall into fsglmul_unfl_dis
   14116 
   14117 fsglmul_unfl_dis:
   14118 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
   14119 
   14120 	lea		FP_SCR0(%a6),%a0	# pass: result addr
   14121 	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
   14122 	bsr.l		unf_res4		# calculate default result (rewrites FP_SCR0)
   14123 	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
   14124 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
   14125 	rts
   14126 
   14127 #
   14128 # UNFL is enabled.
   14129 #
   14130 fsglmul_unfl_ena:
   14131 	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
   14132 
   14133 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   14134 	fmov.l		&0x0,%fpsr		# clear FPSR
   14135 
   14136 	fsglmul.x	FP_SCR0(%a6),%fp1	# execute sgl multiply
   14137 
   14138 	fmov.l		&0x0,%fpcr		# clear FPCR
   14139 
   14140 	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
   14141 	mov.l		%d2,-(%sp)		# save d2
   14142 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
   14143 	mov.l		%d1,%d2			# make a copy
   14144 	andi.l		&0x7fff,%d1		# strip sign
   14145 	andi.w		&0x8000,%d2		# keep old sign
   14146 	sub.l		%d0,%d1			# subtract scale factor
   14147 	addi.l		&0x6000,%d1		# add bias
   14148 	andi.w		&0x7fff,%d1		# keep only the 15 exponent bits
   14149 	or.w		%d2,%d1			# concat old sign,new exp
   14150 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
   14151 	mov.l		(%sp)+,%d2		# restore d2
   14152 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   14153 	bra.w		fsglmul_unfl_dis	# fp0 still holds the RZ result for the default path
   14154 
   14155 fsglmul_may_unfl:
   14156 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
   14157 
   14158 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   14159 	fmov.l		&0x0,%fpsr		# clear FPSR
   14160 
   14161 	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply
   14162 
   14163 	fmov.l		%fpsr,%d1		# save status
   14164 	fmov.l		&0x0,%fpcr		# clear FPCR
   14165 
   14166 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   14167 
   14168 	fabs.x		%fp0,%fp1		# make a copy of result
   14169 	fcmp.b		%fp1,&0x2		# compare |result| with 2.b
   14170 	fbgt.w		fsglmul_normal_exit	# |result| > 2; no underflow occurred
   14171 	fblt.w		fsglmul_unfl		# |result| < 2; underflow occurred
   14172 
   14173 #
   14174 # we still don't know if underflow occurred. result is ~ equal to 2. but,
   14175 # we don't know if the result was an underflow that rounded up to a 2 or
   14176 # a normalized number that rounded down to a 2. so, redo the entire operation
   14177 # using RZ as the rounding mode to see what the pre-rounded result is.
   14178 # this case should be relatively rare.
   14179 #
   14180 	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
   14181 
   14182 	mov.l		L_SCR3(%a6),%d1		# fetch saved rnd prec,mode
   14183 	andi.b		&0xc0,%d1		# keep rnd prec
   14184 	ori.b		&rz_mode*0x10,%d1	# insert RZ
   14185 
   14186 	fmov.l		%d1,%fpcr		# set FPCR
   14187 	fmov.l		&0x0,%fpsr		# clear FPSR
   14188 
   14189 	fsglmul.x	FP_SCR0(%a6),%fp1	# execute sgl multiply
   14190 
   14191 	fmov.l		&0x0,%fpcr		# clear FPCR
   14192 	fabs.x		%fp1			# make absolute value
   14193 	fcmp.b		%fp1,&0x2		# is |result| < 2.b?
   14194 	fbge.w		fsglmul_normal_exit	# no; no underflow occurred
   14195 	bra.w		fsglmul_unfl		# yes, underflow occurred
   14196 
   14197 ##############################################################################
   14198 
   14199 #
   14200 # Single Precision Multiply: inputs are not both normalized; what are they?
   14201 #
   14202 fsglmul_not_norm:
   14203 	mov.w		(tbl_fsglmul_op.b,%pc,%d1.w*2),%d1	# fetch 16-bit handler offset for this tag pair
   14204 	jmp		(tbl_fsglmul_op.b,%pc,%d1.w*1)	# offset is relative to tbl_fsglmul_op
   14205 
   14206 	swbeg		&48
   14207 tbl_fsglmul_op:
   14208 	short		fsglmul_norm		- tbl_fsglmul_op # NORM x NORM
   14209 	short		fsglmul_zero		- tbl_fsglmul_op # NORM x ZERO
   14210 	short		fsglmul_inf_src		- tbl_fsglmul_op # NORM x INF
   14211 	short		fsglmul_res_qnan	- tbl_fsglmul_op # NORM x QNAN
   14212 	short		fsglmul_norm		- tbl_fsglmul_op # NORM x DENORM
   14213 	short		fsglmul_res_snan	- tbl_fsglmul_op # NORM x SNAN
   14214 	short		tbl_fsglmul_op		- tbl_fsglmul_op # filler: offset 0
   14215 	short		tbl_fsglmul_op		- tbl_fsglmul_op # filler: offset 0
   14216 
   14217 	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x NORM
   14218 	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x ZERO
   14219 	short		fsglmul_res_operr	- tbl_fsglmul_op # ZERO x INF
   14220 	short		fsglmul_res_qnan	- tbl_fsglmul_op # ZERO x QNAN
   14221 	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x DENORM
   14222 	short		fsglmul_res_snan	- tbl_fsglmul_op # ZERO x SNAN
   14223 	short		tbl_fsglmul_op		- tbl_fsglmul_op # filler: offset 0
   14224 	short		tbl_fsglmul_op		- tbl_fsglmul_op # filler: offset 0
   14225 
   14226 	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x NORM
   14227 	short		fsglmul_res_operr	- tbl_fsglmul_op # INF x ZERO
   14228 	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x INF
   14229 	short		fsglmul_res_qnan	- tbl_fsglmul_op # INF x QNAN
   14230 	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x DENORM
   14231 	short		fsglmul_res_snan	- tbl_fsglmul_op # INF x SNAN
   14232 	short		tbl_fsglmul_op		- tbl_fsglmul_op # filler: offset 0
   14233 	short		tbl_fsglmul_op		- tbl_fsglmul_op # filler: offset 0
   14234 
   14235 	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x NORM
   14236 	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x ZERO
   14237 	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x INF
   14238 	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x QNAN
   14239 	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x DENORM
   14240 	short		fsglmul_res_snan	- tbl_fsglmul_op # QNAN x SNAN
   14241 	short		tbl_fsglmul_op		- tbl_fsglmul_op # filler: offset 0
   14242 	short		tbl_fsglmul_op		- tbl_fsglmul_op # filler: offset 0
   14243 
   14244 	short		fsglmul_norm		- tbl_fsglmul_op # DENORM x NORM
   14245 	short		fsglmul_zero		- tbl_fsglmul_op # DENORM x ZERO
   14246 	short		fsglmul_inf_src		- tbl_fsglmul_op # DENORM x INF
   14247 	short		fsglmul_res_qnan	- tbl_fsglmul_op # DENORM x QNAN
   14248 	short		fsglmul_norm		- tbl_fsglmul_op # DENORM x DENORM
   14249 	short		fsglmul_res_snan	- tbl_fsglmul_op # DENORM x SNAN
   14250 	short		tbl_fsglmul_op		- tbl_fsglmul_op # filler: offset 0
   14251 	short		tbl_fsglmul_op		- tbl_fsglmul_op # filler: offset 0
   14252 
   14253 	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x NORM
   14254 	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x ZERO
   14255 	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x INF
   14256 	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x QNAN
   14257 	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x DENORM
   14258 	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x SNAN
   14259 	short		tbl_fsglmul_op		- tbl_fsglmul_op # filler: offset 0
   14260 	short		tbl_fsglmul_op		- tbl_fsglmul_op # filler: offset 0
   14261 
   14262 fsglmul_res_operr:
   14263 	bra.l		res_operr		# long-branch stub so the 16-bit table offset can reach it
   14264 fsglmul_res_snan:
   14265 	bra.l		res_snan		# long-branch stub to the shared SNAN handler
   14266 fsglmul_res_qnan:
   14267 	bra.l		res_qnan		# long-branch stub to the shared QNAN handler
   14268 fsglmul_zero:
   14269 	bra.l		fmul_zero		# reuse fmul's zero-result handler
   14270 fsglmul_inf_src:
   14271 	bra.l		fmul_inf_src		# reuse fmul's handler for an INF src
   14272 fsglmul_inf_dst:
   14273 	bra.l		fmul_inf_dst		# reuse fmul's handler for an INF dst
   14274 
   14275 #########################################################################
   14276 # XDEF ****************************************************************	#
   14277 # 	fsgldiv(): emulates the fsgldiv instruction			#
   14278 #									#
   14279 # XREF ****************************************************************	#
   14280 #	scale_to_zero_src() - scale src exponent to zero		#
   14281 #	scale_to_zero_dst() - scale dst exponent to zero		#
   14282 #	unf_res4() - return default underflow result for sglop		#
   14283 #	ovf_res() - return default overflow result			#
   14284 # 	res_qnan() - return QNAN result					#
   14285 # 	res_snan() - return SNAN result					#
   14286 #									#
   14287 # INPUT ***************************************************************	#
   14288 #	a0 = pointer to extended precision source operand		#
   14289 #	a1 = pointer to extended precision destination operand		#
   14290 #	d0 = rnd prec,mode						#
   14291 #									#
   14292 # OUTPUT **************************************************************	#
   14293 #	fp0 = result							#
   14294 #	fp1 = EXOP (if exception occurred)				#
   14295 #									#
   14296 # ALGORITHM ***********************************************************	#
   14297 #	Handle NANs, infinities, and zeroes as special cases. Divide	#
   14298 # norms/denorms into ext/sgl/dbl precision.				#
   14299 #	For norms/denorms, scale the exponents such that a divide	#
   14300 # instruction won't cause an exception. Use the regular fsgldiv to	#
   14301 # compute a result. Check if the regular operands would have taken	#
   14302 # an exception. If so, return the default overflow/underflow result	#
   14303 # and return the EXOP if exceptions are enabled. Else, scale the 	#
   14304 # result operand to the proper exponent.				#
   14305 #									#
   14306 #########################################################################
   14307 
   14308 	global		fsgldiv
   14309 fsgldiv:
   14310 	mov.l		%d0,L_SCR3(%a6)		# store rnd info
   14311 
   14312 	clr.w		%d1			# clear low word; d1 becomes the table index
   14313 	mov.b		DTAG(%a6),%d1		# fetch dst tag
   14314 	lsl.b		&0x3,%d1		# dst tag * 8 (eight table slots per dst tag)
   14315 	or.b		STAG(%a6),%d1		# combine src tags
   14316 
   14317 	bne.w		fsgldiv_not_norm	# optimize on non-norm input
   14318 
   14319 #
   14320 # DIVIDE: NORMs and DENORMs ONLY!
   14321 #
   14322 fsgldiv_norm:
   14323 	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)	# copy dst operand into FP_SCR1
   14324 	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
   14325 	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
   14326 
   14327 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy src operand into FP_SCR0
   14328 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   14329 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   14330 
   14331 	bsr.l		scale_to_zero_src	# calculate scale factor 1
   14332 	mov.l		%d0,-(%sp)		# save scale factor 1
   14333 
   14334 	bsr.l		scale_to_zero_dst	# calculate scale factor 2
   14335 
   14336 	neg.l		(%sp)			# S.F. = scale2 - scale1
   14337 	add.l		%d0,(%sp)
   14338 
   14339 	mov.w		2+L_SCR3(%a6),%d1	# fetch precision,mode
   14340 	lsr.b		&0x6,%d1		# NOTE(review): d1 is overwritten before any read on every visible path - confirm it is unused
   14341 	mov.l		(%sp)+,%d0		# d0 = combined scale factor
   14342 	cmpi.l		%d0,&0x3fff-0x7ffe	# could result overflow?
   14343 	ble.w		fsgldiv_may_ovfl	# maybe; decided after the divide
   14344 
   14345 	cmpi.l		%d0,&0x3fff-0x0000 	# will result underflow?
   14346 	beq.w		fsgldiv_may_unfl	# maybe
   14347 	bgt.w		fsgldiv_unfl		# yes; go handle underflow
   14348 
   14349 fsgldiv_normal:
   14350 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
   14351 
   14352 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   14353 	fmov.l		&0x0,%fpsr		# clear FPSR
   14354 
   14355 	fsgldiv.x	FP_SCR0(%a6),%fp0	# perform sgl divide
   14356 
   14357 	fmov.l		%fpsr,%d1		# save FPSR
   14358 	fmov.l		&0x0,%fpcr		# clear FPCR
   14359 
   14360 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N; falls into fsgldiv_normal_exit
   14361 
   14362 fsgldiv_normal_exit:
   14363 	fmovm.x		&0x80,FP_SCR0(%a6)	# store result on stack
   14364 	mov.l		%d2,-(%sp)		# save d2
   14365 	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
   14366 	mov.l		%d1,%d2			# make a copy
   14367 	andi.l		&0x7fff,%d1		# strip sign
   14368 	andi.w		&0x8000,%d2		# keep old sign
   14369 	sub.l		%d0,%d1			# subtract scale factor
   14370 	or.w		%d2,%d1			# concat old sign,new exp
   14371 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
   14372 	mov.l		(%sp)+,%d2		# restore d2
   14373 	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
   14374 	rts
   14375 
   14376 fsgldiv_may_ovfl:
   14377 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
   14378 
   14379 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   14380 	fmov.l		&0x0,%fpsr		# clear FPSR
   14381 
   14382 	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute divide
   14383 
   14384 	fmov.l		%fpsr,%d1		# save status
   14385 	fmov.l		&0x0,%fpcr		# clear FPCR
   14386 
   14387 	or.l		%d1,USER_FPSR(%a6)	# save INEX,N
   14388 
   14389 	fmovm.x		&0x01,-(%sp)		# save result to stack
   14390 	mov.w		(%sp),%d1		# fetch new exponent
   14391 	add.l		&0xc,%sp		# clear result
   14392 	andi.l		&0x7fff,%d1		# strip sign
   14393 	sub.l		%d0,%d1			# subtract scale factor
   14394 	cmp.l		%d1,&0x7fff		# did divide overflow?
   14395 	blt.b		fsgldiv_normal_exit	# no; otherwise fall into fsgldiv_ovfl_tst
   14396 
   14397 fsgldiv_ovfl_tst:
   14398 	or.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
   14399 
   14400 	mov.b		FPCR_ENABLE(%a6),%d1	# fetch exception enable byte
   14401 	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
   14402 	bne.b		fsgldiv_ovfl_ena	# yes; otherwise fall into fsgldiv_ovfl_dis
   14403 
   14404 fsgldiv_ovfl_dis:
   14405 	btst		&neg_bit,FPSR_CC(%a6) 	# is result negative
   14406 	sne		%d1			# sign param: 0xff if negative, else 0x00
   14407 	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
   14408 	andi.b		&0x30,%d0		# kill precision (keep only the rnd mode bits)
   14409 	bsr.l		ovf_res			# calculate default result
   14410 	or.b		%d0,FPSR_CC(%a6)	# set INF if applicable (ccodes come back in d0)
   14411 	fmovm.x		(%a0),&0x80		# return default result (addressed by a0) in fp0
   14412 	rts
   14413 
   14414 fsgldiv_ovfl_ena:
   14415 	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack
   14416 
   14417 	mov.l		%d2,-(%sp)		# save d2
   14418 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
   14419 	mov.l		%d1,%d2			# make a copy
   14420 	andi.l		&0x7fff,%d1		# strip sign
   14421 	andi.w		&0x8000,%d2		# keep old sign
   14422 	sub.l		%d0,%d1			# subtract scale factor
   14423 	subi.l		&0x6000,%d1		# subtract new bias
   14424 	andi.w		&0x7fff,%d1		# clear ms bit
   14425 	or.w		%d2,%d1			# concat old sign,new exp
   14426 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
   14427 	mov.l		(%sp)+,%d2		# restore d2
   14428 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   14429 	bra.b		fsgldiv_ovfl_dis	# still build the default result for fp0
   14430 
   14431 fsgldiv_unfl:
   14432 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
   14433 
   14434 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
   14435 
   14436 	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR: round-to-zero only, no saved prec
   14437 	fmov.l		&0x0,%fpsr		# clear FPSR
   14438 
   14439 	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute sgl divide
   14440 
   14441 	fmov.l		%fpsr,%d1		# save status
   14442 	fmov.l		&0x0,%fpcr		# clear FPCR
   14443 
   14444 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   14445 
   14446 	mov.b		FPCR_ENABLE(%a6),%d1	# fetch exception enable byte
   14447 	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
   14448 	bne.b		fsgldiv_unfl_ena	# yes; otherwise fall into fsgldiv_unfl_dis
   14449 
   14450 fsgldiv_unfl_dis:
   14451 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
   14452 
   14453 	lea		FP_SCR0(%a6),%a0	# pass: result addr
   14454 	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
   14455 	bsr.l		unf_res4		# calculate default result (rewrites FP_SCR0)
   14456 	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
   14457 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
   14458 	rts
   14459 
   14460 # UNFL or INEX is enabled (see the FPCR_ENABLE test in fsgldiv_unfl):
   14461 # redo the divide into fp1 under L_SCR3 and rebias the exponent by +0x6000
   14462 # to form the EXOP, then continue at fsgldiv_unfl_dis for the fp0 result.
   14463 fsgldiv_unfl_ena:
   14464 	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op
   14465 
   14466 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   14467 	fmov.l		&0x0,%fpsr		# clear FPSR
   14468 
   14469 	fsgldiv.x	FP_SCR0(%a6),%fp1	# execute sgl divide
   14470 
   14471 	fmov.l		&0x0,%fpcr		# clear FPCR
   14472 
   14473 	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
   14474 	mov.l		%d2,-(%sp)		# save d2
   14475 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
   14476 	mov.l		%d1,%d2			# make a copy
   14477 	andi.l		&0x7fff,%d1		# strip sign
   14478 	andi.w		&0x8000,%d2		# keep old sign
   14479 	sub.l		%d0,%d1			# exp = exp - d0 (apply scale factor)
   14480 	addi.l		&0x6000,%d1		# add bias
   14481 	andi.w		&0x7fff,%d1		# clear top bit
   14482 	or.w		%d2,%d1			# concat old sign, new exp
   14483 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
   14484 	mov.l		(%sp)+,%d2		# restore d2
   14485 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   14486 	bra.b		fsgldiv_unfl_dis	# go compute the default result for fp0
   14487 
   14488 #
   14489 # the divide operation MAY underflow: do the divide under L_SCR3 and
   14490 # compare |result| with 1 to choose between normal exit and underflow.
   14491 fsgldiv_may_unfl:
   14492 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
   14493 
   14494 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   14495 	fmov.l		&0x0,%fpsr		# clear FPSR
   14496 
   14497 	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute sgl divide
   14498 
   14499 	fmov.l		%fpsr,%d1		# save status
   14500 	fmov.l		&0x0,%fpcr		# clear FPCR
   14501 
   14502 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   14503 
   14504 	fabs.x		%fp0,%fp1		# fp1 = |result|
   14505 	fcmp.b		%fp1,&0x1		# compare |result| with 1
   14506 	fbgt.w		fsgldiv_normal_exit	# |result| > 1; no underflow occurred
   14507 	fblt.w		fsgldiv_unfl		# |result| < 1; underflow occurred
   14508 
   14509 #
   14510 # we still don't know if underflow occurred. result is ~ equal to 1. but,
   14511 # we don't know if the result was an underflow that rounded up to a 1
   14512 # or a normalized number that rounded down to a 1. so, redo the entire
   14513 # operation using RZ as the rounding mode to see what the pre-rounded
   14514 # result is. this case should be relatively rare.
   14515 #
   14516 	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into %fp1
   14517 
   14518 	clr.l		%d1			# clear scratch register
   14519 	ori.b		&rz_mode*0x10,%d1	# force RZ rnd mode
   14520 
   14521 	fmov.l		%d1,%fpcr		# set FPCR
   14522 	fmov.l		&0x0,%fpsr		# clear FPSR
   14523 
   14524 	fsgldiv.x	FP_SCR0(%a6),%fp1	# execute sgl divide
   14525 
   14526 	fmov.l		&0x0,%fpcr		# clear FPCR
   14527 	fabs.x		%fp1			# make absolute value
   14528 	fcmp.b		%fp1,&0x1		# is |result| < 1.b?
   14529 	fbge.w		fsgldiv_normal_exit	# no; no underflow occurred
   14530 	bra.w		fsgldiv_unfl		# yes; underflow occurred
   14531 
   14532 ############################################################################
   14533 
   14534 #
   14535 # Divide: inputs are not both normalized; what are they?
   14536 # d1 indexes tbl_fsgldiv_op; each entry is a 16-bit offset from the table.
   14537 fsgldiv_not_norm:
   14538 	mov.w		(tbl_fsgldiv_op.b,%pc,%d1.w*2),%d1	# d1 = table entry for this index
   14539 	jmp		(tbl_fsgldiv_op.b,%pc,%d1.w*1)	# jump to tbl_fsgldiv_op + entry
   14540 
   14541 	swbeg		&48
   14542 tbl_fsgldiv_op:
   14543 	short		fsgldiv_norm		- tbl_fsgldiv_op # NORM / NORM
   14544 	short		fsgldiv_inf_load	- tbl_fsgldiv_op # NORM / ZERO
   14545 	short		fsgldiv_zero_load	- tbl_fsgldiv_op # NORM / INF
   14546 	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # NORM / QNAN
   14547 	short		fsgldiv_norm		- tbl_fsgldiv_op # NORM / DENORM
   14548 	short		fsgldiv_res_snan	- tbl_fsgldiv_op # NORM / SNAN
   14549 	short		tbl_fsgldiv_op		- tbl_fsgldiv_op # offset 0: no handler
   14550 	short		tbl_fsgldiv_op		- tbl_fsgldiv_op # offset 0: no handler
   14551 
   14552 	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / NORM
   14553 	short		fsgldiv_res_operr	- tbl_fsgldiv_op # ZERO / ZERO
   14554 	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / INF
   14555 	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # ZERO / QNAN
   14556 	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / DENORM
   14557 	short		fsgldiv_res_snan	- tbl_fsgldiv_op # ZERO / SNAN
   14558 	short		tbl_fsgldiv_op		- tbl_fsgldiv_op # offset 0: no handler
   14559 	short		tbl_fsgldiv_op		- tbl_fsgldiv_op # offset 0: no handler
   14560 
   14561 	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / NORM
   14562 	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / ZERO
   14563 	short		fsgldiv_res_operr	- tbl_fsgldiv_op # INF / INF
   14564 	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # INF / QNAN
   14565 	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / DENORM
   14566 	short		fsgldiv_res_snan	- tbl_fsgldiv_op # INF / SNAN
   14567 	short		tbl_fsgldiv_op		- tbl_fsgldiv_op # offset 0: no handler
   14568 	short		tbl_fsgldiv_op		- tbl_fsgldiv_op # offset 0: no handler
   14569 
   14570 	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / NORM
   14571 	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / ZERO
   14572 	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / INF
   14573 	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / QNAN
   14574 	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / DENORM
   14575 	short		fsgldiv_res_snan	- tbl_fsgldiv_op # QNAN / SNAN
   14576 	short		tbl_fsgldiv_op		- tbl_fsgldiv_op # offset 0: no handler
   14577 	short		tbl_fsgldiv_op		- tbl_fsgldiv_op # offset 0: no handler
   14578 
   14579 	short		fsgldiv_norm		- tbl_fsgldiv_op # DENORM / NORM
   14580 	short		fsgldiv_inf_load	- tbl_fsgldiv_op # DENORM / ZERO
   14581 	short		fsgldiv_zero_load	- tbl_fsgldiv_op # DENORM / INF
   14582 	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # DENORM / QNAN
   14583 	short		fsgldiv_norm		- tbl_fsgldiv_op # DENORM / DENORM
   14584 	short		fsgldiv_res_snan	- tbl_fsgldiv_op # DENORM / SNAN
   14585 	short		tbl_fsgldiv_op		- tbl_fsgldiv_op # offset 0: no handler
   14586 	short		tbl_fsgldiv_op		- tbl_fsgldiv_op # offset 0: no handler
   14587 
   14588 	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / NORM
   14589 	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / ZERO
   14590 	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / INF
   14591 	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / QNAN
   14592 	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / DENORM
   14593 	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / SNAN
   14594 	short		tbl_fsgldiv_op		- tbl_fsgldiv_op # offset 0: no handler
   14595 	short		tbl_fsgldiv_op		- tbl_fsgldiv_op # offset 0: no handler
   14597 fsgldiv_res_qnan:			# long-branch stubs reached via tbl_fsgldiv_op
   14598 	bra.l		res_qnan		# hand off to res_qnan
   14599 fsgldiv_res_snan:
   14600 	bra.l		res_snan		# hand off to res_snan
   14601 fsgldiv_res_operr:
   14602 	bra.l		res_operr		# hand off to res_operr
   14603 fsgldiv_inf_load:
   14604 	bra.l		fdiv_inf_load		# reuse the fdiv handler
   14605 fsgldiv_zero_load:
   14606 	bra.l		fdiv_zero_load		# reuse the fdiv handler
   14607 fsgldiv_inf_dst:
   14608 	bra.l		fdiv_inf_dst		# reuse the fdiv handler
   14609 
   14610 #########################################################################
   14611 # XDEF ****************************************************************	#
   14612 #	fadd(): emulates the fadd instruction				#
   14613 #	fsadd(): emulates the fsadd instruction				#
   14614 #	fdadd(): emulates the fdadd instruction				#
   14615 #									#
   14616 # XREF ****************************************************************	#
   14617 # 	addsub_scaler2() - scale the operands so they won't take exc	#
   14618 #	ovf_res() - return default overflow result			#
   14619 #	unf_res() - return default underflow result			#
   14620 #	res_qnan() - set QNAN result					#
   14621 # 	res_snan() - set SNAN result					#
   14622 #	res_operr() - set OPERR result					#
   14623 #	scale_to_zero_src() - set src operand exponent equal to zero	#
   14624 #	scale_to_zero_dst() - set dst operand exponent equal to zero	#
   14625 #									#
   14626 # INPUT ***************************************************************	#
   14627 #	a0 = pointer to extended precision source operand		#
   14628 # 	a1 = pointer to extended precision destination operand		#
   14629 #									#
   14630 # OUTPUT **************************************************************	#
   14631 #	fp0 = result							#
   14632 #	fp1 = EXOP (if exception occurred)				#
   14633 #									#
   14634 # ALGORITHM ***********************************************************	#
   14635 # 	Handle NANs, infinities, and zeroes as special cases. Divide	#
   14636 # norms into extended, single, and double precision.			#
   14637 #	Do addition after scaling exponents such that exception won't	#
   14638 # occur. Then, check result exponent to see if exception would have	#
   14639 # occurred. If so, return default result and maybe EXOP. Else, insert	#
   14640 # the correct result exponent and return. Set FPSR bits as appropriate.	#
   14641 #									#
   14642 #########################################################################
   14643 
   14644 	global		fsadd
   14645 fsadd:					# fadd with rnd prec forced to single
   14646 	andi.b		&0x30,%d0		# keep rnd mode, clear rnd prec
   14647 	ori.b		&s_mode*0x10,%d0	# insert sgl prec
   14648 	bra.b		fadd			# continue in the common fadd code
   14649 
   14650 	global		fdadd
   14651 fdadd:					# fadd with rnd prec forced to double
   14652 	andi.b		&0x30,%d0		# keep rnd mode, clear rnd prec
   14653 	ori.b		&d_mode*0x10,%d0	# insert dbl prec; falls through to fadd
   14654 
   14655 	global		fadd
   14656 fadd:					# d0 = rnd prec,mode; a0/a1 = src/dst operands
   14657 	mov.l		%d0,L_SCR3(%a6)		# store rnd info
   14658 
   14659 	clr.w		%d1			# build d1 = (DTAG << 3) | STAG
   14660 	mov.b		DTAG(%a6),%d1
   14661 	lsl.b		&0x3,%d1
   14662 	or.b		STAG(%a6),%d1		# combine src tags
   14663 
   14664 	bne.w		fadd_not_norm		# nonzero index: dispatch via tbl_fadd_op
   14665 
   14666 #
   14667 # ADD: norms and denorms
   14668 #
   14669 fadd_norm:
   14670 	bsr.l		addsub_scaler2		# scale exponents (d0 is used as scale factor below)
   14671 
   14672 fadd_zero_entry:
   14673 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
   14674 
   14675 	fmov.l		&0x0,%fpsr		# clear FPSR
   14676 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   14677 
   14678 	fadd.x		FP_SCR0(%a6),%fp0	# execute add
   14679 
   14680 	fmov.l		&0x0,%fpcr		# clear FPCR
   14681 	fmov.l		%fpsr,%d1		# fetch INEX2,N,Z
   14682 
   14683 	or.l		%d1,USER_FPSR(%a6)	# save exc and ccode bits
   14684 
   14685 	fbeq.w		fadd_zero_exit		# if result is zero, end now
   14686 
   14687 	mov.l		%d2,-(%sp)		# save d2
   14688 
   14689 	fmovm.x		&0x01,-(%sp)		# save result to stack
   14690 
   14691 	mov.w		2+L_SCR3(%a6),%d1	# fetch low word of rnd info
   14692 	lsr.b		&0x6,%d1		# d1 = rnd prec (bits 7:6) as table index
   14693 
   14694 	mov.w		(%sp),%d2		# fetch new sign, exp
   14695 	andi.l		&0x7fff,%d2		# strip sign
   14696 	sub.l		%d0,%d2			# exp = exp - d0 (apply scale factor)
   14697 
   14698 	cmp.l		%d2,(tbl_fadd_ovfl.b,%pc,%d1.w*4) # is it an overflow?
   14699 	bge.b		fadd_ovfl		# yes
   14700 
   14701 	cmp.l		%d2,(tbl_fadd_unfl.b,%pc,%d1.w*4) # is it an underflow?
   14702 	blt.w		fadd_unfl		# yes
   14703 	beq.w		fadd_may_unfl		# maybe; go find out
   14704 
   14705 fadd_normal:
   14706 	mov.w		(%sp),%d1		# fetch {sgn,exp} of saved result
   14707 	andi.w		&0x8000,%d1		# keep sign
   14708 	or.w		%d2,%d1			# concat sign,new exp
   14709 	mov.w		%d1,(%sp)		# insert new exponent
   14710 
   14711 	fmovm.x		(%sp)+,&0x80		# return result in fp0
   14712 
   14713 	mov.l		(%sp)+,%d2		# restore d2
   14714 	rts
   14715 
   14716 fadd_zero_exit:			# nothing was pushed yet; fp0 already holds the result
   14717 #	fmov.s		&0x00000000,%fp0	# return zero in fp0
   14718 	rts
   14719 
   14720 tbl_fadd_ovfl:				# overflow if exp >= entry; indexed by rnd prec
   14721 	long		0x7fff			# ext ovfl
   14722 	long		0x407f			# sgl ovfl
   14723 	long		0x43ff			# dbl ovfl
   14724 
   14725 tbl_fadd_unfl:				# underflow if exp < entry; equal means "maybe"
   14726 	long	        0x0000			# ext unfl
   14727 	long		0x3f81			# sgl unfl
   14728 	long		0x3c01			# dbl unfl
   14729 
   14730 fadd_ovfl:				# entered with d2 and the 12-byte result on the stack
   14731 	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
   14732 
   14733 	mov.b		FPCR_ENABLE(%a6),%d1	# fetch exception enable byte
   14734 	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
   14735 	bne.b		fadd_ovfl_ena		# yes
   14736 
   14737 	add.l		&0xc,%sp		# discard the saved result (12 bytes)
   14738 fadd_ovfl_dis:				# here only the saved d2 remains on the stack
   14739 	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
   14740 	sne		%d1			# set sign param accordingly
   14741 	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
   14742 	bsr.l		ovf_res			# calculate default result
   14743 	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
   14744 	fmovm.x		(%a0),&0x80		# return default result in fp0
   14745 	mov.l		(%sp)+,%d2		# restore d2
   14746 	rts
   14747 
   14748 fadd_ovfl_ena:
   14749 	mov.b		L_SCR3(%a6),%d1
   14750 	andi.b		&0xc0,%d1		# is precision extended?
   14751 	bne.b		fadd_ovfl_ena_sd	# no; prec = sgl or dbl
   14752 
   14753 fadd_ovfl_ena_cont:
   14754 	mov.w		(%sp),%d1
   14755 	andi.w		&0x8000,%d1		# keep sign
   14756 	subi.l		&0x6000,%d2		# add extra bias
   14757 	andi.w		&0x7fff,%d2
   14758 	or.w		%d2,%d1			# concat sign,new exp
   14759 	mov.w		%d1,(%sp)		# insert new exponent
   14760 
   14761 	fmovm.x		(%sp)+,&0x40		# return EXOP in fp1
   14762 	bra.b		fadd_ovfl_dis
   14763 
   14764 fadd_ovfl_ena_sd:
   14765 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
   14766 
   14767 	mov.l		L_SCR3(%a6),%d1
   14768 	andi.b		&0x30,%d1		# keep rnd mode
   14769 	fmov.l		%d1,%fpcr		# set FPCR
   14770 
   14771 	fadd.x		FP_SCR0(%a6),%fp0	# execute add
   14772 
   14773 	fmov.l		&0x0,%fpcr		# clear FPCR
   14774 
   14775 	add.l		&0xc,%sp
   14776 	fmovm.x		&0x01,-(%sp)
   14777 	bra.b		fadd_ovfl_ena_cont
   14778 
   14779 fadd_unfl:				# underflow: redo the add in RZ, then default result
   14780 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
   14781 
   14782 	add.l		&0xc,%sp		# discard the saved result (12 bytes)
   14783 
   14784 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
   14785 
   14786 	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR: RZ rnd mode only
   14787 	fmov.l		&0x0,%fpsr		# clear FPSR
   14788 
   14789 	fadd.x		FP_SCR0(%a6),%fp0	# execute add
   14790 
   14791 	fmov.l		&0x0,%fpcr		# clear FPCR
   14792 	fmov.l		%fpsr,%d1		# save status
   14793 
   14794 	or.l		%d1,USER_FPSR(%a6)	# save INEX,N
   14795 
   14796 	mov.b		FPCR_ENABLE(%a6),%d1	# fetch exception enable byte
   14797 	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
   14798 	bne.b		fadd_unfl_ena		# yes
   14799 
   14800 fadd_unfl_dis:				# also the return point of fadd_unfl_ena
   14801 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
   14802 
   14803 	lea		FP_SCR0(%a6),%a0	# pass: result addr
   14804 	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
   14805 	bsr.l		unf_res			# calculate default result
   14806 	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
   14807 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
   14808 	mov.l		(%sp)+,%d2		# restore d2
   14809 	rts
   14810 
   14811 fadd_unfl_ena:				# UNFL or INEX enabled: build the EXOP in fp1
   14812 	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op
   14813 
   14814 	mov.l		L_SCR3(%a6),%d1
   14815 	andi.b		&0xc0,%d1		# is precision extended?
   14816 	bne.b		fadd_unfl_ena_sd	# no; sgl or dbl
   14817 
   14818 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   14819 
   14820 fadd_unfl_ena_cont:
   14821 	fmov.l		&0x0,%fpsr		# clear FPSR
   14822 
   14823 	fadd.x		FP_SCR0(%a6),%fp1	# execute add
   14824 
   14825 	fmov.l		&0x0,%fpcr		# clear FPCR
   14826 
   14827 	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
   14828 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
   14829 	mov.l		%d1,%d2			# make a copy (d2 was saved by fadd)
   14830 	andi.l		&0x7fff,%d1		# strip sign
   14831 	andi.w		&0x8000,%d2		# keep old sign
   14832 	sub.l		%d0,%d1			# exp = exp - d0 (apply scale factor)
   14833 	addi.l		&0x6000,%d1		# add new bias
   14834 	andi.w		&0x7fff,%d1		# clear top bit
   14835 	or.w		%d2,%d1			# concat sign,new exp
   14836 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
   14837 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   14838 	bra.w		fadd_unfl_dis		# go compute the default result for fp0
   14839 
   14840 fadd_unfl_ena_sd:			# sgl/dbl: redo the add with rnd prec cleared
   14841 	mov.l		L_SCR3(%a6),%d1
   14842 	andi.b		&0x30,%d1		# use only rnd mode
   14843 	fmov.l		%d1,%fpcr		# set FPCR
   14844 
   14845 	bra.b		fadd_unfl_ena_cont
   14846 
   14847 #
   14848 # result is equal to the smallest normalized number in the selected precision.
   14849 # if the precision is extended, this result could not have come from an
   14850 # underflow that rounded up.
   14851 #
   14852 fadd_may_unfl:
   14853 	mov.l		L_SCR3(%a6),%d1
   14854 	andi.b		&0xc0,%d1		# is precision extended?
   14855 	beq.w		fadd_normal		# yes; no underflow occurred
   14856 
   14857 	mov.l		0x4(%sp),%d1		# extract hi(man)
   14858 	cmpi.l		%d1,&0x80000000		# is hi(man) = 0x80000000?
   14859 	bne.w		fadd_normal		# no; no underflow occurred
   14860 
   14861 	tst.l		0x8(%sp)		# is lo(man) = 0x0?
   14862 	bne.w		fadd_normal		# no; no underflow occurred
   14863 
   14864 	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
   14865 	beq.w		fadd_normal		# no; no underflow occurred
   14866 
   14867 #
   14868 # ok, so now the result has an exponent equal to the smallest normalized
   14869 # exponent for the selected precision. also, the mantissa is equal to
   14870 # 0x8000000000000000 and this mantissa is the result of rounding non-zero
   14871 # g,r,s.
   14872 # now, we must determine whether the pre-rounded result was an underflow
   14873 # rounded "up" or a normalized number rounded "down".
   14874 # so, we do this by re-executing the add using RZ as the rounding mode and
   14875 # seeing if the new result is smaller or equal to the current result.
   14876 #
   14877 	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
   14878 
   14879 	mov.l		L_SCR3(%a6),%d1
   14880 	andi.b		&0xc0,%d1		# keep rnd prec
   14881 	ori.b		&rz_mode*0x10,%d1	# insert rnd mode
   14882 	fmov.l		%d1,%fpcr		# set FPCR
   14883 	fmov.l		&0x0,%fpsr		# clear FPSR
   14884 
   14885 	fadd.x		FP_SCR0(%a6),%fp1	# execute add
   14886 
   14887 	fmov.l		&0x0,%fpcr		# clear FPCR
   14888 
   14889 	fabs.x		%fp0			# compare absolute values
   14890 	fabs.x		%fp1
   14891 	fcmp.x		%fp0,%fp1		# is first result > second?
   14892 
   14893 	fbgt.w		fadd_unfl		# yes; it's an underflow
   14894 	bra.w		fadd_normal		# no; it's not an underflow
   14895 
   14896 ##########################################################################
   14897 
   14898 #
   14899 # Add: inputs are not both normalized; what are they?
   14900 # d1 = (DTAG << 3) | STAG; each table entry is a 16-bit offset from tbl_fadd_op.
   14901 fadd_not_norm:
   14902 	mov.w		(tbl_fadd_op.b,%pc,%d1.w*2),%d1	# d1 = table entry for this index
   14903 	jmp		(tbl_fadd_op.b,%pc,%d1.w*1)	# jump to tbl_fadd_op + entry
   14904 
   14905 	swbeg		&48
   14906 tbl_fadd_op:
   14907 	short		fadd_norm	- tbl_fadd_op # NORM + NORM
   14908 	short		fadd_zero_src	- tbl_fadd_op # NORM + ZERO
   14909 	short		fadd_inf_src	- tbl_fadd_op # NORM + INF
   14910 	short		fadd_res_qnan	- tbl_fadd_op # NORM + QNAN
   14911 	short		fadd_norm	- tbl_fadd_op # NORM + DENORM
   14912 	short		fadd_res_snan	- tbl_fadd_op # NORM + SNAN
   14913 	short		tbl_fadd_op	- tbl_fadd_op # offset 0: no handler
   14914 	short		tbl_fadd_op	- tbl_fadd_op # offset 0: no handler
   14915 
   14916 	short		fadd_zero_dst	- tbl_fadd_op # ZERO + NORM
   14917 	short		fadd_zero_2	- tbl_fadd_op # ZERO + ZERO
   14918 	short		fadd_inf_src	- tbl_fadd_op # ZERO + INF
   14919 	short		fadd_res_qnan	- tbl_fadd_op # ZERO + QNAN
   14920 	short		fadd_zero_dst	- tbl_fadd_op # ZERO + DENORM
   14921 	short		fadd_res_snan	- tbl_fadd_op # ZERO + SNAN
   14922 	short		tbl_fadd_op	- tbl_fadd_op # offset 0: no handler
   14923 	short		tbl_fadd_op	- tbl_fadd_op # offset 0: no handler
   14924 
   14925 	short		fadd_inf_dst	- tbl_fadd_op # INF + NORM
   14926 	short		fadd_inf_dst	- tbl_fadd_op # INF + ZERO
   14927 	short		fadd_inf_2	- tbl_fadd_op # INF + INF
   14928 	short		fadd_res_qnan	- tbl_fadd_op # INF + QNAN
   14929 	short		fadd_inf_dst	- tbl_fadd_op # INF + DENORM
   14930 	short		fadd_res_snan	- tbl_fadd_op # INF + SNAN
   14931 	short		tbl_fadd_op	- tbl_fadd_op # offset 0: no handler
   14932 	short		tbl_fadd_op	- tbl_fadd_op # offset 0: no handler
   14933 
   14934 	short		fadd_res_qnan	- tbl_fadd_op # QNAN + NORM
   14935 	short		fadd_res_qnan	- tbl_fadd_op # QNAN + ZERO
   14936 	short		fadd_res_qnan	- tbl_fadd_op # QNAN + INF
   14937 	short		fadd_res_qnan	- tbl_fadd_op # QNAN + QNAN
   14938 	short		fadd_res_qnan	- tbl_fadd_op # QNAN + DENORM
   14939 	short		fadd_res_snan	- tbl_fadd_op # QNAN + SNAN
   14940 	short		tbl_fadd_op	- tbl_fadd_op # offset 0: no handler
   14941 	short		tbl_fadd_op	- tbl_fadd_op # offset 0: no handler
   14942 
   14943 	short		fadd_norm	- tbl_fadd_op # DENORM + NORM
   14944 	short		fadd_zero_src	- tbl_fadd_op # DENORM + ZERO
   14945 	short		fadd_inf_src	- tbl_fadd_op # DENORM + INF
   14946 	short		fadd_res_qnan	- tbl_fadd_op # DENORM + QNAN
   14947 	short		fadd_norm	- tbl_fadd_op # DENORM + DENORM
   14948 	short		fadd_res_snan	- tbl_fadd_op # DENORM + SNAN
   14949 	short		tbl_fadd_op	- tbl_fadd_op # offset 0: no handler
   14950 	short		tbl_fadd_op	- tbl_fadd_op # offset 0: no handler
   14951 
   14952 	short		fadd_res_snan	- tbl_fadd_op # SNAN + NORM
   14953 	short		fadd_res_snan	- tbl_fadd_op # SNAN + ZERO
   14954 	short		fadd_res_snan	- tbl_fadd_op # SNAN + INF
   14955 	short		fadd_res_snan	- tbl_fadd_op # SNAN + QNAN
   14956 	short		fadd_res_snan	- tbl_fadd_op # SNAN + DENORM
   14957 	short		fadd_res_snan	- tbl_fadd_op # SNAN + SNAN
   14958 	short		tbl_fadd_op	- tbl_fadd_op # offset 0: no handler
   14959 	short		tbl_fadd_op	- tbl_fadd_op # offset 0: no handler
   14960 
   14961 fadd_res_qnan:				# long-branch stubs reached via tbl_fadd_op
   14962 	bra.l		res_qnan		# set QNAN result
   14963 fadd_res_snan:
   14964 	bra.l		res_snan		# set SNAN result
   14965 
   14966 #
   14967 # both operands are ZEROes
   14968 #
   14969 fadd_zero_2:
   14970 	mov.b		SRC_EX(%a0),%d0		# are the signs opposite
   14971 	mov.b		DST_EX(%a1),%d1
   14972 	eor.b		%d0,%d1			# bit 7 of d1 set if the signs differ
   14973 	bmi.w		fadd_zero_2_chk_rm	# weed out (-ZERO)+(+ZERO)
   14974 
   14975 # the signs are the same. so determine whether they are positive or negative
   14976 # and return the appropriately signed zero.
   14977 	tst.b		%d0			# are ZEROes positive or negative?
   14978 	bmi.b		fadd_zero_rm		# negative
   14979 	fmov.s		&0x00000000,%fp0	# return +ZERO
   14980 	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
   14981 	rts
   14982 
   14983 #
   14984 # the ZEROes have opposite signs:
   14985 # - therefore, we return +ZERO if the rounding modes are RN,RZ, or RP.
   14986 # - -ZERO is returned in the case of RM.
   14987 #
   14988 fadd_zero_2_chk_rm:
   14989 	mov.b		3+L_SCR3(%a6),%d1	# fetch prec:rnd byte (low byte of L_SCR3)
   14990 	andi.b		&0x30,%d1		# extract rnd mode
   14991 	cmpi.b		%d1,&rm_mode*0x10	# is rnd mode == RM?
   14992 	beq.b		fadd_zero_rm		# yes
   14993 	fmov.s		&0x00000000,%fp0	# return +ZERO
   14994 	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
   14995 	rts
   14996 
   14997 fadd_zero_rm:				# shared exit that returns -ZERO
   14998 	fmov.s		&0x80000000,%fp0	# return -ZERO
   14999 	mov.b		&neg_bmask+z_bmask,FPSR_CC(%a6) # set NEG/Z
   15000 	rts
   15001 
   15002 #
   15003 # one operand is a ZERO and the other is a DENORM or NORM. scale
   15004 # the DENORM or NORM and jump to the regular fadd routine.
   15005 #
   15006 fadd_zero_dst:				# dst is the ZERO: copy src, zero FP_SCR1
   15007 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# FP_SCR0 = src operand
   15008 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   15009 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   15010 	bsr.l		scale_to_zero_src	# scale the operand
   15011 	clr.w		FP_SCR1_EX(%a6)		# FP_SCR1 = all-zero dst operand
   15012 	clr.l		FP_SCR1_HI(%a6)
   15013 	clr.l		FP_SCR1_LO(%a6)
   15014 	bra.w		fadd_zero_entry		# go execute fadd
   15015 
   15016 fadd_zero_src:				# src is the ZERO: copy dst, zero FP_SCR0
   15017 	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)	# FP_SCR1 = dst operand
   15018 	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
   15019 	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
   15020 	bsr.l		scale_to_zero_dst	# scale the operand
   15021 	clr.w		FP_SCR0_EX(%a6)		# FP_SCR0 = all-zero src operand
   15022 	clr.l		FP_SCR0_HI(%a6)
   15023 	clr.l		FP_SCR0_LO(%a6)
   15024 	bra.w		fadd_zero_entry		# go execute fadd
   15025 
   15026 #
   15027 # both operands are INFs. an OPERR will result if the INFs have
   15028 # different signs. else, an INF of the same sign is returned
   15029 #
   15030 fadd_inf_2:
   15031 	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
   15032 	mov.b		DST_EX(%a1),%d1
   15033 	eor.b		%d1,%d0			# bit 7 of d0 set if the signs differ
   15034 	bmi.l		res_operr		# weed out (-INF)+(+INF)
   15035 
   15036 # ok, so it's not an OPERR. but, we do have to remember to return the
   15037 # src INF since that's where the 881/882 gets the j-bit from...
   15038 
   15039 #
   15040 # the src operand is an INF (fadd_inf_2 also falls through to here)
   15041 #
   15042 fadd_inf_src:
   15043 	fmovm.x		SRC(%a0),&0x80		# return src INF
   15044 	tst.b		SRC_EX(%a0)		# is INF positive?
   15045 	bpl.b		fadd_inf_done		# yes; we're done
   15046 	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
   15047 	rts
   15048 
   15049 #
   15050 # the dst operand is an INF and the src is one of {ZERO, DENORM, NORM}
   15051 #
   15052 fadd_inf_dst:
   15053 	fmovm.x		DST(%a1),&0x80		# return dst INF
   15054 	tst.b		DST_EX(%a1)		# is INF positive?
   15055 	bpl.b		fadd_inf_done		# yes; we're done
   15056 	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
   15057 	rts
   15058 
   15059 fadd_inf_done:				# shared exit for a positive INF result
   15060 	mov.b		&inf_bmask,FPSR_CC(%a6) # set INF
   15061 	rts
   15062 
   15063 #########################################################################
   15064 # XDEF ****************************************************************	#
   15065 #	fsub(): emulates the fsub instruction				#
   15066 #	fssub(): emulates the fssub instruction				#
   15067 #	fdsub(): emulates the fdsub instruction				#
   15068 #									#
   15069 # XREF ****************************************************************	#
   15070 # 	addsub_scaler2() - scale the operands so they won't take exc	#
   15071 #	ovf_res() - return default overflow result			#
   15072 #	unf_res() - return default underflow result			#
   15073 #	res_qnan() - set QNAN result					#
   15074 # 	res_snan() - set SNAN result					#
   15075 #	res_operr() - set OPERR result					#
   15076 #	scale_to_zero_src() - set src operand exponent equal to zero	#
   15077 #	scale_to_zero_dst() - set dst operand exponent equal to zero	#
   15078 #									#
   15079 # INPUT ***************************************************************	#
   15080 #	a0 = pointer to extended precision source operand		#
   15081 # 	a1 = pointer to extended precision destination operand		#
   15082 #									#
   15083 # OUTPUT **************************************************************	#
   15084 #	fp0 = result							#
   15085 #	fp1 = EXOP (if exception occurred)				#
   15086 #									#
   15087 # ALGORITHM ***********************************************************	#
   15088 # 	Handle NANs, infinities, and zeroes as special cases. Divide	#
   15089 # norms into extended, single, and double precision.			#
   15090 #	Do subtraction after scaling exponents such that exception won't#
   15091 # occur. Then, check result exponent to see if exception would have	#
   15092 # occurred. If so, return default result and maybe EXOP. Else, insert	#
   15093 # the correct result exponent and return. Set FPSR bits as appropriate.	#
   15094 #									#
   15095 #########################################################################
   15096 
   15097 	global		fssub
   15098 fssub:					# fsub with rnd prec forced to single
   15099 	andi.b		&0x30,%d0		# keep rnd mode, clear rnd prec
   15100 	ori.b		&s_mode*0x10,%d0	# insert sgl prec
   15101 	bra.b		fsub			# continue in the common fsub code
   15102 
   15103 	global		fdsub
   15104 fdsub:					# fsub with rnd prec forced to double
   15105 	andi.b		&0x30,%d0		# keep rnd mode, clear rnd prec
   15106 	ori.b		&d_mode*0x10,%d0	# insert dbl prec; falls through to fsub
   15107 
   15108 	global		fsub
   15109 fsub:					# d0 = rnd prec,mode; a0/a1 = src/dst operands
   15110 	mov.l		%d0,L_SCR3(%a6)		# store rnd info
   15111 
   15112 	clr.w		%d1			# build d1 = (DTAG << 3) | STAG
   15113 	mov.b		DTAG(%a6),%d1
   15114 	lsl.b		&0x3,%d1
   15115 	or.b		STAG(%a6),%d1		# combine src tags
   15116 
   15117 	bne.w		fsub_not_norm		# nonzero index: handled at fsub_not_norm
   15118 
   15119 #
   15120 # SUB: norms and denorms
   15121 #
   15122 fsub_norm:
   15123 	bsr.l		addsub_scaler2		# scale exponents (d0 is used as scale factor below)
   15124 
   15125 fsub_zero_entry:
   15126 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
   15127 
   15128 	fmov.l		&0x0,%fpsr		# clear FPSR
   15129 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   15130 
   15131 	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract
   15132 
   15133 	fmov.l		&0x0,%fpcr		# clear FPCR
   15134 	fmov.l		%fpsr,%d1		# fetch INEX2, N, Z
   15135 
   15136 	or.l		%d1,USER_FPSR(%a6)	# save exc and ccode bits
   15137 
   15138 	fbeq.w		fsub_zero_exit		# if result zero, end now
   15139 
   15140 	mov.l		%d2,-(%sp)		# save d2
   15141 
   15142 	fmovm.x		&0x01,-(%sp)		# save result to stack
   15143 
   15144 	mov.w		2+L_SCR3(%a6),%d1	# fetch low word of rnd info
   15145 	lsr.b		&0x6,%d1		# d1 = rnd prec (bits 7:6) as table index
   15146 
   15147 	mov.w		(%sp),%d2		# fetch new exponent
   15148 	andi.l		&0x7fff,%d2		# strip sign
   15149 	sub.l		%d0,%d2			# exp = exp - d0 (apply scale factor)
   15150 
   15151 	cmp.l		%d2,(tbl_fsub_ovfl.b,%pc,%d1.w*4) # is it an overflow?
   15152 	bge.b		fsub_ovfl		# yes
   15153 
   15154 	cmp.l		%d2,(tbl_fsub_unfl.b,%pc,%d1.w*4) # is it an underflow?
   15155 	blt.w		fsub_unfl		# yes
   15156 	beq.w		fsub_may_unfl		# maybe; go find out
   15157 
   15158 fsub_normal:
   15159 	mov.w		(%sp),%d1		# fetch {sgn,exp} of saved result
   15160 	andi.w		&0x8000,%d1		# keep sign
   15161 	or.w		%d2,%d1			# concat sign,new exp
   15162 	mov.w		%d1,(%sp)		# insert new exponent
   15163 
   15164 	fmovm.x		(%sp)+,&0x80		# return result in fp0
   15165 
   15166 	mov.l		(%sp)+,%d2		# restore d2
   15167 	rts
   15168 
   15169 fsub_zero_exit:			# nothing was pushed yet; fp0 already holds the result
   15170 #	fmov.s		&0x00000000,%fp0	# return zero in fp0
   15171 	rts
   15172 
   15173 tbl_fsub_ovfl:				# overflow if exp >= entry; indexed by rnd prec
   15174 	long		0x7fff			# ext ovfl
   15175 	long		0x407f			# sgl ovfl
   15176 	long		0x43ff			# dbl ovfl
   15177 
   15178 tbl_fsub_unfl:				# underflow if exp < entry; equal means "maybe"
   15179 	long	        0x0000			# ext unfl
   15180 	long		0x3f81			# sgl unfl
   15181 	long		0x3c01			# dbl unfl
   15182 
   15183 fsub_ovfl:				# entered with d2 and the 12-byte result on the stack
   15184 	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
   15185 
   15186 	mov.b		FPCR_ENABLE(%a6),%d1	# fetch exception enable byte
   15187 	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
   15188 	bne.b		fsub_ovfl_ena		# yes
   15189 
   15190 	add.l		&0xc,%sp		# discard the saved result (12 bytes)
   15191 fsub_ovfl_dis:				# here only the saved d2 remains on the stack
   15192 	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
   15193 	sne		%d1			# set sign param accordingly
   15194 	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
   15195 	bsr.l		ovf_res			# calculate default result
   15196 	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
   15197 	fmovm.x		(%a0),&0x80		# return default result in fp0
   15198 	mov.l		(%sp)+,%d2		# restore d2
   15199 	rts
   15200 
   15201 fsub_ovfl_ena:
   15202 	mov.b		L_SCR3(%a6),%d1
   15203 	andi.b		&0xc0,%d1		# is precision extended?
   15204 	bne.b		fsub_ovfl_ena_sd	# no
   15205 
   15206 fsub_ovfl_ena_cont:
   15207 	mov.w		(%sp),%d1		# fetch {sgn,exp}
   15208 	andi.w		&0x8000,%d1		# keep sign
   15209 	subi.l		&0x6000,%d2		# subtract new bias
   15210 	andi.w		&0x7fff,%d2		# clear top bit
   15211 	or.w		%d2,%d1			# concat sign,exp
   15212 	mov.w		%d1,(%sp)		# insert new exponent
   15213 
   15214 	fmovm.x		(%sp)+,&0x40		# return EXOP in fp1
   15215 	bra.b		fsub_ovfl_dis
   15216 
   15217 fsub_ovfl_ena_sd:
   15218 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
   15219 
   15220 	mov.l		L_SCR3(%a6),%d1
   15221 	andi.b		&0x30,%d1		# clear rnd prec
   15222 	fmov.l		%d1,%fpcr		# set FPCR
   15223 
   15224 	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract
   15225 
   15226 	fmov.l		&0x0,%fpcr		# clear FPCR
   15227 
   15228 	add.l		&0xc,%sp
   15229 	fmovm.x		&0x01,-(%sp)
   15230 	bra.b		fsub_ovfl_ena_cont
   15231 
   15232 fsub_unfl:				# underflow: redo the subtract in RZ, then default result
   15233 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
   15234 
   15235 	add.l		&0xc,%sp		# discard the saved result (12 bytes)
   15236 
   15237 	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
   15238 
   15239 	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR: RZ rnd mode only
   15240 	fmov.l		&0x0,%fpsr		# clear FPSR
   15241 
   15242 	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract
   15243 
   15244 	fmov.l		&0x0,%fpcr		# clear FPCR
   15245 	fmov.l		%fpsr,%d1		# save status
   15246 
   15247 	or.l		%d1,USER_FPSR(%a6)	# merge status bits into USER_FPSR
   15248 
   15249 	mov.b		FPCR_ENABLE(%a6),%d1	# fetch exception enable byte
   15250 	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
   15251 	bne.b		fsub_unfl_ena		# yes
   15252 
   15253 fsub_unfl_dis:				# also the return point of fsub_unfl_ena
   15254 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
   15255 
   15256 	lea		FP_SCR0(%a6),%a0	# pass: result addr
   15257 	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
   15258 	bsr.l		unf_res			# calculate default result
   15259 	or.b		%d0,FPSR_CC(%a6)	# 'Z' may have been set
   15260 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
   15261 	mov.l		(%sp)+,%d2		# restore d2
   15262 	rts
   15263 # UNFL or INEX is enabled: compute the EXOP in fp1 before the default result
   15264 fsub_unfl_ena:
   15265 	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
   15266 
   15267 	mov.l		L_SCR3(%a6),%d1		# fetch rnd prec,mode
   15268 	andi.b		&0xc0,%d1		# is precision extended?
   15269 	bne.b		fsub_unfl_ena_sd	# no
   15270 
   15271 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   15272 
   15273 fsub_unfl_ena_cont:
   15274 	fmov.l		&0x0,%fpsr		# clear FPSR
   15275 
   15276 	fsub.x		FP_SCR0(%a6),%fp1	# execute subtract
   15277 
   15278 	fmov.l		&0x0,%fpcr		# clear FPCR
   15279 
   15280 	fmovm.x		&0x40,FP_SCR0(%a6)	# store result to stack
   15281 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
   15282 	mov.l		%d1,%d2			# make a copy
   15283 	andi.l		&0x7fff,%d1		# strip sign
   15284 	andi.w		&0x8000,%d2		# keep old sign
   15285 	sub.l		%d0,%d1			# subtract scale factor
   15286 	addi.l		&0x6000,%d1		# add new bias
   15287 	andi.w		&0x7fff,%d1		# clear top bit
   15288 	or.w		%d2,%d1			# concat sgn,exp
   15289 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
   15290 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   15291 	bra.w		fsub_unfl_dis		# now go build the default result
   15292 # precision is not extended: load FPCR with the rnd mode only, then rejoin
   15293 fsub_unfl_ena_sd:
   15294 	mov.l		L_SCR3(%a6),%d1		# fetch rnd prec,mode
   15295 	andi.b		&0x30,%d1		# clear rnd prec
   15296 	fmov.l		%d1,%fpcr		# set FPCR
   15297 
   15298 	bra.b		fsub_unfl_ena_cont
   15299 
   15300 #
   15301 # result is equal to the smallest normalized number in the selected precision
   15302 # if the precision is extended, this result could not have come from an
   15303 # underflow that rounded up.
   15304 #
   15305 fsub_may_unfl:
   15306 	mov.l		L_SCR3(%a6),%d1
   15307 	andi.b		&0xc0,%d1		# fetch rnd prec; is it extended?
   15308 	beq.w		fsub_normal		# yes; no underflow occurred
   15309 
   15310 	mov.l		0x4(%sp),%d1		# fetch hi(man) of the result on the stack
   15311 	cmpi.l		%d1,&0x80000000		# is hi(man) = 0x80000000?
   15312 	bne.w		fsub_normal		# no; no underflow occurred
   15313 
   15314 	tst.l		0x8(%sp)		# is lo(man) = 0x0?
   15315 	bne.w		fsub_normal		# no; no underflow occurred
   15316 
   15317 	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
   15318 	beq.w		fsub_normal		# no; no underflow occurred
   15319 
   15320 #
   15321 # ok, so now the result has an exponent equal to the smallest normalized
   15322 # exponent for the selected precision. also, the mantissa is equal to
   15323 # 0x8000000000000000 and this mantissa is the result of rounding non-zero
   15324 # g,r,s.
   15325 # now, we must determine whether the pre-rounded result was an underflow
   15326 # rounded "up" or a normalized number rounded "down".
   15327 # so, we do this by re-executing the subtract using RZ as the rounding mode
   15328 # and seeing if the new result is smaller or equal to the current result.
   15329 #
   15330 	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
   15331 
   15332 	mov.l		L_SCR3(%a6),%d1
   15333 	andi.b		&0xc0,%d1		# keep rnd prec
   15334 	ori.b		&rz_mode*0x10,%d1	# insert rnd mode
   15335 	fmov.l		%d1,%fpcr		# set FPCR
   15336 	fmov.l		&0x0,%fpsr		# clear FPSR
   15337 
   15338 	fsub.x		FP_SCR0(%a6),%fp1	# execute subtract
   15339 
   15340 	fmov.l		&0x0,%fpcr		# clear FPCR
   15341 
   15342 	fabs.x		%fp0			# compare absolute values
   15343 	fabs.x		%fp1
   15344 	fcmp.x		%fp0,%fp1		# is first result > second?
   15345 
   15346 	fbgt.w		fsub_unfl		# yes; it's an underflow
   15347 	bra.w		fsub_normal		# no; it's not an underflow
   15348 
   15349 ##########################################################################
   15350 
   15351 #
   15352 # Sub: inputs are not both normalized; what are they?
   15353 #
   15354 fsub_not_norm:
   15355 	mov.w		(tbl_fsub_op.b,%pc,%d1.w*2),%d1	# d1 = 16-bit table entry (d1.w indexes it)
   15356 	jmp		(tbl_fsub_op.b,%pc,%d1.w*1)	# jump to tbl_fsub_op + entry
   15357 
   15358 	swbeg		&48
   15359 tbl_fsub_op:
   15360 	short		fsub_norm	- tbl_fsub_op # NORM - NORM
   15361 	short		fsub_zero_src	- tbl_fsub_op # NORM - ZERO
   15362 	short		fsub_inf_src	- tbl_fsub_op # NORM - INF
   15363 	short		fsub_res_qnan	- tbl_fsub_op # NORM - QNAN
   15364 	short		fsub_norm	- tbl_fsub_op # NORM - DENORM
   15365 	short		fsub_res_snan	- tbl_fsub_op # NORM - SNAN
   15366 	short		tbl_fsub_op	- tbl_fsub_op # (zero offset; no handler)
   15367 	short		tbl_fsub_op	- tbl_fsub_op # (zero offset; no handler)
   15368 
   15369 	short		fsub_zero_dst	- tbl_fsub_op # ZERO - NORM
   15370 	short		fsub_zero_2	- tbl_fsub_op # ZERO - ZERO
   15371 	short		fsub_inf_src	- tbl_fsub_op # ZERO - INF
   15372 	short		fsub_res_qnan	- tbl_fsub_op # ZERO - QNAN
   15373 	short		fsub_zero_dst	- tbl_fsub_op # ZERO - DENORM
   15374 	short		fsub_res_snan	- tbl_fsub_op # ZERO - SNAN
   15375 	short		tbl_fsub_op	- tbl_fsub_op # (zero offset; no handler)
   15376 	short		tbl_fsub_op	- tbl_fsub_op # (zero offset; no handler)
   15377 
   15378 	short		fsub_inf_dst	- tbl_fsub_op # INF - NORM
   15379 	short		fsub_inf_dst	- tbl_fsub_op # INF - ZERO
   15380 	short		fsub_inf_2	- tbl_fsub_op # INF - INF
   15381 	short		fsub_res_qnan	- tbl_fsub_op # INF - QNAN
   15382 	short		fsub_inf_dst	- tbl_fsub_op # INF - DENORM
   15383 	short		fsub_res_snan	- tbl_fsub_op # INF - SNAN
   15384 	short		tbl_fsub_op	- tbl_fsub_op # (zero offset; no handler)
   15385 	short		tbl_fsub_op	- tbl_fsub_op # (zero offset; no handler)
   15386 
   15387 	short		fsub_res_qnan	- tbl_fsub_op # QNAN - NORM
   15388 	short		fsub_res_qnan	- tbl_fsub_op # QNAN - ZERO
   15389 	short		fsub_res_qnan	- tbl_fsub_op # QNAN - INF
   15390 	short		fsub_res_qnan	- tbl_fsub_op # QNAN - QNAN
   15391 	short		fsub_res_qnan	- tbl_fsub_op # QNAN - DENORM
   15392 	short		fsub_res_snan	- tbl_fsub_op # QNAN - SNAN
   15393 	short		tbl_fsub_op	- tbl_fsub_op # (zero offset; no handler)
   15394 	short		tbl_fsub_op	- tbl_fsub_op # (zero offset; no handler)
   15395 
   15396 	short		fsub_norm	- tbl_fsub_op # DENORM - NORM
   15397 	short		fsub_zero_src	- tbl_fsub_op # DENORM - ZERO
   15398 	short		fsub_inf_src	- tbl_fsub_op # DENORM - INF
   15399 	short		fsub_res_qnan	- tbl_fsub_op # DENORM - QNAN
   15400 	short		fsub_norm	- tbl_fsub_op # DENORM - DENORM
   15401 	short		fsub_res_snan	- tbl_fsub_op # DENORM - SNAN
   15402 	short		tbl_fsub_op	- tbl_fsub_op # (zero offset; no handler)
   15403 	short		tbl_fsub_op	- tbl_fsub_op # (zero offset; no handler)
   15404 
   15405 	short		fsub_res_snan	- tbl_fsub_op # SNAN - NORM
   15406 	short		fsub_res_snan	- tbl_fsub_op # SNAN - ZERO
   15407 	short		fsub_res_snan	- tbl_fsub_op # SNAN - INF
   15408 	short		fsub_res_snan	- tbl_fsub_op # SNAN - QNAN
   15409 	short		fsub_res_snan	- tbl_fsub_op # SNAN - DENORM
   15410 	short		fsub_res_snan	- tbl_fsub_op # SNAN - SNAN
   15411 	short		tbl_fsub_op	- tbl_fsub_op # (zero offset; no handler)
   15412 	short		tbl_fsub_op	- tbl_fsub_op # (zero offset; no handler)
   15413 
   15414 fsub_res_qnan:
   15415 	bra.l		res_qnan		# table target: long branch to res_qnan
   15416 fsub_res_snan:
   15417 	bra.l		res_snan		# table target: long branch to res_snan
   15418 
   15419 #
   15420 # both operands are ZEROes
   15421 #
   15422 fsub_zero_2:
   15423 	mov.b		SRC_EX(%a0),%d0
   15424 	mov.b		DST_EX(%a1),%d1
   15425 	eor.b		%d1,%d0
   15426 	bpl.b		fsub_zero_2_chk_rm
   15427 
   15428 # the signs are opposite, so, return a ZERO w/ the sign of the dst ZERO
   15429 	tst.b		%d0			# is dst negative?
   15430 	bmi.b		fsub_zero_2_rm		# yes
   15431 	fmov.s		&0x00000000,%fp0	# no; return +ZERO
   15432 	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
   15433 	rts
   15434 
   15435 #
   15436 # the ZEROes have the same signs:
   15437 # - therefore, we return +ZERO if the rounding mode is RN,RZ, or RP
   15438 # - -ZERO is returned in the case of RM.
   15439 #
   15440 fsub_zero_2_chk_rm:
   15441 	mov.b		3+L_SCR3(%a6),%d1
   15442 	andi.b		&0x30,%d1		# extract rnd mode
   15443 	cmpi.b		%d1,&rm_mode*0x10	# is rnd mode = RM?
   15444 	beq.b		fsub_zero_2_rm		# yes
   15445 	fmov.s		&0x00000000,%fp0	# no; return +ZERO
   15446 	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
   15447 	rts
   15448 
   15449 fsub_zero_2_rm:
   15450 	fmov.s		&0x80000000,%fp0	# return -ZERO
   15451 	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set Z/NEG
   15452 	rts
   15453 
   15454 #
   15455 # one operand is a ZERO and the other is a DENORM or a NORM.
   15456 # scale the DENORM or NORM and jump to the regular fsub routine.
   15457 #
   15458 fsub_zero_dst:
   15459 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy src op to FP_SCR0
   15460 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   15461 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   15462 	bsr.l		scale_to_zero_src	# scale the operand
   15463 	clr.w		FP_SCR1_EX(%a6)		# FP_SCR1 (dst) = all zero bits (+ZERO)
   15464 	clr.l		FP_SCR1_HI(%a6)
   15465 	clr.l		FP_SCR1_LO(%a6)
   15466 	bra.w		fsub_zero_entry		# go execute fsub
   15467 
   15468 fsub_zero_src:
   15469 	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)	# copy dst op to FP_SCR1
   15470 	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
   15471 	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
   15472 	bsr.l		scale_to_zero_dst	# scale the operand
   15473 	clr.w		FP_SCR0_EX(%a6)		# FP_SCR0 (src) = all zero bits (+ZERO)
   15474 	clr.l		FP_SCR0_HI(%a6)
   15475 	clr.l		FP_SCR0_LO(%a6)
   15476 	bra.w		fsub_zero_entry		# go execute fsub
   15477 
   15478 #
   15479 # both operands are INFs. an OPERR will result if the INFs have the
   15480 # same signs. else, fall through and return the src INF with its sign inverted.
   15481 #
   15482 fsub_inf_2:
   15483 	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
   15484 	mov.b		DST_EX(%a1),%d1
   15485 	eor.b		%d1,%d0
   15486 	bpl.l		res_operr		# same signs: INF - INF is an OPERR
   15487 
   15488 # ok, so it's not an OPERR. but we do have to remember to return
   15489 # the src INF since that's where the 881/882 gets the j-bit.
   15490 
   15491 fsub_inf_src:
   15492 	fmovm.x		SRC(%a0),&0x80		# return src INF
   15493 	fneg.x		%fp0			# invert sign
   15494 	fbge.w		fsub_inf_done		# sign is now positive
   15495 	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
   15496 	rts
   15497 
   15498 fsub_inf_dst:
   15499 	fmovm.x		DST(%a1),&0x80		# return dst INF
   15500 	tst.b		DST_EX(%a1)		# is INF negative?
   15501 	bpl.b		fsub_inf_done		# no
   15502 	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
   15503 	rts
   15504 
   15505 fsub_inf_done:
   15506 	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
   15507 	rts
   15508 
   15509 #########################################################################
   15510 # XDEF ****************************************************************	#
   15511 # 	fsqrt(): emulates the fsqrt instruction				#
   15512 #	fssqrt(): emulates the fssqrt instruction			#
   15513 #	fdsqrt(): emulates the fdsqrt instruction			#
   15514 #									#
   15515 # XREF ****************************************************************	#
   15516 #	scale_sqrt() - scale the source operand				#
   15517 #	unf_res() - return default underflow result			#
   15518 #	ovf_res() - return default overflow result			#
   15519 # 	res_qnan_1op() - return QNAN result				#
   15520 # 	res_snan_1op() - return SNAN result				#
   15521 #									#
   15522 # INPUT ***************************************************************	#
   15523 #	a0 = pointer to extended precision source operand		#
   15524 #	d0  rnd prec,mode						#
   15525 #									#
   15526 # OUTPUT **************************************************************	#
   15527 #	fp0 = result							#
   15528 #	fp1 = EXOP (if exception occurred)				#
   15529 #									#
   15530 # ALGORITHM ***********************************************************	#
   15531 #	Handle NANs, infinities, and zeroes as special cases. Divide	#
   15532 # norms/denorms into ext/sgl/dbl precision.				#
   15533 #	For norms/denorms, scale the exponents such that a sqrt		#
   15534 # instruction won't cause an exception. Use the regular fsqrt to	#
   15535 # compute a result. Check if the regular operands would have taken	#
   15536 # an exception. If so, return the default overflow/underflow result	#
   15537 # and return the EXOP if exceptions are enabled. Else, scale the 	#
   15538 # result operand to the proper exponent.				#
   15539 #									#
   15540 #########################################################################
   15541 
   15542 	global		fssqrt
   15543 fssqrt:
   15544 	andi.b		&0x30,%d0		# clear rnd prec
   15545 	ori.b		&s_mode*0x10,%d0	# insert sgl precision
   15546 	bra.b		fsqrt			# join the common fsqrt code
   15547 
   15548 	global		fdsqrt
   15549 fdsqrt:
   15550 	andi.b		&0x30,%d0		# clear rnd prec
   15551 	ori.b		&d_mode*0x10,%d0	# insert dbl precision
   15552 # fall through into fsqrt
   15553 	global		fsqrt
   15554 fsqrt:
   15555 	mov.l		%d0,L_SCR3(%a6)		# store rnd info
   15556 	clr.w		%d1
   15557 	mov.b		STAG(%a6),%d1		# d1.w = src operand tag
   15558 	bne.w		fsqrt_not_norm		# non-zero tag; go sort out the type
   15559 
   15560 #
   15561 # SQUARE ROOT: norms and denorms ONLY!
   15562 #
   15563 fsqrt_norm:
   15564 	tst.b		SRC_EX(%a0)		# is operand negative?
   15565 	bmi.l		res_operr		# yes
   15566 
   15567 	andi.b		&0xc0,%d0		# is precision extended?
   15568 	bne.b		fsqrt_not_ext		# no; go handle sgl or dbl
   15569 
   15570 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   15571 	fmov.l		&0x0,%fpsr		# clear FPSR
   15572 
   15573 	fsqrt.x		(%a0),%fp0		# execute square root
   15574 
   15575 	fmov.l		%fpsr,%d1
   15576 	or.l		%d1,USER_FPSR(%a6)	# set N,INEX
   15577 # NOTE(review): FPCR is not cleared on this path, unlike the sgl/dbl paths - confirm intended
   15578 	rts
   15579 
   15580 fsqrt_denorm:
   15581 	tst.b		SRC_EX(%a0)		# is operand negative?
   15582 	bmi.l		res_operr		# yes
   15583 
   15584 	andi.b		&0xc0,%d0		# is precision extended?
   15585 	bne.b		fsqrt_not_ext		# no; go handle sgl or dbl
   15586 
   15587 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy src op to FP_SCR0
   15588 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   15589 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   15590 
   15591 	bsr.l		scale_sqrt		# calculate scale factor
   15592 
   15593 	bra.w		fsqrt_sd_normal		# take root of scaled op, then unscale
   15594 
   15595 #
   15596 # operand is either single or double
   15597 #
   15598 fsqrt_not_ext:
   15599 	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
   15600 	bne.w		fsqrt_dbl		# not sgl; go handle dbl
   15601 
   15602 #
   15603 # operand is to be rounded to single precision
   15604 #
   15605 fsqrt_sgl:
   15606 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy src op to FP_SCR0
   15607 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   15608 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   15609 
   15610 	bsr.l		scale_sqrt		# calculate scale factor
   15611 
   15612 	cmpi.l		%d0,&0x3fff-0x3f81	# will move in underflow?
   15613 	beq.w		fsqrt_sd_may_unfl	# maybe; go check
   15614 	bgt.w		fsqrt_sd_unfl		# yes; go handle underflow
   15615 	cmpi.l		%d0,&0x3fff-0x407f	# will move in overflow?
   15616 	beq.w		fsqrt_sd_may_ovfl	# maybe; go check
   15617 	blt.w		fsqrt_sd_ovfl		# yes; go handle overflow
   15618 
   15619 #
   15620 # operand will NOT overflow or underflow when moved in to the fp reg file
   15621 #
   15622 fsqrt_sd_normal:
   15623 	fmov.l		&0x0,%fpsr		# clear FPSR
   15624 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   15625 
   15626 	fsqrt.x		FP_SCR0(%a6),%fp0	# perform square root
   15627 
   15628 	fmov.l		%fpsr,%d1		# save FPSR
   15629 	fmov.l		&0x0,%fpcr		# clear FPCR
   15630 
   15631 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   15632 
   15633 fsqrt_sd_normal_exit:
   15634 	mov.l		%d2,-(%sp)		# save d2
   15635 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
   15636 	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
   15637 	mov.l		%d1,%d2			# make a copy
   15638 	andi.l		&0x7fff,%d1		# strip sign
   15639 	sub.l		%d0,%d1			# subtract scale factor
   15640 	andi.w		&0x8000,%d2		# keep old sign
   15641 	or.w		%d1,%d2			# concat old sign,new exp
   15642 	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
   15643 	mov.l		(%sp)+,%d2		# restore d2
   15644 	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
   15645 	rts
   15646 
   15647 #
   15648 # operand is to be rounded to double precision
   15649 #
   15650 fsqrt_dbl:
   15651 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy src op to FP_SCR0
   15652 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   15653 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   15654 
   15655 	bsr.l		scale_sqrt		# calculate scale factor
   15656 
   15657 	cmpi.l		%d0,&0x3fff-0x3c01	# will move in underflow?
   15658 	beq.w		fsqrt_sd_may_unfl	# maybe; go check
   15659 	bgt.b		fsqrt_sd_unfl		# yes; go handle underflow
   15660 	cmpi.l		%d0,&0x3fff-0x43ff	# will move in overflow?
   15661 	beq.w		fsqrt_sd_may_ovfl	# maybe; go check
   15662 	blt.w		fsqrt_sd_ovfl		# yes; go handle overflow
   15663 	bra.w		fsqrt_sd_normal		# no; go handle normalized op
   15664 
   15665 # we're on the line here and the distinguishing characteristic is whether
   15666 # the scaled exponent is 3fff or 3ffe. if it's 3fff, then it's a safe number;
   15667 # otherwise (3ffe) fall through to underflow.
   15668 fsqrt_sd_may_unfl:
   15669 	btst		&0x0,1+FP_SCR0_EX(%a6)	# is exponent 0x3fff?
   15670 	bne.w		fsqrt_sd_normal		# yes, so no underflow
   15671 
   15672 #
   15673 # operand WILL underflow when moved in to the fp register file
   15674 #
   15675 fsqrt_sd_unfl:
   15676 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
   15677 
   15678 	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
   15679 	fmov.l		&0x0,%fpsr		# clear FPSR
   15680 
   15681 	fsqrt.x 	FP_SCR0(%a6),%fp0	# execute square root
   15682 
   15683 	fmov.l		%fpsr,%d1		# save status
   15684 	fmov.l		&0x0,%fpcr		# clear FPCR
   15685 
   15686 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   15687 
   15688 # if underflow or inexact is enabled, go calculate EXOP first.
   15689 	mov.b		FPCR_ENABLE(%a6),%d1	# fetch exception enable byte
   15690 	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
   15691 	bne.b		fsqrt_sd_unfl_ena	# yes
   15692 
   15693 fsqrt_sd_unfl_dis:
   15694 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
   15695 
   15696 	lea		FP_SCR0(%a6),%a0	# pass: result addr
   15697 	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
   15698 	bsr.l		unf_res			# calculate default result
   15699 	or.b		%d0,FPSR_CC(%a6)	# set possible 'Z' ccode
   15700 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
   15701 	rts
   15702 
   15703 #
   15704 # operand will underflow AND underflow is enabled.
   15705 # therefore, we must return the result rounded to extended precision.
   15706 #
   15707 fsqrt_sd_unfl_ena:
   15708 	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)	# copy FP_SCR0 mantissa to FP_SCR1
   15709 	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
   15710 	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent
   15711 # NOTE(review): fp0 is not stored to FP_SCR0 until fsqrt_sd_unfl_dis, so this EXOP is built from the scaled operand, not the root - confirm
   15712 	mov.l		%d2,-(%sp)		# save d2
   15713 	mov.l		%d1,%d2			# make a copy
   15714 	andi.l		&0x7fff,%d1		# strip sign
   15715 	andi.w		&0x8000,%d2		# keep old sign
   15716 	sub.l		%d0,%d1			# subtract scale factor
   15717 	addi.l		&0x6000,%d1		# add new bias
   15718 	andi.w		&0x7fff,%d1		# clear top bit
   15719 	or.w		%d2,%d1			# concat new sign,new exp
   15720 	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
   15721 	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
   15722 	mov.l		(%sp)+,%d2		# restore d2
   15723 	bra.b		fsqrt_sd_unfl_dis	# now go build the default result
   15724 
   15725 #
   15726 # operand WILL overflow.
   15727 #
   15728 fsqrt_sd_ovfl:
   15729 	fmov.l		&0x0,%fpsr		# clear FPSR
   15730 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   15731 
   15732 	fsqrt.x		FP_SCR0(%a6),%fp0	# perform square root
   15733 
   15734 	fmov.l		&0x0,%fpcr		# clear FPCR
   15735 	fmov.l		%fpsr,%d1		# save FPSR
   15736 
   15737 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   15738 
   15739 fsqrt_sd_ovfl_tst:
   15740 	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
   15741 
   15742 	mov.b		FPCR_ENABLE(%a6),%d1	# fetch exception enable byte
   15743 	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
   15744 	bne.b		fsqrt_sd_ovfl_ena	# yes
   15745 
   15746 #
   15747 # OVFL is not enabled; therefore, we must create the default result by
   15748 # calling ovf_res().
   15749 #
   15750 fsqrt_sd_ovfl_dis:
   15751 	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
   15752 	sne		%d1			# set sign param accordingly
   15753 	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
   15754 	bsr.l		ovf_res			# calculate default result
   15755 	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
   15756 	fmovm.x		(%a0),&0x80		# return default result in fp0
   15757 	rts
   15758 
   15759 # OVFL or INEX is enabled: build the EXOP in fp1, then go make the default result.
   15760 # OVFL is enabled.
   15761 # the INEX2 bit has already been updated by the round to the correct precision.
   15762 # now, round to extended(and don't alter the FPSR).
   15763 # NOTE(review): fp0 is never stored to FP_SCR0 here, so the EXOP mantissa is the scaled operand's, not the root's - confirm
   15764 fsqrt_sd_ovfl_ena:
   15765 	mov.l		%d2,-(%sp)		# save d2
   15766 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
   15767 	mov.l		%d1,%d2			# make a copy
   15768 	andi.l		&0x7fff,%d1		# strip sign
   15769 	andi.w		&0x8000,%d2		# keep old sign
   15770 	sub.l		%d0,%d1			# subtract scale factor
   15771 	subi.l		&0x6000,%d1		# subtract bias
   15772 	andi.w		&0x7fff,%d1		# clear top bit
   15773 	or.w		%d2,%d1			# concat sign,exp
   15774 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
   15775 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   15776 	mov.l		(%sp)+,%d2		# restore d2
   15777 	bra.b		fsqrt_sd_ovfl_dis	# now go build the default result
   15778 
   15779 #
   15780 # the move in MAY overflow. so...
   15781 #
   15782 fsqrt_sd_may_ovfl:
   15783 	btst		&0x0,1+FP_SCR0_EX(%a6)	# is exponent 0x3fff?
   15784 	bne.w		fsqrt_sd_ovfl		# yes, so overflow
   15785 
   15786 	fmov.l		&0x0,%fpsr		# clear FPSR
   15787 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   15788 
   15789 	fsqrt.x		FP_SCR0(%a6),%fp0	# perform square root
   15790 
   15791 	fmov.l		%fpsr,%d1		# save status
   15792 	fmov.l		&0x0,%fpcr		# clear FPCR
   15793 
   15794 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   15795 
   15796 	fmov.x		%fp0,%fp1		# make a copy of result
   15797 	fcmp.b		%fp1,&0x1		# is |result| >= 1.b?
   15798 	fbge.w		fsqrt_sd_ovfl_tst	# yes; overflow has occurred
   15799 
   15800 # no, it didn't overflow; we have correct result
   15801 	bra.w		fsqrt_sd_normal_exit
   15802 
   15803 ##########################################################################
   15804 
   15805 #
   15806 # input is not normalized; what is it? (d1 = src operand tag)
   15807 #
   15808 fsqrt_not_norm:
   15809 	cmpi.b		%d1,&DENORM		# weed out DENORM
   15810 	beq.w		fsqrt_denorm
   15811 	cmpi.b		%d1,&ZERO		# weed out ZERO
   15812 	beq.b		fsqrt_zero
   15813 	cmpi.b		%d1,&INF		# weed out INF
   15814 	beq.b		fsqrt_inf
   15815 	cmpi.b		%d1,&SNAN		# weed out SNAN
   15816 	beq.l		res_snan_1op
   15817 	bra.l		res_qnan_1op		# any other tag goes to the QNAN handler
   15818 
   15819 #
   15820 # 	fsqrt(+0) = +0
   15821 # 	fsqrt(-0) = -0
   15822 #	fsqrt(+INF) = +INF
   15823 # 	fsqrt(-INF) = OPERR
   15824 #
   15825 fsqrt_zero:
   15826 	tst.b		SRC_EX(%a0)		# is ZERO positive or negative?
   15827 	bmi.b		fsqrt_zero_m		# negative
   15828 fsqrt_zero_p:
   15829 	fmov.s		&0x00000000,%fp0	# return +ZERO
   15830 	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
   15831 	rts
   15832 fsqrt_zero_m:
   15833 	fmov.s		&0x80000000,%fp0	# return -ZERO
   15834 	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set 'Z','N' ccode bits
   15835 	rts
   15836 
   15837 fsqrt_inf:
   15838 	tst.b		SRC_EX(%a0)		# is INF positive or negative?
   15839 	bmi.l		res_operr		# negative
   15840 fsqrt_inf_p:
   15841 	fmovm.x		SRC(%a0),&0x80		# return +INF in fp0
   15842 	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
   15843 	rts
   15844 
   15845 ##########################################################################
   15846 
   15847 #########################################################################
   15848 # XDEF ****************************************************************	#
   15849 #	addsub_scaler2(): scale inputs to fadd/fsub such that no	#
   15850 #			  OVFL/UNFL exceptions will result		#
   15851 #									#
   15852 # XREF ****************************************************************	#
   15853 #	norm() - normalize mantissa after adjusting exponent		#
   15854 #									#
   15855 # INPUT ***************************************************************	#
   15856 #	a0 = pointer to fp op1(src)					#
   15857 #	a1 = pointer to fp op2(dst)					#
   15858 # 									#
   15859 # OUTPUT **************************************************************	#
   15860 #	FP_SCR0(a6) = fp op1 scaled(src)				#
   15861 #	FP_SCR1(a6) = fp op2 scaled(dst)				#
   15862 #	d0         = scale amount					#
   15863 #									#
   15864 # ALGORITHM ***********************************************************	#
   15865 # 	If the DST exponent is > the SRC exponent, set the DST exponent	#
   15866 # equal to 0x3fff and scale the SRC exponent by the value that the	#
   15867 # DST exponent was scaled by. If the SRC exponent is greater or equal,	#
   15868 # do the opposite. Return this scale factor in d0.			#
   15869 #	If the two exponents differ by > the number of mantissa bits	#
   15870 # plus two, then set the smallest exponent to a very small value as a	#
   15871 # quick shortcut.							#
   15872 #									#
   15873 #########################################################################
   15874 
   15875 	global		addsub_scaler2
   15876 addsub_scaler2:
   15877 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)	# copy src op to FP_SCR0 and
   15878 	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)	# dst op to FP_SCR1
   15879 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   15880 	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
   15881 	mov.w		SRC_EX(%a0),%d0		# d0 = src {sgn,exp}
   15882 	mov.w		DST_EX(%a1),%d1		# d1 = dst {sgn,exp}
   15883 	mov.w		%d0,FP_SCR0_EX(%a6)
   15884 	mov.w		%d1,FP_SCR1_EX(%a6)
   15885 
   15886 	andi.w		&0x7fff,%d0		# strip src sign
   15887 	andi.w		&0x7fff,%d1		# strip dst sign
   15888 	mov.w		%d0,L_SCR1(%a6)		# store src exponent
   15889 	mov.w		%d1,2+L_SCR1(%a6)	# store dst exponent
   15890 
   15891 	cmp.w		%d0, %d1		# is src exp >= dst exp?
   15892 	bge.l		src_exp_ge2
   15893 
   15894 # dst exp is >  src exp; scale dst to exp = 0x3fff
   15895 dst_exp_gt2:
   15896 	bsr.l		scale_to_zero_dst
   15897 	mov.l		%d0,-(%sp)		# save scale factor
   15898 
   15899 	cmpi.b		STAG(%a6),&DENORM	# is src denormalized?
   15900 	bne.b		cmpexp12
   15901 
   15902 	lea		FP_SCR0(%a6),%a0
   15903 	bsr.l		norm			# normalize the denorm; result is new exp
   15904 	neg.w		%d0			# new exp = -(shft val)
   15905 	mov.w		%d0,L_SCR1(%a6)		# insert new exp
   15906 
   15907 cmpexp12:
   15908 	mov.w		2+L_SCR1(%a6),%d0
   15909 	subi.w		&mantissalen+2,%d0	# subtract mantissalen+2 from larger exp
   15910 
   15911 	cmp.w		%d0,L_SCR1(%a6)		# is difference >= len(mantissa)+2?
   15912 	bge.b		quick_scale12
   15913 
   15914 	mov.w		L_SCR1(%a6),%d0
   15915 	add.w		0x2(%sp),%d0		# scale src exponent by scale factor
   15916 	mov.w		FP_SCR0_EX(%a6),%d1
   15917 	and.w		&0x8000,%d1		# keep src sign
   15918 	or.w		%d1,%d0			# concat {sgn,new exp}
   15919 	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new src exponent
   15920 
   15921 	mov.l		(%sp)+,%d0		# return SCALE factor
   15922 	rts
   15923 
   15924 quick_scale12:
   15925 	andi.w		&0x8000,FP_SCR0_EX(%a6)	# zero src exponent
   15926 	bset		&0x0,1+FP_SCR0_EX(%a6)	# set exp = 1
   15927 
   15928 	mov.l		(%sp)+,%d0		# return SCALE factor
   15929 	rts
   15930 
   15931 # src exp is >= dst exp; scale src to exp = 0x3fff
   15932 src_exp_ge2:
   15933 	bsr.l		scale_to_zero_src
   15934 	mov.l		%d0,-(%sp)		# save scale factor
   15935 
   15936 	cmpi.b		DTAG(%a6),&DENORM	# is dst denormalized?
   15937 	bne.b		cmpexp22
   15938 	lea		FP_SCR1(%a6),%a0
   15939 	bsr.l		norm			# normalize the denorm; result is new exp
   15940 	neg.w		%d0			# new exp = -(shft val)
   15941 	mov.w		%d0,2+L_SCR1(%a6)	# insert new exp
   15942 
   15943 cmpexp22:
   15944 	mov.w		L_SCR1(%a6),%d0
   15945 	subi.w		&mantissalen+2,%d0	# subtract mantissalen+2 from larger exp
   15946 
   15947 	cmp.w		%d0,2+L_SCR1(%a6)	# is difference >= len(mantissa)+2?
   15948 	bge.b		quick_scale22
   15949 
   15950 	mov.w		2+L_SCR1(%a6),%d0
   15951 	add.w		0x2(%sp),%d0		# scale dst exponent by scale factor
   15952 	mov.w		FP_SCR1_EX(%a6),%d1
   15953 	andi.w		&0x8000,%d1		# keep dst sign
   15954 	or.w		%d1,%d0			# concat {sgn,new exp}
   15955 	mov.w		%d0,FP_SCR1_EX(%a6)	# insert new dst exponent
   15956 
   15957 	mov.l		(%sp)+,%d0		# return SCALE factor
   15958 	rts
   15959 
   15960 quick_scale22:
   15961 	andi.w		&0x8000,FP_SCR1_EX(%a6)	# zero dst exponent
   15962 	bset		&0x0,1+FP_SCR1_EX(%a6)	# set exp = 1
   15963 
   15964 	mov.l		(%sp)+,%d0		# return SCALE factor
   15965 	rts
   15966 
   15967 ##########################################################################
   15968 
   15969 #########################################################################
   15970 # XDEF ****************************************************************	#
   15971 #	scale_to_zero_src(): scale the exponent of extended precision	#
   15972 #			     value at FP_SCR0(a6).			#
   15973 #									#
   15974 # XREF ****************************************************************	#
   15975 #	norm() - normalize the mantissa if the operand was a DENORM	#
   15976 #									#
   15977 # INPUT ***************************************************************	#
   15978 #	FP_SCR0(a6) = extended precision operand to be scaled		#
   15979 # 									#
   15980 # OUTPUT **************************************************************	#
   15981 #	FP_SCR0(a6) = scaled extended precision operand			#
   15982 #	d0	    = scale value					#
   15983 #									#
   15984 # ALGORITHM ***********************************************************	#
   15985 # 	Set the exponent of the input operand to 0x3fff. Save the value	#
   15986 # of the difference between the original and new exponent. Then, 	#
   15987 # normalize the operand if it was a DENORM. Add this normalization	#
   15988 # value to the previous value. Return the result.			#
   15989 #									#
   15990 #########################################################################
   15991 
   15992 	global		scale_to_zero_src
   15993 scale_to_zero_src:
   15994 	mov.w		FP_SCR0_EX(%a6),%d1	# extract operand's {sgn,exp}
   15995 	mov.w		%d1,%d0			# make a copy
   15996 
   15997 	andi.l		&0x7fff,%d1		# extract operand's exponent
   15998 
   15999 	andi.w		&0x8000,%d0		# extract operand's sgn
   16000 	or.w		&0x3fff,%d0		# insert new operand's exponent(=0)
   16001 
   16002 	mov.w		%d0,FP_SCR0_EX(%a6)	# insert biased exponent
   16003 
   16004 	cmpi.b		STAG(%a6),&DENORM	# is operand normalized?
   16005 	beq.b		stzs_denorm		# normalize the DENORM
   16006 
   16007 stzs_norm:
   16008 	mov.l		&0x3fff,%d0
   16009 	sub.l		%d1,%d0			# scale = BIAS + (-exp)
   16010 
   16011 	rts
   16012 
   16013 stzs_denorm:
   16014 	lea		FP_SCR0(%a6),%a0	# pass ptr to src op
   16015 	bsr.l		norm			# normalize denorm
   16016 	neg.l		%d0			# new exponent = -(shft val)
   16017 	mov.l		%d0,%d1			# d1 = exponent used by stzs_norm
   16018 	bra.b		stzs_norm		# finish scaling
   16019 
   16020 ###
   16021 
   16022 #########################################################################
   16023 # XDEF ****************************************************************	#
   16024 #	scale_sqrt(): scale the input operand exponent so a subsequent	#
   16025 #		      fsqrt operation won't take an exception.		#
   16026 #									#
   16027 # XREF ****************************************************************	#
   16028 #	norm() - normalize the mantissa if the operand was a DENORM	#
   16029 #									#
   16030 # INPUT ***************************************************************	#
   16031 #	FP_SCR0(a6) = extended precision operand to be scaled		#
   16032 # 									#
   16033 # OUTPUT **************************************************************	#
   16034 #	FP_SCR0(a6) = scaled extended precision operand			#
   16035 #	d0	    = scale value					#
   16036 #									#
   16037 # ALGORITHM ***********************************************************	#
   16038 #	If the input operand is a DENORM, normalize it.			#
   16039 # 	If the exponent of the input operand is even, set the exponent	#
   16040 # to 0x3ffe and return a scale factor of "(0x3ffe-exp)/2". If the 	#
   16041 # exponent of the input operand is odd, set the exponent to 0x3fff and	#
   16042 # return a scale factor of "(0x3fff-exp)/2". 				#
   16043 #									#
   16044 #########################################################################
   16045 
   16046 	global		scale_sqrt
   16047 scale_sqrt:
   16048 	cmpi.b		STAG(%a6),&DENORM	# is operand normalized?
   16049 	beq.b		ss_denorm		# normalize the DENORM
   16050 
   16051 	mov.w		FP_SCR0_EX(%a6),%d1	# extract operand's {sgn,exp}
   16052 	andi.l		&0x7fff,%d1		# extract operand's exponent
   16053 
   16054 	andi.w		&0x8000,FP_SCR0_EX(%a6)	# extract operand's sgn
   16055 
   16056 	btst		&0x0,%d1		# is exp even or odd?
   16057 	beq.b		ss_norm_even		# even
   16058 
   16059 	ori.w		&0x3fff,FP_SCR0_EX(%a6)	# odd: insert new operand's exponent(=0)
   16060 
   16061 	mov.l		&0x3fff,%d0
   16062 	sub.l		%d1,%d0			# scale = BIAS + (-exp)
   16063 	asr.l		&0x1,%d0		# divide scale factor by 2
   16064 	rts
   16065 
   16066 ss_norm_even:
   16067 	ori.w		&0x3ffe,FP_SCR0_EX(%a6)	# insert new exponent 0x3ffe (= -1 unbiased)
   16068 
   16069 	mov.l		&0x3ffe,%d0
   16070 	sub.l		%d1,%d0			# scale = 0x3ffe + (-exp)
   16071 	asr.l		&0x1,%d0		# divide scale factor by 2
   16072 	rts
   16073 
   16074 ss_denorm:
   16075 	lea		FP_SCR0(%a6),%a0	# pass ptr to src op
   16076 	bsr.l		norm			# normalize denorm
   16077 
   16078 	btst		&0x0,%d0		# is norm()'s d0 result even or odd?
   16079 	beq.b		ss_denorm_even		# even
   16080 
   16081 	ori.w		&0x3fff,FP_SCR0_EX(%a6)	# odd: insert new operand's exponent(=0)
   16082 
   16083 	add.l		&0x3fff,%d0		# scale = d0 + 0x3fff
   16084 	asr.l		&0x1,%d0		# divide scale factor by 2
   16085 	rts
   16086 
   16087 ss_denorm_even:
   16088 	ori.w		&0x3ffe,FP_SCR0_EX(%a6)	# insert new exponent 0x3ffe (= -1 unbiased)
   16089 
   16090 	add.l		&0x3ffe,%d0		# scale = d0 + 0x3ffe
   16091 	asr.l		&0x1,%d0		# divide scale factor by 2
   16092 	rts
   16093 
   16094 ###
   16095 
   16096 #########################################################################
   16097 # XDEF ****************************************************************	#
   16098 #	scale_to_zero_dst(): scale the exponent of extended precision	#
   16099 #			     value at FP_SCR1(a6).			#
   16100 #									#
   16101 # XREF ****************************************************************	#
   16102 #	norm() - normalize the mantissa if the operand was a DENORM	#
   16103 #									#
   16104 # INPUT ***************************************************************	#
   16105 #	FP_SCR1(a6) = extended precision operand to be scaled		#
   16106 # 									#
   16107 # OUTPUT **************************************************************	#
   16108 #	FP_SCR1(a6) = scaled extended precision operand			#
   16109 #	d0	    = scale value					#
   16110 #									#
   16111 # ALGORITHM ***********************************************************	#
   16112 # 	Set the exponent of the input operand to 0x3fff. Save the value	#
   16113 # of the difference between the original and new exponent. Then, 	#
   16114 # normalize the operand if it was a DENORM. Add this normalization	#
   16115 # value to the previous value. Return the result.			#
   16116 #									#
   16117 #########################################################################
   16118 
   16119 	global		scale_to_zero_dst
   16120 scale_to_zero_dst:
   16121 	mov.w		FP_SCR1_EX(%a6),%d1	# fetch operand's {sgn,exp}
   16122 	mov.w		%d1,%d0			# make a copy
   16123 
   16124 	andi.l		&0x7fff,%d1		# d1 = operand's biased exponent
   16125 
   16126 	andi.w		&0x8000,%d0		# d0 = operand's sgn only
   16127 	or.w		&0x3fff,%d0		# insert new biased exponent (0x3fff)
   16128 
   16129 	mov.w		%d0,FP_SCR1_EX(%a6)	# store {sgn,0x3fff} back to operand
   16130 
   16131 	cmpi.b		DTAG(%a6),&DENORM	# is the dst operand a DENORM?
   16132 	beq.b		stzd_denorm		# yes; normalize the DENORM
   16133 
   16134 stzd_norm:
   16135 	mov.l		&0x3fff,%d0
   16136 	sub.l		%d1,%d0			# scale = 0x3fff - exp (exp in d1)
   16137 	rts
   16138 
   16139 stzd_denorm:
   16140 	lea		FP_SCR1(%a6),%a0	# pass ptr to dst op
   16141 	bsr.l		norm			# normalize denorm
   16142 	neg.l		%d0			# new exponent = -(shft val)
   16143 	mov.l		%d0,%d1			# d1 = exponent used by stzd_norm
   16144 	bra.b		stzd_norm		# finish scaling
   16145 
   16146 ##########################################################################
   16147 
   16148 #########################################################################
   16149 # XDEF ****************************************************************	#
   16150 #	res_qnan(): return default result w/ QNAN operand for dyadic	#
   16151 #	res_snan(): return default result w/ SNAN operand for dyadic	#
   16152 #	res_qnan_1op(): return dflt result w/ QNAN operand for monadic	#
   16153 #	res_snan_1op(): return dflt result w/ SNAN operand for monadic	#
   16154 #									#
   16155 # XREF ****************************************************************	#
   16156 #	None								#
   16157 #									#
   16158 # INPUT ***************************************************************	#
   16159 #	FP_SRC(a6) = pointer to extended precision src operand		#
   16160 #	FP_DST(a6) = pointer to extended precision dst operand		#
   16161 # 									#
   16162 # OUTPUT **************************************************************	#
   16163 #	fp0 = default result						#
   16164 #									#
   16165 # ALGORITHM ***********************************************************	#
   16166 # 	If either operand (but not both operands) of an operation is a	#
   16167 # nonsignalling NAN, then that NAN is returned as the result. If both	#
   16168 # operands are nonsignalling NANs, then the destination operand 	#
   16169 # nonsignalling NAN is returned as the result.				#
   16170 # 	If either operand to an operation is a signalling NAN (SNAN),	#
   16171 # then, the SNAN bit is set in the FPSR EXC byte. If the SNAN trap	#
   16172 # enable bit is set in the FPCR, then the trap is taken and the 	#
   16173 # destination is not modified. If the SNAN trap enable bit is not set,	#
   16174 # then the SNAN is converted to a nonsignalling NAN (by setting the 	#
   16175 # SNAN bit in the operand to one), and the operation continues as 	#
   16176 # described in the preceding paragraph, for nonsignalling NANs.		#
   16177 #	Make sure the appropriate FPSR bits are set before exiting.	#
   16178 #									#
   16179 #########################################################################
   16180 
   16181 	global		res_qnan
   16182 	global		res_snan
   16183 res_qnan:
   16184 res_snan:
   16185 	cmp.b		DTAG(%a6), &SNAN	# is the dst an SNAN?
   16186 	beq.b		dst_snan2		# yes; dst takes priority
   16187 	cmp.b		DTAG(%a6), &QNAN	# is the dst a  QNAN?
   16188 	beq.b		dst_qnan2		# yes; dst takes priority
   16189 src_nan:
   16190 	cmp.b		STAG(%a6), &QNAN	# is the src a  QNAN?
   16191 	beq.b		src_qnan2		# yes; otherwise treat src as SNAN
   16192 	global		res_snan_1op
   16193 res_snan_1op:
   16194 src_snan2:
   16195 	bset		&0x6, FP_SRC_HI(%a6)	# set SNAN bit
   16196 	or.l		&nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
   16197 	lea		FP_SRC(%a6), %a0	# result is the src operand
   16198 	bra.b		nan_comp
   16199 	global		res_qnan_1op
   16200 res_qnan_1op:
   16201 src_qnan2:
   16202 	or.l		&nan_mask, USER_FPSR(%a6)
   16203 	lea		FP_SRC(%a6), %a0	# result is the src operand
   16204 	bra.b		nan_comp
   16205 dst_snan2:
   16206 	or.l		&nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
   16207 	bset		&0x6, FP_DST_HI(%a6)	# set SNAN bit
   16208 	lea		FP_DST(%a6), %a0	# result is the dst operand
   16209 	bra.b		nan_comp
   16210 dst_qnan2:
   16211 	lea		FP_DST(%a6), %a0	# result is the dst operand
   16212 	cmp.b		STAG(%a6), &SNAN	# is the src an SNAN?
   16213 	bne		nan_done		# no; only the NAN cc is needed
   16214 	or.l		&aiop_mask+snan_mask, USER_FPSR(%a6)
   16215 nan_done:
   16216 	or.l		&nan_mask, USER_FPSR(%a6)
   16217 nan_comp:
   16218 	btst		&0x7, FTEMP_EX(%a0)	# is NAN neg?
   16219 	beq.b		nan_not_neg		# no; leave the neg cc alone
   16220 	or.l		&neg_mask, USER_FPSR(%a6)
   16221 nan_not_neg:
   16222 	fmovm.x		(%a0), &0x80		# load the NAN at (a0) as the result
   16223 	rts
   16224 
   16225 #########################################################################
   16226 # XDEF ****************************************************************	#
   16227 # 	res_operr(): return default result during operand error		#
   16228 #									#
   16229 # XREF ****************************************************************	#
   16230 #	None								#
   16231 #									#
   16232 # INPUT ***************************************************************	#
   16233 #	None								#
   16234 # 									#
   16235 # OUTPUT **************************************************************	#
   16236 #	fp0 = default operand error result				#
   16237 #									#
   16238 # ALGORITHM ***********************************************************	#
   16239 #	A nonsignalling NAN is returned as the default result when	#
   16240 # an operand error occurs for the following cases:			#
   16241 #									#
   16242 # 	Multiply: (Infinity x Zero)					#
   16243 # 	Divide  : (Zero / Zero) || (Infinity / Infinity)		#
   16244 #									#
   16245 #########################################################################
   16246 
   16247 	global		res_operr
   16248 res_operr:
   16249 	or.l		&nan_mask+operr_mask+aiop_mask, USER_FPSR(%a6)
   16250 	fmovm.x		nan_return(%pc), &0x80	# load the default NAN below
   16251 	rts
   16252 
   16253 nan_return:
   16254 	long		0x7fff0000, 0xffffffff, 0xffffffff # default NAN image
   16255 
   16256 #########################################################################
   16257 # fdbcc(): routine to emulate the fdbcc instruction			#
   16258 #									#
   16259 # XDEF **************************************************************** #
   16260 #	_fdbcc()							#
   16261 #									#
   16262 # XREF **************************************************************** #
   16263 #	fetch_dreg() - fetch Dn value					#
   16264 #	store_dreg_l() - store updated Dn value				#
   16265 #									#
   16266 # INPUT ***************************************************************	#
   16267 #	d0 = displacement						#
   16268 #									#
   16269 # OUTPUT ************************************************************** #
   16270 #	none								#
   16271 #									#
   16272 # ALGORITHM ***********************************************************	#
   16273 #	This routine checks which conditional predicate is specified by	#
   16274 # the stacked fdbcc instruction opcode and then branches to a routine	#
   16275 # for that predicate. The corresponding fbcc instruction is then used	#
   16276 # to see whether the condition (specified by the stacked FPSR) is true	#
   16277 # or false.								#
   16278 #	If a BSUN exception should be indicated, the BSUN and AIOP	#
   16279 # bits are set in the stacked FPSR. If the BSUN exception is enabled,	#
   16280 # the fbsun_flg is set in the SPCOND_FLG location on the stack. If an 	#
   16281 # enabled BSUN should not be flagged and the predicate is false, then	#
   16282 # Dn is fetched and decremented by one. If Dn is not equal to -1, add	#
   16283 # the displacement value to the stacked PC so that when an "rte" is	#
   16284 # finally executed, the branch occurs.					#
   16285 #									#
   16286 #########################################################################
   16287 	global		_fdbcc
   16288 _fdbcc:
   16289 	mov.l		%d0,L_SCR1(%a6)		# save displacement for later
   16290 
   16291 	mov.w		EXC_CMDREG(%a6),%d0	# fetch predicate
   16292 
   16293 	clr.l		%d1			# clear scratch reg
   16294 	mov.b		FPSR_CC(%a6),%d1	# fetch fp ccodes
   16295 	ror.l		&0x8,%d1		# rotate to top byte
   16296 	fmov.l		%d1,%fpsr		# insert into FPSR for the fbcc tests
   16297 
   16298 	mov.w		(tbl_fdbcc.b,%pc,%d0.w*2),%d1 # d1 = handler offset
   16299 	jmp		(tbl_fdbcc.b,%pc,%d1.w) # jump to tbl_fdbcc + offset
   16300 
   16301 tbl_fdbcc:
   16302 	short		fdbcc_f		-	tbl_fdbcc	# 00
   16303 	short		fdbcc_eq	-	tbl_fdbcc	# 01
   16304 	short		fdbcc_ogt	-	tbl_fdbcc	# 02
   16305 	short		fdbcc_oge	-	tbl_fdbcc	# 03
   16306 	short		fdbcc_olt	-	tbl_fdbcc	# 04
   16307 	short		fdbcc_ole	-	tbl_fdbcc	# 05
   16308 	short		fdbcc_ogl	-	tbl_fdbcc	# 06
   16309 	short		fdbcc_or	-	tbl_fdbcc	# 07
   16310 	short		fdbcc_un	-	tbl_fdbcc	# 08
   16311 	short		fdbcc_ueq	-	tbl_fdbcc	# 09
   16312 	short		fdbcc_ugt	-	tbl_fdbcc	# 10
   16313 	short		fdbcc_uge	-	tbl_fdbcc	# 11
   16314 	short		fdbcc_ult	-	tbl_fdbcc	# 12
   16315 	short		fdbcc_ule	-	tbl_fdbcc	# 13
   16316 	short		fdbcc_neq	-	tbl_fdbcc	# 14
   16317 	short		fdbcc_t		-	tbl_fdbcc	# 15
   16318 	short		fdbcc_sf	-	tbl_fdbcc	# 16
   16319 	short		fdbcc_seq	-	tbl_fdbcc	# 17
   16320 	short		fdbcc_gt	-	tbl_fdbcc	# 18
   16321 	short		fdbcc_ge	-	tbl_fdbcc	# 19
   16322 	short		fdbcc_lt	-	tbl_fdbcc	# 20
   16323 	short		fdbcc_le	-	tbl_fdbcc	# 21
   16324 	short		fdbcc_gl	-	tbl_fdbcc	# 22
   16325 	short		fdbcc_gle	-	tbl_fdbcc	# 23
   16326 	short		fdbcc_ngle	-	tbl_fdbcc	# 24
   16327 	short		fdbcc_ngl	-	tbl_fdbcc	# 25
   16328 	short		fdbcc_nle	-	tbl_fdbcc	# 26
   16329 	short		fdbcc_nlt	-	tbl_fdbcc	# 27
   16330 	short		fdbcc_nge	-	tbl_fdbcc	# 28
   16331 	short		fdbcc_ngt	-	tbl_fdbcc	# 29
   16332 	short		fdbcc_sneq	-	tbl_fdbcc	# 30
   16333 	short		fdbcc_st	-	tbl_fdbcc	# 31
   16334 
   16335 #########################################################################
   16336 #									#
   16337 # IEEE Nonaware tests							#
   16338 #									#
   16339 # For the IEEE nonaware tests, only the false branch changes the 	#
   16340 # counter. However, the true branch may set bsun so we check to see	#
   16341 # if the NAN bit is set, in which case BSUN and AIOP will be set.	#
   16342 #									#
   16343 # The cases EQ and NE are shared by the Aware and Nonaware groups	#
   16344 # and are incapable of setting the BSUN exception bit.			#
   16345 #									#
   16346 # Typically, only one of the two possible branch directions could	#
   16347 # have the NAN bit set.							#
   16348 # (This is assuming the mutual exclusiveness of FPSR cc bit groupings	#
   16349 #  is preserved.)							#
   16350 #									#
   16351 #########################################################################
   16352 
   16353 #
   16354 # equal:
   16355 #
   16356 #	Z
   16357 #
   16358 fdbcc_eq:
   16359 	fbeq.w		fdbcc_eq_yes		# equal?
   16360 fdbcc_eq_no:
   16361 	bra.w		fdbcc_false		# no; go handle counter
   16362 fdbcc_eq_yes:
   16363 	rts
   16364 
   16365 #
   16366 # not equal:
   16367 #	_
   16368 #	Z
   16369 #
   16370 fdbcc_neq:
   16371 	fbneq.w		fdbcc_neq_yes		# not equal?
   16372 fdbcc_neq_no:
   16373 	bra.w		fdbcc_false		# no; go handle counter
   16374 fdbcc_neq_yes:
   16375 	rts
   16376 
   16377 #
   16378 # greater than:
   16379 #	_______
   16380 #	NANvZvN
   16381 #
   16382 fdbcc_gt:
   16383 	fbgt.w		fdbcc_gt_yes		# greater than?
   16384 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
   16385 	beq.w		fdbcc_false		# no;go handle counter
   16386 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
   16387 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
   16388 	bne.w		fdbcc_bsun		# yes; we have an exception
   16389 	bra.w		fdbcc_false		# no; go handle counter
   16390 fdbcc_gt_yes:
   16391 	rts					# do nothing
   16392 
   16393 #
   16394 # not greater than:
   16395 #
   16396 #	NANvZvN
   16397 #
   16398 fdbcc_ngt:
   16399 	fbngt.w		fdbcc_ngt_yes		# not greater than?
   16400 fdbcc_ngt_no:
   16401 	bra.w		fdbcc_false		# no; go handle counter
   16402 fdbcc_ngt_yes:
   16403 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
   16404 	beq.b		fdbcc_ngt_done		# no;go finish
   16405 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
   16406 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
   16407 	bne.w		fdbcc_bsun		# yes; we have an exception
   16408 fdbcc_ngt_done:
   16409 	rts					# no; do nothing
   16410 
   16411 #
   16412 # greater than or equal:
   16413 #	   _____
   16414 #	Zv(NANvN)
   16415 #
   16416 fdbcc_ge:
   16417 	fbge.w		fdbcc_ge_yes		# greater than or equal?
   16418 fdbcc_ge_no:
   16419 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
   16420 	beq.w		fdbcc_false		# no;go handle counter
   16421 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
   16422 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
   16423 	bne.w		fdbcc_bsun		# yes; we have an exception
   16424 	bra.w		fdbcc_false		# no; go handle counter
   16425 fdbcc_ge_yes:
   16426 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
   16427 	beq.b		fdbcc_ge_yes_done	# no;go do nothing
   16428 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
   16429 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
   16430 	bne.w		fdbcc_bsun		# yes; we have an exception
   16431 fdbcc_ge_yes_done:
   16432 	rts					# do nothing
   16433 
   16434 #
   16435 # not (greater than or equal):
   16436 #	       _
   16437 #	NANv(N^Z)
   16438 #
   16439 fdbcc_nge:
   16440 	fbnge.w		fdbcc_nge_yes		# not (greater than or equal)?
   16441 fdbcc_nge_no:
   16442 	bra.w		fdbcc_false		# no; go handle counter
   16443 fdbcc_nge_yes:
   16444 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
   16445 	beq.b		fdbcc_nge_done		# no;go finish
   16446 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
   16447 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
   16448 	bne.w		fdbcc_bsun		# yes; we have an exception
   16449 fdbcc_nge_done:
   16450 	rts					# no; do nothing
   16451 
   16452 #
   16453 # less than:
   16454 #	   _____
   16455 #	N^(NANvZ)
   16456 #
   16457 fdbcc_lt:
   16458 	fblt.w		fdbcc_lt_yes		# less than?
   16459 fdbcc_lt_no:
   16460 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
   16461 	beq.w		fdbcc_false		# no; go handle counter
   16462 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
   16463 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
   16464 	bne.w		fdbcc_bsun		# yes; we have an exception
   16465 	bra.w		fdbcc_false		# no; go handle counter
   16466 fdbcc_lt_yes:
   16467 	rts					# do nothing
   16468 
   16469 #
   16470 # not less than:
   16471 #	       _
   16472 #	NANv(ZvN)
   16473 #
   16474 fdbcc_nlt:
   16475 	fbnlt.w		fdbcc_nlt_yes		# not less than?
   16476 fdbcc_nlt_no:
   16477 	bra.w		fdbcc_false		# no; go handle counter
   16478 fdbcc_nlt_yes:
   16479 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
   16480 	beq.b		fdbcc_nlt_done		# no;go finish
   16481 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
   16482 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
   16483 	bne.w		fdbcc_bsun		# yes; we have an exception
   16484 fdbcc_nlt_done:
   16485 	rts					# no; do nothing
   16486 
   16487 #
   16488 # less than or equal:
   16489 #	     ___
   16490 #	Zv(N^NAN)
   16491 #
   16492 fdbcc_le:
   16493 	fble.w		fdbcc_le_yes		# less than or equal?
   16494 fdbcc_le_no:
   16495 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
   16496 	beq.w		fdbcc_false		# no; go handle counter
   16497 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
   16498 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
   16499 	bne.w		fdbcc_bsun		# yes; we have an exception
   16500 	bra.w		fdbcc_false		# no; go handle counter
   16501 fdbcc_le_yes:
   16502 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
   16503 	beq.b		fdbcc_le_yes_done	# no; go do nothing
   16504 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
   16505 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
   16506 	bne.w		fdbcc_bsun		# yes; we have an exception
   16507 fdbcc_le_yes_done:
   16508 	rts					# do nothing
   16509 
   16510 #
   16511 # not (less than or equal):
   16512 #	     ___
   16513 #	NANv(NvZ)
   16514 #
   16515 fdbcc_nle:
   16516 	fbnle.w		fdbcc_nle_yes		# not (less than or equal)?
   16517 fdbcc_nle_no:
   16518 	bra.w		fdbcc_false		# no; go handle counter
   16519 fdbcc_nle_yes:
   16520 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
   16521 	beq.w		fdbcc_nle_done		# no; go finish
   16522 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
   16523 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
   16524 	bne.w		fdbcc_bsun		# yes; we have an exception
   16525 fdbcc_nle_done:
   16526 	rts					# no; do nothing
   16527 
   16528 #
   16529 # greater or less than:
   16530 #	_____
   16531 #	NANvZ
   16532 #
   16533 fdbcc_gl:
   16534 	fbgl.w		fdbcc_gl_yes		# greater or less than?
   16535 fdbcc_gl_no:
   16536 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
   16537 	beq.w		fdbcc_false		# no; handle counter
   16538 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
   16539 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
   16540 	bne.w		fdbcc_bsun		# yes; we have an exception
   16541 	bra.w		fdbcc_false		# no; go handle counter
   16542 fdbcc_gl_yes:
   16543 	rts					# do nothing
   16544 
   16545 #
   16546 # not (greater or less than):
   16547 #
   16548 #	NANvZ
   16549 #
   16550 fdbcc_ngl:
   16551 	fbngl.w		fdbcc_ngl_yes		# not (greater or less than)?
   16552 fdbcc_ngl_no:
   16553 	bra.w		fdbcc_false		# no; go handle counter
   16554 fdbcc_ngl_yes:
   16555 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
   16556 	beq.b		fdbcc_ngl_done		# no; go finish
   16557 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
   16558 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
   16559 	bne.w		fdbcc_bsun		# yes; we have an exception
   16560 fdbcc_ngl_done:
   16561 	rts					# no; do nothing
   16562 
   16563 #
   16564 # greater, less, or equal:
   16565 #	___
   16566 #	NAN
   16567 #
   16568 fdbcc_gle:
   16569 	fbgle.w		fdbcc_gle_yes		# greater, less, or equal?
   16570 fdbcc_gle_no:
   16571 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
   16572 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
   16573 	bne.w		fdbcc_bsun		# yes; we have an exception
   16574 	bra.w		fdbcc_false		# no; go handle counter
   16575 fdbcc_gle_yes:
   16576 	rts					# do nothing
   16577 
   16578 #
   16579 # not (greater, less, or equal):
   16580 #
   16581 #	NAN
   16582 #
   16583 fdbcc_ngle:
   16584 	fbngle.w	fdbcc_ngle_yes		# not (greater, less, or equal)?
   16585 fdbcc_ngle_no:
   16586 	bra.w		fdbcc_false		# no; go handle counter
   16587 fdbcc_ngle_yes:
   16588 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
   16589 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
   16590 	bne.w		fdbcc_bsun		# yes; we have an exception
   16591 	rts					# no; do nothing
   16592 
   16593 #########################################################################
   16594 #									#
   16595 # Miscellaneous tests							#
   16596 #									#
   16597 # For the IEEE miscellaneous tests, all but fdbf and fdbt can set bsun. #
   16598 #									#
   16599 #########################################################################
   16600 
   16601 #
   16602 # false:
   16603 #
   16604 #	False
   16605 #
   16606 fdbcc_f:					# no bsun possible
   16607 	bra.w		fdbcc_false		# go handle counter
   16608 
   16609 #
   16610 # true:
   16611 #
   16612 #	True
   16613 #
   16614 fdbcc_t:					# no bsun possible
   16615 	rts					# do nothing
   16616 
   16617 #
   16618 # signalling false:
   16619 #
   16620 #	False
   16621 #
   16622 fdbcc_sf:
   16623 	btst		&nan_bit, FPSR_CC(%a6) 	# is NAN set?
   16624 	beq.w		fdbcc_false		# no;go handle counter
   16625 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
   16626 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
   16627 	bne.w		fdbcc_bsun		# yes; we have an exception
   16628 	bra.w		fdbcc_false		# go handle counter
   16629 
   16630 #
   16631 # signalling true:
   16632 #
   16633 #	True
   16634 #
   16635 fdbcc_st:
   16636 	btst		&nan_bit, FPSR_CC(%a6) 	# is NAN set?
   16637 	beq.b		fdbcc_st_done		# no;go finish
   16638 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
   16639 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
   16640 	bne.w		fdbcc_bsun		# yes; we have an exception
   16641 fdbcc_st_done:
   16642 	rts
   16643 
   16644 #
   16645 # signalling equal:
   16646 #
   16647 #	Z
   16648 #
   16649 fdbcc_seq:
   16650 	fbseq.w		fdbcc_seq_yes		# signalling equal?
   16651 fdbcc_seq_no:
   16652 	btst		&nan_bit, FPSR_CC(%a6) 	# is NAN set?
   16653 	beq.w		fdbcc_false		# no;go handle counter
   16654 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
   16655 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
   16656 	bne.w		fdbcc_bsun		# yes; we have an exception
   16657 	bra.w		fdbcc_false		# go handle counter
   16658 fdbcc_seq_yes:
   16659 	btst		&nan_bit, FPSR_CC(%a6) 	# is NAN set?
   16660 	beq.b		fdbcc_seq_yes_done	# no;go do nothing
   16661 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
   16662 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
   16663 	bne.w		fdbcc_bsun		# yes; we have an exception
   16664 fdbcc_seq_yes_done:
   16665 	rts					# yes; do nothing
   16666 
   16667 #
   16668 # signalling not equal:
   16669 #	_
   16670 #	Z
   16671 #
   16672 fdbcc_sneq:
   16673 	fbsneq.w	fdbcc_sneq_yes		# signalling not equal?
   16674 fdbcc_sneq_no:
   16675 	btst		&nan_bit, FPSR_CC(%a6) 	# is NAN set?
   16676 	beq.w		fdbcc_false		# no;go handle counter
   16677 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
   16678 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
   16679 	bne.w		fdbcc_bsun		# yes; we have an exception
   16680 	bra.w		fdbcc_false		# go handle counter
   16681 fdbcc_sneq_yes:
   16682 	btst		&nan_bit, FPSR_CC(%a6) 	# is NAN set?
   16683 	beq.w		fdbcc_sneq_done		# no;go finish
   16684 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
   16685 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
   16686 	bne.w		fdbcc_bsun		# yes; we have an exception
   16687 fdbcc_sneq_done:
   16688 	rts					# do nothing
   16689 
   16690 #########################################################################
   16691 #									#
   16692 # IEEE Aware tests							#
   16693 #									#
   16694 # For the IEEE aware tests, action is only taken if the result is false.#
   16695 # Therefore, the opposite branch type is used to jump to the decrement	#
   16696 # routine. 								#
   16697 # The BSUN exception will not be set for any of these tests.		#
   16698 #									#
   16699 #########################################################################
   16700 
   16701 #
   16702 # ordered greater than:
   16703 #	_______
   16704 #	NANvZvN
   16705 #
   16706 fdbcc_ogt:
   16707 	fbogt.w		fdbcc_ogt_yes		# ordered greater than?
   16708 fdbcc_ogt_no:
   16709 	bra.w		fdbcc_false		# no; go handle counter
   16710 fdbcc_ogt_yes:
   16711 	rts					# yes; do nothing
   16712 
   16713 #
   16714 # unordered or less or equal:
   16715 #
   16716 #	NANvZvN
   16717 #
   16718 fdbcc_ule:
   16719 	fbule.w		fdbcc_ule_yes		# unordered or less or equal?
   16720 fdbcc_ule_no:
   16721 	bra.w		fdbcc_false		# no; go handle counter
   16722 fdbcc_ule_yes:
   16723 	rts					# yes; do nothing
   16724 
   16725 #
   16726 # ordered greater than or equal:
   16727 #	   _____
   16728 #	Zv(NANvN)
   16729 #
   16730 fdbcc_oge:
   16731 	fboge.w		fdbcc_oge_yes		# ordered greater than or equal?
   16732 fdbcc_oge_no:
   16733 	bra.w		fdbcc_false		# no; go handle counter
   16734 fdbcc_oge_yes:
   16735 	rts					# yes; do nothing
   16736 
   16737 #
   16738 # unordered or less than:
   16739 #	       _
   16740 #	NANv(N^Z)
   16741 #
   16742 fdbcc_ult:
   16743 	fbult.w		fdbcc_ult_yes		# unordered or less than?
   16744 fdbcc_ult_no:
   16745 	bra.w		fdbcc_false		# no; go handle counter
   16746 fdbcc_ult_yes:
   16747 	rts					# yes; do nothing
   16748 
   16749 #
   16750 # ordered less than:
   16751 #	   _____
   16752 #	N^(NANvZ)
   16753 #
   16754 fdbcc_olt:
   16755 	fbolt.w		fdbcc_olt_yes		# ordered less than?
   16756 fdbcc_olt_no:
   16757 	bra.w		fdbcc_false		# no; go handle counter
   16758 fdbcc_olt_yes:
   16759 	rts					# yes; do nothing
   16760 
   16761 #
   16762 # unordered or greater or equal:
   16763 #	      _
   16764 #	NANvZvN
   16765 #
   16766 fdbcc_uge:
   16767 	fbuge.w		fdbcc_uge_yes		# unordered or greater or equal?
   16768 fdbcc_uge_no:
   16769 	bra.w		fdbcc_false		# no; go handle counter
   16770 fdbcc_uge_yes:
   16771 	rts					# yes; do nothing
   16772 
   16773 #
   16774 # ordered less than or equal:
   16775 #	     ___
   16776 #	Zv(N^NAN)
   16777 #
   16778 fdbcc_ole:
   16779 	fbole.w		fdbcc_ole_yes		# ordered less than or equal?
   16780 fdbcc_ole_no:
   16781 	bra.w		fdbcc_false		# no; go handle counter
   16782 fdbcc_ole_yes:
   16783 	rts					# yes; do nothing
   16784 
   16785 #
   16786 # unordered or greater than:
   16787 #	     ___
   16788 #	NANv(NvZ)
   16789 #
   16790 fdbcc_ugt:
   16791 	fbugt.w		fdbcc_ugt_yes		# unordered or greater than?
   16792 fdbcc_ugt_no:
   16793 	bra.w		fdbcc_false		# no; go handle counter
   16794 fdbcc_ugt_yes:
   16795 	rts					# yes; do nothing
   16796 
   16797 #
   16798 # ordered greater or less than:
   16799 #	_____
   16800 #	NANvZ
   16801 #
   16802 fdbcc_ogl:
   16803 	fbogl.w		fdbcc_ogl_yes		# ordered greater or less than?
   16804 fdbcc_ogl_no:
   16805 	bra.w		fdbcc_false		# no; go handle counter
   16806 fdbcc_ogl_yes:
   16807 	rts					# yes; do nothing
   16808 
   16809 #
   16810 # unordered or equal:
   16811 #
   16812 #	NANvZ
   16813 #
   16814 fdbcc_ueq:
   16815 	fbueq.w		fdbcc_ueq_yes		# unordered or equal?
   16816 fdbcc_ueq_no:
   16817 	bra.w		fdbcc_false		# no; go handle counter
   16818 fdbcc_ueq_yes:
   16819 	rts					# yes; do nothing
   16820 
   16821 #
   16822 # ordered:
   16823 #	___
   16824 #	NAN
   16825 #
   16826 fdbcc_or:
   16827 	fbor.w		fdbcc_or_yes		# ordered?
   16828 fdbcc_or_no:
   16829 	bra.w		fdbcc_false		# no; go handle counter
   16830 fdbcc_or_yes:
   16831 	rts					# yes; do nothing
   16832 
   16833 #
   16834 # unordered:
   16835 #
   16836 #	NAN
   16837 #
   16838 fdbcc_un:
   16839 	fbun.w		fdbcc_un_yes		# unordered?
   16840 fdbcc_un_no:
   16841 	bra.w		fdbcc_false		# no; go handle counter
   16842 fdbcc_un_yes:
   16843 	rts					# yes; do nothing
   16844 
   16845 #######################################################################
   16846 
   16847 #
   16848 # the bsun exception bit was not set.
   16849 #
   16850 # (1) subtract 1 from the count register
   16851 # (2) if (cr == -1) then
   16852 #	pc = pc of next instruction
   16853 #     else
   16854 #	pc += sign_ext(16-bit displacement)
   16855 #
   16856 fdbcc_false:
   16857 	mov.b		1+EXC_OPWORD(%a6), %d1	# fetch low byte of opword
   16858 	andi.w		&0x7, %d1		# keep low 3 bits = count register
   16859 
   16860 	bsr.l		fetch_dreg		# fetch count value
   16861 # d0 must survive from fetch_dreg to the compare below...
   16862 
   16863 	subq.w		&0x1, %d0		# Dn - 1 -> Dn (word-sized decrement)
   16864 
   16865 	bsr.l		store_dreg_l		# store new count value
   16866 
   16867 	cmpi.w		%d0, &-0x1		# is (Dn == -1)?
   16868 	bne.b		fdbcc_false_cont	# no; go take the branch
   16869 	rts					# yes; EXC_PC is left unchanged
   16870 
   16871 fdbcc_false_cont:
   16872 	mov.l		L_SCR1(%a6),%d0		# fetch displacement saved by _fdbcc
   16873 	add.l		USER_FPIAR(%a6),%d0	# add instruction PC
   16874 	addq.l		&0x4,%d0		# add 0x4 (presumably offset of disp word; verify)
   16875 	mov.l		%d0,EXC_PC(%a6)		# set new PC
   16876 	rts
   16877 
   16878 # the emulation routine set bsun and BSUN was enabled. have to
   16879 # fix stack and jump to the bsun handler.
   16880 # let the caller of this routine shift the stack frame up to
   16881 # eliminate the effective address field.
   16882 fdbcc_bsun:
   16883 	mov.b		&fbsun_flg,SPCOND_FLG(%a6)
   16884 	rts
   16885 
   16886 #########################################################################
   16887 # ftrapcc(): routine to emulate the ftrapcc instruction			#
   16888 #									#
   16889 # XDEF ****************************************************************	#
   16890 #	_ftrapcc()							#
   16891 #									#
   16892 # XREF ****************************************************************	#
   16893 #	none								#
   16894 #									#
   16895 # INPUT *************************************************************** #
   16896 #	none								#
   16897 #									#
   16898 # OUTPUT ************************************************************** #
   16899 #	none								#
   16900 #									#
   16901 # ALGORITHM *********************************************************** #
   16902 #	This routine checks which conditional predicate is specified by	#
   16903 # the stacked ftrapcc instruction opcode and then branches to a routine	#
   16904 # for that predicate. The corresponding fbcc instruction is then used	#
   16905 # to see whether the condition (specified by the stacked FPSR) is true	#
   16906 # or false.								#
   16907 #	If a BSUN exception should be indicated, the BSUN and ABSUN	#
   16908 # bits are set in the stacked FPSR. If the BSUN exception is enabled,	#
   16909 # the fbsun_flg is set in the SPCOND_FLG location on the stack. If an 	#
   16910 # enabled BSUN should not be flagged and the predicate is true, then	#
   16911 # the ftrapcc_flg is set in the SPCOND_FLG location. These special	#
   16912 # flags indicate to the calling routine to emulate the exceptional	#
   16913 # condition.								#
   16914 #									#
   16915 #########################################################################
   16916 
   16917 	global		_ftrapcc
   16918 _ftrapcc:
         # NOTE(review): %d0 is used below as a table index without masking;
         # presumably the caller guarantees a predicate in 0-31 - confirm.
   16919 	mov.w		EXC_CMDREG(%a6),%d0	# fetch predicate (table index)
   16920 
   16921 	clr.l		%d1			# clear scratch reg
   16922 	mov.b		FPSR_CC(%a6),%d1	# fetch stacked fp ccodes
   16923 	ror.l		&0x8,%d1		# rotate low byte to top byte
   16924 	fmov.l		%d1,%fpsr		# insert into FPSR so fbcc can test them
   16925 
   16926 	mov.w		(tbl_ftrapcc.b,%pc,%d0.w*2), %d1 # load routine offset from table
   16927 	jmp		(tbl_ftrapcc.b,%pc,%d1.w) # jump to ftrapcc routine
   16928 
   16929 tbl_ftrapcc:
         # One 16-bit entry per predicate; each entry is the offset of the
         # predicate's routine from tbl_ftrapcc. The trailing comment is the
         # predicate value used as the index by _ftrapcc.
   16930 	short		ftrapcc_f	-	tbl_ftrapcc	# 00
   16931 	short		ftrapcc_eq	-	tbl_ftrapcc	# 01
   16932 	short		ftrapcc_ogt	-	tbl_ftrapcc	# 02
   16933 	short		ftrapcc_oge	-	tbl_ftrapcc	# 03
   16934 	short		ftrapcc_olt	-	tbl_ftrapcc	# 04
   16935 	short		ftrapcc_ole	-	tbl_ftrapcc	# 05
   16936 	short		ftrapcc_ogl	-	tbl_ftrapcc	# 06
   16937 	short		ftrapcc_or	-	tbl_ftrapcc	# 07
   16938 	short		ftrapcc_un	-	tbl_ftrapcc	# 08
   16939 	short		ftrapcc_ueq	-	tbl_ftrapcc	# 09
   16940 	short		ftrapcc_ugt	-	tbl_ftrapcc	# 10
   16941 	short		ftrapcc_uge	-	tbl_ftrapcc	# 11
   16942 	short		ftrapcc_ult	-	tbl_ftrapcc	# 12
   16943 	short		ftrapcc_ule	-	tbl_ftrapcc	# 13
   16944 	short		ftrapcc_neq	-	tbl_ftrapcc	# 14
   16945 	short		ftrapcc_t	-	tbl_ftrapcc	# 15
   16946 	short		ftrapcc_sf	-	tbl_ftrapcc	# 16
   16947 	short		ftrapcc_seq	-	tbl_ftrapcc	# 17
   16948 	short		ftrapcc_gt	-	tbl_ftrapcc	# 18
   16949 	short		ftrapcc_ge	-	tbl_ftrapcc	# 19
   16950 	short		ftrapcc_lt	-	tbl_ftrapcc	# 20
   16951 	short		ftrapcc_le	-	tbl_ftrapcc	# 21
   16952 	short		ftrapcc_gl	-	tbl_ftrapcc	# 22
   16953 	short		ftrapcc_gle	-	tbl_ftrapcc	# 23
   16954 	short		ftrapcc_ngle	-	tbl_ftrapcc	# 24
   16955 	short		ftrapcc_ngl	-	tbl_ftrapcc	# 25
   16956 	short		ftrapcc_nle	-	tbl_ftrapcc	# 26
   16957 	short		ftrapcc_nlt	-	tbl_ftrapcc	# 27
   16958 	short		ftrapcc_nge	-	tbl_ftrapcc	# 28
   16959 	short		ftrapcc_ngt	-	tbl_ftrapcc	# 29
   16960 	short		ftrapcc_sneq	-	tbl_ftrapcc	# 30
   16961 	short		ftrapcc_st	-	tbl_ftrapcc	# 31
   16962 
   16963 #########################################################################
   16964 #									#
   16965 # IEEE Nonaware tests							#
   16966 #									#
   16967 # For the IEEE nonaware tests, we set the result based on the		#
   16968 # floating point condition codes. In addition, we check to see		#
   16969 # if the NAN bit is set, in which case BSUN and AIOP will be set.	#
   16970 #									#
   16971 # The cases EQ and NE are shared by the Aware and Nonaware groups	#
   16972 # and are incapable of setting the BSUN exception bit.			#
   16973 #									#
   16974 # Typically, only one of the two possible branch directions could	#
   16975 # have the NAN bit set.							#
   16976 #									#
   16977 #########################################################################
   16978 
   16979 #
   16980 # equal:
   16981 #
   16982 #	Z
   16983 #
   16984 ftrapcc_eq:
   16985 	fbeq.w		ftrapcc_trap		# equal? yes; go take trap
   16986 ftrapcc_eq_no:
   16987 	rts					# no; do nothing
   16988 
   16989 #
   16990 # not equal:
   16991 #	_
   16992 #	Z
   16993 #
   16994 ftrapcc_neq:
   16995 	fbneq.w		ftrapcc_trap		# not equal? yes; go take trap
   16996 ftrapcc_neq_no:
   16997 	rts					# no; do nothing
   16998 
   16999 #
   17000 # greater than:
   17001 #	_______
   17002 #	NANvZvN
   17003 #
   17004 ftrapcc_gt:
   17005 	fbgt.w		ftrapcc_trap		# greater than? yes; go take trap
   17006 ftrapcc_gt_no:
   17007 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
   17008 	beq.b		ftrapcc_gt_done		# no
   17009 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
   17010 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
   17011 	bne.w		ftrapcc_bsun		# yes; go flag bsun
   17012 ftrapcc_gt_done:
   17013 	rts					# no; do nothing
   17014 
   17015 #
   17016 # not greater than:
   17017 #
   17018 #	NANvZvN
   17019 #
   17020 ftrapcc_ngt:
   17021 	fbngt.w		ftrapcc_ngt_yes		# not greater than?
   17022 ftrapcc_ngt_no:
   17023 	rts					# no; do nothing
   17024 ftrapcc_ngt_yes:
   17025 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
   17026 	beq.w		ftrapcc_trap		# no; go take trap
   17027 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
   17028 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
   17029 	bne.w		ftrapcc_bsun		# yes; go flag bsun
   17030 	bra.w		ftrapcc_trap		# no; go take trap
   17031 
   17032 #
   17033 # greater than or equal:
   17034 #	   _____
   17035 #	Zv(NANvN)
   17036 #
   17037 ftrapcc_ge:
   17038 	fbge.w		ftrapcc_ge_yes		# greater than or equal?
   17039 ftrapcc_ge_no:
   17040 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
   17041 	beq.b		ftrapcc_ge_done		# no; go finish
   17042 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
   17043 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
   17044 	bne.w		ftrapcc_bsun		# yes; go flag bsun
   17045 ftrapcc_ge_done:
   17046 	rts					# no; do nothing
   17047 ftrapcc_ge_yes:
   17048 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
   17049 	beq.w		ftrapcc_trap		# no; go take trap
   17050 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
   17051 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
   17052 	bne.w		ftrapcc_bsun		# yes; go flag bsun
   17053 	bra.w		ftrapcc_trap		# no; go take trap
   17054 
   17055 #
   17056 # not (greater than or equal):
   17057 #	       _
   17058 #	NANv(N^Z)
   17059 #
   17060 ftrapcc_nge:
   17061 	fbnge.w		ftrapcc_nge_yes		# not (greater than or equal)?
   17062 ftrapcc_nge_no:
   17063 	rts					# no; do nothing
   17064 ftrapcc_nge_yes:
   17065 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
   17066 	beq.w		ftrapcc_trap		# no; go take trap
   17067 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
   17068 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
   17069 	bne.w		ftrapcc_bsun		# yes; go flag bsun
   17070 	bra.w		ftrapcc_trap		# no; go take trap
   17071 
   17072 #
   17073 # less than:
   17074 #	   _____
   17075 #	N^(NANvZ)
   17076 #
   17077 ftrapcc_lt:
   17078 	fblt.w		ftrapcc_trap		# less than? yes; go take trap
   17079 ftrapcc_lt_no:
   17080 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
   17081 	beq.b		ftrapcc_lt_done		# no; go finish
   17082 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
   17083 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
   17084 	bne.w		ftrapcc_bsun		# yes; go flag bsun
   17085 ftrapcc_lt_done:
   17086 	rts					# no; do nothing
   17087 
   17088 #
   17089 # not less than:
   17090 #	       _
   17091 #	NANv(ZvN)
   17092 #
   17093 ftrapcc_nlt:
   17094 	fbnlt.w		ftrapcc_nlt_yes		# not less than?
   17095 ftrapcc_nlt_no:
   17096 	rts					# no; do nothing
   17097 ftrapcc_nlt_yes:
   17098 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
   17099 	beq.w		ftrapcc_trap		# no; go take trap
   17100 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
   17101 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
   17102 	bne.w		ftrapcc_bsun		# yes; go flag bsun
   17103 	bra.w		ftrapcc_trap		# no; go take trap
   17104 
   17105 #
   17106 # less than or equal:
   17107 #	     ___
   17108 #	Zv(N^NAN)
   17109 #
   17110 ftrapcc_le:
   17111 	fble.w		ftrapcc_le_yes		# less than or equal?
   17112 ftrapcc_le_no:
   17113 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
   17114 	beq.b		ftrapcc_le_done		# no; go finish
   17115 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
   17116 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
   17117 	bne.w		ftrapcc_bsun		# yes; go flag bsun
   17118 ftrapcc_le_done:
   17119 	rts					# no; do nothing
   17120 ftrapcc_le_yes:
   17121 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
   17122 	beq.w		ftrapcc_trap		# no; go take trap
   17123 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
   17124 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
   17125 	bne.w		ftrapcc_bsun		# yes; go flag bsun
   17126 	bra.w		ftrapcc_trap		# no; go take trap
   17127 
   17128 #
   17129 # not (less than or equal):
   17130 #	     ___
   17131 #	NANv(NvZ)
   17132 #
   17133 ftrapcc_nle:
   17134 	fbnle.w		ftrapcc_nle_yes		# not (less than or equal)?
   17135 ftrapcc_nle_no:
   17136 	rts					# no; do nothing
   17137 ftrapcc_nle_yes:
   17138 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
   17139 	beq.w		ftrapcc_trap		# no; go take trap
   17140 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
   17141 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
   17142 	bne.w		ftrapcc_bsun		# yes; go flag bsun
   17143 	bra.w		ftrapcc_trap		# no; go take trap
   17144 
   17145 #
   17146 # greater or less than:
   17147 #	_____
   17148 #	NANvZ
   17149 #
   17150 ftrapcc_gl:
   17151 	fbgl.w		ftrapcc_trap		# greater or less than? yes; go take trap
   17152 ftrapcc_gl_no:
   17153 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
   17154 	beq.b		ftrapcc_gl_done		# no; go finish
   17155 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
   17156 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
   17157 	bne.w		ftrapcc_bsun		# yes; go flag bsun
   17158 ftrapcc_gl_done:
   17159 	rts					# no; do nothing
   17160 
   17161 #
   17162 # not (greater or less than):
   17163 #
   17164 #	NANvZ
   17165 #
   17166 ftrapcc_ngl:
   17167 	fbngl.w		ftrapcc_ngl_yes		# not (greater or less than)?
   17168 ftrapcc_ngl_no:
   17169 	rts					# no; do nothing
   17170 ftrapcc_ngl_yes:
   17171 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
   17172 	beq.w		ftrapcc_trap		# no; go take trap
   17173 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
   17174 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
   17175 	bne.w		ftrapcc_bsun		# yes; go flag bsun
   17176 	bra.w		ftrapcc_trap		# no; go take trap
   17177 
   17178 #
   17179 # greater, less, or equal:
   17180 #	___
   17181 #	NAN
   17182 #
   17183 ftrapcc_gle:
   17184 	fbgle.w		ftrapcc_trap		# greater, less, or equal? yes; go take trap
   17185 ftrapcc_gle_no:
   17186 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # predicate false => NAN; set BSUN and AIOP bits
   17187 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
   17188 	bne.w		ftrapcc_bsun		# yes; go flag bsun
   17189 	rts					# no; do nothing
   17190 
   17191 #
   17192 # not (greater, less, or equal):
   17193 #
   17194 #	NAN
   17195 #
   17196 ftrapcc_ngle:
   17197 	fbngle.w	ftrapcc_ngle_yes	# not (greater, less, or equal)?
   17198 ftrapcc_ngle_no:
   17199 	rts					# no; do nothing
   17200 ftrapcc_ngle_yes:
   17201 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # predicate true => NAN; set BSUN and AIOP bits
   17202 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
   17203 	bne.w		ftrapcc_bsun		# yes; go flag bsun
   17204 	bra.w		ftrapcc_trap		# no; go take trap
   17205 
   17206 #########################################################################
   17207 #									#
   17208 # Miscellaneous tests							#
   17209 #									#
   17210 # F and T never set the BSUN exception bit. The signalling tests	#
   17211 # (SF, ST, SEQ, SNEQ) set BSUN and AIOP whenever the NAN bit is set	#
   17212 # in the floating point condition codes.				#
   17213 #									#
   17214 #########################################################################
   17215 
   17216 #
   17217 # false:
   17218 #
   17219 #	False
   17220 #
   17221 ftrapcc_f:
   17222 	rts					# never true; do nothing
   17223 
   17224 #
   17225 # true:
   17226 #
   17227 #	True
   17228 #
   17229 ftrapcc_t:
   17230 	bra.w		ftrapcc_trap		# always true; go take trap
   17231 
   17232 #
   17233 # signalling false:
   17234 #
   17235 #	False
   17236 #
   17237 ftrapcc_sf:
   17238 	btst		&nan_bit, FPSR_CC(%a6) 	# is NAN set in cc?
   17239 	beq.b		ftrapcc_sf_done		# no; go finish
   17240 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
   17241 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
   17242 	bne.w		ftrapcc_bsun		# yes; go flag bsun
   17243 ftrapcc_sf_done:
   17244 	rts					# no; do nothing
   17245 
   17246 #
   17247 # signalling true:
   17248 #
   17249 #	True
   17250 #
   17251 ftrapcc_st:
   17252 	btst		&nan_bit, FPSR_CC(%a6) 	# is NAN set in cc?
   17253 	beq.w		ftrapcc_trap		# no; go take trap
   17254 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
   17255 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
   17256 	bne.w		ftrapcc_bsun		# yes; go flag bsun
   17257 	bra.w		ftrapcc_trap		# no; go take trap
   17258 
   17259 #
   17260 # signalling equal:
   17261 #
   17262 #	Z
   17263 #
   17264 ftrapcc_seq:
   17265 	fbseq.w		ftrapcc_seq_yes		# signalling equal?
   17266 ftrapcc_seq_no:
   17267 	btst		&nan_bit, FPSR_CC(%a6) 	# is NAN set in cc?
   17268 	beq.w		ftrapcc_seq_done	# no; go finish
   17269 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
   17270 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
   17271 	bne.w		ftrapcc_bsun		# yes; go flag bsun
   17272 ftrapcc_seq_done:
   17273 	rts					# no; do nothing
   17274 ftrapcc_seq_yes:
   17275 	btst		&nan_bit, FPSR_CC(%a6) 	# is NAN set in cc?
   17276 	beq.w		ftrapcc_trap		# no; go take trap
   17277 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
   17278 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
   17279 	bne.w		ftrapcc_bsun		# yes; go flag bsun
   17280 	bra.w		ftrapcc_trap		# no; go take trap
   17281 
   17282 #
   17283 # signalling not equal:
   17284 #	_
   17285 #	Z
   17286 #
   17287 ftrapcc_sneq:
   17288 	fbsneq.w	ftrapcc_sneq_yes	# signalling not equal?
   17289 ftrapcc_sneq_no:
   17290 	btst		&nan_bit, FPSR_CC(%a6) 	# is NAN set in cc?
   17291 	beq.w		ftrapcc_sneq_no_done	# no; go finish
   17292 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
   17293 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
   17294 	bne.w		ftrapcc_bsun		# yes; go flag bsun
   17295 ftrapcc_sneq_no_done:
   17296 	rts					# no; do nothing
   17297 ftrapcc_sneq_yes:
   17298 	btst		&nan_bit, FPSR_CC(%a6) 	# is NAN set in cc?
   17299 	beq.w		ftrapcc_trap		# no; go take trap
   17300 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
   17301 	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
   17302 	bne.w		ftrapcc_bsun		# yes; go flag bsun
   17303 	bra.w		ftrapcc_trap		# no; go take trap
   17304 
   17305 #########################################################################
   17306 #									#
   17307 # IEEE Aware tests							#
   17308 #									#
   17309 # For the IEEE aware tests, we only have to set the result based on the	#
   17310 # floating point condition codes. The BSUN exception will not be	#
   17311 # set for any of these tests.						#
   17312 #									#
   17313 #########################################################################
   17314 
   17315 #
   17316 # ordered greater than:
   17317 #	_______
   17318 #	NANvZvN
   17319 #
   17320 ftrapcc_ogt:
   17321 	fbogt.w		ftrapcc_trap		# ordered greater than? yes; go take trap
   17322 ftrapcc_ogt_no:
   17323 	rts					# no; do nothing
   17324 
   17325 #
   17326 # unordered or less or equal:
   17327 #
   17328 #	NANvZvN
   17329 #
   17330 ftrapcc_ule:
   17331 	fbule.w		ftrapcc_trap		# unordered or less or equal? yes; go take trap
   17332 ftrapcc_ule_no:
   17333 	rts					# no; do nothing
   17334 
   17335 #
   17336 # ordered greater than or equal:
   17337 #	   _____
   17338 #	Zv(NANvN)
   17339 #
   17340 ftrapcc_oge:
   17341 	fboge.w		ftrapcc_trap		# ordered greater than or equal? yes; go take trap
   17342 ftrapcc_oge_no:
   17343 	rts					# no; do nothing
   17344 
   17345 #
   17346 # unordered or less than:
   17347 #	       _
   17348 #	NANv(N^Z)
   17349 #
   17350 ftrapcc_ult:
   17351 	fbult.w		ftrapcc_trap		# unordered or less than? yes; go take trap
   17352 ftrapcc_ult_no:
   17353 	rts					# no; do nothing
   17354 
   17355 #
   17356 # ordered less than:
   17357 #	   _____
   17358 #	N^(NANvZ)
   17359 #
   17360 ftrapcc_olt:
   17361 	fbolt.w		ftrapcc_trap		# ordered less than? yes; go take trap
   17362 ftrapcc_olt_no:
   17363 	rts					# no; do nothing
   17364 
   17365 #
   17366 # unordered or greater or equal:
   17367 #	      _
   17368 #	NANvZvN
   17369 #
   17370 ftrapcc_uge:
   17371 	fbuge.w		ftrapcc_trap		# unordered or greater or equal? yes; go take trap
   17372 ftrapcc_uge_no:
   17373 	rts					# no; do nothing
   17374 
   17375 #
   17376 # ordered less than or equal:
   17377 #	     ___
   17378 #	Zv(N^NAN)
   17379 #
   17380 ftrapcc_ole:
   17381 	fbole.w		ftrapcc_trap		# ordered less than or equal? yes; go take trap
   17382 ftrapcc_ole_no:
   17383 	rts					# no; do nothing
   17384 
   17385 #
   17386 # unordered or greater than:
   17387 #	     ___
   17388 #	NANv(NvZ)
   17389 #
   17390 ftrapcc_ugt:
   17391 	fbugt.w		ftrapcc_trap		# unordered or greater than? yes; go take trap
   17392 ftrapcc_ugt_no:
   17393 	rts					# no; do nothing
   17394 
   17395 #
   17396 # ordered greater or less than:
   17397 #	_____
   17398 #	NANvZ
   17399 #
   17400 ftrapcc_ogl:
   17401 	fbogl.w		ftrapcc_trap		# ordered greater or less than? yes; go take trap
   17402 ftrapcc_ogl_no:
   17403 	rts					# no; do nothing
   17404 
   17405 #
   17406 # unordered or equal:
   17407 #
   17408 #	NANvZ
   17409 #
   17410 ftrapcc_ueq:
   17411 	fbueq.w		ftrapcc_trap		# unordered or equal? yes; go take trap
   17412 ftrapcc_ueq_no:
   17413 	rts					# no; do nothing
   17414 
   17415 #
   17416 # ordered:
   17417 #	___
   17418 #	NAN
   17419 #
   17420 ftrapcc_or:
   17421 	fbor.w		ftrapcc_trap		# ordered? yes; go take trap
   17422 ftrapcc_or_no:
   17423 	rts					# no; do nothing
   17424 
   17425 #
   17426 # unordered:
   17427 #
   17428 #	NAN
   17429 #
   17430 ftrapcc_un:
   17431 	fbun.w		ftrapcc_trap		# unordered? yes; go take trap
   17432 ftrapcc_un_no:
   17433 	rts					# no; do nothing
   17434 
   17435 #######################################################################
   17436 
   17437 # the predicate is true and no enabled bsun exception is pending.
   17438 # we will need to jump to the ftrapcc vector. the stack frame
   17439 # is the same size as that of the fp unimp instruction. the
   17440 # only difference is that the <ea> field should hold the PC
   17441 # of the ftrapcc instruction and the vector offset field
   17442 # should denote the ftrapcc trap.
   17443 ftrapcc_trap:
   17444 	mov.b		&ftrapcc_flg,SPCOND_FLG(%a6)	# tell caller to take the ftrapcc trap
   17445 	rts
   17446 
   17447 # the emulation routine set bsun and BSUN was enabled. have to
   17448 # fix stack and jump to the bsun handler.
   17449 # let the caller of this routine shift the stack frame up to
   17450 # eliminate the effective address field.
   17451 ftrapcc_bsun:
   17452 	mov.b		&fbsun_flg,SPCOND_FLG(%a6)	# flag enabled bsun for the caller
   17453 	rts
   17454 
   17455 #########################################################################
   17456 # fscc(): routine to emulate the fscc instruction			#
   17457 #									#
   17458 # XDEF **************************************************************** #
   17459 #	_fscc()								#
   17460 #									#
   17461 # XREF **************************************************************** #
   17462 #	store_dreg_b() - store result to data register file		#
   17463 #	dec_areg() - decrement an areg for -(an) mode			#
   17464 #	inc_areg() - increment an areg for (an)+ mode			#
   17465 #	_dmem_write_byte() - store result to memory			#
   17466 #									#
   17467 # INPUT ***************************************************************	#
   17468 #	none								#
   17469 #									#
   17470 # OUTPUT ************************************************************** #
   17471 #	none								#
   17472 #									#
   17473 # ALGORITHM ***********************************************************	#
   17474 #	This routine checks which conditional predicate is specified by	#
   17475 # the stacked fscc instruction opcode and then branches to a routine	#
   17476 # for that predicate. The corresponding fbcc instruction is then used	#
   17477 # to see whether the condition (specified by the stacked FPSR) is true	#
   17478 # or false.								#
   17479 #	If a BSUN exception should be indicated, the BSUN and AIOP	#
   17480 # bits are set in the stacked FPSR. If the BSUN exception is enabled,	#
   17481 # the fbsun_flg is set in the SPCOND_FLG location on the stack. If an 	#
   17482 # enabled BSUN should not be flagged and the predicate is true, then	#
   17483 # the result is stored to the data register file or memory		#
   17484 #									#
   17485 #########################################################################
   17486 
   17487 	global		_fscc
   17488 _fscc:
         # NOTE(review): %d0 is used below as a table index without masking;
         # presumably the caller guarantees a predicate in 0-31 - confirm.
   17489 	mov.w		EXC_CMDREG(%a6),%d0	# fetch predicate (table index)
   17490 
   17491 	clr.l		%d1			# clear scratch reg
   17492 	mov.b		FPSR_CC(%a6),%d1	# fetch stacked fp ccodes
   17493 	ror.l		&0x8,%d1		# rotate low byte to top byte
   17494 	fmov.l		%d1,%fpsr		# insert into FPSR so fbcc can test them
   17495 
   17496 	mov.w		(tbl_fscc.b,%pc,%d0.w*2),%d1 # load routine offset from table
   17497 	jmp		(tbl_fscc.b,%pc,%d1.w) 	# jump to fscc routine
   17498 
   17499 tbl_fscc:
         # One 16-bit entry per predicate; each entry is the offset of the
         # predicate's routine from tbl_fscc. The trailing comment is the
         # predicate value used as the index by _fscc.
   17500 	short		fscc_f		-	tbl_fscc	# 00
   17501 	short		fscc_eq		-	tbl_fscc	# 01
   17502 	short		fscc_ogt	-	tbl_fscc	# 02
   17503 	short		fscc_oge	-	tbl_fscc	# 03
   17504 	short		fscc_olt	-	tbl_fscc	# 04
   17505 	short		fscc_ole	-	tbl_fscc	# 05
   17506 	short		fscc_ogl	-	tbl_fscc	# 06
   17507 	short		fscc_or		-	tbl_fscc	# 07
   17508 	short		fscc_un		-	tbl_fscc	# 08
   17509 	short		fscc_ueq	-	tbl_fscc	# 09
   17510 	short		fscc_ugt	-	tbl_fscc	# 10
   17511 	short		fscc_uge	-	tbl_fscc	# 11
   17512 	short		fscc_ult	-	tbl_fscc	# 12
   17513 	short		fscc_ule	-	tbl_fscc	# 13
   17514 	short		fscc_neq	-	tbl_fscc	# 14
   17515 	short		fscc_t		-	tbl_fscc	# 15
   17516 	short		fscc_sf		-	tbl_fscc	# 16
   17517 	short		fscc_seq	-	tbl_fscc	# 17
   17518 	short		fscc_gt		-	tbl_fscc	# 18
   17519 	short		fscc_ge		-	tbl_fscc	# 19
   17520 	short		fscc_lt		-	tbl_fscc	# 20
   17521 	short		fscc_le		-	tbl_fscc	# 21
   17522 	short		fscc_gl		-	tbl_fscc	# 22
   17523 	short		fscc_gle	-	tbl_fscc	# 23
   17524 	short		fscc_ngle	-	tbl_fscc	# 24
   17525 	short		fscc_ngl	-	tbl_fscc	# 25
   17526 	short		fscc_nle	-	tbl_fscc	# 26
   17527 	short		fscc_nlt	-	tbl_fscc	# 27
   17528 	short		fscc_nge	-	tbl_fscc	# 28
   17529 	short		fscc_ngt	-	tbl_fscc	# 29
   17530 	short		fscc_sneq	-	tbl_fscc	# 30
   17531 	short		fscc_st		-	tbl_fscc	# 31
   17532 
   17533 #########################################################################
   17534 #									#
   17535 # IEEE Nonaware tests							#
   17536 #									#
   17537 # For the IEEE nonaware tests, we set the result based on the		#
   17538 # floating point condition codes. In addition, we check to see		#
   17539 # if the NAN bit is set, in which case BSUN and AIOP will be set.	#
   17540 #									#
   17541 # The cases EQ and NE are shared by the Aware and Nonaware groups	#
   17542 # and are incapable of setting the BSUN exception bit.			#
   17543 #									#
   17544 # Typically, only one of the two possible branch directions could	#
   17545 # have the NAN bit set.							#
   17546 #									#
   17547 #########################################################################
   17548 
   17549 #
   17550 # equal:
   17551 #
   17552 #	Z
   17553 #
   17554 fscc_eq:
   17555 	fbeq.w		fscc_eq_yes		# equal?
   17556 fscc_eq_no:
   17557 	clr.b		%d0			# no; set false
   17558 	bra.w		fscc_done		# go finish
   17559 fscc_eq_yes:
   17560 	st		%d0			# yes; set true
   17561 	bra.w		fscc_done		# go finish
   17562 
   17563 #
   17564 # not equal:
   17565 #	_
   17566 #	Z
   17567 #
   17568 fscc_neq:
   17569 	fbneq.w		fscc_neq_yes		# not equal?
   17570 fscc_neq_no:
   17571 	clr.b		%d0			# no; set false
   17572 	bra.w		fscc_done		# go finish
   17573 fscc_neq_yes:
   17574 	st		%d0			# yes; set true
   17575 	bra.w		fscc_done		# go finish
   17576 
   17577 #
   17578 # greater than:
   17579 #	_______
   17580 #	NANvZvN
   17581 #
   17582 fscc_gt:
   17583 	fbgt.w		fscc_gt_yes		# greater than?
   17584 fscc_gt_no:
   17585 	clr.b		%d0			# set false
   17586 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
   17587 	beq.w		fscc_done		# no;go finish
   17588 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
   17589 	bra.w		fscc_chk_bsun		# go do BSUN check
   17590 fscc_gt_yes:
   17591 	st		%d0			# set true
   17592 	bra.w		fscc_done		# go finish
   17593 
   17594 #
   17595 # not greater than:
   17596 #
   17597 #	NANvZvN
   17598 #
   17599 fscc_ngt:
   17600 	fbngt.w		fscc_ngt_yes		# not greater than?
   17601 fscc_ngt_no:
   17602 	clr.b		%d0			# set false
   17603 	bra.w		fscc_done		# go finish
   17604 fscc_ngt_yes:
   17605 	st		%d0			# set true
   17606 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
   17607 	beq.w		fscc_done		# no;go finish
   17608 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
   17609 	bra.w		fscc_chk_bsun		# go do BSUN check
   17610 
   17611 #
   17612 # greater than or equal:
   17613 #	   _____
   17614 #	Zv(NANvN)
   17615 #
   17616 fscc_ge:
   17617 	fbge.w		fscc_ge_yes		# greater than or equal?
   17618 fscc_ge_no:
   17619 	clr.b		%d0			# set false
   17620 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
   17621 	beq.w		fscc_done		# no;go finish
   17622 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
   17623 	bra.w		fscc_chk_bsun		# go do BSUN check
   17624 fscc_ge_yes:
   17625 	st		%d0			# set true
   17626 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
   17627 	beq.w		fscc_done		# no;go finish
   17628 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
   17629 	bra.w		fscc_chk_bsun		# go do BSUN check
   17630 
   17631 #
   17632 # not (greater than or equal):
   17633 #	       _
   17634 #	NANv(N^Z)
   17635 #
   17636 fscc_nge:
   17637 	fbnge.w		fscc_nge_yes		# not (greater than or equal)?
   17638 fscc_nge_no:
   17639 	clr.b		%d0			# set false
   17640 	bra.w		fscc_done		# go finish
   17641 fscc_nge_yes:
   17642 	st		%d0			# set true
   17643 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
   17644 	beq.w		fscc_done		# no;go finish
   17645 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
   17646 	bra.w		fscc_chk_bsun		# go do BSUN check
   17647 
   17648 #
   17649 # less than:
   17650 #	   _____
   17651 #	N^(NANvZ)
   17652 #
   17653 fscc_lt:
   17654 	fblt.w		fscc_lt_yes		# less than?
   17655 fscc_lt_no:
   17656 	clr.b		%d0			# set false
   17657 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
   17658 	beq.w		fscc_done		# no;go finish
   17659 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
   17660 	bra.w		fscc_chk_bsun		# go do BSUN check
   17661 fscc_lt_yes:
   17662 	st		%d0			# set true
   17663 	bra.w		fscc_done		# go finish
   17664 
   17665 #
   17666 # not less than:
   17667 #	       _
   17668 #	NANv(ZvN)
   17669 #
   17670 fscc_nlt:
   17671 	fbnlt.w		fscc_nlt_yes		# not less than?
   17672 fscc_nlt_no:
   17673 	clr.b		%d0			# set false
   17674 	bra.w		fscc_done		# go finish
   17675 fscc_nlt_yes:
   17676 	st		%d0			# set true
   17677 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
   17678 	beq.w		fscc_done		# no;go finish
   17679 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
   17680 	bra.w		fscc_chk_bsun		# go do BSUN check
   17681 
   17682 #
   17683 # less than or equal:
   17684 #	     ___
   17685 #	Zv(N^NAN)
   17686 #
   17687 fscc_le:
   17688 	fble.w		fscc_le_yes		# less than or equal?
   17689 fscc_le_no:
   17690 	clr.b		%d0			# set false
   17691 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
   17692 	beq.w		fscc_done		# no;go finish
   17693 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
   17694 	bra.w		fscc_chk_bsun		# go do BSUN check
   17695 fscc_le_yes:
   17696 	st		%d0			# set true
   17697 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
   17698 	beq.w		fscc_done		# no;go finish
   17699 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
   17700 	bra.w		fscc_chk_bsun		# go do BSUN check
   17701 
   17702 #
   17703 # not (less than or equal):
   17704 #	     ___
   17705 #	NANv(NvZ)
   17706 #
   17707 fscc_nle:
   17708 	fbnle.w		fscc_nle_yes		# not (less than or equal)?
   17709 fscc_nle_no:
   17710 	clr.b		%d0			# set false
   17711 	bra.w		fscc_done		# go finish
   17712 fscc_nle_yes:
   17713 	st		%d0			# set true
   17714 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
   17715 	beq.w		fscc_done		# no;go finish
   17716 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
   17717 	bra.w		fscc_chk_bsun		# go do BSUN check
   17718 
   17719 #
   17720 # greater or less than:
   17721 #	_____
   17722 #	NANvZ
   17723 #
   17724 fscc_gl:
   17725 	fbgl.w		fscc_gl_yes		# greater or less than?
   17726 fscc_gl_no:
   17727 	clr.b		%d0			# set false
   17728 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
   17729 	beq.w		fscc_done		# no;go finish
   17730 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
   17731 	bra.w		fscc_chk_bsun		# go do BSUN check
   17732 fscc_gl_yes:
   17733 	st		%d0			# set true
   17734 	bra.w		fscc_done		# go finish
   17735 
   17736 #
   17737 # not (greater or less than):
   17738 #
   17739 #	NANvZ
   17740 #
   17741 fscc_ngl:
   17742 	fbngl.w		fscc_ngl_yes		# not (greater or less than)?
   17743 fscc_ngl_no:
   17744 	clr.b		%d0			# set false
   17745 	bra.w		fscc_done		# go finish
   17746 fscc_ngl_yes:
   17747 	st		%d0			# set true
   17748 	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
   17749 	beq.w		fscc_done		# no;go finish
   17750 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
   17751 	bra.w		fscc_chk_bsun		# go do BSUN check
   17752 
   17753 #
   17754 # greater, less, or equal:
   17755 #	___
   17756 #	NAN
   17757 #
   17758 fscc_gle:
   17759 	fbgle.w		fscc_gle_yes		# greater, less, or equal?
   17760 fscc_gle_no:
   17761 	clr.b		%d0			# set false
   17762 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # predicate false => NAN; set BSUN and AIOP bits
   17763 	bra.w		fscc_chk_bsun		# go do BSUN check
   17764 fscc_gle_yes:
   17765 	st		%d0			# set true
   17766 	bra.w		fscc_done		# go finish
   17767 
   17768 #
   17769 # not (greater, less, or equal):
   17770 #
   17771 #	NAN
   17772 #
   17773 fscc_ngle:
   17774 	fbngle.w		fscc_ngle_yes	# not (greater, less, or equal)?
   17775 fscc_ngle_no:
   17776 	clr.b		%d0			# set false
   17777 	bra.w		fscc_done		# go finish
   17778 fscc_ngle_yes:
   17779 	st		%d0			# set true
   17780 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # predicate true => NAN; set BSUN and AIOP bits
   17781 	bra.w		fscc_chk_bsun		# go do BSUN check
   17782 
   17783 #########################################################################
   17784 #									#
   17785 # Miscellaneous tests							#
   17786 #									#
   17787 # F and T never set the BSUN exception bit. The signalling tests	#
   17788 # (SF, ST, SEQ, SNEQ) set BSUN and AIOP whenever the NAN bit is set	#
   17789 # in the floating point condition codes.				#
   17790 #									#
   17791 #########################################################################
   17792 
   17793 #
   17794 # false:
   17795 #
   17796 #	False
   17797 #
   17798 fscc_f:
   17799 	clr.b		%d0			# always false
   17800 	bra.w		fscc_done		# go finish
   17801 
   17802 #
   17803 # true:
   17804 #
   17805 #	True
   17806 #
   17807 fscc_t:
   17808 	st		%d0			# always true
   17809 	bra.w		fscc_done		# go finish
   17810 
   17811 #
   17812 # signalling false:
   17813 #
   17814 #	False
   17815 #
   17816 fscc_sf:
   17817 	clr.b		%d0			# set false
   17818 	btst		&nan_bit, FPSR_CC(%a6) 	# is NAN set in cc?
   17819 	beq.w		fscc_done		# no;go finish
   17820 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
   17821 	bra.w		fscc_chk_bsun		# go do BSUN check
   17822 
   17823 #
   17824 # signalling true:
   17825 #
   17826 #	True
   17827 #
   17828 fscc_st:
   17829 	st		%d0			# set true
   17830 	btst		&nan_bit, FPSR_CC(%a6) 	# is NAN set in cc?
   17831 	beq.w		fscc_done		# no;go finish
   17832 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
   17833 	bra.w		fscc_chk_bsun		# go do BSUN check
   17834 
   17835 #
   17836 # signalling equal:
   17837 #
   17838 #	Z
   17839 #
   17840 fscc_seq:
   17841 	fbseq.w		fscc_seq_yes		# signalling equal?
   17842 fscc_seq_no:
   17843 	clr.b		%d0			# set false
   17844 	btst		&nan_bit, FPSR_CC(%a6) 	# is NAN set in cc?
   17845 	beq.w		fscc_done		# no;go finish
   17846 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
   17847 	bra.w		fscc_chk_bsun		# go do BSUN check
   17848 fscc_seq_yes:
   17849 	st		%d0			# set true
   17850 	btst		&nan_bit, FPSR_CC(%a6) 	# is NAN set in cc?
   17851 	beq.w		fscc_done		# no;go finish
   17852 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
   17853 	bra.w		fscc_chk_bsun		# go do BSUN check
   17854 
   17855 #
   17856 # signalling not equal: BSUN is flagged if the NAN cc bit is set.
   17857 #	_
   17858 #	Z
   17859 #
   17860 fscc_sneq:
   17861 	fbsneq.w	fscc_sneq_yes		# signalling not equal?
   17862 fscc_sneq_no:
   17863 	clr.b		%d0			# set false
   17864 	btst		&nan_bit, FPSR_CC(%a6) 	# is the NAN cc bit set?
   17865 	beq.w		fscc_done		# no; go finish
   17866 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # yes; set BSUN exc bit
   17867 	bra.w		fscc_chk_bsun		# see if BSUN is enabled
   17868 fscc_sneq_yes:
   17869 	st		%d0			# set true
   17870 	btst		&nan_bit, FPSR_CC(%a6) 	# is the NAN cc bit set?
   17871 	beq.w		fscc_done		# no; go finish
   17872 	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # yes; set BSUN exc bit
   17873 	bra.w		fscc_chk_bsun		# see if BSUN is enabled
   17874 
   17875 #########################################################################
   17876 #									#
   17877 # IEEE Aware tests							#
   17878 #									#
   17879 # For the IEEE aware tests, we only have to set the result based on the	#
   17880 # floating point condition codes. The BSUN exception will not be	#
   17881 # set for any of these tests.						#
   17882 #									#
   17883 #########################################################################
   17884 
   17885 #
   17886 # ordered greater than: true when none of NAN, Z, N is set.
   17887 #	_______
   17888 #	NANvZvN
   17889 #
   17890 fscc_ogt:
   17891 	fbogt.w		fscc_ogt_yes		# ordered greater than?
   17892 fscc_ogt_no:
   17893 	clr.b		%d0			# set false
   17894 	bra.w		fscc_done		# go store the result
   17895 fscc_ogt_yes:
   17896 	st		%d0			# set true
   17897 	bra.w		fscc_done		# go store the result
   17898 
   17899 #
   17900 # unordered or less or equal: true when any of NAN, Z, N is set.
   17901 #
   17902 #	NANvZvN
   17903 #
   17904 fscc_ule:
   17905 	fbule.w		fscc_ule_yes		# unordered or less or equal?
   17906 fscc_ule_no:
   17907 	clr.b		%d0			# set false
   17908 	bra.w		fscc_done		# go store the result
   17909 fscc_ule_yes:
   17910 	st		%d0			# set true
   17911 	bra.w		fscc_done		# go store the result
   17912 
   17913 #
   17914 # ordered greater than or equal: Z set, or neither NAN nor N set.
   17915 #	   _____
   17916 #	Zv(NANvN)
   17917 #
   17918 fscc_oge:
   17919 	fboge.w		fscc_oge_yes		# ordered greater than or equal?
   17920 fscc_oge_no:
   17921 	clr.b		%d0			# set false
   17922 	bra.w		fscc_done		# go store the result
   17923 fscc_oge_yes:
   17924 	st		%d0			# set true
   17925 	bra.w		fscc_done		# go store the result
   17926 
   17927 #
   17928 # unordered or less than: NAN set, or N set with Z clear.
   17929 #	       _
   17930 #	NANv(N^Z)
   17931 #
   17932 fscc_ult:
   17933 	fbult.w		fscc_ult_yes		# unordered or less than?
   17934 fscc_ult_no:
   17935 	clr.b		%d0			# set false
   17936 	bra.w		fscc_done		# go store the result
   17937 fscc_ult_yes:
   17938 	st		%d0			# set true
   17939 	bra.w		fscc_done		# go store the result
   17940 
   17941 #
   17942 # ordered less than: N set with neither NAN nor Z set.
   17943 #	   _____
   17944 #	N^(NANvZ)
   17945 #
   17946 fscc_olt:
   17947 	fbolt.w		fscc_olt_yes		# ordered less than?
   17948 fscc_olt_no:
   17949 	clr.b		%d0			# set false
   17950 	bra.w		fscc_done		# go store the result
   17951 fscc_olt_yes:
   17952 	st		%d0			# set true
   17953 	bra.w		fscc_done		# go store the result
   17954 
   17955 #
   17956 # unordered or greater or equal: NAN set, or Z set, or N clear.
   17957 #	      _
   17958 #	NANvZvN
   17959 #
   17960 fscc_uge:
   17961 	fbuge.w		fscc_uge_yes		# unordered or greater or equal?
   17962 fscc_uge_no:
   17963 	clr.b		%d0			# set false
   17964 	bra.w		fscc_done		# go store the result
   17965 fscc_uge_yes:
   17966 	st		%d0			# set true
   17967 	bra.w		fscc_done		# go store the result
   17968 
   17969 #
   17970 # ordered less than or equal: Z set, or N set with NAN clear.
   17971 #	     ___
   17972 #	Zv(N^NAN)
   17973 #
   17974 fscc_ole:
   17975 	fbole.w		fscc_ole_yes		# ordered less than or equal?
   17976 fscc_ole_no:
   17977 	clr.b		%d0			# set false
   17978 	bra.w		fscc_done		# go store the result
   17979 fscc_ole_yes:
   17980 	st		%d0			# set true
   17981 	bra.w		fscc_done		# go store the result
   17982 
   17983 #
   17984 # unordered or greater than: NAN set, or neither N nor Z set.
   17985 #	     ___
   17986 #	NANv(NvZ)
   17987 #
   17988 fscc_ugt:
   17989 	fbugt.w		fscc_ugt_yes		# unordered or greater than?
   17990 fscc_ugt_no:
   17991 	clr.b		%d0			# set false
   17992 	bra.w		fscc_done		# go store the result
   17993 fscc_ugt_yes:
   17994 	st		%d0			# set true
   17995 	bra.w		fscc_done		# go store the result
   17996 
   17997 #
   17998 # ordered greater or less than: neither NAN nor Z set.
   17999 #	_____
   18000 #	NANvZ
   18001 #
   18002 fscc_ogl:
   18003 	fbogl.w		fscc_ogl_yes		# ordered greater or less than?
   18004 fscc_ogl_no:
   18005 	clr.b		%d0			# set false
   18006 	bra.w		fscc_done		# go store the result
   18007 fscc_ogl_yes:
   18008 	st		%d0			# set true
   18009 	bra.w		fscc_done		# go store the result
   18010 
   18011 #
   18012 # unordered or equal: NAN or Z set.
   18013 #
   18014 #	NANvZ
   18015 #
   18016 fscc_ueq:
   18017 	fbueq.w		fscc_ueq_yes		# unordered or equal?
   18018 fscc_ueq_no:
   18019 	clr.b		%d0			# set false
   18020 	bra.w		fscc_done		# go store the result
   18021 fscc_ueq_yes:
   18022 	st		%d0			# set true
   18023 	bra.w		fscc_done		# go store the result
   18024 
   18025 #
   18026 # ordered: NAN clear.
   18027 #	___
   18028 #	NAN
   18029 #
   18030 fscc_or:
   18031 	fbor.w		fscc_or_yes		# ordered?
   18032 fscc_or_no:
   18033 	clr.b		%d0			# set false
   18034 	bra.w		fscc_done		# go store the result
   18035 fscc_or_yes:
   18036 	st		%d0			# set true
   18037 	bra.w		fscc_done		# go store the result
   18038 
   18039 #
   18040 # unordered: NAN set.
   18041 #
   18042 #	NAN
   18043 #
   18044 fscc_un:
   18045 	fbun.w		fscc_un_yes		# unordered?
   18046 fscc_un_no:
   18047 	clr.b		%d0			# set false
   18048 	bra.w		fscc_done		# go store the result
   18049 fscc_un_yes:
   18050 	st		%d0			# set true
   18051 	bra.w		fscc_done		# go store the result
   18052 
   18053 #######################################################################
   18054 
   18055 #
   18056 # the bsun exception bit was set. now, check to see if BSUN
   18057 # is enabled. if so, don't store result and correct stack frame
   18058 # for a bsun exception. if not, fall through to fscc_done.
   18059 #
   18060 fscc_chk_bsun:
   18061 	btst		&bsun_bit,FPCR_ENABLE(%a6) # is BSUN enabled?
   18062 	bne.w		fscc_bsun		# yes; take the bsun exit
   18063 
   18064 #
   18065 # either the bsun exception bit was not set, or it was set but not enabled.
   18066 # the result has been selected (d0.b = 0x00 or 0xff).
   18067 # now, check to see if the result is to be stored in the data register
   18068 # file or in memory.
   18069 #
   18070 fscc_done:
   18071 	mov.l		%d0,%a0			# save result for a moment
   18072 
   18073 	mov.b		1+EXC_OPWORD(%a6),%d1	# fetch lo opword
   18074 	mov.l		%d1,%d0			# make a copy
   18075 	andi.b		&0x38,%d1		# extract <ea> mode bits
   18076 
   18077 	bne.b		fscc_mem_op		# mode != 0: it's a memory operation
   18078 
   18079 	mov.l		%d0,%d1			# mode == 0: recover lo opword
   18080 	andi.w		&0x7,%d1		# pass index in d1
   18081 	mov.l		%a0,%d0			# pass result in d0
   18082 	bsr.l		store_dreg_b		# save result in regfile
   18083 	rts
   18084 
   18085 #
   18086 # the stacked <ea> is correct with the exception of:
   18087 # 	-> Dn : <ea> is garbage
   18088 #
   18089 # if the addressing mode is post-increment or pre-decrement,
   18090 # then the address registers have not been updated.
   18091 # on entry: d1.b = opword & 0x38 (mode bits), a0 = result byte.
   18092 fscc_mem_op:
   18093 	cmpi.b		%d1,&0x18		# is <ea> (An)+ ?
   18094 	beq.b		fscc_mem_inc		# yes
   18095 	cmpi.b		%d1,&0x20		# is <ea> -(An) ?
   18096 	beq.b		fscc_mem_dec		# yes
   18097 
   18098 	mov.l		%a0,%d0			# pass result in d0
   18099 	mov.l		EXC_EA(%a6),%a0		# fetch <ea>
   18100 	bsr.l		_dmem_write_byte	# write result byte
   18101 
   18102 	tst.l		%d1			# did dstore fail?
   18103 	bne.w		fscc_err		# yes
   18104 
   18105 	rts
   18106 
   18107 # addressing mode is post-increment. write the result byte. if the write
   18108 # fails then don't update the address register. if write passes then
   18109 # call inc_areg() to update the address register.
   18110 fscc_mem_inc:
   18111 	mov.l		%a0,%d0			# pass result in d0
   18112 	mov.l		EXC_EA(%a6),%a0		# fetch <ea>
   18113 	bsr.l		_dmem_write_byte	# write result byte
   18114 
   18115 	tst.l		%d1			# did dstore fail?
   18116 	bne.w		fscc_err		# yes; An is left untouched
   18117 
   18118 	mov.b		0x1+EXC_OPWORD(%a6),%d1	# fetch lo opword
   18119 	andi.w		&0x7,%d1		# pass An index in d1
   18120 	movq.l		&0x1,%d0		# pass amt to inc by (one byte)
   18121 	bsr.l		inc_areg		# increment address register
   18122 
   18123 	rts
   18124 
   18125 # addressing mode is pre-decrement. write the result byte. if the write
   18126 # fails then don't update the address register. if the write passes then
   18127 # call dec_areg() to update the address register.
   18128 fscc_mem_dec:
   18129 	mov.l		%a0,%d0			# pass result in d0
   18130 	mov.l		EXC_EA(%a6),%a0		# fetch <ea>
   18131 	bsr.l		_dmem_write_byte	# write result byte
   18132 
   18133 	tst.l		%d1			# did dstore fail?
   18134 	bne.w		fscc_err		# yes; An is left untouched
   18135 
   18136 	mov.b		0x1+EXC_OPWORD(%a6),%d1	# fetch lo opword
   18137 	andi.w		&0x7,%d1		# pass An index in d1
   18138 	movq.l		&0x1,%d0		# pass amt to dec by (one byte)
   18139 	bsr.l		dec_areg		# decrement address register
   18140 
   18141 	rts
   18142 
   18143 # the emulation routine set bsun and BSUN was enabled. have to
   18144 # fix stack and jump to the bsun handler. that is not done here:
   18145 # this routine only records fbsun_flg in SPCOND_FLG, and lets the caller
   18146 # shift the stack frame up to eliminate the effective address field.
   18147 fscc_bsun:
   18148 	mov.b		&fbsun_flg,SPCOND_FLG(%a6) # tell caller: bsun exit needed
   18149 	rts
   18150 
   18151 # the byte write to memory has failed. store 0x00a1 in the frame's EXC_VOFF
   18152 # field and exit through facc_finish. NOTE(review): funimp_dacc() was named here; confirm.
   18153 fscc_err:
   18154 	mov.w		&0x00a1,EXC_VOFF(%a6)	# value consumed by facc_finish
   18155 	bra.l		facc_finish
   18156 
   18157 #########################################################################
   18158 # XDEF ****************************************************************	#
   18159 #	fmovm_dynamic(): emulate "fmovm" dynamic instruction		#
   18160 #									#
   18161 # XREF ****************************************************************	#
   18162 #	fetch_dreg() - fetch data register				#
   18163 #	{i,d,}mem_read() - fetch data from memory			#
   18164 #	_mem_write() - write data to memory				#
   18165 #	iea_iacc() - instruction memory access error occurred		#
   18166 #	iea_dacc() - data memory access error occurred			#
   18167 #	restore() - restore An index regs if access error occurred	#
   18168 #									#
   18169 # INPUT ***************************************************************	#
   18170 #	None								#
   18171 # 									#
   18172 # OUTPUT **************************************************************	#
   18173 #	If instr is "fmovm Dn,-(A7)" from supervisor mode,		#
   18174 #		d0 = size of dump					#
   18175 #		d1 = Dn							#
   18176 #	Else if instruction access error,				#
   18177 #		d0 = FSLW						#
   18178 #	Else if data access error,					#
   18179 #		d0 = FSLW						#
   18180 #		a0 = address of fault					#
   18181 #	Else								#
   18182 #		none.							#
   18183 #									#
   18184 # ALGORITHM ***********************************************************	#
   18185 #	The effective address must be calculated since this is entered	#
   18186 # from an "Unimplemented Effective Address" exception handler. So, we	#
   18187 # have our own fcalc_ea() routine here. If an access error is flagged	#
   18188 # by a _{i,d,}mem_read() call, we must exit through the special		#
   18189 # handler.								#
   18190 #	The data register is determined and its value loaded to get the	#
   18191 # string of FP registers affected. This value is used as an index into	#
   18192 # a lookup table such that we can determine the number of bytes		#
   18193 # involved. 								#
   18194 #	If the instruction is "fmovm.x <ea>,Dn", a _mem_read() is used	#
   18195 # to read in all FP values. Again, _mem_read() may fail and require a	#
   18196 # special exit. 							#
   18197 #	If the instruction is "fmovm.x DN,<ea>", a _mem_write() is used	#
   18198 # to write all FP values. _mem_write() may also fail.			#
   18199 # 	If the instruction is "fmovm.x DN,-(a7)" from supervisor mode,	#
   18200 # then we return the size of the dump and the string to the caller	#
   18201 # so that the move can occur outside of this routine. This special	#
   18202 # case is required so that moves to the system stack are handled	#
   18203 # correctly.								#
   18204 #									#
   18205 # DYNAMIC:								#
   18206 # 	fmovm.x	dn, <ea>						#
   18207 # 	fmovm.x	<ea>, dn						#
   18208 #									#
   18209 #	      <WORD 1>		      <WORD2>				#
   18210 #	1111 0010 00 |<ea>|	11@& 1000 0$$$ 0000			#
   18211 #					  				#
   18212 #	& = (0): predecrement addressing mode				#
   18213 #	    (1): postincrement or control addressing mode		#
   18214 #	@ = (0): move listed regs from memory to the FPU		#
   18215 #	    (1): move listed regs from the FPU to memory		#
   18216 #	$$$    : index of data register holding reg select mask		#
   18217 #									#
   18218 # NOTES:								#
   18219 #	If the data register holds a zero, then the			#
   18220 #	instruction is a nop.						#
   18221 #									#
   18222 #########################################################################
   18223 
   18224 	global		fmovm_dynamic
   18225 fmovm_dynamic:
   18226 
   18227 # extract the data register in which the bit string resides...
   18228 	mov.b		1+EXC_EXTWORD(%a6),%d1	# fetch lo byte of extword
   18229 	andi.w		&0x70,%d1		# extract reg bits (6-4)
   18230 	lsr.b		&0x4,%d1		# shift into lo bits
   18231 
   18232 # fetch the bit string into d0...
   18233 	bsr.l		fetch_dreg		# fetch reg string (d1 = Dn index)
   18234 
   18235 	andi.l		&0x000000ff,%d0		# keep only lo byte
   18236 
   18237 	mov.l		%d0,-(%sp)		# save strg
   18238 	mov.b		(tbl_fmovm_size.w,%pc,%d0),%d0 # d0 = # of bytes for strg
   18239 	mov.l		%d0,-(%sp)		# save size
   18240 	bsr.l		fmovm_calc_ea		# calculate <ea> (returned in a0)
   18241 	mov.l		(%sp)+,%d0		# restore size
   18242 	mov.l		(%sp)+,%d1		# restore strg (sets Z if strg == 0)
   18243 
   18244 # if the bit string is a zero, then the operation is a no-op
   18245 # but, make sure that we've calculated ea and advanced the opword pointer
   18246 	beq.w		fmovm_data_done		# strg == 0: nothing to move
   18247 
   18248 # separate move ins from move outs...
   18249 	btst		&0x5,EXC_EXTWORD(%a6)	# is it a move in or out?
   18250 	beq.w		fmovm_data_in		# bit clear: it's a move in
   18251 
   18252 #############
   18253 # MOVE OUT: #
   18254 #############
   18255 fmovm_data_out:
   18256 	btst		&0x4,EXC_EXTWORD(%a6)	# control or predecrement?
   18257 	bne.w		fmovm_out_ctrl		# bit set: control/postincrement
   18258 
   18259 ############################
   18260 fmovm_out_predec:
   18261 # for predecrement mode, the bit string is the opposite of both control
   18262 # operations and postincrement mode. (bit7 = FP7 ... bit0 = FP0)
   18263 # here, we convert it to be just like the others...
   18264 	mov.b		(tbl_fmovm_convert.w,%pc,%d1.w*1),%d1 # bit-reverse strg
   18265 
   18266 	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
   18267 	beq.b		fmovm_out_ctrl		# user
   18268 
   18269 fmovm_out_predec_s:
   18270 	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
   18271 	bne.b		fmovm_out_ctrl		# no; handle like the other modes
   18272 
   18273 # the operation was unfortunately an: fmovm.x dn,-(sp)
   18274 # called from supervisor mode.
   18275 # we're also passing "size" (d0) and converted "strg" (d1) back to the caller
   18276 	rts
   18277 
   18278 ############################
   18279 fmovm_out_ctrl:
   18280 	mov.l		%a0,%a1			# move <ea> to a1
   18281 
   18282 	sub.l		%d0,%sp			# subtract size of dump
   18283 	lea		(%sp),%a0		# a0 = start of stack buffer
   18284 
   18285 	tst.b		%d1			# should FP0 be moved?
   18286 	bpl.b		fmovm_out_ctrl_fp1	# no
   18287 
   18288 	mov.l		0x0+EXC_FP0(%a6),(%a0)+	# yes; copy FP0 image from frame
   18289 	mov.l		0x4+EXC_FP0(%a6),(%a0)+
   18290 	mov.l		0x8+EXC_FP0(%a6),(%a0)+
   18291 
   18292 fmovm_out_ctrl_fp1:
   18293 	lsl.b		&0x1,%d1		# should FP1 be moved?
   18294 	bpl.b		fmovm_out_ctrl_fp2	# no
   18295 
   18296 	mov.l		0x0+EXC_FP1(%a6),(%a0)+	# yes; copy FP1 image from frame
   18297 	mov.l		0x4+EXC_FP1(%a6),(%a0)+
   18298 	mov.l		0x8+EXC_FP1(%a6),(%a0)+
   18299 
   18300 fmovm_out_ctrl_fp2:
   18301 	lsl.b		&0x1,%d1		# should FP2 be moved?
   18302 	bpl.b		fmovm_out_ctrl_fp3	# no
   18303 
   18304 	fmovm.x		&0x20,(%a0)		# yes; store live FP2
   18305 	add.l		&0xc,%a0		# advance past the 12 bytes
   18306 
   18307 fmovm_out_ctrl_fp3:
   18308 	lsl.b		&0x1,%d1		# should FP3 be moved?
   18309 	bpl.b		fmovm_out_ctrl_fp4	# no
   18310 
   18311 	fmovm.x		&0x10,(%a0)		# yes; store live FP3
   18312 	add.l		&0xc,%a0		# advance past the 12 bytes
   18313 
   18314 fmovm_out_ctrl_fp4:
   18315 	lsl.b		&0x1,%d1		# should FP4 be moved?
   18316 	bpl.b		fmovm_out_ctrl_fp5	# no
   18317 
   18318 	fmovm.x		&0x08,(%a0)		# yes; store live FP4
   18319 	add.l		&0xc,%a0		# advance past the 12 bytes
   18320 
   18321 fmovm_out_ctrl_fp5:
   18322 	lsl.b		&0x1,%d1		# should FP5 be moved?
   18323 	bpl.b		fmovm_out_ctrl_fp6	# no
   18324 
   18325 	fmovm.x		&0x04,(%a0)		# yes; store live FP5
   18326 	add.l		&0xc,%a0		# advance past the 12 bytes
   18327 
   18328 fmovm_out_ctrl_fp6:
   18329 	lsl.b		&0x1,%d1		# should FP6 be moved?
   18330 	bpl.b		fmovm_out_ctrl_fp7	# no
   18331 
   18332 	fmovm.x		&0x02,(%a0)		# yes; store live FP6
   18333 	add.l		&0xc,%a0		# advance past the 12 bytes
   18334 
   18335 fmovm_out_ctrl_fp7:
   18336 	lsl.b		&0x1,%d1		# should FP7 be moved?
   18337 	bpl.b		fmovm_out_ctrl_done	# no
   18338 
   18339 	fmovm.x		&0x01,(%a0)		# yes; store live FP7
   18340 	add.l		&0xc,%a0		# advance past the 12 bytes
   18341 
   18342 fmovm_out_ctrl_done:
   18343 	mov.l		%a1,L_SCR1(%a6)		# remember the destination <ea>
   18344 
   18345 	lea		(%sp),%a0		# pass: supervisor src
   18346 	mov.l		%d0,-(%sp)		# save size
   18347 	bsr.l		_dmem_write		# copy data to user mem
   18348 
   18349 	mov.l		(%sp)+,%d0		# restore size
   18350 	add.l		%d0,%sp			# clear fpreg data from stack
   18351 
   18352 	tst.l		%d1			# did dstore err?
   18353 	bne.w		fmovm_out_err		# yes
   18354 
   18355 	rts
   18356 
   18357 ############
   18358 # MOVE IN: #
   18359 ############
   18360 fmovm_data_in:
   18361 	mov.l		%a0,L_SCR1(%a6)		# remember the source <ea>
   18362 
   18363 	sub.l		%d0,%sp			# make room for fpregs
   18364 	lea		(%sp),%a1		# a1 = start of stack buffer
   18365 
   18366 	mov.l		%d1,-(%sp)		# save bit string for later
   18367 	mov.l		%d0,-(%sp)		# save # of bytes
   18368 
   18369 	bsr.l		_dmem_read		# copy data from user mem
   18370 
   18371 	mov.l		(%sp)+,%d0		# retrieve # of bytes
   18372 
   18373 	tst.l		%d1			# did dfetch fail?
   18374 	bne.w		fmovm_in_err		# yes (strg and buffer still on stack)
   18375 
   18376 	mov.l		(%sp)+,%d1		# load bit string
   18377 
   18378 	lea		(%sp),%a0		# addr of stack
   18379 
   18380 	tst.b		%d1			# should FP0 be moved?
   18381 	bpl.b		fmovm_data_in_fp1	# no
   18382 
   18383 	mov.l		(%a0)+,0x0+EXC_FP0(%a6)	# yes; write FP0 image in frame
   18384 	mov.l		(%a0)+,0x4+EXC_FP0(%a6)
   18385 	mov.l		(%a0)+,0x8+EXC_FP0(%a6)
   18386 
   18387 fmovm_data_in_fp1:
   18388 	lsl.b		&0x1,%d1		# should FP1 be moved?
   18389 	bpl.b		fmovm_data_in_fp2	# no
   18390 
   18391 	mov.l		(%a0)+,0x0+EXC_FP1(%a6)	# yes; write FP1 image in frame
   18392 	mov.l		(%a0)+,0x4+EXC_FP1(%a6)
   18393 	mov.l		(%a0)+,0x8+EXC_FP1(%a6)
   18394 
   18395 fmovm_data_in_fp2:
   18396 	lsl.b		&0x1,%d1		# should FP2 be moved?
   18397 	bpl.b		fmovm_data_in_fp3	# no
   18398 
   18399 	fmovm.x		(%a0)+,&0x20		# yes; load live FP2
   18400 
   18401 fmovm_data_in_fp3:
   18402 	lsl.b		&0x1,%d1		# should FP3 be moved?
   18403 	bpl.b		fmovm_data_in_fp4	# no
   18404 
   18405 	fmovm.x		(%a0)+,&0x10		# yes; load live FP3
   18406 
   18407 fmovm_data_in_fp4:
   18408 	lsl.b		&0x1,%d1		# should FP4 be moved?
   18409 	bpl.b		fmovm_data_in_fp5	# no
   18410 
   18411 	fmovm.x		(%a0)+,&0x08		# yes; load live FP4
   18412 
   18413 fmovm_data_in_fp5:
   18414 	lsl.b		&0x1,%d1		# should FP5 be moved?
   18415 	bpl.b		fmovm_data_in_fp6	# no
   18416 
   18417 	fmovm.x		(%a0)+,&0x04		# yes; load live FP5
   18418 
   18419 fmovm_data_in_fp6:
   18420 	lsl.b		&0x1,%d1		# should FP6 be moved?
   18421 	bpl.b		fmovm_data_in_fp7	# no
   18422 
   18423 	fmovm.x		(%a0)+,&0x02		# yes; load live FP6
   18424 
   18425 fmovm_data_in_fp7:
   18426 	lsl.b		&0x1,%d1		# should FP7 be moved?
   18427 	bpl.b		fmovm_data_in_done	# no
   18428 
   18429 	fmovm.x		(%a0)+,&0x01		# yes; load live FP7
   18430 
   18431 fmovm_data_in_done:
   18432 	add.l		%d0,%sp			# remove fpregs from stack
   18433 	rts
   18434 
   18435 #####################################
   18436 # common exit for the no-op case (bit string was zero).
   18437 fmovm_data_done:
   18438 	rts
   18439 
   18440 ##############################################################################
   18441 
   18442 #
   18443 # table indexed by the operation's bit string that gives the number
   18444 # of bytes that will be moved. 256 one-byte entries, eight per row.
   18445 #
   18446 # number of bytes = (# of 1's in bit string) * 12(bytes/fpreg)
   18447 #
   18448 tbl_fmovm_size:
   18449 	byte	0x00,0x0c,0x0c,0x18,0x0c,0x18,0x18,0x24	# strg 0x00-0x07
   18450 	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30	# strg 0x08-0x0f
   18451 	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30	# strg 0x10-0x17
   18452 	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c	# strg 0x18-0x1f
   18453 	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30	# strg 0x20-0x27
   18454 	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c	# strg 0x28-0x2f
   18455 	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c	# strg 0x30-0x37
   18456 	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48	# strg 0x38-0x3f
   18457 	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30	# strg 0x40-0x47
   18458 	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c	# strg 0x48-0x4f
   18459 	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c	# strg 0x50-0x57
   18460 	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48	# strg 0x58-0x5f
   18461 	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c	# strg 0x60-0x67
   18462 	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48	# strg 0x68-0x6f
   18463 	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48	# strg 0x70-0x77
   18464 	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54	# strg 0x78-0x7f
   18465 	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30	# strg 0x80-0x87
   18466 	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c	# strg 0x88-0x8f
   18467 	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c	# strg 0x90-0x97
   18468 	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48	# strg 0x98-0x9f
   18469 	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c	# strg 0xa0-0xa7
   18470 	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48	# strg 0xa8-0xaf
   18471 	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48	# strg 0xb0-0xb7
   18472 	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54	# strg 0xb8-0xbf
   18473 	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c	# strg 0xc0-0xc7
   18474 	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48	# strg 0xc8-0xcf
   18475 	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48	# strg 0xd0-0xd7
   18476 	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54	# strg 0xd8-0xdf
   18477 	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48	# strg 0xe0-0xe7
   18478 	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54	# strg 0xe8-0xef
   18479 	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54	# strg 0xf0-0xf7
   18480 	byte	0x3c,0x48,0x48,0x54,0x48,0x54,0x54,0x60	# strg 0xf8-0xff
   18481 
   18482 #
   18483 # table to convert a pre-decrement bit string into a post-increment
   18484 # or control bit string. entry i holds i with its 8 bits in reverse order.
   18485 # ex: 	0x00	==>	0x00
   18486 #	0x01	==>	0x80
   18487 #	0x02	==>	0x40
   18488 #		.
   18489 #		.
   18490 #	0xfd	==>	0xbf
   18491 #	0xfe	==>	0x7f
   18492 #	0xff	==>	0xff
   18493 #
   18494 tbl_fmovm_convert:
   18495 	byte	0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0	# strg 0x00-0x07
   18496 	byte	0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0	# strg 0x08-0x0f
   18497 	byte	0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8	# strg 0x10-0x17
   18498 	byte	0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8	# strg 0x18-0x1f
   18499 	byte	0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4	# strg 0x20-0x27
   18500 	byte	0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4	# strg 0x28-0x2f
   18501 	byte	0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec	# strg 0x30-0x37
   18502 	byte	0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc	# strg 0x38-0x3f
   18503 	byte	0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2	# strg 0x40-0x47
   18504 	byte	0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2	# strg 0x48-0x4f
   18505 	byte	0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea	# strg 0x50-0x57
   18506 	byte	0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa	# strg 0x58-0x5f
   18507 	byte	0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6	# strg 0x60-0x67
   18508 	byte	0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6	# strg 0x68-0x6f
   18509 	byte	0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee	# strg 0x70-0x77
   18510 	byte	0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe	# strg 0x78-0x7f
   18511 	byte	0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1	# strg 0x80-0x87
   18512 	byte	0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1	# strg 0x88-0x8f
   18513 	byte	0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9	# strg 0x90-0x97
   18514 	byte	0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9	# strg 0x98-0x9f
   18515 	byte	0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5	# strg 0xa0-0xa7
   18516 	byte	0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5	# strg 0xa8-0xaf
   18517 	byte	0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed	# strg 0xb0-0xb7
   18518 	byte	0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd	# strg 0xb8-0xbf
   18519 	byte	0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3	# strg 0xc0-0xc7
   18520 	byte	0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3	# strg 0xc8-0xcf
   18521 	byte	0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb	# strg 0xd0-0xd7
   18522 	byte	0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb	# strg 0xd8-0xdf
   18523 	byte	0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7	# strg 0xe0-0xe7
   18524 	byte	0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7	# strg 0xe8-0xef
   18525 	byte	0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef	# strg 0xf0-0xf7
   18526 	byte	0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff	# strg 0xf8-0xff
   18527 
   18528 	global		fmovm_calc_ea
   18529 ###############################################
   18530 # _fmovm_calc_ea: calculate effective address #
   18531 ###############################################
   18532 fmovm_calc_ea:
   18533 	mov.l		%d0,%a0			# move # bytes to a0
   18534 
   18535 # currently, MODE and REG are taken from the EXC_OPWORD. this could be
   18536 # easily changed if they were inputs passed in registers.
   18537 	mov.w		EXC_OPWORD(%a6),%d0	# fetch opcode word
   18538 	mov.w		%d0,%d1			# make a copy
   18539 
   18540 	andi.w		&0x3f,%d0		# extract 6-bit {mode,reg} field
   18541 	andi.l		&0x7,%d1		# extract reg  field
   18542 
   18543 # jump to the corresponding function for each {MODE,REG} pair.
   18544 	mov.w		(tbl_fea_mode.b,%pc,%d0.w*2),%d0 # fetch jmp distance
   18545 	jmp		(tbl_fea_mode.b,%pc,%d0.w*1) # jmp to correct ea mode
   18546 
   18547 	swbeg		&64
   18548 tbl_fea_mode:
   18549 	short		tbl_fea_mode	-	tbl_fea_mode	# index 0x00-0x07: offset 0 (no handler)
   18550 	short		tbl_fea_mode	-	tbl_fea_mode
   18551 	short		tbl_fea_mode	-	tbl_fea_mode
   18552 	short		tbl_fea_mode	-	tbl_fea_mode
   18553 	short		tbl_fea_mode	-	tbl_fea_mode
   18554 	short		tbl_fea_mode	-	tbl_fea_mode
   18555 	short		tbl_fea_mode	-	tbl_fea_mode
   18556 	short		tbl_fea_mode	-	tbl_fea_mode
   18557 
   18558 	short		tbl_fea_mode	-	tbl_fea_mode	# index 0x08-0x0f: offset 0 (no handler)
   18559 	short		tbl_fea_mode	-	tbl_fea_mode
   18560 	short		tbl_fea_mode	-	tbl_fea_mode
   18561 	short		tbl_fea_mode	-	tbl_fea_mode
   18562 	short		tbl_fea_mode	-	tbl_fea_mode
   18563 	short		tbl_fea_mode	-	tbl_fea_mode
   18564 	short		tbl_fea_mode	-	tbl_fea_mode
   18565 	short		tbl_fea_mode	-	tbl_fea_mode
   18566 
   18567 	short		faddr_ind_a0	- 	tbl_fea_mode	# index 0x10-0x17: (An)
   18568 	short		faddr_ind_a1	- 	tbl_fea_mode
   18569 	short		faddr_ind_a2	- 	tbl_fea_mode
   18570 	short		faddr_ind_a3 	- 	tbl_fea_mode
   18571 	short		faddr_ind_a4 	- 	tbl_fea_mode
   18572 	short		faddr_ind_a5 	- 	tbl_fea_mode
   18573 	short		faddr_ind_a6 	- 	tbl_fea_mode
   18574 	short		faddr_ind_a7 	- 	tbl_fea_mode
   18575 
   18576 	short		faddr_ind_p_a0	- 	tbl_fea_mode	# index 0x18-0x1f: (An)+
   18577 	short		faddr_ind_p_a1 	- 	tbl_fea_mode
   18578 	short		faddr_ind_p_a2 	- 	tbl_fea_mode
   18579 	short		faddr_ind_p_a3 	- 	tbl_fea_mode
   18580 	short		faddr_ind_p_a4 	- 	tbl_fea_mode
   18581 	short		faddr_ind_p_a5 	- 	tbl_fea_mode
   18582 	short		faddr_ind_p_a6 	- 	tbl_fea_mode
   18583 	short		faddr_ind_p_a7 	- 	tbl_fea_mode
   18584 
   18585 	short		faddr_ind_m_a0 	- 	tbl_fea_mode	# index 0x20-0x27: -(An)
   18586 	short		faddr_ind_m_a1 	- 	tbl_fea_mode
   18587 	short		faddr_ind_m_a2 	- 	tbl_fea_mode
   18588 	short		faddr_ind_m_a3 	- 	tbl_fea_mode
   18589 	short		faddr_ind_m_a4 	- 	tbl_fea_mode
   18590 	short		faddr_ind_m_a5 	- 	tbl_fea_mode
   18591 	short		faddr_ind_m_a6 	- 	tbl_fea_mode
   18592 	short		faddr_ind_m_a7 	- 	tbl_fea_mode
   18593 
   18594 	short		faddr_ind_disp_a0	- 	tbl_fea_mode	# index 0x28-0x2f: (d16,An)
   18595 	short		faddr_ind_disp_a1 	- 	tbl_fea_mode
   18596 	short		faddr_ind_disp_a2 	- 	tbl_fea_mode
   18597 	short		faddr_ind_disp_a3 	- 	tbl_fea_mode
   18598 	short		faddr_ind_disp_a4 	- 	tbl_fea_mode
   18599 	short		faddr_ind_disp_a5 	- 	tbl_fea_mode
   18600 	short		faddr_ind_disp_a6 	- 	tbl_fea_mode
   18601 	short		faddr_ind_disp_a7	-	tbl_fea_mode
   18602 
   18603 	short		faddr_ind_ext 	- 	tbl_fea_mode	# index 0x30-0x37: one shared handler
   18604 	short		faddr_ind_ext 	- 	tbl_fea_mode
   18605 	short		faddr_ind_ext 	- 	tbl_fea_mode
   18606 	short		faddr_ind_ext 	- 	tbl_fea_mode
   18607 	short		faddr_ind_ext 	- 	tbl_fea_mode
   18608 	short		faddr_ind_ext 	- 	tbl_fea_mode
   18609 	short		faddr_ind_ext 	- 	tbl_fea_mode
   18610 	short		faddr_ind_ext 	- 	tbl_fea_mode
   18611 
   18612 	short		fabs_short	- 	tbl_fea_mode	# index 0x38
   18613 	short		fabs_long	- 	tbl_fea_mode	# index 0x39
   18614 	short		fpc_ind		- 	tbl_fea_mode	# index 0x3a
   18615 	short		fpc_ind_ext	- 	tbl_fea_mode	# index 0x3b
   18616 	short		tbl_fea_mode	- 	tbl_fea_mode	# index 0x3c-0x3f: offset 0 (no handler)
   18617 	short		tbl_fea_mode	- 	tbl_fea_mode
   18618 	short		tbl_fea_mode	- 	tbl_fea_mode
   18619 	short		tbl_fea_mode	- 	tbl_fea_mode
   18620 
   18621 ###################################
   18622 # Address register indirect: (An) #
   18623 ###################################
   18624 faddr_ind_a0:
   18625 	mov.l		EXC_DREGS+0x8(%a6),%a0	# <ea> = a0 value saved in frame
   18626 	rts
   18627 
   18628 faddr_ind_a1:
   18629 	mov.l		EXC_DREGS+0xc(%a6),%a0	# <ea> = a1 value saved in frame
   18630 	rts
   18631 
   18632 faddr_ind_a2:
   18633 	mov.l		%a2,%a0			# <ea> = live a2
   18634 	rts
   18635 
   18636 faddr_ind_a3:
   18637 	mov.l		%a3,%a0			# <ea> = live a3
   18638 	rts
   18639 
   18640 faddr_ind_a4:
   18641 	mov.l		%a4,%a0			# <ea> = live a4
   18642 	rts
   18643 
   18644 faddr_ind_a5:
   18645 	mov.l		%a5,%a0			# <ea> = live a5
   18646 	rts
   18647 
   18648 faddr_ind_a6:
   18649 	mov.l		(%a6),%a0		# <ea> = a6 value held at (%a6)
   18650 	rts
   18651 
   18652 faddr_ind_a7:
   18653 	mov.l		EXC_A7(%a6),%a0		# <ea> = a7 value kept in EXC_A7
   18654 	rts
   18655 
   18656 #####################################################
   18657 # Address register indirect w/ postincrement: (An)+ #
   18658 # in: a0 = # of bytes; out: a0 = old An; An is advanced by # of bytes #
   18659 faddr_ind_p_a0:
   18660 	mov.l		EXC_DREGS+0x8(%a6),%d0	# Get current a0
   18661 	mov.l		%d0,%d1
   18662 	add.l		%a0,%d1			# Increment
   18663 	mov.l		%d1,EXC_DREGS+0x8(%a6)	# Save incr value
   18664 	mov.l		%d0,%a0			# <ea> = value before increment
   18665 	rts
   18666 
   18667 faddr_ind_p_a1:
   18668 	mov.l		EXC_DREGS+0xc(%a6),%d0	# Get current a1
   18669 	mov.l		%d0,%d1
   18670 	add.l		%a0,%d1			# Increment
   18671 	mov.l		%d1,EXC_DREGS+0xc(%a6)	# Save incr value
   18672 	mov.l		%d0,%a0			# <ea> = value before increment
   18673 	rts
   18674 
   18675 faddr_ind_p_a2:
   18676 	mov.l		%a2,%d0			# Get current a2
   18677 	mov.l		%d0,%d1
   18678 	add.l		%a0,%d1			# Increment
   18679 	mov.l		%d1,%a2			# Save incr value
   18680 	mov.l		%d0,%a0			# <ea> = value before increment
   18681 	rts
   18682 
   18683 faddr_ind_p_a3:
   18684 	mov.l		%a3,%d0			# Get current a3
   18685 	mov.l		%d0,%d1
   18686 	add.l		%a0,%d1			# Increment
   18687 	mov.l		%d1,%a3			# Save incr value
   18688 	mov.l		%d0,%a0			# <ea> = value before increment
   18689 	rts
   18690 
   18691 faddr_ind_p_a4:
   18692 	mov.l		%a4,%d0			# Get current a4
   18693 	mov.l		%d0,%d1
   18694 	add.l		%a0,%d1			# Increment
   18695 	mov.l		%d1,%a4			# Save incr value
   18696 	mov.l		%d0,%a0			# <ea> = value before increment
   18697 	rts
   18698 
   18699 faddr_ind_p_a5:
   18700 	mov.l		%a5,%d0			# Get current a5
   18701 	mov.l		%d0,%d1
   18702 	add.l		%a0,%d1			# Increment
   18703 	mov.l		%d1,%a5			# Save incr value
   18704 	mov.l		%d0,%a0			# <ea> = value before increment
   18705 	rts
   18706 
   18707 faddr_ind_p_a6:
   18708 	mov.l		(%a6),%d0		# Get current a6
   18709 	mov.l		%d0,%d1
   18710 	add.l		%a0,%d1			# Increment
   18711 	mov.l		%d1,(%a6)		# Save incr value
   18712 	mov.l		%d0,%a0			# <ea> = value before increment
   18713 	rts
   18714 
   18715 faddr_ind_p_a7:
   18716 	mov.b		&mia7_flg,SPCOND_FLG(%a6) # set "special case" flag
   18717 
   18718 	mov.l		EXC_A7(%a6),%d0		# Get current a7
   18719 	mov.l		%d0,%d1
   18720 	add.l		%a0,%d1			# Increment
   18721 	mov.l		%d1,EXC_A7(%a6)		# Save incr value
   18722 	mov.l		%d0,%a0			# <ea> = value before increment
   18723 	rts
   18724 
   18725 ####################################################
   18726 # Address register indirect w/ predecrement: -(An) #
   18727 ####################################################
   18728 faddr_ind_m_a0:
   18729 	mov.l		EXC_DREGS+0x8(%a6),%d0	# Get current a0
   18730 	sub.l		%a0,%d0			# Decrement
   18731 	mov.l		%d0,EXC_DREGS+0x8(%a6)	# Save decr value
   18732 	mov.l		%d0,%a0
   18733 	rts
   18734 
   18735 faddr_ind_m_a1:
   18736 	mov.l		EXC_DREGS+0xc(%a6),%d0	# Get current a1
   18737 	sub.l		%a0,%d0			# Decrement
   18738 	mov.l		%d0,EXC_DREGS+0xc(%a6)	# Save decr value
   18739 	mov.l		%d0,%a0
   18740 	rts
   18741 
   18742 faddr_ind_m_a2:
         	# -(a2) helper. In: a0 = amount to subtract from a2.
         	# Out: a0 = d0 = a2 = decremented value.
   18743 	mov.l		%a2,%d0			# Get current a2
   18744 	sub.l		%a0,%d0			# Decrement
   18745 	mov.l		%d0,%a2			# Save decr value
   18746 	mov.l		%d0,%a0			# Return decremented address
   18747 	rts
   18748 
   18749 faddr_ind_m_a3:
         	# -(a3) helper. In: a0 = amount to subtract from a3.
         	# Out: a0 = d0 = a3 = decremented value.
   18750 	mov.l		%a3,%d0			# Get current a3
   18751 	sub.l		%a0,%d0			# Decrement
   18752 	mov.l		%d0,%a3			# Save decr value
   18753 	mov.l		%d0,%a0			# Return decremented address
   18754 	rts
   18755 
   18756 faddr_ind_m_a4:
         	# -(a4) helper. In: a0 = amount to subtract from a4.
         	# Out: a0 = d0 = a4 = decremented value.
   18757 	mov.l		%a4,%d0			# Get current a4
   18758 	sub.l		%a0,%d0			# Decrement
   18759 	mov.l		%d0,%a4			# Save decr value
   18760 	mov.l		%d0,%a0			# Return decremented address
   18761 	rts
   18762 
   18763 faddr_ind_m_a5:
         	# -(a5) helper. In: a0 = amount to subtract from a5.
         	# Out: a0 = d0 = a5 = decremented value.
   18764 	mov.l		%a5,%d0			# Get current a5
   18765 	sub.l		%a0,%d0			# Decrement
   18766 	mov.l		%d0,%a5			# Save decr value
   18767 	mov.l		%d0,%a0			# Return decremented address
   18768 	rts
   18769 
   18770 faddr_ind_m_a6:
         	# -(a6) helper. In: a0 = amount to subtract.
         	# The a6 being emulated is the longword stored at (%a6), not the
         	# live register (a6 itself is the base for the EXC_* offsets here).
         	# Out: a0 = d0 = decremented value, also written back to (%a6).
   18771 	mov.l		(%a6),%d0		# Get current a6
   18772 	sub.l		%a0,%d0			# Decrement
   18773 	mov.l		%d0,(%a6)		# Save decr value
   18774 	mov.l		%d0,%a0			# Return decremented address
   18775 	rts
   18776 
   18777 faddr_ind_m_a7:
         	# -(a7) helper. In: a0 = amount to subtract.
         	# The a7 being emulated is the copy at EXC_A7(%a6).
         	# Out: a0 = d0 = decremented value, also written back to that copy.
         	# Also records mda7_flg in SPCOND_FLG(%a6); the code that acts on
         	# that flag is not in this part of the file.
   18778 	mov.b		&mda7_flg,SPCOND_FLG(%a6) # set "special case" flag
   18779 
   18780 	mov.l		EXC_A7(%a6),%d0		# Get current a7
   18781 	sub.l		%a0,%d0			# Decrement
   18782 	mov.l		%d0,EXC_A7(%a6)		# Save decr value
   18783 	mov.l		%d0,%a0			# Return decremented address
   18784 	rts
   18785 
   18786 ########################################################
   18787 # Address register indirect w/ displacement: (d16, An) #
   18788 ########################################################
   18789 faddr_ind_disp_a0:
         	# (d16,a0): read the 16-bit displacement at EXC_EXTWPTR(%a6),
         	# advance that pointer by 2, and return
         	# a0 = (a0 copy at EXC_DREGS+0x8(%a6)) + sign-extended d16.
         	# Branches to iea_iacc (does not return here) if d1 != 0 after
         	# the fetch.
   18790 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   18791 	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
   18792 	bsr.l		_imem_read_word
   18793 
   18794 	tst.l		%d1			# did ifetch fail?
   18795 	bne.l		iea_iacc		# yes
   18796 
   18797 	mov.w		%d0,%a0			# sign extend displacement
   18798 
   18799 	add.l		EXC_DREGS+0x8(%a6),%a0	# a0 + d16
   18800 	rts
   18801 
   18802 faddr_ind_disp_a1:
         	# (d16,a1): read the 16-bit displacement at EXC_EXTWPTR(%a6),
         	# advance that pointer by 2, and return
         	# a0 = (a1 copy at EXC_DREGS+0xc(%a6)) + sign-extended d16.
         	# Branches to iea_iacc (does not return here) if d1 != 0 after
         	# the fetch.
   18803 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   18804 	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
   18805 	bsr.l		_imem_read_word
   18806 
   18807 	tst.l		%d1			# did ifetch fail?
   18808 	bne.l		iea_iacc		# yes
   18809 
   18810 	mov.w		%d0,%a0			# sign extend displacement
   18811 
   18812 	add.l		EXC_DREGS+0xc(%a6),%a0	# a1 + d16
   18813 	rts
   18814 
   18815 faddr_ind_disp_a2:
         	# (d16,a2): read the 16-bit displacement at EXC_EXTWPTR(%a6),
         	# advance that pointer by 2, and return a0 = a2 + sign-extended d16.
         	# Branches to iea_iacc (does not return here) if d1 != 0 after
         	# the fetch.
   18816 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   18817 	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
   18818 	bsr.l		_imem_read_word
   18819 
   18820 	tst.l		%d1			# did ifetch fail?
   18821 	bne.l		iea_iacc		# yes
   18822 
   18823 	mov.w		%d0,%a0			# sign extend displacement
   18824 
   18825 	add.l		%a2,%a0			# a2 + d16
   18826 	rts
   18827 
   18828 faddr_ind_disp_a3:
         	# (d16,a3): read the 16-bit displacement at EXC_EXTWPTR(%a6),
         	# advance that pointer by 2, and return a0 = a3 + sign-extended d16.
         	# Branches to iea_iacc (does not return here) if d1 != 0 after
         	# the fetch.
   18829 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   18830 	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
   18831 	bsr.l		_imem_read_word
   18832 
   18833 	tst.l		%d1			# did ifetch fail?
   18834 	bne.l		iea_iacc		# yes
   18835 
   18836 	mov.w		%d0,%a0			# sign extend displacement
   18837 
   18838 	add.l		%a3,%a0			# a3 + d16
   18839 	rts
   18840 
   18841 faddr_ind_disp_a4:
         	# (d16,a4): read the 16-bit displacement at EXC_EXTWPTR(%a6),
         	# advance that pointer by 2, and return a0 = a4 + sign-extended d16.
         	# Branches to iea_iacc (does not return here) if d1 != 0 after
         	# the fetch.
   18842 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   18843 	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
   18844 	bsr.l		_imem_read_word
   18845 
   18846 	tst.l		%d1			# did ifetch fail?
   18847 	bne.l		iea_iacc		# yes
   18848 
   18849 	mov.w		%d0,%a0			# sign extend displacement
   18850 
   18851 	add.l		%a4,%a0			# a4 + d16
   18852 	rts
   18853 
   18854 faddr_ind_disp_a5:
         	# (d16,a5): read the 16-bit displacement at EXC_EXTWPTR(%a6),
         	# advance that pointer by 2, and return a0 = a5 + sign-extended d16.
         	# Branches to iea_iacc (does not return here) if d1 != 0 after
         	# the fetch.
   18855 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   18856 	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
   18857 	bsr.l		_imem_read_word
   18858 
   18859 	tst.l		%d1			# did ifetch fail?
   18860 	bne.l		iea_iacc		# yes
   18861 
   18862 	mov.w		%d0,%a0			# sign extend displacement
   18863 
   18864 	add.l		%a5,%a0			# a5 + d16
   18865 	rts
   18866 
   18867 faddr_ind_disp_a6:
         	# (d16,a6): read the 16-bit displacement at EXC_EXTWPTR(%a6),
         	# advance that pointer by 2, and return
         	# a0 = (longword stored at (%a6)) + sign-extended d16.
         	# The emulated a6 is taken from memory at (%a6), not from the
         	# live register.
         	# Branches to iea_iacc (does not return here) if d1 != 0 after
         	# the fetch.
   18868 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   18869 	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
   18870 	bsr.l		_imem_read_word
   18871 
   18872 	tst.l		%d1			# did ifetch fail?
   18873 	bne.l		iea_iacc		# yes
   18874 
   18875 	mov.w		%d0,%a0			# sign extend displacement
   18876 
   18877 	add.l		(%a6),%a0		# a6 + d16
   18878 	rts
   18879 
   18880 faddr_ind_disp_a7:
         	# (d16,a7): read the 16-bit displacement at EXC_EXTWPTR(%a6),
         	# advance that pointer by 2, and return
         	# a0 = (a7 copy at EXC_A7(%a6)) + sign-extended d16.
         	# Branches to iea_iacc (does not return here) if d1 != 0 after
         	# the fetch.
   18881 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   18882 	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
   18883 	bsr.l		_imem_read_word
   18884 
   18885 	tst.l		%d1			# did ifetch fail?
   18886 	bne.l		iea_iacc		# yes
   18887 
   18888 	mov.w		%d0,%a0			# sign extend displacement
   18889 
   18890 	add.l		EXC_A7(%a6),%a0		# a7 + d16
   18891 	rts
   18892 
   18893 ########################################################################
   18894 # Address register indirect w/ index(8-bit displacement): (d8, An, Xn) #
   18895 #    "       "         "    w/   "  (base displacement): (bd, An, Xn)  #
   18896 # Memory indirect postindexed: ([bd, An], Xn, od)		       #
   18897 # Memory indirect preindexed: ([bd, An, Xn], od)		       #
   18898 ########################################################################
   18899 faddr_ind_ext:
         	# Indexed / memory-indirect modes with an address register base.
         	# In:  d1 = base register number before the +8 adjustment below
         	#      (presumably 0-7 so that +8 selects An in fetch_dreg - confirm).
         	# Out: a0 = effective address.
         	# The extension word is fetched from EXC_EXTWPTR(%a6). If its bit 8
         	# is set, control passes to fcalc_mem_ind with d0 = extension word
         	# and a0 = base; otherwise the brief form is decoded right here:
         	#   bits 15-12 index register, bit 11 long index, bits 10-9 scale,
         	#   bits 7-0 signed displacement.
   18900 	addq.l		&0x8,%d1
   18901 	bsr.l		fetch_dreg		# fetch base areg
   18902 	mov.l		%d0,-(%sp)		# park base across the ifetch
   18903 
   18904 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   18905 	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
   18906 	bsr.l		_imem_read_word		# fetch extword in d0
   18907 
   18908 	tst.l		%d1			# did ifetch fail?
   18909 	bne.l		iea_iacc		# yes
         	# NOTE(review): on the failure branch above the pushed base is
         	# still on the stack; presumably iea_iacc discards it - confirm.
   18910 
   18911 	mov.l		(%sp)+,%a0		# a0 = base areg value
   18912 
   18913 	btst		&0x8,%d0		# full-format extension word?
   18914 	bne.w		fcalc_mem_ind		# yes
   18915 
   18916 	mov.l		%d0,L_SCR1(%a6)		# hold extension word
   18917 
   18918 	mov.l		%d0,%d1
   18919 	rol.w		&0x4,%d1		# bits 15-12 -> bits 3-0
   18920 	andi.w		&0xf,%d1		# extract index regno
   18921 
   18922 # count on fetch_dreg() not to alter a0...
   18923 	bsr.l		fetch_dreg		# fetch index
   18924 
   18925 	mov.l		%d2,-(%sp)		# save d2
   18926 	mov.l		L_SCR1(%a6),%d2		# fetch extension word
   18927 
   18928 	btst		&0xb,%d2		# is it word or long?
   18929 	bne.b		faii8_long
   18930 	ext.l		%d0			# sign extend word index
   18931 faii8_long:
   18932 	mov.l		%d2,%d1
   18933 	rol.w		&0x7,%d1		# bits 10-9 -> bits 1-0
   18934 	andi.l		&0x3,%d1		# extract scale value
   18935 
   18936 	lsl.l		%d1,%d0			# shift index by scale
   18937 
   18938 	extb.l		%d2			# sign extend displacement
   18939 	add.l		%d2,%d0			# index + disp
   18940 	add.l		%d0,%a0			# An + (index + disp)
   18941 
   18942 	mov.l		(%sp)+,%d2		# restore old d2
   18943 	rts
   18944 
   18945 ###########################
   18946 # Absolute short: (XXX).W #
   18947 ###########################
   18948 fabs_short:
         	# (XXX).W: read a 16-bit address from EXC_EXTWPTR(%a6), advance that
         	# pointer by 2, and return it sign-extended in a0.
         	# Branches to iea_iacc (does not return here) if d1 != 0 after
         	# the fetch.
   18949 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   18950 	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
   18951 	bsr.l		_imem_read_word		# fetch short address
   18952 
   18953 	tst.l		%d1			# did ifetch fail?
   18954 	bne.l		iea_iacc		# yes
   18955 
   18956 	mov.w		%d0,%a0			# return <ea> in a0
   18957 	rts
   18958 
   18959 ##########################
   18960 # Absolute long: (XXX).L #
   18961 ##########################
   18962 fabs_long:
         	# (XXX).L: read a 32-bit address from EXC_EXTWPTR(%a6), advance that
         	# pointer by 4, and return it in a0.
         	# Branches to iea_iacc (does not return here) if d1 != 0 after
         	# the fetch.
   18963 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   18964 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   18965 	bsr.l		_imem_read_long		# fetch long address
   18966 
   18967 	tst.l		%d1			# did ifetch fail?
   18968 	bne.l		iea_iacc		# yes
   18969 
   18970 	mov.l		%d0,%a0			# return <ea> in a0
   18971 	rts
   18972 
   18973 #######################################################
   18974 # Program counter indirect w/ displacement: (d16, PC) #
   18975 #######################################################
   18976 fpc_ind:
         	# (d16,PC): read the 16-bit displacement at EXC_EXTWPTR(%a6) and
         	# return a0 = (address of that displacement word) + sign-extended d16.
         	# EXC_EXTWPTR(%a6) is left advanced by 2.
         	# Branches to iea_iacc (does not return here) if d1 != 0 after
         	# the fetch.
   18977 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   18978 	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
   18979 	bsr.l		_imem_read_word		# fetch word displacement
   18980 
   18981 	tst.l		%d1			# did ifetch fail?
   18982 	bne.l		iea_iacc		# yes
   18983 
   18984 	mov.w		%d0,%a0			# sign extend displacement
   18985 
   18986 	add.l		EXC_EXTWPTR(%a6),%a0	# pc + d16
   18987 
   18988 # the extwptr was already advanced by 2 (addq above), past the displacement
         # word; back up so the base is the address of the displacement word.
   18989 	subq.l		&0x2,%a0		# adjust <ea>
   18990 	rts
   18991 
   18992 ##########################################################
   18993 # PC indirect w/ index(8-bit displacement): (d8, PC, Xn) #
   18994 # "     "     w/   "  (base displacement): (bd, PC, Xn)  #
   18995 # PC memory indirect postindexed: ([bd, PC], Xn, od)     #
   18996 # PC memory indirect preindexed: ([bd, PC, Xn], od)      #
   18997 ##########################################################
   18998 fpc_ind_ext:
         	# Indexed / memory-indirect modes with the PC as base.
         	# Out: a0 = effective address.
         	# The base is the address of the extension word itself (extwptr
         	# after the increment, minus 2). If bit 8 of the extension word is
         	# set, control passes to fcalc_mem_ind with d0 = extension word and
         	# a0 = base; otherwise the brief form is decoded right here:
         	#   bits 15-12 index register, bit 11 long index, bits 10-9 scale,
         	#   bits 7-0 signed displacement.
   18999 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   19000 	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
   19001 	bsr.l		_imem_read_word		# fetch ext word
   19002 
   19003 	tst.l		%d1			# did ifetch fail?
   19004 	bne.l		iea_iacc		# yes
   19005 
   19006 	mov.l		EXC_EXTWPTR(%a6),%a0	# put base in a0
   19007 	subq.l		&0x2,%a0		# adjust base
   19008 
   19009 	btst		&0x8,%d0		# full-format extension word?
   19010 	bne.w		fcalc_mem_ind		# calc memory indirect
   19011 
   19012 	mov.l		%d0,L_SCR1(%a6)		# store extension word
   19013 
   19014 	mov.l		%d0,%d1			# make extword copy
   19015 	rol.w		&0x4,%d1		# rotate reg num into place
   19016 	andi.w		&0xf,%d1		# extract register number
   19017 
   19018 # count on fetch_dreg() not to alter a0...
   19019 	bsr.l		fetch_dreg		# fetch index
   19020 
   19021 	mov.l		%d2,-(%sp)		# save d2
   19022 	mov.l		L_SCR1(%a6),%d2		# fetch extension word
   19023 
   19024 	btst		&0xb,%d2		# is index word or long?
   19025 	bne.b		fpii8_long		# long
   19026 	ext.l		%d0			# sign extend word index
   19027 fpii8_long:
   19028 	mov.l		%d2,%d1
   19029 	rol.w		&0x7,%d1		# rotate scale value into place
   19030 	andi.l		&0x3,%d1		# extract scale value
   19031 
   19032 	lsl.l		%d1,%d0			# shift index by scale
   19033 
   19034 	extb.l		%d2			# sign extend displacement
   19035 	add.l		%d2,%d0			# disp + index
   19036 	add.l		%d0,%a0			# PC base + (index + disp)
   19037 
   19038 	mov.l		(%sp)+,%d2		# restore temp register
   19039 	rts
   19040 
   19041 # d2 = index
   19042 # d3 = base
   19043 # d4 = od
   19044 # d5 = extword
   19045 fcalc_mem_ind:
         	# Full-format extension word handler (reached from faddr_ind_ext
         	# and fpc_ind_ext when bit 8 of the extension word is set).
         	# In:  d0 = extension word, a0 = base address (An or PC based).
         	# Out: a0 = effective address; d2-d5 are saved and restored.
         	# Error exits (no return to caller from here):
         	#   fcea_iacc - a bd/od fetch via _imem_read_* reported failure
         	#   fcea_err  - the memory-indirect _dmem_read_long reported failure
   19046 	btst		&0x6,%d0		# is the index suppressed?
   19047 	beq.b		fcalc_index		# no; go fetch and scale it
   19048 
   19049 	movm.l		&0x3c00,-(%sp)		# save d2-d5
   19050 
   19051 	mov.l		%d0,%d5			# put extword in d5
   19052 	mov.l		%a0,%d3			# put base in d3
   19053 
   19054 	clr.l		%d2			# yes, so index = 0
   19055 	bra.b		fbase_supp_ck
   19056 
   19057 # index:
   19058 fcalc_index:
   19059 	mov.l		%d0,L_SCR1(%a6)		# save d0 (extension word)
   19060 	bfextu		%d0{&16:&4},%d1		# fetch dreg index
   19061 	bsr.l		fetch_dreg
   19062 
   19063 	movm.l		&0x3c00,-(%sp)		# save d2-d5
   19064 	mov.l		%d0,%d2			# put index in d2
   19065 	mov.l		L_SCR1(%a6),%d5		# reload extension word
   19066 	mov.l		%a0,%d3			# put base in d3
   19067 
   19068 	btst		&0xb,%d5		# is index word or long?
   19069 	bne.b		fno_ext
   19070 	ext.l		%d2			# sign extend word index
   19071 
   19072 fno_ext:
   19073 	bfextu		%d5{&21:&2},%d0		# extract scale (bits 10-9)
   19074 	lsl.l		%d0,%d2			# shift index by scale
   19075 
   19076 # base address (passed as parameter in d3):
   19077 # we clear the value here if it should actually be suppressed.
   19078 fbase_supp_ck:
   19079 	btst		&0x7,%d5		# is the base register suppressed?
   19080 	beq.b		fno_base_sup
   19081 	clr.l		%d3			# yes, so base = 0
   19082 
   19083 # base displacement:
   19084 fno_base_sup:
   19085 	bfextu		%d5{&26:&2},%d0		# get bd size
   19086 #	beq.l		fmovm_error		# if (size == 0) it's reserved
   19087 
   19088 	cmpi.b	 	%d0,&0x2		# size: <2 none, 2 word, 3 long
   19089 	blt.b		fno_bd
   19090 	beq.b		fget_word_bd
   19091 
   19092 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   19093 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   19094 	bsr.l		_imem_read_long
   19095 
   19096 	tst.l		%d1			# did ifetch fail?
   19097 	bne.l		fcea_iacc		# yes
   19098 
   19099 	bra.b		fchk_ind
   19100 
   19101 fget_word_bd:
   19102 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   19103 	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
   19104 	bsr.l		_imem_read_word
   19105 
   19106 	tst.l		%d1			# did ifetch fail?
   19107 	bne.l		fcea_iacc		# yes
   19108 
   19109 	ext.l		%d0			# sign extend bd
   19110 
   19111 fchk_ind:
   19112 	add.l		%d0,%d3			# base += bd
   19113 
   19114 # outer displacement:
   19115 fno_bd:
   19116 	bfextu		%d5{&30:&2},%d0		# is od suppressed?
   19117 	beq.w		faii_bd			# field == 0: no memory indirection
   19118 
   19119 	cmpi.b	 	%d0,&0x2		# size: 1 null, 2 word, 3 long
   19120 	blt.b		fnull_od
   19121 	beq.b		fword_od
   19122 
   19123 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   19124 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   19125 	bsr.l		_imem_read_long
   19126 
   19127 	tst.l		%d1			# did ifetch fail?
   19128 	bne.l		fcea_iacc		# yes
   19129 
   19130 	bra.b 		fadd_them
   19131 
   19132 fword_od:
   19133 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   19134 	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
   19135 	bsr.l		_imem_read_word
   19136 
   19137 	tst.l		%d1			# did ifetch fail?
   19138 	bne.l		fcea_iacc		# yes
   19139 
   19140 	ext.l		%d0			# sign extend od
   19141 	bra.b		fadd_them
   19142 
   19143 fnull_od:
   19144 	clr.l		%d0			# od = 0
   19145 
   19146 fadd_them:
   19147 	mov.l		%d0,%d4			# put od in d4
   19148 
   19149 	btst		&0x2,%d5		# pre or post indexing?
   19150 	beq.b		fpre_indexed		# bit 2 clear: preindexed
   19151 
   19152 	mov.l		%d3,%a0			# read pointer at (base + bd)
   19153 	bsr.l		_dmem_read_long
   19154 
   19155 	tst.l		%d1			# did dfetch fail?
   19156 	bne.w		fcea_err		# yes
   19157 
   19158 	add.l		%d2,%d0			# <ea> += index
   19159 	add.l		%d4,%d0			# <ea> += od
   19160 	bra.b		fdone_ea
   19161 
   19162 fpre_indexed:
   19163 	add.l		%d2,%d3			# preindexing
   19164 	mov.l		%d3,%a0			# read pointer at (base + bd + index)
   19165 	bsr.l		_dmem_read_long
   19166 
   19167 	tst.l		%d1			# did dfetch fail?
   19168 	bne.w		fcea_err		# yes
   19169 
   19170 	add.l		%d4,%d0			# ea += od
   19171 	bra.b		fdone_ea
   19172 
   19173 faii_bd:
   19174 	add.l		%d2,%d3			# ea = (base + bd) + index
   19175 	mov.l		%d3,%d0
   19176 fdone_ea:
   19177 	mov.l		%d0,%a0			# return <ea> in a0
   19178 
   19179 	movm.l		(%sp)+,&0x003c		# restore d2-d5
   19180 	rts
   19181 
   19182 #########################################################
   19183 fcea_err:
         	# Exit taken from fcalc_mem_ind when _dmem_read_long reports failure.
         	# Hands iea_dacc a0 = the address that was being read (d3) after
         	# restoring d2-d5.
   19184 	mov.l		%d3,%a0			# a0 = address of the failed read
   19185 
   19186 	movm.l		(%sp)+,&0x003c		# restore d2-d5
   19187 	mov.w		&0x0101,%d0		# presumably fault info for iea_dacc - confirm
   19188 	bra.l		iea_dacc
   19189 
   19190 fcea_iacc:
         	# Exit taken from fcalc_mem_ind when a bd/od instruction fetch
         	# reports failure: undo the d2-d5 save, then join iea_iacc.
   19191 	movm.l		(%sp)+,&0x003c		# restore d2-d5
   19192 	bra.l		iea_iacc
   19193 
   19194 fmovm_out_err:
         	# Error exits whose callers are not in this part of the file
         	# (presumably the fmovm data-access paths - confirm).
         	# Both entry points call restore, load a direction-specific word
         	# into d0 (0x00e1 for "out", 0x0161 for "in"), then pass
         	# a0 = L_SCR1(%a6) to iea_dacc.
   19195 	bsr.l		restore
   19196 	mov.w		&0x00e1,%d0		# "out" code; meaning defined by iea_dacc
   19197 	bra.b		fmovm_err
   19198 
   19199 fmovm_in_err:
   19200 	bsr.l		restore
   19201 	mov.w		&0x0161,%d0		# "in" code; meaning defined by iea_dacc
   19202 
   19203 fmovm_err:
   19204 	mov.l		L_SCR1(%a6),%a0		# presumably the faulting address - confirm
   19205 	bra.l		iea_dacc
   19206 
   19207 #########################################################################
   19208 # XDEF ****************************************************************	#
   19209 # 	fmovm_ctrl(): emulate fmovm.l of control registers instr	#
   19210 #									#
   19211 # XREF ****************************************************************	#
   19212 #	_imem_read_long() - read longword from memory			#
   19213 #	iea_iacc() - _imem_read_long() failed; error recovery		#
   19214 #									#
   19215 # INPUT ***************************************************************	#
   19216 #	None								#
   19217 # 									#
   19218 # OUTPUT **************************************************************	#
   19219 #	If _imem_read_long() doesn't fail:				#
   19220 #		USER_FPCR(a6)  = new FPCR value				#
   19221 #		USER_FPSR(a6)  = new FPSR value				#
   19222 #		USER_FPIAR(a6) = new FPIAR value			#
   19223 #									#
   19224 # ALGORITHM ***********************************************************	#
   19225 # 	Decode the instruction type by looking at the extension word 	#
   19226 # in order to see how many control registers to fetch from memory.	#
   19227 # Fetch them using _imem_read_long(). If this fetch fails, exit through	#
   19228 # the special access error exit handler iea_iacc().			#
   19229 #									#
   19230 # Instruction word decoding:						#
   19231 #									#
   19232 # 	fmovem.l #<data>, {FPIAR&|FPCR&|FPSR}				#
   19233 #									#
   19234 #		WORD1			WORD2				#
   19235 #	1111 0010 00 111100	100$ $$00 0000 0000			#
   19236 #									#
   19237 #	$$$ (100): FPCR							#
   19238 #	    (010): FPSR							#
   19239 #	    (001): FPIAR						#
   19240 #	    (000): FPIAR						#
   19241 #									#
   19242 #########################################################################
   19243 
   19244 	global		fmovm_ctrl
   19245 fmovm_ctrl:
         	# Dispatch on the high byte of the extension word (first byte at
         	# EXC_EXTWORD(%a6)):
         	#   0x9c -> fctrl_in_7 (FPCR, FPSR, FPIAR)
         	#   0x98 -> fctrl_in_6 (FPCR, FPSR)
         	#   0x94 -> fctrl_in_5 (FPCR, FPIAR)
         	#   anything else falls through to fctrl_in_3 (FPSR, FPIAR).
         	# Each case reads one longword per register from EXC_EXTWPTR(%a6),
         	# advancing that pointer by 4 per read, and stores it in the
         	# matching USER_* slot. Any failed read branches to iea_iacc;
         	# slots already written on that path are left modified.
   19246 	mov.b		EXC_EXTWORD(%a6),%d0	# fetch reg select bits
   19247 	cmpi.b		%d0,&0x9c		# fpcr & fpsr & fpiar ?
   19248 	beq.w		fctrl_in_7		# yes
   19249 	cmpi.b		%d0,&0x98		# fpcr & fpsr ?
   19250 	beq.w		fctrl_in_6		# yes
   19251 	cmpi.b		%d0,&0x94		# fpcr & fpiar ?
   19252 	beq.b		fctrl_in_5		# yes
   19253 
   19254 # fmovem.l #<data>, fpsr/fpiar
   19255 fctrl_in_3:
   19256 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   19257 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   19258 	bsr.l		_imem_read_long		# fetch FPSR from mem
   19259 
   19260 	tst.l		%d1			# did ifetch fail?
   19261 	bne.l		iea_iacc		# yes
   19262 
   19263 	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to stack
   19264 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   19265 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   19266 	bsr.l		_imem_read_long		# fetch FPIAR from mem
   19267 
   19268 	tst.l		%d1			# did ifetch fail?
   19269 	bne.l		iea_iacc		# yes
   19270 
   19271 	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to stack
   19272 	rts
   19273 
   19274 # fmovem.l #<data>, fpcr/fpiar
   19275 fctrl_in_5:
   19276 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   19277 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   19278 	bsr.l		_imem_read_long		# fetch FPCR from mem
   19279 
   19280 	tst.l		%d1			# did ifetch fail?
   19281 	bne.l		iea_iacc		# yes
   19282 
   19283 	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to stack
   19284 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   19285 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   19286 	bsr.l		_imem_read_long		# fetch FPIAR from mem
   19287 
   19288 	tst.l		%d1			# did ifetch fail?
   19289 	bne.l		iea_iacc		# yes
   19290 
   19291 	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to stack
   19292 	rts
   19293 
   19294 # fmovem.l #<data>, fpcr/fpsr
   19295 fctrl_in_6:
   19296 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   19297 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   19298 	bsr.l		_imem_read_long		# fetch FPCR from mem
   19299 
   19300 	tst.l		%d1			# did ifetch fail?
   19301 	bne.l		iea_iacc		# yes
   19302 
   19303 	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to stack
   19304 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   19305 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   19306 	bsr.l		_imem_read_long		# fetch FPSR from mem
   19307 
   19308 	tst.l		%d1			# did ifetch fail?
   19309 	bne.l		iea_iacc		# yes
   19310 
   19311 	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to stack
   19312 	rts
   19313 
   19314 # fmovem.l #<data>, fpcr/fpsr/fpiar
   19315 fctrl_in_7:
   19316 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   19317 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   19318 	bsr.l		_imem_read_long		# fetch FPCR from mem
   19319 
   19320 	tst.l		%d1			# did ifetch fail?
   19321 	bne.l		iea_iacc		# yes
   19322 
   19323 	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to stack
   19324 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   19325 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   19326 	bsr.l		_imem_read_long		# fetch FPSR from mem
   19327 
   19328 	tst.l		%d1			# did ifetch fail?
   19329 	bne.l		iea_iacc		# yes
   19330 
   19331 	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to stack
   19332 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   19333 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   19334 	bsr.l		_imem_read_long		# fetch FPIAR from mem
   19335 
   19336 	tst.l		%d1			# did ifetch fail?
   19337 	bne.l		iea_iacc		# yes
   19338 
   19339 	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to stack
   19340 	rts
   19341 
   19342 #########################################################################
   19343 # XDEF ****************************************************************	#
   19344 #	_dcalc_ea(): calc correct <ea> from <ea> stacked on exception	#
   19345 #									#
   19346 # XREF ****************************************************************	#
   19347 #	inc_areg() - increment an address register			#
   19348 #	dec_areg() - decrement an address register			#
   19349 #									#
   19350 # INPUT ***************************************************************	#
   19351 #	d0 = number of bytes to adjust <ea> by				#
   19352 # 									#
   19353 # OUTPUT **************************************************************	#
   19354 #	a0 = corrected <ea>						#
   19355 #									#
   19356 # ALGORITHM ***********************************************************	#
   19357 # "Dummy" CALCulate Effective Address:					#
   19358 # 	The stacked <ea> for FP unimplemented instructions and opclass	#
   19359 #	two packed instructions is correct with the exception of...	#
   19360 #									#
   19361 #	1) -(An)   : The register is not updated regardless of size.	#
   19362 #		     Also, for extended precision and packed, the 	#
   19363 #		     stacked <ea> value is 8 bytes too big		#
   19364 #	2) (An)+   : The register is not updated.			#
   19365 #	3) #<data> : The upper longword of the immediate operand is 	#
   19366 #		     stacked. b,w,l and s sizes are completely stacked;	#
   19367 #		     d,x, and p are not.				#
   19368 #									#
   19369 #########################################################################
   19370 
   19371 	global		_dcalc_ea
   19372 _dcalc_ea:
         	# In:  d0 = number of bytes to adjust An by.
         	# Out: a0 = <ea> to use (every path below loads a0 before rts).
         	# The mode/reg fields are taken from the low byte of the opword:
         	#   mode 011 (An)+  -> dcea_pi : bump An via inc_areg
         	#   mode 100 -(An)  -> dcea_pd : drop An via dec_areg, fix <ea> if 12
         	#   mode/reg 111/100 #<data> -> dcea_imm
         	#   anything else   -> stacked EXC_EA(%a6) returned unchanged
   19373 	mov.l		%d0, %a0		# move # bytes to %a0
   19374 
   19375 	mov.b		1+EXC_OPWORD(%a6), %d0	# fetch low byte of opcode word
   19376 	mov.l		%d0, %d1		# make a copy
   19377 
   19378 	andi.w		&0x38, %d0		# extract mode field
   19379 	andi.l		&0x7, %d1		# extract reg  field
   19380 
   19381 	cmpi.b		%d0,&0x18		# is mode (An)+ ?
   19382 	beq.b		dcea_pi			# yes
   19383 
   19384 	cmpi.b		%d0,&0x20		# is mode -(An) ?
   19385 	beq.b		dcea_pd			# yes
   19386 
   19387 	or.w		%d1,%d0			# concat mode,reg
   19388 	cmpi.b		%d0,&0x3c		# is mode #<data>?
   19389 
   19390 	beq.b		dcea_imm		# yes
   19391 
   19392 	mov.l		EXC_EA(%a6),%a0		# return <ea>
   19393 	rts
   19394 
   19395 # need to set immediate data flag here since we'll need to do
   19396 # an imem_read to fetch this later.
   19397 dcea_imm:
   19398 	mov.b		&immed_flg,SPCOND_FLG(%a6)
   19399 	lea		([USER_FPIAR,%a6],0x4),%a0 # return <ea> = (saved FPIAR) + 4
   19400 	rts
   19401 
   19402 # here, the <ea> is stacked correctly. however, we must update the
   19403 # address register...
   19404 dcea_pi:
   19405 	mov.l		%a0,%d0			# pass amt to inc by
   19406 	bsr.l		inc_areg		# inc addr register
   19407 
   19408 	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
   19409 	rts
   19410 
   19411 # the <ea> is stacked correctly for all but extended and packed which
   19412 # the <ea>s are 8 bytes too large.
   19413 # it would make no sense to have a pre-decrement to a7 in supervisor
   19414 # mode so we don't even worry about this tricky case here : )
   19415 dcea_pd:
   19416 	mov.l		%a0,%d0			# pass amt to dec by
   19417 	bsr.l		dec_areg		# dec addr register
   19418 
   19419 	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
   19420 
         	# NOTE(review): the compare below reads d0 after the call, so it
         	# relies on dec_areg leaving d0 (the byte count) intact - confirm.
   19421 	cmpi.b		%d0,&0xc		# is opsize ext or packed?
   19422 	beq.b		dcea_pd2		# yes
   19423 	rts
   19424 dcea_pd2:
   19425 	sub.l		&0x8,%a0		# correct <ea>
   19426 	mov.l		%a0,EXC_EA(%a6)		# put correct <ea> on stack
   19427 	rts
   19428 
   19429 #########################################################################
   19430 # XDEF ****************************************************************	#
   19431 # 	_calc_ea_fout(): calculate correct stacked <ea> for extended	#
   19432 #			 and packed data opclass 3 operations.		#
   19433 #									#
   19434 # XREF ****************************************************************	#
   19435 #	None								#
   19436 #									#
   19437 # INPUT ***************************************************************	#
   19438 #	None								#
   19439 # 									#
   19440 # OUTPUT **************************************************************	#
   19441 #	a0 = return correct effective address				#
   19442 #									#
   19443 # ALGORITHM ***********************************************************	#
   19444 #	For opclass 3 extended and packed data operations, the <ea>	#
   19445 # stacked for the exception is incorrect for -(an) and (an)+ addressing	#
   19446 # modes. Also, while we're at it, the address register itself must	#
   19447 # get updated.								#
   19448 # 	So, for -(an), we must subtract 8 off of the stacked <ea> value	#
   19449 # and return that value as the correct <ea> and store that value in An.	#
   19450 # For (an)+, the stacked <ea> is correct but we must adjust An by +12.	#
   19451 #									#
   19452 #########################################################################
   19453 
   19454 # This calc_ea is currently used to retrieve the correct <ea>
   19455 # for fmove outs of type extended and packed.
   19456 	global		_calc_ea_fout
   19457 _calc_ea_fout:
         	# Out: a0 = <ea> to store to.
         	# Decodes mode/reg from the low byte of the opword:
         	#   (An)+ -> ceaf_pi: a0 = stacked <ea>; An += 12
         	#   -(An) -> ceaf_pd: a0 = stacked <ea> - 8; EXC_EA(%a6) and An are
         	#            both set to that value
         	#   other -> a0 = stacked <ea>, nothing else touched
         	# a0/a1 are updated in their EXC_DREGS copies, a2-a5 in the live
         	# registers, a6/a7 in EXC_A6/EXC_A7. The a7 cases also set
         	# SPCOND_FLG(%a6) (mia7_flg or mda7_flg).
   19458 	mov.b		1+EXC_OPWORD(%a6),%d0	# fetch low byte of opcode word
   19459 	mov.l		%d0,%d1			# make a copy
   19460 
   19461 	andi.w		&0x38,%d0		# extract mode field
   19462 	andi.l		&0x7,%d1		# extract reg  field
   19463 
   19464 	cmpi.b		%d0,&0x18		# is mode (An)+ ?
   19465 	beq.b		ceaf_pi			# yes
   19466 
   19467 	cmpi.b		%d0,&0x20		# is mode -(An) ?
   19468 	beq.w		ceaf_pd			# yes
   19469 
   19470 	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
   19471 	rts
   19472 
   19473 # (An)+ : extended and packed fmove out
   19474 #	: stacked <ea> is correct
   19475 #	: "An" not updated
   19476 ceaf_pi:
   19477 	mov.w		(tbl_ceaf_pi.b,%pc,%d1.w*2),%d1 # table offset for reg d1
   19478 	mov.l		EXC_EA(%a6),%a0		# return stacked <ea>
   19479 	jmp		(tbl_ceaf_pi.b,%pc,%d1.w*1) # go update that An
   19480 
   19481 	swbeg		&0x8
   19482 tbl_ceaf_pi:
   19483 	short		ceaf_pi0 - tbl_ceaf_pi
   19484 	short		ceaf_pi1 - tbl_ceaf_pi
   19485 	short		ceaf_pi2 - tbl_ceaf_pi
   19486 	short		ceaf_pi3 - tbl_ceaf_pi
   19487 	short		ceaf_pi4 - tbl_ceaf_pi
   19488 	short		ceaf_pi5 - tbl_ceaf_pi
   19489 	short		ceaf_pi6 - tbl_ceaf_pi
   19490 	short		ceaf_pi7 - tbl_ceaf_pi
   19491 
   19492 ceaf_pi0:
   19493 	addi.l		&0xc,EXC_DREGS+0x8(%a6)	# a0 copy += 12
   19494 	rts
   19495 ceaf_pi1:
   19496 	addi.l		&0xc,EXC_DREGS+0xc(%a6)	# a1 copy += 12
   19497 	rts
   19498 ceaf_pi2:
   19499 	add.l		&0xc,%a2
   19500 	rts
   19501 ceaf_pi3:
   19502 	add.l		&0xc,%a3
   19503 	rts
   19504 ceaf_pi4:
   19505 	add.l		&0xc,%a4
   19506 	rts
   19507 ceaf_pi5:
   19508 	add.l		&0xc,%a5
   19509 	rts
   19510 ceaf_pi6:
   19511 	addi.l		&0xc,EXC_A6(%a6)	# a6 copy += 12
   19512 	rts
   19513 ceaf_pi7:
   19514 	mov.b		&mia7_flg,SPCOND_FLG(%a6) # flag the (a7)+ special case
   19515 	addi.l		&0xc,EXC_A7(%a6)	# a7 copy += 12
   19516 	rts
   19517 
   19518 # -(An) : extended and packed fmove out
   19519 #	: stacked <ea> = actual <ea> + 8
   19520 #	: "An" not updated
   19521 ceaf_pd:
   19522 	mov.w		(tbl_ceaf_pd.b,%pc,%d1.w*2),%d1 # table offset for reg d1
   19523 	mov.l		EXC_EA(%a6),%a0
   19524 	sub.l		&0x8,%a0		# a0 = actual <ea>
   19525 	sub.l		&0x8,EXC_EA(%a6)	# fix the stacked copy too
   19526 	jmp		(tbl_ceaf_pd.b,%pc,%d1.w*1) # go update that An
   19527 
   19528 	swbeg		&0x8
   19529 tbl_ceaf_pd:
   19530 	short		ceaf_pd0 - tbl_ceaf_pd
   19531 	short		ceaf_pd1 - tbl_ceaf_pd
   19532 	short		ceaf_pd2 - tbl_ceaf_pd
   19533 	short		ceaf_pd3 - tbl_ceaf_pd
   19534 	short		ceaf_pd4 - tbl_ceaf_pd
   19535 	short		ceaf_pd5 - tbl_ceaf_pd
   19536 	short		ceaf_pd6 - tbl_ceaf_pd
   19537 	short		ceaf_pd7 - tbl_ceaf_pd
   19538 
   19539 ceaf_pd0:
   19540 	mov.l		%a0,EXC_DREGS+0x8(%a6)	# a0 copy = corrected <ea>
   19541 	rts
   19542 ceaf_pd1:
   19543 	mov.l		%a0,EXC_DREGS+0xc(%a6)	# a1 copy = corrected <ea>
   19544 	rts
   19545 ceaf_pd2:
   19546 	mov.l		%a0,%a2
   19547 	rts
   19548 ceaf_pd3:
   19549 	mov.l		%a0,%a3
   19550 	rts
   19551 ceaf_pd4:
   19552 	mov.l		%a0,%a4
   19553 	rts
   19554 ceaf_pd5:
   19555 	mov.l		%a0,%a5
   19556 	rts
   19557 ceaf_pd6:
   19558 	mov.l		%a0,EXC_A6(%a6)		# a6 copy = corrected <ea>
   19559 	rts
   19560 ceaf_pd7:
   19561 	mov.l		%a0,EXC_A7(%a6)		# a7 copy = corrected <ea>
   19562 	mov.b		&mda7_flg,SPCOND_FLG(%a6) # flag the -(a7) special case
   19563 	rts
   19564 
   19565 #########################################################################
   19566 # XDEF ****************************************************************	#
   19567 #	_load_fop(): load operand for unimplemented FP exception	#
   19568 #									#
   19569 # XREF ****************************************************************	#
   19570 #	set_tag_x() - determine ext prec optype tag			#
   19571 #	set_tag_s() - determine sgl prec optype tag			#
   19572 #	set_tag_d() - determine dbl prec optype tag			#
   19573 #	unnorm_fix() - convert normalized number to denorm or zero	#
   19574 #	norm() - normalize a denormalized number			#
   19575 #	get_packed() - fetch a packed operand from memory		#
   19576 #	_dcalc_ea() - calculate <ea>, fixing An in process		#
   19577 #									#
   19578 #	_imem_read_{word,long}() - read from instruction memory		#
   19579 #	_dmem_read() - read from data memory				#
   19580 #	_dmem_read_{byte,word,long}() - read from data memory		#
   19581 #									#
   19582 #	facc_in_{b,w,l,d,x}() - mem read failed; special exit point	#
   19583 #									#
   19584 # INPUT ***************************************************************	#
   19585 #	None								#
   19586 # 									#
   19587 # OUTPUT **************************************************************	#
   19588 #	If memory access doesn't fail:					#
   19589 #		FP_SRC(a6) = source operand in extended precision	#
   19590 # 		FP_DST(a6) = destination operand in extended precision	#
   19591 #									#
   19592 # ALGORITHM ***********************************************************	#
   19593 # 	This is called from the Unimplemented FP exception handler in	#
   19594 # order to load the source and maybe destination operand into		#
   19595 # FP_SRC(a6) and FP_DST(a6). If the instruction was opclass zero, load	#
   19596 # the source and destination from the FP register file. Set the optype	#
   19597 # tags for both if dyadic, one for monadic. If a number is an UNNORM,	#
   19598 # convert it to a DENORM or a ZERO.					#
   19599 # 	If the instruction is opclass two (memory->reg), then fetch	#
   19600 # the destination from the register file and the source operand from 	#
   19601 # memory. Tag and fix both as above w/ opclass zero instructions.	#
   19602 # 	If the source operand is byte,word,long, or single, it may be	#
   19603 # in the data register file. If it's actually out in memory, use one of	#
   19604 # the mem_read() routines to fetch it. If the mem_read() access returns	#
   19605 # a failing value, exit through the special facc_in() routine which	#
   19606 # will create an access error exception frame from the current		#
   19607 # exception frame.							#
   19608 # 	Immediate data and regular data accesses are separated because 	#
   19609 # if an immediate data access fails, the resulting fault status		#
   19610 # longword stacked for the access error exception must have the 	#
   19611 # instruction bit set.							#
   19612 #									#
   19613 #########################################################################
   19614 
   19615 	global		_load_fop
   19616 _load_fop:
   19617 # entry: dispatch on the opclass field of the stacked command word.
   19618 #  15     13 12 10  9 7  6       0
   19619 # /        \ /   \ /  \ /         \
   19620 # ---------------------------------
   19621 # | opclass | RX  | RY | EXTENSION |  (2nd word of general FP instruction)
   19622 # ---------------------------------
   19623 #
   19624 
   19625 #	bfextu		EXC_CMDREG(%a6){&0:&3}, %d0 # extract opclass
   19626 #	cmpi.b		%d0, &0x2		# which class is it? ('000,'010,'011)
   19627 #	beq.w		op010			# handle <ea> -> fpn
   19628 #	bgt.w		op011			# handle fpn -> <ea>
   19629 
   19630 # we're not using op011 for now, so one opclass bit separates '000 from '010.
   19631 	btst		&0x6,EXC_CMDREG(%a6)	# byte-sized btst: bit 6 of 1st byte == bit 14 of word
   19632 	bne.b		op010			# set => <ea> -> reg; clear => fall into op000
   19633 
   19634 ############################
   19635 # OPCLASS '000: reg -> reg #
   19636 ############################
   19637 op000:
   19638 	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch extension word lo
   19639 	btst		&0x5,%d0		# testing extension bits
   19640 	beq.b		op000_src		# (bit 5 == 0) => monadic
   19641 	btst		&0x4,%d0		# (bit 5 == 1)
   19642 	beq.b		op000_dst		# (bit 4 == 0) => dyadic
   19643 	and.w		&0x007f,%d0		# extract extension bits {6:0}
   19644 	cmpi.w		%d0,&0x0038		# is it an fcmp (dyadic) ?
   19645 	bne.b		op000_src		# no; only the src operand is loaded
   19646 # fcmp falls through: its dst operand is loaded as well.
   19647 op000_dst:
   19648 	bfextu		EXC_CMDREG(%a6){&6:&3}, %d0 # extract dst field
   19649 	bsr.l		load_fpn2		# fetch dst fpreg into FP_DST
   19650 
   19651 	bsr.l		set_tag_x		# get dst optype tag
   19652 
   19653 	cmpi.b		%d0, &UNNORM		# is dst fpreg an UNNORM?
   19654 	beq.b		op000_dst_unnorm	# yes
   19655 op000_dst_cont:
   19656 	mov.b 		%d0, DTAG(%a6)		# store the dst optype tag
   19657 
   19658 op000_src:
   19659 	bfextu		EXC_CMDREG(%a6){&3:&3}, %d0 # extract src field
   19660 	bsr.l		load_fpn1		# fetch src fpreg into FP_SRC
   19661 
   19662 	bsr.l		set_tag_x		# get src optype tag
   19663 
   19664 	cmpi.b		%d0, &UNNORM		# is src fpreg an UNNORM?
   19665 	beq.b		op000_src_unnorm	# yes
   19666 op000_src_cont:
   19667 	mov.b		%d0, STAG(%a6)		# store the src optype tag
   19668 	rts
   19669 # out-of-line UNNORM fixups; the tag left in %d0 is stored at the _cont label.
   19670 op000_dst_unnorm:
   19671 	bsr.l		unnorm_fix		# fix the dst UNNORM
   19672 	bra.b		op000_dst_cont
   19673 op000_src_unnorm:
   19674 	bsr.l		unnorm_fix		# fix the src UNNORM
   19675 	bra.b		op000_src_cont
   19676 
   19677 #############################
   19678 # OPCLASS '010: <ea> -> reg #
   19679 #############################
   19680 op010:
   19681 	mov.w		EXC_CMDREG(%a6),%d0	# fetch extension word
   19682 	btst		&0x5,%d0		# testing extension bits
   19683 	beq.b		op010_src		# (bit 5 == 0) => monadic
   19684 	btst		&0x4,%d0		# (bit 5 == 1)
   19685 	beq.b		op010_dst		# (bit 4 == 0) => dyadic
   19686 	and.w		&0x007f,%d0		# extract extension bits {6:0}
   19687 	cmpi.w		%d0,&0x0038		# is it an fcmp (dyadic) ?
   19688 	bne.b		op010_src		# no; only the src operand is loaded
   19689 # fcmp falls through: its dst operand is loaded as well.
   19690 op010_dst:
   19691 	bfextu		EXC_CMDREG(%a6){&6:&3}, %d0 # extract dst field
   19692 	bsr.l		load_fpn2		# fetch dst fpreg ptr
   19693 
   19694 	bsr.l		set_tag_x		# get dst type tag
   19695 
   19696 	cmpi.b		%d0, &UNNORM		# is dst fpreg an UNNORM?
   19697 	beq.b		op010_dst_unnorm	# yes
   19698 op010_dst_cont:
   19699 	mov.b		%d0, DTAG(%a6)		# store the dst optype tag
   19700 
   19701 op010_src:
   19702 	bfextu		EXC_CMDREG(%a6){&3:&3}, %d0 # extract src type field
   19703 # the bne below consumes the Z flag set by this second bfextu (mode field).
   19704 	bfextu		EXC_OPWORD(%a6){&10:&3}, %d1 # extract <ea> mode field
   19705 	bne.w		fetch_from_mem		# mode != 0 => src op is in memory
   19706 
   19707 op010_dreg:
   19708 	clr.b		STAG(%a6)		# either NORM or ZERO (tag 0 for now)
   19709 	bfextu		EXC_OPWORD(%a6){&13:&3}, %d1 # extract src reg field
   19710 # %d0 = src type (table index), %d1 = src register number for the opd_* handler.
   19711 	mov.w		(tbl_op010_dreg.b,%pc,%d0.w*2), %d0 # jmp based on optype
   19712 	jmp		(tbl_op010_dreg.b,%pc,%d0.w*1) # fetch src from dreg
   19713 
   19714 op010_dst_unnorm:
   19715 	bsr.l		unnorm_fix		# fix the dst UNNORM
   19716 	bra.b		op010_dst_cont
   19717 # offsets relative to the table; index order matches tbl_fp_type.
   19718 	swbeg		&0x8
   19719 tbl_op010_dreg:
   19720 	short		opd_long	- tbl_op010_dreg	# 0: long
   19721 	short		opd_sgl 	- tbl_op010_dreg	# 1: sgl
   19722 	short		tbl_op010_dreg	- tbl_op010_dreg	# 2: (ext) offset 0, no handler
   19723 	short		tbl_op010_dreg	- tbl_op010_dreg	# 3: (packed) offset 0, no handler
   19724 	short		opd_word	- tbl_op010_dreg	# 4: word
   19725 	short		tbl_op010_dreg	- tbl_op010_dreg	# 5: (dbl) offset 0, no handler
   19726 	short		opd_byte	- tbl_op010_dreg	# 6: byte
   19727 	short		tbl_op010_dreg	- tbl_op010_dreg	# 7: offset 0, no handler
   19728 
   19729 #
   19730 # LONG: can be either NORM or ZERO... (STAG was already cleared by the caller path)
   19731 #
   19732 opd_long:
   19733 	bsr.l		fetch_dreg		# fetch long in d0
   19734 	fmov.l		%d0, %fp0 		# load a long
   19735 	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC
   19736 	fbeq.w		opd_long_zero		# long is a ZERO (cc from the fmov.l above)
   19737 	rts
   19738 opd_long_zero:
   19739 	mov.b		&ZERO, STAG(%a6)	# set ZERO optype flag
   19740 	rts
   19741 
   19742 #
   19743 # WORD: can be either NORM or ZERO... (STAG was already cleared by the caller path)
   19744 #
   19745 opd_word:
   19746 	bsr.l		fetch_dreg		# fetch word in d0
   19747 	fmov.w		%d0, %fp0 		# load a word
   19748 	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC
   19749 	fbeq.w		opd_word_zero		# WORD is a ZERO (cc from the fmov.w above)
   19750 	rts
   19751 opd_word_zero:
   19752 	mov.b		&ZERO, STAG(%a6)	# set ZERO optype flag
   19753 	rts
   19754 
   19755 #
   19756 # BYTE: can be either NORM or ZERO... (STAG was already cleared by the caller path)
   19757 #
   19758 opd_byte:
   19759 	bsr.l		fetch_dreg		# fetch byte in d0
   19760 	fmov.b		%d0, %fp0 		# load a byte
   19761 	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC
   19762 	fbeq.w		opd_byte_zero		# byte is a ZERO (cc from the fmov.b above)
   19763 	rts
   19764 opd_byte_zero:
   19765 	mov.b		&ZERO, STAG(%a6)	# set ZERO optype flag
   19766 	rts
   19767 
   19768 #
   19769 # SGL: can be either NORM, DENORM, ZERO, INF, QNAN or SNAN but not UNNORM
   19770 #
   19771 # separate SNANs and DENORMs so they can be loaded w/ special care.
   19772 # all others can simply be moved "in" using fmove.
   19773 #
   19774 opd_sgl:
   19775 	bsr.l		fetch_dreg		# fetch sgl in d0
   19776 	mov.l		%d0,L_SCR1(%a6)		# park it in memory so it can be tagged
   19777 
   19778 	lea		L_SCR1(%a6), %a0 	# pass: ptr to the sgl
   19779 	bsr.l		set_tag_s		# determine sgl type
   19780 	mov.b		%d0, STAG(%a6)		# save the src tag
   19781 
   19782 	cmpi.b		%d0, &SNAN		# is it an SNAN?
   19783 	beq.w		get_sgl_snan		# yes
   19784 
   19785 	cmpi.b		%d0, &DENORM		# is it a DENORM?
   19786 	beq.w		get_sgl_denorm		# yes
   19787 # NOTE(review): %a0 is reused below, so set_tag_s must preserve it - confirm.
   19788 	fmov.s		(%a0), %fp0		# no, so can load it regular
   19789 	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC
   19790 	rts
   19791 
   19792 ##############################################################################
   19793 
   19794 #########################################################################
   19795 # fetch_from_mem():							#
   19796 # - src is out in memory. must:						#
   19797 #	(1) calc ea - must read AFTER you know the src type since	#
   19798 #		      if the ea is -() or ()+, need to know # of bytes.	#
   19799 #	(2) read it in from either user or supervisor space		#
   19800 #	(3) if (b || w || l) then simply read in			#
   19801 #	    if (s || d || x) then check for SNAN,UNNORM,DENORM		#
   19802 #	    if (packed) then dispatch to load_packed			#
   19803 # INPUT:								#
   19804 #	%d0 : src type field						#
   19805 #########################################################################
   19806 fetch_from_mem:
   19807 	clr.b		STAG(%a6)		# either NORM or ZERO
   19808 # two-step dispatch: load the 16-bit offset, then jump relative to the table.
   19809 	mov.w		(tbl_fp_type.b,%pc,%d0.w*2), %d0 # index by src type field
   19810 	jmp		(tbl_fp_type.b,%pc,%d0.w*1)
   19811 
   19812 	swbeg		&0x8
   19813 tbl_fp_type:
   19814 	short		load_long	- tbl_fp_type
   19815 	short		load_sgl	- tbl_fp_type
   19816 	short		load_ext	- tbl_fp_type
   19817 	short		load_packed	- tbl_fp_type
   19818 	short		load_word	- tbl_fp_type
   19819 	short		load_dbl	- tbl_fp_type
   19820 	short		load_byte	- tbl_fp_type
   19821 	short		tbl_fp_type	- tbl_fp_type	# 7: offset 0, no handler
   19822 
   19823 #########################################
   19824 # load a LONG into %fp0:		#
   19825 # 	-number can't fault		#
   19826 #	(1) calc ea			#
   19827 #	(2) read 4 bytes into %d0	#
   19828 #	(3) fmov.l into %fp0		#
   19829 #########################################
   19830 load_long:
   19831 	movq.l		&0x4, %d0		# pass: 4 (bytes)
   19832 	bsr.l		_dcalc_ea		# calc <ea>; <ea> in %a0
   19833 
   19834 	cmpi.b		SPCOND_FLG(%a6),&immed_flg	# was the <ea> flagged as immediate?
   19835 	beq.b		load_long_immed		# yes; read it as instruction data
   19836 
   19837 	bsr.l		_dmem_read_long		# fetch src operand from memory
   19838 
   19839 	tst.l		%d1			# did dfetch fail?
   19840 	bne.l		facc_in_l		# yes
   19841 
   19842 load_long_cont:
   19843 	fmov.l		%d0, %fp0		# read into %fp0;convert to xprec
   19844 	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC
   19845 
   19846 	fbeq.w		load_long_zero		# src op is a ZERO
   19847 	rts
   19848 load_long_zero:
   19849 	mov.b		&ZERO, STAG(%a6)	# set optype tag to ZERO
   19850 	rts
   19851 
   19852 load_long_immed:
   19853 	bsr.l		_imem_read_long		# fetch src operand immed data
   19854 
   19855 	tst.l		%d1			# did ifetch fail?
   19856 	bne.l		funimp_iacc		# yes
   19857 	bra.b		load_long_cont
   19858 
   19859 #########################################
   19860 # load a WORD into %fp0:		#
   19861 # 	-number can't fault		#
   19862 #	(1) calc ea			#
   19863 #	(2) read 2 bytes into %d0	#
   19864 #	(3) fmov.w into %fp0		#
   19865 #########################################
   19866 load_word:
   19867 	movq.l		&0x2, %d0		# pass: 2 (bytes)
   19868 	bsr.l		_dcalc_ea		# calc <ea>; <ea> in %a0
   19869 
   19870 	cmpi.b		SPCOND_FLG(%a6),&immed_flg	# was the <ea> flagged as immediate?
   19871 	beq.b		load_word_immed		# yes; read it as instruction data
   19872 
   19873 	bsr.l		_dmem_read_word		# fetch src operand from memory
   19874 
   19875 	tst.l		%d1			# did dfetch fail?
   19876 	bne.l		facc_in_w		# yes
   19877 
   19878 load_word_cont:
   19879 	fmov.w		%d0, %fp0		# read into %fp0;convert to xprec
   19880 	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC
   19881 
   19882 	fbeq.w		load_word_zero		# src op is a ZERO
   19883 	rts
   19884 load_word_zero:
   19885 	mov.b		&ZERO, STAG(%a6)	# set optype tag to ZERO
   19886 	rts
   19887 
   19888 load_word_immed:
   19889 	bsr.l		_imem_read_word		# fetch src operand immed data
   19890 
   19891 	tst.l		%d1			# did ifetch fail?
   19892 	bne.l		funimp_iacc		# yes
   19893 	bra.b		load_word_cont
   19894 
   19895 #########################################
   19896 # load a BYTE into %fp0:		#
   19897 # 	-number can't fault		#
   19898 #	(1) calc ea			#
   19899 #	(2) read 1 byte into %d0	#
   19900 #	(3) fmov.b into %fp0		#
   19901 #########################################
   19902 load_byte:
   19903 	movq.l		&0x1, %d0		# pass: 1 (byte)
   19904 	bsr.l		_dcalc_ea		# calc <ea>; <ea> in %a0
   19905 
   19906 	cmpi.b		SPCOND_FLG(%a6),&immed_flg	# was the <ea> flagged as immediate?
   19907 	beq.b		load_byte_immed		# yes; read it as instruction data
   19908 
   19909 	bsr.l		_dmem_read_byte		# fetch src operand from memory
   19910 
   19911 	tst.l		%d1			# did dfetch fail?
   19912 	bne.l		facc_in_b		# yes
   19913 
   19914 load_byte_cont:
   19915 	fmov.b		%d0, %fp0		# read into %fp0;convert to xprec
   19916 	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC
   19917 
   19918 	fbeq.w		load_byte_zero		# src op is a ZERO
   19919 	rts
   19920 load_byte_zero:
   19921 	mov.b		&ZERO, STAG(%a6)	# set optype tag to ZERO
   19922 	rts
   19923 # a word read is used here; fmov.b above consumes only the low byte of %d0.
   19924 load_byte_immed:
   19925 	bsr.l		_imem_read_word		# fetch src operand immed data
   19926 
   19927 	tst.l		%d1			# did ifetch fail?
   19928 	bne.l		funimp_iacc		# yes
   19929 	bra.b		load_byte_cont
   19930 
   19931 #########################################
   19932 # load a SGL into %fp0:			#
   19933 # 	-number can't fault		#
   19934 #	(1) calc ea			#
   19935 #	(2) read 4 bytes into L_SCR1	#
   19936 #	(3) fmov.s into %fp0		#
   19937 #########################################
   19938 load_sgl:
   19939 	movq.l		&0x4, %d0		# pass: 4 (bytes)
   19940 	bsr.l		_dcalc_ea		# calc <ea>; <ea> in %a0
   19941 
   19942 	cmpi.b		SPCOND_FLG(%a6),&immed_flg
   19943 	beq.b		load_sgl_immed
   19944 
   19945 	bsr.l		_dmem_read_long		# fetch src operand from memory
   19946 	mov.l		%d0, L_SCR1(%a6)	# store src op on stack
   19947 
   19948 	tst.l		%d1			# did dfetch fail?
   19949 	bne.l		facc_in_l		# yes
   19950 
   19951 load_sgl_cont:
   19952 	lea		L_SCR1(%a6), %a0	# pass: ptr to sgl src op
   19953 	bsr.l		set_tag_s		# determine src type tag
   19954 	mov.b		%d0, STAG(%a6)		# save src optype tag on stack
   19955 
   19956 	cmpi.b		%d0, &DENORM		# is it a sgl DENORM?
   19957 	beq.w		get_sgl_denorm		# yes
   19958 
   19959 	cmpi.b		%d0, &SNAN		# is it a sgl SNAN?
   19960 	beq.w		get_sgl_snan		# yes
   19961 
   19962 	fmov.s		L_SCR1(%a6), %fp0	# read into %fp0;convert to xprec
   19963 	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC
   19964 	rts
   19965 
   19966 load_sgl_immed:
   19967 	bsr.l		_imem_read_long		# fetch src operand immed data
   19968 
   19969 	tst.l		%d1			# did ifetch fail?
   19970 	bne.l		funimp_iacc		# yes
   19971 	bra.b		load_sgl_cont
   19972 
   19973 # must convert sgl denorm format to an Xprec denorm fmt suitable for
   19974 # normalization...
   19975 # %a0 : points to sgl denorm
   19976 get_sgl_denorm:
   19977 	clr.w		FP_SRC_EX(%a6)		# start with zero sign/exponent word
   19978 	bfextu		(%a0){&9:&23}, %d0	# fetch sgl hi(_mantissa)
   19979 	lsl.l		&0x8, %d0		# left-align the 23 bits just below bit 31
   19980 	mov.l		%d0, FP_SRC_HI(%a6)	# set ext hi(_mantissa)
   19981 	clr.l		FP_SRC_LO(%a6)		# set ext lo(_mantissa)
   19982 
   19983 	clr.w		FP_SRC_EX(%a6)		# (repeats the clr.w at the top of the routine)
   19984 	btst		&0x7, (%a0)		# is sgn bit set?
   19985 	beq.b		sgl_dnrm_norm
   19986 	bset		&0x7, FP_SRC_EX(%a6)	# set sgn of xprec value
   19987 
   19988 sgl_dnrm_norm:
   19989 	lea		FP_SRC(%a6), %a0
   19990 	bsr.l		norm			# normalize number
   19991 	mov.w		&0x3f81, %d1		# xprec exp = 0x3f81 (= 0x3fff - 126)
   19992 	sub.w		%d0, %d1		# exp = 0x3f81 - shft amt.
   19993 	or.w		%d1, FP_SRC_EX(%a6)	# {sgn,exp}
   19994 
   19995 	mov.b		&NORM, STAG(%a6)	# fix src type tag
   19996 	rts
   19997 
   19998 # convert sgl to ext SNAN; the result is built directly in FP_SRC.
   19999 # %a0 : points to sgl SNAN
   20000 get_sgl_snan:
   20001 	mov.w		&0x7fff, FP_SRC_EX(%a6) # set exp of SNAN
   20002 	bfextu		(%a0){&9:&23}, %d0	# fetch the 23 bits after sign/exponent
   20003 	lsl.l		&0x8, %d0		# extract and insert hi(man)
   20004 	mov.l		%d0, FP_SRC_HI(%a6)
   20005 	clr.l		FP_SRC_LO(%a6)		# no low mantissa bits in a sgl
   20006 
   20007 	btst		&0x7, (%a0)		# see if sign of SNAN is set
   20008 	beq.b		no_sgl_snan_sgn
   20009 	bset		&0x7, FP_SRC_EX(%a6)	# copy the sign into the ext value
   20010 no_sgl_snan_sgn:
   20011 	rts
   20012 
   20013 #########################################
   20014 # load a DBL into %fp0:			#
   20015 # 	-number can't fault		#
   20016 #	(1) calc ea			#
   20017 #	(2) read 8 bytes into L_SCR(1,2)#
   20018 #	(3) fmov.d into %fp0		#
   20019 #########################################
   20020 load_dbl:
   20021 	movq.l		&0x8, %d0		# pass: 8 (bytes)
   20022 	bsr.l		_dcalc_ea		# calc <ea>; <ea> in %a0
   20023 
   20024 	cmpi.b		SPCOND_FLG(%a6),&immed_flg	# was the <ea> flagged as immediate?
   20025 	beq.b		load_dbl_immed		# yes; read it as instruction data
   20026 
   20027 	lea		L_SCR1(%a6), %a1	# pass: ptr to input dbl tmp space
   20028 	movq.l		&0x8, %d0		# pass: # bytes to read
   20029 	bsr.l		_dmem_read		# fetch src operand from memory
   20030 
   20031 	tst.l		%d1			# did dfetch fail?
   20032 	bne.l		facc_in_d		# yes
   20033 
   20034 load_dbl_cont:
   20035 	lea		L_SCR1(%a6), %a0	# pass: ptr to input dbl
   20036 	bsr.l		set_tag_d		# determine src type tag
   20037 	mov.b		%d0, STAG(%a6)		# set src optype tag
   20038 
   20039 	cmpi.b		%d0, &DENORM		# is it a dbl DENORM?
   20040 	beq.w		get_dbl_denorm		# yes
   20041 
   20042 	cmpi.b		%d0, &SNAN		# is it a dbl SNAN?
   20043 	beq.w		get_dbl_snan		# yes
   20044 
   20045 	fmov.d		L_SCR1(%a6), %fp0	# read into %fp0;convert to xprec
   20046 	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC
   20047 	rts
   20048 
   20049 load_dbl_immed:
   20050 	lea		L_SCR1(%a6), %a1	# pass: ptr to input dbl tmp space
   20051 	movq.l		&0x8, %d0		# pass: # bytes to read
   20052 	bsr.l		_imem_read		# fetch src operand immed data
   20053 
   20054 	tst.l		%d1			# did ifetch fail?
   20055 	bne.l		funimp_iacc		# yes
   20056 	bra.b		load_dbl_cont
   20057 
   20058 # must convert dbl denorm format to an Xprec denorm fmt suitable for
   20059 # normalization...
   20060 # %a0 : loc. of dbl denorm
   20061 get_dbl_denorm:
   20062 	clr.w		FP_SRC_EX(%a6)		# start with zero sign/exponent word
   20063 	bfextu		(%a0){&12:&31}, %d0	# fetch hi(_mantissa)
   20064 	mov.l		%d0, FP_SRC_HI(%a6)	# 31 bits, so bit 31 of hi stays clear
   20065 	bfextu		4(%a0){&11:&21}, %d0	# fetch lo(_mantissa)
   20066 	mov.l		&0xb, %d1
   20067 	lsl.l		%d1, %d0		# left-align the remaining 21 bits
   20068 	mov.l		%d0, FP_SRC_LO(%a6)
   20069 
   20070 	btst		&0x7, (%a0)		# is sgn bit set?
   20071 	beq.b		dbl_dnrm_norm
   20072 	bset		&0x7, FP_SRC_EX(%a6)	# set sgn of xprec value
   20073 
   20074 dbl_dnrm_norm:
   20075 	lea		FP_SRC(%a6), %a0
   20076 	bsr.l		norm			# normalize number
   20077 	mov.w		&0x3c01, %d1		# xprec exp = 0x3c01 (= 0x3fff - 1022)
   20078 	sub.w		%d0, %d1		# exp = 0x3c01 - shft amt.
   20079 	or.w		%d1, FP_SRC_EX(%a6)	# {sgn,exp}
   20080 
   20081 	mov.b		&NORM, STAG(%a6)	# fix src type tag
   20082 	rts
   20083 
   20084 # convert dbl to ext SNAN; the result is built directly in FP_SRC.
   20085 # %a0 : points to dbl SNAN
   20086 get_dbl_snan:
   20087 	mov.w		&0x7fff, FP_SRC_EX(%a6) # set exp of SNAN
   20088 
   20089 	bfextu		(%a0){&12:&31}, %d0	# fetch hi(_mantissa)
   20090 	mov.l		%d0, FP_SRC_HI(%a6)
   20091 	bfextu		4(%a0){&11:&21}, %d0	# fetch lo(_mantissa)
   20092 	mov.l		&0xb, %d1
   20093 	lsl.l		%d1, %d0		# left-align the remaining 21 bits
   20094 	mov.l		%d0, FP_SRC_LO(%a6)
   20095 
   20096 	btst		&0x7, (%a0)		# see if sign of SNAN is set
   20097 	beq.b		no_dbl_snan_sgn
   20098 	bset		&0x7, FP_SRC_EX(%a6)	# copy the sign into the ext value
   20099 no_dbl_snan_sgn:
   20100 	rts
   20101 
   20102 #################################################
   20103 # load a Xprec into FP_SRC:			#
   20104 # 	-number can't fault			#
   20105 #	(1) calc ea				#
   20106 #	(2) read 12 bytes into FP_SRC		#
   20107 #	(3) tag it; fix it if it's an UNNORM	#
   20108 #################################################
   20109 load_ext:
   20110 	mov.l		&0xc, %d0		# pass: 12 (bytes)
   20111 	bsr.l		_dcalc_ea		# calc <ea>
   20112 # NOTE(review): unlike the other load_* routines, immed_flg is not checked here.
   20113 	lea		FP_SRC(%a6), %a1	# pass: ptr to input ext tmp space
   20114 	mov.l		&0xc, %d0		# pass: # of bytes to read
   20115 	bsr.l		_dmem_read		# fetch src operand from memory
   20116 
   20117 	tst.l		%d1			# did dfetch fail?
   20118 	bne.l		facc_in_x		# yes
   20119 
   20120 	lea		FP_SRC(%a6), %a0	# pass: ptr to src op
   20121 	bsr.l		set_tag_x		# determine src type tag
   20122 
   20123 	cmpi.b		%d0, &UNNORM		# is the src op an UNNORM?
   20124 	beq.b		load_ext_unnorm		# yes
   20125 
   20126 	mov.b		%d0, STAG(%a6)		# store the src optype tag
   20127 	rts
   20128 
   20129 load_ext_unnorm:
   20130 	bsr.l		unnorm_fix		# fix the src UNNORM
   20131 	mov.b		%d0, STAG(%a6)		# store the src optype tag
   20132 	rts
   20133 
   20134 #################################################
   20135 # load a packed operand:			#
   20136 # 	-number can't fault			#
   20137 #	(1) get_packed() fetches the operand	#
   20138 #	(2) tag the value found at FP_SRC	#
   20139 #	(3) fix it if it's an UNNORM		#
   20140 #################################################
   20141 load_packed:
   20142 	bsr.l		get_packed		# presumably leaves the result in FP_SRC - confirm
   20143 
   20144 	lea		FP_SRC(%a6),%a0		# pass ptr to src op
   20145 	bsr.l		set_tag_x		# determine src type tag
   20146 	cmpi.b		%d0,&UNNORM		# is the src op an UNNORM ZERO?
   20147 	beq.b		load_packed_unnorm	# yes
   20148 
   20149 	mov.b		%d0,STAG(%a6)		# store the src optype tag
   20150 	rts
   20151 
   20152 load_packed_unnorm:
   20153 	bsr.l		unnorm_fix		# fix the UNNORM ZERO
   20154 	mov.b		%d0,STAG(%a6)		# store the src optype tag
   20155 	rts
   20156 
   20157 #########################################################################
   20158 # XDEF ****************************************************************	#
   20159 # 	fout(): move from fp register to memory or data register	#
   20160 #									#
   20161 # XREF ****************************************************************	#
   20162 #	_round() - needed to create EXOP for sgl/dbl precision		#
   20163 #	norm() - needed to create EXOP for extended precision		#
   20164 #	ovf_res() - create default overflow result for sgl/dbl precision#
   20165 #	unf_res() - create default underflow result for sgl/dbl prec.	#
   20166 #	dst_dbl() - create rounded dbl precision result.		#
   20167 #	dst_sgl() - create rounded sgl precision result.		#
   20168 #	fetch_dreg() - fetch dynamic k-factor reg for packed.		#
   20169 #	bindec() - convert FP binary number to packed number.		#
   20170 #	_mem_write() - write data to memory.				#
   20171 #	_mem_write2() - write data to memory unless supv mode -(a7) exc.#
   20172 #	_dmem_write_{byte,word,long}() - write data to memory.		#
   20173 #	store_dreg_{b,w,l}() - store data to data register file.	#
   20174 #	facc_out_{b,w,l,d,x}() - data access error occurred.		#
   20175 #									#
   20176 # INPUT ***************************************************************	#
   20177 #	a0 = pointer to extended precision source operand		#
   20178 #	d0 = round prec,mode						#
   20179 # 									#
   20180 # OUTPUT **************************************************************	#
   20181 #	fp0 : intermediate underflow or overflow result if		#
   20182 #	      OVFL/UNFL occurred for a sgl or dbl operand		#
   20183 #									#
   20184 # ALGORITHM ***********************************************************	#
   20185 #	This routine is accessed by many handlers that need to do an	#
   20186 # opclass three move of an operand out to memory.			#
   20187 #	Decode an fmove out (opclass 3) instruction to determine if	#
   20188 # it's b,w,l,s,d,x, or p in size. b,w,l can be stored to either a data	#
   20189 # register or memory. The algorithm uses a standard "fmove" to create	#
   20190 # the rounded result. Also, since exceptions are disabled, this also	#
   20191 # creates the correct OPERR default result if appropriate.		#
   20192 #	For sgl or dbl precision, overflow or underflow can occur. If	#
   20193 # either occurs and is enabled, the EXOP must be created.		#
   20194 #	For extended precision, the stacked <ea> must be fixed along	#
   20195 # w/ the address index register as appropriate w/ _calc_ea_fout(). If	#
   20196 # the source is a denorm and if underflow is enabled, an EXOP must be	#
   20197 # created.								#
   20198 # 	For packed, the k-factor must be fetched from the instruction	#
   20199 # word or a data register. The <ea> must be fixed as w/ extended 	#
   20200 # precision. Then, bindec() is called to create the appropriate 	#
   20201 # packed result.							#
   20202 #	If at any time an access error is flagged by one of the move-	#
   20203 # to-memory routines, then a special exit must be made so that the	#
   20204 # access error can be handled properly.					#
   20205 #									#
   20206 #########################################################################
   20207 
   20208 	global		fout
   20209 fout:
   20210 	bfextu		EXC_CMDREG(%a6){&3:&3},%d1 # extract dst fmt
   20211 	mov.w		(tbl_fout.b,%pc,%d1.w*2),%a1 # use as index
   20212 	jmp		(tbl_fout.b,%pc,%a1)	# jump to routine
   20213 # offsets relative to the table, indexed by the 3-bit dst fmt field.
   20214 	swbeg		&0x8
   20215 tbl_fout:
   20216 	short		fout_long	-	tbl_fout
   20217 	short		fout_sgl	-	tbl_fout
   20218 	short		fout_ext	-	tbl_fout
   20219 	short		fout_pack	-	tbl_fout
   20220 	short		fout_word	-	tbl_fout
   20221 	short		fout_dbl	-	tbl_fout
   20222 	short		fout_byte	-	tbl_fout
   20223 	short		fout_pack	-	tbl_fout	# fmt 7 shares the packed handler with fmt 3
   20224 
   20225 #################################################################
   20226 # fmove.b out ###################################################
   20227 #################################################################
   20228 
   20229 # Only "Unimplemented Data Type" exceptions enter here. The operand
   20230 # is either a DENORM or a NORM.
   20231 fout_byte:
   20232 	tst.b		STAG(%a6)		# is operand normalized?
   20233 	bne.b		fout_byte_denorm	# no
   20234 
   20235 	fmovm.x		SRC(%a0),&0x80		# load value
   20236 
   20237 fout_byte_norm:
   20238 	fmov.l		%d0,%fpcr		# insert rnd prec,mode
   20239 
   20240 	fmov.b		%fp0,%d0		# exec move out w/ correct rnd mode
   20241 
   20242 	fmov.l		&0x0,%fpcr		# clear FPCR
   20243 	fmov.l		%fpsr,%d1		# fetch FPSR
   20244 	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits
   20245 
   20246 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
   20247 	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
   20248 	beq.b		fout_byte_dn		# must save to integer regfile
   20249 
   20250 	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
   20251 	bsr.l		_dmem_write_byte	# write byte
   20252 
   20253 	tst.l		%d1			# did dstore fail?
   20254 	bne.l		facc_out_b		# yes
   20255 
   20256 	rts
   20257 
   20258 fout_byte_dn:
   20259 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
   20260 	andi.w		&0x7,%d1		# keep only the 3-bit register number
   20261 	bsr.l		store_dreg_b
   20262 	rts
   20263 # DENORM source: substitute a tiny sgl with the same sign and convert that instead.
   20264 fout_byte_denorm:
   20265 	mov.l		SRC_EX(%a0),%d1
   20266 	andi.l		&0x80000000,%d1		# keep DENORM sign
   20267 	ori.l		&0x00800000,%d1		# make smallest sgl
   20268 	fmov.s		%d1,%fp0
   20269 	bra.b		fout_byte_norm
   20270 
   20271 #################################################################
   20272 # fmove.w out ###################################################
   20273 #################################################################
   20274 
   20275 # Only "Unimplemented Data Type" exceptions enter here. The operand
   20276 # is either a DENORM or a NORM.
   20277 fout_word:
   20278 	tst.b		STAG(%a6)		# is operand normalized?
   20279 	bne.b		fout_word_denorm	# no
   20280 
   20281 	fmovm.x		SRC(%a0),&0x80		# load value
   20282 
   20283 fout_word_norm:
   20284 	fmov.l		%d0,%fpcr		# insert rnd prec:mode
   20285 
   20286 	fmov.w		%fp0,%d0		# exec move out w/ correct rnd mode
   20287 
   20288 	fmov.l		&0x0,%fpcr		# clear FPCR
   20289 	fmov.l		%fpsr,%d1		# fetch FPSR
   20290 	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits
   20291 
   20292 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
   20293 	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
   20294 	beq.b		fout_word_dn		# must save to integer regfile
   20295 
   20296 	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
   20297 	bsr.l		_dmem_write_word	# write word
   20298 
   20299 	tst.l		%d1			# did dstore fail?
   20300 	bne.l		facc_out_w		# yes
   20301 
   20302 	rts
   20303 
   20304 fout_word_dn:
   20305 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
   20306 	andi.w		&0x7,%d1		# keep only the 3-bit register number
   20307 	bsr.l		store_dreg_w
   20308 	rts
   20309 # DENORM source: substitute a tiny sgl with the same sign and convert that instead.
   20310 fout_word_denorm:
   20311 	mov.l		SRC_EX(%a0),%d1
   20312 	andi.l		&0x80000000,%d1		# keep DENORM sign
   20313 	ori.l		&0x00800000,%d1		# make smallest sgl
   20314 	fmov.s		%d1,%fp0
   20315 	bra.b		fout_word_norm
   20316 
   20317 #################################################################
   20318 # fmove.l out ###################################################
   20319 #################################################################
   20320 
   20321 # Only "Unimplemented Data Type" exceptions enter here. The operand
   20322 # is either a DENORM or a NORM.
   20323 fout_long:
   20324 	tst.b		STAG(%a6)		# is operand normalized?
   20325 	bne.b		fout_long_denorm	# no
   20326 
   20327 	fmovm.x		SRC(%a0),&0x80		# load value
   20328 
   20329 fout_long_norm:
   20330 	fmov.l		%d0,%fpcr		# insert rnd prec:mode
   20331 
   20332 	fmov.l		%fp0,%d0		# exec move out w/ correct rnd mode
   20333 
   20334 	fmov.l		&0x0,%fpcr		# clear FPCR
   20335 	fmov.l		%fpsr,%d1		# fetch FPSR
   20336 	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits
   20337 
   20338 fout_long_write:
   20339 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
   20340 	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
   20341 	beq.b		fout_long_dn		# must save to integer regfile
   20342 
   20343 	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
   20344 	bsr.l		_dmem_write_long	# write long
   20345 
   20346 	tst.l		%d1			# did dstore fail?
   20347 	bne.l		facc_out_l		# yes
   20348 
   20349 	rts
   20350 
   20351 fout_long_dn:
   20352 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
   20353 	andi.w		&0x7,%d1		# keep only the 3-bit register number
   20354 	bsr.l		store_dreg_l
   20355 	rts
   20356 # DENORM source: substitute a tiny sgl with the same sign and convert that instead.
   20357 fout_long_denorm:
   20358 	mov.l		SRC_EX(%a0),%d1
   20359 	andi.l		&0x80000000,%d1		# keep DENORM sign
   20360 	ori.l		&0x00800000,%d1		# make smallest sgl
   20361 	fmov.s		%d1,%fp0
   20362 	bra.b		fout_long_norm
   20363 
   20364 #################################################################
   20365 # fmove.x out ###################################################
   20366 #################################################################
   20367 
   20368 # Only "Unimplemented Data Type" exceptions enter here. The operand
   20369 # is either a DENORM or a NORM.
   20370 # The DENORM causes an Underflow exception.
   20371 fout_ext:
   20372 
   20373 # we copy the extended precision result to FP_SCR0 so that the reserved
   20374 # 16-bit field gets zeroed. we do this since we promise not to disturb
   20375 # what's at SRC(a0).
   20376 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
   20377 	clr.w		2+FP_SCR0_EX(%a6)	# clear reserved field
   20378 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   20379 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   20380 
   20381 	fmovm.x		SRC(%a0),&0x80		# return result
   20382 
   20383 	bsr.l		_calc_ea_fout		# fix stacked <ea>
   20384 
   20385 	mov.l		%a0,%a1			# pass: dst addr
   20386 	lea		FP_SCR0(%a6),%a0	# pass: src addr
   20387 	mov.l		&0xc,%d0		# pass: opsize is 12 bytes
   20388 
   20389 # we must not yet write the extended precision data to the stack
   20390 # in the pre-decrement case from supervisor mode or else we'll corrupt
   20391 # the stack frame. so, leave it in FP_SRC for now and deal with it later...
   20392 	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
   20393 	beq.b		fout_ext_a7
   20394 
   20395 	bsr.l		_dmem_write		# write ext prec number to memory
   20396 
   20397 	tst.l		%d1			# did dstore fail?
   20398 	bne.w		fout_ext_err		# yes
   20399 
   20400 	tst.b		STAG(%a6)		# is operand normalized?
   20401 	bne.b		fout_ext_denorm		# no
   20402 	rts
   20403 
   20404 # the number is a DENORM. must set the underflow exception bit
   20405 fout_ext_denorm:
   20406 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set underflow exc bit
   20407 
   20408 	mov.b		FPCR_ENABLE(%a6),%d0
   20409 	andi.b		&0x0a,%d0		# is UNFL or INEX enabled?
   20410 	bne.b		fout_ext_exc		# yes
   20411 	rts
   20412 
   20413 # we don't want to do the write if the exception occurred in supervisor mode
   20414 # so _mem_write2() handles this for us.
   20415 fout_ext_a7:
   20416 	bsr.l		_mem_write2		# write ext prec number to memory
   20417 
   20418 	tst.l		%d1			# did dstore fail?
   20419 	bne.w		fout_ext_err		# yes
   20420 
   20421 	tst.b		STAG(%a6)		# is operand normalized?
   20422 	bne.b		fout_ext_denorm		# no
   20423 	rts
   20424 # build the EXOP: normalize the FP_SCR0 copy and give it exponent -(shift amount).
   20425 fout_ext_exc:
   20426 	lea		FP_SCR0(%a6),%a0
   20427 	bsr.l		norm			# normalize the mantissa
   20428 	neg.w		%d0			# new exp = -(shft amt)
   20429 	andi.w		&0x7fff,%d0		# confine it to the 15-bit exponent field
   20430 	andi.w		&0x8000,FP_SCR0_EX(%a6)	# keep only old sign
   20431 	or.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
   20432 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   20433 	rts
   20434 # write failed: restore the stacked a6 before taking the access-error exit.
   20435 fout_ext_err:
   20436 	mov.l		EXC_A6(%a6),(%a6)	# fix stacked a6
   20437 	bra.l		facc_out_x
   20438 
   20439 #########################################################################
   20440 # fmove.s out ###########################################################
   20441 #########################################################################
   20442 fout_sgl:
   20443 	andi.b		&0x30,%d0		# clear rnd prec
   20444 	ori.b		&s_mode*0x10,%d0	# insert sgl prec
   20445 	mov.l		%d0,L_SCR3(%a6)		# save rnd prec,mode on stack
   20446 
   20447 #
   20448 # operand is a normalized number. first, we check to see if the move out
   20449 # would cause either an underflow or overflow. these cases are handled
   20450 # separately. otherwise, set the FPCR to the proper rounding mode and
   20451 # execute the move.
   20452 #
   20453 	mov.w		SRC_EX(%a0),%d0		# extract exponent
   20454 	andi.w		&0x7fff,%d0		# strip sign
   20455 
   20456 	cmpi.w		%d0,&SGL_HI		# will operand overflow?
   20457 	bgt.w		fout_sgl_ovfl		# yes; go handle OVFL
   20458 	beq.w		fout_sgl_may_ovfl	# maybe; go handle possible OVFL
   20459 	cmpi.w		%d0,&SGL_LO		# will operand underflow?
   20460 	blt.w		fout_sgl_unfl		# yes; go handle underflow
   20461 
   20462 #
   20463 # NORMs(in range) can be stored out by a simple "fmov.s"
   20464 # Unnormalized inputs can come through this point.
   20465 #
   20466 fout_sgl_exg:
   20467 	fmovm.x		SRC(%a0),&0x80		# fetch fop from stack
   20468 
   20469 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   20470 	fmov.l		&0x0,%fpsr		# clear FPSR
   20471 
   20472 	fmov.s		%fp0,%d0		# store does convert and round
   20473 
   20474 	fmov.l		&0x0,%fpcr		# clear FPCR
   20475 	fmov.l		%fpsr,%d1		# save FPSR
   20476 
   20477 	or.w		%d1,2+USER_FPSR(%a6) 	# set possible inex2/ainex
   20478 
   20479 fout_sgl_exg_write:
   20480 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
   20481 	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
   20482 	beq.b		fout_sgl_exg_write_dn	# must save to integer regfile
   20483 
   20484 	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
   20485 	bsr.l		_dmem_write_long	# write long
   20486 
   20487 	tst.l		%d1			# did dstore fail?
   20488 	bne.l		facc_out_l		# yes
   20489 
   20490 	rts
   20491 
   20492 fout_sgl_exg_write_dn:
   20493 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
   20494 	andi.w		&0x7,%d1		# keep only the 3-bit register number
   20495 	bsr.l		store_dreg_l
   20496 	rts
   20497 
   20498 #
   20499 # here, we know that the operand would UNFL if moved out to single prec,
   20500 # so, denorm and round and then use generic store single routine to
   20501 # write the value to memory.
   20502 #
   20503 fout_sgl_unfl:
   20504 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
   20505 
   20506 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy src to FP_SCR0
   20507 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   20508 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   20509 	mov.l		%a0,-(%sp)		# save src ptr; popped or dropped at chkexc
   20510 
   20511 	clr.l		%d0			# pass: S.F. = 0
   20512 
   20513 	cmpi.b		STAG(%a6),&DENORM	# fetch src optype tag
   20514 	bne.b		fout_sgl_unfl_cont	# not a DENORM; skip normalization
   20515 
   20516 	lea		FP_SCR0(%a6),%a0
   20517 	bsr.l		norm			# normalize the DENORM
   20518 
   20519 fout_sgl_unfl_cont:
   20520 	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
   20521 	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
   20522 	bsr.l		unf_res			# calc default underflow result
   20523 
   20524 	lea		FP_SCR0(%a6),%a0	# pass: ptr to fop
   20525 	bsr.l		dst_sgl			# convert to single prec; result in d0
   20526 
   20527 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
   20528 	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
   20529 	beq.b		fout_sgl_unfl_dn	# must save to integer regfile
   20530 
   20531 	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
   20532 	bsr.l		_dmem_write_long	# write long; status comes back in d1
   20533 
   20534 	tst.l		%d1			# did dstore fail?
   20535 	bne.l		facc_out_l		# yes
   20536 
   20537 	bra.b		fout_sgl_unfl_chkexc
   20538 
   20539 fout_sgl_unfl_dn:
   20540 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
   20541 	andi.w		&0x7,%d1		# keep the 3-bit register number
   20542 	bsr.l		store_dreg_l		# store d0 into data register d1
   20543 
   20544 fout_sgl_unfl_chkexc:
   20545 	mov.b		FPCR_ENABLE(%a6),%d1	# fetch exception enable byte
   20546 	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
   20547 	bne.w		fout_sd_exc_unfl	# yes; build EXOP (pops saved a0)
   20548 	addq.l		&0x4,%sp		# no; discard saved a0
   20549 	rts
   20550 
   20551 #
   20552 # it's definitely an overflow so call ovf_res to get the correct answer
   20553 #
   20554 fout_sgl_ovfl:
   20555 	tst.b		3+SRC_HI(%a0)		# is result inexact?
   20556 	bne.b		fout_sgl_ovfl_inex2
   20557 	tst.l		SRC_LO(%a0)		# is result inexact?
   20558 	bne.b		fout_sgl_ovfl_inex2
   20559 	ori.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
   20560 	bra.b		fout_sgl_ovfl_cont
   20561 fout_sgl_ovfl_inex2:
   20562 	ori.w		&ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
   20563 
   20564 fout_sgl_ovfl_cont:
   20565 	mov.l		%a0,-(%sp)
   20566 
   20567 # call ovf_res() w/ sgl prec and the correct rnd mode to create the default
   20568 # overflow result. DON'T save the returned ccodes from ovf_res() since
   20569 # fmove out doesn't alter them.
   20570 	tst.b		SRC_EX(%a0)		# is operand negative?
   20571 	smi		%d1			# set if so
   20572 	mov.l		L_SCR3(%a6),%d0		# pass: sgl prec,rnd mode
   20573 	bsr.l		ovf_res			# calc OVFL result
   20574 	fmovm.x		(%a0),&0x80		# load default overflow result
   20575 	fmov.s		%fp0,%d0		# store to single
   20576 
   20577 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
   20578 	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
   20579 	beq.b		fout_sgl_ovfl_dn	# must save to integer regfile
   20580 
   20581 	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
   20582 	bsr.l		_dmem_write_long	# write long
   20583 
   20584 	tst.l		%d1			# did dstore fail?
   20585 	bne.l		facc_out_l		# yes
   20586 
   20587 	bra.b		fout_sgl_ovfl_chkexc
   20588 
   20589 fout_sgl_ovfl_dn:
   20590 	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
   20591 	andi.w		&0x7,%d1
   20592 	bsr.l		store_dreg_l
   20593 
   20594 fout_sgl_ovfl_chkexc:
   20595 	mov.b		FPCR_ENABLE(%a6),%d1
   20596 	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
   20597 	bne.w		fout_sd_exc_ovfl	# yes
   20598 	addq.l		&0x4,%sp
   20599 	rts
   20600 
   20601 #
   20602 # move out MAY overflow:
   20603 # (1) force the exp to 0x3fff
   20604 # (2) do a move w/ appropriate rnd mode
   20605 # (3) if exp still equals zero, then insert original exponent
   20606 #	for the correct result.
   20607 #     if exp now equals one, then it overflowed so call ovf_res.
   20608 #
   20609 fout_sgl_may_ovfl:
   20610 	mov.w		SRC_EX(%a0),%d1		# fetch current sign
   20611 	andi.w		&0x8000,%d1		# keep it,clear exp
   20612 	ori.w		&0x3fff,%d1		# insert exp = 0 (biased 0x3fff)
   20613 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert scaled exp
   20614 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
   20615 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
   20616 
   20617 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   20618 
   20619 	fmov.x		FP_SCR0(%a6),%fp0	# force fop to be rounded
   20620 	fmov.l		&0x0,%fpcr		# clear FPCR
   20621 
   20622 	fabs.x		%fp0			# need absolute value
   20623 	fcmp.b		%fp0,&0x2		# did exponent increase? (|fp0| >= 2)
   20624 	fblt.w		fout_sgl_exg		# no; go finish NORM (a0 still -> src)
   20625 	bra.w		fout_sgl_ovfl		# yes; go handle overflow
   20626 
   20627 ################
   20628 
   20629 fout_sd_exc_unfl:
   20630 	mov.l		(%sp)+,%a0		# restore a0 (src ptr pushed by unfl path)
   20631 
   20632 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# re-copy original src to FP_SCR0
   20633 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   20634 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   20635 
   20636 	cmpi.b		STAG(%a6),&DENORM	# was src a DENORM?
   20637 	bne.b		fout_sd_exc_cont	# no
   20638 
   20639 	lea		FP_SCR0(%a6),%a0
   20640 	bsr.l		norm			# normalize the mantissa
   20641 	neg.l		%d0			# new exp = -(value returned by norm)
   20642 	andi.w		&0x7fff,%d0		# keep 15 exponent bits
   20643 	bfins		%d0,FP_SCR0_EX(%a6){&1:&15}	# insert exponent below the sign bit
   20644 	bra.b		fout_sd_exc_cont
   20645 
   20646 fout_sd_exc:
   20647 fout_sd_exc_ovfl:
   20648 	mov.l		(%sp)+,%a0		# restore a0
   20649 
   20650 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# re-copy original src to FP_SCR0
   20651 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   20652 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   20653 
   20654 fout_sd_exc_cont:
   20655 	bclr		&0x7,FP_SCR0_EX(%a6)	# clear sign bit
   20656 	sne.b		2+FP_SCR0_EX(%a6)	# set internal sign bit
   20657 	lea		FP_SCR0(%a6),%a0	# pass: ptr to DENORM
   20658 
   20659 	mov.b		3+L_SCR3(%a6),%d1	# low byte of saved rnd prec,mode
   20660 	lsr.b		&0x4,%d1
   20661 	andi.w		&0x0c,%d1		# keep the rnd prec field
   20662 	swap		%d1			# d1{31:16} = rnd prec
   20663 	mov.b		3+L_SCR3(%a6),%d1
   20664 	lsr.b		&0x4,%d1
   20665 	andi.w		&0x03,%d1		# d1{15:0} = rnd mode
   20666 	clr.l		%d0			# pass: zero g,r,s
   20667 	bsr.l		_round			# round the DENORM
   20668 
   20669 	tst.b		2+FP_SCR0_EX(%a6)	# is EXOP negative?
   20670 	beq.b		fout_sd_exc_done	# no
   20671 	bset		&0x7,FP_SCR0_EX(%a6)	# yes
   20672 
   20673 fout_sd_exc_done:
   20674 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   20675 	rts
   20676 
   20677 #################################################################
   20678 # fmove.d out ###################################################
   20679 #################################################################
   20680 fout_dbl:
   20681 	andi.b		&0x30,%d0		# clear rnd prec; keep only bits 5:4
   20682 	ori.b		&d_mode*0x10,%d0	# insert dbl prec
   20683 	mov.l		%d0,L_SCR3(%a6)		# save rnd prec,mode on stack
   20684 
   20685 #
   20686 # classify the source by its exponent (sign stripped) before storing:
   20687 # exp > DBL_HI overflows, exp == DBL_HI may overflow once rounded, and
   20688 # exp < DBL_LO underflows. these cases are handled separately. otherwise,
   20689 # set the FPCR to the saved rnd prec,mode and execute the move.
   20690 #
   20691 	mov.w		SRC_EX(%a0),%d0		# extract sign,exponent word
   20692 	andi.w		&0x7fff,%d0		# strip sign
   20693 
   20694 	cmpi.w		%d0,&DBL_HI		# will operand overflow?
   20695 	bgt.w		fout_dbl_ovfl		# yes; go handle OVFL
   20696 	beq.w		fout_dbl_may_ovfl	# maybe; go handle possible OVFL
   20697 	cmpi.w		%d0,&DBL_LO		# will operand underflow?
   20698 	blt.w		fout_dbl_unfl		# yes; go handle underflow
   20699 
   20700 #
   20701 # NORMs(in range) can be stored out by a simple "fmov.d"
   20702 # Unnormalized inputs can come through this point.
   20703 # fout_dbl_may_ovfl also branches here when rounding did not overflow.
   20704 fout_dbl_exg:
   20705 	fmovm.x		SRC(%a0),&0x80		# fetch fop from stack into fp0
   20706 
   20707 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR (dbl prec, user rnd mode)
   20708 	fmov.l		&0x0,%fpsr		# clear FPSR
   20709 
   20710 	fmov.d		%fp0,L_SCR1(%a6)	# store does convert and round
   20711 
   20712 	fmov.l		&0x0,%fpcr		# clear FPCR
   20713 	fmov.l		%fpsr,%d0		# save FPSR produced by the store
   20714 
   20715 	or.w		%d0,2+USER_FPSR(%a6) 	# set possible inex2/ainex
   20716 
   20717 	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
   20718 	lea		L_SCR1(%a6),%a0		# pass: src addr
   20719 	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
   20720 	bsr.l		_dmem_write		# store dbl fop to memory
   20721 
   20722 	tst.l		%d1			# did dstore fail?
   20723 	bne.l		facc_out_d		# yes
   20724 
   20725 	rts					# no; so we're finished
   20726 
   20727 #
   20728 # here, we know that the operand would UNFL if moved out to double prec,
   20729 # so, denorm and round and then use generic store double routine to
   20730 # write the value to memory.
   20731 #
   20732 fout_dbl_unfl:
   20733 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
   20734 
   20735 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy src to FP_SCR0
   20736 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   20737 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   20738 	mov.l		%a0,-(%sp)		# save src ptr; popped or dropped below
   20739 
   20740 	clr.l		%d0			# pass: S.F. = 0
   20741 
   20742 	cmpi.b		STAG(%a6),&DENORM	# fetch src optype tag
   20743 	bne.b		fout_dbl_unfl_cont	# not a DENORM; skip normalization
   20744 
   20745 	lea		FP_SCR0(%a6),%a0
   20746 	bsr.l		norm			# normalize the DENORM
   20747 
   20748 fout_dbl_unfl_cont:
   20749 	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
   20750 	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
   20751 	bsr.l		unf_res			# calc default underflow result
   20752 
   20753 	lea		FP_SCR0(%a6),%a0	# pass: ptr to fop
   20754 	bsr.l		dst_dbl			# convert to double prec
   20755 	mov.l		%d0,L_SCR1(%a6)		# hi(double) to scratch
   20756 	mov.l		%d1,L_SCR2(%a6)		# lo(double) to scratch
   20757 
   20758 	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
   20759 	lea		L_SCR1(%a6),%a0		# pass: src addr
   20760 	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
   20761 	bsr.l		_dmem_write		# store dbl fop to memory
   20762 
   20763 	tst.l		%d1			# did dstore fail?
   20764 	bne.l		facc_out_d		# yes
   20765 
   20766 	mov.b		FPCR_ENABLE(%a6),%d1	# fetch exception enable byte
   20767 	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
   20768 	bne.w		fout_sd_exc_unfl	# yes; build EXOP (pops saved a0)
   20769 	addq.l		&0x4,%sp		# no; discard saved a0
   20770 	rts
   20771 
   20772 #
   20773 # it's definitely an overflow so call ovf_res to get the correct answer
   20774 #
   20775 fout_dbl_ovfl:
   20776 	mov.w		2+SRC_LO(%a0),%d0
   20777 	andi.w		&0x7ff,%d0
   20778 	bne.b		fout_dbl_ovfl_inex2
   20779 
   20780 	ori.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
   20781 	bra.b		fout_dbl_ovfl_cont
   20782 fout_dbl_ovfl_inex2:
   20783 	ori.w		&ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
   20784 
   20785 fout_dbl_ovfl_cont:
   20786 	mov.l		%a0,-(%sp)
   20787 
   20788 # call ovf_res() w/ dbl prec and the correct rnd mode to create the default
   20789 # overflow result. DON'T save the returned ccodes from ovf_res() since
   20790 # fmove out doesn't alter them.
   20791 	tst.b		SRC_EX(%a0)		# is operand negative?
   20792 	smi		%d1			# set if so
   20793 	mov.l		L_SCR3(%a6),%d0		# pass: dbl prec,rnd mode
   20794 	bsr.l		ovf_res			# calc OVFL result
   20795 	fmovm.x		(%a0),&0x80		# load default overflow result
   20796 	fmov.d		%fp0,L_SCR1(%a6)	# store to double
   20797 
   20798 	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
   20799 	lea		L_SCR1(%a6),%a0		# pass: src addr
   20800 	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
   20801 	bsr.l		_dmem_write		# store dbl fop to memory
   20802 
   20803 	tst.l		%d1			# did dstore fail?
   20804 	bne.l		facc_out_d		# yes
   20805 
   20806 	mov.b		FPCR_ENABLE(%a6),%d1
   20807 	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
   20808 	bne.w		fout_sd_exc_ovfl	# yes
   20809 	addq.l		&0x4,%sp
   20810 	rts
   20811 
   20812 #
   20813 # move out MAY overflow:
   20814 # (1) force the exp to 0x3fff
   20815 # (2) do a move w/ appropriate rnd mode
   20816 # (3) if exp still equals zero, then insert original exponent
   20817 #	for the correct result.
   20818 #     if exp now equals one, then it overflowed so call ovf_res.
   20819 #
   20820 fout_dbl_may_ovfl:
   20821 	mov.w		SRC_EX(%a0),%d1		# fetch current sign
   20822 	andi.w		&0x8000,%d1		# keep it,clear exp
   20823 	ori.w		&0x3fff,%d1		# insert exp = 0 (biased 0x3fff)
   20824 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert scaled exp
   20825 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
   20826 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
   20827 
   20828 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   20829 
   20830 	fmov.x		FP_SCR0(%a6),%fp0	# force fop to be rounded
   20831 	fmov.l		&0x0,%fpcr		# clear FPCR
   20832 
   20833 	fabs.x		%fp0			# need absolute value
   20834 	fcmp.b		%fp0,&0x2		# did exponent increase? (|fp0| >= 2)
   20835 	fblt.w		fout_dbl_exg		# no; go finish NORM (a0 still -> src)
   20836 	bra.w		fout_dbl_ovfl		# yes; go handle overflow
   20837 
   20838 #########################################################################
   20839 # XDEF ****************************************************************	#
   20840 # 	dst_dbl(): create double precision value from extended prec.	#
   20841 #									#
   20842 # XREF ****************************************************************	#
   20843 #	None								#
   20844 #									#
   20845 # INPUT ***************************************************************	#
   20846 #	a0 = pointer to source operand in extended precision		#
   20847 # 									#
   20848 # OUTPUT **************************************************************	#
   20849 #	d0 = hi(double precision result)				#
   20850 #	d1 = lo(double precision result)				#
   20851 #									#
   20852 # ALGORITHM ***********************************************************	#
   20853 #									#
   20854 #  Changes extended precision to double precision.			#
   20855 #  Note: no attempt is made to round the extended value to double.	#
   20856 #	dbl_sign = ext_sign						#
   20857 #	dbl_exp = ext_exp - $3fff(ext bias) + $7ff(dbl bias)		#
   20858 #	get rid of ext integer bit					#
   20859 #	dbl_mant = ext_mant{62:12}					#
   20860 #									#
   20861 #	    	---------------   ---------------    ---------------	#
   20862 #  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |	#
   20863 #	    	---------------   ---------------    ---------------	#
   20864 #	   	 95	    64    63 62	      32      31     11	  0	#
   20865 #				     |			     |		#
   20866 #				     |			     |		#
   20867 #				     |			     |		#
   20868 #		 	             v   		     v		#
   20869 #	    		      ---------------   ---------------		#
   20870 #  double   ->  	      |s|exp| mant  |   |  mant       |		#
   20871 #	    		      ---------------   ---------------		#
   20872 #	   	 	      63     51   32   31	       0	#
   20873 #									#
   20874 #########################################################################
   20875 
   20876 dst_dbl:
   20877 	clr.l		%d0			# clear d0
   20878 	mov.w		FTEMP_EX(%a0),%d0	# get exponent
   20879 	subi.w		&EXT_BIAS,%d0		# subtract extended precision bias
   20880 	addi.w		&DBL_BIAS,%d0		# add double precision bias
   20881 	tst.b		FTEMP_HI(%a0)		# is number a denorm? (integer bit clear)
   20882 	bmi.b		dst_get_dupper		# no
   20883 	subq.w		&0x1,%d0		# yes; denorm bias = DBL_BIAS - 1
   20884 dst_get_dupper:
   20885 	swap		%d0			# d0 now in upper word
   20886 	lsl.l		&0x4,%d0		# d0 in proper place for dbl prec exp
   20887 	tst.b		FTEMP_EX(%a0)		# test sign
   20888 	bpl.b		dst_get_dman		# if positive, go process mantissa
   20889 	bset		&0x1f,%d0		# if negative, set sign
   20890 dst_get_dman:
   20891 	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
   20892 	bfextu		%d1{&1:&20},%d1		# get upper 20 bits of ms (skips integer bit)
   20893 	or.l		%d1,%d0			# put these bits in ms word of double
   20894 	mov.l		%d0,L_SCR1(%a6)		# park hi(double) in scratch L_SCR1
   20895 	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
   20896 	mov.l		&21,%d0			# load shift count
   20897 	lsl.l		%d0,%d1			# put lower 11 bits in upper bits
   20898 	mov.l		%d1,L_SCR2(%a6)		# build lower lword in memory (scratch L_SCR2)
   20899 	mov.l		FTEMP_LO(%a0),%d1	# get ls mantissa
   20900 	bfextu		%d1{&0:&21},%d0		# get ls 21 bits of double
   20901 	mov.l		L_SCR2(%a6),%d1		# reload partial lo(double)
   20902 	or.l		%d0,%d1			# put them in double result
   20903 	mov.l		L_SCR1(%a6),%d0		# return: d0 = hi(double), d1 = lo(double)
   20904 	rts
   20905 
   20906 #########################################################################
   20907 # XDEF ****************************************************************	#
   20908 # 	dst_sgl(): create single precision value from extended prec	#
   20909 #									#
   20910 # XREF ****************************************************************	#
   20911 #									#
   20912 # INPUT ***************************************************************	#
   20913 #	a0 = pointer to source operand in extended precision		#
   20914 # 									#
   20915 # OUTPUT **************************************************************	#
   20916 #	d0 = single precision result					#
   20917 #									#
   20918 # ALGORITHM ***********************************************************	#
   20919 #									#
   20920 # Changes extended precision to single precision.			#
   20921 #	sgl_sign = ext_sign						#
   20922 #	sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias)		#
   20923 #	get rid of ext integer bit					#
   20924 #	sgl_mant = ext_mant{62:40}					#
   20925 #									#
   20926 #	    	---------------   ---------------    ---------------	#
   20927 #  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |	#
   20928 #	    	---------------   ---------------    ---------------	#
   20929 #	   	 95	    64    63 62	   40 32      31     12	  0	#
   20930 #				     |	   |				#
   20931 #				     |	   |				#
   20932 #				     |	   |				#
   20933 #		 	             v     v				#
   20934 #	    		      ---------------				#
   20935 #  single   ->  	      |s|exp| mant  |				#
   20936 #	    		      ---------------				#
   20937 #	   	 	      31     22     0				#
   20938 #									#
   20939 #########################################################################
   20940 
   20941 dst_sgl:
   20942 	clr.l		%d0			# clear d0
   20943 	mov.w		FTEMP_EX(%a0),%d0	# get exponent
   20944 	subi.w		&EXT_BIAS,%d0		# subtract extended precision bias
   20945 	addi.w		&SGL_BIAS,%d0		# add single precision bias
   20946 	tst.b		FTEMP_HI(%a0)		# is number a denorm? (integer bit clear)
   20947 	bmi.b		dst_get_supper		# no
   20948 	subq.w		&0x1,%d0		# yes; denorm bias = SGL_BIAS - 1
   20949 dst_get_supper:
   20950 	swap		%d0			# put exp in upper word of d0
   20951 	lsl.l		&0x7,%d0		# shift it into single exp bits
   20952 	tst.b		FTEMP_EX(%a0)		# test sign
   20953 	bpl.b		dst_get_sman		# if positive, continue
   20954 	bset		&0x1f,%d0		# if negative, put in sign first
   20955 dst_get_sman:
   20956 	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
   20957 	andi.l		&0x7fffff00,%d1		# get upper 23 bits of ms (drops integer bit)
   20958 	lsr.l		&0x8,%d1		# and put them flush right
   20959 	or.l		%d1,%d0			# put these bits in ms word of single
   20960 	rts					# return: d0 = single result (d1 is scratch)
   20961 
   20962 ##############################################################################
   20963 fout_pack:
   20964 	bsr.l		_calc_ea_fout		# fetch the <ea>
   20965 	mov.l		%a0,-(%sp)		# save dst addr; popped at fout_pack_write
   20966 
   20967 	mov.b		STAG(%a6),%d0		# fetch input type
   20968 	bne.w		fout_pack_not_norm	# input is not NORM
   20969 
   20970 fout_pack_norm:
   20971 	btst		&0x4,EXC_CMDREG(%a6)	# static or dynamic?
   20972 	beq.b		fout_pack_s		# static
   20973 
   20974 fout_pack_d:
   20975 	mov.b		1+EXC_CMDREG(%a6),%d1	# fetch dynamic reg
   20976 	lsr.b		&0x4,%d1
   20977 	andi.w		&0x7,%d1		# keep the 3-bit register number
   20978 
   20979 	bsr.l		fetch_dreg		# fetch Dn w/ k-factor
   20980 
   20981 	bra.b		fout_pack_type
   20982 fout_pack_s:
   20983 	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch static field
   20984 
   20985 fout_pack_type:
   20986 	bfexts		%d0{&25:&7},%d0		# extract k-factor (low 7 bits, sign-extended)
   20987 	mov.l	%d0,-(%sp)			# save k-factor across bindec
   20988 
   20989 	lea		FP_SRC(%a6),%a0		# pass: ptr to input
   20990 
   20991 # bindec is currently scrambling FP_SRC for denorm inputs.
   20992 # we'll have to change this, but for now, tough luck!!!
   20993 	bsr.l		bindec			# convert xprec to packed
   20994 
   20995 #	andi.l		&0xcfff000f,FP_SCR0(%a6) # clear unused fields
   20996 	andi.l		&0xcffff00f,FP_SCR0(%a6) # clear unused fields
   20997 
   20998 	mov.l	(%sp)+,%d0			# restore k-factor
   20999 
   21000 	tst.b		3+FP_SCR0_EX(%a6)	# any nonzero bits in the packed "mantissa"?
   21001 	bne.b		fout_pack_set
   21002 	tst.l		FP_SCR0_HI(%a6)
   21003 	bne.b		fout_pack_set
   21004 	tst.l		FP_SCR0_LO(%a6)
   21005 	bne.b		fout_pack_set
   21006 
   21007 # add the extra condition that only if the k-factor was zero, too, should
   21008 # we zero the exponent
   21009 	tst.l		%d0
   21010 	bne.b		fout_pack_set
   21011 # "mantissa" is all zero which means that the answer is zero. but, the '040
   21012 # algorithm allows the exponent to be non-zero. the 881/2 do not. therefore,
   21013 # if the mantissa is zero, I will zero the exponent, too.
   21014 # the question now is whether the exponents sign bit is allowed to be non-zero
   21015 # for a zero, also...
   21016 	andi.w		&0xf000,FP_SCR0(%a6)	# keep top nibble; zero the exponent digits
   21017 
   21018 fout_pack_set:
   21019 
   21020 	lea		FP_SCR0(%a6),%a0	# pass: src addr
   21021 
   21022 fout_pack_write:
   21023 	mov.l		(%sp)+,%a1		# pass: dst addr
   21024 	mov.l		&0xc,%d0		# pass: opsize is 12 bytes
   21025 
   21026 	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
   21027 	beq.b		fout_pack_a7
   21028 
   21029 	bsr.l		_dmem_write		# write 12-byte result to memory
   21030 
   21031 	tst.l		%d1			# did dstore fail?
   21032 	bne.w		fout_ext_err		# yes
   21033 
   21034 	rts
   21035 
   21036 # we don't want to do the write if the exception occurred in supervisor mode
   21037 # so _mem_write2() handles this for us.
   21038 fout_pack_a7:
   21039 	bsr.l		_mem_write2		# write 12-byte result to memory
   21040 
   21041 	tst.l		%d1			# did dstore fail?
   21042 	bne.w		fout_ext_err		# yes
   21043 
   21044 	rts
   21045 
   21046 fout_pack_not_norm:
   21047 	cmpi.b		%d0,&DENORM		# is it a DENORM?
   21048 	beq.w		fout_pack_norm		# yes
   21049 	lea		FP_SRC(%a6),%a0		# pass: src addr (FP_SRC written out as-is)
   21050 	clr.w		2+FP_SRC_EX(%a6)	# zero the word following the sign,exp word
   21051 	cmpi.b		%d0,&SNAN		# is it an SNAN?
   21052 	beq.b		fout_pack_snan		# yes
   21053 	bra.b		fout_pack_write		# no
   21054 
   21055 fout_pack_snan:
   21056 	ori.w		&snaniop2_mask,FPSR_EXCEPT(%a6) # set SNAN/AIOP
   21057 	bset		&0x6,FP_SRC_HI(%a6)	# set snan bit
   21058 	bra.b		fout_pack_write
   21059 
   21060 #########################################################################
   21061 # XDEF ****************************************************************	#
   21062 #	fetch_dreg(): fetch register according to index in d1		#
   21063 #									#
   21064 # XREF ****************************************************************	#
   21065 #	None								#
   21066 #									#
   21067 # INPUT ***************************************************************	#
   21068 #	d1 = index of register to fetch from				#
   21069 # 									#
   21070 # OUTPUT **************************************************************	#
   21071 #	d0 = value of register fetched					#
   21072 #									#
   21073 # ALGORITHM ***********************************************************	#
   21074 #	According to the index value in d1 which can range from zero 	#
   21075 # to fifteen, load the corresponding register file value (where 	#
   21076 # address register indexes start at 8). D0/D1/A0/A1/A6/A7 are on the	#
   21077 # stack. The rest should still be in their original places.		#
   21078 #									#
   21079 #########################################################################
   21080 
   21081 # this routine leaves d1 intact for subsequent store_dreg calls.
   21082 	global		fetch_dreg
   21083 fetch_dreg:
   21084 	mov.w		(tbl_fdreg.b,%pc,%d1.w*2),%d0	# d0 = table entry for index d1
   21085 	jmp		(tbl_fdreg.b,%pc,%d0.w*1)	# jump to tbl_fdreg + entry
   21086 
   21087 tbl_fdreg:
   21088 	short		fdreg0 - tbl_fdreg
   21089 	short		fdreg1 - tbl_fdreg
   21090 	short		fdreg2 - tbl_fdreg
   21091 	short		fdreg3 - tbl_fdreg
   21092 	short		fdreg4 - tbl_fdreg
   21093 	short		fdreg5 - tbl_fdreg
   21094 	short		fdreg6 - tbl_fdreg
   21095 	short		fdreg7 - tbl_fdreg
   21096 	short		fdreg8 - tbl_fdreg
   21097 	short		fdreg9 - tbl_fdreg
   21098 	short		fdrega - tbl_fdreg
   21099 	short		fdregb - tbl_fdreg
   21100 	short		fdregc - tbl_fdreg
   21101 	short		fdregd - tbl_fdreg
   21102 	short		fdrege - tbl_fdreg
   21103 	short		fdregf - tbl_fdreg
   21104 
   21105 fdreg0:
   21106 	mov.l		EXC_DREGS+0x0(%a6),%d0	# index 0: read from stack frame
   21107 	rts
   21108 fdreg1:
   21109 	mov.l		EXC_DREGS+0x4(%a6),%d0	# index 1: read from stack frame
   21110 	rts
   21111 fdreg2:
   21112 	mov.l		%d2,%d0			# indexes 2-7: d2-d7 read directly
   21113 	rts
   21114 fdreg3:
   21115 	mov.l		%d3,%d0
   21116 	rts
   21117 fdreg4:
   21118 	mov.l		%d4,%d0
   21119 	rts
   21120 fdreg5:
   21121 	mov.l		%d5,%d0
   21122 	rts
   21123 fdreg6:
   21124 	mov.l		%d6,%d0
   21125 	rts
   21126 fdreg7:
   21127 	mov.l		%d7,%d0
   21128 	rts
   21129 fdreg8:
   21130 	mov.l		EXC_DREGS+0x8(%a6),%d0	# index 8: read from stack frame
   21131 	rts
   21132 fdreg9:
   21133 	mov.l		EXC_DREGS+0xc(%a6),%d0	# index 9: read from stack frame
   21134 	rts
   21135 fdrega:
   21136 	mov.l		%a2,%d0			# indexes 0xa-0xd: a2-a5 read directly
   21137 	rts
   21138 fdregb:
   21139 	mov.l		%a3,%d0
   21140 	rts
   21141 fdregc:
   21142 	mov.l		%a4,%d0
   21143 	rts
   21144 fdregd:
   21145 	mov.l		%a5,%d0
   21146 	rts
   21147 fdrege:
   21148 	mov.l		(%a6),%d0		# index 0xe: read from (a6)
   21149 	rts
   21150 fdregf:
   21151 	mov.l		EXC_A7(%a6),%d0		# index 0xf: read from stack frame
   21152 	rts
   21153 
   21154 #########################################################################
   21155 # XDEF ****************************************************************	#
   21156 #	store_dreg_l(): store longword to data register specified by d1	#
   21157 #									#
   21158 # XREF ****************************************************************	#
   21159 #	None								#
   21160 #									#
   21161 # INPUT ***************************************************************	#
   21162 #	d0 = longword value to store					#
   21163 #	d1 = index of register to store to				#
   21164 # 									#
   21165 # OUTPUT **************************************************************	#
   21166 #	(data register is updated)					#
   21167 #									#
   21168 # ALGORITHM ***********************************************************	#
   21169 #	According to the index value in d1, store the longword value	#
   21170 # in d0 to the corresponding data register. D0/D1 are on the stack	#
   21171 # while the rest are in their initial places.				#
   21172 #									#
   21173 #########################################################################
   21174 
   21175 	global		store_dreg_l
   21176 store_dreg_l:
   21177 	mov.w		(tbl_sdregl.b,%pc,%d1.w*2),%d1	# d1 = table entry (index is overwritten)
   21178 	jmp		(tbl_sdregl.b,%pc,%d1.w*1)	# jump to tbl_sdregl + entry
   21179 
   21180 tbl_sdregl:
   21181 	short		sdregl0 - tbl_sdregl
   21182 	short		sdregl1 - tbl_sdregl
   21183 	short		sdregl2 - tbl_sdregl
   21184 	short		sdregl3 - tbl_sdregl
   21185 	short		sdregl4 - tbl_sdregl
   21186 	short		sdregl5 - tbl_sdregl
   21187 	short		sdregl6 - tbl_sdregl
   21188 	short		sdregl7 - tbl_sdregl
   21189 
   21190 sdregl0:
   21191 	mov.l		%d0,EXC_DREGS+0x0(%a6)	# index 0: write to stack frame
   21192 	rts
   21193 sdregl1:
   21194 	mov.l		%d0,EXC_DREGS+0x4(%a6)	# index 1: write to stack frame
   21195 	rts
   21196 sdregl2:
   21197 	mov.l		%d0,%d2			# indexes 2-7: d2-d7 written directly
   21198 	rts
   21199 sdregl3:
   21200 	mov.l		%d0,%d3
   21201 	rts
   21202 sdregl4:
   21203 	mov.l		%d0,%d4
   21204 	rts
   21205 sdregl5:
   21206 	mov.l		%d0,%d5
   21207 	rts
   21208 sdregl6:
   21209 	mov.l		%d0,%d6
   21210 	rts
   21211 sdregl7:
   21212 	mov.l		%d0,%d7
   21213 	rts
   21214 
   21215 #########################################################################
   21216 # XDEF ****************************************************************	#
   21217 #	store_dreg_w(): store word to data register specified by d1	#
   21218 #									#
   21219 # XREF ****************************************************************	#
   21220 #	None								#
   21221 #									#
   21222 # INPUT ***************************************************************	#
   21223 #	d0 = word value to store					#
   21224 #	d1 = index of register to fetch from				#
   21225 # 									#
   21226 # OUTPUT **************************************************************	#
   21227 #	(data register is updated)					#
   21228 #									#
   21229 # ALGORITHM ***********************************************************	#
   21230 #	According to the index value in d1, store the word value	#
   21231 # in d0 to the corresponding data register. D0/D1 are on the stack	#
   21232 # while the rest are in their initial places.				#
   21233 #									#
   21234 #########################################################################
   21235 
   21236 	global		store_dreg_w
   21237 store_dreg_w:
   21238 	mov.w		(tbl_sdregw.b,%pc,%d1.w*2),%d1	# d1 = table entry (index is overwritten)
   21239 	jmp		(tbl_sdregw.b,%pc,%d1.w*1)	# jump to tbl_sdregw + entry
   21240 
   21241 tbl_sdregw:
   21242 	short		sdregw0 - tbl_sdregw
   21243 	short		sdregw1 - tbl_sdregw
   21244 	short		sdregw2 - tbl_sdregw
   21245 	short		sdregw3 - tbl_sdregw
   21246 	short		sdregw4 - tbl_sdregw
   21247 	short		sdregw5 - tbl_sdregw
   21248 	short		sdregw6 - tbl_sdregw
   21249 	short		sdregw7 - tbl_sdregw
   21250 
   21251 sdregw0:
   21252 	mov.w		%d0,2+EXC_DREGS+0x0(%a6)	# index 0: low word of stacked copy
   21253 	rts
   21254 sdregw1:
   21255 	mov.w		%d0,2+EXC_DREGS+0x4(%a6)	# index 1: low word of stacked copy
   21256 	rts
   21257 sdregw2:
   21258 	mov.w		%d0,%d2			# indexes 2-7: low word of d2-d7 only
   21259 	rts
   21260 sdregw3:
   21261 	mov.w		%d0,%d3
   21262 	rts
   21263 sdregw4:
   21264 	mov.w		%d0,%d4
   21265 	rts
   21266 sdregw5:
   21267 	mov.w		%d0,%d5
   21268 	rts
   21269 sdregw6:
   21270 	mov.w		%d0,%d6
   21271 	rts
   21272 sdregw7:
   21273 	mov.w		%d0,%d7
   21274 	rts
   21275 
   21276 #########################################################################
   21277 # XDEF ****************************************************************	#
   21278 #	store_dreg_b(): store byte to data register specified by d1	#
   21279 #									#
   21280 # XREF ****************************************************************	#
   21281 #	None								#
   21282 #									#
   21283 # INPUT ***************************************************************	#
   21284 #	d0 = byte value to store					#
   21285 #	d1 = index of register to store to				#
   21286 # 									#
   21287 # OUTPUT **************************************************************	#
   21288 #	(data register is updated)					#
   21289 #									#
   21290 # ALGORITHM ***********************************************************	#
   21291 #	According to the index value in d1, store the byte value	#
   21292 # in d0 to the corresponding data register. D0/D1 are on the stack	#
   21293 # while the rest are in their initial places.				#
   21294 #									#
   21295 #########################################################################
   21296 
   21297 	global		store_dreg_b
   21298 store_dreg_b:
   21299 	mov.w		(tbl_sdregb.b,%pc,%d1.w*2),%d1	# d1 = word offset read from tbl_sdregb[d1]
   21300 	jmp		(tbl_sdregb.b,%pc,%d1.w*1)	# dispatch to tbl_sdregb + offset
   21301 # one 16-bit entry per data register d0-d7: (handler address - tbl_sdregb)
   21302 tbl_sdregb:
   21303 	short		sdregb0 - tbl_sdregb
   21304 	short		sdregb1 - tbl_sdregb
   21305 	short		sdregb2 - tbl_sdregb
   21306 	short		sdregb3 - tbl_sdregb
   21307 	short		sdregb4 - tbl_sdregb
   21308 	short		sdregb5 - tbl_sdregb
   21309 	short		sdregb6 - tbl_sdregb
   21310 	short		sdregb7 - tbl_sdregb
   21311 # d0/d1 live in the frame: update the byte at offset 3 of their EXC_DREGS slot
   21312 sdregb0:
   21313 	mov.b		%d0,3+EXC_DREGS+0x0(%a6)
   21314 	rts
   21315 sdregb1:
   21316 	mov.b		%d0,3+EXC_DREGS+0x4(%a6)
   21317 	rts
   21318 sdregb2:
   21319 	mov.b		%d0,%d2		# d2-d7 are still live: write the register itself
   21320 	rts
   21321 sdregb3:
   21322 	mov.b		%d0,%d3
   21323 	rts
   21324 sdregb4:
   21325 	mov.b		%d0,%d4
   21326 	rts
   21327 sdregb5:
   21328 	mov.b		%d0,%d5
   21329 	rts
   21330 sdregb6:
   21331 	mov.b		%d0,%d6
   21332 	rts
   21333 sdregb7:
   21334 	mov.b		%d0,%d7
   21335 	rts
   21336 
   21337 #########################################################################
   21338 # XDEF ****************************************************************	#
   21339 #	inc_areg(): increment an address register by the value in d0	#
   21340 #									#
   21341 # XREF ****************************************************************	#
   21342 #	None								#
   21343 #									#
   21344 # INPUT ***************************************************************	#
   21345 #	d0 = amount to increment by					#
   21346 #	d1 = index of address register to increment			#
   21347 # 									#
   21348 # OUTPUT **************************************************************	#
   21349 #	(address register is updated)					#
   21350 #									#
   21351 # ALGORITHM ***********************************************************	#
   21352 # 	Typically used for an instruction w/ a post-increment <ea>, 	#
   21353 # this routine adds the increment value in d0 to the address register	#
   21354 # specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside	#
   21355 # in their original places.						#
   21356 # 	For a7, if the increment amount is one, then we have to 	#
   21357 # increment by two. For any a7 update, set the mia7_flag so that if	#
   21358 # an access error exception occurs later in emulation, this address	#
   21359 # register update can be undone.					#
   21360 #									#
   21361 #########################################################################
   21362 
   21363 	global		inc_areg
   21364 inc_areg:
   21365 	mov.w		(tbl_iareg.b,%pc,%d1.w*2),%d1	# d1 = word offset read from tbl_iareg[d1]
   21366 	jmp		(tbl_iareg.b,%pc,%d1.w*1)	# dispatch to tbl_iareg + offset
   21367 # one 16-bit entry per address register a0-a7: (handler address - tbl_iareg)
   21368 tbl_iareg:
   21369 	short		iareg0 - tbl_iareg
   21370 	short		iareg1 - tbl_iareg
   21371 	short		iareg2 - tbl_iareg
   21372 	short		iareg3 - tbl_iareg
   21373 	short		iareg4 - tbl_iareg
   21374 	short		iareg5 - tbl_iareg
   21375 	short		iareg6 - tbl_iareg
   21376 	short		iareg7 - tbl_iareg
   21377 # a0/a1/a6/a7 are updated in the frame; a2-a5 are updated in place
   21378 iareg0:	add.l		%d0,EXC_DREGS+0x8(%a6)	# a0 copy in the frame
   21379 	rts
   21380 iareg1:	add.l		%d0,EXC_DREGS+0xc(%a6)	# a1 copy in the frame
   21381 	rts
   21382 iareg2:	add.l		%d0,%a2
   21383 	rts
   21384 iareg3:	add.l		%d0,%a3
   21385 	rts
   21386 iareg4:	add.l		%d0,%a4
   21387 	rts
   21388 iareg5:	add.l		%d0,%a5
   21389 	rts
   21390 iareg6:	add.l		%d0,(%a6)		# a6 copy is the longword at (a6)
   21391 	rts
   21392 iareg7:	mov.b		&mia7_flg,SPCOND_FLG(%a6)	# flag the a7 post-increment so it can be undone
   21393 	cmpi.b		%d0,&0x1		# is the increment amount one? (byte compare)
   21394 	beq.b		iareg7b			# yes; a7 must move by two instead
   21395 	add.l		%d0,EXC_A7(%a6)		# no; add the amount as given
   21396 	rts
   21397 iareg7b:
   21398 	addq.l		&0x2,EXC_A7(%a6)	# byte access through a7: increment by two
   21399 	rts
   21400 
   21401 #########################################################################
   21402 # XDEF ****************************************************************	#
   21403 #	dec_areg(): decrement an address register by the value in d0	#
   21404 #									#
   21405 # XREF ****************************************************************	#
   21406 #	None								#
   21407 #									#
   21408 # INPUT ***************************************************************	#
   21409 #	d0 = amount to decrement by					#
   21410 #	d1 = index of address register to decrement			#
   21411 # 									#
   21412 # OUTPUT **************************************************************	#
   21413 #	(address register is updated)					#
   21414 #									#
   21415 # ALGORITHM ***********************************************************	#
   21416 # 	Typically used for an instruction w/ a pre-decrement <ea>, 	#
   21417 # this routine subtracts the decrement value in d0 from the address	#
   21418 # register specified by d1. A0/A1/A6/A7 reside on the stack. The rest	#
   21419 # reside in their original places.					#
   21420 # 	For a7, if the decrement amount is one, then we have to 	#
   21421 # decrement by two. For any a7 update, set the mda7_flag so that if	#
   21422 # an access error exception occurs later in emulation, this address	#
   21423 # register update can be undone.					#
   21424 #									#
   21425 #########################################################################
   21426 
   21427 	global		dec_areg
   21428 dec_areg:
   21429 	mov.w		(tbl_dareg.b,%pc,%d1.w*2),%d1	# d1 = word offset read from tbl_dareg[d1]
   21430 	jmp		(tbl_dareg.b,%pc,%d1.w*1)	# dispatch to tbl_dareg + offset
   21431 # one 16-bit entry per address register a0-a7: (handler address - tbl_dareg)
   21432 tbl_dareg:
   21433 	short		dareg0 - tbl_dareg
   21434 	short		dareg1 - tbl_dareg
   21435 	short		dareg2 - tbl_dareg
   21436 	short		dareg3 - tbl_dareg
   21437 	short		dareg4 - tbl_dareg
   21438 	short		dareg5 - tbl_dareg
   21439 	short		dareg6 - tbl_dareg
   21440 	short		dareg7 - tbl_dareg
   21441 # a0/a1/a6/a7 are updated in the frame; a2-a5 are updated in place
   21442 dareg0:	sub.l		%d0,EXC_DREGS+0x8(%a6)	# a0 copy in the frame
   21443 	rts
   21444 dareg1:	sub.l		%d0,EXC_DREGS+0xc(%a6)	# a1 copy in the frame
   21445 	rts
   21446 dareg2:	sub.l		%d0,%a2
   21447 	rts
   21448 dareg3:	sub.l		%d0,%a3
   21449 	rts
   21450 dareg4:	sub.l		%d0,%a4
   21451 	rts
   21452 dareg5:	sub.l		%d0,%a5
   21453 	rts
   21454 dareg6:	sub.l		%d0,(%a6)		# a6 copy is the longword at (a6)
   21455 	rts
   21456 dareg7:	mov.b		&mda7_flg,SPCOND_FLG(%a6)	# flag the a7 pre-decrement so it can be undone
   21457 	cmpi.b		%d0,&0x1		# is the decrement amount one? (byte compare)
   21458 	beq.b		dareg7b			# yes; a7 must move by two instead
   21459 	sub.l		%d0,EXC_A7(%a6)		# no; subtract the amount as given
   21460 	rts
   21461 dareg7b:
   21462 	subq.l		&0x2,EXC_A7(%a6)	# byte access through a7: decrement by two
   21463 	rts
   21464 
   21465 ##############################################################################
   21466 
   21467 #########################################################################
   21468 # XDEF ****************************************************************	#
   21469 #	load_fpn1(): load FP register value into FP_SRC(a6).		#
   21470 #									#
   21471 # XREF ****************************************************************	#
   21472 #	None								#
   21473 #									#
   21474 # INPUT ***************************************************************	#
   21475 #	d0 = index of FP register to load				#
   21476 # 									#
   21477 # OUTPUT **************************************************************	#
   21478 #	FP_SRC(a6) = value loaded from FP register file			#
   21479 #									#
   21480 # ALGORITHM ***********************************************************	#
   21481 #	Using the index in d0, load FP_SRC(a6) with a number from the 	#
   21482 # FP register file.							#
   21483 #									#
   21484 #########################################################################
   21485 
   21486 	global 		load_fpn1
   21487 load_fpn1:
   21488 	mov.w		(tbl_load_fpn1.b,%pc,%d0.w*2), %d0	# d0 = word offset read from tbl_load_fpn1[d0]
   21489 	jmp		(tbl_load_fpn1.b,%pc,%d0.w*1)	# dispatch to tbl_load_fpn1 + offset
   21490 # one 16-bit entry per FP register fp0-fp7: (handler address - tbl_load_fpn1)
   21491 tbl_load_fpn1:
   21492 	short		load_fpn1_0 - tbl_load_fpn1
   21493 	short		load_fpn1_1 - tbl_load_fpn1
   21494 	short		load_fpn1_2 - tbl_load_fpn1
   21495 	short		load_fpn1_3 - tbl_load_fpn1
   21496 	short		load_fpn1_4 - tbl_load_fpn1
   21497 	short		load_fpn1_5 - tbl_load_fpn1
   21498 	short		load_fpn1_6 - tbl_load_fpn1
   21499 	short		load_fpn1_7 - tbl_load_fpn1
   21500 # every handler also returns a0 = address of FP_SRC(a6)
   21501 load_fpn1_0:
   21502 	mov.l		0+EXC_FP0(%a6), 0+FP_SRC(%a6)	# fp0: copy the three longwords
   21503 	mov.l		4+EXC_FP0(%a6), 4+FP_SRC(%a6)	# from the EXC_FP0 frame slot
   21504 	mov.l		8+EXC_FP0(%a6), 8+FP_SRC(%a6)
   21505 	lea		FP_SRC(%a6), %a0		# a0 = ptr to FP_SRC
   21506 	rts
   21507 load_fpn1_1:
   21508 	mov.l		0+EXC_FP1(%a6), 0+FP_SRC(%a6)	# fp1: copy the three longwords
   21509 	mov.l		4+EXC_FP1(%a6), 4+FP_SRC(%a6)	# from the EXC_FP1 frame slot
   21510 	mov.l		8+EXC_FP1(%a6), 8+FP_SRC(%a6)
   21511 	lea		FP_SRC(%a6), %a0		# a0 = ptr to FP_SRC
   21512 	rts
   21513 load_fpn1_2:
   21514 	fmovm.x		&0x20, FP_SRC(%a6)		# store fp2 (mask bit 5) from the register file
   21515 	lea		FP_SRC(%a6), %a0		# a0 = ptr to FP_SRC
   21516 	rts
   21517 load_fpn1_3:
   21518 	fmovm.x		&0x10, FP_SRC(%a6)		# store fp3 (mask bit 4)
   21519 	lea		FP_SRC(%a6), %a0
   21520 	rts
   21521 load_fpn1_4:
   21522 	fmovm.x		&0x08, FP_SRC(%a6)		# store fp4 (mask bit 3)
   21523 	lea		FP_SRC(%a6), %a0
   21524 	rts
   21525 load_fpn1_5:
   21526 	fmovm.x		&0x04, FP_SRC(%a6)		# store fp5 (mask bit 2)
   21527 	lea		FP_SRC(%a6), %a0
   21528 	rts
   21529 load_fpn1_6:
   21530 	fmovm.x		&0x02, FP_SRC(%a6)		# store fp6 (mask bit 1)
   21531 	lea		FP_SRC(%a6), %a0
   21532 	rts
   21533 load_fpn1_7:
   21534 	fmovm.x		&0x01, FP_SRC(%a6)		# store fp7 (mask bit 0)
   21535 	lea		FP_SRC(%a6), %a0
   21536 	rts
   21537 
   21538 #############################################################################
   21539 
   21540 #########################################################################
   21541 # XDEF ****************************************************************	#
   21542 #	load_fpn2(): load FP register value into FP_DST(a6).		#
   21543 #									#
   21544 # XREF ****************************************************************	#
   21545 #	None								#
   21546 #									#
   21547 # INPUT ***************************************************************	#
   21548 #	d0 = index of FP register to load				#
   21549 # 									#
   21550 # OUTPUT **************************************************************	#
   21551 #	FP_DST(a6) = value loaded from FP register file			#
   21552 #									#
   21553 # ALGORITHM ***********************************************************	#
   21554 #	Using the index in d0, load FP_DST(a6) with a number from the 	#
   21555 # FP register file.							#
   21556 #									#
   21557 #########################################################################
   21558 
   21559 	global		load_fpn2
   21560 load_fpn2:
   21561 	mov.w		(tbl_load_fpn2.b,%pc,%d0.w*2), %d0	# d0 = word offset read from tbl_load_fpn2[d0]
   21562 	jmp		(tbl_load_fpn2.b,%pc,%d0.w*1)	# dispatch to tbl_load_fpn2 + offset
   21563 # one 16-bit entry per FP register fp0-fp7: (handler address - tbl_load_fpn2)
   21564 tbl_load_fpn2:
   21565 	short		load_fpn2_0 - tbl_load_fpn2
   21566 	short		load_fpn2_1 - tbl_load_fpn2
   21567 	short		load_fpn2_2 - tbl_load_fpn2
   21568 	short		load_fpn2_3 - tbl_load_fpn2
   21569 	short		load_fpn2_4 - tbl_load_fpn2
   21570 	short		load_fpn2_5 - tbl_load_fpn2
   21571 	short		load_fpn2_6 - tbl_load_fpn2
   21572 	short		load_fpn2_7 - tbl_load_fpn2
   21573 # every handler also returns a0 = address of FP_DST(a6)
   21574 load_fpn2_0:
   21575 	mov.l		0+EXC_FP0(%a6), 0+FP_DST(%a6)	# fp0: copy the three longwords
   21576 	mov.l		4+EXC_FP0(%a6), 4+FP_DST(%a6)	# from the EXC_FP0 frame slot
   21577 	mov.l		8+EXC_FP0(%a6), 8+FP_DST(%a6)
   21578 	lea		FP_DST(%a6), %a0		# a0 = ptr to FP_DST
   21579 	rts
   21580 load_fpn2_1:
   21581 	mov.l		0+EXC_FP1(%a6), 0+FP_DST(%a6)	# fp1: copy the three longwords
   21582 	mov.l		4+EXC_FP1(%a6), 4+FP_DST(%a6)	# from the EXC_FP1 frame slot
   21583 	mov.l		8+EXC_FP1(%a6), 8+FP_DST(%a6)
   21584 	lea		FP_DST(%a6), %a0		# a0 = ptr to FP_DST
   21585 	rts
   21586 load_fpn2_2:
   21587 	fmovm.x		&0x20, FP_DST(%a6)		# store fp2 (mask bit 5) from the register file
   21588 	lea		FP_DST(%a6), %a0		# a0 = ptr to FP_DST
   21589 	rts
   21590 load_fpn2_3:
   21591 	fmovm.x		&0x10, FP_DST(%a6)		# store fp3 (mask bit 4)
   21592 	lea		FP_DST(%a6), %a0
   21593 	rts
   21594 load_fpn2_4:
   21595 	fmovm.x		&0x08, FP_DST(%a6)		# store fp4 (mask bit 3)
   21596 	lea		FP_DST(%a6), %a0
   21597 	rts
   21598 load_fpn2_5:
   21599 	fmovm.x		&0x04, FP_DST(%a6)		# store fp5 (mask bit 2)
   21600 	lea		FP_DST(%a6), %a0
   21601 	rts
   21602 load_fpn2_6:
   21603 	fmovm.x		&0x02, FP_DST(%a6)		# store fp6 (mask bit 1)
   21604 	lea		FP_DST(%a6), %a0
   21605 	rts
   21606 load_fpn2_7:
   21607 	fmovm.x		&0x01, FP_DST(%a6)		# store fp7 (mask bit 0)
   21608 	lea		FP_DST(%a6), %a0
   21609 	rts
   21610 
   21611 #############################################################################
   21612 
   21613 #########################################################################
   21614 # XDEF ****************************************************************	#
   21615 # 	store_fpreg(): store an fp value to the fpreg designated d0.	#
   21616 #									#
   21617 # XREF ****************************************************************	#
   21618 #	None								#
   21619 #									#
   21620 # INPUT ***************************************************************	#
   21621 #	fp0 = extended precision value to store				#
   21622 #	d0  = index of floating-point register				#
   21623 # 									#
   21624 # OUTPUT **************************************************************	#
   21625 #	None								#
   21626 #									#
   21627 # ALGORITHM ***********************************************************	#
   21628 #	Store the value in fp0 to the FP register designated by the	#
   21629 # value in d0. The FP number can be DENORM or SNAN so we have to be	#
   21630 # careful that we don't take an exception here.				#
   21631 #									#
   21632 #########################################################################
   21633 
   21634 	global		store_fpreg
   21635 store_fpreg:
   21636 	mov.w		(tbl_store_fpreg.b,%pc,%d0.w*2), %d0	# d0 = word offset read from tbl_store_fpreg[d0]
   21637 	jmp		(tbl_store_fpreg.b,%pc,%d0.w*1)	# dispatch to tbl_store_fpreg + offset
   21638 # one 16-bit entry per FP register fp0-fp7: (handler address - tbl_store_fpreg)
   21639 tbl_store_fpreg:
   21640 	short		store_fpreg_0 - tbl_store_fpreg
   21641 	short		store_fpreg_1 - tbl_store_fpreg
   21642 	short		store_fpreg_2 - tbl_store_fpreg
   21643 	short		store_fpreg_3 - tbl_store_fpreg
   21644 	short		store_fpreg_4 - tbl_store_fpreg
   21645 	short		store_fpreg_5 - tbl_store_fpreg
   21646 	short		store_fpreg_6 - tbl_store_fpreg
   21647 	short		store_fpreg_7 - tbl_store_fpreg
   21648 # only fmovm.x is used to move the value (see header: it may be DENORM or SNAN)
   21649 store_fpreg_0:
   21650 	fmovm.x		&0x80, EXC_FP0(%a6)	# fp0 -> EXC_FP0 slot in the frame
   21651 	rts
   21652 store_fpreg_1:
   21653 	fmovm.x		&0x80, EXC_FP1(%a6)	# fp0 -> EXC_FP1 slot in the frame
   21654 	rts
   21655 store_fpreg_2:
   21656 	fmovm.x 	&0x01, -(%sp)		# push fp0 (predecrement mask)
   21657 	fmovm.x		(%sp)+, &0x20		# pop it into fp2
   21658 	rts
   21659 store_fpreg_3:
   21660 	fmovm.x 	&0x01, -(%sp)		# push fp0
   21661 	fmovm.x		(%sp)+, &0x10		# pop it into fp3
   21662 	rts
   21663 store_fpreg_4:
   21664 	fmovm.x 	&0x01, -(%sp)		# push fp0
   21665 	fmovm.x		(%sp)+, &0x08		# pop it into fp4
   21666 	rts
   21667 store_fpreg_5:
   21668 	fmovm.x 	&0x01, -(%sp)		# push fp0
   21669 	fmovm.x		(%sp)+, &0x04		# pop it into fp5
   21670 	rts
   21671 store_fpreg_6:
   21672 	fmovm.x 	&0x01, -(%sp)		# push fp0
   21673 	fmovm.x		(%sp)+, &0x02		# pop it into fp6
   21674 	rts
   21675 store_fpreg_7:
   21676 	fmovm.x 	&0x01, -(%sp)		# push fp0
   21677 	fmovm.x		(%sp)+, &0x01		# pop it into fp7
   21678 	rts
   21679 
   21680 #########################################################################
   21681 # XDEF ****************************************************************	#
   21682 # 	_denorm(): denormalize an intermediate result			#
   21683 #									#
   21684 # XREF ****************************************************************	#
   21685 #	None								#
   21686 #									#
   21687 # INPUT *************************************************************** #
   21688 #	a0 = points to the operand to be denormalized			#
   21689 #		(in the internal extended format)			#
   21690 #		 							#
   21691 #	d0 = rounding precision						#
   21692 #									#
   21693 # OUTPUT **************************************************************	#
   21694 #	a0 = pointer to the denormalized result				#
   21695 #		(in the internal extended format)			#
   21696 #									#
   21697 #	d0 = guard,round,sticky						#
   21698 #									#
   21699 # ALGORITHM ***********************************************************	#
   21700 # 	According to the exponent underflow threshold for the given	#
   21701 # precision, shift the mantissa bits to the right in order to raise the	#
   21702 # exponent of the operand to the threshold value. While shifting the 	#
   21703 # mantissa bits right, maintain the value of the guard, round, and 	#
   21704 # sticky bits.								#
   21705 # other notes:								#
   21706 #	(1) _denorm() is called by the underflow routines		#
   21707 #	(2) _denorm() does NOT affect the status register		#
   21708 #									#
   21709 #########################################################################
   21710 
   21711 #
   21712 # table of exponent threshold values for each precision
   21713 #
   21714 tbl_thresh:
   21715 	short		0x0			# index 0: threshold of zero
   21716 	short		sgl_thresh		# index 1: single precision threshold
   21717 	short		dbl_thresh		# index 2: double precision threshold
   21718 
   21719 	global		_denorm
   21720 _denorm:
   21721 #
   21722 # Load the exponent threshold for the precision selected and check
   21723 # to see if (threshold - exponent) is > 65 in which case we can
   21724 # simply calculate the sticky bit and zero the mantissa. otherwise
   21725 # we have to call the denormalization routine.
   21726 #
   21727 	lsr.b		&0x2, %d0		# shift prec to lo bits
   21728 	mov.w		(tbl_thresh.b,%pc,%d0.w*2), %d1 # load prec threshold
   21729 	mov.w		%d1, %d0		# copy d1 into d0
   21730 	sub.w		FTEMP_EX(%a0), %d0	# diff = threshold - exp
   21731 	cmpi.w		%d0, &66		# is diff > 65? (mant + g,r bits)
   21732 	bpl.b		denorm_set_stky		# yes; just calc sticky
   21733 
   21734 	clr.l		%d0			# clear g,r,s
   21735 	btst		&inex2_bit, FPSR_EXCEPT(%a6) # no; was INEX2 already set?
   21736 	beq.b		denorm_call		# no; don't change anything
   21737 	bset		&29, %d0		# yes; set sticky bit
   21738 
   21739 denorm_call:
   21740 	bsr.l		dnrm_lp			# denormalize the number
   21741 	rts
   21742 
   21743 #
   21744 # all bits would have been shifted off during the denorm so simply
   21745 # set the sticky bit and clear the entire mantissa.
   21746 #
   21747 denorm_set_stky:
   21748 	mov.l		&0x20000000, %d0	# set sticky bit in return value
   21749 	mov.w		%d1, FTEMP_EX(%a0)	# load exp with threshold
   21750 	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
   21751 	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
   21752 	rts
   21753 
   21754 #									#
   21755 # dnrm_lp(): denormalize exponent/mantissa to specified threshold	#
   21756 #									#
   21757 # INPUT:								#
   21758 #	%a0	   : points to the operand to be denormalized		#
   21759 #	%d0{31:29} : initial guard,round,sticky				#
   21760 #	%d1{15:0}  : denormalization threshold				#
   21761 # OUTPUT:								#
   21762 #	%a0	   : points to the denormalized operand		 	#
   21763 #	%d0{31:29} : final guard,round,sticky				#
   21764 #									#
   21765 
   21766 # *** Local Equates *** #
   21767 set	GRS,		L_SCR2			# g,r,s temp storage
   21768 set	FTEMP_LO2,	L_SCR1			# FTEMP_LO copy
   21769 
   21770 	global		dnrm_lp
   21771 dnrm_lp:
   21772 
   21773 #
   21774 # make a copy of FTEMP_LO and place the g,r,s bits directly after it
   21775 # in memory so as to make the bitfield extraction for denormalization easier.
   21776 # NOTE(review): presumably relies on L_SCR2 immediately following L_SCR1 - confirm.
   21777 	mov.l		FTEMP_LO(%a0), FTEMP_LO2(%a6) # make FTEMP_LO copy
   21778 	mov.l		%d0, GRS(%a6)		# place g,r,s after it
   21779 
   21780 #
   21781 # check to see how much less than the underflow threshold the operand
   21782 # exponent is.
   21783 #
   21784 	mov.l		%d1, %d0		# copy the denorm threshold
   21785 	sub.w		FTEMP_EX(%a0), %d1	# d1 = threshold - uns exponent
   21786 	ble.b		dnrm_no_lp		# d1 <= 0
   21787 	cmpi.w		%d1, &0x20		# is ( 0 < d1 < 32) ?
   21788 	blt.b		case_1			# yes
   21789 	cmpi.w		%d1, &0x40		# is (32 <= d1 < 64) ?
   21790 	blt.b		case_2			# yes
   21791 	bra.w		case_3			# (d1 >= 64)
   21792 
   21793 #
   21794 # No denormalization necessary
   21795 #
   21796 dnrm_no_lp:
   21797 	mov.l		GRS(%a6), %d0 		# restore original g,r,s
   21798 	rts
   21799 
   21800 #
   21801 # case (0<d1<32)
   21802 #
   21803 # %d0 = denorm threshold
   21804 # %d1 = "n" = amt to shift
   21805 #
   21806 #	---------------------------------------------------------
   21807 #	|     FTEMP_HI	  |    	FTEMP_LO     |grs000.........000|
   21808 #	---------------------------------------------------------
   21809 #	<-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
   21810 #	\	   \		      \			 \
   21811 #	 \	    \		       \		  \
   21812 #	  \	     \			\		   \
   21813 #	   \	      \			 \		    \
   21814 #	    \	       \		  \		     \
   21815 #	     \		\		   \		      \
   21816 #	      \		 \		    \		       \
   21817 #	       \	  \		     \			\
   21818 #	<-(n)-><-(32 - n)-><------(32)-------><------(32)------->
   21819 #	---------------------------------------------------------
   21820 #	|0.....0| NEW_HI  |  NEW_FTEMP_LO     |grs		|
   21821 #	---------------------------------------------------------
   21822 #
   21823 case_1:
   21824 	mov.l		%d2, -(%sp)		# create temp storage
   21825 
   21826 	mov.w		%d0, FTEMP_EX(%a0)	# exponent = denorm threshold
   21827 	mov.l		&32, %d0
   21828 	sub.w		%d1, %d0		# %d0 = 32 - %d1
   21829 
   21830 	cmpi.w		%d1, &29		# is shft amt >= 29
   21831 	blt.b		case1_extract		# no; no fix needed
   21832 	mov.b		GRS(%a6), %d2		# yes; fetch the byte holding g,r,s
   21833 	or.b		%d2, 3+FTEMP_LO2(%a6)	# fold it into the low byte of the FTEMP_LO copy
   21834 
   21835 case1_extract:
   21836 	bfextu		FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_HI
   21837 	bfextu		FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new FTEMP_LO
   21838 	bfextu		FTEMP_LO2(%a6){%d0:&32}, %d0 # %d0 = new G,R,S
   21839 
   21840 	mov.l		%d2, FTEMP_HI(%a0)	# store new FTEMP_HI
   21841 	mov.l		%d1, FTEMP_LO(%a0)	# store new FTEMP_LO
   21842 
   21843 	bftst		%d0{&2:&30}		# were bits shifted off?
   21844 	beq.b		case1_sticky_clear	# no; go finish
   21845 	bset		&rnd_stky_bit, %d0	# yes; set sticky bit
   21846 
   21847 case1_sticky_clear:
   21848 	and.l		&0xe0000000, %d0	# clear all but G,R,S
   21849 	mov.l		(%sp)+, %d2		# restore temp register
   21850 	rts
   21851 
   21852 #
   21853 # case (32<=d1<64)
   21854 #
   21855 # %d0 = denorm threshold
   21856 # %d1 = "n" = amt to shift
   21857 #
   21858 #	---------------------------------------------------------
   21859 #	|     FTEMP_HI	  |    	FTEMP_LO     |grs000.........000|
   21860 #	---------------------------------------------------------
   21861 #	<-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
   21862 #	\	   \		      \
   21863 #	 \	    \		       \
   21864 #	  \	     \			-------------------
   21865 #	   \	      --------------------		   \
   21866 #	    -------------------	  	  \		    \
   21867 #	     		       \	   \		     \
   21868 #	      		 	\     	    \		      \
   21869 #	       		  	 \	     \		       \
   21870 #	<-------(32)------><-(n)-><-(32 - n)-><------(32)------->
   21871 #	---------------------------------------------------------
   21872 #	|0...............0|0....0| NEW_LO     |grs		|
   21873 #	---------------------------------------------------------
   21874 #
   21875 case_2:
   21876 	mov.l		%d2, -(%sp)		# create temp storage
   21877 
   21878 	mov.w		%d0, FTEMP_EX(%a0)	# exponent = denorm threshold
   21879 	subi.w		&0x20, %d1		# %d1 now between 0 and 31
   21880 	mov.l		&0x20, %d0
   21881 	sub.w		%d1, %d0		# %d0 = 32 - %d1
   21882 
   21883 # subtle step here; or in the g,r,s at the bottom of FTEMP_LO to minimize
   21884 # the number of bits to check for the sticky detect.
   21885 # it only plays a role in shift amounts of 61-63.
   21886 	mov.b		GRS(%a6), %d2		# fetch the byte holding g,r,s
   21887 	or.b		%d2, 3+FTEMP_LO2(%a6)	# fold it into the low byte of the FTEMP_LO copy
   21888 
   21889 	bfextu		FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_LO
   21890 	bfextu		FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new G,R,S
   21891 
   21892 	bftst		%d1{&2:&30}		# were any bits shifted off?
   21893 	bne.b		case2_set_sticky	# yes; set sticky bit
   21894 	bftst		FTEMP_LO2(%a6){%d0:&31}	# were any bits shifted off?
   21895 	bne.b		case2_set_sticky	# yes; set sticky bit
   21896 
   21897 	mov.l		%d1, %d0		# move new G,R,S to %d0
   21898 	bra.b		case2_end
   21899 
   21900 case2_set_sticky:
   21901 	mov.l		%d1, %d0		# move new G,R,S to %d0
   21902 	bset		&rnd_stky_bit, %d0	# set sticky bit
   21903 
   21904 case2_end:
   21905 	clr.l		FTEMP_HI(%a0)		# store FTEMP_HI = 0
   21906 	mov.l		%d2, FTEMP_LO(%a0)	# store FTEMP_LO
   21907 	and.l		&0xe0000000, %d0	# clear all but G,R,S
   21908 
   21909 	mov.l		(%sp)+,%d2		# restore temp register
   21910 	rts
   21911 
   21912 #
   21913 # case (d1>=64)
   21914 #
   21915 # %d0 = denorm threshold
   21916 # %d1 = amt to shift
   21917 #
   21918 case_3:
   21919 	mov.w		%d0, FTEMP_EX(%a0)	# insert denorm threshold
   21920 
   21921 	cmpi.w		%d1, &65		# compare shift amt with 65
   21922 	blt.b		case3_64		# less; it's == 64
   21923 	beq.b		case3_65		# equal; it's == 65
   21924 
   21925 #
   21926 # case (d1>65)
   21927 #
   21928 # Shift value is > 65 and out of range. All bits are shifted off.
   21929 # Return a zero mantissa with the sticky bit set
   21930 #
   21931 	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
   21932 	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
   21933 	mov.l		&0x20000000, %d0	# set sticky bit
   21934 	rts
   21935 
   21936 #
   21937 # case (d1 == 64)
   21938 #
   21939 #	---------------------------------------------------------
   21940 #	|     FTEMP_HI	  |    	FTEMP_LO     |grs000.........000|
   21941 #	---------------------------------------------------------
   21942 #	<-------(32)------>
   21943 #	\	   	   \
   21944 #	 \	    	    \
   21945 #	  \	     	     \
   21946 #	   \	      	      ------------------------------
   21947 #	    -------------------------------		    \
   21948 #	     		       		   \		     \
   21949 #	      		 	     	    \		      \
   21950 #	       		  	 	     \		       \
   21951 #					      <-------(32)------>
   21952 #	---------------------------------------------------------
   21953 #	|0...............0|0................0|grs		|
   21954 #	---------------------------------------------------------
   21955 #
   21956 case3_64:
   21957 	mov.l		FTEMP_HI(%a0), %d0	# fetch hi(mantissa)
   21958 	mov.l		%d0, %d1		# make a copy
   21959 	and.l		&0xc0000000, %d0	# extract G,R
   21960 	and.l		&0x3fffffff, %d1	# extract other bits
   21961 # the condition codes from the "and" above are consumed at case3_complete
   21962 	bra.b		case3_complete
   21963 
   21964 #
   21965 # case (d1 == 65)
   21966 #
   21967 #	---------------------------------------------------------
   21968 #	|     FTEMP_HI	  |    	FTEMP_LO     |grs000.........000|
   21969 #	---------------------------------------------------------
   21970 #	<-------(32)------>
   21971 #	\	   	   \
   21972 #	 \	    	    \
   21973 #	  \	     	     \
   21974 #	   \	      	      ------------------------------
   21975 #	    --------------------------------		    \
   21976 #	     		       		    \		     \
   21977 #	      		 	     	     \		      \
   21978 #	       		  	 	      \		       \
   21979 #					       <-------(31)----->
   21980 #	---------------------------------------------------------
   21981 #	|0...............0|0................0|0rs		|
   21982 #	---------------------------------------------------------
   21983 #
   21984 case3_65:
   21985 	mov.l		FTEMP_HI(%a0), %d0	# fetch hi(mantissa)
   21986 	and.l		&0x80000000, %d0	# extract R bit
   21987 	lsr.l		&0x1, %d0		# shift high bit into R bit
   21988 	and.l		&0x7fffffff, %d1	# extract other bits
   21989 
   21990 case3_complete:
   21991 # last operation done was an "and" of the bits shifted off so the condition
   21992 # codes are already set so branch accordingly.
   21993 	bne.b		case3_set_sticky	# yes; go set new sticky
   21994 	tst.l		FTEMP_LO(%a0)		# were any bits shifted off?
   21995 	bne.b		case3_set_sticky	# yes; go set new sticky
   21996 	tst.b		GRS(%a6)		# were any bits shifted off?
   21997 	bne.b		case3_set_sticky	# yes; go set new sticky
   21998 
   21999 #
   22000 # no bits were shifted off so don't set the sticky bit.
   22001 # %d0 already holds the guard and round bits.
   22002 # the entire mantissa is zero.
   22003 #
   22004 	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
   22005 	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
   22006 	rts
   22007 
   22008 #
   22009 # some bits were shifted off so set the sticky bit.
   22010 # the entire mantissa is zero.
   22011 #
   22012 case3_set_sticky:
   22013 	bset		&rnd_stky_bit,%d0	# set new sticky bit
   22014 	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
   22015 	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
   22016 	rts
   22017 
   22018 #########################################################################
   22019 # XDEF ****************************************************************	#
   22020 #	_round(): round result according to precision/mode		#
   22021 #									#
   22022 # XREF ****************************************************************	#
   22023 #	None								#
   22024 #									#
   22025 # INPUT ***************************************************************	#
   22026 #	a0	  = ptr to input operand in internal extended format 	#
   22027 #	d1(hi)    = contains rounding precision:			#
   22028 #			ext = $0000xxxx					#
   22029 #			sgl = $0004xxxx					#
   22030 #			dbl = $0008xxxx					#
   22031 #	d1(lo)	  = contains rounding mode:				#
   22032 #			RN  = $xxxx0000					#
   22033 #			RZ  = $xxxx0001					#
   22034 #			RM  = $xxxx0002					#
   22035 #			RP  = $xxxx0003					#
   22036 #	d0{31:29} = contains the g,r,s bits (extended)			#
   22037 #									#
   22038 # OUTPUT **************************************************************	#
   22039 #	a0 = pointer to rounded result					#
   22040 #									#
   22041 # ALGORITHM ***********************************************************	#
   22042 #	On return the value pointed to by a0 is correctly rounded,	#
   22043 #	a0 is preserved and the g-r-s bits in d0 are cleared.		#
   22044 #	The result is not typed - the tag field is invalid.  The	#
   22045 #	result is still in the internal extended format.		#
   22046 #									#
   22047 #	The INEX bit of USER_FPSR will be set if the rounded result was	#
   22048 #	inexact (i.e. if any of the g-r-s bits were set).		#
   22049 #									#
   22050 #########################################################################
   22051 
   22052 	global		_round
   22053 _round:
   22054 #
   22055 # ext_grs() looks at the rounding precision and sets the appropriate
   22056 # G,R,S bits.
   22057 # If (G,R,S == 0) then result is exact and round is done, else set
   22058 # the inex flag in status reg and continue.
   22059 #
   22060 	bsr.l		ext_grs			# extract G,R,S
   22061 
   22062 	tst.l		%d0			# are G,R,S zero?
   22063 	beq.w		truncate		# yes; round is complete
   22064 
   22065 	or.w		&inx2a_mask, 2+USER_FPSR(%a6) # set inex2/ainex
   22066 
   22067 #
   22068 # Use rounding mode (low word of d1) as an index into a jump table.
   22069 # All of the following assumes grs != 0.
   22070 #
   22071 	mov.w		(tbl_mode.b,%pc,%d1.w*2), %a1 # load jump offset
   22072 	jmp		(tbl_mode.b,%pc,%a1)	# jmp to rnd mode handler
   22073 
   22074 tbl_mode:
   22075 	short		rnd_near - tbl_mode	# RN
   22076 	short		truncate - tbl_mode	# RZ always truncates
   22077 	short		rnd_mnus - tbl_mode	# RM
   22078 	short		rnd_plus - tbl_mode	# RP
   22079 
   22080 #################################################################
   22081 #	ROUND PLUS INFINITY					#
   22082 #								#
   22083 #	If sign of fp number = 0 (positive), then add 1 to l.	#
   22084 #################################################################
   22085 rnd_plus:
   22086 	tst.b		FTEMP_SGN(%a0)		# check for sign
   22087 	bmi.w		truncate		# if negative then truncate
   22088 
   22089 	mov.l		&0xffffffff, %d0	# force g,r,s to be all f's
   22090 	swap		%d1			# set up d1 for round prec.
   22091 
   22092 	cmpi.b		%d1, &s_mode		# is prec = sgl?
   22093 	beq.w		add_sgl			# yes
   22094 	bgt.w		add_dbl			# no; it's dbl
   22095 	bra.w		add_ext			# no; it's ext
   22096 
   22097 #################################################################
   22098 #	ROUND MINUS INFINITY					#
   22099 #								#
   22100 #	If sign of fp number = 1 (negative), then add 1 to l.	#
   22101 #################################################################
   22102 rnd_mnus:
   22103 	tst.b		FTEMP_SGN(%a0)		# check for sign
   22104 	bpl.w		truncate		# if positive then truncate
   22105 
   22106 	mov.l		&0xffffffff, %d0	# force g,r,s to be all f's
   22107 	swap		%d1			# set up d1 for round prec.
   22108 
   22109 	cmpi.b		%d1, &s_mode		# is prec = sgl?
   22110 	beq.w		add_sgl			# yes
   22111 	bgt.w		add_dbl			# no; it's dbl
   22112 	bra.w		add_ext			# no; it's ext
   22113 
   22114 #################################################################
   22115 #	ROUND NEAREST						#
   22116 #								#
   22117 #	If (g=1), then add 1 to l and if (r=s=0), then clear l	#
   22118 #	Note that this will round to even in case of a tie.	#
   22119 #################################################################
   22120 rnd_near:
   22121 	asl.l		&0x1, %d0		# shift g-bit to c-bit; d0 is left holding r,s
   22122 	bcc.w		truncate		# g = 0: truncate; g = 1: add 1 to l below
   22123 
   22124 	swap		%d1			# set up d1 for round prec.
   22125 
   22126 	cmpi.b		%d1, &s_mode		# is prec = sgl?
   22127 	beq.w		add_sgl			# yes
   22128 	bgt.w		add_dbl			# no; it's dbl
   22129 	bra.w		add_ext			# no; it's ext
   22130 
   22131 # *** LOCAL EQUATES ***
   22132 set	ad_1_sgl,	0x00000100	# added to FTEMP_HI: 1 in the sgl prec l-bit
   22133 set	ad_1_dbl,	0x00000800	# added to FTEMP_LO: 1 in the dbl prec l-bit
   22134 
   22135 #########################
   22136 #	ADD SINGLE	#
   22137 #########################
   22138 add_sgl:
   22139 	add.l		&ad_1_sgl, FTEMP_HI(%a0)	# add 1 to the sgl l-bit
   22140 	bcc.b		scc_clr			# no mantissa overflow
   22141 	roxr.w		FTEMP_HI(%a0)		# shift v-bit back in
   22142 	roxr.w		FTEMP_HI+2(%a0)		# shift v-bit back in
   22143 	add.w		&0x1, FTEMP_EX(%a0)	# and incr exponent
   22144 scc_clr:
   22145 	tst.l		%d0			# test for rs = 0
   22146 	bne.b		sgl_done		# r or s set (or d0 forced): keep the l-bit
   22147 	and.w		&0xfe00, FTEMP_HI+2(%a0) # clear the l-bit
   22148 sgl_done:
   22149 	and.l		&0xffffff00, FTEMP_HI(%a0) # truncate bits beyond sgl limit
   22150 	clr.l		FTEMP_LO(%a0)		# lo(man) is entirely beyond sgl limit
   22151 	rts
   22152 
   22153 #########################
   22154 #	ADD EXTENDED	#
   22155 #########################
   22156 add_ext:
   22157 	addq.l		&1,FTEMP_LO(%a0)	# add 1 to l-bit
   22158 	bcc.b		xcc_clr			# test for carry out
   22159 	addq.l		&1,FTEMP_HI(%a0)	# propagate carry
   22160 	bcc.b		xcc_clr			# no carry out of hi(man)
   22161 	roxr.w		FTEMP_HI(%a0)		# mant is 0 so restore v-bit
   22162 	roxr.w		FTEMP_HI+2(%a0)		# mant is 0 so restore v-bit
   22163 	roxr.w		FTEMP_LO(%a0)		# continue the rotate through lo(man)
   22164 	roxr.w		FTEMP_LO+2(%a0)
   22165 	add.w		&0x1,FTEMP_EX(%a0)	# and inc exp
   22166 xcc_clr:
   22167 	tst.l		%d0			# test rs = 0
   22168 	bne.b		add_ext_done		# r or s set (or d0 forced): keep the l-bit
   22169 	and.b		&0xfe,FTEMP_LO+3(%a0)	# clear the l bit
   22170 add_ext_done:
   22171 	rts
   22172 
   22173 #########################
   22174 #	ADD DOUBLE	#
   22175 #########################
   22176 add_dbl:
   22177 	add.l		&ad_1_dbl, FTEMP_LO(%a0) # add 1 to lsb
   22178 	bcc.b		dcc_clr			# no carry
   22179 	addq.l		&0x1, FTEMP_HI(%a0)	# propagate carry
   22180 	bcc.b		dcc_clr			# no carry
   22181 
   22182 	roxr.w		FTEMP_HI(%a0)		# mant is 0 so restore v-bit
   22183 	roxr.w		FTEMP_HI+2(%a0)		# mant is 0 so restore v-bit
   22184 	roxr.w		FTEMP_LO(%a0)		# continue the rotate through lo(man)
   22185 	roxr.w		FTEMP_LO+2(%a0)
   22186 	addq.w		&0x1, FTEMP_EX(%a0)	# incr exponent
   22187 dcc_clr:
   22188 	tst.l		%d0			# test for rs = 0
   22189 	bne.b		dbl_done		# r or s set (or d0 forced): keep the l-bit
   22190 	and.w		&0xf000, FTEMP_LO+2(%a0) # clear the l-bit
   22191 
   22192 dbl_done:
   22193 	and.l		&0xfffff800,FTEMP_LO(%a0) # truncate bits beyond dbl limit
   22194 	rts
   22195 
   22196 ###########################
   22197 # Truncate all other bits #
   22198 ###########################
   22199 truncate:
   22200 	swap		%d1			# select rnd prec
   22201 
   22202 	cmpi.b		%d1, &s_mode		# is prec sgl?
   22203 	beq.w		sgl_done		# yes; mask off bits beyond sgl limit
   22204 	bgt.b		dbl_done		# no; it's dbl; mask off bits beyond dbl limit
   22205 	rts					# no; it's ext; mantissa is left untouched
   22206 
   22207 
   22208 #
   22209 # ext_grs(): extract guard, round and sticky bits according to
   22210 #	     rounding precision.
   22211 #
   22212 # INPUT
   22213 #	d0	   = extended precision g,r,s (in d0{31:29})
   22214 #	d1 	   = {PREC,ROUND}
   22215 # OUTPUT
   22216 #	d0{31:29}  = guard, round, sticky
   22217 #
   22218 # ext_grs() extracts the guard/round/sticky bits according to the
   22219 # selected rounding precision. It is called by the round subroutine
   22220 # only.  All registers except d0 are kept intact. d0 becomes an
   22221 # updated guard,round,sticky in d0{31:29}
   22222 #
   22223 # Notes: the ext_grs uses the round PREC, and therefore has to swap d1
   22224 #	 prior to usage, and needs to restore d1 to original. this
   22225 #	 routine is tightly tied to the round routine and not meant to
   22226 #	 uphold standard subroutine calling practices.
   22227 #
   22228 
   22229 ext_grs:
   22230 	swap		%d1			# have d1.w point to round precision
   22231 	tst.b		%d1			# is rnd prec = extended?
   22232 	bne.b		ext_grs_not_ext		# no; go handle sgl or dbl
   22233 
   22234 #
   22235 # %d0 actually already holds g,r,s since _round() had it before calling
   22236 # this function. so, as long as we don't disturb it, we are "returning" it.
   22237 #
   22238 ext_grs_ext:
   22239 	swap		%d1			# yes; return to correct positions
   22240 	rts
   22241 
   22242 ext_grs_not_ext:
   22243 	movm.l		&0x3000, -(%sp)		# make some temp registers {d2/d3}
   22244 
   22245 	cmpi.b		%d1, &s_mode		# is rnd prec = sgl?
   22246 	bne.b		ext_grs_dbl		# no; go handle dbl
   22247 
   22248 #
   22249 # sgl:
   22250 #	96		64	  40	32		0
   22251 #	-----------------------------------------------------
   22252 #	| EXP	|XXXXXXX|	  |xx	|		|grs|
   22253 #	-----------------------------------------------------
   22254 #			<--(24)--->nn\			   /
   22255 #				   ee ---------------------
   22256 #				   ww		|
   22257 #						v
   22258 #				   gr	   new sticky
   22259 #
   22260 ext_grs_sgl:
   22261 	bfextu		FTEMP_HI(%a0){&24:&2}, %d3 # sgl prec. g-r are 2 bits right
   22262 	mov.l		&30, %d2		# of the sgl prec. limits
   22263 	lsl.l		%d2, %d3		# shift g-r bits to MSB of d3
   22264 	mov.l		FTEMP_HI(%a0), %d2	# get hi(man) for s-bit test
   22265 	and.l		&0x0000003f, %d2	# s bit is the or of all other
   22266 	bne.b		ext_grs_st_stky		# bits to the right of g-r
   22267 	tst.l		FTEMP_LO(%a0)		# test lower mantissa
   22268 	bne.b		ext_grs_st_stky		# if any are set, set sticky
   22269 	tst.l		%d0			# test original g,r,s
   22270 	bne.b		ext_grs_st_stky		# if any are set, set sticky
   22271 	bra.b		ext_grs_end_sd		# nothing below g-r is set; exit
   22272 
   22273 #
   22274 # dbl:
   22275 #	96		64	  	32	 11	0
   22276 #	-----------------------------------------------------
   22277 #	| EXP	|XXXXXXX|	  	|	 |xx	|grs|
   22278 #	-----------------------------------------------------
   22279 #						  nn\	    /
   22280 #						  ee -------
   22281 #						  ww	|
   22282 #							v
   22283 #						  gr	new sticky
   22284 #
   22285 ext_grs_dbl:
   22286 	bfextu		FTEMP_LO(%a0){&21:&2}, %d3 # dbl-prec. g-r are 2 bits right
   22287 	mov.l		&30, %d2		# of the dbl prec. limits
   22288 	lsl.l		%d2, %d3		# shift g-r bits to the MSB of d3
   22289 	mov.l		FTEMP_LO(%a0), %d2	# get lower mantissa for s-bit test
   22290 	and.l		&0x000001ff, %d2	# s bit is the or-ing of all
   22291 	bne.b		ext_grs_st_stky		# other bits to the right of g-r
   22292 	tst.l		%d0			# test original g,r,s
   22293 	bne.b		ext_grs_st_stky		# if any are set, set sticky
   22294 	bra.b		ext_grs_end_sd		# if clear, exit
   22295 
   22296 ext_grs_st_stky:
   22297 	bset		&rnd_stky_bit, %d3	# set sticky bit
   22298 ext_grs_end_sd:
   22299 	mov.l		%d3, %d0		# return grs to d0
   22300 
   22301 	movm.l		(%sp)+, &0xc		# restore scratch registers {d2/d3}
   22302 
   22303 	swap		%d1			# restore d1 to original
   22304 	rts
   22305 
   22306 #########################################################################
   22307 # norm(): normalize the mantissa of an extended precision input. the	#
   22308 #	  input operand should not be normalized already.		#
   22309 #									#
   22310 # XDEF ****************************************************************	#
   22311 #	norm()								#
   22312 #									#
   22313 # XREF **************************************************************** #
   22314 #	none								#
   22315 #									#
   22316 # INPUT *************************************************************** #
   22317 #	a0 = pointer fp extended precision operand to normalize		#
   22318 #									#
   22319 # OUTPUT ************************************************************** #
   22320 # 	d0 = number of bit positions the mantissa was shifted		#
   22321 #	a0 = the input operand's mantissa is normalized; the exponent	#
   22322 #	     is unchanged.						#
   22323 #									#
   22324 #########################################################################
   22325 	global		norm
   22326 norm:
   22327 	mov.l		%d2, -(%sp)		# create some temp regs
   22328 	mov.l		%d3, -(%sp)
   22329 
   22330 	mov.l		FTEMP_HI(%a0), %d0	# load hi(mantissa)
   22331 	mov.l		FTEMP_LO(%a0), %d1	# load lo(mantissa)
   22332 
   22333 	bfffo		%d0{&0:&32}, %d2	# how many places to shift?
   22334 	beq.b		norm_lo			# hi(man) is all zeroes!
   22335 
   22336 norm_hi:
   22337 	lsl.l		%d2, %d0		# left shift hi(man)
   22338 	bfextu		%d1{&0:%d2}, %d3	# extract lo bits
   22339 
   22340 	or.l		%d3, %d0		# create hi(man)
   22341 	lsl.l		%d2, %d1		# create lo(man)
   22342 
   22343 	mov.l		%d0, FTEMP_HI(%a0)	# store new hi(man)
   22344 	mov.l		%d1, FTEMP_LO(%a0)	# store new lo(man)
   22345 
   22346 	mov.l		%d2, %d0		# return shift amount
   22347 
   22348 	mov.l		(%sp)+, %d3		# restore temp regs
   22349 	mov.l		(%sp)+, %d2
   22350 
   22351 	rts
   22352 
   22353 norm_lo:
   22354 	bfffo		%d1{&0:&32}, %d2	# how many places to shift?
   22355 	lsl.l		%d2, %d1		# shift lo(man)
   22356 	add.l		&32, %d2		# add 32 to shft amount
   22357 
   22358 	mov.l		%d1, FTEMP_HI(%a0)	# store hi(man)
   22359 	clr.l		FTEMP_LO(%a0)		# lo(man) is now zero
   22360 
   22361 	mov.l		%d2, %d0		# return shift amount
   22362 
   22363 	mov.l		(%sp)+, %d3		# restore temp regs
   22364 	mov.l		(%sp)+, %d2
   22365 
   22366 	rts
   22367 
   22368 #########################################################################
   22369 # unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO	#
   22370 #		- returns corresponding optype tag			#
   22371 #									#
   22372 # XDEF ****************************************************************	#
   22373 #	unnorm_fix()							#
   22374 #									#
   22375 # XREF **************************************************************** #
   22376 #	norm() - normalize the mantissa					#
   22377 #									#
   22378 # INPUT *************************************************************** #
   22379 #	a0 = pointer to unnormalized extended precision number		#
   22380 #									#
   22381 # OUTPUT ************************************************************** #
   22382 #	d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO	#
   22383 #	a0 = input operand has been converted to a norm, denorm, or	#
   22384 #	     zero; both the exponent and mantissa are changed.		#
   22385 #									#
   22386 #########################################################################
   22387 
   22388 	global		unnorm_fix
   22389 unnorm_fix:
   22390 	bfffo		FTEMP_HI(%a0){&0:&32}, %d0 # how many shifts are needed?
   22391 	bne.b		unnorm_shift		# hi(man) is not all zeroes
   22392 
   22393 #
   22394 # hi(man) is all zeroes so see if any bits in lo(man) are set
   22395 #
   22396 unnorm_chk_lo:
   22397 	bfffo		FTEMP_LO(%a0){&0:&32}, %d0 # is operand really a zero?
   22398 	beq.w		unnorm_zero		# yes
   22399 
   22400 	add.w		&32, %d0		# no; fix shift distance
   22401 
   22402 #
   22403 # d0 = # shifts needed for complete normalization
   22404 #
   22405 unnorm_shift:
   22406 	clr.l		%d1			# clear top word
   22407 	mov.w		FTEMP_EX(%a0), %d1	# extract exponent
   22408 	and.w		&0x7fff, %d1		# strip off sgn
   22409 
   22410 	cmp.w		%d0, %d1		# will full normalization push exp < 0?
   22411 	bgt.b		unnorm_nrm_zero		# yes; shift only until exp = 0
   22412 
   22413 #
   22414 # exponent would not go < 0. therefore, number stays normalized
   22415 #
   22416 	sub.w		%d0, %d1		# shift exponent value
   22417 	mov.w		FTEMP_EX(%a0), %d0	# load old exponent
   22418 	and.w		&0x8000, %d0		# save old sign
   22419 	or.w		%d0, %d1		# {sgn,new exp}
   22420 	mov.w		%d1, FTEMP_EX(%a0)	# insert new exponent
   22421 
   22422 	bsr.l		norm			# normalize UNNORM
   22423 
   22424 	mov.b		&NORM, %d0		# return new optype tag
   22425 	rts
   22426 
   22427 #
   22428 # exponent would go < 0, so only shift left by exp (d1) bits; exp becomes 0
   22429 #
   22430 unnorm_nrm_zero:
   22431 	cmp.b		%d1, &32		# is exp <= 32?
   22432 	bgt.b		unnorm_nrm_zero_lrg	# no; go handle large exponent
   22433 
   22434 	bfextu		FTEMP_HI(%a0){%d1:&32}, %d0 # extract new hi(man)
   22435 	mov.l		%d0, FTEMP_HI(%a0)	# save new hi(man)
   22436 
   22437 	mov.l		FTEMP_LO(%a0), %d0	# fetch old lo(man)
   22438 	lsl.l		%d1, %d0		# extract new lo(man)
   22439 	mov.l		%d0, FTEMP_LO(%a0)	# save new lo(man)
   22440 
   22441 	and.w		&0x8000, FTEMP_EX(%a0)	# set exp = 0
   22442 
   22443 	mov.b		&DENORM, %d0		# return new optype tag
   22444 	rts
   22445 
   22446 #
   22447 # only mantissa bits set are in lo(man)
   22448 #
   22449 unnorm_nrm_zero_lrg:
   22450 	sub.w		&32, %d1		# adjust shft amt by 32
   22451 
   22452 	mov.l		FTEMP_LO(%a0), %d0	# fetch old lo(man)
   22453 	lsl.l		%d1, %d0		# left shift lo(man)
   22454 
   22455 	mov.l		%d0, FTEMP_HI(%a0)	# store new hi(man)
   22456 	clr.l		FTEMP_LO(%a0)		# lo(man) = 0
   22457 
   22458 	and.w		&0x8000, FTEMP_EX(%a0)	# set exp = 0
   22459 
   22460 	mov.b		&DENORM, %d0		# return new optype tag
   22461 	rts
   22462 
   22463 #
   22464 # whole mantissa is zero so this UNNORM is actually a zero
   22465 #
   22466 unnorm_zero:
   22467 	and.w		&0x8000, FTEMP_EX(%a0) 	# force exponent to zero
   22468 
   22469 	mov.b		&ZERO, %d0		# fix optype tag
   22470 	rts
   22471 
   22472 #########################################################################
   22473 # XDEF ****************************************************************	#
   22474 # 	set_tag_x(): return the optype of the input ext fp number	#
   22475 #									#
   22476 # XREF ****************************************************************	#
   22477 #	None								#
   22478 #									#
   22479 # INPUT ***************************************************************	#
   22480 #	a0 = pointer to extended precision operand			#
   22481 # 									#
   22482 # OUTPUT **************************************************************	#
   22483 #	d0 = value of type tag						#
   22484 # 		one of: NORM, INF, QNAN, SNAN, DENORM, UNNORM, ZERO	#
   22485 #									#
   22486 # ALGORITHM ***********************************************************	#
   22487 #	Simply test the exponent, j-bit, and mantissa values to 	#
   22488 # determine the type of operand.					#
   22489 #	If it's an unnormalized zero, alter the operand and force it	#
   22490 # to be a normal zero.							#
   22491 #									#
   22492 #########################################################################
   22493 
   22494 	global		set_tag_x
   22495 set_tag_x:
   22496 	mov.w		FTEMP_EX(%a0), %d0	# extract exponent
   22497 	andi.w		&0x7fff, %d0		# strip off sign
   22498 	cmpi.w		%d0, &0x7fff		# is (EXP == MAX)?
   22499 	beq.b		inf_or_nan_x		# yes; it's an INF or a NAN
   22500 not_inf_or_nan_x:
   22501 	btst		&0x7,FTEMP_HI(%a0)	# is msb of hi(man) (j-bit) set?
   22502 	beq.b		not_norm_x		# no
   22503 is_norm_x:
   22504 	mov.b		&NORM, %d0
   22505 	rts
   22506 not_norm_x:
   22507 	tst.w		%d0			# is exponent = 0?
   22508 	bne.b		is_unnorm_x		# no; j-bit clear with exp != 0
   22509 not_unnorm_x:
   22510 	tst.l		FTEMP_HI(%a0)		# exp = 0: any mantissa bit set?
   22511 	bne.b		is_denorm_x
   22512 	tst.l		FTEMP_LO(%a0)
   22513 	bne.b		is_denorm_x
   22514 is_zero_x:
   22515 	mov.b		&ZERO, %d0
   22516 	rts
   22517 is_denorm_x:
   22518 	mov.b		&DENORM, %d0
   22519 	rts
   22520 # must distinguish now "Unnormalized zeroes" which we
   22521 # must convert to zero.
   22522 is_unnorm_x:
   22523 	tst.l		FTEMP_HI(%a0)		# any mantissa bit set?
   22524 	bne.b		is_unnorm_reg_x
   22525 	tst.l		FTEMP_LO(%a0)
   22526 	bne.b		is_unnorm_reg_x
   22527 # it's an "unnormalized zero". let's convert it to an actual zero...
   22528 	andi.w		&0x8000,FTEMP_EX(%a0)	# clear exponent
   22529 	mov.b		&ZERO, %d0
   22530 	rts
   22531 is_unnorm_reg_x:
   22532 	mov.b		&UNNORM, %d0
   22533 	rts
   22534 inf_or_nan_x:
   22535 	tst.l		FTEMP_LO(%a0)		# any bit set in lo(man)?
   22536 	bne.b		is_nan_x		# yes; it's a NAN
   22537 	mov.l		FTEMP_HI(%a0), %d0
   22538 	and.l		&0x7fffffff, %d0	# msb is a don't care!
   22539 	bne.b		is_nan_x		# rest of hi(man) != 0; it's a NAN
   22540 is_inf_x:
   22541 	mov.b		&INF, %d0
   22542 	rts
   22543 is_nan_x:
   22544 	btst		&0x6, FTEMP_HI(%a0)	# bit just below the mantissa msb
   22545 	beq.b		is_snan_x		# clear; it's an SNAN
   22546 	mov.b		&QNAN, %d0
   22547 	rts
   22548 is_snan_x:
   22549 	mov.b		&SNAN, %d0
   22550 	rts
   22551 
   22552 #########################################################################
   22553 # XDEF ****************************************************************	#
   22554 # 	set_tag_d(): return the optype of the input dbl fp number	#
   22555 #									#
   22556 # XREF ****************************************************************	#
   22557 #	None								#
   22558 #									#
   22559 # INPUT ***************************************************************	#
   22560 #	a0 = points to double precision operand				#
   22561 # 									#
   22562 # OUTPUT **************************************************************	#
   22563 #	d0 = value of type tag						#
   22564 # 		one of: NORM, INF, QNAN, SNAN, DENORM, ZERO		#
   22565 #									#
   22566 # ALGORITHM ***********************************************************	#
   22567 #	Simply test the exponent, j-bit, and mantissa values to 	#
   22568 # determine the type of operand.					#
   22569 #									#
   22570 #########################################################################
   22571 
   22572 	global		set_tag_d
   22573 set_tag_d:
   22574 	mov.l		FTEMP(%a0), %d0		# fetch upper longword of the operand
   22575 	mov.l		%d0, %d1		# keep a copy for the fraction tests
   22576 
   22577 	andi.l		&0x7ff00000, %d0	# isolate exponent field
   22578 	beq.b		zero_or_denorm_d	# exponent = 0
   22579 
   22580 	cmpi.l		%d0, &0x7ff00000	# is exponent all ones?
   22581 	beq.b		inf_or_nan_d		# yes
   22582 
   22583 is_norm_d:
   22584 	mov.b		&NORM, %d0
   22585 	rts
   22586 zero_or_denorm_d:
   22587 	and.l		&0x000fffff, %d1	# upper fraction bits
   22588 	bne		is_denorm_d
   22589 	tst.l		4+FTEMP(%a0)		# lower longword
   22590 	bne		is_denorm_d
   22591 is_zero_d:
   22592 	mov.b		&ZERO, %d0
   22593 	rts
   22594 is_denorm_d:
   22595 	mov.b		&DENORM, %d0
   22596 	rts
   22597 inf_or_nan_d:
   22598 	and.l		&0x000fffff, %d1	# upper fraction bits
   22599 	bne		is_nan_d
   22600 	tst.l		4+FTEMP(%a0)		# lower longword
   22601 	bne		is_nan_d
   22602 is_inf_d:
   22603 	mov.b		&INF, %d0
   22604 	rts
   22605 is_nan_d:
   22606 	btst		&19, %d1		# top fraction bit set?
   22607 	bne		is_qnan_d		# yes; it's a QNAN
   22608 is_snan_d:
   22609 	mov.b		&SNAN, %d0
   22610 	rts
   22611 is_qnan_d:
   22612 	mov.b		&QNAN, %d0
   22613 	rts
   22614 
   22615 #########################################################################
   22616 # XDEF ****************************************************************	#
   22617 # 	set_tag_s(): return the optype of the input sgl fp number	#
   22618 #									#
   22619 # XREF ****************************************************************	#
   22620 #	None								#
   22621 #									#
   22622 # INPUT ***************************************************************	#
   22623 #	a0 = pointer to single precision operand			#
   22624 # 									#
   22625 # OUTPUT **************************************************************	#
   22626 #	d0 = value of type tag						#
   22627 # 		one of: NORM, INF, QNAN, SNAN, DENORM, ZERO		#
   22628 #									#
   22629 # ALGORITHM ***********************************************************	#
   22630 #	Simply test the exponent, j-bit, and mantissa values to 	#
   22631 # determine the type of operand.					#
   22632 #									#
   22633 #########################################################################
   22634 
   22635 	global		set_tag_s
   22636 set_tag_s:
   22637 	mov.l		FTEMP(%a0), %d0		# fetch the sgl operand
   22638 	mov.l		%d0, %d1		# keep a copy for the fraction tests
   22639 
   22640 	andi.l		&0x7f800000, %d0	# isolate exponent field
   22641 	beq.b		zero_or_denorm_s	# exponent = 0
   22642 
   22643 	cmpi.l		%d0, &0x7f800000	# is exponent all ones?
   22644 	beq.b		inf_or_nan_s		# yes
   22645 
   22646 is_norm_s:
   22647 	mov.b		&NORM, %d0
   22648 	rts
   22649 zero_or_denorm_s:
   22650 	and.l		&0x007fffff, %d1	# fraction bits
   22651 	bne		is_denorm_s
   22652 is_zero_s:
   22653 	mov.b		&ZERO, %d0
   22654 	rts
   22655 is_denorm_s:
   22656 	mov.b		&DENORM, %d0
   22657 	rts
   22658 inf_or_nan_s:
   22659 	and.l		&0x007fffff, %d1	# fraction bits
   22660 	bne		is_nan_s
   22661 is_inf_s:
   22662 	mov.b		&INF, %d0
   22663 	rts
   22664 is_nan_s:
   22665 	btst		&22, %d1		# top fraction bit set?
   22666 	bne		is_qnan_s		# yes; it's a QNAN
   22667 is_snan_s:
   22668 	mov.b		&SNAN, %d0
   22669 	rts
   22670 is_qnan_s:
   22671 	mov.b		&QNAN, %d0
   22672 	rts
   22673 
   22674 #########################################################################
   22675 # XDEF ****************************************************************	#
   22676 # 	unf_res(): routine to produce default underflow result of a 	#
   22677 #	 	   scaled extended precision number; this is used by 	#
   22678 #		   fadd/fdiv/fmul/etc. emulation routines.		#
   22679 # 	unf_res4(): same as above but for fsglmul/fsgldiv which use	#
   22680 #		    single round prec and extended prec mode.		#
   22681 #									#
   22682 # XREF ****************************************************************	#
   22683 #	_denorm() - denormalize according to scale factor		#
   22684 # 	_round() - round denormalized number according to rnd prec	#
   22685 #									#
   22686 # INPUT ***************************************************************	#
   22687 #	a0 = pointer to extended precision operand			#
   22688 #	d0 = scale factor						#
   22689 #	d1 = rounding precision/mode					#
   22690 #									#
   22691 # OUTPUT **************************************************************	#
   22692 #	a0 = pointer to default underflow result in extended precision	#
   22693 #	d0.b = result FPSR_cc which caller may or may not want to save	#
   22694 #									#
   22695 # ALGORITHM ***********************************************************	#
   22696 # 	Convert the input operand to "internal format" which means the	#
   22697 # exponent is extended to 16 bits and the sign is stored in the unused	#
   22698 # portion of the extended precision operand. Denormalize the number	#
   22699 # according to the scale factor passed in d0. Then, round the 		#
   22700 # denormalized result.							#
   22701 # 	Set the FPSR_exc bits as appropriate but return the cc bits in	#
   22702 # d0 in case the caller doesn't want to save them (as is the case for	#
   22703 # fmove out).								#
   22704 # 	unf_res4() for fsglmul/fsgldiv forces the denorm to extended	#
   22705 # precision and the rounding precision to single.			#
   22706 #									#
   22707 #########################################################################
   22708 	global		unf_res
   22709 unf_res:
   22710 	mov.l		%d1, -(%sp)		# save rnd prec,mode on stack
   22711 
   22712 	btst		&0x7, FTEMP_EX(%a0)	# make "internal" format
   22713 	sne		FTEMP_SGN(%a0)		# FTEMP_SGN = all ones if sign bit was set
   22714 
   22715 	mov.w		FTEMP_EX(%a0), %d1	# extract exponent
   22716 	and.w		&0x7fff, %d1		# strip off sign
   22717 	sub.w		%d0, %d1		# subtract scale factor
   22718 	mov.w		%d1, FTEMP_EX(%a0)	# insert 16 bit exponent
   22719 
   22720 	mov.l		%a0, -(%sp)		# save operand ptr during calls
   22721 
   22722 	mov.l		0x4(%sp),%d0		# pass rnd prec.
   22723 	andi.w		&0x00c0,%d0		# keep only the rnd prec bits
   22724 	lsr.w		&0x4,%d0
   22725 	bsr.l		_denorm			# denorm result
   22726 
   22727 	mov.l		(%sp),%a0		# reload operand ptr
   22728 	mov.w		0x6(%sp),%d1		# load prec:mode into %d1
   22729 	andi.w		&0xc0,%d1		# extract rnd prec
   22730 	lsr.w		&0x4,%d1
   22731 	swap		%d1			# rnd prec goes in hi(d1)
   22732 	mov.w		0x6(%sp),%d1		# load prec:mode again
   22733 	andi.w		&0x30,%d1		# extract rnd mode
   22734 	lsr.w		&0x4,%d1		# rnd mode goes in lo(d1)
   22735 	bsr.l		_round			# round the denorm
   22736 
   22737 	mov.l		(%sp)+, %a0
   22738 
   22739 # result is now rounded properly. convert back to normal format
   22740 	bclr		&0x7, FTEMP_EX(%a0)	# clear sgn first; may have residue
   22741 	tst.b		FTEMP_SGN(%a0)		# is "internal result" sign set?
   22742 	beq.b		unf_res_chkifzero	# no; result is positive
   22743 	bset		&0x7, FTEMP_EX(%a0)	# set result sgn
   22744 	clr.b		FTEMP_SGN(%a0)		# clear temp sign
   22745 
   22746 # the number may have become zero after rounding. set ccodes accordingly.
   22747 unf_res_chkifzero:
   22748 	clr.l		%d0			# start with no ccode bits set
   22749 	tst.l		FTEMP_HI(%a0)		# is value now a zero?
   22750 	bne.b		unf_res_cont		# no
   22751 	tst.l		FTEMP_LO(%a0)
   22752 	bne.b		unf_res_cont		# no
   22753 #	bset		&z_bit, FPSR_CC(%a6)	# yes; set zero ccode bit
   22754 	bset		&z_bit, %d0		# yes; set zero ccode bit
   22755 
   22756 unf_res_cont:
   22757 
   22758 #
   22759 # can inex1 also be set along with unfl and inex2???
   22760 #
   22761 # we know that underflow has occurred. aunfl should be set if INEX2 is also set.
   22762 #
   22763 	btst		&inex2_bit, FPSR_EXCEPT(%a6) # is INEX2 set?
   22764 	beq.b		unf_res_end		# no
   22765 	bset		&aunfl_bit, FPSR_AEXCEPT(%a6) # yes; set aunfl
   22766 
   22767 unf_res_end:
   22768 	add.l		&0x4, %sp		# clear stack (drop saved rnd prec,mode)
   22769 	rts
   22770 
   22771 # unf_res() for fsglmul() and fsgldiv().
   22772 	global		unf_res4
   22773 unf_res4:
   22774 	mov.l		%d1,-(%sp)		# save rnd prec,mode on stack
   22775 
   22776 	btst		&0x7,FTEMP_EX(%a0)	# make "internal" format
   22777 	sne		FTEMP_SGN(%a0)		# FTEMP_SGN = all ones if sign bit was set
   22778 
   22779 	mov.w		FTEMP_EX(%a0),%d1	# extract exponent
   22780 	and.w		&0x7fff,%d1		# strip off sign
   22781 	sub.w		%d0,%d1			# subtract scale factor
   22782 	mov.w		%d1,FTEMP_EX(%a0)	# insert 16 bit exponent
   22783 
   22784 	mov.l		%a0,-(%sp)		# save operand ptr during calls
   22785 
   22786 	clr.l		%d0			# force rnd prec = ext
   22787 	bsr.l		_denorm			# denorm result
   22788 
   22789 	mov.l		(%sp),%a0		# reload operand ptr
   22790 	mov.w		&s_mode,%d1		# force rnd prec = sgl
   22791 	swap		%d1			# rnd prec goes in hi(d1)
   22792 	mov.w		0x6(%sp),%d1		# load rnd mode
   22793 	andi.w		&0x30,%d1		# extract rnd mode
   22794 	lsr.w		&0x4,%d1		# rnd mode goes in lo(d1)
   22795 	bsr.l		_round			# round the denorm
   22796 
   22797 	mov.l		(%sp)+,%a0
   22798 
   22799 # result is now rounded properly. convert back to normal format
   22800 	bclr		&0x7,FTEMP_EX(%a0)	# clear sgn first; may have residue
   22801 	tst.b		FTEMP_SGN(%a0)		# is "internal result" sign set?
   22802 	beq.b		unf_res4_chkifzero	# no; result is positive
   22803 	bset		&0x7,FTEMP_EX(%a0)	# set result sgn
   22804 	clr.b		FTEMP_SGN(%a0)		# clear temp sign
   22805 
   22806 # the number may have become zero after rounding. set ccodes accordingly.
   22807 unf_res4_chkifzero:
   22808 	clr.l		%d0			# start with no ccode bits set
   22809 	tst.l		FTEMP_HI(%a0)		# is value now a zero?
   22810 	bne.b		unf_res4_cont		# no
   22811 	tst.l		FTEMP_LO(%a0)
   22812 	bne.b		unf_res4_cont		# no
   22813 #	bset		&z_bit,FPSR_CC(%a6)	# yes; set zero ccode bit
   22814 	bset		&z_bit,%d0		# yes; set zero ccode bit
   22815 
   22816 unf_res4_cont:
   22817 
   22818 #
   22819 # can inex1 also be set along with unfl and inex2???
   22820 #
   22821 # we know that underflow has occurred. aunfl should be set if INEX2 is also set.
   22822 #
   22823 	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
   22824 	beq.b		unf_res4_end		# no
   22825 	bset		&aunfl_bit,FPSR_AEXCEPT(%a6) # yes; set aunfl
   22826 
   22827 unf_res4_end:
   22828 	add.l		&0x4,%sp		# clear stack (drop saved rnd prec,mode)
   22829 	rts
   22830 
   22831 #########################################################################
   22832 # XDEF ****************************************************************	#
   22833 #	ovf_res(): routine to produce the default overflow result of	#
   22834 #		   an overflowing number.				#
   22835 #	ovf_res2(): same as above but the rnd mode/prec are passed	#
   22836 #		    differently.					#
   22837 #									#
   22838 # XREF ****************************************************************	#
   22839 #	none								#
   22840 #									#
   22841 # INPUT ***************************************************************	#
   22842 #	d1.b 	= '-1' => (-); '0' => (+)				#
   22843 #   ovf_res():								#
   22844 #	d0 	= rnd mode/prec						#
   22845 #   ovf_res2():								#
   22846 #	hi(d0) 	= rnd prec						#
   22847 #	lo(d0)	= rnd mode						#
   22848 #									#
   22849 # OUTPUT **************************************************************	#
   22850 #	a0   	= points to extended precision result			#
   22851 #	d0.b 	= condition code bits					#
   22852 #									#
   22853 # ALGORITHM ***********************************************************	#
   22854 #	The default overflow result can be determined by the sign of	#
   22855 # the result and the rounding mode/prec in effect. These bits are	#
   22856 # concatenated together to create an index into the default result 	#
   22857 # table. A pointer to the correct result is returned in a0. The		#
   22858 # resulting condition codes are returned in d0 in case the caller 	#
   22859 # doesn't want FPSR_cc altered (as is the case for fmove out).		#
   22860 #									#
   22861 #########################################################################
   22862 
   22863 	global		ovf_res
   22864 ovf_res:
   22865 	andi.w		&0x10,%d1		# keep result sign
   22866 	lsr.b		&0x4,%d0		# shift prec/mode
   22867 	or.b		%d0,%d1			# concat the two
   22868 	mov.w		%d1,%d0			# make a copy; d0 indexes the ccode table
   22869 	lsl.b		&0x1,%d1		# multiply d1 by 2; scaled by 8 at the lea
   22870 	bra.b		ovf_res_load		# share the table lookup with ovf_res2
   22871 
   22872 	global		ovf_res2
   22873 ovf_res2:
   22874 	and.w		&0x10, %d1		# keep result sign
   22875 	or.b		%d0, %d1		# insert rnd mode
   22876 	swap		%d0			# bring rnd prec down from hi(d0)
   22877 	or.b		%d0, %d1		# insert rnd prec
   22878 	mov.w		%d1, %d0		# make a copy; d0 indexes the ccode table
   22879 	lsl.b		&0x1, %d1		# shift left by 1; scaled by 8 at the lea
   22880 
   22881 #
   22882 # use the rounding mode, precision, and result sign as an index into the
   22883 # two tables below to fetch the default result and the result ccodes.
   22884 #
   22885 ovf_res_load:
   22886 	mov.b		(tbl_ovfl_cc.b,%pc,%d0.w*1), %d0 # fetch result ccodes
   22887 	lea		(tbl_ovfl_result.b,%pc,%d1.w*8), %a0 # return result ptr
   22888 
   22889 	rts
   22890 
   22891 tbl_ovfl_cc:
   22892 	byte		0x2, 0x0, 0x0, 0x2	# (+) ext: RN, RZ, RM, RP
   22893 	byte		0x2, 0x0, 0x0, 0x2	# (+) sgl: RN, RZ, RM, RP
   22894 	byte		0x2, 0x0, 0x0, 0x2	# (+) dbl: RN, RZ, RM, RP
   22895 	byte		0x0, 0x0, 0x0, 0x0	# (+) prec field = 3; presumably unused
   22896 	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8 # (-) ext: RN, RZ, RM, RP
   22897 	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8 # (-) sgl: RN, RZ, RM, RP
   22898 	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8 # (-) dbl: RN, RZ, RM, RP
   22899 
   22900 tbl_ovfl_result:
   22901 	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
   22902 	long		0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RZ
   22903 	long		0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RM
   22904 	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
   22905 
   22906 	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
   22907 	long		0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RZ
   22908 	long		0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RM
   22909 	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
   22910 
   22911 	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
   22912 	long		0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RZ
   22913 	long		0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RM
   22914 	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
   22915 
   22916 	long		0x00000000,0x00000000,0x00000000,0x00000000 # (+) prec field = 3; presumably unused
   22917 	long		0x00000000,0x00000000,0x00000000,0x00000000 # (+) prec field = 3; presumably unused
   22918 	long		0x00000000,0x00000000,0x00000000,0x00000000 # (+) prec field = 3; presumably unused
   22919 	long		0x00000000,0x00000000,0x00000000,0x00000000 # (+) prec field = 3; presumably unused
   22920 
   22921 	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
   22922 	long		0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RZ
   22923 	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
   22924 	long		0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RP
   22925 
   22926 	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
   22927 	long		0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RZ
   22928 	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
   22929 	long		0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RP
   22930 
   22931 	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
   22932 	long		0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RZ
   22933 	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
   22934 	long		0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RP
   22935 
   22936 #########################################################################
   22937 # XDEF ****************************************************************	#
   22938 #	get_packed(): fetch a packed operand from memory and then	#
   22939 #		      convert it to a floating-point binary number.	#
   22940 #									#
   22941 # XREF ****************************************************************	#
   22942 #	_dcalc_ea() - calculate the correct <ea>			#
   22943 #	_dmem_read() - fetch the packed operand from memory		#
   22944 #	facc_in_x() - the fetch failed so jump to special exit code	#
   22945 #	decbin()    - convert packed to binary extended precision	#
   22946 #									#
   22947 # INPUT ***************************************************************	#
   22948 #	None								#
   22949 # 									#
   22950 # OUTPUT **************************************************************	#
   22951 #	If no failure on _dmem_read():					#
   22952 # 	FP_SRC(a6) = packed operand now as a binary FP number		#
   22953 #									#
   22954 # ALGORITHM ***********************************************************	#
   22955 #	Get the correct <ea> which is the value on the exception stack 	#
   22956 # frame w/ maybe a correction factor if the <ea> is -(an) or (an)+.	#
   22957 # Then, fetch the operand from memory. If the fetch fails, exit		#
   22958 # through facc_in_x().							#
   22959 #	If the packed operand is a ZERO,NAN, or INF, convert it to	#
   22960 # its binary representation here. Else, call decbin() which will 	#
   22961 # convert the packed value to an extended precision binary value.	#
   22962 #									#
   22963 #########################################################################
   22964 
   22965 # the stacked <ea> for packed is correct except for -(An).
   22966 # the base reg must be updated for both -(An) and (An)+.
   22967 	global		get_packed
   22968 get_packed:
   22969 	mov.l		&0xc,%d0		# pass: operand size; packed is 12 bytes
   22970 	bsr.l		_dcalc_ea		# fetch <ea>; correct An
   22971 
   22972 	lea		FP_SRC(%a6),%a1		# pass: ptr to super dst
   22973 	mov.l		&0xc,%d0		# pass: 12 bytes
   22974 	bsr.l		_dmem_read		# read packed operand into FP_SRC
   22975 
   22976 	tst.l		%d1			# did _dmem_read fail? (d1 != 0)
   22977 	bne.l		facc_in_x		# yes; leave through access-error exit
   22978 
   22979 # The packed operand is an INF or a NAN if the exponent field is all ones.
   22980 	bfextu		FP_SRC(%a6){&1:&15},%d0	# get 15 bits following the sign bit
   22981 	cmpi.w		%d0,&0x7fff		# INF or NAN?
   22982 	bne.b		gp_try_zero		# no
   22983 	rts					# INF or NAN: return with FP_SRC as fetched
   22984 
   22985 # The packed operand is a zero if the mantissa is all zero, else it's
   22986 # a normal packed op.
   22987 gp_try_zero:
   22988 	mov.b		3+FP_SRC(%a6),%d0	# get 4th byte (integer digit in low nybble)
   22989 	andi.b		&0x0f,%d0		# clear all but last nybble
   22990 	bne.b		gp_not_spec		# not a zero
   22991 	tst.l		FP_SRC_HI(%a6)		# is lw 2 zero?
   22992 	bne.b		gp_not_spec		# not a zero
   22993 	tst.l		FP_SRC_LO(%a6)		# is lw 3 zero?
   22994 	bne.b		gp_not_spec		# not a zero
   22995 	rts					# ZERO: return with FP_SRC as fetched
   22996 gp_not_spec:
   22997 	lea		FP_SRC(%a6),%a0		# pass: ptr to packed op
   22998 	bsr.l		decbin			# convert to extended; result in fp0
   22999 	fmovm.x		&0x80,FP_SRC(%a6)	# store fp0 to FP_SRC; this is the srcop
   23000 	rts
   23001 
   23002 #########################################################################
   23003 # decbin(): Converts normalized packed bcd value pointed to by register	#
   23004 #	    a0 to extended-precision value in fp0.			#
   23005 #									#
   23006 # INPUT ***************************************************************	#
   23007 #	a0 = pointer to normalized packed bcd value			#
   23008 #									#
   23009 # OUTPUT **************************************************************	#
   23010 #	fp0 = exact fp representation of the packed bcd value.		#
   23011 #									#
   23012 # ALGORITHM ***********************************************************	#
   23013 #	Expected is a normal bcd (i.e. non-exceptional; all inf, zero,	#
   23014 #	and NaN operands are dispatched without entering this routine)	#
   23015 #	value in 68881/882 format at location (a0).			#
   23016 #									#
   23017 #	A1. Convert the bcd exponent to binary by successive adds and 	#
   23018 #	muls. Set the sign according to SE. Subtract 16 to compensate	#
   23019 #	for the mantissa which is to be interpreted as 17 integer	#
   23020 #	digits, rather than 1 integer and 16 fraction digits.		#
   23021 #	Note: this operation can never overflow.			#
   23022 #									#
   23023 #	A2. Convert the bcd mantissa to binary by successive		#
   23024 #	adds and muls in FP0. Set the sign according to SM.		#
   23025 #	The mantissa digits will be converted with the decimal point	#
   23026 #	assumed following the least-significant digit.			#
   23027 #	Note: this operation can never overflow.			#
   23028 #									#
   23029 #	A3. Count the number of leading/trailing zeros in the		#
   23030 #	bcd string.  If SE is positive, count the leading zeros;	#
   23031 #	if negative, count the trailing zeros.  Set the adjusted	#
   23032 #	exponent equal to the exponent from A1 and the zero count	#
   23033 #	added if SE = 1 and subtracted if SE = 0.  Scale the		#
   23034 #	mantissa the equivalent of forcing in the bcd value:		#
   23035 #									#
   23036 #	SE = 0	a non-zero digit in the integer position		#
   23037 #	SE = 1	a non-zero digit in Mant0, lsd of the fraction		#
   23038 #									#
   23039 #	this will insure that any value, regardless of its		#
   23040 #	representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted	#
   23041 #	consistently.							#
   23042 #									#
   23043 #	A4. Calculate the factor 10^exp in FP1 using a table of		#
   23044 #	10^(2^n) values.  To reduce the error in forming factors	#
   23045 #	greater than 10^27, a directed rounding scheme is used with	#
   23046 #	tables rounded to RN, RM, and RP, according to the table	#
   23047 #	in the comments of the pwrten section.				#
   23048 #									#
   23049 #	A5. Form the final binary number by scaling the mantissa by	#
   23050 #	the exponent factor.  This is done by multiplying the		#
   23051 #	mantissa in FP0 by the factor in FP1 if the adjusted		#
   23052 #	exponent sign is positive, and dividing FP0 by FP1 if		#
   23053 #	it is negative.							#
   23054 #									#
   23055 #	Clean up and return. Check if the final mul or div was inexact.	#
   23056 #	If so, set INEX1 in USER_FPSR.					#
   23057 #									#
   23058 #########################################################################
   23059 
   23060 #
   23061 #	PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded
   23062 #	to nearest, minus, and plus, respectively.  The tables include
   23063 #	10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}.  No rounding
   23064 #	is required until the power is greater than 27, however, all
   23065 #	tables include the first 5 for ease of indexing.
   23066 #
   23067 RTABLE:	# pwrten rounding: row = user rnd mode, col = {SM,SE}; 0=RN 2=RM 3=RP
   23068 	byte		0,0,0,0			# user RN
   23069 	byte		2,3,2,3			# user RZ
   23070 	byte		2,3,3,2			# user RM
   23071 	byte		3,2,2,3			# user RP
   23072 
   23073 	set		FNIBS,7			# dbf count: 8 mantissa digits per lword
   23074 	set		FSTRT,0			# bit offset of first mantissa digit in a lword
   23075 
   23076 	set		ESTRT,4			# bit offset of first exponent digit in lword 1
   23077 	set		EDIGITS,2		# dbf count: 3 exponent digits
   23079 	global		decbin
   23080 decbin:
   23081 	mov.l		0x0(%a0),FP_SCR0_EX(%a6) # make a copy of input
   23082 	mov.l		0x4(%a0),FP_SCR0_HI(%a6) # so we don't alter it
   23083 	mov.l		0x8(%a0),FP_SCR0_LO(%a6)
   23084 
   23085 	lea		FP_SCR0(%a6),%a0
   23086 
   23087 	movm.l		&0x3c00,-(%sp)		# save d2-d5
   23088 	fmovm.x		&0x1,-(%sp)		# save fp1
   23089 #
   23090 # Calculate exponent:
   23091 #  1. Copy bcd value in memory for use as a working copy.
   23092 #  2. Calculate absolute value of exponent in d1 by mul and add.
   23093 #  3. Correct for exponent sign.
   23094 #  4. Subtract 16 to compensate for interpreting the mant as all integer digits.
   23095 #     (i.e., all digits assumed left of the decimal point.)
   23096 #
   23097 # Register usage:
   23098 #
   23099 #  calc_e:
   23100 #	(*)  d0: temp digit storage
   23101 #	(*)  d1: accumulator for binary exponent
   23102 #	(*)  d2: digit count
   23103 #	(*)  d3: offset pointer
   23104 #	( )  d4: first word of bcd
   23105 #	( )  a0: pointer to working bcd value
   23106 #	( )  a6: pointer to original bcd value
   23107 #	(*)  FP_SCR0: working copy of original bcd value
   23108 #	(*)  L_SCR1: copy of original exponent word
   23109 #
   23110 calc_e:
   23111 	mov.l		&EDIGITS,%d2		# dbf count for the exponent digits
   23112 	mov.l		&ESTRT,%d3		# bit offset of first exponent digit
   23113 	mov.l		(%a0),%d4		# get first lword of bcd
   23114 	clr.l		%d1			# zero d1 for accumulator
   23115 e_gd:
   23116 	mulu.l		&0xa,%d1		# mul partial product by one digit place
   23117 	bfextu		%d4{%d3:&4},%d0		# get the digit and zero extend into d0
   23118 	add.l		%d0,%d1			# d1 = d1 + d0
   23119 	addq.b		&4,%d3			# advance d3 to the next digit
   23120 	dbf.w		%d2,e_gd		# if we have used all 3 digits, exit loop
   23121 	btst		&30,%d4			# get SE
   23122 	beq.b		e_pos			# don't negate if pos
   23123 	neg.l		%d1			# negate before subtracting
   23124 e_pos:
   23125 	sub.l		&16,%d1			# sub to compensate for shift of mant
   23126 	bge.b		e_save			# if still pos, do not neg
   23127 	neg.l		%d1			# now negative, make pos and set SE
   23128 	or.l		&0x40000000,%d4		# set SE in d4,
   23129 	or.l		&0x40000000,(%a0)	# and in working bcd
   23130 e_save:
   23131 	mov.l		%d1,-(%sp)		# save abs(adjusted exp) on stack; popped in no_exc
   23132 #
   23133 #
   23134 # Calculate mantissa:
   23135 #  1. Calculate absolute value of mantissa in fp0 by mul and add.
   23136 #  2. Correct for mantissa sign.
   23137 #     (i.e., all digits assumed left of the decimal point.)
   23138 #
   23139 # Register usage:
   23140 #
   23141 #  calc_m:
   23142 #	(*)  d0: temp digit storage
   23143 #	(*)  d1: lword counter
   23144 #	(*)  d2: digit count
   23145 #	(*)  d3: offset pointer
   23146 #	( )  d4: words 2 and 3 of bcd
   23147 #	( )  a0: pointer to working bcd value
   23148 #	( )  a6: pointer to original bcd value
   23149 #	(*) fp0: mantissa accumulator
   23150 #	( )  FP_SCR0: working copy of original bcd value
   23151 #	( )  L_SCR1: copy of original exponent word
   23152 #
   23153 calc_m:
   23154 	mov.l		&1,%d1			# lword index, init to 1 (lword 2)
   23155 	fmov.s		&0x00000000,%fp0	# accumulator = 0.0
   23156 #
   23157 #
   23158 #  Since the packed number has a long word between the first & second parts,
   23159 #  get the integer digit then skip down & get the rest of the
   23160 #  mantissa.  We will unroll the loop once.
   23161 #
   23162 	bfextu		(%a0){&28:&4},%d0	# integer part is ls digit in long word
   23163 	fadd.b		%d0,%fp0		# add digit to sum in fp0
   23164 #
   23165 #
   23166 #  Get the rest of the mantissa.
   23167 #
   23168 loadlw:
   23169 	mov.l		(%a0,%d1.L*4),%d4	# load mantissa longword into d4
   23170 	mov.l		&FSTRT,%d3		# bit offset of first digit in this lw
   23171 	mov.l		&FNIBS,%d2		# reset dbf digit count for this lw
   23172 md2b:
   23173 	fmul.s		&0x41200000,%fp0	# fp0 = fp0 * 10
   23174 	bfextu		%d4{%d3:&4},%d0		# get the digit and zero extend
   23175 	fadd.b		%d0,%fp0		# fp0 = fp0 + digit
   23176 #
   23177 #
   23178 #  If all the digits (8) in that long word have been converted (d2=0),
   23179 #  then inc d1 (=2) to point to the next long word and reset d3 to 0
   23180 #  to initialize the digit offset, and set d2 to 7 for the digit count;
   23181 #  else continue with this long word.
   23182 #
   23183 	addq.b		&4,%d3			# advance d3 to the next digit
   23184 	dbf.w		%d2,md2b		# check for last digit in this lw
   23185 nextlw:
   23186 	addq.l		&1,%d1			# inc lw index in mantissa
   23187 	cmp.l		%d1,&2			# test for last lw
   23188 	ble.b		loadlw			# if d1 <= 2, convert that lw too
   23189 #
   23190 #  Check the sign of the mant and make the value in fp0 the same sign.
   23191 #
   23192 m_sign:
   23193 	btst		&31,(%a0)		# test sign of the mantissa (SM)
   23194 	beq.b		ap_st_z			# if clear, go to append/strip zeros
   23195 	fneg.x		%fp0			# if set, negate fp0
   23196 #
   23197 # Append/strip zeros:
   23198 #
   23199 #  For adjusted exponents which have an absolute value greater than 27*,
   23200 #  this routine calculates the amount needed to normalize the mantissa
   23201 #  for the adjusted exponent.  That number is subtracted from the exp
   23202 #  if the exp was positive, and added if it was negative.  The purpose
   23203 #  of this is to reduce the value of the exponent and the possibility
   23204 #  of error in calculation of pwrten.
   23205 #
   23206 #  1. Branch on the sign of the adjusted exponent.
   23207 #  2p.(positive exp)
   23208 #   2. Check M16 and the digits in lwords 2 and 3 in descending order.
   23209 #   3. Add one for each zero encountered until a non-zero digit.
   23210 #   4. Subtract the count from the exp.
   23211 #   5. Check if the exp has crossed zero in #3 above; make the exp abs
   23212 #	   and set SE.
   23213 #	6. Multiply the mantissa by 10**count.
   23214 #  2n.(negative exp)
   23215 #   2. Check the digits in lwords 3 and 2 in descending order.
   23216 #   3. Add one for each zero encountered until a non-zero digit.
   23217 #   4. Add the count to the exp.
   23218 #   5. Check if the exp has crossed zero in #3 above; clear SE.
   23219 #   6. Divide the mantissa by 10**count.
   23220 #
   23221 #  *Why 27?  If the adjusted exponent is within -28 < expA < 28, then
   23222 #   any adjustment due to append/strip zeros will drive the resultant
   23223 #   exponent towards zero.  Since all pwrten constants with a power
   23224 #   of 27 or less are exact, there is no need to use this routine to
   23225 #   attempt to lessen the resultant exponent.
   23226 #
   23227 # Register usage:
   23228 #
   23229 #  ap_st_z:
   23230 #	(*)  d0: temp digit storage
   23231 #	(*)  d1: zero count
   23232 #	(*)  d2: digit count
   23233 #	(*)  d3: offset pointer
   23234 #	( )  d4: first word of bcd
   23235 #	(*)  d5: lword counter
   23236 #	( )  a0: pointer to working bcd value
   23237 #	( )  FP_SCR0: working copy of original bcd value
   23238 #	( )  L_SCR1: copy of original exponent word
   23239 #
   23240 #
   23241 # First check the absolute value of the exponent to see if this
   23242 # routine is necessary.  If so, then check the sign of the exponent
   23243 # and do append (+) or strip (-) zeros accordingly.
   23244 # This section handles a positive adjusted exponent.
   23245 #
   23246 ap_st_z:
   23247 	mov.l		(%sp),%d1		# load expA (abs value saved at e_save)
   23248 	cmp.l		%d1,&27			# test is with 27
   23249 	ble.w		pwrten			# if abs(expA) <28, skip ap/st zeros
   23250 	btst		&30,(%a0)		# check sign of exp
   23251 	bne.b		ap_st_n			# if neg, go to neg side
   23252 	clr.l		%d1			# zero count reg
   23253 	mov.l		(%a0),%d4		# load lword 1 to d4
   23254 	bfextu		%d4{&28:&4},%d0		# get M16 in d0
   23255 	bne.b		ap_p_fx			# if M16 is non-zero, go fix exp
   23256 	addq.l		&1,%d1			# inc zero count
   23257 	mov.l		&1,%d5			# init lword counter
   23258 	mov.l		(%a0,%d5.L*4),%d4	# get lword 2 to d4
   23259 	bne.b		ap_p_cl			# lw 2 is non-zero: scan its digits
   23260 	addq.l		&8,%d1			# lw 2 is all zeros: inc count by 8
   23261 	addq.l		&1,%d5			# inc lword counter
   23262 	mov.l		(%a0,%d5.L*4),%d4	# get lword 3 to d4
   23263 ap_p_cl:
   23264 	clr.l		%d3			# init offset reg
   23265 	mov.l		&7,%d2			# init digit counter
   23266 ap_p_gd:
   23267 	bfextu		%d4{%d3:&4},%d0		# get digit
   23268 	bne.b		ap_p_fx			# if non-zero, go to fix exp
   23269 	addq.l		&4,%d3			# point to next digit
   23270 	addq.l		&1,%d1			# inc zero count
   23271 	dbf.w		%d2,ap_p_gd		# get next digit
   23272 ap_p_fx:
   23273 	mov.l		%d1,%d0			# copy zero count to d0
   23274 	mov.l		(%sp),%d1		# get adjusted exp from memory
   23275 	sub.l		%d0,%d1			# subtract count from exp
   23276 	bge.b		ap_p_fm			# if still pos, go scale the mantissa
   23277 	neg.l		%d1			# now its neg; get abs
   23278 	mov.l		(%a0),%d4		# load lword 1 to d4
   23279 	or.l		&0x40000000,%d4		# and set SE in d4
   23280 	or.l		&0x40000000,(%a0)	# and in memory
   23281 #
   23282 # Calculate the mantissa multiplier (10**count) that compensates for the
   23283 # leading zeros counted above; the mantissa is multiplied by it.
   23284 #
   23285 ap_p_fm:
   23286 	lea.l		PTENRN(%pc),%a1		# get address of power-of-ten table
   23287 	clr.l		%d3			# init table index
   23288 	fmov.s		&0x3f800000,%fp1	# init fp1 to 1
   23289 	mov.l		&3,%d2			# d2 is not read again before pwrten reloads it
   23290 ap_p_el:
   23291 	asr.l		&1,%d0			# shift lsb into carry
   23292 	bcc.b		ap_p_en			# bit was 0: skip the multiply
   23293 	fmul.x		(%a1,%d3),%fp1		# mul by 10**(d3_bit_no)
   23294 ap_p_en:
   23295 	add.l		&12,%d3			# step d3 to next 12-byte table entry
   23296 	tst.l		%d0			# check if d0 is zero
   23297 	bne.b		ap_p_el			# if not, get next bit
   23298 	fmul.x		%fp1,%fp0		# mul mantissa by 10**(zero count)
   23299 	bra.b		pwrten			# go calc pwrten
   23300 #
   23301 # This section handles a negative adjusted exponent.
   23302 #
   23303 ap_st_n:
   23304 	clr.l		%d1			# clr zero counter
   23305 	mov.l		&2,%d5			# set up d5 to point to lword 3
   23306 	mov.l		(%a0,%d5.L*4),%d4	# get lword 3
   23307 	bne.b		ap_n_cl			# if not zero, check digits
   23308 	sub.l		&1,%d5			# dec d5 to point to lword 2
   23309 	addq.l		&8,%d1			# lw 3 is all zeros: inc counter by 8
   23310 	mov.l		(%a0,%d5.L*4),%d4	# get lword 2
   23311 ap_n_cl:
   23312 	mov.l		&28,%d3			# point to last digit
   23313 	mov.l		&7,%d2			# init digit counter
   23314 ap_n_gd:
   23315 	bfextu		%d4{%d3:&4},%d0		# get digit
   23316 	bne.b		ap_n_fx			# if non-zero, go to exp fix
   23317 	subq.l		&4,%d3			# point to previous digit
   23318 	addq.l		&1,%d1			# inc zero count
   23319 	dbf.w		%d2,ap_n_gd		# get next digit
   23320 ap_n_fx:
   23321 	mov.l		%d1,%d0			# copy zero count to d0
   23322 	mov.l		(%sp),%d1		# get adjusted exp (abs value) from memory
   23323 	sub.l		%d0,%d1			# subtract count from abs(exp)
   23324 	bgt.b		ap_n_fm			# if still > 0, go fix mantissa
   23325 	neg.l		%d1			# take abs of exp and clr SE
   23326 	mov.l		(%a0),%d4		# load lword 1 to d4
   23327 	and.l		&0xbfffffff,%d4		# and clr SE in d4
   23328 	and.l		&0xbfffffff,(%a0)	# and in memory
   23329 #
   23330 # Calculate the mantissa divisor (10**count) that compensates for the
   23331 # trailing zeros counted above; the mantissa is divided by it.
   23332 #
   23333 ap_n_fm:
   23334 	lea.l		PTENRN(%pc),%a1		# get address of power-of-ten table
   23335 	clr.l		%d3			# init table index
   23336 	fmov.s		&0x3f800000,%fp1	# init fp1 to 1
   23337 	mov.l		&3,%d2			# d2 is not read again before pwrten reloads it
   23338 ap_n_el:
   23339 	asr.l		&1,%d0			# shift lsb into carry
   23340 	bcc.b		ap_n_en			# bit was 0: skip the multiply
   23341 	fmul.x		(%a1,%d3),%fp1		# mul by 10**(d3_bit_no)
   23342 ap_n_en:
   23343 	add.l		&12,%d3			# step d3 to next 12-byte table entry
   23344 	tst.l		%d0			# check if d0 is zero
   23345 	bne.b		ap_n_el			# if not, get next bit
   23346 	fdiv.x		%fp1,%fp0		# div mantissa by 10**(zero count)
   23347 #
   23348 #
   23349 # Calculate power-of-ten factor from adjusted and shifted exponent.
   23350 #
   23351 # Register usage:
   23352 #
   23353 #  pwrten:
   23354 #	(*)  d0: temp
   23355 #	( )  d1: exponent
   23356 #	(*)  d2: {FPCR[5:4],SM,SE} as index in RTABLE; temp
   23357 #	(*)  d3: FPCR work copy
   23358 #	( )  d4: first word of bcd
   23359 #	(*)  a1: RTABLE pointer
   23360 #  calc_p:
   23361 #	(*)  d0: temp
   23362 #	( )  d1: exponent
   23363 #	(*)  d3: PWRTxx table index
   23364 #	( )  a0: pointer to working copy of bcd
   23365 #	(*)  a1: PWRTxx pointer
   23366 #	(*) fp1: power-of-ten accumulator
   23367 #
   23368 # Pwrten calculates the exponent factor in the selected rounding mode
   23369 # according to the following table:
   23370 #
   23371 #	Sign of Mant  Sign of Exp  Rounding Mode  PWRTEN Rounding Mode
   23372 #
   23373 #	ANY	  ANY	RN	RN
   23374 #
   23375 #	 +	   +	RP	RP
   23376 #	 -	   +	RP	RM
   23377 #	 +	   -	RP	RM
   23378 #	 -	   -	RP	RP
   23379 #
   23380 #	 +	   +	RM	RM
   23381 #	 -	   +	RM	RP
   23382 #	 +	   -	RM	RP
   23383 #	 -	   -	RM	RM
   23384 #
   23385 #	 +	   +	RZ	RM
   23386 #	 -	   +	RZ	RM
   23387 #	 +	   -	RZ	RP
   23388 #	 -	   -	RZ	RP
   23389 #
   23390 #
   23391 pwrten:
   23392 	mov.l		USER_FPCR(%a6),%d3	# get user's FPCR
   23393 	bfextu		%d3{&26:&2},%d2		# isolate rounding mode bits (FPCR[5:4])
   23394 	mov.l		(%a0),%d4		# reload 1st bcd word to d4
   23395 	asl.l		&2,%d2			# format d2 to be
   23396 	bfextu		%d4{&0:&2},%d0		# {FPCR[5],FPCR[4],SM,SE}
   23397 	add.l		%d0,%d2			# in d2 as index into RTABLE
   23398 	lea.l		RTABLE(%pc),%a1		# load rtable base
   23399 	mov.b		(%a1,%d2),%d0		# load new rounding bits from table
   23400 	clr.l		%d3			# clear d3 to force no exc and extended
   23401 	bfins		%d0,%d3{&26:&2}		# stuff new rounding bits in FPCR
   23402 	fmov.l		%d3,%fpcr		# write new FPCR
   23403 	asr.l		&1,%d0			# bit 0 of table value set => RP
   23404 	bcc.b		not_rp			# pick the matching PTENxx table into a1
   23405 	lea.l		PTENRP(%pc),%a1		# it is RP
   23406 	bra.b		calc_p			# go to init section
   23407 not_rp:
   23408 	asr.l		&1,%d0			# bit 1 of table value set => RM
   23409 	bcc.b		not_rm			# neither bit set => RN
   23410 	lea.l		PTENRM(%pc),%a1		# it is RM
   23411 	bra.b		calc_p			# go to init section
   23412 not_rm:
   23413 	lea.l		PTENRN(%pc),%a1		# it is RN
   23414 calc_p:
   23415 	mov.l		%d1,%d0			# copy exp to d0;use d0
   23416 	bpl.b		no_neg			# if exp is negative,
   23417 	neg.l		%d0			# invert it
   23418 	or.l		&0x40000000,(%a0)	# and set SE bit
   23419 no_neg:
   23420 	clr.l		%d3			# table index
   23421 	fmov.s		&0x3f800000,%fp1	# init fp1 to 1
   23422 e_loop:
   23423 	asr.l		&1,%d0			# shift next bit into carry
   23424 	bcc.b		e_next			# if zero, skip the mul
   23425 	fmul.x		(%a1,%d3),%fp1		# mul by 10**(d3_bit_no)
   23426 e_next:
   23427 	add.l		&12,%d3			# step d3 to next 12-byte table entry
   23428 	tst.l		%d0			# check if d0 is zero
   23429 	bne.b		e_loop			# not zero, continue shifting
   23430 #
   23431 #
   23432 #  Check the sign of the adjusted exp and make the value in fp0 the
   23433 #  same sign. If the exp was pos then multiply fp1*fp0;
   23434 #  else divide fp0/fp1.
   23435 #
   23436 # Register Usage:
   23437 #  pnorm:
   23438 #	( )  a0: pointer to working bcd value
   23439 #	(*) fp0: mantissa accumulator
   23440 #	( ) fp1: scaling factor - 10**(abs(exp))
   23441 #
   23442 pnorm:
   23443 	btst		&30,(%a0)		# test the sign of the exponent (SE)
   23444 	beq.b		mul			# if clear, go to multiply
   23445 div:
   23446 	fdiv.x		%fp1,%fp0		# exp is negative: fp0 = mant / 10**abs(exp)
   23447 	bra.b		end_dec
   23448 mul:
   23449 	fmul.x		%fp1,%fp0		# exp is positive: fp0 = mant * 10**exp
   23450 #
   23451 #
   23452 # Clean up and return with result in fp0.
   23453 #
   23454 # If the final mul/div in decbin incurred an inex exception,
   23455 # it will be inex2, but will be reported as inex1 by get_op.
   23456 #
   23457 end_dec:
   23458 	fmov.l		%fpsr,%d0		# get status register
   23459 	bclr		&inex2_bit+8,%d0	# test for inex2 and clear it
   23460 	beq.b		no_exc			# skip this if no exc
   23461 	ori.w		&inx1a_mask,2+USER_FPSR(%a6) # set INEX1/AINEX
   23462 no_exc:
   23463 	add.l		&0x4,%sp		# drop the exponent lw pushed at e_save
   23464 	fmovm.x		(%sp)+,&0x40		# restore fp1
   23465 	movm.l		(%sp)+,&0x3c		# restore d2-d5
   23466 	fmov.l		&0x0,%fpcr		# zero FPCR (pwrten loaded a work copy)
   23467 	fmov.l		&0x0,%fpsr		# zero FPSR
   23468 	rts
   23469 
   23470 #########################################################################
   23471 # bindec(): Converts an input in extended precision format to bcd format#
   23472 #									#
   23473 # INPUT ***************************************************************	#
   23474 #	a0 = pointer to the input extended precision value in memory.	#
   23475 #	     the input may be either normalized, unnormalized, or 	#
   23476 #	     denormalized.						#
   23477 #	d0 = contains the k-factor sign-extended to 32-bits. 		#
   23478 #									#
   23479 # OUTPUT **************************************************************	#
   23480 #	FP_SCR0(a6) = bcd format result on the stack.			#
   23481 #									#
   23482 # ALGORITHM ***********************************************************	#
   23483 #									#
   23484 #	A1.	Set RM and size ext;  Set SIGMA = sign of input.  	#
   23485 #		The k-factor is saved for use in d7. Clear the		#
   23486 #		BINDEC_FLG for separating normalized/denormalized	#
   23487 #		input.  If input is unnormalized or denormalized,	#
   23488 #		normalize it.						#
   23489 #									#
   23490 #	A2.	Set X = abs(input).					#
   23491 #									#
   23492 #	A3.	Compute ILOG.						#
   23493 #		ILOG is the log base 10 of the input value.  It is	#
   23494 #		approximated by adding e + 0.f when the original 	#
   23495 #		value is viewed as 2^^e * 1.f in extended precision.  	#
   23496 #		This value is stored in d6.				#
   23497 #									#
   23498 #	A4.	Clr INEX bit.						#
   23499 #		The operation in A3 above may have set INEX2.  		#
   23500 #									#
   23501 #	A5.	Set ICTR = 0;						#
   23502 #		ICTR is a flag used in A13.  It must be set before the 	#
   23503 #		loop entry A6.						#
   23504 #									#
   23505 #	A6.	Calculate LEN.						#
   23506 #		LEN is the number of digits to be displayed.  The	#
   23507 #		k-factor can dictate either the total number of digits,	#
   23508 #		if it is a positive number, or the number of digits	#
   23509 #		after the decimal point which are to be included as	#
   23510 #		significant.  See the 68882 manual for examples.	#
   23511 #		If LEN is computed to be greater than 17, set OPERR in	#
   23512 #		USER_FPSR.  LEN is stored in d4.			#
   23513 #									#
   23514 #	A7.	Calculate SCALE.					#
   23515 #		SCALE is equal to 10^ISCALE, where ISCALE is the number	#
   23516 #		of decimal places needed to insure LEN integer digits	#
   23517 #		in the output before conversion to bcd. LAMBDA is the	#
   23518 #		sign of ISCALE, used in A9. Fp1 contains		#
   23519 #		10^^(abs(ISCALE)) using a rounding mode which is a	#
   23520 #		function of the original rounding mode and the signs	#
   23521 #		of ISCALE and X.  A table is given in the code.		#
   23522 #									#
   23523 #	A8.	Clr INEX; Force RZ.					#
   23524 #		The operation in A3 above may have set INEX2.  		#
   23525 #		RZ mode is forced for the scaling operation to insure	#
   23526 #		only one rounding error.  The grs bits are collected in #
   23527 #		the INEX flag for use in A10.				#
   23528 #									#
   23529 #	A9.	Scale X -> Y.						#
   23530 #		The mantissa is scaled to the desired number of		#
   23531 #		significant digits.  The excess digits are collected	#
   23532 #		in INEX2.						#
   23533 #									#
   23534 #	A10.	Or in INEX.						#
   23535 #		If INEX is set, round error occurred.  This is		#
   23536 #		compensated for by 'or-ing' in the INEX2 flag to	#
   23537 #		the lsb of Y.						#
   23538 #									#
   23539 #	A11.	Restore original FPCR; set size ext.			#
   23540 #		Perform FINT operation in the user's rounding mode.	#
   23541 #		Keep the size to extended.				#
   23542 #									#
   23543 #	A12.	Calculate YINT = FINT(Y) according to user's rounding	#
   23544 #		mode.  The FPSP routine sintd0 is used.  The output	#
   23545 #		is in fp0.						#
   23546 #									#
   23547 #	A13.	Check for LEN digits.					#
   23548 #		If the int operation results in more than LEN digits,	#
   23549 #		or less than LEN -1 digits, adjust ILOG and repeat from	#
   23550 #		A6.  This test occurs only on the first pass.  If the	#
   23551 #		result is exactly 10^LEN, decrement ILOG and divide	#
   23552 #		the mantissa by 10.					#
   23553 #									#
   23554 #	A14.	Convert the mantissa to bcd.				#
   23555 #		The binstr routine is used to convert the LEN digit 	#
   23556 #		mantissa to bcd in memory.  The input to binstr is	#
   23557 #		to be a fraction; i.e. (mantissa)/10^LEN and adjusted	#
   23558 #		such that the decimal point is to the left of bit 63.	#
   23559 #		The bcd digits are stored in the correct position in 	#
   23560 #		the final string area in memory.			#
   23561 #									#
   23562 #	A15.	Convert the exponent to bcd.				#
   23563 #		As in A14 above, the exp is converted to bcd and the	#
   23564 #		digits are stored in the final string.			#
   23565 #		Test the length of the final exponent string.  If the	#
   23566 #		length is 4, set operr.					#
   23567 #									#
   23568 #	A16.	Write sign bits to final string.			#
   23569 #									#
   23570 #########################################################################
   23571 
   23572 set	BINDEC_FLG,	EXC_TEMP	# DENORM flag
   23573 
   23574 # Constants in extended precision
   23575 PLOG2:
   23576 	long		0x3FFD0000,0x9A209A84,0xFBCFF798,0x00000000 # log10(2) ~ 0.30103
   23577 PLOG2UP1:
   23578 	long		0x3FFD0000,0x9A209A84,0xFBCFF799,0x00000000 # PLOG2 with mantissa lsb + 1
   23579 
   23580 # Constants in single precision (value in first lword; remaining lwords zero)
   23581 FONE:
   23582 	long		0x3F800000,0x00000000,0x00000000,0x00000000 # 1.0
   23583 FTWO:
   23584 	long		0x40000000,0x00000000,0x00000000,0x00000000 # 2.0
   23585 FTEN:
   23586 	long		0x41200000,0x00000000,0x00000000,0x00000000 # 10.0
   23587 F4933:
   23588 	long		0x459A2800,0x00000000,0x00000000,0x00000000 # 4933.0
   23589 
   23590 RBDTBL:	# NOTE(review): presumably bindec's rounding-mode lookup (cf. RTABLE in decbin); its use is outside this view
   23591 	byte		0,0,0,0
   23592 	byte		3,3,2,2
   23593 	byte		3,2,2,3
   23594 	byte		2,3,3,2
   23595 
   23596 #	Implementation Notes:
   23597 #
   23598 #	The registers are used as follows:
   23599 #
   23600 #		d0: scratch; LEN input to binstr
   23601 #		d1: scratch
   23602 #		d2: upper 32-bits of mantissa for binstr
   23603 #		d3: scratch;lower 32-bits of mantissa for binstr
   23604 #		d4: LEN
   23605 #      		d5: LAMBDA/ICTR
   23606 #		d6: ILOG
   23607 #		d7: k-factor
   23608 #		a0: ptr for original operand/final result
   23609 #		a1: scratch pointer
   23610 #		a2: pointer to FP_X; abs(original value) in ext
   23611 #		fp0: scratch
   23612 #		fp1: scratch
   23613 #		fp2: scratch
   23614 #		F_SCR1:
   23615 #		F_SCR2:
   23616 #		L_SCR1:
   23617 #		L_SCR2:
   23618 
   23619 	global		bindec
   23620 bindec:				# in: a0 = ptr to operand, d0 = k-factor
   23621 	movm.l		&0x3f20,-(%sp)	#  save {%d2-%d7/%a2}
   23622 	fmovm.x		&0x7,-(%sp)	#  save {%fp0-%fp2}
   23623 
   23624 # A1. Set RM and size ext. Set SIGMA = sign input;
   23625 #     The k-factor is saved for use in d7.  Clear BINDEC_FLG for
   23626 #     separating normalized/denormalized input.  If the input
   23627 #     is a denormalized number, set the BINDEC_FLG memory byte
   23628 #     to signal denorm.  If the input is unnormalized, normalize
   23629 #     the input and test for denormalized result.
   23630 #
   23631 	fmov.l		&rm_mode*0x10,%fpcr	# set RM and ext
   23632 	mov.l		(%a0),L_SCR2(%a6)	# save first lword (sign/exponent) for sign checks
   23633 	mov.l		%d0,%d7		# move k-factor to d7
   23634 
   23635 	clr.b		BINDEC_FLG(%a6)	# clr norm/denorm flag
   23636 	cmpi.b		STAG(%a6),&DENORM # is input a DENORM?
   23637 	bne.w		A2_str		# no; input is a NORM
   23638 
   23639 #
   23640 # Normalize the denorm: shift the 64-bit mantissa left until its
   23641 # msb is set, decrementing the exponent once per shift.
   23642 un_de_norm:
   23643 	mov.w		(%a0),%d0	# d0 = sign/exponent word of operand
   23644 	and.w		&0x7fff,%d0	# strip sign bit from the exponent word
   23645 	mov.l		4(%a0),%d1	# d1 = upper 32 bits of mantissa
   23646 	mov.l		8(%a0),%d2	# d2 = lower 32 bits of mantissa
   23647 norm_loop:
   23648 	sub.w		&1,%d0		# exponent drops by one per shift
   23649 	lsl.l		&1,%d2		# shift d1:d2 left one bit;
   23650 	roxl.l		&1,%d1		# msb of d2 moves into d1 through X
   23651 	tst.l		%d1		# is bit 31 of d1 set yet?
   23652 	bge.b		norm_loop	# no; keep shifting
   23653 #
   23654 # Test if the normalized input is denormalized
   23655 #
   23656 	tst.w		%d0		# sign of the adjusted exponent
   23657 	bgt.b		pos_exp		# if greater than zero, it is a norm
   23658 	st		BINDEC_FLG(%a6)	# set flag for denorm
   23659 pos_exp:
   23660 	and.w		&0x7fff,%d0	# strip sign of normalized exp
   23661 	mov.w		%d0,(%a0)	# write normalized operand back in place:
   23662 	mov.l		%d1,4(%a0)	#   upper mantissa
   23663 	mov.l		%d2,8(%a0)	#   lower mantissa
   23664 
   23665 # A2. Set X = abs(input).
   23666 #
   23667 A2_str:				# copy operand to FP_SCR1 and clear its sign
   23668 	mov.l		(%a0),FP_SCR1(%a6)	# move sign/exponent lword to work space
   23669 	mov.l		4(%a0),FP_SCR1+4(%a6)	# move upper mantissa to work space
   23670 	mov.l		8(%a0),FP_SCR1+8(%a6)	# move lower mantissa to work space
   23671 	and.l		&0x7fffffff,FP_SCR1(%a6)	# clear bit 31 to create abs(X)
   23672 
   23673 # A3. Compute ILOG.
   23674 #     ILOG is the log base 10 of the input value.  It is approx-
   23675 #     imated by adding e + 0.f when the original value is viewed
   23676 #     as 2^^e * 1.f in extended precision.  This value is stored
   23677 #     in d6.
   23678 #
   23679 # Register usage:
   23680 #	Input/Output
   23681 #	d0: k-factor/exponent
   23682 #	d2: x/x
   23683 #	d3: x/x
   23684 #	d4: x/x
   23685 #	d5: x/x
   23686 #	d6: x/ILOG
   23687 #	d7: k-factor/Unchanged
   23688 #	a0: ptr for original operand/final result
   23689 #	a1: x/x
   23690 #	a2: x/x
   23691 #	fp0: x/float(ILOG)
   23692 #	fp1: x/x
   23693 #	fp2: x/x
   23694 #	F_SCR1:x/x
   23695 #	F_SCR2:Abs(X)/Abs(X) with $3fff exponent
   23696 #	L_SCR1:x/x
   23697 #	L_SCR2:first word of X packed/Unchanged
   23698 
   23699 	tst.b		BINDEC_FLG(%a6)	# check for denorm
   23700 	beq.b		A3_cont		# if clr, continue with norm
   23701 	mov.l		&-4933,%d6	# force ILOG = -4933
   23702 	bra.b		A4_str		# denorm: skip the ILOG estimate
   23703 A3_cont:				# norm: estimate ILOG from e + 0.f
   23704 	mov.w		FP_SCR1(%a6),%d0	# move exp to d0
   23705 	mov.w		&0x3fff,FP_SCR1(%a6)	# replace exponent with 0x3fff
   23706 	fmov.x		FP_SCR1(%a6),%fp0	# now fp0 has 1.f
   23707 	sub.w		&0x3fff,%d0	# strip off bias
   23708 	fadd.w		%d0,%fp0	# add in exp
   23709 	fsub.s		FONE(%pc),%fp0	# subtract off 1.0; fp0 = e + 0.f
   23710 	fbge.w		pos_res		# if pos or zero, branch
   23711 	fmul.x		PLOG2UP1(%pc),%fp0	# if neg, mul by LOG2UP1
   23712 	fmov.l		%fp0,%d6	# put ILOG in d6 as a lword
   23713 	bra.b		A4_str		# go move out ILOG
   23714 pos_res:
   23715 	fmul.x		PLOG2(%pc),%fp0	# if pos, mul by LOG2
   23716 	fmov.l		%fp0,%d6	# put ILOG in d6 as a lword
   23717 
   23718 
   23719 # A4. Clr INEX bit.
   23720 #     The operation in A3 above may have set INEX2.
   23721 
   23722 A4_str:				# discard status left behind by A3
   23723 	fmov.l		&0,%fpsr	# zero all of fpsr - nothing needed
   23724 
   23725 
   23726 # A5. Set ICTR = 0;
   23727 #     ICTR is a flag used in A13.  It must be set before the
   23728 #     loop entry A6. The lower word of d5 is used for ICTR.
   23729 
   23730 	clr.w		%d5		# clear ICTR (lower word of d5 only)
   23731 
   23732 # A6. Calculate LEN.
   23733 #     LEN is the number of digits to be displayed.  The k-factor
   23734 #     can dictate either the total number of digits, if it is
   23735 #     a positive number, or the number of digits after the
   23736 #     original decimal point which are to be included as
   23737 #     significant.  See the 68882 manual for examples.
   23738 #     If LEN is computed to be greater than 17, set OPERR in
   23739 #     USER_FPSR.  LEN is stored in d4.
   23740 #
   23741 # Register usage:
   23742 #	Input/Output
   23743 #	d0: exponent/Unchanged
   23744 #	d2: x/x/scratch
   23745 #	d3: x/x
   23746 #	d4: exc picture/LEN
   23747 #	d5: ICTR/Unchanged
   23748 #	d6: ILOG/Unchanged
   23749 #	d7: k-factor/Unchanged
   23750 #	a0: ptr for original operand/final result
   23751 #	a1: x/x
   23752 #	a2: x/x
   23753 #	fp0: float(ILOG)/Unchanged
   23754 #	fp1: x/x
   23755 #	fp2: x/x
   23756 #	F_SCR1:x/x
   23757 #	F_SCR2:Abs(X) with $3fff exponent/Unchanged
   23758 #	L_SCR1:x/x
   23759 #	L_SCR2:first word of X packed/Unchanged
   23760 
   23761 A6_str:				# also re-entered from A13 after ILOG is adjusted
   23762 	tst.l		%d7		# branch on sign of k
   23763 	ble.b		k_neg		# if k <= 0, LEN = ILOG + 1 - k
   23764 	mov.l		%d7,%d4		# if k > 0, LEN = k
   23765 	bra.b		len_ck		# skip to LEN check
   23766 k_neg:
   23767 	mov.l		%d6,%d4		# first load ILOG to d4
   23768 	sub.l		%d7,%d4		# subtract off k
   23769 	addq.l		&1,%d4		# add in the 1
   23770 len_ck:
   23771 	tst.l		%d4		# LEN check: branch on sign of LEN
   23772 	ble.b		LEN_ng		# if LEN <= 0, set LEN = 1
   23773 	cmp.l		%d4,&17		# test if LEN > 17
   23774 	ble.b		A7_str		# if not, forget it
   23775 	mov.l		&17,%d4		# set max LEN = 17
   23776 	tst.l		%d7		# OPERR is only set when k > 0
   23777 	ble.b		A7_str		# k <= 0: LEN is clamped without OPERR
   23778 	or.l		&opaop_mask,USER_FPSR(%a6)	# set OPERR & AIOP in USER_FPSR
   23779 	bra.b		A7_str		# finished here
   23780 LEN_ng:
   23781 	mov.l		&1,%d4		# min LEN is 1
   23782 
   23783 
   23784 # A7. Calculate SCALE.
   23785 #     SCALE is equal to 10^ISCALE, where ISCALE is the number
   23786 #     of decimal places needed to insure LEN integer digits
   23787 #     in the output before conversion to bcd. LAMBDA is the sign
   23788 #     of ISCALE, used in A9.  Fp1 contains 10^^(abs(ISCALE)) using
   23789 #     the rounding mode as given in the following table (see
   23790 #     Coonen, p. 7.23 as ref.; however, the SCALE variable is
   23791 #     of opposite sign in bindec.sa from Coonen).
   23792 #
   23793 #	Initial					USE
   23794 #	FPCR[6:5]	LAMBDA	SIGN(X)		FPCR[6:5]
   23795 #	----------------------------------------------
   23796 #	 RN	00	   0	   0		00/0	RN
   23797 #	 RN	00	   0	   1		00/0	RN
   23798 #	 RN	00	   1	   0		00/0	RN
   23799 #	 RN	00	   1	   1		00/0	RN
   23800 #	 RZ	01	   0	   0		11/3	RP
   23801 #	 RZ	01	   0	   1		11/3	RP
   23802 #	 RZ	01	   1	   0		10/2	RM
   23803 #	 RZ	01	   1	   1		10/2	RM
   23804 #	 RM	10	   0	   0		11/3	RP
   23805 #	 RM	10	   0	   1		10/2	RM
   23806 #	 RM	10	   1	   0		10/2	RM
   23807 #	 RM	10	   1	   1		11/3	RP
   23808 #	 RP	11	   0	   0		10/2	RM
   23809 #	 RP	11	   0	   1		11/3	RP
   23810 #	 RP	11	   1	   0		11/3	RP
   23811 #	 RP	11	   1	   1		10/2	RM
   23812 #
   23813 # Register usage:
   23814 #	Input/Output
   23815 #	d0: exponent/scratch - final is 0
   23816 #	d2: x/0 or 24 for A9
   23817 #	d3: x/scratch - offset ptr into PTENRM array
   23818 #	d4: LEN/Unchanged
   23819 #	d5: 0/ICTR:LAMBDA
   23820 #	d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
   23821 #	d7: k-factor/Unchanged
   23822 #	a0: ptr for original operand/final result
   23823 #	a1: x/ptr to PTENRM array
   23824 #	a2: x/x
   23825 #	fp0: float(ILOG)/Unchanged
   23826 #	fp1: x/10^ISCALE
   23827 #	fp2: x/x
   23828 #	F_SCR1:x/x
   23829 #	F_SCR2:Abs(X) with $3fff exponent/Unchanged
   23830 #	L_SCR1:x/x
   23831 #	L_SCR2:first word of X packed/Unchanged
   23832 
   23833 A7_str:				# build fp1 = 10^abs(ISCALE), pick rmode and pten table
   23834 	tst.l		%d7		# test sign of k
   23835 	bgt.b		k_pos		# if k > 0, skip this
   23836 	cmp.l		%d7,%d6		# test k - ILOG
   23837 	blt.b		k_pos		# if k < ILOG, skip this
   23838 	mov.l		%d7,%d6		# k <= 0 and ILOG <= k: ILOG = k
   23839 k_pos:
   23840 	mov.l		%d6,%d0		# calc ILOG + 1 - LEN in d0
   23841 	addq.l		&1,%d0		# add the 1
   23842 	sub.l		%d4,%d0		# sub off LEN; d0 = ISCALE
   23843 	swap		%d5		# ICTR to upper word; lower word will hold LAMBDA
   23844 	clr.w		%d5		# set LAMBDA to zero initially
   23845 	clr.w		%d2		# set up d2 for very small case
   23846 	tst.l		%d0		# test sign of ISCALE
   23847 	bge.b		iscale		# if ISCALE >= 0, LAMBDA stays 0
   23848 	addq.w		&1,%d5		# if neg, set LAMBDA true
   23849 	cmp.l		%d0,&0xffffecd4	# test iscale <= -4908
   23850 	bgt.b		no_inf		# if false, skip rest
   23851 	add.l		&24,%d0		# add in 24 to iscale
   23852 	mov.l		&24,%d2		# put 24 in d2 for A9
   23853 no_inf:
   23854 	neg.l		%d0		# and take abs of ISCALE
   23855 iscale:
   23856 	fmov.s		FONE(%pc),%fp1	# init fp1 to 1
   23857 	bfextu		USER_FPCR(%a6){&26:&2},%d1	# get initial rmode bits
   23858 	lsl.w		&1,%d1		# put them in bits 2:1
   23859 	add.w		%d5,%d1		# add in LAMBDA
   23860 	lsl.w		&1,%d1		# put them in bits 3:1
   23861 	tst.l		L_SCR2(%a6)	# test sign of original x
   23862 	bge.b		x_pos		# if pos, don't set bit 0
   23863 	addq.l		&1,%d1		# if neg, set bit 0
   23864 x_pos:				# d1 = rmode:LAMBDA:sign index into RBDTBL
   23865 	lea.l		RBDTBL(%pc),%a2	# load rbdtbl base
   23866 	mov.b		(%a2,%d1),%d3	# load low byte of d3 with new rmode; NOTE(review): upper bits of d3 are not cleared before the shift into fpcr - confirm harmless
   23867 	lsl.l		&4,%d3		# move rmode up to fpcr bits 5:4
   23868 	fmov.l		%d3,%fpcr	# load bits into fpu
   23869 	lsr.l		&4,%d3		# shift rmode back down to bits 1:0
   23870 	tst.b		%d3		# decode new rmode for pten table
   23871 	bne.b		not_rn		# if zero, it is RN
   23872 	lea.l		PTENRN(%pc),%a1	# load a1 with RN table base
   23873 	bra.b		rmode		# exit decode
   23874 not_rn:
   23875 	lsr.b		&1,%d3		# get lsb in carry
   23876 	bcc.b		not_rp2		# if carry clear, it is RM
   23877 	lea.l		PTENRP(%pc),%a1	# load a1 with RP table base
   23878 	bra.b		rmode		# exit decode
   23879 not_rp2:
   23880 	lea.l		PTENRM(%pc),%a1	# load a1 with RM table base
   23881 rmode:
   23882 	clr.l		%d3		# clr table index
   23883 e_loop2:				# one pass per bit of abs(ISCALE), lsb first
   23884 	lsr.l		&1,%d0		# shift next bit into carry
   23885 	bcc.b		e_next2		# if zero, skip the mul
   23886 	fmul.x		(%a1,%d3),%fp1	# mul by 10**(d3_bit_no)
   23887 e_next2:
   23888 	add.l		&12,%d3		# inc d3 to next pwrten table entry
   23889 	tst.l		%d0		# test if any ISCALE bits remain
   23890 	bne.b		e_loop2		# if so, loop
   23891 
   23892 # A8. Clr INEX; Force RZ.
   23893 #     The power-of-ten multiplies in A7 above may have set INEX2.
   23894 #     RZ mode is forced for the scaling operation to insure
   23895 #     only one rounding error.  The grs bits are collected in
   23896 #     the INEX flag for use in A10.
   23897 #
   23898 # Register usage:
   23899 #	Input/Output
   23900 
   23901 	fmov.l		&0,%fpsr	# clr INEX (zeroes all of fpsr)
   23902 	fmov.l		&rz_mode*0x10,%fpcr	# set RZ rounding mode for the scaling in A9
   23903 
   23904 # A9. Scale X -> Y.
   23905 #     The mantissa is scaled to the desired number of significant
   23906 #     digits.  The excess digits are collected in INEX2. If mul,
   23907 #     Check d2 for excess 10 exponential value.  If not zero,
   23908 #     the iscale value would have caused the pwrten calculation
   23909 #     to overflow.  Only a negative iscale can cause this, so
   23910 #     multiply by 10^(d2), which is now only allowed to be 24,
   23911 #     with a multiply by 10^8 and 10^16, which is exact since
   23912 #     10^24 is exact.  If the input was denormalized, the
   23913 #     exponents and mantissas are multiplied separately (see
   23914 #     sc_mul) so that no intermediate product underflows.
   23915 #
   23916 # Register usage:
   23917 #	Input/Output
   23918 #	d0: FPCR with RZ mode/Unchanged
   23919 #	d2: 0 or 24/unchanged
   23920 #	d3: x/x
   23921 #	d4: LEN/Unchanged
   23922 #	d5: ICTR:LAMBDA
   23923 #	d6: ILOG/Unchanged
   23924 #	d7: k-factor/Unchanged
   23925 #	a0: ptr for original operand/final result
   23926 #	a1: ptr to PTENRM array/Unchanged
   23927 #	a2: x/x
   23928 #	fp0: float(ILOG)/X adjusted for SCALE (Y)
   23929 #	fp1: 10^ISCALE/Unchanged
   23930 #	fp2: x/x
   23931 #	F_SCR1:x/x
   23932 #	F_SCR2:Abs(X) with $3fff exponent/Unchanged
   23933 #	L_SCR1:x/x
   23934 #	L_SCR2:first word of X packed/Unchanged
   23935 
   23936 A9_str:				# Y = abs(X) scaled by fp1 (div if LAMBDA = 0, mul if 1)
   23937 	fmov.x		(%a0),%fp0	# load X from memory
   23938 	fabs.x		%fp0		# use abs(X)
   23939 	tst.w		%d5		# LAMBDA is in lower word of d5
   23940 	bne.b		sc_mul		# if neg (LAMBDA = 1), scale by mul
   23941 	fdiv.x		%fp1,%fp0	# calculate X / SCALE -> Y to fp0
   23942 	bra.w		A10_st		# branch to A10
   23943 
   23944 sc_mul:
   23945 	tst.b		BINDEC_FLG(%a6)	# check for denorm
   23946 	beq.w		A9_norm		# if norm, continue with mul
   23947 
   23948 # for DENORM, we must calculate:
   23949 #	fp0 = input_op * 10^ISCALE * 10^24
   23950 # since the input operand is a DENORM, we can't multiply it directly.
   23951 # so, we do the multiplication of the exponents and mantissas separately.
   23952 # in this way, we avoid underflow on intermediate stages of the
   23953 # multiplication and guarantee a result without exception.
   23954 	fmovm.x		&0x2,-(%sp)	# save 10^ISCALE (fp1) to stack
   23955 
   23956 	mov.w		(%sp),%d3	# grab exponent of 10^ISCALE
   23957 	andi.w		&0x7fff,%d3	# clear sign
   23958 	ori.w		&0x8000,(%a0)	# make DENORM exp negative
   23959 	add.w		(%a0),%d3	# add DENORM exp to 10^ISCALE exp
   23960 	subi.w		&0x3fff,%d3	# subtract BIAS
   23961 	add.w		36(%a1),%d3	# add exponent of 10^8 table entry
   23962 	subi.w		&0x3fff,%d3	# subtract BIAS
   23963 	add.w		48(%a1),%d3	# add exponent of 10^16 table entry
   23964 	subi.w		&0x3fff,%d3	# subtract BIAS
   23965 
   23966 	bmi.w		sc_mul_err	# if result is DENORM, punt!!!
   23967 
   23968 	andi.w		&0x8000,(%sp)	# keep sign of the saved 10^ISCALE
   23969 	or.w		%d3,(%sp)	# insert new (combined) exponent
   23970 	andi.w		&0x7fff,(%a0)	# clear sign bit on DENORM again
   23971 	mov.l		0x8(%a0),-(%sp) # put input op mantissa on stk
   23972 	mov.l		0x4(%a0),-(%sp) # (upper half of the mantissa)
   23973 	mov.l		&0x3fff0000,-(%sp) # force exp to zero
   23974 	fmovm.x		(%sp)+,&0x80	# load normalized DENORM into fp0
   23975 	fmul.x		(%sp)+,%fp0	# multiply by the saved 10^ISCALE with combined exponent
   23976 
   23977 #	fmul.x	36(%a1),%fp0	# multiply fp0 by 10^8
   23978 #	fmul.x	48(%a1),%fp0	# multiply fp0 by 10^16
   23979 	mov.l		36+8(%a1),-(%sp) # get 10^8 mantissa
   23980 	mov.l		36+4(%a1),-(%sp) # (upper half)
   23981 	mov.l		&0x3fff0000,-(%sp) # force exp to zero
   23982 	mov.l		48+8(%a1),-(%sp) # get 10^16 mantissa
   23983 	mov.l		48+4(%a1),-(%sp) # (upper half)
   23984 	mov.l		&0x3fff0000,-(%sp)# force exp to zero
   23985 	fmul.x		(%sp)+,%fp0	# multiply fp0 by 10^16 mantissa (pushed last)
   23986 	fmul.x		(%sp)+,%fp0	# multiply fp0 by 10^8 mantissa
   23987 	bra.b		A10_st		# denorm scaling done
   23988 
   23989 sc_mul_err:
   23990 	bra.b		sc_mul_err	# spins here forever; no recovery is coded
   23991 
   23992 A9_norm:
   23993 	tst.w		%d2		# test for small exp case
   23994 	beq.b		A9_con		# if zero, continue as normal
   23995 	fmul.x		36(%a1),%fp0	# multiply fp0 by 10^8
   23996 	fmul.x		48(%a1),%fp0	# multiply fp0 by 10^16
   23997 A9_con:
   23998 	fmul.x		%fp1,%fp0	# calculate X * SCALE -> Y to fp0
   23999 
   24000 # A10. Or in INEX.
   24001 #      If INEX is set, round error occurred.  This is compensated
   24002 #      for by 'or-ing' in the INEX2 flag to the lsb of Y.
   24003 #
   24004 # Register usage:
   24005 #	Input/Output
   24006 #	d0: FPCR with RZ mode/FPSR with INEX2 isolated
   24007 #	d2: x/x
   24008 #	d3: x/x
   24009 #	d4: LEN/Unchanged
   24010 #	d5: ICTR:LAMBDA
   24011 #	d6: ILOG/Unchanged
   24012 #	d7: k-factor/Unchanged
   24013 #	a0: ptr for original operand/final result
   24014 #	a1: ptr to PTENxx array/Unchanged
   24015 #	a2: x/ptr to FP_SCR1(a6)
   24016 #	fp0: Y/Y with lsb adjusted
   24017 #	fp1: 10^ISCALE/Unchanged
   24018 #	fp2: x/x
   24019 
   24020 A10_st:				# record in Y's lsb whether the A9 scaling was inexact
   24021 	fmov.l		%fpsr,%d0	# get FPSR
   24022 	fmov.x		%fp0,FP_SCR1(%a6)	# move Y to memory
   24023 	lea.l		FP_SCR1(%a6),%a2	# load a2 with ptr to FP_SCR1
   24024 	btst		&9,%d0		# check if INEX2 set
   24025 	beq.b		A11_st		# if clear, skip rest
   24026 	or.l		&1,8(%a2)	# or in 1 to lsb of mantissa
   24027 	fmov.x		FP_SCR1(%a6),%fp0	# write adjusted Y back to fpu
   24028 
   24029 
   24030 # A11. Restore original FPCR; set size ext.
   24031 #      Perform FINT operation in the user's rounding mode.  Keep
   24032 #      the size to extended.  The sintdo entry point in the sint
   24033 #      routine expects the FPCR value to be in USER_FPCR for
   24034 #      mode and precision.  The original FPCR is saved in L_SCR1.
   24035 
   24036 A11_st:
   24037 	mov.l		USER_FPCR(%a6),L_SCR1(%a6)	# save user's FPCR; restored after A12
   24038 	and.l		&0x00000030,USER_FPCR(%a6)	# keep only bits 5:4: set size to ext,
   24039 #					;block exceptions
   24040 
   24041 
   24042 # A12. Calculate YINT = FINT(Y) according to user's rounding mode.
   24043 #      The fint.x instruction is used directly (the call to the FPSP routine sintdo is commented out).  The output is in fp0.
   24044 #
   24045 # Register usage:
   24046 #	Input/Output
   24047 #	d0: FPSR with AINEX cleared/FPCR with size set to ext
   24048 #	d2: x/x/scratch
   24049 #	d3: x/x
   24050 #	d4: LEN/Unchanged
   24051 #	d5: ICTR:LAMBDA/Unchanged
   24052 #	d6: ILOG/Unchanged
   24053 #	d7: k-factor/Unchanged
   24054 #	a0: ptr for original operand/src ptr for sintdo
   24055 #	a1: ptr to PTENxx array/Unchanged
   24056 #	a2: ptr to FP_SCR1(a6)/Unchanged
   24057 #	a6: temp pointer to FP_SCR1(a6) - orig value saved and restored
   24058 #	fp0: Y/YINT
   24059 #	fp1: 10^ISCALE/Unchanged
   24060 #	fp2: x/x
   24061 #	F_SCR1:x/x
   24062 #	F_SCR2:Y adjusted for inex/Y with original exponent
   24063 #	L_SCR1:x/original USER_FPCR
   24064 #	L_SCR2:first word of X packed/Unchanged
   24065 
   24066 A12_st:
   24067 	movm.l	&0xc0c0,-(%sp)	# save regs used by sintd0	 {%d0-%d1/%a0-%a1}
   24068 	mov.l	L_SCR1(%a6),-(%sp)	# save L_SCR1 (original USER_FPCR)
   24069 	mov.l	L_SCR2(%a6),-(%sp)	# save L_SCR2 (first lword of X)
   24070 
   24071 	lea.l		FP_SCR1(%a6),%a0	# a0 is ptr to FP_SCR1(a6)
   24072 	fmov.x		%fp0,(%a0)	# move Y to memory at FP_SCR1(a6)
   24073 	tst.l		L_SCR2(%a6)	# test sign of original operand
   24074 	bge.b		do_fint12		# if pos, use Y
   24075 	or.l		&0x80000000,(%a0)	# if neg, use -Y
   24076 do_fint12:
   24077 	mov.l	USER_FPSR(%a6),-(%sp)	# save USER_FPSR across the fint
   24078 #	bsr	sintdo		# sint routine returns int in fp0
   24079 
   24080 	fmov.l	USER_FPCR(%a6),%fpcr	# user's rounding mode, ext size (masked in A11)
   24081 	fmov.l	&0x0,%fpsr			# clear the AEXC bits!!!
   24082 ##	mov.l		USER_FPCR(%a6),%d0	# ext prec/keep rnd mode
   24083 ##	andi.l		&0x00000030,%d0
   24084 ##	fmov.l		%d0,%fpcr
   24085 	fint.x		FP_SCR1(%a6),%fp0	# do fint()
   24086 	fmov.l	%fpsr,%d0		# collect status raised by the fint
   24087 	or.w	%d0,FPSR_EXCEPT(%a6)	# merge its low word into FPSR_EXCEPT
   24088 ##	fmov.l		&0x0,%fpcr
   24089 ##	fmov.l		%fpsr,%d0		# don't keep ccodes
   24090 ##	or.w		%d0,FPSR_EXCEPT(%a6)
   24091 
   24092 	mov.b	(%sp),USER_FPSR(%a6)	# restore only the first byte of the saved USER_FPSR
   24093 	add.l	&4,%sp			# drop the saved USER_FPSR lword
   24094 
   24095 	mov.l	(%sp)+,L_SCR2(%a6)	# restore L_SCR2
   24096 	mov.l	(%sp)+,L_SCR1(%a6)	# restore L_SCR1
   24097 	movm.l	(%sp)+,&0x303	# restore regs used by sint	 {%d0-%d1/%a0-%a1}
   24098 
   24099 	mov.l	L_SCR2(%a6),FP_SCR1(%a6)	# restore original exponent
   24100 	mov.l	L_SCR1(%a6),USER_FPCR(%a6)	# restore user's FPCR
   24101 
   24102 # A13. Check for LEN digits.
   24103 #      If the int operation results in more than LEN digits,
   24104 #      or less than LEN -1 digits, adjust ILOG and repeat from
   24105 #      A6.  This test occurs only on the first pass.  If the
   24106 #      result is exactly 10^LEN, increment ILOG and divide
   24107 #      the mantissa by 10.  The calculation of 10^LEN cannot
   24108 #      be inexact, since all powers of ten up to 10^27 are exact
   24109 #      in extended precision, so the use of a previous power-of-ten
   24110 #      table will introduce no error.
   24111 #
   24112 #
   24113 # Register usage:
   24114 #	Input/Output
   24115 #	d0: FPCR with size set to ext/scratch final = 0
   24116 #	d2: x/x
   24117 #	d3: x/scratch final = x
   24118 #	d4: LEN/LEN adjusted
   24119 #	d5: ICTR:LAMBDA/LAMBDA:ICTR
   24120 #	d6: ILOG/ILOG adjusted
   24121 #	d7: k-factor/Unchanged
   24122 #	a0: pointer into memory for packed bcd string formation
   24123 #	a1: ptr to PTENxx array/Unchanged
   24124 #	a2: ptr to FP_SCR1(a6)/Unchanged
   24125 #	fp0: int portion of Y/abs(YINT) adjusted
   24126 #	fp1: 10^ISCALE/Unchanged
   24127 #	fp2: x/10^LEN
   24128 #	F_SCR1:x/x
   24129 #	F_SCR2:Y with original exponent/Unchanged
   24130 #	L_SCR1:original USER_FPCR/Unchanged
   24131 #	L_SCR2:first word of X packed/Unchanged
   24132 
   24133 A13_st:				# check the digit count of YINT; may loop back to A6 once
   24134 	swap		%d5		# put ICTR in lower word of d5
   24135 	tst.w		%d5		# check if ICTR = 0
   24136 	bne		not_zr		# if non-zero, go to second test
   24137 #
   24138 # Compute 10^(LEN-1)
   24139 #
   24140 	fmov.s		FONE(%pc),%fp2	# init fp2 to 1.0
   24141 	mov.l		%d4,%d0		# put LEN in d0
   24142 	subq.l		&1,%d0		# d0 = LEN -1
   24143 	clr.l		%d3		# clr table index
   24144 l_loop:
   24145 	lsr.l		&1,%d0		# shift next bit into carry
   24146 	bcc.b		l_next		# if zero, skip the mul
   24147 	fmul.x		(%a1,%d3),%fp2	# mul by 10**(d3_bit_no)
   24148 l_next:
   24149 	add.l		&12,%d3		# inc d3 to next pwrten table entry
   24150 	tst.l		%d0		# test if any bits of LEN-1 remain
   24151 	bne.b		l_loop		# if so, loop
   24152 #
   24153 # 10^(LEN-1) is computed for this test and A14.  If the input was
   24154 # denormalized, check only the case in which YINT > 10^LEN.
   24155 #
   24156 	tst.b		BINDEC_FLG(%a6)	# check if input was norm
   24157 	beq.b		A13_con		# if norm, continue with checking
   24158 	fabs.x		%fp0		# take abs of YINT
   24159 	bra		test_2		# denorm: skip the 10^(LEN-1) comparison
   24160 #
   24161 # Compare abs(YINT) to 10^(LEN-1) and 10^LEN
   24162 #
   24163 A13_con:
   24164 	fabs.x		%fp0		# take abs of YINT
   24165 	fcmp.x		%fp0,%fp2	# compare abs(YINT) with 10^(LEN-1)
   24166 	fbge.w		test_2		# if greater or equal, do next test
   24167 	subq.l		&1,%d6		# too few digits: subtract 1 from ILOG
   24168 	mov.w		&1,%d5		# set ICTR
   24169 	fmov.l		&rm_mode*0x10,%fpcr	# set rmode to RM
   24170 	fmul.s		FTEN(%pc),%fp2	# compute 10^LEN (fp2 is rebuilt at not_zr on the next pass)
   24171 	bra.w		A6_str		# return to A6 and recompute YINT
   24172 test_2:
   24173 	fmul.s		FTEN(%pc),%fp2	# compute 10^LEN
   24174 	fcmp.x		%fp0,%fp2	# compare abs(YINT) with 10^LEN
   24175 	fblt.w		A14_st		# if less, all is ok, go to A14
   24176 	fbgt.w		fix_ex		# if greater, fix and redo
   24177 	fdiv.s		FTEN(%pc),%fp0	# if equal, divide by 10
   24178 	addq.l		&1,%d6		# and inc ILOG
   24179 	bra.b		A14_st		# and continue elsewhere
   24180 fix_ex:
   24181 	addq.l		&1,%d6		# too many digits: increment ILOG by 1
   24182 	mov.w		&1,%d5		# set ICTR
   24183 	fmov.l		&rm_mode*0x10,%fpcr	# set rmode to RM
   24184 	bra.w		A6_str		# return to A6 and recompute YINT
   24185 #
   24186 # Since ICTR <> 0, we have already been through one adjustment,
   24187 # and shouldn't have another; this is to check if abs(YINT) = 10^LEN
   24188 # 10^LEN is again computed using whatever table is in a1 since the
   24189 # value calculated cannot be inexact.
   24190 #
   24191 not_zr:
   24192 	fmov.s		FONE(%pc),%fp2	# init fp2 to 1.0
   24193 	mov.l		%d4,%d0		# put LEN in d0
   24194 	clr.l		%d3		# clr table index
   24195 z_loop:
   24196 	lsr.l		&1,%d0		# shift next bit into carry
   24197 	bcc.b		z_next		# if zero, skip the mul
   24198 	fmul.x		(%a1,%d3),%fp2	# mul by 10**(d3_bit_no)
   24199 z_next:
   24200 	add.l		&12,%d3		# inc d3 to next pwrten table entry
   24201 	tst.l		%d0		# test if any bits of LEN remain
   24202 	bne.b		z_loop		# if so, loop
   24203 	fabs.x		%fp0		# get abs(YINT)
   24204 	fcmp.x		%fp0,%fp2	# check if abs(YINT) = 10^LEN
   24205 	fbneq.w		A14_st		# if not, skip this
   24206 	fdiv.s		FTEN(%pc),%fp0	# divide abs(YINT) by 10
   24207 	addq.l		&1,%d6		# and inc ILOG by 1
   24208 	addq.l		&1,%d4		# and inc LEN
   24209 	fmul.s		FTEN(%pc),%fp2	# LEN was incremented, so scale fp2 to the new 10^LEN
   24210 
   24211 # A14. Convert the mantissa to bcd.
   24212 #      The binstr routine is used to convert the LEN digit
   24213 #      mantissa to bcd in memory.  The input to binstr is
   24214 #      to be a fraction; i.e. (mantissa)/10^LEN and adjusted
   24215 #      such that the decimal point is to the left of bit 63.
   24216 #      The bcd digits are stored in the correct position in
   24217 #      the final string area in memory.
   24218 #
   24219 #
   24220 # Register usage:
   24221 #	Input/Output
   24222 #	d0: x/LEN call to binstr - final is 0
   24223 #	d1: x/0
   24224 #	d2: x/ms 32-bits of mant of abs(YINT)
   24225 #	d3: x/ls 32-bits of mant of abs(YINT)
   24226 #	d4: LEN/Unchanged
   24227 #	d5: ICTR:LAMBDA/LAMBDA:ICTR
   24228 #	d6: ILOG
   24229 #	d7: k-factor/Unchanged
   24230 #	a0: pointer into memory for packed bcd string formation
   24231 #	    /ptr to first mantissa byte in result string
   24232 #	a1: ptr to PTENxx array/Unchanged
   24233 #	a2: ptr to FP_SCR1(a6)/Unchanged
   24234 #	fp0: int portion of Y/abs(YINT) adjusted
   24235 #	fp1: 10^ISCALE/Unchanged
   24236 #	fp2: 10^LEN/Unchanged
   24237 #	F_SCR1:x/Work area for final result
   24238 #	F_SCR2:Y with original exponent/Unchanged
   24239 #	L_SCR1:original USER_FPCR/Unchanged
   24240 #	L_SCR2:first word of X packed/Unchanged
   24241 
   24242 A14_st:				# turn abs(YINT)/10^LEN into a 64-bit fraction in d2:d3 for binstr
   24243 	fmov.l		&rz_mode*0x10,%fpcr	# force rz for conversion
   24244 	fdiv.x		%fp2,%fp0	# divide abs(YINT) by 10^LEN
   24245 	lea.l		FP_SCR0(%a6),%a0	# a0 = ptr to FP_SCR0, the result work area
   24246 	fmov.x		%fp0,(%a0)	# move abs(YINT)/10^LEN to memory
   24247 	mov.l		4(%a0),%d2	# move 2nd lword of FP_SCR0 to d2
   24248 	mov.l		8(%a0),%d3	# move 3rd lword of FP_SCR0 to d3
   24249 	clr.l		4(%a0)		# zero lword 2 of FP_SCR0
   24250 	clr.l		8(%a0)		# zero lword 3 of FP_SCR0
   24251 	mov.l		(%a0),%d0	# move exponent to d0
   24252 	swap		%d0		# put exponent in lower word
   24253 	beq.b		no_sft		# if zero, don't shift
   24254 	sub.l		&0x3ffd,%d0	# sub bias less 2 to make fract
   24255 	tst.l		%d0		# check if > 0
   24256 	bgt.b		no_sft		# if so, don't shift
   24257 	neg.l		%d0		# make exp positive
   24258 m_loop:
   24259 	lsr.l		&1,%d2		# shift d2:d3 right, add 0s
   24260 	roxr.l		&1,%d3		# the number of places
   24261 	dbf.w		%d0,m_loop	# given in d0 (dbf runs d0+1 times)
   24262 no_sft:
   24263 	tst.l		%d2		# check for mantissa of zero
   24264 	bne.b		no_zr		# if not, go on
   24265 	tst.l		%d3		# continue zero check
   24266 	beq.b		zer_m		# if zero, go directly to binstr
   24267 no_zr:
   24268 	clr.l		%d1		# put zero in d1 for addx
   24269 	add.l		&0x00000080,%d3	# inc at bit 7
   24270 	addx.l		%d1,%d2		# continue inc (carry into d2)
   24271 	and.l		&0xffffff80,%d3	# clear the low 7 bits; not used by 882
   24272 zer_m:
   24273 	mov.l		%d4,%d0		# put LEN in d0 for binstr call
   24274 	addq.l		&3,%a0		# a0 points to M16 byte in result
   24275 	bsr		binstr		# call binstr to convert mant
   24276 
   24277 
   24278 # A15. Convert the exponent to bcd.
   24279 #      As in A14 above, the exp is converted to bcd and the
   24280 #      digits are stored in the final string.
   24281 #
   24282 #      Digits are stored in L_SCR1(a6) on return from BINDEC as:
   24283 #
   24284 #  	 32               16 15                0
   24285 #	-----------------------------------------
   24286 #  	|  0 | e3 | e2 | e1 | e4 |  X |  X |  X |
   24287 #	-----------------------------------------
   24288 #
   24289 # And are moved into their proper places in FP_SCR0.  If digit e4
   24290 # is non-zero, OPERR is signaled.  In all cases, all 4 digits are
   24291 # written as specified in the 881/882 manual for packed decimal.
   24292 #
   24293 # Register usage:
   24294 #	Input/Output
   24295 #	d0: x/LEN call to binstr - final is 0
   24296 #	d1: x/scratch (0);shift count for final exponent packing
   24297 #	d2: x/ms 32-bits of exp fraction/scratch
   24298 #	d3: x/ls 32-bits of exp fraction
   24299 #	d4: LEN/Unchanged
   24300 #	d5: ICTR:LAMBDA/LAMBDA:ICTR
   24301 #	d6: ILOG
   24302 #	d7: k-factor/Unchanged
   24303 #	a0: ptr to result string/ptr to L_SCR1(a6)
   24304 #	a1: ptr to PTENxx array/Unchanged
   24305 #	a2: ptr to FP_SCR1(a6)/Unchanged
   24306 #	fp0: abs(YINT) adjusted/float(ILOG)
   24307 #	fp1: 10^ISCALE/Unchanged
   24308 #	fp2: 10^LEN/Unchanged
   24309 #	F_SCR1:Work area for final result/BCD result
   24310 #	F_SCR2:Y with original exponent/ILOG/10^4
   24311 #	L_SCR1:original USER_FPCR/Exponent digits on return from binstr
   24312 #	L_SCR2:first word of X packed/Unchanged
   24313 
   24314 A15_st:				# pick the exponent value, convert it with binstr, pack the digits
   24315 	tst.b		BINDEC_FLG(%a6)	# check for denorm
   24316 	beq.b		not_denorm	# if clr, input was a norm
   24317 	ftest.x		%fp0		# test for zero
   24318 	fbeq.w		den_zero	# if zero, use k-factor or 4933
   24319 	fmov.l		%d6,%fp0	# float ILOG
   24320 	fabs.x		%fp0		# get abs of ILOG
   24321 	bra.b		convrt
   24322 den_zero:
   24323 	tst.l		%d7		# check sign of the k-factor
   24324 	blt.b		use_ilog	# if negative, use ILOG
   24325 	fmov.s		F4933(%pc),%fp0	# force exponent to 4933
   24326 	bra.b		convrt		# do it
   24327 use_ilog:
   24328 	fmov.l		%d6,%fp0	# float ILOG
   24329 	fabs.x		%fp0		# get abs of ILOG
   24330 	bra.b		convrt
   24331 not_denorm:
   24332 	ftest.x		%fp0		# test for zero
   24333 	fbneq.w		not_zero	# if not zero, use ILOG
   24334 	fmov.s		FONE(%pc),%fp0	# force exponent to 1
   24335 	bra.b		convrt		# do it
   24336 not_zero:
   24337 	fmov.l		%d6,%fp0	# float ILOG
   24338 	fabs.x		%fp0		# get abs of ILOG
   24339 convrt:
   24340 	fdiv.x		24(%a1),%fp0	# compute ILOG/10^4
   24341 	fmov.x		%fp0,FP_SCR1(%a6)	# store fp0 in memory
   24342 	mov.l		4(%a2),%d2	# move lword 2 to d2 (a2 still points to FP_SCR1 from A10)
   24343 	mov.l		8(%a2),%d3	# move lword 3 to d3
   24344 	mov.w		(%a2),%d0	# move exp to d0
   24345 	beq.b		x_loop_fin	# if zero, skip the shift
   24346 	sub.w		&0x3ffd,%d0	# subtract off bias less 2
   24347 	neg.w		%d0		# make exp positive
   24348 x_loop:
   24349 	lsr.l		&1,%d2		# shift d2:d3 right
   24350 	roxr.l		&1,%d3		# the number of places
   24351 	dbf.w		%d0,x_loop	# given in d0 (dbf runs d0+1 times)
   24352 x_loop_fin:
   24353 	clr.l		%d1		# put zero in d1 for addx
   24354 	add.l		&0x00000080,%d3	# inc at bit 7
   24355 	addx.l		%d1,%d2		# continue inc (carry into d2)
   24356 	and.l		&0xffffff80,%d3	# clear the low 7 bits; not used by 882
   24357 	mov.l		&4,%d0		# put 4 in d0 for binstr call
   24358 	lea.l		L_SCR1(%a6),%a0	# a0 is ptr to L_SCR1 for exp digits
   24359 	bsr		binstr		# call binstr to convert exp
   24360 	mov.l		L_SCR1(%a6),%d0	# load L_SCR1 lword to d0
   24361 	mov.l		&12,%d1		# use d1 for shift count
   24362 	lsr.l		%d1,%d0		# shift d0 right by 12
   24363 	bfins		%d0,FP_SCR0(%a6){&4:&12}	# put e3:e2:e1 in FP_SCR0
   24364 	lsr.l		%d1,%d0		# shift d0 right by 12
   24365 	bfins		%d0,FP_SCR0(%a6){&16:&4}	# put e4 in FP_SCR0
   24366 	tst.b		%d0		# check if e4 is zero
   24367 	beq.b		A16_st		# if zero, skip rest
   24368 	or.l		&opaop_mask,USER_FPSR(%a6)	# set OPERR & AIOP in USER_FPSR
   24369 
   24370 
   24371 # A16. Write sign bits to final string.
   24372 #	   Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
   24373 #
   24374 # Register usage:
   24375 #	Input/Output
   24376 #	d0: x/scratch - final is x
   24377 #	d2: x/x
   24378 #	d3: x/x
   24379 #	d4: LEN/Unchanged
   24380 #	d5: ICTR:LAMBDA/LAMBDA:ICTR
   24381 #	d6: ILOG/ILOG adjusted
   24382 #	d7: k-factor/Unchanged
   24383 #	a0: ptr to L_SCR1(a6)/Unchanged
   24384 #	a1: ptr to PTENxx array/Unchanged
   24385 #	a2: ptr to FP_SCR1(a6)/Unchanged
   24386 #	fp0: float(ILOG)/Unchanged
   24387 #	fp1: 10^ISCALE/Unchanged
   24388 #	fp2: 10^LEN/Unchanged
   24389 #	F_SCR1:BCD result with correct signs
   24390 #	F_SCR2:ILOG/10^4
   24391 #	L_SCR1:Exponent digits on return from binstr
   24392 #	L_SCR2:first word of X packed/Unchanged
   24393 
   24394 A16_st:				# write SM/SE sign bits, then restore registers and return
   24395 	clr.l		%d0		# clr d0 for collection of signs
   24396 	and.b		&0x0f,FP_SCR0(%a6)	# clear first nibble of FP_SCR0
   24397 	tst.l		L_SCR2(%a6)	# check sign of original mantissa
   24398 	bge.b		mant_p		# if pos, don't set SM
   24399 	mov.l		&2,%d0		# move 2 in to d0 for SM
   24400 mant_p:
   24401 	tst.l		%d6		# check sign of ILOG
   24402 	bge.b		wr_sgn		# if pos, don't set SE
   24403 	addq.l		&1,%d0		# set bit 0 in d0 for SE
   24404 wr_sgn:
   24405 	bfins		%d0,FP_SCR0(%a6){&0:&2}	# insert SM and SE into FP_SCR0
   24406 
   24407 # Clean up and restore all registers used.
   24408 
   24409 	fmov.l		&0,%fpsr	# clear possible inex2/ainex bits
   24410 	fmovm.x		(%sp)+,&0xe0	#  restore {%fp0-%fp2}
   24411 	movm.l		(%sp)+,&0x4fc	#  restore {%d2-%d7/%a2}
   24412 	rts
   24413 
   24414 	global		PTENRN		# powers 10^(2^n), n = 0..12; three longs (12 bytes) per entry
   24415 PTENRN:				# table chosen in bindec A7 when the new rmode is RN
   24416 	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
   24417 	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
   24418 	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
   24419 	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
   24420 	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
   24421 	long		0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32
   24422 	long		0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64
   24423 	long		0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
   24424 	long		0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
   24425 	long		0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
   24426 	long		0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024
   24427 	long		0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
   24428 	long		0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096
   24429 
   24430 	global		PTENRP		# 10^(2^i), i = 0..12; NOTE(review): RP presumably = rounded toward plus infinity
   24431 PTENRP:
   24432 	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1: exponent word 0x4002, zero pad word, 64-bit mantissa
   24433 	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2 (identical in PTENRN/PTENRM)
   24434 	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4 (identical in PTENRN/PTENRM)
   24435 	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8 (identical in PTENRN/PTENRM)
   24436 	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16 (identical in PTENRN/PTENRM)
   24437 	long		0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32 (PTENRM entry + 1 in the last long)
   24438 	long		0x40D30000,0xC2781F49,0xFFCFA6D6	# 10 ^ 64 (PTENRM entry + 1 in the last long)
   24439 	long		0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128 (PTENRM entry + 1 in the last long)
   24440 	long		0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256 (PTENRM entry + 1 in the last long)
   24441 	long		0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512 (PTENRM entry + 1 in the last long)
   24442 	long		0x4D480000,0xC9767586,0x81750C18	# 10 ^ 1024 (PTENRM entry + 1 in the last long)
   24443 	long		0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048 (PTENRM entry + 1 in the last long)
   24444 	long		0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096 (PTENRM entry + 1 in the last long)
   24445 
   24446 	global		PTENRM		# 10^(2^i), i = 0..12; NOTE(review): RM presumably = rounded toward minus infinity
   24447 PTENRM:
   24448 	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1: exponent word 0x4002, zero pad word, 64-bit mantissa
   24449 	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2 (identical in PTENRN/PTENRP)
   24450 	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4 (identical in PTENRN/PTENRP)
   24451 	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8 (identical in PTENRN/PTENRP)
   24452 	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16 (identical in PTENRN/PTENRP)
   24453 	long		0x40690000,0x9DC5ADA8,0x2B70B59D	# 10 ^ 32 (PTENRP entry - 1 in the last long)
   24454 	long		0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64 (PTENRP entry - 1 in the last long)
   24455 	long		0x41A80000,0x93BA47C9,0x80E98CDF	# 10 ^ 128 (PTENRP entry - 1 in the last long)
   24456 	long		0x43510000,0xAA7EEBFB,0x9DF9DE8D	# 10 ^ 256 (PTENRP entry - 1 in the last long)
   24457 	long		0x46A30000,0xE319A0AE,0xA60E91C6	# 10 ^ 512 (PTENRP entry - 1 in the last long)
   24458 	long		0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024 (PTENRP entry - 1 in the last long)
   24459 	long		0x5A920000,0x9E8B3B5D,0xC53D5DE4	# 10 ^ 2048 (PTENRP entry - 1 in the last long)
   24460 	long		0x75250000,0xC4605202,0x8A20979A	# 10 ^ 4096 (PTENRP entry - 1 in the last long)
   24461 
   24462 #########################################################################
   24463 # binstr(): Converts a 64-bit binary integer to bcd.			#
   24464 #									#
   24465 # INPUT *************************************************************** #
   24466 #	d2:d3 = 64-bit binary integer					#
   24467 #	d0    = desired length (LEN)					#
   24468 #	a0    = pointer to start in memory for bcd characters		#
   24469 #          	(This pointer must point to byte 4 of the first		#
   24470 #          	 lword of the packed decimal memory string.)		#
   24471 #									#
   24472 # OUTPUT ************************************************************** #
   24473 #	a0 = pointer to LEN bcd digits representing the 64-bit integer.	#
   24474 #									#
   24475 # ALGORITHM ***********************************************************	#
   24476 #	The 64-bit binary is assumed to have a decimal point before	#
   24477 #	bit 63.  The fraction is multiplied by 10 using a mul by 2	#
   24478 #	shift and a mul by 8 shift.  The bits shifted out of the	#
   24479 #	msb form a decimal digit.  This process is iterated until	#
   24480 #	LEN digits are formed.						#
   24481 #									#
   24482 # A1. Init d7 to 1.  D7 is the byte digit counter, and if 1, the	#
   24483 #     digit formed will be assumed the least significant.  This is	#
   24484 #     to force the first byte formed to have a 0 in the upper 4 bits.	#
   24485 #									#
   24486 # A2. Beginning of the loop:						#
   24487 #     Copy the fraction in d2:d3 to d4:d5.				#
   24488 #									#
   24489 # A3. Multiply the fraction in d2:d3 by 8 using bit-field		#
   24490 #     extracts and shifts.  The three msbs from d2 will go into d1.	#
   24491 #									#
   24492 # A4. Multiply the fraction in d4:d5 by 2 using shifts.  The msb	#
   24493 #     will be collected by the carry.					#
   24494 #									#
   24495 # A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5	#
   24496 #     into d2:d3.  D1 will contain the bcd digit formed.		#
   24497 #									#
   24498 # A6. Test d7.  If zero, the digit formed is the ms digit.  If non-	#
   24499 #     zero, it is the ls digit.  Put the digit in its place in the	#
   24500 #     upper word of d7.  If it is the ls digit, write the byte		#
   24501 #     from d7 to memory.						#
   24502 #									#
   24503 # A7. Decrement d0 (LEN counter) and repeat the loop until zero.	#
   24504 #									#
   24505 #########################################################################
   24506 
   24507 #	Implementation Notes:
   24508 #
   24509 #	The registers are used as follows:
   24510 #
   24511 #		d0: LEN counter
   24512 #		d1: temp used to form the digit
   24513 #		d2: upper 32-bits of fraction for mul by 8
   24514 #		d3: lower 32-bits of fraction for mul by 8
   24515 #		d4: upper 32-bits of fraction for mul by 2
   24516 #		d5: lower 32-bits of fraction for mul by 2
   24517 #		d6: temp for bit-field extracts
   24518 #		d7: byte digit formation word;digit count {0,1}
   24519 #		a0: pointer into memory for packed bcd string formation
   24520 #
   24521 
   24522 	global		binstr
   24523 binstr:
   24524 	movm.l		&0xff00,-(%sp)	#  save {%d0-%d7}; a0 is not saved and returns advanced
   24525 
   24526 #
   24527 # A1: Init d7
   24528 #
   24529 	mov.l		&1,%d7		# flag word = 1, digit word = 0: first digit is stored as 0x0d
   24530 	subq.l		&1,%d0		# dbf makes count+1 passes, so start from LEN-1
   24531 #
   24532 # A2. Copy d2:d3 to d4:d5.  Start loop.
   24533 #
   24534 loop:
   24535 	mov.l		%d2,%d4		# copy the fraction before muls
   24536 	mov.l		%d3,%d5		# to d4:d5
   24537 #
   24538 # A3. Multiply d2:d3 by 8; extract msbs into d1.
   24539 #
   24540 	bfextu		%d2{&0:&3},%d1	# d1 = 3 msbs of d2 (bits the mul by 8 shifts out)
   24541 	asl.l		&3,%d2		# shift d2 left by 3 places
   24542 	bfextu		%d3{&0:&3},%d6	# d6 = 3 msbs of d3, zero-extended
   24543 	asl.l		&3,%d3		# shift d3 left by 3 places
   24544 	or.l		%d6,%d2		# msbs of d3 become the 3 lsbs of d2
   24545 #
   24546 # A4. Multiply d4:d5 by 2; add carry out to d1.
   24547 #
   24548 	asl.l		&1,%d5		# mul d5 by 2; bit shifted out goes to X
   24549 	roxl.l		&1,%d4		# mul d4 by 2, taking X in and leaving its msb in X
   24550 	swap		%d6		# d6 upper word was 0, so lower word is now 0
   24551 	addx.w		%d6,%d1		# d1 += 0 + X: add in extend from mul by 2
   24552 #
   24553 # A5. Add mul by 8 to mul by 2.  D1 contains the digit formed.
   24554 #
   24555 	add.l		%d5,%d3		# add lower 32 bits
   24556 	nop				# ERRATA FIX #13 (Rev. 1.2 6/6/90)
   24557 	addx.l		%d4,%d2		# add with extend upper 32 bits
   24558 	nop				# ERRATA FIX #13 (Rev. 1.2 6/6/90)
   24559 	addx.w		%d6,%d1		# d1 += 0 + X: add in extend from the 64-bit add
   24560 	swap		%d6		# swap back; d6 upper word is 0 again
   24561 #
   24562 # A6. Test d7 and branch.
   24563 #
   24564 	tst.w		%d7		# flag word 0: this digit starts a new byte
   24565 	beq.b		first_d		# flag word non-zero: form byte & write
   24566 sec_d:
   24567 	swap		%d7		# bring saved first digit to the low word
   24568 	asl.w		&4,%d7		# first digit in upper 4 bits of the byte
   24569 	add.w		%d1,%d7		# add in ls digit
   24570 	mov.b		%d7,(%a0)+	# store the packed byte in memory
   24571 	swap		%d7		# bring the flag word back to the low word
   24572 	clr.w		%d7		# flag = 0: no digit pending for the next byte
   24573 	dbf.w		%d0,loop	# count down d0; loop while digits remain
   24574 	bra.b		end_bstr	# byte complete, nothing pending, so exit
   24575 first_d:
   24576 	swap		%d7		# bring the digit word to the low word
   24577 	mov.w		%d1,%d7		# save the new digit there
   24578 	swap		%d7		# digit now in upper word, flag word back in low word
   24579 	addq.w		&1,%d7		# flag = 1: first digit of the byte is pending
   24580 	dbf.w		%d0,loop	# count down d0; loop while digits remain
   24581 	swap		%d7		# out of digits: bring the pending digit to the low word
   24582 	lsl.w		&4,%d7		# move it to upper 4 bits
   24583 	mov.b		%d7,(%a0)+	# store it in memory string; low nibble is 0
   24584 #
   24585 # Clean up and return; the bcd digits were written to memory through a0.
   24586 #
   24587 end_bstr:
   24588 	movm.l		(%sp)+,&0xff	#  restore {%d0-%d7}
   24589 	rts
   24590 
   24591 #########################################################################
   24592 # XDEF ****************************************************************	#
   24593 #	facc_in_b(): dmem_read_byte failed				#
   24594 #	facc_in_w(): dmem_read_word failed				#
   24595 #	facc_in_l(): dmem_read_long failed				#
   24596 #	facc_in_d(): dmem_read of dbl prec failed			#
   24597 #	facc_in_x(): dmem_read of ext prec failed			#
   24598 #									#
   24599 #	facc_out_b(): dmem_write_byte failed				#
   24600 #	facc_out_w(): dmem_write_word failed				#
   24601 #	facc_out_l(): dmem_write_long failed				#
   24602 #	facc_out_d(): dmem_write of dbl prec failed			#
   24603 #	facc_out_x(): dmem_write of ext prec failed			#
   24604 #									#
   24605 # XREF ****************************************************************	#
   24606 #	_real_access() - exit through access error handler		#
   24607 #									#
   24608 # INPUT ***************************************************************	#
   24609 #	None								#
   24610 # 									#
   24611 # OUTPUT **************************************************************	#
   24612 #	None								#
   24613 #									#
   24614 # ALGORITHM ***********************************************************	#
   24615 # 	Flow jumps here when an FP data fetch call gets an error 	#
   24616 # result. This means the operating system wants an access error frame	#
   24617 # made out of the current exception stack frame. 			#
   24618 #	So, we first call restore() which makes sure that any updated	#
   24619 # -(an)+ register gets returned to its pre-exception value and then	#
   24620 # we change the stack to an access error stack frame.			#
   24621 #									#
   24622 #########################################################################
   24623 
   24624 facc_in_b:
   24625 	movq.l		&0x1,%d0			# d0 = 1: byte count passed to restore
   24626 	bsr.w		restore				# undo (An)+/-(An) update, if any
   24627 
   24628 	mov.w		&0x0121,EXC_VOFF(%a6)		# stage FSLW high word (byte read) in the voff slot
   24629 	bra.w		facc_finish
   24630 
   24631 facc_in_w:
   24632 	movq.l		&0x2,%d0			# d0 = 2: byte count passed to restore
   24633 	bsr.w		restore				# undo (An)+/-(An) update, if any
   24634 
   24635 	mov.w		&0x0141,EXC_VOFF(%a6)		# stage FSLW high word (word read) in the voff slot
   24636 	bra.b		facc_finish
   24637 
   24638 facc_in_l:
   24639 	movq.l		&0x4,%d0			# d0 = 4: byte count passed to restore
   24640 	bsr.w		restore				# undo (An)+/-(An) update, if any
   24641 
   24642 	mov.w		&0x0101,EXC_VOFF(%a6)		# stage FSLW high word (long read) in the voff slot
   24643 	bra.b		facc_finish
   24644 
   24645 facc_in_d:
   24646 	movq.l		&0x8,%d0			# d0 = 8: byte count passed to restore
   24647 	bsr.w		restore				# undo (An)+/-(An) update, if any
   24648 
   24649 	mov.w		&0x0161,EXC_VOFF(%a6)		# stage FSLW high word (dbl prec read) in the voff slot
   24650 	bra.b		facc_finish
   24651 
   24652 facc_in_x:
   24653 	movq.l		&0xc,%d0			# d0 = 12: byte count passed to restore
   24654 	bsr.w		restore				# undo (An)+/-(An) update, if any
   24655 
   24656 	mov.w		&0x0161,EXC_VOFF(%a6)		# stage FSLW high word (same value as facc_in_d)
   24657 	bra.b		facc_finish
   24658 
   24659 ################################################################
   24660 
   24661 facc_out_b:
   24662 	movq.l		&0x1,%d0			# d0 = 1: byte count passed to restore
   24663 	bsr.w		restore				# undo (An)+/-(An) update, if any
   24664 
   24665 	mov.w		&0x00a1,EXC_VOFF(%a6)		# stage FSLW high word (byte write) in the voff slot
   24666 	bra.b		facc_finish
   24667 
   24668 facc_out_w:
   24669 	movq.l		&0x2,%d0			# d0 = 2: byte count passed to restore
   24670 	bsr.w		restore				# undo (An)+/-(An) update, if any
   24671 
   24672 	mov.w		&0x00c1,EXC_VOFF(%a6)		# stage FSLW high word (word write) in the voff slot
   24673 	bra.b		facc_finish
   24674 
   24675 facc_out_l:
   24676 	movq.l		&0x4,%d0			# d0 = 4: byte count passed to restore
   24677 	bsr.w		restore				# undo (An)+/-(An) update, if any
   24678 
   24679 	mov.w		&0x0081,EXC_VOFF(%a6)		# stage FSLW high word (long write) in the voff slot
   24680 	bra.b		facc_finish
   24681 
   24682 facc_out_d:
   24683 	movq.l		&0x8,%d0			# d0 = 8: byte count passed to restore
   24684 	bsr.w		restore				# undo (An)+/-(An) update, if any
   24685 
   24686 	mov.w		&0x00e1,EXC_VOFF(%a6)		# stage FSLW high word (dbl prec write) in the voff slot
   24687 	bra.b		facc_finish
   24688 
   24689 facc_out_x:
   24690 	mov.l		&0xc,%d0			# twelve bytes
   24691 	bsr.w		restore				# restore An
   24692 
   24693 	mov.w		&0x00e1,EXC_VOFF(%a6)		# set FSLW
   24694 
   24695 # here's where we actually create the access error frame from the
   24696 # current exception stack frame.
   24697 facc_finish:
   24698 	mov.l		USER_FPIAR(%a6),EXC_PC(%a6) # frame PC = saved FPIAR
   24699 
   24700 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   24701 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   24702 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1 (incl. any An fixed by restore)
   24703 
   24704 	unlk		%a6
   24705 
   24706 	mov.l		(%sp),-(%sp)		# grow frame by one long: copy SR, hi(PC) down
   24707 	mov.l		0x8(%sp),0x4(%sp)	# move lo(PC) and the staged FSLW word down
   24708 	mov.l		0xc(%sp),0x8(%sp)	# move EA down
   24709 	mov.l		&0x00000001,0xc(%sp)	# store FSLW; low word = 0x0001
   24710 	mov.w		0x6(%sp),0xc(%sp)	# FSLW high word = word staged in the voff slot by facc_*
   24711 	mov.w		&0x4008,0x6(%sp)	# store voff (0x4008) over the staged word
   24712 
   24713 	btst		&0x5,(%sp)		# bit 5 of SR high byte: supervisor or user mode?
   24714 	beq.b		facc_out2		# user
   24715 	bset		&0x2,0xd(%sp)		# set supervisor TM bit (bit 2 of FSLW byte 1)
   24716 
   24717 facc_out2:
   24718 	bra.l		_real_access		# exit through the access error handler
   24719 
   24720 ##################################################################
   24721 
   24722 # if the effective addressing mode was predecrement or postincrement,
   24723 # the emulation has already changed its value to the correct post-
   24724 # instruction value. but since we're exiting to the access error
   24725 # handler, then AN must be returned to its pre-instruction value.
   24726 # we do that here.
   24727 restore:
   24728 	mov.b		EXC_OPWORD+0x1(%a6),%d1	# d1 = low byte of opword; in: d0 = size in bytes
   24729 	andi.b		&0x38,%d1		# isolate <ea> mode field (bits 5-3)
   24730 	cmpi.b		%d1,&0x18		# postinc?
   24731 	beq.w		rest_inc
   24732 	cmpi.b		%d1,&0x20		# predec?
   24733 	beq.w		rest_dec
   24734 	rts					# any other mode: nothing to undo
   24735 
   24736 rest_inc:
   24737 	mov.b		EXC_OPWORD+0x1(%a6),%d1	# reload low byte of opword
   24738 	andi.w		&0x0007,%d1		# fetch An number; word op also clears d1 bits 15-8
   24739 
   24740 	mov.w		(tbl_rest_inc.b,%pc,%d1.w*2),%d1	# d1 = word table entry for An
   24741 	jmp		(tbl_rest_inc.b,%pc,%d1.w*1)	# entries are offsets from tbl_rest_inc
   24742 
   24743 tbl_rest_inc:
   24744 	short		ri_a0 - tbl_rest_inc
   24745 	short		ri_a1 - tbl_rest_inc
   24746 	short		ri_a2 - tbl_rest_inc
   24747 	short		ri_a3 - tbl_rest_inc
   24748 	short		ri_a4 - tbl_rest_inc
   24749 	short		ri_a5 - tbl_rest_inc
   24750 	short		ri_a6 - tbl_rest_inc
   24751 	short		ri_a7 - tbl_rest_inc
   24752 
   24753 ri_a0:
   24754 	sub.l		%d0,EXC_DREGS+0x8(%a6)	# fix stacked a0
   24755 	rts
   24756 ri_a1:
   24757 	sub.l		%d0,EXC_DREGS+0xc(%a6)	# fix stacked a1
   24758 	rts
   24759 ri_a2:
   24760 	sub.l		%d0,%a2			# fix a2 in the live register
   24761 	rts
   24762 ri_a3:
   24763 	sub.l		%d0,%a3			# fix a3 in the live register
   24764 	rts
   24765 ri_a4:
   24766 	sub.l		%d0,%a4			# fix a4 in the live register
   24767 	rts
   24768 ri_a5:
   24769 	sub.l		%d0,%a5			# fix a5 in the live register
   24770 	rts
   24771 ri_a6:
   24772 	sub.l		%d0,(%a6)		# fix stacked a6 (the long that a6 points at)
   24773 	rts
   24774 # if it's a fmove out instruction, we don't have to fix a7
   24775 # because we hadn't changed it yet. if it's an opclass two
   24776 # instruction (data moved in) and the exception was in supervisor
   24777 # mode, then a7 also wasn't updated. if it was user mode, then
   24778 # restore the correct a7 which is in the USP currently.
   24779 ri_a7:
   24780 	cmpi.b		EXC_VOFF(%a6),&0x30	# move in or out?
   24781 	bne.b		ri_a7_done		# out
   24782 
   24783 	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
   24784 	bne.b		ri_a7_done		# supervisor
   24785 	movc		%usp,%a0		# a0 = current USP (a0 is used as scratch)
   24786 	sub.l		%d0,%a0			# back out the adjustment
   24787 	movc		%a0,%usp		# write the corrected USP back
   24788 ri_a7_done:
   24789 	rts
   24790 
   24791 # need to invert adjustment value if the <ea> was predec
   24792 rest_dec:
   24793 	neg.l		%d0			# negate size so the sub.l's above add it back
   24794 	bra.b		rest_inc		# share the per-register dispatch
   24795