#
# $NetBSD: pfpsp.s,v 1.2 2001/09/16 16:34:31 wiz Exp $
#

5 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
6 # MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
7 # M68000 Hi-Performance Microprocessor Division
8 # M68060 Software Package Production Release
9 #
10 # M68060 Software Package Copyright (C) 1993, 1994, 1995, 1996 Motorola Inc.
11 # All rights reserved.
12 #
13 # THE SOFTWARE is provided on an "AS IS" basis and without warranty.
14 # To the maximum extent permitted by applicable law,
15 # MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
16 # INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS
17 # FOR A PARTICULAR PURPOSE and any warranty against infringement with
18 # regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
19 # and any accompanying written materials.
20 #
21 # To the maximum extent permitted by applicable law,
22 # IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
23 # (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
24 # BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
25 # ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
26 #
27 # Motorola assumes no responsibility for the maintenance and support
28 # of the SOFTWARE.
29 #
30 # You are hereby granted a copyright license to use, modify, and distribute the
31 # SOFTWARE so long as this entire notice is retained without alteration
32 # in any modified and/or redistributed versions, and that such modified
33 # versions are clearly identified as such.
34 # No licenses are granted by implication, estoppel or otherwise under any
35 # patents or trademarks of Motorola, Inc.
36 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
37
38 #
39 # freal.s:
40 # This file is appended to the top of the 060FPSP package
41 # and contains the entry points into the package. The user, in
42 # effect, branches to one of the branch table entries located
43 # after _060FPSP_TABLE.
44 # Also, subroutine stubs exist in this file (_fpsp_done for
45 # example) that are referenced by the FPSP package itself in order
46 # to call a given routine. The stub routine actually performs the
47 # callout. The FPSP code does a "bsr" to the stub routine. This
48 # extra layer of hierarchy adds a slight performance penalty but
# it makes the FPSP code easier to read and more maintainable.
50 #
51
#########################################################################
# Callout dispatch offsets.                                             #
# The operating system builds a table of callout entries in the 0x80    #
# bytes immediately preceding _060FPSP_TABLE.  Each "set" below names   #
# the byte offset of one entry within that table; the stub routines     #
# later in this file index the table with these offsets.                #
#########################################################################

# exception-handler callouts (0x00 - 0x30)
	set _off_bsun, 0x00
	set _off_snan, 0x04
	set _off_operr, 0x08
	set _off_ovfl, 0x0c
	set _off_unfl, 0x10
	set _off_dz, 0x14
	set _off_inex, 0x18
	set _off_fline, 0x1c
	set _off_fpu_dis, 0x20
	set _off_trap, 0x24
	set _off_trace, 0x28
	set _off_access, 0x2c
	set _off_done, 0x30

# memory-access callouts (0x40 - 0x68)
	set _off_imr, 0x40
	set _off_dmr, 0x44
	set _off_dmw, 0x48
	set _off_irw, 0x4c
	set _off_irl, 0x50
	set _off_drb, 0x54
	set _off_drw, 0x58
	set _off_drl, 0x5c
	set _off_dwb, 0x60
	set _off_dww, 0x64
	set _off_dwl, 0x68
_060FPSP_TABLE:

###############################################################

# Here's the table of ENTRY POINTS for those linking the package.
# Each entry is 8 bytes: a 6-byte "bra.l" plus a 2-byte pad word.
# 9 entries (72 bytes) + 56 bytes of pad = 0x80 bytes total, so
# the table of callout pointers sits exactly 0x80 bytes before
# _060FPSP_TABLE (see the "-0x80" offsets in the stubs below).
	bra.l		_fpsp_snan
	short		0x0000
	bra.l		_fpsp_operr
	short		0x0000
	bra.l		_fpsp_ovfl
	short		0x0000
	bra.l		_fpsp_unfl
	short		0x0000
	bra.l		_fpsp_dz
	short		0x0000
	bra.l		_fpsp_inex
	short		0x0000
	bra.l		_fpsp_fline
	short		0x0000
	bra.l		_fpsp_unsupp
	short		0x0000
	bra.l		_fpsp_effadd
	short		0x0000

	space		56
103
104 ###############################################################
###############################################################
# Exception "callout" stubs.                                  #
# Each stub jumps through the OS-supplied callout table that  #
# sits 0x80 bytes before _060FPSP_TABLE, preserving every     #
# register.  The table entry holds the handler's offset from  #
# the table base (so the package stays position-independent): #
#   mov.l  %d0,-(%sp)            save d0, our only scratch    #
#   mov.l  (tbl+off,%pc),%d0     d0 = handler offset          #
#   pea.l  (tbl,%pc,%d0)         push absolute handler addr   #
#   mov.l  0x4(%sp),%d0          restore the saved d0         #
#   rtd    &0x4                  pop handler addr into PC;    #
#                                "+4" discards the d0 slot    #
###############################################################
	global		_fpsp_done
_fpsp_done:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_done,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_ovfl
_real_ovfl:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_ovfl,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_unfl
_real_unfl:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_unfl,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_inex
_real_inex:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_inex,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_bsun
_real_bsun:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_bsun,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_operr
_real_operr:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_operr,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_snan
_real_snan:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_snan,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_dz
_real_dz:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dz,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_fline
_real_fline:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_fline,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_fpu_disabled
_real_fpu_disabled:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_fpu_dis,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_trap
_real_trap:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_trap,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_trace
_real_trace:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_trace,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_access
_real_access:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_access,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4
208
209 #######################################
210
#######################################
# Memory-access "callout" stubs.      #
# Same PC-relative dispatch mechanism #
# as the exception callouts above,    #
# indexing the 0x40-0x68 entries of   #
# the OS-supplied callout table.      #
#######################################

	global		_imem_read
_imem_read:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_imr,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_read
_dmem_read:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dmr,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_write
_dmem_write:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dmw,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_imem_read_word
_imem_read_word:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_irw,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_imem_read_long
_imem_read_long:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_irl,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_read_byte
_dmem_read_byte:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_drb,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_read_word
_dmem_read_word:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_drw,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_read_long
_dmem_read_long:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_drl,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_write_byte
_dmem_write_byte:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dwb,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_write_word
_dmem_write_word:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dww,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_write_long
_dmem_write_long:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dwl,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4
298
#
# This file contains a set of define statements for constants
# in order to promote readability within the corecode itself.
#
# All LV-relative offsets index the LOCAL_SIZE-byte frame that each
# handler allocates with "link.w %a6,&-LOCAL_SIZE"; the EXC_* stack
# offsets index the processor exception frame above the link.
#

	set LOCAL_SIZE, 192			# stack frame size(bytes)
	set LV, -LOCAL_SIZE			# stack offset

	set EXC_SR, 0x4				# stack status register
	set EXC_PC, 0x6				# stack pc
	set EXC_VOFF, 0xa			# stacked vector offset
	set EXC_EA, 0xc			# stacked <ea>

	set EXC_FP, 0x0			# frame pointer

	set EXC_AREGS, -68			# offset of all address regs
	set EXC_DREGS, -100			# offset of all data regs
	set EXC_FPREGS, -36			# offset of all fp regs

	set EXC_A7, EXC_AREGS+(7*4)		# offset of saved a7
	set OLD_A7, EXC_AREGS+(6*4)		# extra copy of saved a7
	set EXC_A6, EXC_AREGS+(6*4)		# offset of saved a6
	set EXC_A5, EXC_AREGS+(5*4)
	set EXC_A4, EXC_AREGS+(4*4)
	set EXC_A3, EXC_AREGS+(3*4)
	set EXC_A2, EXC_AREGS+(2*4)
	set EXC_A1, EXC_AREGS+(1*4)
	set EXC_A0, EXC_AREGS+(0*4)
	set EXC_D7, EXC_DREGS+(7*4)
	set EXC_D6, EXC_DREGS+(6*4)
	set EXC_D5, EXC_DREGS+(5*4)
	set EXC_D4, EXC_DREGS+(4*4)
	set EXC_D3, EXC_DREGS+(3*4)
	set EXC_D2, EXC_DREGS+(2*4)
	set EXC_D1, EXC_DREGS+(1*4)
	set EXC_D0, EXC_DREGS+(0*4)

	set EXC_FP0, EXC_FPREGS+(0*12)	# offset of saved fp0
	set EXC_FP1, EXC_FPREGS+(1*12)	# offset of saved fp1
	set EXC_FP2, EXC_FPREGS+(2*12)	# offset of saved fp2 (not used)

	set FP_SCR1, LV+80			# fp scratch 1
	set FP_SCR1_EX, FP_SCR1+0
	set FP_SCR1_SGN, FP_SCR1+2
	set FP_SCR1_HI, FP_SCR1+4
	set FP_SCR1_LO, FP_SCR1+8

	set FP_SCR0, LV+68			# fp scratch 0
	set FP_SCR0_EX, FP_SCR0+0
	set FP_SCR0_SGN, FP_SCR0+2
	set FP_SCR0_HI, FP_SCR0+4
	set FP_SCR0_LO, FP_SCR0+8

	set FP_DST, LV+56			# fp destination operand
	set FP_DST_EX, FP_DST+0
	set FP_DST_SGN, FP_DST+2
	set FP_DST_HI, FP_DST+4
	set FP_DST_LO, FP_DST+8

	set FP_SRC, LV+44			# fp source operand
	set FP_SRC_EX, FP_SRC+0
	set FP_SRC_SGN, FP_SRC+2
	set FP_SRC_HI, FP_SRC+4
	set FP_SRC_LO, FP_SRC+8

	set USER_FPIAR, LV+40		# FP instr address register

	set USER_FPSR, LV+36		# FP status register
	set FPSR_CC, USER_FPSR+0		# FPSR condition codes
	set FPSR_QBYTE, USER_FPSR+1		# FPSR quotient byte
	set FPSR_EXCEPT, USER_FPSR+2	# FPSR exception status byte
	set FPSR_AEXCEPT, USER_FPSR+3	# FPSR accrued exception byte

	set USER_FPCR, LV+32		# FP control register
	set FPCR_ENABLE, USER_FPCR+2	# FPCR exception enable
	set FPCR_MODE, USER_FPCR+3		# FPCR rounding mode control

	set L_SCR3, LV+28			# integer scratch 3
	set L_SCR2, LV+24			# integer scratch 2
	set L_SCR1, LV+20			# integer scratch 1

	set STORE_FLG, LV+19		# flag: operand store (ie. not fcmp/ftst)

	set EXC_TEMP2, LV+24		# temporary space
	set EXC_TEMP, LV+16			# temporary space

	set DTAG, LV+15			# destination operand type
	set STAG, LV+14			# source operand type

	set SPCOND_FLG, LV+10		# flag: special case (see below)

	set EXC_CC, LV+8			# saved condition codes
	set EXC_EXTWPTR, LV+4		# saved current PC (active)
	set EXC_EXTWORD, LV+2		# saved extension word
	set EXC_CMDREG, LV+2		# saved extension word
	set EXC_OPWORD, LV+0		# saved operation word
395
################################

# Helpful macros

	set FTEMP, 0			# offsets within an
	set FTEMP_EX, 0			# extended precision
	set FTEMP_SGN, 2			# value saved in memory.
	set FTEMP_HI, 4
	set FTEMP_LO, 8
	set FTEMP_GRS, 12

	set LOCAL, 0			# offsets within an
	set LOCAL_EX, 0			# extended precision
	set LOCAL_SGN, 2			# value saved in memory.
	set LOCAL_HI, 4
	set LOCAL_LO, 8
	set LOCAL_GRS, 12

	set DST, 0				# offsets within an
	set DST_EX, 0			# extended precision
	set DST_HI, 4			# value saved in memory.
	set DST_LO, 8

	set SRC, 0				# offsets within an
	set SRC_EX, 0			# extended precision
	set SRC_HI, 4			# value saved in memory.
	set SRC_LO, 8

	set SGL_LO, 0x3f81			# min sgl prec exponent
	set SGL_HI, 0x407e			# max sgl prec exponent
	set DBL_LO, 0x3c01			# min dbl prec exponent
	set DBL_HI, 0x43fe			# max dbl prec exponent
	set EXT_LO, 0x0			# min ext prec exponent
	set EXT_HI, 0x7ffe			# max ext prec exponent

	set EXT_BIAS, 0x3fff		# extended precision bias
	set SGL_BIAS, 0x007f		# single precision bias
	set DBL_BIAS, 0x03ff		# double precision bias

	set NORM, 0x00			# operand type for STAG/DTAG
	set ZERO, 0x01			# operand type for STAG/DTAG
	set INF, 0x02			# operand type for STAG/DTAG
	set QNAN, 0x03			# operand type for STAG/DTAG
	set DENORM, 0x04			# operand type for STAG/DTAG
	set SNAN, 0x05			# operand type for STAG/DTAG
	set UNNORM, 0x06			# operand type for STAG/DTAG

##################
# FPSR/FPCR bits #
##################
	set neg_bit, 0x3			# negative result
	set z_bit, 0x2			# zero result
	set inf_bit, 0x1			# infinite result
	set nan_bit, 0x0			# NAN result

	set q_sn_bit, 0x7			# sign bit of quotient byte

	set bsun_bit, 7			# branch on unordered
	set snan_bit, 6			# signalling NAN
	set operr_bit, 5			# operand error
	set ovfl_bit, 4			# overflow
	set unfl_bit, 3			# underflow
	set dz_bit, 2			# divide by zero
	set inex2_bit, 1			# inexact result 2
	set inex1_bit, 0			# inexact result 1

	set aiop_bit, 7			# accrued inexact operation bit
	set aovfl_bit, 6			# accrued overflow bit
	set aunfl_bit, 5			# accrued underflow bit
	set adz_bit, 4			# accrued dz bit
	set ainex_bit, 3			# accrued inexact bit

#############################
# FPSR individual bit masks #
#############################
	set neg_mask, 0x08000000		# negative bit mask (lw)
	set inf_mask, 0x02000000		# infinity bit mask (lw)
	set z_mask, 0x04000000		# zero bit mask (lw)
	set nan_mask, 0x01000000		# nan bit mask (lw)

	set neg_bmask, 0x08			# negative bit mask (byte)
	set inf_bmask, 0x02			# infinity bit mask (byte)
	set z_bmask, 0x04			# zero bit mask (byte)
	set nan_bmask, 0x01			# nan bit mask (byte)

	set bsun_mask, 0x00008000		# bsun exception mask
	set snan_mask, 0x00004000		# snan exception mask
	set operr_mask, 0x00002000		# operr exception mask
	set ovfl_mask, 0x00001000		# overflow exception mask
	set unfl_mask, 0x00000800		# underflow exception mask
	set dz_mask, 0x00000400		# dz exception mask
	set inex2_mask, 0x00000200		# inex2 exception mask
	set inex1_mask, 0x00000100		# inex1 exception mask

	set aiop_mask, 0x00000080		# accrued illegal operation
	set aovfl_mask, 0x00000040		# accrued overflow
	set aunfl_mask, 0x00000020		# accrued underflow
	set adz_mask, 0x00000010		# accrued divide by zero
	set ainex_mask, 0x00000008		# accrued inexact

######################################
# FPSR combinations used in the FPSP #
######################################
	set dzinf_mask, inf_mask+dz_mask+adz_mask
	set opnan_mask, nan_mask+operr_mask+aiop_mask
	set nzi_mask, 0x01ffffff		#clears N, Z, and I
	set unfinx_mask, unfl_mask+inex2_mask+aunfl_mask+ainex_mask
	set unf2inx_mask, unfl_mask+inex2_mask+ainex_mask
	set ovfinx_mask, ovfl_mask+inex2_mask+aovfl_mask+ainex_mask
	set inx1a_mask, inex1_mask+ainex_mask
	set inx2a_mask, inex2_mask+ainex_mask
	set snaniop_mask, nan_mask+snan_mask+aiop_mask
	set snaniop2_mask, snan_mask+aiop_mask
	set naniop_mask, nan_mask+aiop_mask
	set neginf_mask, neg_mask+inf_mask
	set infaiop_mask, inf_mask+aiop_mask
	set negz_mask, neg_mask+z_mask
	set opaop_mask, operr_mask+aiop_mask
	set unfl_inx_mask, unfl_mask+aunfl_mask+ainex_mask
	set ovfl_inx_mask, ovfl_mask+aovfl_mask+ainex_mask
516
#########
# misc. #
#########
	set rnd_stky_bit, 29		# stky bit pos in longword

	set sign_bit, 0x7			# sign bit
	set signan_bit, 0x6			# signalling nan bit

	set sgl_thresh, 0x3f81		# minimum sgl exponent
	set dbl_thresh, 0x3c01		# minimum dbl exponent

	set x_mode, 0x0			# extended precision
	set s_mode, 0x4			# single precision
	set d_mode, 0x8			# double precision

	set rn_mode, 0x0			# round-to-nearest
	set rz_mode, 0x1			# round-to-zero
	set rm_mode, 0x2			# round-to-minus-infinity
	set rp_mode, 0x3			# round-to-plus-infinity

	set mantissalen, 64			# length of mantissa in bits

	set BYTE, 1				# len(byte) == 1 byte
	set WORD, 2				# len(word) == 2 bytes
	set LONG, 4				# len(longword) == 4 bytes

	set BSUN_VEC, 0xc0			# bsun vector offset
	set INEX_VEC, 0xc4			# inexact vector offset
	set DZ_VEC, 0xc8			# dz vector offset
	set UNFL_VEC, 0xcc			# unfl vector offset
	set OPERR_VEC, 0xd0			# operr vector offset
	set OVFL_VEC, 0xd4			# ovfl vector offset
	set SNAN_VEC, 0xd8			# snan vector offset

###########################
# SPecial CONDition FLaGs #
###########################
	set ftrapcc_flg, 0x01		# flag bit: ftrapcc exception
	set fbsun_flg, 0x02			# flag bit: bsun exception
	set mia7_flg, 0x04			# flag bit: (a7)+ <ea>
	set mda7_flg, 0x08			# flag bit: -(a7) <ea>
	set fmovm_flg, 0x40			# flag bit: fmovm instruction
	set immed_flg, 0x80			# flag bit: &<data> <ea>

	set ftrapcc_bit, 0x0
	set fbsun_bit, 0x1
	set mia7_bit, 0x2
	set mda7_bit, 0x3
	set immed_bit, 0x7

##################################
# TRANSCENDENTAL "LAST-OP" FLAGS #
##################################
	set FMUL_OP, 0x0			# fmul instr performed last
	set FDIV_OP, 0x1			# fdiv performed last
	set FADD_OP, 0x2			# fadd performed last
	set FMOV_OP, 0x3			# fmov performed last

#############
# CONSTANTS #
#############
T1:	long		0x40C62D38,0xD3D64634	# 16381 LOG2 LEAD
T2:	long		0x3D6F90AE,0xB1E75CC7	# 16381 LOG2 TRAIL

PI:	long		0x40000000,0xC90FDAA2,0x2168C235,0x00000000
PIBY2:	long		0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000

TWOBYPI:
	long		0x3FE45F30,0x6DC9C883
586
587 #########################################################################
588 # XDEF **************************************************************** #
589 # _fpsp_ovfl(): 060FPSP entry point for FP Overflow exception. #
590 # #
591 # This handler should be the first code executed upon taking the #
592 # FP Overflow exception in an operating system. #
593 # #
594 # XREF **************************************************************** #
595 # _imem_read_long() - read instruction longword #
596 # fix_skewed_ops() - adjust src operand in fsave frame #
597 # set_tag_x() - determine optype of src/dst operands #
598 # store_fpreg() - store opclass 0 or 2 result to FP regfile #
599 # unnorm_fix() - change UNNORM operands to NORM or ZERO #
600 # load_fpn2() - load dst operand from FP regfile #
601 # fout() - emulate an opclass 3 instruction #
602 # tbl_unsupp - add of table of emulation routines for opclass 0,2 #
603 # _fpsp_done() - "callout" for 060FPSP exit (all work done!) #
604 # _real_ovfl() - "callout" for Overflow exception enabled code #
605 # _real_inex() - "callout" for Inexact exception enabled code #
606 # _real_trace() - "callout" for Trace exception code #
607 # #
608 # INPUT *************************************************************** #
609 # - The system stack contains the FP Ovfl exception stack frame #
610 # - The fsave frame contains the source operand #
611 # #
612 # OUTPUT ************************************************************** #
613 # Overflow Exception enabled: #
614 # - The system stack is unchanged #
615 # - The fsave frame contains the adjusted src op for opclass 0,2 #
616 # Overflow Exception disabled: #
617 # - The system stack is unchanged #
618 # - The "exception present" flag in the fsave frame is cleared #
619 # #
620 # ALGORITHM *********************************************************** #
621 # On the 060, if an FP overflow is present as the result of any #
622 # instruction, the 060 will take an overflow exception whether the #
623 # exception is enabled or disabled in the FPCR. For the disabled case, #
624 # This handler emulates the instruction to determine what the correct #
625 # default result should be for the operation. This default result is #
626 # then stored in either the FP regfile, data regfile, or memory. #
627 # Finally, the handler exits through the "callout" _fpsp_done() #
628 # denoting that no exceptional conditions exist within the machine. #
629 # If the exception is enabled, then this handler must create the #
#	exceptional operand and place it in the fsave state frame, and store	#
631 # the default result (only if the instruction is opclass 3). For #
632 # exceptions enabled, this handler must exit through the "callout" #
633 # _real_ovfl() so that the operating system enabled overflow handler #
634 # can handle this case. #
635 # Two other conditions exist. First, if overflow was disabled #
636 # but the inexact exception was enabled, this handler must exit #
637 # through the "callout" _real_inex() regardless of whether the result #
638 # was inexact. #
639 # Also, in the case of an opclass three instruction where #
640 # overflow was disabled and the trace exception was enabled, this #
641 # handler must exit through the "callout" _real_trace(). #
642 # #
643 #########################################################################
644
645 global _fpsp_ovfl
646 _fpsp_ovfl:
647
648 #$# sub.l &24,%sp # make room for src/dst
649
650 link.w %a6,&-LOCAL_SIZE # init stack frame
651
652 fsave FP_SRC(%a6) # grab the "busy" frame
653
654 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
655 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
656 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
657
658 # the FPIAR holds the "current PC" of the faulting instruction
659 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
660 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
661 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
662 bsr.l _imem_read_long # fetch the instruction words
663 mov.l %d0,EXC_OPWORD(%a6)
664
665 ##############################################################################
666
667 btst &0x5,EXC_CMDREG(%a6) # is instr an fmove out?
668 bne.w fovfl_out
669
670
671 lea FP_SRC(%a6),%a0 # pass: ptr to src op
672 bsr.l fix_skewed_ops # fix src op
673
674 # since, I believe, only NORMs and DENORMs can come through here,
675 # maybe we can avoid the subroutine call.
676 lea FP_SRC(%a6),%a0 # pass: ptr to src op
677 bsr.l set_tag_x # tag the operand type
678 mov.b %d0,STAG(%a6) # maybe NORM,DENORM
679
680 # bit five of the fp extension word separates the monadic and dyadic operations
681 # that can pass through fpsp_ovfl(). remember that fcmp, ftst, and fsincos
682 # will never take this exception.
683 btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
684 beq.b fovfl_extract # monadic
685
686 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
687 bsr.l load_fpn2 # load dst into FP_DST
688
689 lea FP_DST(%a6),%a0 # pass: ptr to dst op
690 bsr.l set_tag_x # tag the operand type
691 cmpi.b %d0,&UNNORM # is operand an UNNORM?
692 bne.b fovfl_op2_done # no
693 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
694 fovfl_op2_done:
695 mov.b %d0,DTAG(%a6) # save dst optype tag
696
697 fovfl_extract:
698
699 #$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
700 #$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
701 #$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
702 #$# mov.l FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
703 #$# mov.l FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
704 #$# mov.l FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
705
706 clr.l %d0
707 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
708
709 mov.b 1+EXC_CMDREG(%a6),%d1
710 andi.w &0x007f,%d1 # extract extension
711
712 andi.l &0x00ff01ff,USER_FPSR(%a6) # zero all but accured field
713
714 fmov.l &0x0,%fpcr # zero current control regs
715 fmov.l &0x0,%fpsr
716
717 lea FP_SRC(%a6),%a0
718 lea FP_DST(%a6),%a1
719
720 # maybe we can make these entry points ONLY the OVFL entry points of each routine.
721 mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
722 jsr (tbl_unsupp.l,%pc,%d1.l*1)
723
724 # the operation has been emulated. the result is in fp0.
725 # the EXOP, if an exception occurred, is in fp1.
726 # we must save the default result regardless of whether
727 # traps are enabled or disabled.
728 bfextu EXC_CMDREG(%a6){&6:&3},%d0
729 bsr.l store_fpreg
730
731 # the exceptional possibilities we have left ourselves with are ONLY overflow
732 # and inexact. and, the inexact is such that overflow occurred and was disabled
733 # but inexact was enabled.
734 btst &ovfl_bit,FPCR_ENABLE(%a6)
735 bne.b fovfl_ovfl_on
736
737 btst &inex2_bit,FPCR_ENABLE(%a6)
738 bne.b fovfl_inex_on
739
740 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
741 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
742 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
743
744 unlk %a6
745 #$# add.l &24,%sp
746 bra.l _fpsp_done
747
748 # overflow is enabled AND overflow, of course, occurred. so, we have the EXOP
749 # in fp1. now, simply jump to _real_ovfl()!
750 fovfl_ovfl_on:
751 fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack
752
753 mov.w &0xe005,2+FP_SRC(%a6) # save exc status
754
755 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
756 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
757 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
758
759 frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
760
761 unlk %a6
762
763 bra.l _real_ovfl
764
765 # overflow occurred but is disabled. meanwhile, inexact is enabled. therefore,
766 # we must jump to real_inex().
767 fovfl_inex_on:
768
769 fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack
770
771 mov.b &0xc4,1+EXC_VOFF(%a6) # vector offset = 0xc4
772 mov.w &0xe001,2+FP_SRC(%a6) # save exc status
773
774 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
775 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
776 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
777
778 frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
779
780 unlk %a6
781
782 bra.l _real_inex
783
784 ########################################################################
785 fovfl_out:
786
787
788 #$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
789 #$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
790 #$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
791
792 # the src operand is definitely a NORM(!), so tag it as such
793 mov.b &NORM,STAG(%a6) # set src optype tag
794
795 clr.l %d0
796 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
797
798 and.l &0xffff00ff,USER_FPSR(%a6) # zero all but accured field
799
800 fmov.l &0x0,%fpcr # zero current control regs
801 fmov.l &0x0,%fpsr
802
803 lea FP_SRC(%a6),%a0 # pass ptr to src operand
804
805 bsr.l fout
806
807 btst &ovfl_bit,FPCR_ENABLE(%a6)
808 bne.w fovfl_ovfl_on
809
810 btst &inex2_bit,FPCR_ENABLE(%a6)
811 bne.w fovfl_inex_on
812
813 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
814 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
815 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
816
817 unlk %a6
818 #$# add.l &24,%sp
819
820 btst &0x7,(%sp) # is trace on?
821 beq.l _fpsp_done # no
822
823 fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
824 mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
825 bra.l _real_trace
826
827 #########################################################################
828 # XDEF **************************************************************** #
829 # _fpsp_unfl(): 060FPSP entry point for FP Underflow exception. #
830 # #
831 # This handler should be the first code executed upon taking the #
832 # FP Underflow exception in an operating system. #
833 # #
834 # XREF **************************************************************** #
835 # _imem_read_long() - read instruction longword #
836 # fix_skewed_ops() - adjust src operand in fsave frame #
837 # set_tag_x() - determine optype of src/dst operands #
838 # store_fpreg() - store opclass 0 or 2 result to FP regfile #
839 # unnorm_fix() - change UNNORM operands to NORM or ZERO #
840 # load_fpn2() - load dst operand from FP regfile #
841 # fout() - emulate an opclass 3 instruction #
842 # tbl_unsupp - add of table of emulation routines for opclass 0,2 #
843 # _fpsp_done() - "callout" for 060FPSP exit (all work done!) #
#	_real_unfl() - "callout" for Underflow exception enabled code	#
845 # _real_inex() - "callout" for Inexact exception enabled code #
846 # _real_trace() - "callout" for Trace exception code #
847 # #
848 # INPUT *************************************************************** #
849 # - The system stack contains the FP Unfl exception stack frame #
850 # - The fsave frame contains the source operand #
851 # #
852 # OUTPUT ************************************************************** #
853 # Underflow Exception enabled: #
854 # - The system stack is unchanged #
855 # - The fsave frame contains the adjusted src op for opclass 0,2 #
856 # Underflow Exception disabled: #
857 # - The system stack is unchanged #
858 # - The "exception present" flag in the fsave frame is cleared #
859 # #
860 # ALGORITHM *********************************************************** #
861 # On the 060, if an FP underflow is present as the result of any #
862 # instruction, the 060 will take an underflow exception whether the #
863 # exception is enabled or disabled in the FPCR. For the disabled case, #
864 # This handler emulates the instruction to determine what the correct #
865 # default result should be for the operation. This default result is #
866 # then stored in either the FP regfile, data regfile, or memory. #
867 # Finally, the handler exits through the "callout" _fpsp_done() #
868 # denoting that no exceptional conditions exist within the machine. #
869 # If the exception is enabled, then this handler must create the #
#	exceptional operand and place it in the fsave state frame, and store	#
871 # the default result (only if the instruction is opclass 3). For #
872 # exceptions enabled, this handler must exit through the "callout" #
#	_real_unfl() so that the operating system enabled underflow handler	#
874 # can handle this case. #
#	Two other conditions exist. First, if underflow was disabled	#
#	but the inexact exception was enabled and the result was	#
#	inexact, this handler must exit through the "callout"		#
#	_real_inex().							#
879 # Also, in the case of an opclass three instruction where #
880 # underflow was disabled and the trace exception was enabled, this #
881 # handler must exit through the "callout" _real_trace(). #
882 # #
883 #########################################################################
884
885 	global		_fpsp_unfl
886 _fpsp_unfl:
887 
888 #$#	sub.l		&24,%sp			# make room for src/dst
889 
890 	link.w		%a6,&-LOCAL_SIZE	# init stack frame
891 
892 	fsave		FP_SRC(%a6)		# grab the "busy" frame
893 
894 	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
895 	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
896 	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
897 
898 # the FPIAR holds the "current PC" of the faulting instruction
899 	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
900 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
901 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
902 	bsr.l		_imem_read_long		# fetch the instruction words
903 	mov.l		%d0,EXC_OPWORD(%a6)
904 
905 ##############################################################################
906 
907 	btst		&0x5,EXC_CMDREG(%a6)	# is instr an fmove out?
908 	bne.w		funfl_out
909 
910 
911 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
912 	bsr.l		fix_skewed_ops		# fix src op
913 
914 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
915 	bsr.l		set_tag_x		# tag the operand type
916 	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM
917 
918 # bit five of the fp ext word separates the monadic and dyadic operations
919 # that can pass through fpsp_unfl(). remember that fcmp, and ftst
920 # will never take this exception.
921 	btst		&0x5,1+EXC_CMDREG(%a6)	# is op monadic or dyadic?
922 	beq.b		funfl_extract		# monadic
923 
924 # now, what's left that's not dyadic is fsincos. we can distinguish it
925 # from all dyadics by the '0110xxx pattern
926 	btst		&0x4,1+EXC_CMDREG(%a6)	# is op an fsincos?
927 	bne.b		funfl_extract		# yes
928 
929 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
930 	bsr.l		load_fpn2		# load dst into FP_DST
931 
932 	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
933 	bsr.l		set_tag_x		# tag the operand type
934 	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
935 	bne.b		funfl_op2_done		# no
936 	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
937 funfl_op2_done:
938 	mov.b		%d0,DTAG(%a6)		# save dst optype tag
939 
940 funfl_extract:
941 
942 #$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
943 #$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
944 #$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
945 #$#	mov.l		FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
946 #$#	mov.l		FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
947 #$#	mov.l		FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
948 
949 	clr.l		%d0
950 	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
951 
952 	mov.b		1+EXC_CMDREG(%a6),%d1
953 	andi.w		&0x007f,%d1		# extract extension
954 
# NOTE(review): this mask keeps INEX1 (bit 8) set, unlike the 0x00ff00ff
# masks used by the other handlers in this package -- confirm intentional.
955 	andi.l		&0x00ff01ff,USER_FPSR(%a6)
956 
957 	fmov.l		&0x0,%fpcr		# zero current control regs
958 	fmov.l		&0x0,%fpsr
959 
960 	lea		FP_SRC(%a6),%a0
961 	lea		FP_DST(%a6),%a1
962 
963 # maybe we can make these entry points ONLY the UNFL entry points of each routine.
964 	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
965 	jsr		(tbl_unsupp.l,%pc,%d1.l*1)
966 
967 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0
968 	bsr.l		store_fpreg
969 
970 # The `060 FPU multiplier hardware is such that if the result of a
971 # multiply operation is the smallest possible normalized number
972 # (0x00000000_80000000_00000000), then the machine will take an
973 # underflow exception. Since this is incorrect, we need to check
974 # if our emulation, after re-doing the operation, decided that
975 # no underflow was called for. We do these checks only in
976 # funfl_{unfl,inex}_on() because w/ both exceptions disabled, this
977 # special case will simply exit gracefully with the correct result.
978 
979 # the exceptional possibilities we have left ourselves with are ONLY underflow
980 # and inexact. and, the inexact is such that underflow occurred and was disabled
981 # but inexact was enabled.
982 	btst		&unfl_bit,FPCR_ENABLE(%a6)
983 	bne.b		funfl_unfl_on
984 
985 funfl_chkinex:
986 	btst		&inex2_bit,FPCR_ENABLE(%a6)
987 	bne.b		funfl_inex_on
988 
989 funfl_exit:
990 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
991 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
992 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
993 
994 	unlk		%a6
995 #$#	add.l		&24,%sp
996 	bra.l		_fpsp_done
997 
998 # underflow is enabled AND underflow, of course, occurred. so, we have the EXOP
999 # in fp1 (don't forget to save fp0). what to do now?
1000 # well, we simply have to go to _real_unfl()!
1001 funfl_unfl_on:
1002 
1003 # The `060 FPU multiplier hardware is such that if the result of a
1004 # multiply operation is the smallest possible normalized number
1005 # (0x00000000_80000000_00000000), then the machine will take an
1006 # underflow exception. Since this is incorrect, we check here to see
1007 # if our emulation, after re-doing the operation, decided that
1008 # no underflow was called for.
1009 	btst		&unfl_bit,FPSR_EXCEPT(%a6)
1010 	beq.w		funfl_chkinex
1011 
1012 funfl_unfl_on2:
1013 	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack
1014 
1015 	mov.w		&0xe003,2+FP_SRC(%a6)	# save exc status
1016 
1017 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
1018 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1019 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1020 
1021 	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
1022 
1023 	unlk		%a6
1024 
1025 	bra.l		_real_unfl
1026 
1027 # underflow occurred but is disabled. meanwhile, inexact is enabled. therefore,
1028 # we must jump to real_inex().
1029 funfl_inex_on:
1030 
1031 # The `060 FPU multiplier hardware is such that if the result of a
1032 # multiply operation is the smallest possible normalized number
1033 # (0x00000000_80000000_00000000), then the machine will take an
1034 # underflow exception.
1035 # But, whether bogus or not, if inexact is enabled AND it occurred,
1036 # then we have to branch to real_inex.
1037 
1038 	btst		&inex2_bit,FPSR_EXCEPT(%a6)
1039 	beq.w		funfl_exit
1040 
1041 funfl_inex_on2:
1042 
1043 	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to stack
1044 
1045 	mov.b		&0xc4,1+EXC_VOFF(%a6)	# vector offset = 0xc4
1046 	mov.w		&0xe001,2+FP_SRC(%a6)	# save exc status
1047 
1048 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
1049 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1050 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1051 
1052 	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
1053 
1054 	unlk		%a6
1055 
1056 	bra.l		_real_inex
1057 
1058 #######################################################################
# opclass 3 (fmove out) underflow: let fout() emulate the move and then
# dispatch on which exceptions (if any) are enabled.
1059 funfl_out:
1060 
1061 
1062 #$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
1063 #$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
1064 #$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
1065 
1066 # the src operand is definitely a NORM(!), so tag it as such
1067 	mov.b		&NORM,STAG(%a6)		# set src optype tag
1068 
1069 	clr.l		%d0
1070 	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
1071 
1072 	and.l		&0xffff00ff,USER_FPSR(%a6) # zero all but accrued field
1073 
1074 	fmov.l		&0x0,%fpcr		# zero current control regs
1075 	fmov.l		&0x0,%fpsr
1076 
1077 	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
1078 
1079 	bsr.l		fout
1080 
1081 	btst		&unfl_bit,FPCR_ENABLE(%a6)
1082 	bne.w		funfl_unfl_on2
1083 
1084 	btst		&inex2_bit,FPCR_ENABLE(%a6)
1085 	bne.w		funfl_inex_on2
1086 
1087 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
1088 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1089 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1090 
1091 	unlk		%a6
1092 #$#	add.l		&24,%sp
1093 
1094 	btst		&0x7,(%sp)		# is trace on?
1095 	beq.l		_fpsp_done		# no
1096 
# trace was enabled: convert the exception frame into a Trace frame
1097 	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
1098 	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
1099 	bra.l		_real_trace
1100
1101 #########################################################################
1102 # XDEF **************************************************************** #
1103 # _fpsp_unsupp(): 060FPSP entry point for FP "Unimplemented #
1104 # Data Type" exception. #
1105 # #
1106 # This handler should be the first code executed upon taking the #
1107 # FP Unimplemented Data Type exception in an operating system. #
1108 # #
1109 # XREF **************************************************************** #
1110 # _imem_read_{word,long}() - read instruction word/longword #
1111 # fix_skewed_ops() - adjust src operand in fsave frame #
1112 # set_tag_x() - determine optype of src/dst operands #
1113 # store_fpreg() - store opclass 0 or 2 result to FP regfile #
1114 # unnorm_fix() - change UNNORM operands to NORM or ZERO #
1115 # load_fpn2() - load dst operand from FP regfile #
1116 # load_fpn1() - load src operand from FP regfile #
1117 # fout() - emulate an opclass 3 instruction #
1118 #	tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
1119 # _real_inex() - "callout" to operating system inexact handler #
1120 # _fpsp_done() - "callout" for exit; work all done #
1121 # _real_trace() - "callout" for Trace enabled exception #
1122 # funimp_skew() - adjust fsave src ops to "incorrect" value #
1123 # _real_snan() - "callout" for SNAN exception #
1124 # _real_operr() - "callout" for OPERR exception #
1125 # _real_ovfl() - "callout" for OVFL exception #
1126 # _real_unfl() - "callout" for UNFL exception #
1127 # get_packed() - fetch packed operand from memory #
1128 # #
1129 # INPUT *************************************************************** #
1130 # - The system stack contains the "Unimp Data Type" stk frame #
1131 # - The fsave frame contains the ssrc op (for UNNORM/DENORM) #
1132 # #
1133 # OUTPUT ************************************************************** #
1134 # If Inexact exception (opclass 3): #
1135 # - The system stack is changed to an Inexact exception stk frame #
1136 # If SNAN exception (opclass 3): #
1137 # - The system stack is changed to an SNAN exception stk frame #
1138 # If OPERR exception (opclass 3): #
1139 # - The system stack is changed to an OPERR exception stk frame #
1140 # If OVFL exception (opclass 3): #
1141 # - The system stack is changed to an OVFL exception stk frame #
1142 # If UNFL exception (opclass 3): #
1143 # - The system stack is changed to an UNFL exception stack frame #
1144 # If Trace exception enabled: #
1145 # - The system stack is changed to a Trace exception stack frame #
1146 # Else: (normal case) #
1147 # - Correct result has been stored as appropriate #
1148 # #
1149 # ALGORITHM *********************************************************** #
1150 # Two main instruction types can enter here: (1) DENORM or UNNORM #
1151 # unimplemented data types. These can be either opclass 0,2 or 3 #
1152 # instructions, and (2) PACKED unimplemented data format instructions #
1153 # also of opclasses 0,2, or 3. #
1154 # For UNNORM/DENORM opclass 0 and 2, the handler fetches the src #
1155 # operand from the fsave state frame and the dst operand (if dyadic) #
1156 # from the FP register file. The instruction is then emulated by #
1157 # choosing an emulation routine from a table of routines indexed by #
1158 # instruction type. Once the instruction has been emulated and result #
1159 # saved, then we check to see if any enabled exceptions resulted from #
1160 # instruction emulation. If none, then we exit through the "callout" #
1161 # _fpsp_done(). If there is an enabled FP exception, then we insert #
1162 # this exception into the FPU in the fsave state frame and then exit #
1163 # through _fpsp_done(). #
1164 # PACKED opclass 0 and 2 is similar in how the instruction is #
1165 # emulated and exceptions handled. The differences occur in how the #
1166 # handler loads the packed op (by calling get_packed() routine) and #
1167 # by the fact that a Trace exception could be pending for PACKED ops. #
1168 # If a Trace exception is pending, then the current exception stack #
1169 # frame is changed to a Trace exception stack frame and an exit is #
1170 # made through _real_trace(). #
1171 # For UNNORM/DENORM opclass 3, the actual move out to memory is #
1172 # performed by calling the routine fout(). If no exception should occur #
1173 # as the result of emulation, then an exit either occurs through #
1174 # _fpsp_done() or through _real_trace() if a Trace exception is pending #
1175 # (a Trace stack frame must be created here, too). If an FP exception #
1176 # should occur, then we must create an exception stack frame of that #
1177 # type and jump to either _real_snan(), _real_operr(), _real_inex(), #
1178 # _real_unfl(), or _real_ovfl() as appropriate. PACKED opclass 3 #
1179 # emulation is performed in a similar manner. #
1180 # #
1181 #########################################################################
1182
1183 #
1184 # (1) DENORM and UNNORM (unimplemented) data types:
1185 #
1186 # post-instruction
1187 # *****************
1188 # * EA *
1189 # pre-instruction * *
1190 # ***************** *****************
1191 # * 0x0 * 0x0dc * * 0x3 * 0x0dc *
1192 # ***************** *****************
1193 # * Next * * Next *
1194 # * PC * * PC *
1195 # ***************** *****************
1196 # * SR * * SR *
1197 # ***************** *****************
1198 #
1199 # (2) PACKED format (unsupported) opclasses two and three:
1200 # *****************
1201 # * EA *
1202 # * *
1203 # *****************
1204 # * 0x2 * 0x0dc *
1205 # *****************
1206 # * Next *
1207 # * PC *
1208 # *****************
1209 # * SR *
1210 # *****************
1211 #
1212 	global		_fpsp_unsupp
1213 _fpsp_unsupp:
1214 
1215 	link.w		%a6,&-LOCAL_SIZE	# init stack frame
1216 
1217 	fsave		FP_SRC(%a6)		# save fp state
1218 
1219 	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
1220 	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
1221 	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
1222 
1223 	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
1224 	bne.b		fu_s
1225 fu_u:
1226 	mov.l		%usp,%a0		# fetch user stack pointer
1227 	mov.l		%a0,EXC_A7(%a6)		# save on stack
1228 	bra.b		fu_cont
1229 # if the exception is an opclass zero or two unimplemented data type
1230 # exception, then the a7' calculated here is wrong since it doesn't
1231 # stack an ea. however, we don't need an a7' for this case anyways.
1232 fu_s:
1233 	lea		0x4+EXC_EA(%a6),%a0	# load old a7'
1234 	mov.l		%a0,EXC_A7(%a6)		# save on stack
1235 
1236 fu_cont:
1237 
1238 # the FPIAR holds the "current PC" of the faulting instruction
1239 # the FPIAR should be set correctly for ALL exceptions passing through
1240 # this point.
1241 	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
1242 	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
1243 	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
1244 	bsr.l		_imem_read_long		# fetch the instruction words
1245 	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD
1246 
1247 ############################
1248 
1249 	clr.b		SPCOND_FLG(%a6)		# clear special condition flag
1250 
1251 # Separate opclass three (fpn-to-mem) ops since they have a different
1252 # stack frame and protocol.
1253 	btst		&0x5,EXC_CMDREG(%a6)	# is it an fmove out?
1254 	bne.w		fu_out			# yes
1255 
1256 # Separate packed opclass two instructions.
1257 	bfextu		EXC_CMDREG(%a6){&0:&6},%d0
1258 	cmpi.b		%d0,&0x13
1259 	beq.w		fu_in_pack
1260 
1261 
1262 # I'm not sure at this point what FPSR bits are valid for this instruction.
1263 # so, since the emulation routines re-create them anyways, zero exception field
1264 	andi.l		&0x00ff00ff,USER_FPSR(%a6) # zero exception field
1265 
1266 	fmov.l		&0x0,%fpcr		# zero current control regs
1267 	fmov.l		&0x0,%fpsr
1268 
1269 # Opclass two w/ memory-to-fpn operation will have an incorrect extended
1270 # precision format if the src format was single or double and the
1271 # source data type was an INF, NAN, DENORM, or UNNORM
1272 	lea		FP_SRC(%a6),%a0		# pass ptr to input
1273 	bsr.l		fix_skewed_ops
1274 
1275 # we don't know whether the src operand or the dst operand (or both) is the
1276 # UNNORM or DENORM. call the function that tags the operand type. if the
1277 # input is an UNNORM, then convert it to a NORM, DENORM, or ZERO.
1278 	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
1279 	bsr.l		set_tag_x		# tag the operand type
1280 	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
1281 	bne.b		fu_op2			# no
1282 	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
1283 
1284 fu_op2:
1285 	mov.b		%d0,STAG(%a6)		# save src optype tag
1286 
1287 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1288 
1289 # bit five of the fp extension word separates the monadic and dyadic operations
1290 # at this point
1291 	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
1292 	beq.b		fu_extract		# monadic
1293 	cmpi.b		1+EXC_CMDREG(%a6),&0x3a	# is operation an ftst?
1294 	beq.b		fu_extract		# yes, so it's monadic, too
1295 
1296 	bsr.l		load_fpn2		# load dst into FP_DST
1297 
1298 	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
1299 	bsr.l		set_tag_x		# tag the operand type
1300 	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
1301 	bne.b		fu_op2_done		# no
1302 	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
1303 fu_op2_done:
1304 	mov.b		%d0,DTAG(%a6)		# save dst optype tag
1305 
1306 fu_extract:
1307 	clr.l		%d0
1308 	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
1309 
1310 	bfextu		1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
1311 
1312 	lea		FP_SRC(%a6),%a0
1313 	lea		FP_DST(%a6),%a1
1314 
1315 	mov.l		(tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
1316 	jsr		(tbl_unsupp.l,%pc,%d1.l*1)
1317 
1318 #
1319 # Exceptions in order of precedence:
1320 # 	BSUN	: none
1321 # 	SNAN	: all dyadic ops
1322 # 	OPERR	: fsqrt(-NORM)
1323 # 	OVFL	: all except ftst,fcmp
1324 # 	UNFL	: all except ftst,fcmp
1325 # 	DZ	: fdiv
1326 # 	INEX2	: all except ftst,fcmp
1327 # 	INEX1	: none (packed doesn't go through here)
1328 #
1329 
1330 # we determine the highest priority exception(if any) set by the
1331 # emulation routine that has also been enabled by the user.
1332 	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions set
1333 	bne.b		fu_in_ena		# some are enabled
1334 
1335 fu_in_cont:
1336 # fcmp and ftst do not store any result.
1337 	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch extension
1338 	andi.b		&0x38,%d0		# extract bits 3-5
1339 	cmpi.b		%d0,&0x38		# is instr fcmp or ftst?
1340 	beq.b		fu_in_exit		# yes
1341 
1342 	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1343 	bsr.l		store_fpreg		# store the result
1344 
1345 fu_in_exit:
1346 
1347 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1348 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1349 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1350 
1351 	unlk		%a6
1352 
1353 	bra.l		_fpsp_done
1354 
1355 fu_in_ena:
1356 	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
1357 	bfffo		%d0{&24:&8},%d0		# find highest priority exception
1358 	bne.b		fu_in_exc		# there is at least one set
1359 
1360 #
1361 # No exceptions occurred that were also enabled. Now:
1362 #
1363 # 	if (OVFL && ovfl_disabled && inexact_enabled) {
1364 #	    branch to _real_inex() (even if the result was exact!);
1365 # 	} else {
1366 #	    save the result in the proper fp reg (unless the op is fcmp or ftst);
1367 #	    return;
1368 # 	}
1369 #
1370 	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1371 	beq.b		fu_in_cont		# no
1372 
1373 fu_in_ovflchk:
1374 	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1375 	beq.b		fu_in_cont		# no
1376 	bra.w		fu_in_exc_ovfl		# go insert overflow frame
1377 
1378 #
1379 # An exception occurred and that exception was enabled:
1380 #
1381 # 	shift enabled exception field into lo byte of d0;
1382 # 	if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
1383 #	    ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
1384 #	    /*
1385 #	     * this is the case where we must call _real_inex() now or else
1386 #	     * there will be no other way to pass it the exceptional operand
1387 #	     */
1388 #	    call _real_inex();
1389 # 	} else {
1390 #	    restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
1391 # 	}
1392 #
1393 fu_in_exc:
1394 	subi.l		&24,%d0			# fix offset to be 0-8
1395 	cmpi.b		%d0,&0x6		# is exception INEX? (6)
1396 	bne.b		fu_in_exc_exit		# no
1397 
1398 # the enabled exception was inexact
1399 	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
1400 	bne.w		fu_in_exc_unfl		# yes
1401 	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
1402 	bne.w		fu_in_exc_ovfl		# yes
1403 
1404 # here, we insert the correct fsave status value into the fsave frame for the
1405 # corresponding exception. the operand in the fsave frame should be the original
1406 # src operand.
1407 fu_in_exc_exit:
1408 	mov.l		%d0,-(%sp)		# save d0
1409 	bsr.l		funimp_skew		# skew sgl or dbl inputs
1410 	mov.l		(%sp)+,%d0		# restore d0
1411 
1412 	mov.w		(tbl_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # create exc status
1413 
1414 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1415 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1416 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1417 
1418 	frestore	FP_SRC(%a6)		# restore src op
1419 
1420 	unlk		%a6
1421 
1422 	bra.l		_fpsp_done
1423 
# fsave status words indexed by the 0-7 exception number computed in
# fu_in_exc above (0=BSUN ... 7=INEX1).
1424 tbl_except:
1425 	short		0xe000,0xe006,0xe004,0xe005
1426 	short		0xe003,0xe002,0xe001,0xe001
1427 
1428 fu_in_exc_unfl:
1429 	mov.w		&0x4,%d0
1430 	bra.b		fu_in_exc_exit
1431 fu_in_exc_ovfl:
1432 	mov.w		&0x03,%d0
1433 	bra.b		fu_in_exc_exit
1434
1435 # If the input operand to this operation was opclass two and a single
1436 # or double precision denorm, inf, or nan, the operand needs to be
1437 # "corrected" in order to have the proper equivalent extended precision
1438 # number.
# In:  a0 = ptr to extended-precision operand (as stored by fsave)
# Out: operand corrected in place to a true extended-precision value
# Clobbers d0 (and whatever norm clobbers); a0 is preserved
# (the code below counts on norm not altering a0).
1439 	global		fix_skewed_ops
1440 fix_skewed_ops:
1441 	bfextu		EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass,src fmt
1442 	cmpi.b		%d0,&0x11		# is class = 2 & fmt = sgl?
1443 	beq.b		fso_sgl			# yes
1444 	cmpi.b		%d0,&0x15		# is class = 2 & fmt = dbl?
1445 	beq.b		fso_dbl			# yes
1446 	rts					# no
1447 
1448 fso_sgl:
1449 	mov.w		LOCAL_EX(%a0),%d0	# fetch src exponent
1450 	andi.w		&0x7fff,%d0		# strip sign
1451 	cmpi.w		%d0,&0x3f80		# is |exp| == $3f80?
1452 	beq.b		fso_sgl_dnrm_zero	# yes
1453 	cmpi.w		%d0,&0x407f		# no; is |exp| == $407f?
1454 	beq.b		fso_infnan		# yes
1455 	rts					# no
1456 
1457 fso_sgl_dnrm_zero:
1458 	andi.l		&0x7fffffff,LOCAL_HI(%a0) # clear j-bit
1459 	beq.b		fso_zero		# it's a skewed zero
1460 fso_sgl_dnrm:
1461 # here, we count on norm not to alter a0...
1462 	bsr.l		norm			# normalize mantissa
1463 	neg.w		%d0			# -shft amt
1464 	addi.w		&0x3f81,%d0		# adjust new exponent
1465 	andi.w		&0x8000,LOCAL_EX(%a0)	# clear old exponent
1466 	or.w		%d0,LOCAL_EX(%a0)	# insert new exponent
1467 	rts
1468 
1469 fso_zero:
1470 	andi.w		&0x8000,LOCAL_EX(%a0)	# clear bogus exponent
1471 	rts
1472 
1473 fso_infnan:
1474 	andi.b		&0x7f,LOCAL_HI(%a0)	# clear j-bit
1475 	ori.w		&0x7fff,LOCAL_EX(%a0)	# make exponent = $7fff
1476 	rts
1477 
1478 fso_dbl:
1479 	mov.w		LOCAL_EX(%a0),%d0	# fetch src exponent
1480 	andi.w		&0x7fff,%d0		# strip sign
1481 	cmpi.w		%d0,&0x3c00		# is |exp| == $3c00?
1482 	beq.b		fso_dbl_dnrm_zero	# yes
1483 	cmpi.w		%d0,&0x43ff		# no; is |exp| == $43ff?
1484 	beq.b		fso_infnan		# yes
1485 	rts					# no
1486 
1487 fso_dbl_dnrm_zero:
1488 	andi.l		&0x7fffffff,LOCAL_HI(%a0) # clear j-bit
1489 	bne.b		fso_dbl_dnrm		# it's a skewed denorm
1490 	tst.l		LOCAL_LO(%a0)		# is it a zero?
1491 	beq.b		fso_zero		# yes
1492 fso_dbl_dnrm:
1493 # here, we count on norm not to alter a0...
1494 	bsr.l		norm			# normalize mantissa
1495 	neg.w		%d0			# -shft amt
1496 	addi.w		&0x3c01,%d0		# adjust new exponent
1497 	andi.w		&0x8000,LOCAL_EX(%a0)	# clear old exponent
1498 	or.w		%d0,LOCAL_EX(%a0)	# insert new exponent
1499 	rts
1500
1501 #################################################################
1502
1503 # fmove out took an unimplemented data type exception.
1504 # the src operand is in FP_SRC. Call _fout() to write out the result and
1505 # to determine which exceptions, if any, to take.
1506 fu_out:
1507 
1508 # Separate packed move outs from the UNNORM and DENORM move outs.
1509 	bfextu		EXC_CMDREG(%a6){&3:&3},%d0
1510 	cmpi.b		%d0,&0x3
1511 	beq.w		fu_out_pack
1512 	cmpi.b		%d0,&0x7
1513 	beq.w		fu_out_pack
1514 
1515 
1516 # I'm not sure at this point what FPSR bits are valid for this instruction.
1517 # so, since the emulation routines re-create them anyways, zero exception field.
1518 # fmove out doesn't affect ccodes.
1519 	and.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field
1520 
1521 	fmov.l		&0x0,%fpcr		# zero current control regs
1522 	fmov.l		&0x0,%fpsr
1523 
1524 # the src can ONLY be a DENORM or an UNNORM! so, don't make any big subroutine
1525 # call here. just figure out what it is...
1526 	mov.w		FP_SRC_EX(%a6),%d0	# get exponent
1527 	andi.w		&0x7fff,%d0		# strip sign
1528 	beq.b		fu_out_denorm		# it's a DENORM
1529 
1530 	lea		FP_SRC(%a6),%a0
1531 	bsr.l		unnorm_fix		# yes; fix it
1532 
1533 	mov.b		%d0,STAG(%a6)
1534 
1535 	bra.b		fu_out_cont
1536 fu_out_denorm:
1537 	mov.b		&DENORM,STAG(%a6)
1538 fu_out_cont:
1539 
1540 	clr.l		%d0
1541 	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
1542 
1543 	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
1544 
1545 	mov.l		(%a6),EXC_A6(%a6)	# in case a6 changes
1546 	bsr.l		fout			# call fmove out routine
1547 
1548 # Exceptions in order of precedence:
1549 # 	BSUN	: none
1550 # 	SNAN	: none
1551 # 	OPERR	: fmove.{b,w,l} out of large UNNORM
1552 # 	OVFL	: fmove.{s,d}
1553 # 	UNFL	: fmove.{s,d,x}
1554 # 	DZ	: none
1555 # 	INEX2	: all
1556 # 	INEX1	: none (packed doesn't travel through here)
1557 
1558 # determine the highest priority exception(if any) set by the
1559 # emulation routine that has also been enabled by the user.
1560 	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
1561 	bne.w		fu_out_ena		# some are enabled
1562 
1563 fu_out_done:
1564 
1565 	mov.l		EXC_A6(%a6),(%a6)	# in case a6 changed
1566 
1567 # on extended precision opclass three instructions using pre-decrement or
1568 # post-increment addressing mode, the address register is not updated. if the
1569 # address register was the stack pointer used from user mode, then let's update
1570 # it here. if it was used from supervisor mode, then we have to handle this
1571 # as a special case.
1572 	btst		&0x5,EXC_SR(%a6)
1573 	bne.b		fu_out_done_s
1574 
1575 	mov.l		EXC_A7(%a6),%a0		# restore a7
1576 	mov.l		%a0,%usp
1577 
1578 fu_out_done_cont:
1579 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1580 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1581 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1582 
1583 	unlk		%a6
1584 
1585 	btst		&0x7,(%sp)		# is trace on?
1586 	bne.b		fu_out_trace		# yes
1587 
1588 	bra.l		_fpsp_done
1589 
1590 # is the ea mode pre-decrement of the stack pointer from supervisor mode?
1591 # ("fmov.x fpm,-(a7)") if so, the stack frame must be shifted "down" and
# the result copied into place by hand, since a7' is the destination.
1592 fu_out_done_s:
1593 	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
1594 	bne.b		fu_out_done_cont
1595 
1596 # the extended precision result is still in fp0. but, we need to save it
1597 # somewhere on the stack until we can copy it to its final resting place.
1598 # here, we're counting on the top of the stack to be the old place-holders
1599 # for fp0/fp1 which have already been restored. that way, we can write
1600 # over those destinations with the shifted stack frame.
1601 	fmovm.x		&0x80,FP_SRC(%a6)	# put answer on stack
1602 
1603 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1604 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1605 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1606 
1607 	mov.l		(%a6),%a6		# restore frame pointer
1608 
# shift the SR/PC of the exception frame down 12 bytes (over the old fp0 slot)
1609 	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
1610 	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
1611 
1612 # now, copy the result to the proper place on the stack
1613 	mov.l		LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
1614 	mov.l		LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
1615 	mov.l		LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
1616 
1617 	add.l		&LOCAL_SIZE-0x8,%sp
1618 
1619 	btst		&0x7,(%sp)
1620 	bne.b		fu_out_trace
1621 
1622 	bra.l		_fpsp_done
1623 
1624 fu_out_ena:
1625 	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
1626 	bfffo		%d0{&24:&8},%d0		# find highest priority exception
1627 	bne.b		fu_out_exc		# there is at least one set
1628 
1629 # no exceptions were set.
1630 # if a disabled overflow occurred and inexact was enabled but the result
1631 # was exact, then a branch to _real_inex() is made.
1632 	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1633 	beq.w		fu_out_done		# no
1634 
1635 fu_out_ovflchk:
1636 	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1637 	beq.w		fu_out_done		# no
1638 	bra.w		fu_inex			# yes
1639 
1640 #
1641 # The fp move out that took the "Unimplemented Data Type" exception was
1642 # being traced. Since the stack frames are similar, get the "current" PC
1643 # from FPIAR and put it in the trace stack frame then jump to _real_trace().
1644 #
1645 #		 UNSUPP FRAME		   TRACE FRAME
1646 #		*****************	*****************
1647 #		*      EA	*	*    Current	*
1648 #		*		*	*      PC	*
1649 #		*****************	*****************
1650 #		* 0x3 *  0x0dc	*	* 0x2 *  0x024	*
1651 #		*****************	*****************
1652 #		*     Next	*	*     Next	*
1653 #		*      PC	*	*      PC	*
1654 #		*****************	*****************
1655 #		*      SR	*	*      SR	*
1656 #		*****************	*****************
1657 #
1658 fu_out_trace:
1659 	mov.w		&0x2024,0x6(%sp)
1660 	fmov.l		%fpiar,0x8(%sp)
1661 	bra.l		_real_trace
1662 
1663 # an exception occurred and that exception was enabled.
1664 fu_out_exc:
1665 	subi.l		&24,%d0			# fix offset to be 0-8
1666 
1667 # we don't mess with the existing fsave frame. just re-insert it and
1668 # jump to the "_real_{}()" handler...
1669 	mov.w		(tbl_fu_out.b,%pc,%d0.w*2),%d0
1670 	jmp		(tbl_fu_out.b,%pc,%d0.w*1)
1671 
1672 	swbeg		&0x8
1673 tbl_fu_out:
1674 	short		tbl_fu_out	-	tbl_fu_out	# BSUN can't happen
1675 	short		tbl_fu_out	-	tbl_fu_out	# SNAN can't happen
1676 	short		fu_operr	-	tbl_fu_out	# OPERR
1677 	short		fu_ovfl		-	tbl_fu_out	# OVFL
1678 	short		fu_unfl		-	tbl_fu_out	# UNFL
1679 	short		tbl_fu_out	-	tbl_fu_out	# DZ can't happen
1680 	short		fu_inex		-	tbl_fu_out	# INEX2
1681 	short		tbl_fu_out	-	tbl_fu_out	# INEX1 won't make it here
1682 
1683 # for snan,operr,ovfl,unfl, src op is still in FP_SRC so just
1684 # frestore it.
1685 fu_snan:
1686 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1687 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1688 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1689 
1690 	mov.w		&0x30d8,EXC_VOFF(%a6)	# vector offset = 0xd8
1691 	mov.w		&0xe006,2+FP_SRC(%a6)
1692 
1693 	frestore	FP_SRC(%a6)
1694 
1695 	unlk		%a6
1696 
1697 
1698 	bra.l		_real_snan
1699 
1700 fu_operr:
1701 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1702 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1703 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1704 
1705 	mov.w		&0x30d0,EXC_VOFF(%a6)	# vector offset = 0xd0
1706 	mov.w		&0xe004,2+FP_SRC(%a6)
1707 
1708 	frestore	FP_SRC(%a6)
1709 
1710 	unlk		%a6
1711 
1712 
1713 	bra.l		_real_operr
1714 
1715 fu_ovfl:
1716 	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack
1717 
1718 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1719 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1720 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1721 
1722 	mov.w		&0x30d4,EXC_VOFF(%a6)	# vector offset = 0xd4
1723 	mov.w		&0xe005,2+FP_SRC(%a6)
1724 
1725 	frestore	FP_SRC(%a6)		# restore EXOP
1726 
1727 	unlk		%a6
1728 
1729 	bra.l		_real_ovfl
1730 
1731 # underflow can happen for extended precision. extended precision opclass
1732 # three instruction exceptions don't update the stack pointer. so, if the
1733 # exception occurred from user mode, then simply update a7 and exit normally.
1734 # if the exception occurred from supervisor mode, check if the <ea> was -(a7);
# if so, the stack frame must be shifted and the result copied in by hand.
1735 fu_unfl:
1736 	mov.l		EXC_A6(%a6),(%a6)	# restore a6
1737 
1738 	btst		&0x5,EXC_SR(%a6)
1739 	bne.w		fu_unfl_s
1740 
1741 	mov.l		EXC_A7(%a6),%a0		# restore a7 whether we need
1742 	mov.l		%a0,%usp		# to or not...
1743 
1744 fu_unfl_cont:
1745 	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack
1746 
1747 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1748 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1749 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1750 
1751 	mov.w		&0x30cc,EXC_VOFF(%a6)	# vector offset = 0xcc
1752 	mov.w		&0xe003,2+FP_SRC(%a6)
1753 
1754 	frestore	FP_SRC(%a6)		# restore EXOP
1755 
1756 	unlk		%a6
1757 
1758 	bra.l		_real_unfl
1759 
1760 fu_unfl_s:
1761 	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)?
1762 	bne.b		fu_unfl_cont
1763 
1764 # the extended precision result is still in fp0. but, we need to save it
1765 # somewhere on the stack until we can copy it to its final resting place
1766 # (where the exc frame is currently). make sure it's not at the top of the
1767 # frame or it will get overwritten when the exc stack frame is shifted "down".
1768 	fmovm.x		&0x80,FP_SRC(%a6)	# put answer on stack
1769 	fmovm.x		&0x40,FP_DST(%a6)	# put EXOP on stack
1770 
1771 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1772 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1773 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1774 
1775 	mov.w		&0x30cc,EXC_VOFF(%a6)	# vector offset = 0xcc
1776 	mov.w		&0xe003,2+FP_DST(%a6)
1777 
1778 	frestore	FP_DST(%a6)		# restore EXOP
1779 
1780 	mov.l		(%a6),%a6		# restore frame pointer
1781 
# shift SR/PC/EA of the exception frame down 12 bytes to open a hole
1782 	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
1783 	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
1784 	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
1785 
1786 # now, copy the result to the proper place on the stack
1787 	mov.l		LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
1788 	mov.l		LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
1789 	mov.l		LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
1790 
1791 	add.l		&LOCAL_SIZE-0x8,%sp
1792 
1793 	bra.l		_real_unfl
1794 
1795 # fmove in and out enter here.
1796 fu_inex:
1797 	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack
1798 
1799 	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1800 	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1801 	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1802 
1803 	mov.w		&0x30c4,EXC_VOFF(%a6)	# vector offset = 0xc4
1804 	mov.w		&0xe001,2+FP_SRC(%a6)
1805 
1806 	frestore	FP_SRC(%a6)		# restore EXOP
1807 
1808 	unlk		%a6
1809 
1810 
1811 	bra.l		_real_inex
1812
1813 #########################################################################
1814 #########################################################################
1815 fu_in_pack:
1816
1817
1818 # I'm not sure at this point what FPSR bits are valid for this instruction.
1819 # so, since the emulation routines re-create them anyways, zero exception field
1820 andi.l &0x0ff00ff,USER_FPSR(%a6) # zero exception field
1821
1822 fmov.l &0x0,%fpcr # zero current control regs
1823 fmov.l &0x0,%fpsr
1824
1825 bsr.l get_packed # fetch packed src operand
1826
1827 lea FP_SRC(%a6),%a0 # pass ptr to src
1828 bsr.l set_tag_x # set src optype tag
1829
1830 mov.b %d0,STAG(%a6) # save src optype tag
1831
1832 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1833
1834 # bit five of the fp extension word separates the monadic and dyadic operations
1835 # at this point
1836 btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
1837 beq.b fu_extract_p # monadic
1838 cmpi.b 1+EXC_CMDREG(%a6),&0x3a # is operation an ftst?
1839 beq.b fu_extract_p # yes, so it's monadic, too
1840
1841 bsr.l load_fpn2 # load dst into FP_DST
1842
1843 lea FP_DST(%a6),%a0 # pass: ptr to dst op
1844 bsr.l set_tag_x # tag the operand type
1845 cmpi.b %d0,&UNNORM # is operand an UNNORM?
1846 bne.b fu_op2_done_p # no
1847 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
1848 fu_op2_done_p:
1849 mov.b %d0,DTAG(%a6) # save dst optype tag
1850
1851 fu_extract_p:
1852 clr.l %d0
1853 mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
1854
1855 bfextu 1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
1856
1857 lea FP_SRC(%a6),%a0
1858 lea FP_DST(%a6),%a1
1859
1860 mov.l (tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
1861 jsr (tbl_unsupp.l,%pc,%d1.l*1)
1862
1863 #
1864 # Exceptions in order of precedence:
1865 # BSUN : none
1866 # SNAN : all dyadic ops
1867 # OPERR : fsqrt(-NORM)
1868 # OVFL : all except ftst,fcmp
1869 # UNFL : all except ftst,fcmp
1870 # DZ : fdiv
1871 # INEX2 : all except ftst,fcmp
1872 # INEX1 : all
1873 #
1874
1875 # we determine the highest priority exception(if any) set by the
1876 # emulation routine that has also been enabled by the user.
1877 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
1878 bne.w fu_in_ena_p # some are enabled
1879
#
# fu_in_cont_p: no enabled exception occurred. store the result (unless
# the op is fcmp/ftst) and exit, shifting the supervisor frame "up" if
# the <ea> mode was (a7)+.
#
1880 fu_in_cont_p:
1881 # fcmp and ftst do not store any result.
1882 mov.b 1+EXC_CMDREG(%a6),%d0 # fetch extension
1883 andi.b &0x38,%d0 # extract bits 3-5
1884 cmpi.b %d0,&0x38 # is instr fcmp or ftst?
1885 beq.b fu_in_exit_p # yes
1886
1887 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1888 bsr.l store_fpreg # store the result
1889
1890 fu_in_exit_p:
1891
1892 btst &0x5,EXC_SR(%a6) # user or supervisor?
1893 bne.w fu_in_exit_s_p # supervisor
1894
1895 mov.l EXC_A7(%a6),%a0 # update user a7
1896 mov.l %a0,%usp
1897
1898 fu_in_exit_cont_p:
1899 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1900 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1901 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1902
1903 unlk %a6 # unravel stack frame
1904
1905 btst &0x7,(%sp) # is trace on?
1906 bne.w fu_trace_p # yes
1907
1908 bra.l _fpsp_done # exit to os
1909
1910 # the exception occurred in supervisor mode. check to see if the
1911 # addressing mode was (a7)+. if so, we'll need to shift the
1912 # stack frame "up".
1913 fu_in_exit_s_p:
1914 btst &mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+
1915 beq.b fu_in_exit_cont_p # no
1916
1917 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1918 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1919 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1920
1921 unlk %a6 # unravel stack frame
1922
1923 # shift the stack frame "up". we don't really care about the <ea> field.
1924 mov.l 0x4(%sp),0x10(%sp) # move PC+format/vector word up
1925 mov.l 0x0(%sp),0xc(%sp) # move SR (+PC hi) up
1926 add.l &0xc,%sp # discard the 12 vacated bytes
1927
1928 btst &0x7,(%sp) # is trace on?
1929 bne.w fu_trace_p # yes
1930
1931 bra.l _fpsp_done # exit to os
1932
#
# fu_in_ena_p: at least one exception is enabled in the FPCR. find the
# highest priority one that actually occurred; if none did, check for the
# special case "disabled OVFL w/ enabled INEX" which must still go
# through _real_inex().
#
1933 fu_in_ena_p:
1934 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled & set
1935 bfffo %d0{&24:&8},%d0 # find highest priority exception
1936 bne.b fu_in_exc_p # at least one was set
1937
1938 #
1939 # No exceptions occurred that were also enabled. Now:
1940 #
1941 # if (OVFL && ovfl_disabled && inexact_enabled) {
1942 # branch to _real_inex() (even if the result was exact!);
1943 # } else {
1944 # save the result in the proper fp reg (unless the op is fcmp or ftst);
1945 # return;
1946 # }
1947 #
1948 btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1949 beq.w fu_in_cont_p # no
1950
1951 fu_in_ovflchk_p:
1952 btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1953 beq.w fu_in_cont_p # no
1954 bra.w fu_in_exc_ovfl_p # do _real_inex() now
1955
1956 #
1957 # An exception occurred and that exception was enabled:
1958 #
1959 # shift enabled exception field into lo byte of d0;
1960 # if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
1961 # ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
1962 # /*
1963 # * this is the case where we must call _real_inex() now or else
1964 # * there will be no other way to pass it the exceptional operand
1965 # */
1966 # call _real_inex();
1967 # } else {
1968 # restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
1969 # }
1970 #
1971 fu_in_exc_p:
1972 subi.l &24,%d0 # fix offset to be 0-8
1973 cmpi.b %d0,&0x6 # is exception INEX? (6 or 7)
1974 blt.b fu_in_exc_exit_p # no
1975
1976 # the enabled exception was inexact
1977 btst &unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
1978 bne.w fu_in_exc_unfl_p # yes
1979 btst &ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
1980 bne.w fu_in_exc_ovfl_p # yes
1981
1982 # here, we insert the correct fsave status value into the fsave frame for the
1983 # corresponding exception. the operand in the fsave frame should be the original
1984 # src operand.
1985 # as a reminder for future predicted pain and agony, we are passing in fsave the
1986 # "non-skewed" operand for cases of sgl and dbl src INFs,NANs, and DENORMs.
1987 # this is INCORRECT for enabled SNAN which would give to the user the skewed SNAN!!!
1988 fu_in_exc_exit_p:
1989 btst &0x5,EXC_SR(%a6) # user or supervisor?
1990 bne.w fu_in_exc_exit_s_p # supervisor
1991
1992 mov.l EXC_A7(%a6),%a0 # update user a7
1993 mov.l %a0,%usp
1994
1995 fu_in_exc_exit_cont_p:
1996 mov.w (tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6)
1997
1998 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1999 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2000 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2001
2002 frestore FP_SRC(%a6) # restore src op
2003
2004 unlk %a6
2005
2006 btst &0x7,(%sp) # is trace enabled?
2007 bne.w fu_trace_p # yes
2008
2009 bra.l _fpsp_done
2010
2011 tbl_except_p:
2012 short 0xe000,0xe006,0xe004,0xe005
2013 short 0xe003,0xe002,0xe001,0xe001
2014
2015 fu_in_exc_ovfl_p:
2016 mov.w &0x3,%d0
2017 bra.w fu_in_exc_exit_p
2018
2019 fu_in_exc_unfl_p:
2020 mov.w &0x4,%d0
2021 bra.w fu_in_exc_exit_p
2022
#
# fu_in_exc_exit_s_p: enabled-exception exit from supervisor mode. if the
# <ea> mode was (a7)+, the exception frame must be shifted "up" 12 bytes
# before exiting.
#
2023 fu_in_exc_exit_s_p:
2024 btst &mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+?
2025 beq.b fu_in_exc_exit_cont_p # no; normal exit
2026
# insert fsave status word (indexed by exception priority in d0)
2027 mov.w (tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6)
2028
2029 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2030 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2031 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2032
2033 frestore FP_SRC(%a6) # restore src op
2034
2035 unlk %a6 # unravel stack frame
2036
2037 # shift stack frame "up". who cares about <ea> field.
2038 mov.l 0x4(%sp),0x10(%sp)
2039 mov.l 0x0(%sp),0xc(%sp)
2040 add.l &0xc,%sp
2041
2042 btst &0x7,(%sp) # is trace on?
2043 bne.b fu_trace_p # yes
2044
2045 bra.l _fpsp_done # exit to os
2046
2047 #
2048 # The opclass two PACKED instruction that took an "Unimplemented Data Type"
2049 # exception was being traced. Make the "current" PC the FPIAR and put it in the
2050 # trace stack frame then jump to _real_trace().
2051 #
2052 # UNSUPP FRAME TRACE FRAME
2053 # ***************** *****************
2054 # * EA * * Current *
2055 # * * * PC *
2056 # ***************** *****************
2057 # * 0x2 * 0x0dc * * 0x2 * 0x024 *
2058 # ***************** *****************
2059 # * Next * * Next *
2060 # * PC * * PC *
2061 # ***************** *****************
2062 # * SR * * SR *
2063 # ***************** *****************
2064 fu_trace_p:
2065 mov.w &0x2024,0x6(%sp)
2066 fmov.l %fpiar,0x8(%sp)
2067
2068 bra.l _real_trace
2069
2070 #########################################################
2071 #########################################################
2072 fu_out_pack:
2073
2074
2075 # I'm not sure at this point what FPSR bits are valid for this instruction.
2076 # so, since the emulation routines re-create them anyways, zero exception field.
2077 # fmove out doesn't affect ccodes.
2078 and.l &0xffff00ff,USER_FPSR(%a6) # zero exception field
2079
2080 fmov.l &0x0,%fpcr # zero current control regs
2081 fmov.l &0x0,%fpsr
2082
2083 bfextu EXC_CMDREG(%a6){&6:&3},%d0
2084 bsr.l load_fpn1
2085
2086 # unlike other opclass 3, unimplemented data type exceptions, packed must be
2087 # able to detect all operand types.
2088 lea FP_SRC(%a6),%a0
2089 bsr.l set_tag_x # tag the operand type
2090 cmpi.b %d0,&UNNORM # is operand an UNNORM?
2091 bne.b fu_op2_p # no
2092 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
2093
2094 fu_op2_p:
2095 mov.b %d0,STAG(%a6) # save src optype tag
2096
2097 clr.l %d0
2098 mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
2099
2100 lea FP_SRC(%a6),%a0 # pass ptr to src operand
2101
2102 mov.l (%a6),EXC_A6(%a6) # in case a6 changes
2103 bsr.l fout # call fmove out routine
2104
2105 # Exceptions in order of precedence:
2106 # BSUN : no
2107 # SNAN : yes
2108 # OPERR : if ((k_factor > +17) || (dec. exp exceeds 3 digits))
2109 # OVFL : no
2110 # UNFL : no
2111 # DZ : no
2112 # INEX2 : yes
2113 # INEX1 : no
2114
2115 # determine the highest priority exception(if any) set by the
2116 # emulation routine that has also been enabled by the user.
2117 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
2118 bne.w fu_out_ena_p # some are enabled
2119
2120 fu_out_exit_p:
2121 mov.l EXC_A6(%a6),(%a6) # restore a6
2122
2123 btst &0x5,EXC_SR(%a6) # user or supervisor?
2124 bne.b fu_out_exit_s_p # supervisor
2125
2126 mov.l EXC_A7(%a6),%a0 # update user a7
2127 mov.l %a0,%usp
2128
2129 fu_out_exit_cont_p:
2130 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2131 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2132 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2133
2134 unlk %a6 # unravel stack frame
2135
2136 btst &0x7,(%sp) # is trace on?
2137 bne.w fu_trace_p # yes
2138
2139 bra.l _fpsp_done # exit to os
2140
2141 # the exception occurred in supervisor mode. check to see if the
2142 # addressing mode was -(a7). if so, we'll need to shift the
2143 # stack frame "down".
2144 fu_out_exit_s_p:
2145 btst &mda7_bit,SPCOND_FLG(%a6) # was ea mode -(a7)
2146 beq.b fu_out_exit_cont_p # no
2147
2148 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2149 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2150 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2151
2152 mov.l (%a6),%a6 # restore frame pointer
2153
2154 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2155 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2156
2157 # now, copy the result to the proper place on the stack
2158 mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
2159 mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
2160 mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
2161
2162 add.l &LOCAL_SIZE-0x8,%sp
2163
2164 btst &0x7,(%sp)
2165 bne.w fu_trace_p
2166
2167 bra.l _fpsp_done
2168
#
# fu_out_ena_p: some exceptions are enabled; see which (if any) actually
# occurred. packed fmove out can only signal INEX, OPERR, or SNAN.
#
2169 fu_out_ena_p:
2170 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled
2171 bfffo %d0{&24:&8},%d0 # find highest priority exception
2172 beq.w fu_out_exit_p # none set; normal exit
2173
2174 mov.l EXC_A6(%a6),(%a6) # restore a6
2175
2176 # an exception occurred and that exception was enabled.
2177 # the only exception possible on packed move out are INEX, OPERR, and SNAN.
2178 fu_out_exc_p:
2179 cmpi.b %d0,&0x1a # bfffo offset: <0x1a=SNAN, =0x1a=OPERR
2180 bgt.w fu_inex_p2 # >0x1a: INEX
2181 beq.w fu_operr_p # =0x1a: OPERR
2182
#
# fu_snan_p: enabled SNAN on a packed fmove out. the only special case is
# "fmove.p fpn,-(a7)" from supervisor mode, which requires the exc frame
# to be shifted "down" 12 bytes; everything else funnels into fu_snan.
#
2183 fu_snan_p:
2184 btst &0x5,EXC_SR(%a6) # user or supervisor?
2185 bne.b fu_snan_s_p # supervisor
2186
2187 mov.l EXC_A7(%a6),%a0 # update user a7
2188 mov.l %a0,%usp
2189 bra.w fu_snan
2190
2191 fu_snan_s_p:
2192 cmpi.b SPCOND_FLG(%a6),&mda7_flg # was <ea> mode -(a7)?
2193 bne.w fu_snan # no; common snan path
2194
2195 # the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2196 # the strategy is to move the exception frame "down" 12 bytes. then, we
2197 # can store the default result where the exception frame was.
2198 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2199 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2200 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2201
2202 mov.w &0x30d8,EXC_VOFF(%a6) # vector offset = 0xd8
2203 mov.w &0xe006,2+FP_SRC(%a6) # set fsave status
2204
2205 frestore FP_SRC(%a6) # restore src operand
2206
2207 mov.l (%a6),%a6 # restore frame pointer
2208
2209 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2210 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2211 mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2212
2213 # now, we copy the default result to its proper location
2214 mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2215 mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2216 mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2217
2218 add.l &LOCAL_SIZE-0x8,%sp # pop locals; sp now 12 lower than entry
2219
2220
2221 bra.l _real_snan
2222
#
# fu_operr_p: enabled OPERR on a packed fmove out. same structure as
# fu_snan_p: only "fmove.p fpn,-(a7)" from supervisor mode needs the
# 12-byte frame shift; otherwise funnel into fu_operr.
#
2223 fu_operr_p:
2224 btst &0x5,EXC_SR(%a6) # user or supervisor?
2225 bne.w fu_operr_p_s # supervisor
2226
2227 mov.l EXC_A7(%a6),%a0 # update user a7
2228 mov.l %a0,%usp
2229 bra.w fu_operr
2230
2231 fu_operr_p_s:
2232 cmpi.b SPCOND_FLG(%a6),&mda7_flg # was <ea> mode -(a7)?
2233 bne.w fu_operr # no; common operr path
2234
2235 # the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2236 # the strategy is to move the exception frame "down" 12 bytes. then, we
2237 # can store the default result where the exception frame was.
2238 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2239 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2240 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2241
2242 mov.w &0x30d0,EXC_VOFF(%a6) # vector offset = 0xd0
2243 mov.w &0xe004,2+FP_SRC(%a6) # set fsave status
2244
2245 frestore FP_SRC(%a6) # restore src operand
2246
2247 mov.l (%a6),%a6 # restore frame pointer
2248
2249 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2250 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2251 mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2252
2253 # now, we copy the default result to its proper location
2254 mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2255 mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2256 mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2257
2258 add.l &LOCAL_SIZE-0x8,%sp # pop locals; sp now 12 lower than entry
2259
2260
2261 bra.l _real_operr
2262
#
# fu_inex_p2: enabled INEX on a packed fmove out. same structure as
# fu_snan_p/fu_operr_p: only "fmove.p fpn,-(a7)" from supervisor mode
# needs the 12-byte frame shift; otherwise funnel into fu_inex.
#
2263 fu_inex_p2:
2264 btst &0x5,EXC_SR(%a6) # user or supervisor?
2265 bne.w fu_inex_s_p2 # supervisor
2266
2267 mov.l EXC_A7(%a6),%a0 # update user a7
2268 mov.l %a0,%usp
2269 bra.w fu_inex
2270
2271 fu_inex_s_p2:
2272 cmpi.b SPCOND_FLG(%a6),&mda7_flg # was <ea> mode -(a7)?
2273 bne.w fu_inex # no; common inex path
2274
2275 # the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2276 # the strategy is to move the exception frame "down" 12 bytes. then, we
2277 # can store the default result where the exception frame was.
2278 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2279 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2280 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2281
2282 mov.w &0x30c4,EXC_VOFF(%a6) # vector offset = 0xc4
2283 mov.w &0xe001,2+FP_SRC(%a6) # set fsave status
2284
2285 frestore FP_SRC(%a6) # restore src operand
2286
2287 mov.l (%a6),%a6 # restore frame pointer
2288
2289 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2290 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2291 mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2292
2293 # now, we copy the default result to its proper location
2294 mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2295 mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2296 mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2297
2298 add.l &LOCAL_SIZE-0x8,%sp # pop locals; sp now 12 lower than entry
2299
2300
2301 bra.l _real_inex
2302
2303 #########################################################################
2304
2305 #
2306 # if we're stuffing a source operand back into an fsave frame then we
2307 # have to make sure that for single or double source operands that the
2308 # format stuffed is as weird as the hardware usually makes it.
2309 #
2310 global funimp_skew
2311 funimp_skew:
2312 bfextu EXC_EXTWORD(%a6){&3:&3},%d0 # extract src specifier
2313 cmpi.b %d0,&0x1 # was src sgl?
2314 beq.b funimp_skew_sgl # yes
2315 cmpi.b %d0,&0x5 # was src dbl?
2316 beq.b funimp_skew_dbl # yes
2317 rts
2318
2319 funimp_skew_sgl:
2320 mov.w FP_SRC_EX(%a6),%d0 # fetch DENORM exponent
2321 andi.w &0x7fff,%d0 # strip sign
2322 beq.b funimp_skew_sgl_not
2323 cmpi.w %d0,&0x3f80
2324 bgt.b funimp_skew_sgl_not
2325 neg.w %d0 # make exponent negative
2326 addi.w &0x3f81,%d0 # find amt to shift
2327 mov.l FP_SRC_HI(%a6),%d1 # fetch DENORM hi(man)
2328 lsr.l %d0,%d1 # shift it
2329 bset &31,%d1 # set j-bit
2330 mov.l %d1,FP_SRC_HI(%a6) # insert new hi(man)
2331 andi.w &0x8000,FP_SRC_EX(%a6) # clear old exponent
2332 ori.w &0x3f80,FP_SRC_EX(%a6) # insert new "skewed" exponent
2333 funimp_skew_sgl_not:
2334 rts
2335
2336 funimp_skew_dbl:
2337 mov.w FP_SRC_EX(%a6),%d0 # fetch DENORM exponent
2338 andi.w &0x7fff,%d0 # strip sign
2339 beq.b funimp_skew_dbl_not
2340 cmpi.w %d0,&0x3c00
2341 bgt.b funimp_skew_dbl_not
2342
2343 tst.b FP_SRC_EX(%a6) # make "internal format"
2344 smi.b 0x2+FP_SRC(%a6)
2345 mov.w %d0,FP_SRC_EX(%a6) # insert exponent with cleared sign
2346 clr.l %d0 # clear g,r,s
2347 lea FP_SRC(%a6),%a0 # pass ptr to src op
2348 mov.w &0x3c01,%d1 # pass denorm threshold
2349 bsr.l dnrm_lp # denorm it
2350 mov.w &0x3c00,%d0 # new exponent
2351 tst.b 0x2+FP_SRC(%a6) # is sign set?
2352 beq.b fss_dbl_denorm_done # no
2353 bset &15,%d0 # set sign
2354 fss_dbl_denorm_done:
2355 bset &0x7,FP_SRC_HI(%a6) # set j-bit
2356 mov.w %d0,FP_SRC_EX(%a6) # insert new exponent
2357 funimp_skew_dbl_not:
2358 rts
2359
2360 #########################################################################
2361 global _mem_write2
2362 _mem_write2:
2363 btst &0x5,EXC_SR(%a6)
2364 beq.l _dmem_write
2365 mov.l 0x0(%a0),FP_DST_EX(%a6)
2366 mov.l 0x4(%a0),FP_DST_HI(%a6)
2367 mov.l 0x8(%a0),FP_DST_LO(%a6)
2368 clr.l %d1
2369 rts
2370
2371 #########################################################################
2372 # XDEF **************************************************************** #
2373 # _fpsp_effadd(): 060FPSP entry point for FP "Unimplemented #
2374 # effective address" exception. #
2375 # #
2376 # This handler should be the first code executed upon taking the #
2377 # FP Unimplemented Effective Address exception in an operating #
2378 # system. #
2379 # #
2380 # XREF **************************************************************** #
2381 # _imem_read_long() - read instruction longword #
2382 # fix_skewed_ops() - adjust src operand in fsave frame #
2383 # set_tag_x() - determine optype of src/dst operands #
2384 # store_fpreg() - store opclass 0 or 2 result to FP regfile #
2385 # unnorm_fix() - change UNNORM operands to NORM or ZERO #
2386 # load_fpn2() - load dst operand from FP regfile #
2387 # tbl_unsupp - add of table of emulation routines for opclass 0,2 #
2388 # decbin() - convert packed data to FP binary data #
2389 # _real_fpu_disabled() - "callout" for "FPU disabled" exception #
2390 # _real_access() - "callout" for access error exception #
2391 # _mem_read() - read extended immediate operand from memory #
2392 # _fpsp_done() - "callout" for exit; work all done #
2393 # _real_trace() - "callout" for Trace enabled exception #
2394 # fmovm_dynamic() - emulate dynamic fmovm instruction #
2395 # fmovm_ctrl() - emulate fmovm control instruction #
2396 # #
2397 # INPUT *************************************************************** #
2398 # - The system stack contains the "Unimplemented <ea>" stk frame #
2399 # #
2400 # OUTPUT ************************************************************** #
2401 # If access error: #
2402 # - The system stack is changed to an access error stack frame #
2403 # If FPU disabled: #
2404 # - The system stack is changed to an FPU disabled stack frame #
2405 # If Trace exception enabled: #
2406 # - The system stack is changed to a Trace exception stack frame #
2407 # Else: (normal case) #
2408 # - None (correct result has been stored as appropriate) #
2409 # #
2410 # ALGORITHM *********************************************************** #
2411 # This exception handles 3 types of operations: #
2412 # (1) FP Instructions using extended precision or packed immediate #
2413 # addressing mode. #
2414 # (2) The "fmovm.x" instruction w/ dynamic register specification. #
2415 # (3) The "fmovm.l" instruction w/ 2 or 3 control registers. #
2416 # #
2417 # For immediate data operations, the data is read in w/ a #
2418 # _mem_read() "callout", converted to FP binary (if packed), and used #
2419 # as the source operand to the instruction specified by the instruction #
2420 # word. If no FP exception should be reported ads a result of the #
2421 # emulation, then the result is stored to the destination register and #
2422 # the handler exits through _fpsp_done(). If an enabled exc has been #
2423 # signalled as a result of emulation, then an fsave state frame #
2424 # corresponding to the FP exception type must be entered into the 060 #
2425 # FPU before exiting. In either the enabled or disabled cases, we #
2426 # must also check if a Trace exception is pending, in which case, we #
2427 # must create a Trace exception stack frame from the current exception #
2428 # stack frame. If no Trace is pending, we simply exit through #
2429 # _fpsp_done(). #
2430 # For "fmovm.x", call the routine fmovm_dynamic() which will #
2431 # decode and emulate the instruction. No FP exceptions can be pending #
2432 # as a result of this operation emulation. A Trace exception can be #
2433 # pending, though, which means the current stack frame must be changed #
2434 # to a Trace stack frame and an exit made through _real_trace(). #
2435 # For the case of "fmovm.x Dn,-(a7)", where the offending instruction #
2436 # was executed from supervisor mode, this handler must store the FP #
2437 # register file values to the system stack by itself since #
2438 # fmovm_dynamic() can't handle this. A normal exit is made through #
2439 # fpsp_done(). #
2440 # For "fmovm.l", fmovm_ctrl() is used to emulate the instruction. #
2441 # Again, a Trace exception may be pending and an exit made through #
2442 # _real_trace(). Else, a normal exit is made through _fpsp_done(). #
2443 # #
2444 # Before any of the above is attempted, it must be checked to #
2445 # see if the FPU is disabled. Since the "Unimp <ea>" exception is taken #
2446 # before the "FPU disabled" exception, but the "FPU disabled" exception #
2447 # has higher priority, we check the disabled bit in the PCR. If set, #
2448 # then we must create an 8 word "FPU disabled" exception stack frame #
2449 # from the current 4 word exception stack frame. This includes #
2450 # reproducing the effective address of the instruction to put on the #
2451 # new stack frame. #
2452 # #
2453 # In the process of all emulation work, if a _mem_read() #
2454 # "callout" returns a failing result indicating an access error, then #
2455 # we must create an access error stack frame from the current stack #
2456 # frame. This information includes a faulting address and a fault- #
2457 # status-longword. These are created within this handler. #
2458 # #
2459 #########################################################################
2460
2461 global _fpsp_effadd
2462 _fpsp_effadd:
2463
2464 # This exception type takes priority over the "Line F Emulator"
2465 # exception. Therefore, the FPU could be disabled when entering here.
2466 # So, we must check to see if it's disabled and handle that case separately.
2467 mov.l %d0,-(%sp) # save d0
2468 movc %pcr,%d0 # load proc cr
2469 btst &0x1,%d0 # is FPU disabled?
2470 bne.w iea_disabled # yes
2471 mov.l (%sp)+,%d0 # restore d0
2472
2473 link %a6,&-LOCAL_SIZE # init stack frame
2474
2475 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2476 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
2477 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
2478
2479 # PC of instruction that took the exception is the PC in the frame
2480 mov.l EXC_PC(%a6),EXC_EXTWPTR(%a6)
2481
2482 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
2483 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
2484 bsr.l _imem_read_long # fetch the instruction words
2485 mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
2486
2487 #########################################################################
2488
2489 tst.w %d0 # is operation fmovem?
2490 bmi.w iea_fmovm # yes
2491
2492 #
2493 # here, we will have:
2494 # fabs fdabs fsabs facos fmod
2495 # fadd fdadd fsadd fasin frem
2496 # fcmp fatan fscale
2497 # fdiv fddiv fsdiv fatanh fsin
2498 # fint fcos fsincos
2499 # fintrz fcosh fsinh
2500 # fmove fdmove fsmove fetox ftan
2501 # fmul fdmul fsmul fetoxm1 ftanh
2502 # fneg fdneg fsneg fgetexp ftentox
2503 # fsgldiv fgetman ftwotox
2504 # fsglmul flog10
2505 # fsqrt flog2
2506 # fsub fdsub fssub flogn
2507 # ftst flognp1
2508 # which can all use f<op>.{x,p}
2509 # so, now it's immediate data extended precision AND PACKED FORMAT!
2510 #
2511 iea_op:
2512 andi.l &0x00ff00ff,USER_FPSR(%a6)
2513
2514 btst &0xa,%d0 # is src fmt x or p?
2515 bne.b iea_op_pack # packed
2516
2517
2518 mov.l EXC_EXTWPTR(%a6),%a0 # pass: ptr to #<data>
2519 lea FP_SRC(%a6),%a1 # pass: ptr to super addr
2520 mov.l &0xc,%d0 # pass: 12 bytes
2521 bsr.l _imem_read # read extended immediate
2522
2523 tst.l %d1 # did ifetch fail?
2524 bne.w iea_iacc # yes
2525
2526 bra.b iea_op_setsrc
2527
2528 iea_op_pack:
2529
2530 mov.l EXC_EXTWPTR(%a6),%a0 # pass: ptr to #<data>
2531 lea FP_SRC(%a6),%a1 # pass: ptr to super dst
2532 mov.l &0xc,%d0 # pass: 12 bytes
2533 bsr.l _imem_read # read packed operand
2534
2535 tst.l %d1 # did ifetch fail?
2536 bne.w iea_iacc # yes
2537
2538 # The packed operand is an INF or a NAN if the exponent field is all ones.
2539 bfextu FP_SRC(%a6){&1:&15},%d0 # get exp
2540 cmpi.w %d0,&0x7fff # INF or NAN?
2541 beq.b iea_op_setsrc # operand is an INF or NAN
2542
2543 # The packed operand is a zero if the mantissa is all zero, else it's
2544 # a normal packed op.
2545 mov.b 3+FP_SRC(%a6),%d0 # get byte 4
2546 andi.b &0x0f,%d0 # clear all but last nybble
2547 bne.b iea_op_gp_not_spec # not a zero
2548 tst.l FP_SRC_HI(%a6) # is lw 2 zero?
2549 bne.b iea_op_gp_not_spec # not a zero
2550 tst.l FP_SRC_LO(%a6) # is lw 3 zero?
2551 beq.b iea_op_setsrc # operand is a ZERO
2552 iea_op_gp_not_spec:
2553 lea FP_SRC(%a6),%a0 # pass: ptr to packed op
2554 bsr.l decbin # convert to extended
2555 fmovm.x &0x80,FP_SRC(%a6) # make this the srcop
2556
#
# iea_op_setsrc: FP_SRC holds the converted src operand. tag it, fetch
# the dst operand if the operation is dyadic, then dispatch through
# tbl_unsupp to the appropriate emulation routine.
#
2557 iea_op_setsrc:
2558 addi.l &0xc,EXC_EXTWPTR(%a6) # update extension word pointer
2559
2560 # FP_SRC now holds the src operand.
2561 lea FP_SRC(%a6),%a0 # pass: ptr to src op
2562 bsr.l set_tag_x # tag the operand type
2563 mov.b %d0,STAG(%a6) # could be ANYTHING!!!
2564 cmpi.b %d0,&UNNORM # is operand an UNNORM?
2565 bne.b iea_op_getdst # no
2566 bsr.l unnorm_fix # yes; convert to NORM/DENORM/ZERO
2567 mov.b %d0,STAG(%a6) # set new optype tag
2568 iea_op_getdst:
2569 clr.b STORE_FLG(%a6) # clear "store result" boolean
2570
2571 btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
2572 beq.b iea_op_extract # monadic
2573 btst &0x4,1+EXC_CMDREG(%a6) # is operation fsincos,ftst,fcmp?
2574 bne.b iea_op_spec # yes
2575
2576 iea_op_loaddst:
2577 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
2578 bsr.l load_fpn2 # load dst operand
2579
2580 lea FP_DST(%a6),%a0 # pass: ptr to dst op
2581 bsr.l set_tag_x # tag the operand type
2582 mov.b %d0,DTAG(%a6) # could be ANYTHING!!!
2583 cmpi.b %d0,&UNNORM # is operand an UNNORM?
2584 bne.b iea_op_extract # no
2585 bsr.l unnorm_fix # yes; convert to NORM/DENORM/ZERO
2586 mov.b %d0,DTAG(%a6) # set new optype tag
2587 bra.b iea_op_extract
2588
2589 # the operation is fsincos, ftst, or fcmp. only fcmp is dyadic
2590 iea_op_spec:
2591 btst &0x3,1+EXC_CMDREG(%a6) # is operation fsincos?
2592 beq.b iea_op_extract # yes (bit 3 clear => fsincos)
2593 # now, we're left with ftst and fcmp. so, first let's tag them so that they don't
2594 # store a result. then, only fcmp will branch back and pick up a dst operand.
2595 st STORE_FLG(%a6) # don't store a final result
2596 btst &0x1,1+EXC_CMDREG(%a6) # is operation fcmp?
2597 beq.b iea_op_loaddst # yes (bit 1 clear => fcmp)
2598
2599 iea_op_extract:
2600 clr.l %d0
2601 mov.b FPCR_MODE(%a6),%d0 # pass: rnd mode,prec
2602
2603 mov.b 1+EXC_CMDREG(%a6),%d1
2604 andi.w &0x007f,%d1 # extract extension
2605
2606 fmov.l &0x0,%fpcr # clear ctrl regs for emulation
2607 fmov.l &0x0,%fpsr
2608
2609 lea FP_SRC(%a6),%a0 # pass: ptr to src op
2610 lea FP_DST(%a6),%a1 # pass: ptr to dst op
2611
# double-indirect dispatch through the opclass 0/2 emulation table
2612 mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
2613 jsr (tbl_unsupp.l,%pc,%d1.l*1)
2614
2615 #
2616 # Exceptions in order of precedence:
2617 # BSUN : none
2618 # SNAN : all operations
2619 # OPERR : all reg-reg or mem-reg operations that can normally operr
2620 # OVFL : same as OPERR
2621 # UNFL : same as OPERR
2622 # DZ : same as OPERR
2623 # INEX2 : same as OPERR
2624 # INEX1 : all packed immediate operations
2625 #
2626
2627 # we determine the highest priority exception(if any) set by the
2628 # emulation routine that has also been enabled by the user.
2629 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
2630 bne.b iea_op_ena # some are enabled
2631
2632 # now, we save the result, unless, of course, the operation was ftst or fcmp.
2633 # these don't save results.
2634 iea_op_save:
2635 tst.b STORE_FLG(%a6) # does this op store a result?
2636 bne.b iea_op_exit1 # exit with no frestore
2637
2638 iea_op_store:
2639 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
2640 bsr.l store_fpreg # store the result
2641
2642 iea_op_exit1:
2643 mov.l EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
2644 mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
2645
2646 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
2647 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2648 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2649
2650 unlk %a6 # unravel the frame
2651
2652 btst &0x7,(%sp) # is trace on?
2653 bne.w iea_op_trace # yes
2654
2655 bra.l _fpsp_done # exit to os
2656
#
# iea_op_ena: some exceptions are enabled in the FPCR. figure out which
# (if any) actually occurred and stuff the matching fsave status word
# into the frame so the frestore re-creates the exceptional state.
#
2657 iea_op_ena:
2658 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enable and set
2659 bfffo %d0{&24:&8},%d0 # find highest priority exception
2660 bne.b iea_op_exc # at least one was set
2661
2662 # no exception occurred. now, did a disabled, exact overflow occur with inexact
2663 # enabled? if so, then we have to stuff an overflow frame into the FPU.
2664 btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
2665 beq.b iea_op_save
2666
2667 iea_op_ovfl:
2668 btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
2669 beq.b iea_op_store # no
2670 bra.b iea_op_exc_ovfl # yes
2671
2672 # an enabled exception occurred. we have to insert the exception type back into
2673 # the machine.
2674 iea_op_exc:
2675 subi.l &24,%d0 # fix offset to be 0-8
2676 cmpi.b %d0,&0x6 # is exception INEX?
2677 bne.b iea_op_exc_force # no
2678
2679 # the enabled exception was inexact. so, if it occurs with an overflow
2680 # or underflow that was disabled, then we have to force an overflow or
2681 # underflow frame.
2682 btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
2683 bne.b iea_op_exc_ovfl # yes
2684 btst &unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
2685 bne.b iea_op_exc_unfl # yes
2686
2687 iea_op_exc_force:
2688 mov.w (tbl_iea_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # insert fsave status
2689 bra.b iea_op_exit2 # exit with frestore
2690
# fsave status words indexed by exception priority:
# (BSUN slot), SNAN, OPERR, OVFL, UNFL, DZ, INEX2, INEX1
2691 tbl_iea_except:
2692 short 0xe002, 0xe006, 0xe004, 0xe005
2693 short 0xe003, 0xe002, 0xe001, 0xe001
2694
2695 iea_op_exc_ovfl:
2696 mov.w &0xe005,2+FP_SRC(%a6) # force OVFL fsave status
2697 bra.b iea_op_exit2
2698
2699 iea_op_exc_unfl:
2700 mov.w &0xe003,2+FP_SRC(%a6) # force UNFL fsave status
2701
2702 iea_op_exit2:
2703 mov.l EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
2704 mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
2705
2706 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
2707 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2708 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2709
2710 frestore FP_SRC(%a6) # restore exceptional state
2711
2712 unlk %a6 # unravel the frame
2713
2714 btst &0x7,(%sp) # is trace on?
2715 bne.b iea_op_trace # yes
2716
2717 bra.l _fpsp_done # exit to os
2718
2719 #
2720 # The opclass two instruction that took an "Unimplemented Effective Address"
2721 # exception was being traced. Make the "current" PC the FPIAR and put it in
2722 # the trace stack frame then jump to _real_trace().
2723 #
2724 # UNIMP EA FRAME TRACE FRAME
2725 # ***************** *****************
2726 # * 0x0 * 0x0f0 * * Current *
2727 # ***************** * PC *
2728 # * Current * *****************
2729 # * PC * * 0x2 * 0x024 *
2730 # ***************** *****************
2731 # * SR * * Next *
2732 # ***************** * PC *
2733 # *****************
2734 # * SR *
2735 # *****************
# convert the 4-word Unimp EA frame into a 6-word trace frame (fmt 0x2),
# per the diagram above, then hand off to the OS trace handler.
2736 iea_op_trace:
2737 mov.l (%sp),-(%sp) # shift stack frame "down"
2738 mov.w 0x8(%sp),0x4(%sp) # move SR into its new slot
2739 mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
2740 fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
2741
2742 bra.l _real_trace
2743
2744 #########################################################################
# the Unimp EA instruction is an fmovm; dispatch on whether it moves
# FP data registers (dynamic fmovm) or FP control registers.
2745 iea_fmovm:
2746 btst &14,%d0 # ctrl or data reg
2747 beq.w iea_fmovm_ctrl
2748
2749 iea_fmovm_data:
2750
2751 btst &0x5,EXC_SR(%a6) # user or supervisor mode
2752 bne.b iea_fmovm_data_s
2753
# user mode: give fmovm_dynamic the user's a7 and write back any update
# it made (the <ea> may have been (a7)+ or -(a7)).
2754 iea_fmovm_data_u:
2755 mov.l %usp,%a0
2756 mov.l %a0,EXC_A7(%a6) # store current a7
2757 bsr.l fmovm_dynamic # do dynamic fmovm
2758 mov.l EXC_A7(%a6),%a0 # load possibly new a7
2759 mov.l %a0,%usp # update usp
2760 bra.w iea_fmovm_exit
2761
# supervisor mode: a7 is the system stack itself, so pass the address just
# above the exception frame. fmovm_dynamic reports an a7-relative <ea>
# through SPCOND_FLG (mda7_flg = -(a7), mia7_flg = (a7)+).
2762 iea_fmovm_data_s:
2763 clr.b SPCOND_FLG(%a6)
2764 lea 0x2+EXC_VOFF(%a6),%a0
2765 mov.l %a0,EXC_A7(%a6)
2766 bsr.l fmovm_dynamic # do dynamic fmovm
2767
2768 cmpi.b SPCOND_FLG(%a6),&mda7_flg
2769 beq.w iea_fmovm_data_predec
2770 cmpi.b SPCOND_FLG(%a6),&mia7_flg
2771 bne.w iea_fmovm_exit
2772
2773 # right now, d0 = the size.
2774 # the data has been fetched from the supervisor stack, but we have not
2775 # incremented the stack pointer by the appropriate number of bytes.
2776 # do it here.
2777 iea_fmovm_data_postinc:
2778 btst &0x7,EXC_SR(%a6)
2779 bne.b iea_fmovm_data_pi_trace
2780
# build a fresh format-0 (voff 0x0f0) frame "size" bytes higher on the
# stack, then unwind onto it.
2781 mov.w EXC_SR(%a6),(EXC_SR,%a6,%d0)
2782 mov.l EXC_EXTWPTR(%a6),(EXC_PC,%a6,%d0)
2783 mov.w &0x00f0,(EXC_VOFF,%a6,%d0)
2784
2785 lea (EXC_SR,%a6,%d0),%a0
2786 mov.l %a0,EXC_SR(%a6)
2787
2788 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
2789 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2790 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2791
2792 unlk %a6
2793 mov.l (%sp)+,%sp # switch to the relocated frame
2794 bra.l _fpsp_done
2795
# same as above but trace was on: build a format-0x2 (voff 0x024) trace
# frame instead and exit through the OS trace handler.
2796 iea_fmovm_data_pi_trace:
2797 mov.w EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
2798 mov.l EXC_EXTWPTR(%a6),(EXC_PC-0x4,%a6,%d0)
2799 mov.w &0x2024,(EXC_VOFF-0x4,%a6,%d0)
2800 mov.l EXC_PC(%a6),(EXC_VOFF+0x2-0x4,%a6,%d0)
2801
2802 lea (EXC_SR-0x4,%a6,%d0),%a0
2803 mov.l %a0,EXC_SR(%a6)
2804
2805 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
2806 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2807 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2808
2809 unlk %a6
2810 mov.l (%sp)+,%sp # switch to the relocated frame
2811 bra.l _real_trace
2812
2813 # right now, d1 = size and d0 = the strg.
# -(a7) from supervisor mode: the registers must be written below the
# exception frame itself. stash strg/size, restore user state, build the
# new exception (or trace) frame at its final location, then emit each
# selected FP register with single-register fmovm.x stores.
2814 iea_fmovm_data_predec:
2815 mov.b %d1,EXC_VOFF(%a6) # store strg
2816 mov.b %d0,0x1+EXC_VOFF(%a6) # store size
2817
2818 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
2819 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2820 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2821
2822 mov.l (%a6),-(%sp) # make a copy of a6
2823 mov.l %d0,-(%sp) # save d0
2824 mov.l %d1,-(%sp) # save d1
2825 mov.l EXC_EXTWPTR(%a6),-(%sp) # make a copy of Next PC
2826
2827 clr.l %d0
2828 mov.b 0x1+EXC_VOFF(%a6),%d0 # fetch size
2829 neg.l %d0 # get negative of size
2830
2831 btst &0x7,EXC_SR(%a6) # is trace enabled?
2832 beq.b iea_fmovm_data_p2
2833
# trace on: build a format-0x2 (voff 0x024) trace frame "size" bytes lower.
2834 mov.w EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
2835 mov.l EXC_PC(%a6),(EXC_VOFF-0x2,%a6,%d0)
2836 mov.l (%sp)+,(EXC_PC-0x4,%a6,%d0) # stack Next PC
2837 mov.w &0x2024,(EXC_VOFF-0x4,%a6,%d0)
2838
2839 pea (%a6,%d0) # create final sp
2840 bra.b iea_fmovm_data_p3
2841
# trace off: build a format-0 (voff 0x0f0) frame "size" bytes lower.
2842 iea_fmovm_data_p2:
2843 mov.w EXC_SR(%a6),(EXC_SR,%a6,%d0)
2844 mov.l (%sp)+,(EXC_PC,%a6,%d0) # stack Next PC
2845 mov.w &0x00f0,(EXC_VOFF,%a6,%d0)
2846
2847 pea (0x4,%a6,%d0) # create final sp
2848
# walk the register-select mask (strg) msb-first; for each set bit store
# the corresponding FP register (12 bytes each) and advance the offset.
2849 iea_fmovm_data_p3:
2850 clr.l %d1
2851 mov.b EXC_VOFF(%a6),%d1 # fetch strg
2852
2853 tst.b %d1
2854 bpl.b fm_1
2855 fmovm.x &0x80,(0x4+0x8,%a6,%d0)
2856 addi.l &0xc,%d0
2857 fm_1:
2858 lsl.b &0x1,%d1
2859 bpl.b fm_2
2860 fmovm.x &0x40,(0x4+0x8,%a6,%d0)
2861 addi.l &0xc,%d0
2862 fm_2:
2863 lsl.b &0x1,%d1
2864 bpl.b fm_3
2865 fmovm.x &0x20,(0x4+0x8,%a6,%d0)
2866 addi.l &0xc,%d0
2867 fm_3:
2868 lsl.b &0x1,%d1
2869 bpl.b fm_4
2870 fmovm.x &0x10,(0x4+0x8,%a6,%d0)
2871 addi.l &0xc,%d0
2872 fm_4:
2873 lsl.b &0x1,%d1
2874 bpl.b fm_5
2875 fmovm.x &0x08,(0x4+0x8,%a6,%d0)
2876 addi.l &0xc,%d0
2877 fm_5:
2878 lsl.b &0x1,%d1
2879 bpl.b fm_6
2880 fmovm.x &0x04,(0x4+0x8,%a6,%d0)
2881 addi.l &0xc,%d0
2882 fm_6:
2883 lsl.b &0x1,%d1
2884 bpl.b fm_7
2885 fmovm.x &0x02,(0x4+0x8,%a6,%d0)
2886 addi.l &0xc,%d0
2887 fm_7:
2888 lsl.b &0x1,%d1
2889 bpl.b fm_end
2890 fmovm.x &0x01,(0x4+0x8,%a6,%d0)
# all registers stored: recover d0/d1/a6, switch to the final stack
# pointer, and exit (through the trace handler if trace was on).
2891 fm_end:
2892 mov.l 0x4(%sp),%d1
2893 mov.l 0x8(%sp),%d0
2894 mov.l 0xc(%sp),%a6
2895 mov.l (%sp)+,%sp
2896
2897 btst &0x7,(%sp) # is trace enabled?
2898 beq.l _fpsp_done
2899 bra.l _real_trace
2900
2901 #########################################################################
# fmovm of the FP control registers: fmovm_ctrl does the actual transfer.
2902 iea_fmovm_ctrl:
2903
2904 bsr.l fmovm_ctrl # load ctrl regs
2905
# common exit for both fmovm flavors: restore user state and return,
# taking the trace path if the T bit was set in the stacked SR.
2906 iea_fmovm_exit:
2907 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
2908 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2909 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2910
2911 btst &0x7,EXC_SR(%a6) # is trace on?
2912 bne.b iea_fmovm_trace # yes
2913
2914 mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set Next PC
2915
2916 unlk %a6 # unravel the frame
2917
2918 bra.l _fpsp_done # exit to os
2919
2920 #
2921 # The control reg instruction that took an "Unimplemented Effective Address"
2922 # exception was being traced. The "Current PC" for the trace frame is the
2923 # PC stacked for Unimp EA. The "Next PC" is in EXC_EXTWPTR.
2924 # After fixing the stack frame, jump to _real_trace().
2925 #
2926 # UNIMP EA FRAME TRACE FRAME
2927 # ***************** *****************
2928 # * 0x0 * 0x0f0 * * Current *
2929 # ***************** * PC *
2930 # * Current * *****************
2931 # * PC * * 0x2 * 0x024 *
2932 # ***************** *****************
2933 # * SR * * Next *
2934 # ***************** * PC *
2935 # *****************
2936 # * SR *
2937 # *****************
2938 # this ain't a pretty solution, but it works:
2939 # -restore a6 (not with unlk)
2940 # -shift stack frame down over where old a6 used to be
2941 # -add LOCAL_SIZE to stack pointer
2942 iea_fmovm_trace:
2943 mov.l (%a6),%a6 # restore frame pointer
2944 mov.w EXC_SR+LOCAL_SIZE(%sp),0x0+LOCAL_SIZE(%sp)
2945 mov.l EXC_PC+LOCAL_SIZE(%sp),0x8+LOCAL_SIZE(%sp)
2946 mov.l EXC_EXTWPTR+LOCAL_SIZE(%sp),0x2+LOCAL_SIZE(%sp)
2947 mov.w &0x2024,0x6+LOCAL_SIZE(%sp) # stk fmt = 0x2; voff = 0x024
2948 add.l &LOCAL_SIZE,%sp # clear stack frame
2949
2950 bra.l _real_trace
2951
2952 #########################################################################
2953 # The FPU is disabled and so we should really have taken the "Line
2954 # F Emulator" exception. So, here we create an 8-word stack frame
2955 # from our 4-word stack frame. This means we must calculate the length
2956 # of the faulting instruction to get the "next PC". This is trivial for
2957 # immediate operands but requires some extra work for fmovm dynamic
2958 # which can use most addressing modes.
2959 iea_disabled:
2960 mov.l (%sp)+,%d0 # restore d0
2961
2962 link %a6,&-LOCAL_SIZE # init stack frame
2963
2964 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2965
2966 # PC of instruction that took the exception is the PC in the frame
2967 mov.l EXC_PC(%a6),EXC_EXTWPTR(%a6)
2968 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
2969 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
2970 bsr.l _imem_read_long # fetch the instruction words
2971 mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
2972
# determine the instruction length so we can synthesize the "next PC":
# fmovm needs decoding; anything else here has an immediate operand.
2973 tst.w %d0 # is instr fmovm?
2974 bmi.b iea_dis_fmovm # yes
2975 # instruction is using an extended precision immediate operand. therefore,
2976 # the total instruction length is 16 bytes.
2977 iea_dis_immed:
2978 mov.l &0x10,%d0 # 16 bytes of instruction
2979 bra.b iea_dis_cont
2980 iea_dis_fmovm:
2981 btst &0xe,%d0 # is instr fmovm ctrl
2982 bne.b iea_dis_fmovm_data # no
2983 # the instruction is a fmovm.l with 2 or 3 registers.
2984 bfextu %d0{&19:&3},%d1
2985 mov.l &0xc,%d0 # 2 regs: 12 bytes
2986 cmpi.b %d1,&0x7 # move all regs?
2987 bne.b iea_dis_cont
2988 addq.l &0x4,%d0 # 3 regs: 16 bytes
2989 bra.b iea_dis_cont
2990 # the instruction is an fmovm.x dynamic which can use many addressing
2991 # modes and thus can have several different total instruction lengths.
2992 # call fmovm_calc_ea which will go through the ea calc process and,
2993 # as a by-product, will tell us how long the instruction is.
2994 iea_dis_fmovm_data:
2995 clr.l %d0
2996 bsr.l fmovm_calc_ea
2997 mov.l EXC_EXTWPTR(%a6),%d0
2998 sub.l EXC_PC(%a6),%d0 # length = extwptr - PC
2999 iea_dis_cont:
3000 mov.w %d0,EXC_VOFF(%a6) # store stack shift value
3001
3002 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3003
3004 unlk %a6
3005
3006 # here, we actually create the 8-word frame from the 4-word frame,
3007 # with the "next PC" as additional info.
3008 # the <ea> field is left as undefined.
3009 subq.l &0x8,%sp # make room for new stack
3010 mov.l %d0,-(%sp) # save d0
3011 mov.w 0xc(%sp),0x4(%sp) # move SR
3012 mov.l 0xe(%sp),0x6(%sp) # move Current PC
3013 clr.l %d0
3014 mov.w 0x12(%sp),%d0 # fetch stored instr length
3015 mov.l 0x6(%sp),0x10(%sp) # move Current PC
3016 add.l %d0,0x6(%sp) # make Next PC
3017 mov.w &0x402c,0xa(%sp) # insert offset,frame format
3018 mov.l (%sp)+,%d0 # restore d0
3019
3020 bra.l _real_fpu_disabled
3021
3022 ##########
3023
# instruction-fetch access error while emulating Unimp EA: rebuild an
# access-error (format 0x4, voff 0x008) frame and exit via _real_access().
3024 iea_iacc:
3025 movc %pcr,%d0
3026 btst &0x1,%d0 # NOTE(review): presumably the PCR
3027 bne.b iea_iacc_cont # FPU-disable bit; skip FP restore if set
3028 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3029 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 on stack
3030 iea_iacc_cont:
3031 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3032
3033 unlk %a6
3034
3035 subq.w &0x8,%sp # make stack frame bigger
3036 mov.l 0x8(%sp),(%sp) # store SR,hi(PC)
3037 mov.w 0xc(%sp),0x4(%sp) # store lo(PC)
3038 mov.w &0x4008,0x6(%sp) # store voff
3039 mov.l 0x2(%sp),0x8(%sp) # store ea
3040 mov.l &0x09428001,0xc(%sp) # store fslw
3041
# common tail for both access-error flavors: flag supervisor-mode faults
# in the FSLW's TM field before calling out.
3042 iea_acc_done:
3043 btst &0x5,(%sp) # user or supervisor mode?
3044 beq.b iea_acc_done2 # user
3045 bset &0x2,0xd(%sp) # set supervisor TM bit
3046
3047 iea_acc_done2:
3048 bra.l _real_access
3049
# data access error: same idea, but the frame is rebuilt in place from the
# emulation locals (a0 = fault address, d0 = FSLW high word from caller).
3050 iea_dacc:
3051 lea -LOCAL_SIZE(%a6),%sp
3052
3053 movc %pcr,%d1
3054 btst &0x1,%d1 # see NOTE at iea_iacc above
3055 bne.b iea_dacc_cont
3056 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 on stack
3057 fmovm.l LOCAL_SIZE+USER_FPCR(%sp),%fpcr,%fpsr,%fpiar # restore ctrl regs
3058 iea_dacc_cont:
3059 mov.l (%a6),%a6 # restore frame pointer
3060
3061 mov.l 0x4+LOCAL_SIZE(%sp),-0x8+0x4+LOCAL_SIZE(%sp)
3062 mov.w 0x8+LOCAL_SIZE(%sp),-0x8+0x8+LOCAL_SIZE(%sp)
3063 mov.w &0x4008,-0x8+0xa+LOCAL_SIZE(%sp) # fmt 0x4, voff 0x008
3064 mov.l %a0,-0x8+0xc+LOCAL_SIZE(%sp) # fault <ea>
3065 mov.w %d0,-0x8+0x10+LOCAL_SIZE(%sp) # fslw hi
3066 mov.w &0x0001,-0x8+0x12+LOCAL_SIZE(%sp) # fslw lo
3067
3068 movm.l LOCAL_SIZE+EXC_DREGS(%sp),&0x0303 # restore d0-d1/a0-a1
3069 add.w &LOCAL_SIZE-0x4,%sp
3070
3071 bra.b iea_acc_done
3072
3073 #########################################################################
3074 # XDEF **************************************************************** #
3075 # _fpsp_operr(): 060FPSP entry point for FP Operr exception. #
3076 # #
3077 # This handler should be the first code executed upon taking the #
3078 # FP Operand Error exception in an operating system. #
3079 # #
3080 # XREF **************************************************************** #
3081 # _imem_read_long() - read instruction longword #
3082 # fix_skewed_ops() - adjust src operand in fsave frame #
3083 # _real_operr() - "callout" to operating system operr handler #
3084 # _dmem_write_{byte,word,long}() - store data to mem (opclass 3) #
3085 # store_dreg_{b,w,l}() - store data to data regfile (opclass 3) #
3086 # facc_out_{b,w,l}() - store to memory took access error (opcl 3) #
3087 # #
3088 # INPUT *************************************************************** #
3089 # - The system stack contains the FP Operr exception frame #
3090 # - The fsave frame contains the source operand #
3091 # #
3092 # OUTPUT ************************************************************** #
3093 # No access error: #
3094 # - The system stack is unchanged #
3095 # - The fsave frame contains the adjusted src op for opclass 0,2 #
3096 # #
3097 # ALGORITHM *********************************************************** #
3098 # In a system where the FP Operr exception is enabled, the goal #
3099 # is to get to the handler specified at _real_operr(). But, on the 060, #
3100 # for opclass zero and two instruction taking this exception, the #
3101 # input operand in the fsave frame may be incorrect for some cases #
3102 # and needs to be corrected. This handler calls fix_skewed_ops() to #
3103 # do just this and then exits through _real_operr(). #
3104 # For opclass 3 instructions, the 060 doesn't store the default #
3105 # operr result out to memory or data register file as it should. #
3106 # This code must emulate the move out before finally exiting through #
3107 # _real_operr(). The move out, if to memory, is performed using #
3108 # _mem_write() "callout" routines that may return a failing result. #
3109 # In this special case, the handler must exit through facc_out() #
3110 # which creates an access error stack frame from the current operr #
3111 # stack frame. #
3112 # #
3113 #########################################################################
3114
3115 global _fpsp_operr
3116 _fpsp_operr:
3117
3118 link.w %a6,&-LOCAL_SIZE # init stack frame
3119
3120 fsave FP_SRC(%a6) # grab the "busy" frame
3121
3122 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3123 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3124 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
3125
3126 # the FPIAR holds the "current PC" of the faulting instruction
3127 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3128
3129 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3130 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3131 bsr.l _imem_read_long # fetch the instruction words
3132 mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
3133
3134 ##############################################################################
3135
# opword bit 13 distinguishes opclass 3 (fmove out) from opclass 0/2.
3136 btst &13,%d0 # is instr an fmove out?
3137 bne.b foperr_out # fmove out
3138
3139
3140 # here, we simply see if the operand in the fsave frame needs to be "unskewed".
3141 # this would be the case for opclass two operations with a source infinity or
3142 # denorm operand in the sgl or dbl format. NANs also become skewed, but can't
3143 # cause an operr so we don't need to check for them here.
3144 lea FP_SRC(%a6),%a0 # pass: ptr to src op
3145 bsr.l fix_skewed_ops # fix src op
3146
# restore the user's state, frestore the (fixed) exceptional frame, and
# continue in the operating system's operand-error handler.
3147 foperr_exit:
3148 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3149 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3150 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3151
3152 frestore FP_SRC(%a6)
3153
3154 unlk %a6
3155 bra.l _real_operr
3156
3157 ########################################################################
3158
3159 #
3160 # the hardware does not save the default result to memory on enabled
3161 # operand error exceptions. we do this here before passing control to
3162 # the user operand error handler.
3163 #
3164 # byte, word, and long destination format operations can pass
3165 # through here. we simply need to test the sign of the src
3166 # operand and save the appropriate minimum or maximum integer value
3167 # to the effective address as pointed to by the stacked effective address.
3168 #
3169 # although packed opclass three operations can take operand error
3170 # exceptions, they won't pass through here since they are caught
3171 # first by the unsupported data format exception handler. that handler
3172 # sends them directly to _real_operr() if necessary.
3173 #
3174 foperr_out:
3175
# decide the default integer result: for an infinity/QNAN source, the NAN
# mantissa bits are the result; otherwise it is the max positive integer
# (0x7fffffff), or min negative (0x80000000) when the source is negative.
3176 mov.w FP_SRC_EX(%a6),%d1 # fetch exponent
3177 andi.w &0x7fff,%d1
3178 cmpi.w %d1,&0x7fff # max exponent => inf or NAN
3179 bne.b foperr_out_not_qnan
3180 # the operand is either an infinity or a QNAN.
3181 tst.l FP_SRC_LO(%a6)
3182 bne.b foperr_out_qnan
3183 mov.l FP_SRC_HI(%a6),%d1
3184 andi.l &0x7fffffff,%d1
3185 beq.b foperr_out_not_qnan # zero mantissa => infinity
3186 foperr_out_qnan:
3187 mov.l FP_SRC_HI(%a6),L_SCR1(%a6)
3188 bra.b foperr_out_jmp
3189
3190 foperr_out_not_qnan:
3191 mov.l &0x7fffffff,%d1
3192 tst.b FP_SRC_EX(%a6) # source negative?
3193 bpl.b foperr_out_not_qnan2
3194 addq.l &0x1,%d1 # yes: wrap to 0x80000000
3195 foperr_out_not_qnan2:
3196 mov.l %d1,L_SCR1(%a6)
3197
# dispatch on the destination format field of the opword.
3198 foperr_out_jmp:
3199 bfextu %d0{&19:&3},%d0 # extract dst format field
3200 mov.b 1+EXC_OPWORD(%a6),%d1 # extract <ea> mode,reg
3201 mov.w (tbl_operr.b,%pc,%d0.w*2),%a0
3202 jmp (tbl_operr.b,%pc,%a0)
3203
3204 tbl_operr:
3205 short foperr_out_l - tbl_operr # long word integer
3206 short tbl_operr - tbl_operr # sgl prec shouldn't happen
3207 short tbl_operr - tbl_operr # ext prec shouldn't happen
3208 short foperr_exit - tbl_operr # packed won't enter here
3209 short foperr_out_w - tbl_operr # word integer
3210 short tbl_operr - tbl_operr # dbl prec shouldn't happen
3211 short foperr_out_b - tbl_operr # byte integer
3212 short tbl_operr - tbl_operr # packed won't enter here
3213
# byte destination: write default result to memory, or to Dn when the
# <ea> mode field is a data register (mode 0 => mode/reg byte <= 7).
3214 foperr_out_b:
3215 mov.b L_SCR1(%a6),%d0 # load positive default result
3216 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3217 ble.b foperr_out_b_save_dn # yes
3218 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3219 bsr.l _dmem_write_byte # write the default result
3220
3221 tst.l %d1 # did dstore fail?
3222 bne.l facc_out_b # yes
3223
3224 bra.w foperr_exit
3225 foperr_out_b_save_dn:
3226 andi.w &0x0007,%d1 # pass: Dn number
3227 bsr.l store_dreg_b # store result to regfile
3228 bra.w foperr_exit
3229
# word destination: same logic at word width.
3230 foperr_out_w:
3231 mov.w L_SCR1(%a6),%d0 # load positive default result
3232 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3233 ble.b foperr_out_w_save_dn # yes
3234 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3235 bsr.l _dmem_write_word # write the default result
3236
3237 tst.l %d1 # did dstore fail?
3238 bne.l facc_out_w # yes
3239
3240 bra.w foperr_exit
3241 foperr_out_w_save_dn:
3242 andi.w &0x0007,%d1 # pass: Dn number
3243 bsr.l store_dreg_w # store result to regfile
3244 bra.w foperr_exit
3245
# long destination: same logic at longword width.
3246 foperr_out_l:
3247 mov.l L_SCR1(%a6),%d0 # load positive default result
3248 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3249 ble.b foperr_out_l_save_dn # yes
3250 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3251 bsr.l _dmem_write_long # write the default result
3252
3253 tst.l %d1 # did dstore fail?
3254 bne.l facc_out_l # yes
3255
3256 bra.w foperr_exit
3257 foperr_out_l_save_dn:
3258 andi.w &0x0007,%d1 # pass: Dn number
3259 bsr.l store_dreg_l # store result to regfile
3260 bra.w foperr_exit
3261
3262 #########################################################################
3263 # XDEF **************************************************************** #
3264 # _fpsp_snan(): 060FPSP entry point for FP SNAN exception. #
3265 # #
3266 # This handler should be the first code executed upon taking the #
3267 # FP Signalling NAN exception in an operating system. #
3268 # #
3269 # XREF **************************************************************** #
3270 # _imem_read_long() - read instruction longword #
3271 # fix_skewed_ops() - adjust src operand in fsave frame #
3272 # _real_snan() - "callout" to operating system SNAN handler #
3273 # _dmem_write_{byte,word,long}() - store data to mem (opclass 3) #
3274 # store_dreg_{b,w,l}() - store data to data regfile (opclass 3) #
3275 # facc_out_{b,w,l,d,x}() - store to mem took acc error (opcl 3) #
3276 # _calc_ea_fout() - fix An if <ea> is -() or ()+; also get <ea> #
3277 # #
3278 # INPUT *************************************************************** #
3279 # - The system stack contains the FP SNAN exception frame #
3280 # - The fsave frame contains the source operand #
3281 # #
3282 # OUTPUT ************************************************************** #
3283 # No access error: #
3284 # - The system stack is unchanged #
3285 # - The fsave frame contains the adjusted src op for opclass 0,2 #
3286 # #
3287 # ALGORITHM *********************************************************** #
3288 # In a system where the FP SNAN exception is enabled, the goal #
3289 # is to get to the handler specified at _real_snan(). But, on the 060, #
3290 # for opclass zero and two instructions taking this exception, the #
3291 # input operand in the fsave frame may be incorrect for some cases #
3292 # and needs to be corrected. This handler calls fix_skewed_ops() to #
3293 # do just this and then exits through _real_snan(). #
3294 # For opclass 3 instructions, the 060 doesn't store the default #
3295 # SNAN result out to memory or data register file as it should. #
3296 # This code must emulate the move out before finally exiting through #
3297 # _real_snan(). The move out, if to memory, is performed using #
3298 # _mem_write() "callout" routines that may return a failing result. #
3299 # In this special case, the handler must exit through facc_out() #
3300 # which creates an access error stack frame from the current SNAN #
3301 # stack frame. #
3302 # For the case of an extended precision opclass 3 instruction, #
3303 # if the effective addressing mode was -() or ()+, then the address #
3304 # register must get updated by calling _calc_ea_fout(). If the <ea> #
3305 # was -(a7) from supervisor mode, then the exception frame currently #
3306 # on the system stack must be carefully moved "down" to make room #
3307 # for the operand being moved. #
3308 # #
3309 #########################################################################
3310
3311 global _fpsp_snan
3312 _fpsp_snan:
3313
3314 link.w %a6,&-LOCAL_SIZE # init stack frame
3315
3316 fsave FP_SRC(%a6) # grab the "busy" frame
3317
3318 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3319 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3320 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
3321
3322 # the FPIAR holds the "current PC" of the faulting instruction
3323 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3324
3325 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3326 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3327 bsr.l _imem_read_long # fetch the instruction words
3328 mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
3329
3330 ##############################################################################
3331
# opword bit 13 distinguishes opclass 3 (fmove out) from opclass 0/2.
3332 btst &13,%d0 # is instr an fmove out?
3333 bne.w fsnan_out # fmove out
3334
3335
3336 # here, we simply see if the operand in the fsave frame needs to be "unskewed".
3337 # this would be the case for opclass two operations with a source infinity or
3338 # denorm operand in the sgl or dbl format. NANs also become skewed and must be
3339 # fixed here.
3340 lea FP_SRC(%a6),%a0 # pass: ptr to src op
3341 bsr.l fix_skewed_ops # fix src op
3342
# restore the user's state, frestore the (fixed) exceptional frame, and
# continue in the operating system's SNAN handler.
3343 fsnan_exit:
3344 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3345 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3346 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3347
3348 frestore FP_SRC(%a6)
3349
3350 unlk %a6
3351 bra.l _real_snan
3352
3353 ########################################################################
3354
3355 #
3356 # the hardware does not save the default result to memory on enabled
3357 # snan exceptions. we do this here before passing control to
3358 # the user snan handler.
3359 #
3360 # byte, word, long, and packed destination format operations can pass
3361 # through here. since packed format operations already were handled by
3362 # fpsp_unsupp(), then we need to do nothing else for them here.
3363 # for byte, word, and long, we simply need to test the sign of the src
3364 # operand and save the appropriate minimum or maximum integer value
3365 # to the effective address as pointed to by the stacked effective address.
3366 #
3367 fsnan_out:
3368
# dispatch on the destination format field of the opword. the default
# SNAN result is the source NAN with its SNAN bit set (quieted).
3369 bfextu %d0{&19:&3},%d0 # extract dst format field
3370 mov.b 1+EXC_OPWORD(%a6),%d1 # extract <ea> mode,reg
3371 mov.w (tbl_snan.b,%pc,%d0.w*2),%a0
3372 jmp (tbl_snan.b,%pc,%a0)
3373
# NOTE(review): unlike tbl_operr, sgl/ext/dbl ARE handled here — the old
# "shouldn't happen" comments were stale copies from tbl_operr.
3374 tbl_snan:
3375 short fsnan_out_l - tbl_snan # long word integer
3376 short fsnan_out_s - tbl_snan # sgl prec
3377 short fsnan_out_x - tbl_snan # ext prec
3378 short tbl_snan - tbl_snan # packed needs no help
3379 short fsnan_out_w - tbl_snan # word integer
3380 short fsnan_out_d - tbl_snan # dbl prec
3381 short fsnan_out_b - tbl_snan # byte integer
3382 short tbl_snan - tbl_snan # packed needs no help
3383
# byte destination: quiet the NAN's upper byte and store to memory or Dn.
3384 fsnan_out_b:
3385 mov.b FP_SRC_HI(%a6),%d0 # load upper byte of SNAN
3386 bset &6,%d0 # set SNAN bit
3387 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3388 ble.b fsnan_out_b_dn # yes
3389 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3390 bsr.l _dmem_write_byte # write the default result
3391
3392 tst.l %d1 # did dstore fail?
3393 bne.l facc_out_b # yes
3394
3395 bra.w fsnan_exit
3396 fsnan_out_b_dn:
3397 andi.w &0x0007,%d1 # pass: Dn number
3398 bsr.l store_dreg_b # store result to regfile
3399 bra.w fsnan_exit
3400
# word destination: same logic at word width.
3401 fsnan_out_w:
3402 mov.w FP_SRC_HI(%a6),%d0 # load upper word of SNAN
3403 bset &14,%d0 # set SNAN bit
3404 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3405 ble.b fsnan_out_w_dn # yes
3406 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3407 bsr.l _dmem_write_word # write the default result
3408
3409 tst.l %d1 # did dstore fail?
3410 bne.l facc_out_w # yes
3411
3412 bra.w fsnan_exit
3413 fsnan_out_w_dn:
3414 andi.w &0x0007,%d1 # pass: Dn number
3415 bsr.l store_dreg_w # store result to regfile
3416 bra.w fsnan_exit
3417
# long destination: same logic at longword width.
3418 fsnan_out_l:
3419 mov.l FP_SRC_HI(%a6),%d0 # load upper longword of SNAN
3420 bset &30,%d0 # set SNAN bit
3421 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3422 ble.b fsnan_out_l_dn # yes
3423 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3424 bsr.l _dmem_write_long # write the default result
3425
3426 tst.l %d1 # did dstore fail?
3427 bne.l facc_out_l # yes
3428
3429 bra.w fsnan_exit
3430 fsnan_out_l_dn:
3431 andi.w &0x0007,%d1 # pass: Dn number
3432 bsr.l store_dreg_l # store result to regfile
3433 bra.w fsnan_exit
3434
# single destination: assemble a sgl-format quiet NAN from the extended
# source (sign + exponent 0xff + quiet bit + top 23 mantissa bits).
3435 fsnan_out_s:
3436 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3437 ble.b fsnan_out_d_dn # yes (label despite name: sgl-to-Dn)
3438 mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign
3439 andi.l &0x80000000,%d0 # keep sign
3440 ori.l &0x7fc00000,%d0 # insert new exponent,SNAN bit
3441 mov.l FP_SRC_HI(%a6),%d1 # load mantissa
3442 lsr.l &0x8,%d1 # shift mantissa for sgl
3443 or.l %d1,%d0 # create sgl SNAN
3444 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3445 bsr.l _dmem_write_long # write the default result
3446
3447 tst.l %d1 # did dstore fail?
3448 bne.l facc_out_l # yes
3449
3450 bra.w fsnan_exit
# single destination to a data register: same construction but d1 (the
# Dn number) must be preserved around the mantissa shuffle.
3451 fsnan_out_d_dn:
3452 mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign
3453 andi.l &0x80000000,%d0 # keep sign
3454 ori.l &0x7fc00000,%d0 # insert new exponent,SNAN bit
3455 mov.l %d1,-(%sp) # save Dn number
3456 mov.l FP_SRC_HI(%a6),%d1 # load mantissa
3457 lsr.l &0x8,%d1 # shift mantissa for sgl
3458 or.l %d1,%d0 # create sgl SNAN
3459 mov.l (%sp)+,%d1 # recover Dn number
3460 andi.w &0x0007,%d1
3461 bsr.l store_dreg_l # store result to regfile
3462 bra.w fsnan_exit
3463
# double destination: assemble the 8-byte dbl-format quiet NAN in FP_SCR0
# by splitting the 64-bit extended mantissa across the 11-bit shift, then
# write it out with a single 8-byte _dmem_write.
3464 fsnan_out_d:
3465 mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign
3466 andi.l &0x80000000,%d0 # keep sign
3467 ori.l &0x7ff80000,%d0 # insert new exponent,SNAN bit
3468 mov.l FP_SRC_HI(%a6),%d1 # load hi mantissa
3469 mov.l %d0,FP_SCR0_EX(%a6) # store to temp space
3470 mov.l &11,%d0 # load shift amt
3471 lsr.l %d0,%d1
3472 or.l %d1,FP_SCR0_EX(%a6) # create dbl hi
3473 mov.l FP_SRC_HI(%a6),%d1 # load hi mantissa
3474 andi.l &0x000007ff,%d1 # bits shifted out above
3475 ror.l %d0,%d1 # rotate them to the top
3476 mov.l %d1,FP_SCR0_HI(%a6) # store to temp space
3477 mov.l FP_SRC_LO(%a6),%d1 # load lo mantissa
3478 lsr.l %d0,%d1
3479 or.l %d1,FP_SCR0_HI(%a6) # create dbl lo
3480 lea FP_SCR0(%a6),%a0 # pass: ptr to operand
3481 mov.l EXC_EA(%a6),%a1 # pass: dst addr
3482 movq.l &0x8,%d0 # pass: size of 8 bytes
3483 bsr.l _dmem_write # write the default result
3484
3485 tst.l %d1 # did dstore fail?
3486 bne.l facc_out_d # yes
3487
3488 bra.w fsnan_exit
3489
3490 # for extended precision, if the addressing mode is pre-decrement or
3491 # post-increment, then the address register did not get updated.
3492 # in addition, for pre-decrement, the stacked <ea> is incorrect.
3493 fsnan_out_x:
3494 clr.b SPCOND_FLG(%a6) # clear special case flag
3495
# build the quieted extended-precision NAN in FP_SCR0.
3496 mov.w FP_SRC_EX(%a6),FP_SCR0_EX(%a6)
3497 clr.w 2+FP_SCR0(%a6)
3498 mov.l FP_SRC_HI(%a6),%d0
3499 bset &30,%d0 # set SNAN bit
3500 mov.l %d0,FP_SCR0_HI(%a6)
3501 mov.l FP_SRC_LO(%a6),FP_SCR0_LO(%a6)
3502
3503 btst &0x5,EXC_SR(%a6) # supervisor mode exception?
3504 bne.b fsnan_out_x_s # yes
3505
# user mode: make the user a7 visible to _calc_ea_fout so -(an)/(an)+
# updates (possibly of a7 itself) can be applied, then write them back.
3506 mov.l %usp,%a0 # fetch user stack pointer
3507 mov.l %a0,EXC_A7(%a6) # save on stack for calc_ea()
3508 mov.l (%a6),EXC_A6(%a6)
3509
3510 bsr.l _calc_ea_fout # find the correct ea,update An
3511 mov.l %a0,%a1
3512 mov.l %a0,EXC_EA(%a6) # stack correct <ea>
3513
3514 mov.l EXC_A7(%a6),%a0
3515 mov.l %a0,%usp # restore user stack pointer
3516 mov.l EXC_A6(%a6),(%a6)
3517
# write the 12-byte extended operand to the computed <ea> (a1).
3518 fsnan_out_x_save:
3519 lea FP_SCR0(%a6),%a0 # pass: ptr to operand
3520 movq.l &0xc,%d0 # pass: size of extended
3521 bsr.l _dmem_write # write the default result
3522
3523 tst.l %d1 # did dstore fail?
3524 bne.l facc_out_x # yes
3525
3526 bra.w fsnan_exit
3527
3528 fsnan_out_x_s:
3529 mov.l (%a6),EXC_A6(%a6)
3530
3531 bsr.l _calc_ea_fout # find the correct ea,update An
3532 mov.l %a0,%a1
3533 mov.l %a0,EXC_EA(%a6) # stack correct <ea>
3534
3535 mov.l EXC_A6(%a6),(%a6)
3536
3537 cmpi.b SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
3538 bne.b fsnan_out_x_save # no
3539
3540 # the operation was "fmove.x SNAN,-(a7)" from supervisor mode.
# the 12-byte operand lands where the exception frame currently sits, so
# restore state, slide the frame down 12 bytes, and deposit the operand
# words in the vacated slots before exiting through _real_snan().
3541 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3542 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3543 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3544
3545 frestore FP_SRC(%a6)
3546
3547 mov.l EXC_A6(%a6),%a6 # restore frame pointer
3548
3549 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
3550 mov.l LOCAL_SIZE+EXC_PC+0x2(%sp),LOCAL_SIZE+EXC_PC+0x2-0xc(%sp)
3551 mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
3552
3553 mov.l LOCAL_SIZE+FP_SCR0_EX(%sp),LOCAL_SIZE+EXC_SR(%sp)
3554 mov.l LOCAL_SIZE+FP_SCR0_HI(%sp),LOCAL_SIZE+EXC_PC+0x2(%sp)
3555 mov.l LOCAL_SIZE+FP_SCR0_LO(%sp),LOCAL_SIZE+EXC_EA(%sp)
3556
3557 add.l &LOCAL_SIZE-0x8,%sp
3558
3559 bra.l _real_snan
3560
3561 #########################################################################
3562 # XDEF **************************************************************** #
3563 # _fpsp_inex(): 060FPSP entry point for FP Inexact exception. #
3564 # #
3565 # This handler should be the first code executed upon taking the #
3566 # FP Inexact exception in an operating system. #
3567 # #
3568 # XREF **************************************************************** #
3569 # _imem_read_long() - read instruction longword #
3570 # fix_skewed_ops() - adjust src operand in fsave frame #
3571 # set_tag_x() - determine optype of src/dst operands #
3572 # store_fpreg() - store opclass 0 or 2 result to FP regfile #
3573 # unnorm_fix() - change UNNORM operands to NORM or ZERO #
3574 # load_fpn2() - load dst operand from FP regfile #
3575 # smovcr() - emulate an "fmovcr" instruction #
3576 # fout() - emulate an opclass 3 instruction #
3577 # tbl_unsupp - address of table of emulation routines for opclass 0,2 #
3578 # _real_inex() - "callout" to operating system inexact handler #
3579 # #
3580 # INPUT *************************************************************** #
3581 # - The system stack contains the FP Inexact exception frame #
3582 # - The fsave frame contains the source operand #
3583 # #
3584 # OUTPUT ************************************************************** #
3585 # - The system stack is unchanged #
3586 # - The fsave frame contains the adjusted src op for opclass 0,2 #
3587 # #
3588 # ALGORITHM *********************************************************** #
3589 # In a system where the FP Inexact exception is enabled, the goal #
3590 # is to get to the handler specified at _real_inex(). But, on the 060, #
3591 # for opclass zero and two instructions taking this exception, the #
3592 # hardware doesn't store the correct result to the destination FP #
3593 # register as did the '040 and '881/2. This handler must emulate the #
3594 # instruction in order to get this value and then store it to the #
3595 # correct register before calling _real_inex(). #
3596 # For opclass 3 instructions, the 060 doesn't store the default #
3597 # inexact result out to memory or data register file as it should. #
3598 # This code must emulate the move out by calling fout() before finally #
3599 # exiting through _real_inex(). #
3600 # #
3601 #########################################################################
3602
# _fpsp_inex:
# FP Inexact exception entry point. Builds the standard FPSP local frame,
# saves the user context (d0-d1/a0-a1, fp0-fp1, control regs), then fetches
# the faulting instruction word pair via the FPIAR.
3603 global _fpsp_inex
3604 _fpsp_inex:
3605 
3606 link.w %a6,&-LOCAL_SIZE # init stack frame
3607 
3608 fsave FP_SRC(%a6) # grab the "busy" frame
3609 
3610 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3611 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3612 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
3613 
3614 # the FPIAR holds the "current PC" of the faulting instruction
3615 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3616 
3617 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3618 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3619 bsr.l _imem_read_long # fetch the instruction words
3620 mov.l %d0,EXC_OPWORD(%a6)
3621
3622 ##############################################################################
3623 
# opclass 3 (fmove out) goes to the move-out emulation; opclass 0/2 falls
# through to full instruction emulation below.
3624 btst &13,%d0 # is instr an fmove out?
3625 bne.w finex_out # fmove out
3626 
3627 
3628 # the hardware, for "fabs" and "fneg" w/ a long source format, puts the
3629 # longword integer directly into the upper longword of the mantissa along
3630 # w/ an exponent value of 0x401e. we convert this to extended precision here.
3631 bfextu %d0{&19:&3},%d0 # fetch instr size
3632 bne.b finex_cont # instr size is not long
3633 cmpi.w FP_SRC_EX(%a6),&0x401e # is exponent 0x401e?
3634 bne.b finex_cont # no
3635 fmov.l &0x0,%fpcr # clear rounding controls for the conversion
3636 fmov.l FP_SRC_HI(%a6),%fp0 # load integer src
3637 fmov.x %fp0,FP_SRC(%a6) # store integer as extended precision
3638 mov.w &0xe001,0x2+FP_SRC(%a6) # NOTE(review): tag bytes of the fsave image -- confirm against 060 frame layout
3639 
3640 finex_cont:
3641 lea FP_SRC(%a6),%a0 # pass: ptr to src op
3642 bsr.l fix_skewed_ops # fix src op
3643 
3644 # Here, we zero the ccode and exception byte field since we're going to
3645 # emulate the whole instruction. Notice, though, that we don't kill the
3646 # INEX1 bit. This is because a packed op has long since been converted
3647 # to extended before arriving here. Therefore, we need to retain the
3648 # INEX1 bit from when the operand was first converted.
3649 andi.l &0x00ff01ff,USER_FPSR(%a6) # zero all but accrued field (and INEX1)
3650 
3651 fmov.l &0x0,%fpcr # zero current control regs
3652 fmov.l &0x0,%fpsr
3653 
3654 bfextu EXC_EXTWORD(%a6){&0:&6},%d1 # extract upper 6 of cmdreg
3655 cmpi.b %d1,&0x17 # is op an fmovecr?
3656 beq.w finex_fmovcr # yes
3657 
3658 lea FP_SRC(%a6),%a0 # pass: ptr to src op
3659 bsr.l set_tag_x # tag the operand type
3660 mov.b %d0,STAG(%a6) # maybe NORM,DENORM
3661
3662 # bits four and five of the fp extension word separate the monadic and dyadic
3663 # operations that can pass through fpsp_inex(). remember that fcmp and ftst
3664 # will never take this exception, but fsincos will.
3665 btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
3666 beq.b finex_extract # monadic
3667 
3668 btst &0x4,1+EXC_CMDREG(%a6) # is operation an fsincos?
3669 bne.b finex_extract # yes
3670 
# dyadic: fetch and tag the destination operand as well
3671 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
3672 bsr.l load_fpn2 # load dst into FP_DST
3673 
3674 lea FP_DST(%a6),%a0 # pass: ptr to dst op
3675 bsr.l set_tag_x # tag the operand type
3676 cmpi.b %d0,&UNNORM # is operand an UNNORM?
3677 bne.b finex_op2_done # no
3678 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
3679 finex_op2_done:
3680 mov.b %d0,DTAG(%a6) # save dst optype tag
3681 
# dispatch the emulation routine through tbl_unsupp, indexed by the
# 7-bit extension field of the command word.
3682 finex_extract:
3683 clr.l %d0 # only the lo byte of d0 is meaningful
3684 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
3685 
3686 mov.b 1+EXC_CMDREG(%a6),%d1 # fetch lo byte of cmdreg
3687 andi.w &0x007f,%d1 # extract extension
3688 
3689 lea FP_SRC(%a6),%a0 # pass: ptr to src op
3690 lea FP_DST(%a6),%a1 # pass: ptr to dst op
3691 
3692 mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
3693 jsr (tbl_unsupp.l,%pc,%d1.l*1) # call emulation routine
3694 
3695 # the operation has been emulated. the result is in fp0.
3696 finex_save:
3697 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # pass: dst reg number
3698 bsr.l store_fpreg # store fp0 to the FP regfile
3699
# finex_exit:
# restore the full user context and exit through the OS inexact handler.
3700 finex_exit:
3701 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3702 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3703 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3704 
3705 frestore FP_SRC(%a6)
3706 
3707 unlk %a6
3708 bra.l _real_inex
3709 
# finex_fmovcr:
# emulate "fmovcr" via smovcr(), then store the result like any other op.
3710 finex_fmovcr:
3711 clr.l %d0
3712 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec,mode
3713 mov.b 1+EXC_CMDREG(%a6),%d1
3714 andi.l &0x0000007f,%d1 # pass rom offset
3715 bsr.l smovcr
3716 bra.b finex_save
3717
3718 ########################################################################
3719
3720 #
3721 # the hardware does not save the default result to memory on enabled
3722 # inexact exceptions. we do this here before passing control to
3723 # the user inexact handler.
3724 #
3725 # byte, word, and long destination format operations can pass
3726 # through here. so can double and single precision.
3727 # although packed opclass three operations can take inexact
3728 # exceptions, they won't pass through here since they are caught
3729 # first by the unsupported data format exception handler. that handler
3730 # sends them directly to _real_inex() if necessary.
3731 #
3732 finex_out:
3733 
3734 mov.b &NORM,STAG(%a6) # src is a NORM
3735 
3736 clr.l %d0
3737 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec,mode
3738 
3739 andi.l &0xffff00ff,USER_FPSR(%a6) # zero exception field
3740 
3741 lea FP_SRC(%a6),%a0 # pass ptr to src operand
3742 
3743 bsr.l fout # store the default result
3744 
3745 bra.b finex_exit
3746
3747 #########################################################################
3748 # XDEF **************************************************************** #
3749 # _fpsp_dz(): 060FPSP entry point for FP DZ exception. #
3750 # #
3751 # This handler should be the first code executed upon taking #
3752 # the FP DZ exception in an operating system. #
3753 # #
3754 # XREF **************************************************************** #
3755 # _imem_read_long() - read instruction longword from memory #
3756 # fix_skewed_ops() - adjust fsave operand #
3757 # _real_dz() - "callout" exit point from FP DZ handler #
3758 # #
3759 # INPUT *************************************************************** #
3760 # - The system stack contains the FP DZ exception stack. #
3761 # - The fsave frame contains the source operand. #
3762 # #
3763 # OUTPUT ************************************************************** #
3764 # - The system stack contains the FP DZ exception stack. #
3765 # - The fsave frame contains the adjusted source operand. #
3766 # #
3767 # ALGORITHM *********************************************************** #
3768 # In a system where the DZ exception is enabled, the goal is to #
3769 # get to the handler specified at _real_dz(). But, on the 060, when the #
3770 # exception is taken, the input operand in the fsave state frame may #
3771 # be incorrect for some cases and need to be adjusted. So, this package #
3772 # adjusts the operand using fix_skewed_ops() and then branches to #
3773 # _real_dz(). #
3774 # #
3775 #########################################################################
3776
# _fpsp_dz:
# FP Divide-by-Zero exception entry point. Saves context, fetches the
# faulting instruction, "unskews" the source operand in the fsave frame
# if necessary, restores context, and exits through _real_dz.
3777 global _fpsp_dz
3778 _fpsp_dz:
3779 
3780 link.w %a6,&-LOCAL_SIZE # init stack frame
3781 
3782 fsave FP_SRC(%a6) # grab the "busy" frame
3783 
3784 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3785 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3786 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
3787 
3788 # the FPIAR holds the "current PC" of the faulting instruction
3789 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3790 
3791 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3792 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3793 bsr.l _imem_read_long # fetch the instruction words
3794 mov.l %d0,EXC_OPWORD(%a6)
3795 
3796 ##############################################################################
3797 
3798 
3799 # here, we simply see if the operand in the fsave frame needs to be "unskewed".
3800 # this would be the case for opclass two operations with a source zero
3801 # in the sgl or dbl format.
3802 lea FP_SRC(%a6),%a0 # pass: ptr to src op
3803 bsr.l fix_skewed_ops # fix src op
3804 
3805 fdz_exit:
3806 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3807 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3808 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3809 
3810 frestore FP_SRC(%a6)
3811 
3812 unlk %a6
3813 bra.l _real_dz
3814
3815 #########################################################################
3816 # XDEF **************************************************************** #
3817 # _fpsp_fline(): 060FPSP entry point for "Line F emulator" #
3818 # exception when the "reduced" version of the #
3819 # FPSP is implemented that does not emulate #
3820 # FP unimplemented instructions. #
3821 # #
3822 # This handler should be the first code executed upon taking a #
3823 # "Line F Emulator" exception in an operating system integrating #
3824 # the reduced version of 060FPSP. #
3825 # #
3826 # XREF **************************************************************** #
3827 # _real_fpu_disabled() - Handle "FPU disabled" exceptions #
3828 # _real_fline() - Handle all other cases (treated equally) #
3829 # #
3830 # INPUT *************************************************************** #
3831 # - The system stack contains a "Line F Emulator" exception #
3832 # stack frame. #
3833 # #
3834 # OUTPUT ************************************************************** #
3835 # - The system stack is unchanged. #
3836 # #
3837 # ALGORITHM *********************************************************** #
3838 # When a "Line F Emulator" exception occurs in a system where #
3839 # "FPU Unimplemented" instructions will not be emulated, the exception #
3840 # can occur because the FPU is disabled or the instruction is to be #
3841 # classified as "Line F". This module determines which case exists and #
3842 # calls the appropriate "callout". #
3843 # #
3844 #########################################################################
3845
3846 global _fpsp_fline
3847 _fpsp_fline:
3848 
3849 # check to see if the FPU is disabled. if so, jump to the OS entry
3850 # point for that condition.
# offset 6 on the stack is the format/vector word of the exception frame;
# 0x402c = format $4 frame, vector offset $02c -- the 68060 "FPU disabled"
# flavor of the Line F exception (NOTE(review): confirm against 060 UM).
3851 cmpi.w 0x6(%sp),&0x402c
3852 beq.l _real_fpu_disabled
3853 
3854 bra.l _real_fline
3855
3856 #########################################################################
3857 # XDEF **************************************************************** #
3858 # _dcalc_ea(): calc correct <ea> from <ea> stacked on exception #
3859 # #
3860 # XREF **************************************************************** #
3861 # inc_areg() - increment an address register #
3862 # dec_areg() - decrement an address register #
3863 # #
3864 # INPUT *************************************************************** #
3865 # d0 = number of bytes to adjust <ea> by #
3866 # #
3867 # OUTPUT ************************************************************** #
3868 # None #
3869 # #
3870 # ALGORITHM *********************************************************** #
3871 # "Dummy" CALCulate Effective Address: #
3872 # The stacked <ea> for FP unimplemented instructions and opclass #
3873 # two packed instructions is correct with the exception of... #
3874 # #
3875 # 1) -(An) : The register is not updated regardless of size. #
3876 # Also, for extended precision and packed, the #
3877 # stacked <ea> value is 8 bytes too big #
3878 # 2) (An)+ : The register is not updated. #
3879 # 3) #<data> : The upper longword of the immediate operand is #
3880 # stacked. b, w, l, and s sizes are completely #
3881 # stacked; d, x, and p are not. #
3882 # #
3883 #########################################################################
3884
# _dcalc_ea:
# "dummy" calc <ea>: in d0 = operand size in bytes; out a0 = corrected <ea>.
# For most modes the stacked EXC_EA is already correct; (An)+, -(An), and
# #<data> need fixing up (see the header comment block above).
3885 global _dcalc_ea
3886 _dcalc_ea:
3887 mov.l %d0, %a0 # move # bytes to %a0
3888 
3889 mov.b 1+EXC_OPWORD(%a6), %d0 # fetch opword lo byte (mode/reg fields)
3890 mov.l %d0, %d1 # make a copy
3891 
3892 andi.w &0x38, %d0 # extract mode field
3893 andi.l &0x7, %d1 # extract reg field
3894 
3895 cmpi.b %d0,&0x18 # is mode (An)+ ?
3896 beq.b dcea_pi # yes
3897 
3898 cmpi.b %d0,&0x20 # is mode -(An) ?
3899 beq.b dcea_pd # yes
3900 
3901 or.w %d1,%d0 # concat mode,reg
3902 cmpi.b %d0,&0x3c # is mode #<data>?
3903 
3904 beq.b dcea_imm # yes
3905 
3906 mov.l EXC_EA(%a6),%a0 # return <ea>
3907 rts
3908
3909 # need to set immediate data flag here since we'll need to do
3910 # an imem_read to fetch this later.
3911 dcea_imm:
3912 mov.b &immed_flg,SPCOND_FLG(%a6)
3913 lea ([USER_FPIAR,%a6],0x4),%a0 # <ea> = instr addr + 4 (past opword/extword)
3914 rts
3915
3916 # here, the <ea> is stacked correctly. however, we must update the
3917 # address register...
3918 dcea_pi:
3919 mov.l %a0,%d0 # pass amt to inc by
3920 bsr.l inc_areg # inc addr register
3921 
3922 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
3923 rts
3924
3925 # the <ea> is stacked correctly for all but extended and packed which
3926 # the <ea>s are 8 bytes too large.
3927 # it would make no sense to have a pre-decrement to a7 in supervisor
3928 # mode so we don't even worry about this tricky case here : )
3929 dcea_pd:
3930 mov.l %a0,%d0 # pass amt to dec by
3931 bsr.l dec_areg # dec addr register
3932 
3933 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
3934 
# NOTE(review): this compare assumes dec_areg preserved d0 (opsize) -- confirm
3935 cmpi.b %d0,&0xc # is opsize ext or packed?
3936 beq.b dcea_pd2 # yes
3937 rts
3938 dcea_pd2:
3939 sub.l &0x8,%a0 # correct <ea>
3940 mov.l %a0,EXC_EA(%a6) # put correct <ea> on stack
3941 rts
3942
3943 #########################################################################
3944 # XDEF **************************************************************** #
3945 # _calc_ea_fout(): calculate correct stacked <ea> for extended #
3946 # and packed data opclass 3 operations. #
3947 # #
3948 # XREF **************************************************************** #
3949 # None #
3950 # #
3951 # INPUT *************************************************************** #
3952 # None #
3953 # #
3954 # OUTPUT ************************************************************** #
3955 # a0 = return correct effective address #
3956 # #
3957 # ALGORITHM *********************************************************** #
3958 # For opclass 3 extended and packed data operations, the <ea> #
3959 # stacked for the exception is incorrect for -(an) and (an)+ addressing #
3960 # modes. Also, while we're at it, the address register itself must get #
3961 # updated. #
3962 # So, for -(an), we must subtract 8 off of the stacked <ea> value #
3963 # and return that value as the correct <ea> and store that value in An. #
3964 # For (an)+, the stacked <ea> is correct but we must adjust An by +12. #
3965 # #
3966 #########################################################################
3967
3968 # This calc_ea is currently used to retrieve the correct <ea>
3969 # for fmove outs of type extended and packed.
# out: a0 = correct <ea>. Only (An)+ and -(An) need special handling;
# all other modes return the stacked EXC_EA unchanged.
3970 global _calc_ea_fout
3971 _calc_ea_fout:
3972 mov.b 1+EXC_OPWORD(%a6),%d0 # fetch opword lo byte (mode/reg fields)
3973 mov.l %d0,%d1 # make a copy
3974 
3975 andi.w &0x38,%d0 # extract mode field
3976 andi.l &0x7,%d1 # extract reg field
3977 
3978 cmpi.b %d0,&0x18 # is mode (An)+ ?
3979 beq.b ceaf_pi # yes
3980 
3981 cmpi.b %d0,&0x20 # is mode -(An) ?
3982 beq.w ceaf_pd # yes
3983 
3984 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
3985 rts
3986
3987 # (An)+ : extended and packed fmove out
3988 # : stacked <ea> is correct
3989 # : "An" not updated by the hardware; the handlers below add 12 to the saved An
3990 ceaf_pi:
3991 mov.w (tbl_ceaf_pi.b,%pc,%d1.w*2),%d1 # fetch jump offset for register n
3992 mov.l EXC_EA(%a6),%a0 # return: stacked <ea> is already correct
3993 jmp (tbl_ceaf_pi.b,%pc,%d1.w*1)
3994 
3995 swbeg &0x8
3996 tbl_ceaf_pi:
3997 short ceaf_pi0 - tbl_ceaf_pi
3998 short ceaf_pi1 - tbl_ceaf_pi
3999 short ceaf_pi2 - tbl_ceaf_pi
4000 short ceaf_pi3 - tbl_ceaf_pi
4001 short ceaf_pi4 - tbl_ceaf_pi
4002 short ceaf_pi5 - tbl_ceaf_pi
4003 short ceaf_pi6 - tbl_ceaf_pi
4004 short ceaf_pi7 - tbl_ceaf_pi
4005 
# a0/a1 live in the saved-register area; a2-a5 are live in the registers;
# a6/a7 images live in the exception frame.
4006 ceaf_pi0:
4007 addi.l &0xc,EXC_DREGS+0x8(%a6) # update saved a0 image
4008 rts
4009 ceaf_pi1:
4010 addi.l &0xc,EXC_DREGS+0xc(%a6) # update saved a1 image
4011 rts
4012 ceaf_pi2:
4013 add.l &0xc,%a2
4014 rts
4015 ceaf_pi3:
4016 add.l &0xc,%a3
4017 rts
4018 ceaf_pi4:
4019 add.l &0xc,%a4
4020 rts
4021 ceaf_pi5:
4022 add.l &0xc,%a5
4023 rts
4024 ceaf_pi6:
4025 addi.l &0xc,EXC_A6(%a6) # update saved a6 (frame pointer) image
4026 rts
4027 ceaf_pi7:
4028 mov.b &mia7_flg,SPCOND_FLG(%a6) # flag special case: (a7)+ move-in/out
4029 addi.l &0xc,EXC_A7(%a6)
4030 rts
4031
4032 # -(An) : extended and packed fmove out
4033 # : stacked <ea> = actual <ea> + 8
4034 # : "An" not updated by the hardware; the handlers below store the corrected <ea> into An
4035 ceaf_pd:
4036 mov.w (tbl_ceaf_pd.b,%pc,%d1.w*2),%d1 # fetch jump offset for register n
4037 mov.l EXC_EA(%a6),%a0
4038 sub.l &0x8,%a0 # correct the <ea> (stacked value is 8 too big)
4039 sub.l &0x8,EXC_EA(%a6) # also fix the stacked copy
4040 jmp (tbl_ceaf_pd.b,%pc,%d1.w*1)
4041 
4042 swbeg &0x8
4043 tbl_ceaf_pd:
4044 short ceaf_pd0 - tbl_ceaf_pd
4045 short ceaf_pd1 - tbl_ceaf_pd
4046 short ceaf_pd2 - tbl_ceaf_pd
4047 short ceaf_pd3 - tbl_ceaf_pd
4048 short ceaf_pd4 - tbl_ceaf_pd
4049 short ceaf_pd5 - tbl_ceaf_pd
4050 short ceaf_pd6 - tbl_ceaf_pd
4051 short ceaf_pd7 - tbl_ceaf_pd
4052 
# store the corrected <ea> back into An (same image locations as ceaf_pi)
4053 ceaf_pd0:
4054 mov.l %a0,EXC_DREGS+0x8(%a6) # update saved a0 image
4055 rts
4056 ceaf_pd1:
4057 mov.l %a0,EXC_DREGS+0xc(%a6) # update saved a1 image
4058 rts
4059 ceaf_pd2:
4060 mov.l %a0,%a2
4061 rts
4062 ceaf_pd3:
4063 mov.l %a0,%a3
4064 rts
4065 ceaf_pd4:
4066 mov.l %a0,%a4
4067 rts
4068 ceaf_pd5:
4069 mov.l %a0,%a5
4070 rts
4071 ceaf_pd6:
4072 mov.l %a0,EXC_A6(%a6) # update saved a6 (frame pointer) image
4073 rts
4074 ceaf_pd7:
4075 mov.l %a0,EXC_A7(%a6)
4076 mov.b &mda7_flg,SPCOND_FLG(%a6) # flag special case: -(a7) move out
4077 rts
4078
4079 #
4080 # This table holds the offsets of the emulation routines for each individual
4081 # math operation relative to the address of this table. Included are
4082 # routines like fadd/fmul/fabs. The transcendentals ARE NOT. This is because
4083 # this table is for the version of the 060FPSP without transcendentals.
4084 # The location within the table is determined by the extension bits of the
4085 # operation longword.
4086 #
4087
# tbl_unsupp: 109 longword offsets, indexed by the 7-bit extension field.
# A zero offset (tbl_unsupp - tbl_unsupp) marks an operation that this
# reduced (non-transcendental) build does not emulate.
4088 swbeg &109
4089 tbl_unsupp:
4090 long fin - tbl_unsupp # 00: fmove
4091 long fint - tbl_unsupp # 01: fint
4092 long tbl_unsupp - tbl_unsupp # 02: fsinh
4093 long fintrz - tbl_unsupp # 03: fintrz
4094 long fsqrt - tbl_unsupp # 04: fsqrt
4095 long tbl_unsupp - tbl_unsupp # 05: (unused)
4096 long tbl_unsupp - tbl_unsupp # 06: flognp1
4097 long tbl_unsupp - tbl_unsupp # 07: (unused)
4098 long tbl_unsupp - tbl_unsupp # 08: fetoxm1
4099 long tbl_unsupp - tbl_unsupp # 09: ftanh
4100 long tbl_unsupp - tbl_unsupp # 0a: fatan
4101 long tbl_unsupp - tbl_unsupp # 0b: (unused)
4102 long tbl_unsupp - tbl_unsupp # 0c: fasin
4103 long tbl_unsupp - tbl_unsupp # 0d: fatanh
4104 long tbl_unsupp - tbl_unsupp # 0e: fsin
4105 long tbl_unsupp - tbl_unsupp # 0f: ftan
4106 long tbl_unsupp - tbl_unsupp # 10: fetox
4107 long tbl_unsupp - tbl_unsupp # 11: ftwotox
4108 long tbl_unsupp - tbl_unsupp # 12: ftentox
4109 long tbl_unsupp - tbl_unsupp # 13: (unused)
4110 long tbl_unsupp - tbl_unsupp # 14: flogn
4111 long tbl_unsupp - tbl_unsupp # 15: flog10
4112 long tbl_unsupp - tbl_unsupp # 16: flog2
4113 long tbl_unsupp - tbl_unsupp # 17: (unused)
4114 long fabs - tbl_unsupp # 18: fabs
4115 long tbl_unsupp - tbl_unsupp # 19: fcosh
4116 long fneg - tbl_unsupp # 1a: fneg
4117 long tbl_unsupp - tbl_unsupp # 1b: (unused)
4118 long tbl_unsupp - tbl_unsupp # 1c: facos
4119 long tbl_unsupp - tbl_unsupp # 1d: fcos
4120 long tbl_unsupp - tbl_unsupp # 1e: fgetexp
4121 long tbl_unsupp - tbl_unsupp # 1f: fgetman
4122 long fdiv - tbl_unsupp # 20: fdiv
4123 long tbl_unsupp - tbl_unsupp # 21: fmod
4124 long fadd - tbl_unsupp # 22: fadd
4125 long fmul - tbl_unsupp # 23: fmul
4126 long fsgldiv - tbl_unsupp # 24: fsgldiv
4127 long tbl_unsupp - tbl_unsupp # 25: frem
4128 long tbl_unsupp - tbl_unsupp # 26: fscale
4129 long fsglmul - tbl_unsupp # 27: fsglmul
4130 long fsub - tbl_unsupp # 28: fsub
4131 long tbl_unsupp - tbl_unsupp # 29: (unused)
4132 long tbl_unsupp - tbl_unsupp # 2a: (unused)
4133 long tbl_unsupp - tbl_unsupp # 2b: (unused)
4134 long tbl_unsupp - tbl_unsupp # 2c: (unused)
4135 long tbl_unsupp - tbl_unsupp # 2d: (unused)
4136 long tbl_unsupp - tbl_unsupp # 2e: (unused)
4137 long tbl_unsupp - tbl_unsupp # 2f: (unused)
4138 long tbl_unsupp - tbl_unsupp # 30: fsincos
4139 long tbl_unsupp - tbl_unsupp # 31: fsincos
4140 long tbl_unsupp - tbl_unsupp # 32: fsincos
4141 long tbl_unsupp - tbl_unsupp # 33: fsincos
4142 long tbl_unsupp - tbl_unsupp # 34: fsincos
4143 long tbl_unsupp - tbl_unsupp # 35: fsincos
4144 long tbl_unsupp - tbl_unsupp # 36: fsincos
4145 long tbl_unsupp - tbl_unsupp # 37: fsincos
4146 long fcmp - tbl_unsupp # 38: fcmp
4147 long tbl_unsupp - tbl_unsupp # 39: (unused)
4148 long ftst - tbl_unsupp # 3a: ftst
4149 long tbl_unsupp - tbl_unsupp # 3b: (unused)
4150 long tbl_unsupp - tbl_unsupp # 3c: (unused)
4151 long tbl_unsupp - tbl_unsupp # 3d: (unused)
4152 long tbl_unsupp - tbl_unsupp # 3e: (unused)
4153 long tbl_unsupp - tbl_unsupp # 3f: (unused)
4154 long fsin - tbl_unsupp # 40: fsmove
4155 long fssqrt - tbl_unsupp # 41: fssqrt
4156 long tbl_unsupp - tbl_unsupp # 42: (unused)
4157 long tbl_unsupp - tbl_unsupp # 43: (unused)
4158 long fdin - tbl_unsupp # 44: fdmove
4159 long fdsqrt - tbl_unsupp # 45: fdsqrt
4160 long tbl_unsupp - tbl_unsupp # 46: (unused)
4161 long tbl_unsupp - tbl_unsupp # 47: (unused)
4162 long tbl_unsupp - tbl_unsupp # 48: (unused)
4163 long tbl_unsupp - tbl_unsupp # 49: (unused)
4164 long tbl_unsupp - tbl_unsupp # 4a: (unused)
4165 long tbl_unsupp - tbl_unsupp # 4b: (unused)
4166 long tbl_unsupp - tbl_unsupp # 4c: (unused)
4167 long tbl_unsupp - tbl_unsupp # 4d: (unused)
4168 long tbl_unsupp - tbl_unsupp # 4e: (unused)
4169 long tbl_unsupp - tbl_unsupp # 4f: (unused)
4170 long tbl_unsupp - tbl_unsupp # 50: (unused)
4171 long tbl_unsupp - tbl_unsupp # 51: (unused)
4172 long tbl_unsupp - tbl_unsupp # 52: (unused)
4173 long tbl_unsupp - tbl_unsupp # 53: (unused)
4174 long tbl_unsupp - tbl_unsupp # 54: (unused)
4175 long tbl_unsupp - tbl_unsupp # 55: (unused)
4176 long tbl_unsupp - tbl_unsupp # 56: (unused)
4177 long tbl_unsupp - tbl_unsupp # 57: (unused)
4178 long fsabs - tbl_unsupp # 58: fsabs
4179 long tbl_unsupp - tbl_unsupp # 59: (unused)
4180 long fsneg - tbl_unsupp # 5a: fsneg
4181 long tbl_unsupp - tbl_unsupp # 5b: (unused)
4182 long fdabs - tbl_unsupp # 5c: fdabs
4183 long tbl_unsupp - tbl_unsupp # 5d: (unused)
4184 long fdneg - tbl_unsupp # 5e: fdneg
4185 long tbl_unsupp - tbl_unsupp # 5f: (unused)
4186 long fsdiv - tbl_unsupp # 60: fsdiv
4187 long tbl_unsupp - tbl_unsupp # 61: (unused)
4188 long fsadd - tbl_unsupp # 62: fsadd
4189 long fsmul - tbl_unsupp # 63: fsmul
4190 long fddiv - tbl_unsupp # 64: fddiv
4191 long tbl_unsupp - tbl_unsupp # 65: (unused)
4192 long fdadd - tbl_unsupp # 66: fdadd
4193 long fdmul - tbl_unsupp # 67: fdmul
4194 long fssub - tbl_unsupp # 68: fssub
4195 long tbl_unsupp - tbl_unsupp # 69: (unused)
4196 long tbl_unsupp - tbl_unsupp # 6a: (unused)
4197 long tbl_unsupp - tbl_unsupp # 6b: (unused)
4198 long fdsub - tbl_unsupp # 6c: fdsub
4199
4200 #################################################
4201 # Add this here so non-fp modules can compile.
4202 # (smovcr is called from fpsp_inex.)
# NOTE: deliberate stub -- spins forever if ever reached in this build.
4203 global smovcr
4204 smovcr:
4205 bra.b smovcr
4206
4207 #########################################################################
4208 # XDEF **************************************************************** #
4209 # fmovm_dynamic(): emulate "fmovm" dynamic instruction #
4210 # #
4211 # XREF **************************************************************** #
4212 # fetch_dreg() - fetch data register #
4213 # {i,d,}mem_read() - fetch data from memory #
4214 # _mem_write() - write data to memory #
4215 # iea_iacc() - instruction memory access error occurred #
4216 # iea_dacc() - data memory access error occurred #
4217 # restore() - restore An index regs if access error occurred #
4218 # #
4219 # INPUT *************************************************************** #
4220 # None #
4221 # #
4222 # OUTPUT ************************************************************** #
4223 # If instr is "fmovm Dn,-(A7)" from supervisor mode, #
4224 # d0 = size of dump #
4225 # d1 = Dn #
4226 # Else if instruction access error, #
4227 # d0 = FSLW #
4228 # Else if data access error, #
4229 # d0 = FSLW #
4230 # a0 = address of fault #
4231 # Else #
4232 # none. #
4233 # #
4234 # ALGORITHM *********************************************************** #
4235 # The effective address must be calculated since this is entered #
4236 # from an "Unimplemented Effective Address" exception handler. So, we #
4237 # have our own fcalc_ea() routine here. If an access error is flagged #
4238 # by a _{i,d,}mem_read() call, we must exit through the special #
4239 # handler. #
4240 # The data register is determined and its value loaded to get the #
4241 # string of FP registers affected. This value is used as an index into #
4242 # a lookup table such that we can determine the number of bytes #
4243 # involved. #
4244 # If the instruction is "fmovm.x <ea>,Dn", a _mem_read() is used #
4245 # to read in all FP values. Again, _mem_read() may fail and require a #
4246 # special exit. #
4247 # If the instruction is "fmovm.x DN,<ea>", a _mem_write() is used #
4248 # to write all FP values. _mem_write() may also fail. #
4249 # If the instruction is "fmovm.x DN,-(a7)" from supervisor mode, #
4250 # then we return the size of the dump and the string to the caller #
4251 # so that the move can occur outside of this routine. This special #
4252 # case is required so that moves to the system stack are handled #
4253 # correctly. #
4254 # #
4255 # DYNAMIC: #
4256 # fmovm.x dn, <ea> #
4257 # fmovm.x <ea>, dn #
4258 # #
4259 # <WORD 1> <WORD2> #
4260 # 1111 0010 00 |<ea>| 11@& 1000 0$$$ 0000 #
4261 # #
4262 # & = (0): predecrement addressing mode #
4263 # (1): postincrement or control addressing mode #
4264 # @ = (0): move listed regs from memory to the FPU #
4265 # (1): move listed regs from the FPU to memory #
4266 # $$$ : index of data register holding reg select mask #
4267 # #
4268 # NOTES: #
4269 # If the data register holds a zero, then the #
4270 # instruction is a nop. #
4271 # #
4272 #########################################################################
4273
4274 global fmovm_dynamic
4275 fmovm_dynamic:
4276 
4277 # extract the data register in which the bit string resides...
4278 mov.b 1+EXC_EXTWORD(%a6),%d1 # fetch extword
4279 andi.w &0x70,%d1 # extract reg bits
4280 lsr.b &0x4,%d1 # shift into lo bits
4281 
4282 # fetch the bit string into d0...
4283 bsr.l fetch_dreg # fetch reg string
4284 
4285 andi.l &0x000000ff,%d0 # keep only lo byte
4286 
4287 mov.l %d0,-(%sp) # save strg
4288 mov.b (tbl_fmovm_size.w,%pc,%d0),%d0 # look up # of bytes to move
4289 mov.l %d0,-(%sp) # save size
4290 bsr.l fmovm_calc_ea # calculate <ea>
4291 mov.l (%sp)+,%d0 # restore size
4292 mov.l (%sp)+,%d1 # restore strg (also sets ccodes)
4293 
4294 # if the bit string is a zero, then the operation is a no-op
4295 # but, make sure that we've calculated ea and advanced the opword pointer
4296 beq.w fmovm_data_done
4297 
4298 # separate move ins from move outs...
4299 btst &0x5,EXC_EXTWORD(%a6) # is it a move in or out?
4300 beq.w fmovm_data_in # bit clear => move in (memory -> FPU)
4301
4302 #############
4303 # MOVE OUT: #
4304 #############
4305 fmovm_data_out:
4306 btst &0x4,EXC_EXTWORD(%a6) # control or predecrement?
4307 bne.w fmovm_out_ctrl # control
4308 
4309 ############################
4310 fmovm_out_predec:
4311 # for predecrement mode, the bit string is the opposite of both control
4312 # operations and postincrement mode. (bit7 = FP7 ... bit0 = FP0)
4313 # here, we convert it to be just like the others...
4314 mov.b (tbl_fmovm_convert.w,%pc,%d1.w*1),%d1
4315 
4316 btst &0x5,EXC_SR(%a6) # user or supervisor mode?
4317 beq.b fmovm_out_ctrl # user
4318 
4319 fmovm_out_predec_s:
4320 cmpi.b SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
4321 bne.b fmovm_out_ctrl # no; handle normally
4322 
4323 # the operation was unfortunately an: fmovm.x dn,-(sp)
4324 # called from supervisor mode.
4325 # we're also passing "size" and "strg" back to the calling routine
4326 rts
4327
4328 ############################
# fmovm_out_ctrl:
# dump the selected FP registers (msb of d1 = FP0 ... onward) into a
# temporary area carved out of the supervisor stack, then copy that dump
# to the user destination in one _dmem_write call.
# fp0/fp1 come from their saved images in the frame; fp2-fp7 are live.
4329 fmovm_out_ctrl:
4330 mov.l %a0,%a1 # move <ea> to a1
4331 
4332 sub.l %d0,%sp # subtract size of dump
4333 lea (%sp),%a0 # a0 = base of temp dump area
4334 
4335 tst.b %d1 # should FP0 be moved?
4336 bpl.b fmovm_out_ctrl_fp1 # no
4337 
4338 mov.l 0x0+EXC_FP0(%a6),(%a0)+ # yes
4339 mov.l 0x4+EXC_FP0(%a6),(%a0)+
4340 mov.l 0x8+EXC_FP0(%a6),(%a0)+
4341 
4342 fmovm_out_ctrl_fp1:
4343 lsl.b &0x1,%d1 # should FP1 be moved?
4344 bpl.b fmovm_out_ctrl_fp2 # no
4345 
4346 mov.l 0x0+EXC_FP1(%a6),(%a0)+ # yes
4347 mov.l 0x4+EXC_FP1(%a6),(%a0)+
4348 mov.l 0x8+EXC_FP1(%a6),(%a0)+
4349 
4350 fmovm_out_ctrl_fp2:
4351 lsl.b &0x1,%d1 # should FP2 be moved?
4352 bpl.b fmovm_out_ctrl_fp3 # no
4353 
4354 fmovm.x &0x20,(%a0) # yes
4355 add.l &0xc,%a0
4356 
4357 fmovm_out_ctrl_fp3:
4358 lsl.b &0x1,%d1 # should FP3 be moved?
4359 bpl.b fmovm_out_ctrl_fp4 # no
4360 
4361 fmovm.x &0x10,(%a0) # yes
4362 add.l &0xc,%a0
4363 
4364 fmovm_out_ctrl_fp4:
4365 lsl.b &0x1,%d1 # should FP4 be moved?
4366 bpl.b fmovm_out_ctrl_fp5 # no
4367 
4368 fmovm.x &0x08,(%a0) # yes
4369 add.l &0xc,%a0
4370 
4371 fmovm_out_ctrl_fp5:
4372 lsl.b &0x1,%d1 # should FP5 be moved?
4373 bpl.b fmovm_out_ctrl_fp6 # no
4374 
4375 fmovm.x &0x04,(%a0) # yes
4376 add.l &0xc,%a0
4377 
4378 fmovm_out_ctrl_fp6:
4379 lsl.b &0x1,%d1 # should FP6 be moved?
4380 bpl.b fmovm_out_ctrl_fp7 # no
4381 
4382 fmovm.x &0x02,(%a0) # yes
4383 add.l &0xc,%a0
4384 
4385 fmovm_out_ctrl_fp7:
4386 lsl.b &0x1,%d1 # should FP7 be moved?
4387 bpl.b fmovm_out_ctrl_done # no
4388 
4389 fmovm.x &0x01,(%a0) # yes
4390 add.l &0xc,%a0
4391 
4392 fmovm_out_ctrl_done:
4393 mov.l %a1,L_SCR1(%a6) # save user dst <ea> (presumably for access-error recovery -- confirm)
4394 
4395 lea (%sp),%a0 # pass: supervisor src
4396 mov.l %d0,-(%sp) # save size
4397 bsr.l _dmem_write # copy data to user mem
4398 
4399 mov.l (%sp)+,%d0 # retrieve size
4400 add.l %d0,%sp # clear fpreg data from stack
4401 
4402 tst.l %d1 # did dstore err?
4403 bne.w fmovm_out_err # yes
4404 
4405 rts
4406
4407 ############
4408 # MOVE IN: #
4409 ############
# fmovm_data_in:
# read the whole register dump from user memory into a temporary area on
# the supervisor stack with one _dmem_read, then scatter it to the
# selected FP registers. fp0/fp1 go to their saved frame images
# (restored later); fp2-fp7 are loaded live.
4410 fmovm_data_in:
4411 mov.l %a0,L_SCR1(%a6) # save user src <ea> (presumably for access-error recovery -- confirm)
4412 
4413 sub.l %d0,%sp # make room for fpregs
4414 lea (%sp),%a1 # pass: supervisor dst
4415 
4416 mov.l %d1,-(%sp) # save bit string for later
4417 mov.l %d0,-(%sp) # save # of bytes
4418 
4419 bsr.l _dmem_read # copy data from user mem
4420 
4421 mov.l (%sp)+,%d0 # retrieve # of bytes
4422 
4423 tst.l %d1 # did dfetch fail?
4424 bne.w fmovm_in_err # yes
4425 
4426 mov.l (%sp)+,%d1 # load bit string
4427 
4428 lea (%sp),%a0 # addr of stack
4429 
4430 tst.b %d1 # should FP0 be moved?
4431 bpl.b fmovm_data_in_fp1 # no
4432 
4433 mov.l (%a0)+,0x0+EXC_FP0(%a6) # yes
4434 mov.l (%a0)+,0x4+EXC_FP0(%a6)
4435 mov.l (%a0)+,0x8+EXC_FP0(%a6)
4436 
4437 fmovm_data_in_fp1:
4438 lsl.b &0x1,%d1 # should FP1 be moved?
4439 bpl.b fmovm_data_in_fp2 # no
4440 
4441 mov.l (%a0)+,0x0+EXC_FP1(%a6) # yes
4442 mov.l (%a0)+,0x4+EXC_FP1(%a6)
4443 mov.l (%a0)+,0x8+EXC_FP1(%a6)
4444 
4445 fmovm_data_in_fp2:
4446 lsl.b &0x1,%d1 # should FP2 be moved?
4447 bpl.b fmovm_data_in_fp3 # no
4448 
4449 fmovm.x (%a0)+,&0x20 # yes
4450 
4451 fmovm_data_in_fp3:
4452 lsl.b &0x1,%d1 # should FP3 be moved?
4453 bpl.b fmovm_data_in_fp4 # no
4454 
4455 fmovm.x (%a0)+,&0x10 # yes
4456 
4457 fmovm_data_in_fp4:
4458 lsl.b &0x1,%d1 # should FP4 be moved?
4459 bpl.b fmovm_data_in_fp5 # no
4460 
4461 fmovm.x (%a0)+,&0x08 # yes
4462 
4463 fmovm_data_in_fp5:
4464 lsl.b &0x1,%d1 # should FP5 be moved?
4465 bpl.b fmovm_data_in_fp6 # no
4466 
4467 fmovm.x (%a0)+,&0x04 # yes
4468 
4469 fmovm_data_in_fp6:
4470 lsl.b &0x1,%d1 # should FP6 be moved?
4471 bpl.b fmovm_data_in_fp7 # no
4472 
4473 fmovm.x (%a0)+,&0x02 # yes
4474 
4475 fmovm_data_in_fp7:
4476 lsl.b &0x1,%d1 # should FP7 be moved?
4477 bpl.b fmovm_data_in_done # no
4478 
4479 fmovm.x (%a0)+,&0x01 # yes
4480 
4481 fmovm_data_in_done:
4482 add.l %d0,%sp # remove fpregs from stack
4483 rts
4484
4485 #####################################
4486
# common "nothing left to do" exit for the fmovm data emulation
4487 fmovm_data_done:
4488 rts
4489
4490 ##############################################################################
4491
4492 #
4493 # table indexed by the operation's bit string that gives the number
4494 # of bytes that will be moved.
4495 #
4496 # number of bytes = (# of 1's in bit string) * 12(bytes/fpreg)
4497 #
# (256 entries; entry N = 12 * popcount(N), where N is the 8-bit
# register select mask.  12 bytes = one extended-precision register.)
4498 tbl_fmovm_size:
4499 byte 0x00,0x0c,0x0c,0x18,0x0c,0x18,0x18,0x24
4500 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
4501 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
4502 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4503 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
4504 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4505 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4506 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4507 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
4508 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4509 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4510 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4511 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4512 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4513 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4514 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
4515 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
4516 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4517 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4518 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4519 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4520 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4521 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4522 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
4523 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4524 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4525 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4526 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
4527 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4528 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
4529 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
4530 byte 0x3c,0x48,0x48,0x54,0x48,0x54,0x54,0x60
4531
4532 #
4533 # table to convert a pre-decrement bit string into a post-increment
4534 # or control bit string.
4535 # ex: 0x00 ==> 0x00
4536 # 0x01 ==> 0x80
4537 # 0x02 ==> 0x40
4538 # .
4539 # .
4540 # 0xfd ==> 0xbf
4541 # 0xfe ==> 0x7f
4542 # 0xff ==> 0xff
4543 #
# (256 entries; entry N is N with its 8 bits mirrored, i.e. bit k of
# the result = bit (7-k) of the index.)
4544 tbl_fmovm_convert:
4545 byte 0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0
4546 byte 0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0
4547 byte 0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8
4548 byte 0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8
4549 byte 0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4
4550 byte 0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4
4551 byte 0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec
4552 byte 0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc
4553 byte 0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2
4554 byte 0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2
4555 byte 0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea
4556 byte 0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa
4557 byte 0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6
4558 byte 0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6
4559 byte 0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee
4560 byte 0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe
4561 byte 0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1
4562 byte 0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1
4563 byte 0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9
4564 byte 0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9
4565 byte 0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5
4566 byte 0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5
4567 byte 0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed
4568 byte 0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd
4569 byte 0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3
4570 byte 0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3
4571 byte 0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb
4572 byte 0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb
4573 byte 0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7
4574 byte 0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7
4575 byte 0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef
4576 byte 0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff
4577
4578 global fmovm_calc_ea
4579 ###############################################
4580 # _fmovm_calc_ea: calculate effective address #
4581 ###############################################
#
# in:  d0 = # of bytes the instruction will transfer.  It is parked in
#      a0 so the (An)+/-(An) handlers below can use it as the amount
#      by which to adjust An.
#      EXC_OPWORD(a6) supplies the {mode,reg} field of the opcode.
# out: a0 = calculated effective address
#
# The 6-bit {mode,reg} value indexes tbl_fea_mode (64 entries of
# 16-bit self-relative offsets) and control jumps to the matching
# per-mode handler.  Entries that point back at tbl_fea_mode itself
# are unused/illegal combinations.
#
4582 fmovm_calc_ea:
4583 mov.l %d0,%a0 # move # bytes to a0
4584
4585 # currently, MODE and REG are taken from the EXC_OPWORD. this could be
4586 # easily changed if they were inputs passed in registers.
4587 mov.w EXC_OPWORD(%a6),%d0 # fetch opcode word
4588 mov.w %d0,%d1 # make a copy
4589
4590 andi.w &0x3f,%d0 # extract mode and reg fields (6 bits)
4591 andi.l &0x7,%d1 # extract reg field
4592
4593 # jump to the corresponding function for each {MODE,REG} pair.
4594 mov.w (tbl_fea_mode.b,%pc,%d0.w*2),%d0 # fetch jmp distance
4595 jmp (tbl_fea_mode.b,%pc,%d0.w*1) # jmp to correct ea mode
4596
4597 swbeg &64
4598 tbl_fea_mode:
4599 short tbl_fea_mode - tbl_fea_mode
4600 short tbl_fea_mode - tbl_fea_mode
4601 short tbl_fea_mode - tbl_fea_mode
4602 short tbl_fea_mode - tbl_fea_mode
4603 short tbl_fea_mode - tbl_fea_mode
4604 short tbl_fea_mode - tbl_fea_mode
4605 short tbl_fea_mode - tbl_fea_mode
4606 short tbl_fea_mode - tbl_fea_mode
4607
4608 short tbl_fea_mode - tbl_fea_mode
4609 short tbl_fea_mode - tbl_fea_mode
4610 short tbl_fea_mode - tbl_fea_mode
4611 short tbl_fea_mode - tbl_fea_mode
4612 short tbl_fea_mode - tbl_fea_mode
4613 short tbl_fea_mode - tbl_fea_mode
4614 short tbl_fea_mode - tbl_fea_mode
4615 short tbl_fea_mode - tbl_fea_mode
4616
4617 short faddr_ind_a0 - tbl_fea_mode
4618 short faddr_ind_a1 - tbl_fea_mode
4619 short faddr_ind_a2 - tbl_fea_mode
4620 short faddr_ind_a3 - tbl_fea_mode
4621 short faddr_ind_a4 - tbl_fea_mode
4622 short faddr_ind_a5 - tbl_fea_mode
4623 short faddr_ind_a6 - tbl_fea_mode
4624 short faddr_ind_a7 - tbl_fea_mode
4625
4626 short faddr_ind_p_a0 - tbl_fea_mode
4627 short faddr_ind_p_a1 - tbl_fea_mode
4628 short faddr_ind_p_a2 - tbl_fea_mode
4629 short faddr_ind_p_a3 - tbl_fea_mode
4630 short faddr_ind_p_a4 - tbl_fea_mode
4631 short faddr_ind_p_a5 - tbl_fea_mode
4632 short faddr_ind_p_a6 - tbl_fea_mode
4633 short faddr_ind_p_a7 - tbl_fea_mode
4634
4635 short faddr_ind_m_a0 - tbl_fea_mode
4636 short faddr_ind_m_a1 - tbl_fea_mode
4637 short faddr_ind_m_a2 - tbl_fea_mode
4638 short faddr_ind_m_a3 - tbl_fea_mode
4639 short faddr_ind_m_a4 - tbl_fea_mode
4640 short faddr_ind_m_a5 - tbl_fea_mode
4641 short faddr_ind_m_a6 - tbl_fea_mode
4642 short faddr_ind_m_a7 - tbl_fea_mode
4643
4644 short faddr_ind_disp_a0 - tbl_fea_mode
4645 short faddr_ind_disp_a1 - tbl_fea_mode
4646 short faddr_ind_disp_a2 - tbl_fea_mode
4647 short faddr_ind_disp_a3 - tbl_fea_mode
4648 short faddr_ind_disp_a4 - tbl_fea_mode
4649 short faddr_ind_disp_a5 - tbl_fea_mode
4650 short faddr_ind_disp_a6 - tbl_fea_mode
4651 short faddr_ind_disp_a7 - tbl_fea_mode
4652
4653 short faddr_ind_ext - tbl_fea_mode
4654 short faddr_ind_ext - tbl_fea_mode
4655 short faddr_ind_ext - tbl_fea_mode
4656 short faddr_ind_ext - tbl_fea_mode
4657 short faddr_ind_ext - tbl_fea_mode
4658 short faddr_ind_ext - tbl_fea_mode
4659 short faddr_ind_ext - tbl_fea_mode
4660 short faddr_ind_ext - tbl_fea_mode
4661
4662 short fabs_short - tbl_fea_mode
4663 short fabs_long - tbl_fea_mode
4664 short fpc_ind - tbl_fea_mode
4665 short fpc_ind_ext - tbl_fea_mode
4666 short tbl_fea_mode - tbl_fea_mode
4667 short tbl_fea_mode - tbl_fea_mode
4668 short tbl_fea_mode - tbl_fea_mode
4669 short tbl_fea_mode - tbl_fea_mode
4670
4671 ###################################
4672 # Address register indirect: (An) #
4673 ###################################
# Note where each An is found during the exception: a0/a1 were saved
# in the EXC_DREGS image, a2-a5 are still live in the real registers,
# a6 is the frame pointer (the caller's a6 is at (%a6)), and the user
# a7 is saved at EXC_A7.
4674 faddr_ind_a0:
4675 mov.l EXC_DREGS+0x8(%a6),%a0 # Get current a0
4676 rts
4677
4678 faddr_ind_a1:
4679 mov.l EXC_DREGS+0xc(%a6),%a0 # Get current a1
4680 rts
4681
4682 faddr_ind_a2:
4683 mov.l %a2,%a0 # Get current a2
4684 rts
4685
4686 faddr_ind_a3:
4687 mov.l %a3,%a0 # Get current a3
4688 rts
4689
4690 faddr_ind_a4:
4691 mov.l %a4,%a0 # Get current a4
4692 rts
4693
4694 faddr_ind_a5:
4695 mov.l %a5,%a0 # Get current a5
4696 rts
4697
4698 faddr_ind_a6:
4699 mov.l (%a6),%a0 # Get current a6
4700 rts
4701
4702 faddr_ind_a7:
4703 mov.l EXC_A7(%a6),%a0 # Get current a7
4704 rts
4705
4706 #####################################################
4707 # Address register indirect w/ postincrement: (An)+ #
4708 #####################################################
# On entry a0 holds the transfer size in bytes (set by fmovm_calc_ea).
# Each handler returns the ORIGINAL An value as the <ea> in a0 and
# writes An + size back to wherever An lives (see the (An) handlers
# above for where that is).  The (a7)+ case additionally records a
# "special case" flag so the caller can handle the moved stack pointer.
4709 faddr_ind_p_a0:
4710 mov.l EXC_DREGS+0x8(%a6),%d0 # Get current a0
4711 mov.l %d0,%d1
4712 add.l %a0,%d1 # Increment
4713 mov.l %d1,EXC_DREGS+0x8(%a6) # Save incr value
4714 mov.l %d0,%a0
4715 rts
4716
4717 faddr_ind_p_a1:
4718 mov.l EXC_DREGS+0xc(%a6),%d0 # Get current a1
4719 mov.l %d0,%d1
4720 add.l %a0,%d1 # Increment
4721 mov.l %d1,EXC_DREGS+0xc(%a6) # Save incr value
4722 mov.l %d0,%a0
4723 rts
4724
4725 faddr_ind_p_a2:
4726 mov.l %a2,%d0 # Get current a2
4727 mov.l %d0,%d1
4728 add.l %a0,%d1 # Increment
4729 mov.l %d1,%a2 # Save incr value
4730 mov.l %d0,%a0
4731 rts
4732
4733 faddr_ind_p_a3:
4734 mov.l %a3,%d0 # Get current a3
4735 mov.l %d0,%d1
4736 add.l %a0,%d1 # Increment
4737 mov.l %d1,%a3 # Save incr value
4738 mov.l %d0,%a0
4739 rts
4740
4741 faddr_ind_p_a4:
4742 mov.l %a4,%d0 # Get current a4
4743 mov.l %d0,%d1
4744 add.l %a0,%d1 # Increment
4745 mov.l %d1,%a4 # Save incr value
4746 mov.l %d0,%a0
4747 rts
4748
4749 faddr_ind_p_a5:
4750 mov.l %a5,%d0 # Get current a5
4751 mov.l %d0,%d1
4752 add.l %a0,%d1 # Increment
4753 mov.l %d1,%a5 # Save incr value
4754 mov.l %d0,%a0
4755 rts
4756
4757 faddr_ind_p_a6:
4758 mov.l (%a6),%d0 # Get current a6
4759 mov.l %d0,%d1
4760 add.l %a0,%d1 # Increment
4761 mov.l %d1,(%a6) # Save incr value
4762 mov.l %d0,%a0
4763 rts
4764
4765 faddr_ind_p_a7:
4766 mov.b &mia7_flg,SPCOND_FLG(%a6) # set "special case" flag
4767
4768 mov.l EXC_A7(%a6),%d0 # Get current a7
4769 mov.l %d0,%d1
4770 add.l %a0,%d1 # Increment
4771 mov.l %d1,EXC_A7(%a6) # Save incr value
4772 mov.l %d0,%a0
4773 rts
4774
4775 ####################################################
4776 # Address register indirect w/ predecrement: -(An) #
4777 ####################################################
# On entry a0 holds the transfer size in bytes.  Each handler computes
# An - size, writes it back as the new An, and returns that DECREMENTED
# value as the <ea> in a0.  -(a7) records its own "special case" flag.
4778 faddr_ind_m_a0:
4779 mov.l EXC_DREGS+0x8(%a6),%d0 # Get current a0
4780 sub.l %a0,%d0 # Decrement
4781 mov.l %d0,EXC_DREGS+0x8(%a6) # Save decr value
4782 mov.l %d0,%a0
4783 rts
4784
4785 faddr_ind_m_a1:
4786 mov.l EXC_DREGS+0xc(%a6),%d0 # Get current a1
4787 sub.l %a0,%d0 # Decrement
4788 mov.l %d0,EXC_DREGS+0xc(%a6) # Save decr value
4789 mov.l %d0,%a0
4790 rts
4791
4792 faddr_ind_m_a2:
4793 mov.l %a2,%d0 # Get current a2
4794 sub.l %a0,%d0 # Decrement
4795 mov.l %d0,%a2 # Save decr value
4796 mov.l %d0,%a0
4797 rts
4798
4799 faddr_ind_m_a3:
4800 mov.l %a3,%d0 # Get current a3
4801 sub.l %a0,%d0 # Decrement
4802 mov.l %d0,%a3 # Save decr value
4803 mov.l %d0,%a0
4804 rts
4805
4806 faddr_ind_m_a4:
4807 mov.l %a4,%d0 # Get current a4
4808 sub.l %a0,%d0 # Decrement
4809 mov.l %d0,%a4 # Save decr value
4810 mov.l %d0,%a0
4811 rts
4812
4813 faddr_ind_m_a5:
4814 mov.l %a5,%d0 # Get current a5
4815 sub.l %a0,%d0 # Decrement
4816 mov.l %d0,%a5 # Save decr value
4817 mov.l %d0,%a0
4818 rts
4819
4820 faddr_ind_m_a6:
4821 mov.l (%a6),%d0 # Get current a6
4822 sub.l %a0,%d0 # Decrement
4823 mov.l %d0,(%a6) # Save decr value
4824 mov.l %d0,%a0
4825 rts
4826
4827 faddr_ind_m_a7:
4828 mov.b &mda7_flg,SPCOND_FLG(%a6) # set "special case" flag
4829
4830 mov.l EXC_A7(%a6),%d0 # Get current a7
4831 sub.l %a0,%d0 # Decrement
4832 mov.l %d0,EXC_A7(%a6) # Save decr value
4833 mov.l %d0,%a0
4834 rts
4835
4836 ########################################################
4837 # Address register indirect w/ displacement: (d16, An) #
4838 ########################################################
# Each handler fetches the 16-bit extension word (the displacement)
# from the instruction stream via _imem_read_word(), then returns
# An + d16 in a0.  "mov.w %d0,%a0" relies on the fact that word moves
# to an address register sign-extend to 32 bits.
4839 faddr_ind_disp_a0:
4840 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4841 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4842 bsr.l _imem_read_word
4843
4844 tst.l %d1 # did ifetch fail?
4845 bne.l iea_iacc # yes
4846
4847 mov.w %d0,%a0 # sign extend displacement
4848
4849 add.l EXC_DREGS+0x8(%a6),%a0 # a0 + d16
4850 rts
4851
4852 faddr_ind_disp_a1:
4853 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4854 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4855 bsr.l _imem_read_word
4856
4857 tst.l %d1 # did ifetch fail?
4858 bne.l iea_iacc # yes
4859
4860 mov.w %d0,%a0 # sign extend displacement
4861
4862 add.l EXC_DREGS+0xc(%a6),%a0 # a1 + d16
4863 rts
4864
4865 faddr_ind_disp_a2:
4866 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4867 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4868 bsr.l _imem_read_word
4869
4870 tst.l %d1 # did ifetch fail?
4871 bne.l iea_iacc # yes
4872
4873 mov.w %d0,%a0 # sign extend displacement
4874
4875 add.l %a2,%a0 # a2 + d16
4876 rts
4877
4878 faddr_ind_disp_a3:
4879 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4880 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4881 bsr.l _imem_read_word
4882
4883 tst.l %d1 # did ifetch fail?
4884 bne.l iea_iacc # yes
4885
4886 mov.w %d0,%a0 # sign extend displacement
4887
4888 add.l %a3,%a0 # a3 + d16
4889 rts
4890
4891 faddr_ind_disp_a4:
4892 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4893 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4894 bsr.l _imem_read_word
4895
4896 tst.l %d1 # did ifetch fail?
4897 bne.l iea_iacc # yes
4898
4899 mov.w %d0,%a0 # sign extend displacement
4900
4901 add.l %a4,%a0 # a4 + d16
4902 rts
4903
4904 faddr_ind_disp_a5:
4905 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4906 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4907 bsr.l _imem_read_word
4908
4909 tst.l %d1 # did ifetch fail?
4910 bne.l iea_iacc # yes
4911
4912 mov.w %d0,%a0 # sign extend displacement
4913
4914 add.l %a5,%a0 # a5 + d16
4915 rts
4916
4917 faddr_ind_disp_a6:
4918 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4919 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4920 bsr.l _imem_read_word
4921
4922 tst.l %d1 # did ifetch fail?
4923 bne.l iea_iacc # yes
4924
4925 mov.w %d0,%a0 # sign extend displacement
4926
4927 add.l (%a6),%a0 # a6 + d16
4928 rts
4929
4930 faddr_ind_disp_a7:
4931 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4932 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4933 bsr.l _imem_read_word
4934
4935 tst.l %d1 # did ifetch fail?
4936 bne.l iea_iacc # yes
4937
4938 mov.w %d0,%a0 # sign extend displacement
4939
4940 add.l EXC_A7(%a6),%a0 # a7 + d16
4941 rts
4942
4943 ########################################################################
4944 # Address register indirect w/ index(8-bit displacement): (d8, An, Xn) #
4945 # " " " w/ " (base displacement): (bd, An, Xn) #
4946 # Memory indirect postindexed: ([bd, An], Xn, od) #
4947 # Memory indirect preindexed: ([bd, An, Xn], od) #
4948 ########################################################################
# in:  d1 = reg field of the opcode (base An number).  Adding 8 turns
#      it into the address-register index expected by fetch_dreg().
# out: a0 = calculated effective address
# If bit 8 of the extension word is set this is a full (memory
# indirect) extension word and the heavy lifting is done by
# fcalc_mem_ind; otherwise the brief-format fields are decoded inline:
# bits 15-12 index register, bit 11 index size, bits 10-9 scale,
# bits 7-0 the sign-extended 8-bit displacement (extb.l).
4949 faddr_ind_ext:
4950 addq.l &0x8,%d1
4951 bsr.l fetch_dreg # fetch base areg
4952 mov.l %d0,-(%sp)
4953
4954 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4955 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4956 bsr.l _imem_read_word # fetch extword in d0
4957
4958 tst.l %d1 # did ifetch fail?
4959 bne.l iea_iacc # yes
4960
4961 mov.l (%sp)+,%a0
4962
4963 btst &0x8,%d0 # full extension word?
4964 bne.w fcalc_mem_ind
4965
4966 mov.l %d0,L_SCR1(%a6) # hold opword
4967
4968 mov.l %d0,%d1
4969 rol.w &0x4,%d1
4970 andi.w &0xf,%d1 # extract index regno
4971
4972 # count on fetch_dreg() not to alter a0...
4973 bsr.l fetch_dreg # fetch index
4974
4975 mov.l %d2,-(%sp) # save d2
4976 mov.l L_SCR1(%a6),%d2 # fetch opword
4977
4978 btst &0xb,%d2 # is it word or long?
4979 bne.b faii8_long
4980 ext.l %d0 # sign extend word index
4981 faii8_long:
4982 mov.l %d2,%d1
4983 rol.w &0x7,%d1
4984 andi.l &0x3,%d1 # extract scale value
4985
4986 lsl.l %d1,%d0 # shift index by scale
4987
4988 extb.l %d2 # sign extend displacement
4989 add.l %d2,%d0 # index + disp
4990 add.l %d0,%a0 # An + (index + disp)
4991
4992 mov.l (%sp)+,%d2 # restore old d2
4993 rts
4994
4995 ###########################
4996 # Absolute short: (XXX).W #
4997 ###########################
# Fetch the 16-bit absolute address from the instruction stream and
# return it (sign-extended by the word move to a0) as the <ea>.
4998 fabs_short:
4999 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5000 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
5001 bsr.l _imem_read_word # fetch short address
5002
5003 tst.l %d1 # did ifetch fail?
5004 bne.l iea_iacc # yes
5005
5006 mov.w %d0,%a0 # return <ea> in a0
5007 rts
5008
5009 ##########################
5010 # Absolute long: (XXX).L #
5011 ##########################
# Fetch the 32-bit absolute address from the instruction stream and
# return it as the <ea>.
5012 fabs_long:
5013 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5014 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5015 bsr.l _imem_read_long # fetch long address
5016
5017 tst.l %d1 # did ifetch fail?
5018 bne.l iea_iacc # yes
5019
5020 mov.l %d0,%a0 # return <ea> in a0
5021 rts
5022
5023 #######################################################
5024 # Program counter indirect w/ displacement: (d16, PC) #
5025 #######################################################
# <ea> = (address of the extension word) + d16.  EXC_EXTWPTR points at
# the extension word before the fetch; _imem_read_word() advances it
# by 2, hence the subq.l adjustment at the end.
5026 fpc_ind:
5027 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5028 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
5029 bsr.l _imem_read_word # fetch word displacement
5030
5031 tst.l %d1 # did ifetch fail?
5032 bne.l iea_iacc # yes
5033
5034 mov.w %d0,%a0 # sign extend displacement
5035
5036 add.l EXC_EXTWPTR(%a6),%a0 # pc + d16
5037
5038 # _imem_read_word() increased the extwptr by 2. need to adjust here.
5039 subq.l &0x2,%a0 # adjust <ea>
5040 rts
5041
5042 ##########################################################
5043 # PC indirect w/ index(8-bit displacement): (d8, PC, Xn) #
5044 # " " w/ " (base displacement): (bd, PC, Xn) #
5045 # PC memory indirect postindexed: ([bd, PC], Xn, od) #
5046 # PC memory indirect preindexed: ([bd, PC, Xn], od) #
5047 ##########################################################
# Same decoding as faddr_ind_ext, but the base is the PC (address of
# the extension word, recovered from EXC_EXTWPTR minus the 2 that
# _imem_read_word() advanced it by).  A set bit 8 in the extension
# word means a full (memory indirect) format, handled by
# fcalc_mem_ind.
5048 fpc_ind_ext:
5049 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5050 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
5051 bsr.l _imem_read_word # fetch ext word
5052
5053 tst.l %d1 # did ifetch fail?
5054 bne.l iea_iacc # yes
5055
5056 mov.l EXC_EXTWPTR(%a6),%a0 # put base in a0
5057 subq.l &0x2,%a0 # adjust base
5058
5059 btst &0x8,%d0 # is disp only 8 bits?
5060 bne.w fcalc_mem_ind # calc memory indirect
5061
5062 mov.l %d0,L_SCR1(%a6) # store opword
5063
5064 mov.l %d0,%d1 # make extword copy
5065 rol.w &0x4,%d1 # rotate reg num into place
5066 andi.w &0xf,%d1 # extract register number
5067
5068 # count on fetch_dreg() not to alter a0...
5069 bsr.l fetch_dreg # fetch index
5070
5071 mov.l %d2,-(%sp) # save d2
5072 mov.l L_SCR1(%a6),%d2 # fetch opword
5073
5074 btst &0xb,%d2 # is index word or long?
5075 bne.b fpii8_long # long
5076 ext.l %d0 # sign extend word index
5077 fpii8_long:
5078 mov.l %d2,%d1
5079 rol.w &0x7,%d1 # rotate scale value into place
5080 andi.l &0x3,%d1 # extract scale value
5081
5082 lsl.l %d1,%d0 # shift index by scale
5083
5084 extb.l %d2 # sign extend displacement
5085 add.l %d2,%d0 # disp + index
5086 add.l %d0,%a0 # An + (index + disp)
5087
5088 mov.l (%sp)+,%d2 # restore temp register
5089 rts
5090
#
# fcalc_mem_ind:
# Calculate <ea> for the full (memory indirect) extension word formats.
# in:  d0 = full-format extension word, a0 = base (An or PC base)
# out: a0 = final effective address
# Clobbers d0/d1; d2-d5 are saved and restored around the calculation.
# Register roles while d2-d5 are live:
5091 # d2 = index
5092 # d3 = base
5093 # d4 = od
5094 # d5 = extword
5095 fcalc_mem_ind:
5096 btst &0x6,%d0 # is the index suppressed?
5097 beq.b fcalc_index
5098
5099 movm.l &0x3c00,-(%sp) # save d2-d5
5100
5101 mov.l %d0,%d5 # put extword in d5
5102 mov.l %a0,%d3 # put base in d3
5103
5104 clr.l %d2 # yes, so index = 0
5105 bra.b fbase_supp_ck
5106
5107 # index:
5108 fcalc_index:
5109 mov.l %d0,L_SCR1(%a6) # save d0 (opword)
5110 bfextu %d0{&16:&4},%d1 # fetch dreg index
5111 bsr.l fetch_dreg
5112
5113 movm.l &0x3c00,-(%sp) # save d2-d5
5114 mov.l %d0,%d2 # put index in d2
5115 mov.l L_SCR1(%a6),%d5
5116 mov.l %a0,%d3
5117
5118 btst &0xb,%d5 # is index word or long?
5119 bne.b fno_ext
5120 ext.l %d2
5121
5122 fno_ext:
5123 bfextu %d5{&21:&2},%d0 # extract scale field
5124 lsl.l %d0,%d2 # scale the index
5125
5126 # base address (passed as parameter in d3):
5127 # we clear the value here if it should actually be suppressed.
5128 fbase_supp_ck:
5129 btst &0x7,%d5 # is the bd suppressed?
5130 beq.b fno_base_sup
5131 clr.l %d3
5132
5133 # base displacement:
5134 fno_base_sup:
5135 bfextu %d5{&26:&2},%d0 # get bd size
5136 # beq.l fmovm_error # if (size == 0) it's reserved
5137
5138 cmpi.b %d0,&0x2 # bd size: 1 = null, 2 = word, 3 = long
5139 blt.b fno_bd
5140 beq.b fget_word_bd
5141
# long base displacement
5142 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5143 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5144 bsr.l _imem_read_long
5145
5146 tst.l %d1 # did ifetch fail?
5147 bne.l fcea_iacc # yes
5148
5149 bra.b fchk_ind
5150
5151 fget_word_bd:
5152 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5153 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
5154 bsr.l _imem_read_word
5155
5156 tst.l %d1 # did ifetch fail?
5157 bne.l fcea_iacc # yes
5158
5159 ext.l %d0 # sign extend bd
5160
5161 fchk_ind:
5162 add.l %d0,%d3 # base += bd
5163
5164 # outer displacement:
5165 fno_bd:
5166 bfextu %d5{&30:&2},%d0 # is od suppressed?
5167 beq.w faii_bd # no memory indirection at all
5168
5169 cmpi.b %d0,&0x2 # od size: 1 = null, 2 = word, 3 = long
5170 blt.b fnull_od
5171 beq.b fword_od
5172
# long outer displacement
5173 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5174 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5175 bsr.l _imem_read_long
5176
5177 tst.l %d1 # did ifetch fail?
5178 bne.l fcea_iacc # yes
5179
5180 bra.b fadd_them
5181
5182 fword_od:
5183 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5184 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
5185 bsr.l _imem_read_word
5186
5187 tst.l %d1 # did ifetch fail?
5188 bne.l fcea_iacc # yes
5189
5190 ext.l %d0 # sign extend od
5191 bra.b fadd_them
5192
5193 fnull_od:
5194 clr.l %d0
5195
5196 fadd_them:
5197 mov.l %d0,%d4 # d4 = od
5198
5199 btst &0x2,%d5 # pre or post indexing?
5200 beq.b fpre_indexed
5201
# post-indexed: fetch *(base + bd), then add index and od
5202 mov.l %d3,%a0
5203 bsr.l _dmem_read_long
5204
5205 tst.l %d1 # did dfetch fail?
5206 bne.w fcea_err # yes
5207
5208 add.l %d2,%d0 # <ea> += index
5209 add.l %d4,%d0 # <ea> += od
5210 bra.b fdone_ea
5211
# pre-indexed: fetch *(base + bd + index), then add od
5212 fpre_indexed:
5213 add.l %d2,%d3 # preindexing
5214 mov.l %d3,%a0
5215 bsr.l _dmem_read_long
5216
5217 tst.l %d1 # did dfetch fail?
5218 bne.w fcea_err # yes
5219
5220 add.l %d4,%d0 # ea += od
5221 bra.b fdone_ea
5222
5223 faii_bd:
5224 add.l %d2,%d3 # ea = (base + bd) + index
5225 mov.l %d3,%d0
5226 fdone_ea:
5227 mov.l %d0,%a0
5228
5229 movm.l (%sp)+,&0x003c # restore d2-d5
5230 rts
5231
5232 #########################################################
# Error exits for fcalc_mem_ind.  Both restore d2-d5 before leaving.
# fcea_err:  the _dmem_read_long() of the indirect pointer faulted;
#            d3 (base + bd) is returned in a0 as the faulting address
#            and d0 is loaded with 0x0101 — presumably a fault-info
#            code expected by iea_dacc; confirm against that handler.
# fcea_iacc: an instruction-stream fetch faulted; fall into the
#            common iea_iacc recovery.
5233 fcea_err:
5234 mov.l %d3,%a0
5235
5236 movm.l (%sp)+,&0x003c # restore d2-d5
5237 mov.w &0x0101,%d0
5238 bra.l iea_dacc
5239
5240 fcea_iacc:
5241 movm.l (%sp)+,&0x003c # restore d2-d5
5242 bra.l iea_iacc
5243
# Error exits for the fmovm data move: the _dmem_write()/_dmem_read()
# of the register images faulted.  restore() is called first, then a
# code is loaded into d0 (0x00e1 for the write case, 0x0161 for the
# read case — presumably fault-frame info for iea_dacc; confirm
# against that handler) and the user <ea> saved in L_SCR1 is passed
# in a0.
5244 fmovm_out_err:
5245 bsr.l restore
5246 mov.w &0x00e1,%d0
5247 bra.b fmovm_err
5248
5249 fmovm_in_err:
5250 bsr.l restore
5251 mov.w &0x0161,%d0
5252
5253 fmovm_err:
5254 mov.l L_SCR1(%a6),%a0 # faulting user <ea>
5255 bra.l iea_dacc
5256
5257 #########################################################################
5258 # XDEF **************************************************************** #
5259 # fmovm_ctrl(): emulate fmovm.l of control registers instr #
5260 # #
5261 # XREF **************************************************************** #
5262 # _imem_read_long() - read longword from memory #
5263 # iea_iacc() - _imem_read_long() failed; error recovery #
5264 # #
5265 # INPUT *************************************************************** #
5266 # None #
5267 # #
5268 # OUTPUT ************************************************************** #
5269 # If _imem_read_long() doesn't fail: #
5270 # USER_FPCR(a6) = new FPCR value #
5271 # USER_FPSR(a6) = new FPSR value #
5272 # USER_FPIAR(a6) = new FPIAR value #
5273 # #
5274 # ALGORITHM *********************************************************** #
5275 # Decode the instruction type by looking at the extension word #
5276 # in order to see how many control registers to fetch from memory. #
5277 # Fetch them using _imem_read_long(). If this fetch fails, exit through #
5278 # the special access error exit handler iea_iacc(). #
5279 # #
5280 # Instruction word decoding: #
5281 # #
5282 # fmovem.l #<data>, {FPIAR&|FPCR&|FPSR} #
5283 # #
5284 # WORD1 WORD2 #
5285 # 1111 0010 00 111100 100$ $$00 0000 0000 #
5286 # #
5287 # $$$ (100): FPCR #
5288 # (010): FPSR #
5289 # (001): FPIAR #
5290 # (000): FPIAR #
5291 # #
5292 #########################################################################
5293
5294 global fmovm_ctrl
# See the header block above for the full description.  The register
# select bits live in the upper byte of the extension word; the masks
# tested below are the multi-register combinations (0x9c = fpcr/fpsr/
# fpiar, 0x98 = fpcr/fpsr, 0x94 = fpcr/fpiar); anything else falling
# through here is treated as the remaining fpsr/fpiar pair.  Each case
# reads one longword per register from the instruction stream with
# _imem_read_long() and stores it into the USER_FP* save area; any
# ifetch failure exits through iea_iacc.
5295 fmovm_ctrl:
5296 mov.b EXC_EXTWORD(%a6),%d0 # fetch reg select bits
5297 cmpi.b %d0,&0x9c # fpcr & fpsr & fpiar ?
5298 beq.w fctrl_in_7 # yes
5299 cmpi.b %d0,&0x98 # fpcr & fpsr ?
5300 beq.w fctrl_in_6 # yes
5301 cmpi.b %d0,&0x94 # fpcr & fpiar ?
5302 beq.b fctrl_in_5 # yes
5303
5304 # fmovem.l #<data>, fpsr/fpiar
5305 fctrl_in_3:
5306 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5307 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5308 bsr.l _imem_read_long # fetch FPSR from mem
5309
5310 tst.l %d1 # did ifetch fail?
5311 bne.l iea_iacc # yes
5312
5313 mov.l %d0,USER_FPSR(%a6) # store new FPSR to stack
5314 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5315 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5316 bsr.l _imem_read_long # fetch FPIAR from mem
5317
5318 tst.l %d1 # did ifetch fail?
5319 bne.l iea_iacc # yes
5320
5321 mov.l %d0,USER_FPIAR(%a6) # store new FPIAR to stack
5322 rts
5323
5324 # fmovem.l #<data>, fpcr/fpiar
5325 fctrl_in_5:
5326 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5327 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5328 bsr.l _imem_read_long # fetch FPCR from mem
5329
5330 tst.l %d1 # did ifetch fail?
5331 bne.l iea_iacc # yes
5332
5333 mov.l %d0,USER_FPCR(%a6) # store new FPCR to stack
5334 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5335 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5336 bsr.l _imem_read_long # fetch FPIAR from mem
5337
5338 tst.l %d1 # did ifetch fail?
5339 bne.l iea_iacc # yes
5340
5341 mov.l %d0,USER_FPIAR(%a6) # store new FPIAR to stack
5342 rts
5343
5344 # fmovem.l #<data>, fpcr/fpsr
5345 fctrl_in_6:
5346 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5347 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5348 bsr.l _imem_read_long # fetch FPCR from mem
5349
5350 tst.l %d1 # did ifetch fail?
5351 bne.l iea_iacc # yes
5352
5353 mov.l %d0,USER_FPCR(%a6) # store new FPCR to mem
5354 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5355 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5356 bsr.l _imem_read_long # fetch FPSR from mem
5357
5358 tst.l %d1 # did ifetch fail?
5359 bne.l iea_iacc # yes
5360
5361 mov.l %d0,USER_FPSR(%a6) # store new FPSR to mem
5362 rts
5363
5364 # fmovem.l #<data>, fpcr/fpsr/fpiar
5365 fctrl_in_7:
5366 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5367 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5368 bsr.l _imem_read_long # fetch FPCR from mem
5369
5370 tst.l %d1 # did ifetch fail?
5371 bne.l iea_iacc # yes
5372
5373 mov.l %d0,USER_FPCR(%a6) # store new FPCR to mem
5374 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5375 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5376 bsr.l _imem_read_long # fetch FPSR from mem
5377
5378 tst.l %d1 # did ifetch fail?
5379 bne.l iea_iacc # yes
5380
5381 mov.l %d0,USER_FPSR(%a6) # store new FPSR to mem
5382 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5383 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5384 bsr.l _imem_read_long # fetch FPIAR from mem
5385
5386 tst.l %d1 # did ifetch fail?
5387 bne.l iea_iacc # yes
5388
5389 mov.l %d0,USER_FPIAR(%a6) # store new FPIAR to mem
5390 rts
5391
5392 ##########################################################################
5393
5394 #########################################################################
5395 # XDEF **************************************************************** #
5396 # addsub_scaler2(): scale inputs to fadd/fsub such that no #
5397 # OVFL/UNFL exceptions will result #
5398 # #
5399 # XREF **************************************************************** #
5400 # norm() - normalize mantissa after adjusting exponent #
5401 # #
5402 # INPUT *************************************************************** #
5403 # FP_SRC(a6) = fp op1(src) #
5404 # FP_DST(a6) = fp op2(dst) #
5405 # #
5406 # OUTPUT ************************************************************** #
5407 # FP_SRC(a6) = fp op1 scaled(src) #
5408 # FP_DST(a6) = fp op2 scaled(dst) #
5409 # d0 = scale amount #
5410 # #
5411 # ALGORITHM *********************************************************** #
5412 # If the DST exponent is > the SRC exponent, set the DST exponent #
5413 # equal to 0x3fff and scale the SRC exponent by the value that the #
5414 # DST exponent was scaled by. If the SRC exponent is greater or equal, #
5415 # do the opposite. Return this scale factor in d0. #
5416 # If the two exponents differ by > the number of mantissa bits #
5417 # plus two, then set the smallest exponent to a very small value as a #
5418 # quick shortcut. #
5419 # #
5420 #########################################################################
5421
5422 global addsub_scaler2
# See the header block above.  Working copies live in FP_SCR0 (src)
# and FP_SCR1 (dst); the unbiased-masked exponents are kept in
# L_SCR1 (src, low word) and 2+L_SCR1 (dst).
# NOTE(review): several original inline comments in dst_exp_gt2 said
# "dst" where the code operates on the SRC operand (it tests STAG and
# normalizes FP_SCR0); the comments below have been corrected to
# match the code.
5423 addsub_scaler2:
5424 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
5425 mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
5426 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
5427 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
5428 mov.w SRC_EX(%a0),%d0
5429 mov.w DST_EX(%a1),%d1
5430 mov.w %d0,FP_SCR0_EX(%a6)
5431 mov.w %d1,FP_SCR1_EX(%a6)
5432
5433 andi.w &0x7fff,%d0
5434 andi.w &0x7fff,%d1
5435 mov.w %d0,L_SCR1(%a6) # store src exponent
5436 mov.w %d1,2+L_SCR1(%a6) # store dst exponent
5437
5438 cmp.w %d0, %d1 # is src exp >= dst exp?
5439 bge.l src_exp_ge2
5440
5441 # dst exp is > src exp; scale dst to exp = 0x3fff
5442 dst_exp_gt2:
5443 bsr.l scale_to_zero_dst
5444 mov.l %d0,-(%sp) # save scale factor
5445
5446 cmpi.b STAG(%a6),&DENORM # is src denormalized?
5447 bne.b cmpexp12
5448
5449 lea FP_SCR0(%a6),%a0
5450 bsr.l norm # normalize the denorm; result is new exp
5451 neg.w %d0 # new exp = -(shft val)
5452 mov.w %d0,L_SCR1(%a6) # insert new src exp
5453
5454 cmpexp12:
5455 mov.w 2+L_SCR1(%a6),%d0
5456 subi.w &mantissalen+2,%d0 # subtract mantissalen+2 from larger exp
5457
5458 cmp.w %d0,L_SCR1(%a6) # is difference >= len(mantissa)+2?
5459 bge.b quick_scale12
5460
5461 mov.w L_SCR1(%a6),%d0
5462 add.w 0x2(%sp),%d0 # scale src exponent by scale factor
5463 mov.w FP_SCR0_EX(%a6),%d1
5464 and.w &0x8000,%d1
5465 or.w %d1,%d0 # concat {sgn,new exp}
5466 mov.w %d0,FP_SCR0_EX(%a6) # insert new src exponent
5467
5468 mov.l (%sp)+,%d0 # return SCALE factor
5469 rts
5470
5471 quick_scale12:
5472 andi.w &0x8000,FP_SCR0_EX(%a6) # zero src exponent
5473 bset &0x0,1+FP_SCR0_EX(%a6) # set exp = 1
5474
5475 mov.l (%sp)+,%d0 # return SCALE factor
5476 rts
5477
5478 # src exp is >= dst exp; scale src to exp = 0x3fff
5479 src_exp_ge2:
5480 bsr.l scale_to_zero_src
5481 mov.l %d0,-(%sp) # save scale factor
5482
5483 cmpi.b DTAG(%a6),&DENORM # is dst denormalized?
5484 bne.b cmpexp22
5485 lea FP_SCR1(%a6),%a0
5486 bsr.l norm # normalize the denorm; result is new exp
5487 neg.w %d0 # new exp = -(shft val)
5488 mov.w %d0,2+L_SCR1(%a6) # insert new dst exp
5489
5490 cmpexp22:
5491 mov.w L_SCR1(%a6),%d0
5492 subi.w &mantissalen+2,%d0 # subtract mantissalen+2 from larger exp
5493
5494 cmp.w %d0,2+L_SCR1(%a6) # is difference >= len(mantissa)+2?
5495 bge.b quick_scale22
5496
5497 mov.w 2+L_SCR1(%a6),%d0
5498 add.w 0x2(%sp),%d0 # scale dst exponent by scale factor
5499 mov.w FP_SCR1_EX(%a6),%d1
5500 andi.w &0x8000,%d1
5501 or.w %d1,%d0 # concat {sgn,new exp}
5502 mov.w %d0,FP_SCR1_EX(%a6) # insert new dst exponent
5503
5504 mov.l (%sp)+,%d0 # return SCALE factor
5505 rts
5506
5507 quick_scale22:
5508 andi.w &0x8000,FP_SCR1_EX(%a6) # zero dst exponent
5509 bset &0x0,1+FP_SCR1_EX(%a6) # set exp = 1
5510
5511 mov.l (%sp)+,%d0 # return SCALE factor
5512 rts
5513
5514 ##########################################################################
5515
5516 #########################################################################
5517 # XDEF **************************************************************** #
5518 # scale_to_zero_src(): scale the exponent of extended precision #
5519 # value at FP_SCR0(a6). #
5520 # #
5521 # XREF **************************************************************** #
5522 # norm() - normalize the mantissa if the operand was a DENORM #
5523 # #
5524 # INPUT *************************************************************** #
5525 # FP_SCR0(a6) = extended precision operand to be scaled #
5526 # #
5527 # OUTPUT ************************************************************** #
5528 # FP_SCR0(a6) = scaled extended precision operand #
5529 # d0 = scale value #
5530 # #
5531 # ALGORITHM *********************************************************** #
5532 # Set the exponent of the input operand to 0x3fff. Save the value #
5533 # of the difference between the original and new exponent. Then, #
5534 # normalize the operand if it was a DENORM. Add this normalization #
5535 # value to the previous value. Return the result. #
5536 # #
5537 #########################################################################
5538
	global		scale_to_zero_src
scale_to_zero_src:
	mov.w		FP_SCR0_EX(%a6),%d1	# extract operand's {sgn,exp}
	mov.w		%d1,%d0			# make a copy

	andi.l		&0x7fff,%d1		# extract operand's exponent

	andi.w		&0x8000,%d0		# extract operand's sgn
	or.w		&0x3fff,%d0		# insert new exponent = 0x3fff (unbiased 0)

	mov.w		%d0,FP_SCR0_EX(%a6)	# insert biased exponent

	cmpi.b		STAG(%a6),&DENORM	# is operand a DENORM?
	beq.b		stzs_denorm		# yes; normalize it first

stzs_norm:
	mov.l		&0x3fff,%d0
	sub.l		%d1,%d0			# scale = BIAS - exp

	rts

stzs_denorm:
	lea		FP_SCR0(%a6),%a0	# pass ptr to src op
	bsr.l		norm			# normalize denorm; %d0 = shift count
	neg.l		%d0			# new exponent = -(shft val)
	mov.l		%d0,%d1			# treat as the old exponent above
	bra.b		stzs_norm		# finish scaling
5566
5567 ###
5568
5569 #########################################################################
5570 # XDEF **************************************************************** #
5571 # scale_sqrt(): scale the input operand exponent so a subsequent #
5572 # fsqrt operation won't take an exception. #
5573 # #
5574 # XREF **************************************************************** #
5575 # norm() - normalize the mantissa if the operand was a DENORM #
5576 # #
5577 # INPUT *************************************************************** #
5578 # FP_SCR0(a6) = extended precision operand to be scaled #
5579 # #
5580 # OUTPUT ************************************************************** #
5581 # FP_SCR0(a6) = scaled extended precision operand #
5582 # d0 = scale value #
5583 # #
5584 # ALGORITHM *********************************************************** #
5585 # If the input operand is a DENORM, normalize it. #
5586 # If the exponent of the input operand is even, set the exponent #
5587 # to 0x3ffe and return a scale factor of "(exp-0x3ffe)/2". If the #
# exponent of the input operand is odd, set the exponent to 0x3fff and	#
# return a scale factor of "(exp-0x3fff)/2".				#
5590 # #
5591 #########################################################################
5592
	global		scale_sqrt
scale_sqrt:
	cmpi.b		STAG(%a6),&DENORM	# is operand a DENORM?
	beq.b		ss_denorm		# yes; normalize it first

	mov.w		FP_SCR0_EX(%a6),%d1	# extract operand's {sgn,exp}
	andi.l		&0x7fff,%d1		# extract operand's exponent

	andi.w		&0x8000,FP_SCR0_EX(%a6)	# keep only operand's sgn

	btst		&0x0,%d1		# is exp even or odd?
	beq.b		ss_norm_even

# odd exponent: use 0x3fff so (BIAS - exp) is even and halving it is exact
	ori.w		&0x3fff,FP_SCR0_EX(%a6)	# insert new exponent = 0x3fff

	mov.l		&0x3fff,%d0
	sub.l		%d1,%d0			# scale = BIAS - exp
	asr.l		&0x1,%d0		# divide scale factor by 2
	rts

ss_norm_even:
	ori.w		&0x3ffe,FP_SCR0_EX(%a6)	# insert new exponent = 0x3ffe

	mov.l		&0x3ffe,%d0
	sub.l		%d1,%d0			# scale = (BIAS-1) - exp
	asr.l		&0x1,%d0		# divide scale factor by 2
	rts

# DENORM: exponent field is already zero, so the ori below installs the
# new exponent directly; norm() returns the shift count in %d0.
ss_denorm:
	lea		FP_SCR0(%a6),%a0	# pass ptr to src op
	bsr.l		norm			# normalize denorm

	btst		&0x0,%d0		# is shift count even or odd?
	beq.b		ss_denorm_even

	ori.w		&0x3fff,FP_SCR0_EX(%a6)	# insert new exponent = 0x3fff

	add.l		&0x3fff,%d0		# scale = BIAS + shift count
	asr.l		&0x1,%d0		# divide scale factor by 2
	rts

ss_denorm_even:
	ori.w		&0x3ffe,FP_SCR0_EX(%a6)	# insert new exponent = 0x3ffe

	add.l		&0x3ffe,%d0		# scale = (BIAS-1) + shift count
	asr.l		&0x1,%d0		# divide scale factor by 2
	rts
5640
5641 ###
5642
5643 #########################################################################
5644 # XDEF **************************************************************** #
5645 # scale_to_zero_dst(): scale the exponent of extended precision #
5646 # value at FP_SCR1(a6). #
5647 # #
5648 # XREF **************************************************************** #
5649 # norm() - normalize the mantissa if the operand was a DENORM #
5650 # #
5651 # INPUT *************************************************************** #
5652 # FP_SCR1(a6) = extended precision operand to be scaled #
5653 # #
5654 # OUTPUT ************************************************************** #
5655 # FP_SCR1(a6) = scaled extended precision operand #
5656 # d0 = scale value #
5657 # #
5658 # ALGORITHM *********************************************************** #
5659 # Set the exponent of the input operand to 0x3fff. Save the value #
5660 # of the difference between the original and new exponent. Then, #
5661 # normalize the operand if it was a DENORM. Add this normalization #
5662 # value to the previous value. Return the result. #
5663 # #
5664 #########################################################################
5665
	global		scale_to_zero_dst
scale_to_zero_dst:
	mov.w		FP_SCR1_EX(%a6),%d1	# extract operand's {sgn,exp}
	mov.w		%d1,%d0			# make a copy

	andi.l		&0x7fff,%d1		# extract operand's exponent

	andi.w		&0x8000,%d0		# extract operand's sgn
	or.w		&0x3fff,%d0		# insert new exponent = 0x3fff (unbiased 0)

	mov.w		%d0,FP_SCR1_EX(%a6)	# insert biased exponent

	cmpi.b		DTAG(%a6),&DENORM	# is operand a DENORM?
	beq.b		stzd_denorm		# yes; normalize it first

stzd_norm:
	mov.l		&0x3fff,%d0
	sub.l		%d1,%d0			# scale = BIAS - exp
	rts

stzd_denorm:
	lea		FP_SCR1(%a6),%a0	# pass ptr to dst op
	bsr.l		norm			# normalize denorm; %d0 = shift count
	neg.l		%d0			# new exponent = -(shft val)
	mov.l		%d0,%d1			# treat as the old exponent above
	bra.b		stzd_norm		# finish scaling
5692
5693 ##########################################################################
5694
5695 #########################################################################
5696 # XDEF **************************************************************** #
5697 # res_qnan(): return default result w/ QNAN operand for dyadic #
5698 # res_snan(): return default result w/ SNAN operand for dyadic #
5699 # res_qnan_1op(): return dflt result w/ QNAN operand for monadic #
5700 # res_snan_1op(): return dflt result w/ SNAN operand for monadic #
5701 # #
5702 # XREF **************************************************************** #
5703 # None #
5704 # #
5705 # INPUT *************************************************************** #
5706 # FP_SRC(a6) = pointer to extended precision src operand #
5707 # FP_DST(a6) = pointer to extended precision dst operand #
5708 # #
5709 # OUTPUT ************************************************************** #
5710 # fp0 = default result #
5711 # #
5712 # ALGORITHM *********************************************************** #
5713 # If either operand (but not both operands) of an operation is a #
5714 # nonsignalling NAN, then that NAN is returned as the result. If both #
5715 # operands are nonsignalling NANs, then the destination operand #
5716 # nonsignalling NAN is returned as the result. #
5717 # If either operand to an operation is a signalling NAN (SNAN), #
5718 # then, the SNAN bit is set in the FPSR EXC byte. If the SNAN trap #
5719 # enable bit is set in the FPCR, then the trap is taken and the #
5720 # destination is not modified. If the SNAN trap enable bit is not set, #
5721 # then the SNAN is converted to a nonsignalling NAN (by setting the #
5722 # SNAN bit in the operand to one), and the operation continues as #
5723 # described in the preceding paragraph, for nonsignalling NANs. #
5724 # Make sure the appropriate FPSR bits are set before exiting. #
5725 # #
5726 #########################################################################
5727
	global		res_qnan
	global		res_snan
res_qnan:
res_snan:
	cmp.b		DTAG(%a6), &SNAN	# is the dst an SNAN?
	beq.b		dst_snan2
	cmp.b		DTAG(%a6), &QNAN	# is the dst a QNAN?
	beq.b		dst_qnan2
src_nan:
	cmp.b		STAG(%a6), &QNAN	# dst is not a NAN; check the src
	beq.b		src_qnan2
	global		res_snan_1op
res_snan_1op:
src_snan2:
	bset		&0x6, FP_SRC_HI(%a6)	# set quiet bit (SNAN -> QNAN)
	or.l		&nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
	lea		FP_SRC(%a6), %a0	# result is the src NAN
	bra.b		nan_comp
	global		res_qnan_1op
res_qnan_1op:
src_qnan2:
	or.l		&nan_mask, USER_FPSR(%a6)
	lea		FP_SRC(%a6), %a0	# result is the src NAN
	bra.b		nan_comp
dst_snan2:
	or.l		&nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
	bset		&0x6, FP_DST_HI(%a6)	# set quiet bit (SNAN -> QNAN)
	lea		FP_DST(%a6), %a0	# result is the dst NAN
	bra.b		nan_comp
dst_qnan2:
	lea		FP_DST(%a6), %a0	# dst QNAN takes precedence
	cmp.b		STAG(%a6), &SNAN	# but was the src an SNAN?
	bne		nan_done
	or.l		&aiop_mask+snan_mask, USER_FPSR(%a6)
nan_done:
	or.l		&nan_mask, USER_FPSR(%a6)
nan_comp:
	btst		&0x7, FTEMP_EX(%a0)	# is NAN neg?
	beq.b		nan_not_neg
	or.l		&neg_mask, USER_FPSR(%a6)
nan_not_neg:
	fmovm.x		(%a0), &0x80		# return the chosen NAN in fp0
	rts
5771
5772 #########################################################################
5773 # XDEF **************************************************************** #
5774 # res_operr(): return default result during operand error #
5775 # #
5776 # XREF **************************************************************** #
5777 # None #
5778 # #
5779 # INPUT *************************************************************** #
5780 # None #
5781 # #
5782 # OUTPUT ************************************************************** #
5783 # fp0 = default operand error result #
5784 # #
5785 # ALGORITHM *********************************************************** #
5786 # An nonsignalling NAN is returned as the default result when #
5787 # an operand error occurs for the following cases: #
5788 # #
5789 # Multiply: (Infinity x Zero) #
5790 # Divide : (Zero / Zero) || (Infinity / Infinity) #
5791 # #
5792 #########################################################################
5793
	global		res_operr
res_operr:
	or.l		&nan_mask+operr_mask+aiop_mask, USER_FPSR(%a6)
	fmovm.x		nan_return(%pc), &0x80	# fp0 = default QNAN
	rts

# default operand-error result: QNAN with an all-ones mantissa
nan_return:
	long		0x7fff0000, 0xffffffff, 0xffffffff
5802
5803 #########################################################################
5804 # XDEF **************************************************************** #
5805 # _denorm(): denormalize an intermediate result #
5806 # #
5807 # XREF **************************************************************** #
5808 # None #
5809 # #
5810 # INPUT *************************************************************** #
5811 # a0 = points to the operand to be denormalized #
5812 # (in the internal extended format) #
5813 # #
5814 # d0 = rounding precision #
5815 # #
5816 # OUTPUT ************************************************************** #
5817 # a0 = pointer to the denormalized result #
5818 # (in the internal extended format) #
5819 # #
5820 # d0 = guard,round,sticky #
5821 # #
5822 # ALGORITHM *********************************************************** #
5823 # According to the exponent underflow threshold for the given #
# precision, shift the mantissa bits to the right in order to raise the	#
5825 # exponent of the operand to the threshold value. While shifting the #
5826 # mantissa bits right, maintain the value of the guard, round, and #
5827 # sticky bits. #
5828 # other notes: #
5829 # (1) _denorm() is called by the underflow routines #
5830 # (2) _denorm() does NOT affect the status register #
5831 # #
5832 #########################################################################
5833
5834 #
5835 # table of exponent threshold values for each precision
5836 #
5837 tbl_thresh:
5838 short 0x0
5839 short sgl_thresh
5840 short dbl_thresh
5841
5842 global _denorm
5843 _denorm:
5844 #
5845 # Load the exponent threshold for the precision selected and check
5846 # to see if (threshold - exponent) is > 65 in which case we can
5847 # simply calculate the sticky bit and zero the mantissa. otherwise
5848 # we have to call the denormalization routine.
5849 #
5850 lsr.b &0x2, %d0 # shift prec to lo bits
5851 mov.w (tbl_thresh.b,%pc,%d0.w*2), %d1 # load prec threshold
5852 mov.w %d1, %d0 # copy d1 into d0
5853 sub.w FTEMP_EX(%a0), %d0 # diff = threshold - exp
5854 cmpi.w %d0, &66 # is diff > 65? (mant + g,r bits)
5855 bpl.b denorm_set_stky # yes; just calc sticky
5856
5857 clr.l %d0 # clear g,r,s
5858 btst &inex2_bit, FPSR_EXCEPT(%a6) # yes; was INEX2 set?
5859 beq.b denorm_call # no; don't change anything
5860 bset &29, %d0 # yes; set sticky bit
5861
5862 denorm_call:
5863 bsr.l dnrm_lp # denormalize the number
5864 rts
5865
5866 #
5867 # all bit would have been shifted off during the denorm so simply
5868 # calculate if the sticky should be set and clear the entire mantissa.
5869 #
5870 denorm_set_stky:
5871 mov.l &0x20000000, %d0 # set sticky bit in return value
5872 mov.w %d1, FTEMP_EX(%a0) # load exp with threshold
5873 clr.l FTEMP_HI(%a0) # set d1 = 0 (ms mantissa)
5874 clr.l FTEMP_LO(%a0) # set d2 = 0 (ms mantissa)
5875 rts
5876
5877 # #
5878 # dnrm_lp(): normalize exponent/mantissa to specified threshhold #
5879 # #
5880 # INPUT: #
5881 # %a0 : points to the operand to be denormalized #
5882 # %d0{31:29} : initial guard,round,sticky #
5883 # %d1{15:0} : denormalization threshold #
5884 # OUTPUT: #
5885 # %a0 : points to the denormalized operand #
5886 # %d0{31:29} : final guard,round,sticky #
5887 # #
5888
5889 # *** Local Equates *** #
5890 set GRS, L_SCR2 # g,r,s temp storage
5891 set FTEMP_LO2, L_SCR1 # FTEMP_LO copy
5892
5893 global dnrm_lp
5894 dnrm_lp:
5895
5896 #
5897 # make a copy of FTEMP_LO and place the g,r,s bits directly after it
5898 # in memory so as to make the bitfield extraction for denormalization easier.
5899 #
5900 mov.l FTEMP_LO(%a0), FTEMP_LO2(%a6) # make FTEMP_LO copy
5901 mov.l %d0, GRS(%a6) # place g,r,s after it
5902
5903 #
5904 # check to see how much less than the underflow threshold the operand
5905 # exponent is.
5906 #
5907 mov.l %d1, %d0 # copy the denorm threshold
5908 sub.w FTEMP_EX(%a0), %d1 # d1 = threshold - uns exponent
5909 ble.b dnrm_no_lp # d1 <= 0
5910 cmpi.w %d1, &0x20 # is ( 0 <= d1 < 32) ?
5911 blt.b case_1 # yes
5912 cmpi.w %d1, &0x40 # is (32 <= d1 < 64) ?
5913 blt.b case_2 # yes
5914 bra.w case_3 # (d1 >= 64)
5915
5916 #
5917 # No normalization necessary
5918 #
5919 dnrm_no_lp:
5920 mov.l GRS(%a6), %d0 # restore original g,r,s
5921 rts
5922
5923 #
5924 # case (0<d1<32)
5925 #
5926 # %d0 = denorm threshold
5927 # %d1 = "n" = amt to shift
5928 #
5929 # ---------------------------------------------------------
5930 # | FTEMP_HI | FTEMP_LO |grs000.........000|
5931 # ---------------------------------------------------------
5932 # <-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
5933 # \ \ \ \
5934 # \ \ \ \
5935 # \ \ \ \
5936 # \ \ \ \
5937 # \ \ \ \
5938 # \ \ \ \
5939 # \ \ \ \
5940 # \ \ \ \
5941 # <-(n)-><-(32 - n)-><------(32)-------><------(32)------->
5942 # ---------------------------------------------------------
5943 # |0.....0| NEW_HI | NEW_FTEMP_LO |grs |
5944 # ---------------------------------------------------------
5945 #
case_1:
	mov.l		%d2, -(%sp)		# create temp storage

	mov.w		%d0, FTEMP_EX(%a0)	# exponent = denorm threshold
	mov.l		&32, %d0
	sub.w		%d1, %d0		# %d0 = 32 - %d1

# for shifts of 29-31 the incoming g,r,s would overlap the bitfield
# extracted below; fold them into the low byte of the FTEMP_LO copy
# so nothing is lost for the sticky calculation.
	cmpi.w		%d1, &29		# is shft amt >= 29?
	blt.b		case1_extract		# no; no fix needed
	mov.b		GRS(%a6), %d2
	or.b		%d2, 3+FTEMP_LO2(%a6)

case1_extract:
	bfextu		FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_HI
	bfextu		FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new FTEMP_LO
	bfextu		FTEMP_LO2(%a6){%d0:&32}, %d0 # %d0 = new G,R,S

	mov.l		%d2, FTEMP_HI(%a0)	# store new FTEMP_HI
	mov.l		%d1, FTEMP_LO(%a0)	# store new FTEMP_LO

	bftst		%d0{&2:&30}		# were bits shifted off?
	beq.b		case1_sticky_clear	# no; go finish
	bset		&rnd_stky_bit, %d0	# yes; set sticky bit

case1_sticky_clear:
	and.l		&0xe0000000, %d0	# clear all but G,R,S
	mov.l		(%sp)+, %d2		# restore temp register
	rts
5974
5975 #
5976 # case (32<=d1<64)
5977 #
5978 # %d0 = denorm threshold
5979 # %d1 = "n" = amt to shift
5980 #
5981 # ---------------------------------------------------------
5982 # | FTEMP_HI | FTEMP_LO |grs000.........000|
5983 # ---------------------------------------------------------
5984 # <-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
5985 # \ \ \
5986 # \ \ \
5987 # \ \ -------------------
5988 # \ -------------------- \
5989 # ------------------- \ \
5990 # \ \ \
5991 # \ \ \
5992 # \ \ \
5993 # <-------(32)------><-(n)-><-(32 - n)-><------(32)------->
5994 # ---------------------------------------------------------
5995 # |0...............0|0....0| NEW_LO |grs |
5996 # ---------------------------------------------------------
5997 #
case_2:
	mov.l		%d2, -(%sp)		# create temp storage

	mov.w		%d0, FTEMP_EX(%a0)	# exponent = denorm threshold
	subi.w		&0x20, %d1		# %d1 now between 0 and 32
	mov.l		&0x20, %d0
	sub.w		%d1, %d0		# %d0 = 32 - %d1

# subtle step here; or in the g,r,s at the bottom of FTEMP_LO to minimize
# the number of bits to check for the sticky detect.
# it only plays a role in shift amounts of 61-63.
	mov.b		GRS(%a6), %d2
	or.b		%d2, 3+FTEMP_LO2(%a6)

	bfextu		FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_LO
	bfextu		FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new G,R,S

	bftst		%d1{&2:&30}		# were any bits shifted off?
	bne.b		case2_set_sticky	# yes; set sticky bit
	bftst		FTEMP_LO2(%a6){%d0:&31}	# were any bits shifted off?
	bne.b		case2_set_sticky	# yes; set sticky bit

	mov.l		%d1, %d0		# move new G,R,S to %d0
	bra.b		case2_end

case2_set_sticky:
	mov.l		%d1, %d0		# move new G,R,S to %d0
	bset		&rnd_stky_bit, %d0	# set sticky bit

case2_end:
	clr.l		FTEMP_HI(%a0)		# store FTEMP_HI = 0
	mov.l		%d2, FTEMP_LO(%a0)	# store FTEMP_LO
	and.l		&0xe0000000, %d0	# clear all but G,R,S

	mov.l		(%sp)+,%d2		# restore temp register
	rts
6034
6035 #
6036 # case (d1>=64)
6037 #
6038 # %d0 = denorm threshold
6039 # %d1 = amt to shift
6040 #
case_3:
	mov.w		%d0, FTEMP_EX(%a0)	# insert denorm threshold

	cmpi.w		%d1, &65		# is shift amt > 65?
	blt.b		case3_64		# no; it's == 64
	beq.b		case3_65		# no; it's == 65

#
# case (d1>65)
#
# Shift value is > 65 and out of range. All bits are shifted off.
# Return a zero mantissa with the sticky bit set
#
	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
	mov.l		&0x20000000, %d0	# set sticky bit
	rts
6058
6059 #
6060 # case (d1 == 64)
6061 #
6062 # ---------------------------------------------------------
6063 # | FTEMP_HI | FTEMP_LO |grs000.........000|
6064 # ---------------------------------------------------------
6065 # <-------(32)------>
6066 # \ \
6067 # \ \
6068 # \ \
6069 # \ ------------------------------
6070 # ------------------------------- \
6071 # \ \
6072 # \ \
6073 # \ \
6074 # <-------(32)------>
6075 # ---------------------------------------------------------
6076 # |0...............0|0................0|grs |
6077 # ---------------------------------------------------------
6078 #
# shift by 64: FTEMP_HI lands exactly in the g,r,s longword
case3_64:
	mov.l		FTEMP_HI(%a0), %d0	# fetch hi(mantissa)
	mov.l		%d0, %d1		# make a copy
	and.l		&0xc0000000, %d0	# extract G,R
	and.l		&0x3fffffff, %d1	# extract other bits (for sticky)

	bra.b		case3_complete
6086
6087 #
6088 # case (d1 == 65)
6089 #
6090 # ---------------------------------------------------------
6091 # | FTEMP_HI | FTEMP_LO |grs000.........000|
6092 # ---------------------------------------------------------
6093 # <-------(32)------>
6094 # \ \
6095 # \ \
6096 # \ \
6097 # \ ------------------------------
6098 # -------------------------------- \
6099 # \ \
6100 # \ \
6101 # \ \
6102 # <-------(31)----->
6103 # ---------------------------------------------------------
6104 # |0...............0|0................0|0rs |
6105 # ---------------------------------------------------------
6106 #
# shift by 65: bit 31 of FTEMP_HI lands in the R position (G is zero)
case3_65:
	mov.l		FTEMP_HI(%a0), %d0	# fetch hi(mantissa)
	and.l		&0x80000000, %d0	# isolate high bit
	lsr.l		&0x1, %d0		# shift high bit into R bit
	# NOTE(review): %d1 still holds the shift amount (65) here, not
	# hi(mantissa), so this "and" leaves %d1 nonzero and the bne in
	# case3_complete is always taken for this path (sticky always set).
	# Looks like it should test FTEMP_HI's remaining bits instead --
	# confirm against the Motorola 060SP release sources.
	and.l		&0x7fffffff, %d1	# extract other bits
6112
case3_complete:
# last operation done was an "and" of the bits shifted off so the condition
# codes are already set so branch accordingly.
	bne.b		case3_set_sticky	# yes; go set new sticky
	tst.l		FTEMP_LO(%a0)		# were any bits shifted off?
	bne.b		case3_set_sticky	# yes; go set new sticky
	tst.b		GRS(%a6)		# any incoming g,r,s bits set?
	bne.b		case3_set_sticky	# yes; go set new sticky

#
# no bits were shifted off so don't set the sticky bit.
# %d0 already holds the new g,r from above;
# the entire mantissa becomes zero.
#
	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
	rts

#
# some bits were shifted off so set the sticky bit.
# the entire mantissa becomes zero.
#
case3_set_sticky:
	bset		&rnd_stky_bit,%d0	# set new sticky bit
	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
	rts
6140
6141 #########################################################################
6142 # XDEF **************************************************************** #
6143 # _round(): round result according to precision/mode #
6144 # #
6145 # XREF **************************************************************** #
6146 # None #
6147 # #
6148 # INPUT *************************************************************** #
6149 # a0 = ptr to input operand in internal extended format #
6150 # d1(hi) = contains rounding precision: #
6151 # ext = $0000xxxx #
6152 # sgl = $0004xxxx #
6153 # dbl = $0008xxxx #
6154 # d1(lo) = contains rounding mode: #
6155 # RN = $xxxx0000 #
6156 # RZ = $xxxx0001 #
6157 # RM = $xxxx0002 #
6158 # RP = $xxxx0003 #
6159 # d0{31:29} = contains the g,r,s bits (extended) #
6160 # #
6161 # OUTPUT ************************************************************** #
6162 # a0 = pointer to rounded result #
6163 # #
6164 # ALGORITHM *********************************************************** #
6165 # On return the value pointed to by a0 is correctly rounded, #
6166 # a0 is preserved and the g-r-s bits in d0 are cleared. #
6167 # The result is not typed - the tag field is invalid. The #
6168 # result is still in the internal extended format. #
6169 # #
6170 # The INEX bit of USER_FPSR will be set if the rounded result was #
6171 # inexact (i.e. if any of the g-r-s bits were set). #
6172 # #
6173 #########################################################################
6174
	global		_round
_round:
#
# ext_grs() looks at the rounding precision and sets the appropriate
# G,R,S bits.
# If (G,R,S == 0) then result is exact and round is done, else set
# the inex flag in status reg and continue.
#
	bsr.l		ext_grs			# extract G,R,S

	tst.l		%d0			# are G,R,S zero?
	beq.w		truncate		# yes; round is complete

	or.w		&inx2a_mask, 2+USER_FPSR(%a6) # set inex2/ainex

#
# Use rounding mode as an index into a jump table for these modes.
# All of the following assumes grs != 0.
#
	mov.w		(tbl_mode.b,%pc,%d1.w*2), %a1 # load jump offset
	jmp		(tbl_mode.b,%pc,%a1)	# jmp to rnd mode handler

# offsets indexed by rounding mode: RN=0, RZ=1, RM=2, RP=3
tbl_mode:
	short		rnd_near - tbl_mode
	short		truncate - tbl_mode	# RZ always truncates
	short		rnd_mnus - tbl_mode
	short		rnd_plus - tbl_mode
6202
6203 #################################################################
6204 # ROUND PLUS INFINITY #
6205 # #
6206 # If sign of fp number = 0 (positive), then add 1 to l. #
6207 #################################################################
6208 rnd_plus:
6209 tst.b FTEMP_SGN(%a0) # check for sign
6210 bmi.w truncate # if positive then truncate
6211
6212 mov.l &0xffffffff, %d0 # force g,r,s to be all f's
6213 swap %d1 # set up d1 for round prec.
6214
6215 cmpi.b %d1, &s_mode # is prec = sgl?
6216 beq.w add_sgl # yes
6217 bgt.w add_dbl # no; it's dbl
6218 bra.w add_ext # no; it's ext
6219
6220 #################################################################
6221 # ROUND MINUS INFINITY #
6222 # #
6223 # If sign of fp number = 1 (negative), then add 1 to l. #
6224 #################################################################
6225 rnd_mnus:
6226 tst.b FTEMP_SGN(%a0) # check for sign
6227 bpl.w truncate # if negative then truncate
6228
6229 mov.l &0xffffffff, %d0 # force g,r,s to be all f's
6230 swap %d1 # set up d1 for round prec.
6231
6232 cmpi.b %d1, &s_mode # is prec = sgl?
6233 beq.w add_sgl # yes
6234 bgt.w add_dbl # no; it's dbl
6235 bra.w add_ext # no; it's ext
6236
6237 #################################################################
6238 # ROUND NEAREST #
6239 # #
6240 # If (g=1), then add 1 to l and if (r=s=0), then clear l #
6241 # Note that this will round to even in case of a tie. #
6242 #################################################################
6243 rnd_near:
6244 asl.l &0x1, %d0 # shift g-bit to c-bit
6245 bcc.w truncate # if (g=1) then
6246
6247 swap %d1 # set up d1 for round prec.
6248
6249 cmpi.b %d1, &s_mode # is prec = sgl?
6250 beq.w add_sgl # yes
6251 bgt.w add_dbl # no; it's dbl
6252 bra.w add_ext # no; it's ext
6253
# *** LOCAL EQUATES ***
	set		ad_1_sgl, 0x00000100	# constant to add 1 to l-bit in sgl prec
	set		ad_1_dbl, 0x00000800	# constant to add 1 to l-bit in dbl prec

#########################
#	ADD SINGLE	#
#########################
add_sgl:
	add.l		&ad_1_sgl, FTEMP_HI(%a0) # add 1 to the sgl l-bit
	bcc.b		scc_clr			# no mantissa overflow
	roxr.w		FTEMP_HI(%a0)		# shift carried-out bit back in
	roxr.w		FTEMP_HI+2(%a0)		# shift carried-out bit back in
	add.w		&0x1, FTEMP_EX(%a0)	# and incr exponent
scc_clr:
	tst.l		%d0			# test for r,s = 0 (tie case)
	bne.b		sgl_done
	and.w		&0xfe00, FTEMP_HI+2(%a0) # clear the l-bit (round to even)
sgl_done:
	and.l		&0xffffff00, FTEMP_HI(%a0) # truncate bits beyond sgl limit
	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
	rts
6275
6276 #########################
6277 # ADD EXTENDED #
6278 #########################
6279 add_ext:
6280 addq.l &1,FTEMP_LO(%a0) # add 1 to l-bit
6281 bcc.b xcc_clr # test for carry out
6282 addq.l &1,FTEMP_HI(%a0) # propogate carry
6283 bcc.b xcc_clr
6284 roxr.w FTEMP_HI(%a0) # mant is 0 so restore v-bit
6285 roxr.w FTEMP_HI+2(%a0) # mant is 0 so restore v-bit
6286 roxr.w FTEMP_LO(%a0)
6287 roxr.w FTEMP_LO+2(%a0)
6288 add.w &0x1,FTEMP_EX(%a0) # and inc exp
6289 xcc_clr:
6290 tst.l %d0 # test rs = 0
6291 bne.b add_ext_done
6292 and.b &0xfe,FTEMP_LO+3(%a0) # clear the l bit
6293 add_ext_done:
6294 rts
6295
6296 #########################
6297 # ADD DOUBLE #
6298 #########################
6299 add_dbl:
6300 add.l &ad_1_dbl, FTEMP_LO(%a0) # add 1 to lsb
6301 bcc.b dcc_clr # no carry
6302 addq.l &0x1, FTEMP_HI(%a0) # propogate carry
6303 bcc.b dcc_clr # no carry
6304
6305 roxr.w FTEMP_HI(%a0) # mant is 0 so restore v-bit
6306 roxr.w FTEMP_HI+2(%a0) # mant is 0 so restore v-bit
6307 roxr.w FTEMP_LO(%a0)
6308 roxr.w FTEMP_LO+2(%a0)
6309 addq.w &0x1, FTEMP_EX(%a0) # incr exponent
6310 dcc_clr:
6311 tst.l %d0 # test for rs = 0
6312 bne.b dbl_done
6313 and.w &0xf000, FTEMP_LO+2(%a0) # clear the l-bit
6314
6315 dbl_done:
6316 and.l &0xfffff800,FTEMP_LO(%a0) # truncate bits beyond dbl limit
6317 rts
6318
6319 ###########################
6320 # Truncate all other bits #
6321 ###########################
6322 truncate:
6323 swap %d1 # select rnd prec
6324
6325 cmpi.b %d1, &s_mode # is prec sgl?
6326 beq.w sgl_done # yes
6327 bgt.b dbl_done # no; it's dbl
6328 rts # no; it's ext
6329
6330
6331 #
6332 # ext_grs(): extract guard, round and sticky bits according to
6333 # rounding precision.
6334 #
6335 # INPUT
6336 # d0 = extended precision g,r,s (in d0{31:29})
6337 # d1 = {PREC,ROUND}
6338 # OUTPUT
6339 # d0{31:29} = guard, round, sticky
6340 #
# The ext_grs routine extracts the guard/round/sticky bits according to the
6342 # selected rounding precision. It is called by the round subroutine
6343 # only. All registers except d0 are kept intact. d0 becomes an
6344 # updated guard,round,sticky in d0{31:29}
6345 #
6346 # Notes: the ext_grs uses the round PREC, and therefore has to swap d1
6347 # prior to usage, and needs to restore d1 to original. this
6348 # routine is tightly tied to the round routine and not meant to
6349 # uphold standard subroutine calling practices.
6350 #
6351
ext_grs:
	swap		%d1			# have d1.w point to round precision
	tst.b		%d1			# is rnd prec = extended?
	bne.b		ext_grs_not_ext		# no; go handle sgl or dbl

#
# %d0 actually already holds g,r,s since _round() had it before calling
# this function. so, as long as we don't disturb it, we are "returning" it.
#
ext_grs_ext:
	swap		%d1			# restore d1 to original
	rts
6364
ext_grs_not_ext:
	movm.l		&0x3000, -(%sp)		# make some temp registers {d2/d3}

	cmpi.b		%d1, &s_mode		# is rnd prec = sgl?
	bne.b		ext_grs_dbl		# no; go handle dbl

#
# sgl:
#	96				64	40	32		0
#	-----------------------------------------------------
#	| EXP	|XXXXXXX|		|xx	|		|grs|
#	-----------------------------------------------------
#			<--(24)--->nn\		/
#				   ee ---------------------
#				   ww		|
#						v
#				   gr	   new sticky
#
ext_grs_sgl:
	bfextu		FTEMP_HI(%a0){&24:&2}, %d3 # sgl prec. g-r are 2 bits right
	mov.l		&30, %d2		# of the sgl prec. limits
	lsl.l		%d2, %d3		# shift g-r bits to MSB of d3
	mov.l		FTEMP_HI(%a0), %d2	# get word 2 for s-bit test
	and.l		&0x0000003f, %d2	# s bit is the or of all other
	bne.b		ext_grs_st_stky		# bits to the right of g-r
	tst.l		FTEMP_LO(%a0)		# test lower mantissa
	bne.b		ext_grs_st_stky		# if any are set, set sticky
	tst.l		%d0			# test original g,r,s
	bne.b		ext_grs_st_stky		# if any are set, set sticky
	bra.b		ext_grs_end_sd		# if words 3 and 4 are clr, exit
6395
6396 #
6397 # dbl:
6398 # 96 64 32 11 0
6399 # -----------------------------------------------------
6400 # | EXP |XXXXXXX| | |xx |grs|
6401 # -----------------------------------------------------
6402 # nn\ /
6403 # ee -------
6404 # ww |
6405 # v
6406 # gr new sticky
6407 #
ext_grs_dbl:
	bfextu		FTEMP_LO(%a0){&21:&2}, %d3 # dbl-prec. g-r are 2 bits right
	mov.l		&30, %d2		# of the dbl prec. limits
	lsl.l		%d2, %d3		# shift g-r bits to the MSB of d3
	mov.l		FTEMP_LO(%a0), %d2	# get lower mantissa for s-bit test
	and.l		&0x000001ff, %d2	# s bit is the or-ing of all
	bne.b		ext_grs_st_stky		# other bits to the right of g-r
	tst.l		%d0			# test original g,r,s
	bne.b		ext_grs_st_stky		# if any are set, set sticky
	bra.b		ext_grs_end_sd		# if clear, exit

ext_grs_st_stky:
	bset		&rnd_stky_bit, %d3	# set sticky bit
ext_grs_end_sd:
	mov.l		%d3, %d0		# return grs to d0

	movm.l		(%sp)+, &0xc		# restore scratch registers {d2/d3}

	swap		%d1			# restore d1 to original
	rts
6428
6429 #########################################################################
6430 # norm(): normalize the mantissa of an extended precision input. the #
6431 # input operand should not be normalized already. #
6432 # #
6433 # XDEF **************************************************************** #
6434 # norm() #
6435 # #
6436 # XREF **************************************************************** #
6437 # none #
6438 # #
6439 # INPUT *************************************************************** #
6440 # a0 = pointer fp extended precision operand to normalize #
6441 # #
6442 # OUTPUT ************************************************************** #
6443 # d0 = number of bit positions the mantissa was shifted #
6444 # a0 = the input operand's mantissa is normalized; the exponent #
6445 # is unchanged. #
6446 # #
6447 #########################################################################
	global		norm
norm:
	mov.l		%d2, -(%sp)		# create some temp regs
	mov.l		%d3, -(%sp)

	mov.l		FTEMP_HI(%a0), %d0	# load hi(mantissa)
	mov.l		FTEMP_LO(%a0), %d1	# load lo(mantissa)

# bfffo returns the offset of the first set bit = the left-shift count
# needed to normalize; it sets Z when the whole 32-bit field is clear.
	bfffo		%d0{&0:&32}, %d2	# how many places to shift?
	beq.b		norm_lo			# hi(man) is all zeroes!

# NOTE(review): per the routine header, the caller guarantees the input is
# not already normalized, so %d2 is nonzero here. (A zero width operand to
# bfextu below would be interpreted as 32 bits.)
norm_hi:
	lsl.l		%d2, %d0		# left shift hi(man)
	bfextu		%d1{&0:%d2}, %d3	# extract top %d2 bits of lo(man)

	or.l		%d3, %d0		# create hi(man): shifted-in lo bits
	lsl.l		%d2, %d1		# create lo(man)

	mov.l		%d0, FTEMP_HI(%a0)	# store new hi(man)
	mov.l		%d1, FTEMP_LO(%a0)	# store new lo(man)

	mov.l		%d2, %d0		# return shift amount

	mov.l		(%sp)+, %d3		# restore temp regs
	mov.l		(%sp)+, %d2

	rts

# hi(man) was all zeroes: lo(man) supplies the whole normalized mantissa
# and the total shift amount is (32 + shift within lo(man)).
norm_lo:
	bfffo		%d1{&0:&32}, %d2	# how many places to shift?
	lsl.l		%d2, %d1		# shift lo(man)
	add.l		&32, %d2		# add 32 to shft amount

	mov.l		%d1, FTEMP_HI(%a0)	# store hi(man)
	clr.l		FTEMP_LO(%a0)		# lo(man) is now zero

	mov.l		%d2, %d0		# return shift amount

	mov.l		(%sp)+, %d3		# restore temp regs
	mov.l		(%sp)+, %d2

	rts
6490
6491 #########################################################################
6492 # unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO #
6493 # - returns corresponding optype tag #
6494 # #
6495 # XDEF **************************************************************** #
6496 # unnorm_fix() #
6497 # #
6498 # XREF **************************************************************** #
6499 # norm() - normalize the mantissa #
6500 # #
6501 # INPUT *************************************************************** #
6502 # a0 = pointer to unnormalized extended precision number #
6503 # #
6504 # OUTPUT ************************************************************** #
6505 # d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO #
6506 # a0 = input operand has been converted to a norm, denorm, or #
6507 # zero; both the exponent and mantissa are changed. #
6508 # #
6509 #########################################################################
6510
	global		unnorm_fix
unnorm_fix:
	bfffo		FTEMP_HI(%a0){&0:&32}, %d0 # how many shifts are needed?
	bne.b		unnorm_shift		# hi(man) is not all zeroes

#
# hi(man) is all zeroes so see if any bits in lo(man) are set
#
unnorm_chk_lo:
	bfffo		FTEMP_LO(%a0){&0:&32}, %d0 # is operand really a zero?
	beq.w		unnorm_zero		# yes

	add.w		&32, %d0		# no; fix shift distance

#
# d0 = # shifts needed for complete normalization
#
unnorm_shift:
	clr.l		%d1			# clear top word
	mov.w		FTEMP_EX(%a0), %d1	# extract exponent
	and.w		&0x7fff, %d1		# strip off sgn

# compare the required shift count (d0) against the biased exponent (d1);
# branch taken when d0 > d1, i.e. full normalization would drive exp < 0
	cmp.w		%d0, %d1		# will denorm push exp < 0?
	bgt.b		unnorm_nrm_zero		# yes; denorm only until exp = 0

#
# exponent would not go < 0. therefore, number stays normalized
#
	sub.w		%d0, %d1		# shift exponent value
	mov.w		FTEMP_EX(%a0), %d0	# load old exponent
	and.w		&0x8000, %d0		# save old sign
	or.w		%d0, %d1		# {sgn,new exp}
	mov.w		%d1, FTEMP_EX(%a0)	# insert new exponent

	bsr.l		norm			# normalize UNNORM

	mov.b		&NORM, %d0		# return new optype tag
	rts

#
# exponent would go < 0, so only denormalize until exp = 0
#
unnorm_nrm_zero:
	cmp.b		%d1, &32		# is exp <= 32?
	bgt.b		unnorm_nrm_zero_lrg	# no; go handle large exponent

# shift left by exp (d1): new hi(man) takes bits from both longs
	bfextu		FTEMP_HI(%a0){%d1:&32}, %d0 # extract new hi(man)
	mov.l		%d0, FTEMP_HI(%a0)	# save new hi(man)

	mov.l		FTEMP_LO(%a0), %d0	# fetch old lo(man)
	lsl.l		%d1, %d0		# extract new lo(man)
	mov.l		%d0, FTEMP_LO(%a0)	# save new lo(man)

	and.w		&0x8000, FTEMP_EX(%a0)	# set exp = 0

	mov.b		&DENORM, %d0		# return new optype tag
	rts

#
# only mantissa bits set are in lo(man)
# (exp > 32 here implies shift-needed > 32, which only happens when
# hi(man) was all zeroes -- see unnorm_chk_lo)
#
unnorm_nrm_zero_lrg:
	sub.w		&32, %d1		# adjust shft amt by 32

	mov.l		FTEMP_LO(%a0), %d0	# fetch old lo(man)
	lsl.l		%d1, %d0		# left shift lo(man)

	mov.l		%d0, FTEMP_HI(%a0)	# store new hi(man)
	clr.l		FTEMP_LO(%a0)		# lo(man) = 0

	and.w		&0x8000, FTEMP_EX(%a0)	# set exp = 0

	mov.b		&DENORM, %d0		# return new optype tag
	rts

#
# whole mantissa is zero so this UNNORM is actually a zero
#
unnorm_zero:
	and.w		&0x8000, FTEMP_EX(%a0)	# force exponent to zero

	mov.b		&ZERO, %d0		# fix optype tag
	rts
6594
6595 #########################################################################
6596 # XDEF **************************************************************** #
6597 # set_tag_x(): return the optype of the input ext fp number #
6598 # #
6599 # XREF **************************************************************** #
6600 # None #
6601 # #
6602 # INPUT *************************************************************** #
6603 # a0 = pointer to extended precision operand #
6604 # #
6605 # OUTPUT ************************************************************** #
6606 # d0 = value of type tag #
6607 # one of: NORM, INF, QNAN, SNAN, DENORM, UNNORM, ZERO #
6608 # #
6609 # ALGORITHM *********************************************************** #
6610 # Simply test the exponent, j-bit, and mantissa values to #
6611 # determine the type of operand. #
6612 # If it's an unnormalized zero, alter the operand and force it #
6613 # to be a normal zero. #
6614 # #
6615 #########################################################################
6616
	global		set_tag_x
set_tag_x:
	mov.w		FTEMP_EX(%a0), %d0	# extract exponent
	andi.w		&0x7fff, %d0		# strip off sign
	cmpi.w		%d0, &0x7fff		# is (EXP == MAX)?
	beq.b		inf_or_nan_x
not_inf_or_nan_x:
# test the j-bit (explicit integer bit = msb of hi(man));
# set j-bit with a non-max exponent => NORM
	btst		&0x7,FTEMP_HI(%a0)
	beq.b		not_norm_x
is_norm_x:
	mov.b		&NORM, %d0
	rts
not_norm_x:
	tst.w		%d0			# is exponent = 0?
	bne.b		is_unnorm_x
not_unnorm_x:
# exp = 0, j-bit clear: DENORM if any mantissa bit set, else ZERO
	tst.l		FTEMP_HI(%a0)
	bne.b		is_denorm_x
	tst.l		FTEMP_LO(%a0)
	bne.b		is_denorm_x
is_zero_x:
	mov.b		&ZERO, %d0
	rts
is_denorm_x:
	mov.b		&DENORM, %d0
	rts
# must distinguish now "Unnormalized zeroes" which we
# must convert to zero.
is_unnorm_x:
	tst.l		FTEMP_HI(%a0)
	bne.b		is_unnorm_reg_x
	tst.l		FTEMP_LO(%a0)
	bne.b		is_unnorm_reg_x
# it's an "unnormalized zero". let's convert it to an actual zero...
	andi.w		&0x8000,FTEMP_EX(%a0)	# clear exponent, keep sign
	mov.b		&ZERO, %d0
	rts
is_unnorm_reg_x:
	mov.b		&UNNORM, %d0
	rts
# exp = max: INF if mantissa (ignoring the j-bit) is zero, else NaN
inf_or_nan_x:
	tst.l		FTEMP_LO(%a0)
	bne.b		is_nan_x
	mov.l		FTEMP_HI(%a0), %d0
	and.l		&0x7fffffff, %d0	# msb (j-bit) is a don't care!
	bne.b		is_nan_x
is_inf_x:
	mov.b		&INF, %d0
	rts
is_nan_x:
# bit 62 of the mantissa (bit 6 of the hi byte) distinguishes QNAN/SNAN
	btst		&0x6, FTEMP_HI(%a0)
	beq.b		is_snan_x
	mov.b		&QNAN, %d0
	rts
is_snan_x:
	mov.b		&SNAN, %d0
	rts
6674
6675 #########################################################################
6676 # XDEF **************************************************************** #
6677 # set_tag_d(): return the optype of the input dbl fp number #
6678 # #
6679 # XREF **************************************************************** #
6680 # None #
6681 # #
6682 # INPUT *************************************************************** #
6683 # a0 = points to double precision operand #
6684 # #
6685 # OUTPUT ************************************************************** #
6686 # d0 = value of type tag #
6687 # one of: NORM, INF, QNAN, SNAN, DENORM, ZERO #
6688 # #
6689 # ALGORITHM *********************************************************** #
6690 # Simply test the exponent, j-bit, and mantissa values to #
6691 # determine the type of operand. #
6692 # #
6693 #########################################################################
6694
	global		set_tag_d
set_tag_d:
# d0/d1 = first long of the dbl operand: sign, 11-bit exp, hi mantissa
	mov.l		FTEMP(%a0), %d0
	mov.l		%d0, %d1

	andi.l		&0x7ff00000, %d0	# isolate exponent field
	beq.b		zero_or_denorm_d

	cmpi.l		%d0, &0x7ff00000	# exp = max?
	beq.b		inf_or_nan_d

is_norm_d:
	mov.b		&NORM, %d0
	rts
# exp = 0: ZERO if the entire 52-bit mantissa is clear, else DENORM
zero_or_denorm_d:
	and.l		&0x000fffff, %d1	# hi 20 mantissa bits
	bne		is_denorm_d
	tst.l		4+FTEMP(%a0)		# lo 32 mantissa bits
	bne		is_denorm_d
is_zero_d:
	mov.b		&ZERO, %d0
	rts
is_denorm_d:
	mov.b		&DENORM, %d0
	rts
# exp = max: INF if the entire mantissa is clear, else NaN
inf_or_nan_d:
	and.l		&0x000fffff, %d1	# hi 20 mantissa bits
	bne		is_nan_d
	tst.l		4+FTEMP(%a0)		# lo 32 mantissa bits
	bne		is_nan_d
is_inf_d:
	mov.b		&INF, %d0
	rts
is_nan_d:
# msb of the mantissa (bit 19 of the hi long) set => quiet NaN
	btst		&19, %d1
	bne		is_qnan_d
is_snan_d:
	mov.b		&SNAN, %d0
	rts
is_qnan_d:
	mov.b		&QNAN, %d0
	rts
6737
6738 #########################################################################
6739 # XDEF **************************************************************** #
6740 # set_tag_s(): return the optype of the input sgl fp number #
6741 # #
6742 # XREF **************************************************************** #
6743 # None #
6744 # #
6745 # INPUT *************************************************************** #
6746 # a0 = pointer to single precision operand #
6747 # #
6748 # OUTPUT ************************************************************** #
6749 # d0 = value of type tag #
6750 # one of: NORM, INF, QNAN, SNAN, DENORM, ZERO #
6751 # #
6752 # ALGORITHM *********************************************************** #
6753 # Simply test the exponent, j-bit, and mantissa values to #
6754 # determine the type of operand. #
6755 # #
6756 #########################################################################
6757
	global		set_tag_s
set_tag_s:
# d0/d1 = the sgl operand: sign, 8-bit exp, 23-bit mantissa
	mov.l		FTEMP(%a0), %d0
	mov.l		%d0, %d1

	andi.l		&0x7f800000, %d0	# isolate exponent field
	beq.b		zero_or_denorm_s

	cmpi.l		%d0, &0x7f800000	# exp = max?
	beq.b		inf_or_nan_s

is_norm_s:
	mov.b		&NORM, %d0
	rts
# exp = 0: ZERO if the mantissa is clear, else DENORM
zero_or_denorm_s:
	and.l		&0x007fffff, %d1	# isolate mantissa
	bne		is_denorm_s
is_zero_s:
	mov.b		&ZERO, %d0
	rts
is_denorm_s:
	mov.b		&DENORM, %d0
	rts
# exp = max: INF if the mantissa is clear, else NaN
inf_or_nan_s:
	and.l		&0x007fffff, %d1	# isolate mantissa
	bne		is_nan_s
is_inf_s:
	mov.b		&INF, %d0
	rts
is_nan_s:
# msb of the mantissa (bit 22) set => quiet NaN
	btst		&22, %d1
	bne		is_qnan_s
is_snan_s:
	mov.b		&SNAN, %d0
	rts
is_qnan_s:
	mov.b		&QNAN, %d0
	rts
6796
6797 #########################################################################
6798 # XDEF **************************************************************** #
6799 # unf_res(): routine to produce default underflow result of a #
6800 # scaled extended precision number; this is used by #
6801 # fadd/fdiv/fmul/etc. emulation routines. #
6802 # unf_res4(): same as above but for fsglmul/fsgldiv which use #
6803 # single round prec and extended prec mode. #
6804 # #
6805 # XREF **************************************************************** #
6806 # _denorm() - denormalize according to scale factor #
6807 # _round() - round denormalized number according to rnd prec #
6808 # #
6809 # INPUT *************************************************************** #
#	a0 = pointer to extended precision operand			#
6811 # d0 = scale factor #
6812 # d1 = rounding precision/mode #
6813 # #
6814 # OUTPUT ************************************************************** #
6815 # a0 = pointer to default underflow result in extended precision #
6816 # d0.b = result FPSR_cc which caller may or may not want to save #
6817 # #
6818 # ALGORITHM *********************************************************** #
6819 # Convert the input operand to "internal format" which means the #
6820 # exponent is extended to 16 bits and the sign is stored in the unused #
# portion of the extended precision operand. Denormalize the number	#
6822 # according to the scale factor passed in d0. Then, round the #
6823 # denormalized result. #
6824 # Set the FPSR_exc bits as appropriate but return the cc bits in #
6825 # d0 in case the caller doesn't want to save them (as is the case for #
6826 # fmove out). #
6827 # unf_res4() for fsglmul/fsgldiv forces the denorm to extended #
6828 # precision and the rounding mode to single. #
6829 # #
6830 #########################################################################
	global		unf_res
unf_res:
	mov.l		%d1, -(%sp)		# save rnd prec,mode on stack

# "internal" format: sign is removed from the exponent word and kept
# as a byte flag at FTEMP_SGN instead
	btst		&0x7, FTEMP_EX(%a0)	# make "internal" format
	sne		FTEMP_SGN(%a0)

	mov.w		FTEMP_EX(%a0), %d1	# extract exponent
	and.w		&0x7fff, %d1
	sub.w		%d0, %d1		# apply scale factor
	mov.w		%d1, FTEMP_EX(%a0)	# insert 16 bit exponent

	mov.l		%a0, -(%sp)		# save operand ptr during calls

# stack layout here: (%sp) = operand ptr, 0x4(%sp) = saved d1
# (rnd prec,mode); 0x6(%sp) = its low word
	mov.l		0x4(%sp),%d0		# pass rnd prec.
	andi.w		&0x00c0,%d0		# isolate prec field (bits 7:6)
	lsr.w		&0x4,%d0
	bsr.l		_denorm			# denorm result

	mov.l		(%sp),%a0
# _round() wants hi(d1) = rnd prec, lo(d1) = rnd mode
	mov.w		0x6(%sp),%d1		# load prec:mode into %d1
	andi.w		&0xc0,%d1		# extract rnd prec
	lsr.w		&0x4,%d1
	swap		%d1			# prec into upper word
	mov.w		0x6(%sp),%d1
	andi.w		&0x30,%d1		# extract rnd mode
	lsr.w		&0x4,%d1
	bsr.l		_round			# round the denorm

	mov.l		(%sp)+, %a0

# result is now rounded properly. convert back to normal format
	bclr		&0x7, FTEMP_EX(%a0)	# clear sgn first; may have residue
	tst.b		FTEMP_SGN(%a0)		# is "internal result" sign set?
	beq.b		unf_res_chkifzero	# no; result is positive
	bset		&0x7, FTEMP_EX(%a0)	# set result sgn
	clr.b		FTEMP_SGN(%a0)		# clear temp sign

# the number may have become zero after rounding. set ccodes accordingly.
unf_res_chkifzero:
	clr.l		%d0
	tst.l		FTEMP_HI(%a0)		# is value now a zero?
	bne.b		unf_res_cont		# no
	tst.l		FTEMP_LO(%a0)
	bne.b		unf_res_cont		# no
#	bset		&z_bit, FPSR_CC(%a6)	# yes; set zero ccode bit
	bset		&z_bit, %d0		# yes; set zero ccode bit

unf_res_cont:

#
# can inex1 also be set along with unfl and inex2???
#
# we know that underflow has occurred. aunfl should be set if INEX2 is also set.
#
	btst		&inex2_bit, FPSR_EXCEPT(%a6) # is INEX2 set?
	beq.b		unf_res_end		# no
	bset		&aunfl_bit, FPSR_AEXCEPT(%a6) # yes; set aunfl

unf_res_end:
	add.l		&0x4, %sp		# clear stack (saved d1)
	rts
6893
6894 # unf_res() for fsglmul() and fsgldiv().
	global		unf_res4
unf_res4:
	mov.l		%d1,-(%sp)		# save rnd prec,mode on stack

# "internal" format: sign moved out of the exponent word into FTEMP_SGN
	btst		&0x7,FTEMP_EX(%a0)	# make "internal" format
	sne		FTEMP_SGN(%a0)

	mov.w		FTEMP_EX(%a0),%d1	# extract exponent
	and.w		&0x7fff,%d1
	sub.w		%d0,%d1			# apply scale factor
	mov.w		%d1,FTEMP_EX(%a0)	# insert 16 bit exponent

	mov.l		%a0,-(%sp)		# save operand ptr during calls

# fsglmul/fsgldiv: denorm at extended precision...
	clr.l		%d0			# force rnd prec = ext
	bsr.l		_denorm			# denorm result

	mov.l		(%sp),%a0
# ...but round at single precision with the caller's rnd mode
# (hi(d1) = prec, lo(d1) = mode for _round; 0x6(%sp) = saved d1 low word)
	mov.w		&s_mode,%d1		# force rnd prec = sgl
	swap		%d1			# prec into upper word
	mov.w		0x6(%sp),%d1		# load rnd mode
	andi.w		&0x30,%d1		# extract rnd mode
	lsr.w		&0x4,%d1
	bsr.l		_round			# round the denorm

	mov.l		(%sp)+,%a0

# result is now rounded properly. convert back to normal format
	bclr		&0x7,FTEMP_EX(%a0)	# clear sgn first; may have residue
	tst.b		FTEMP_SGN(%a0)		# is "internal result" sign set?
	beq.b		unf_res4_chkifzero	# no; result is positive
	bset		&0x7,FTEMP_EX(%a0)	# set result sgn
	clr.b		FTEMP_SGN(%a0)		# clear temp sign

# the number may have become zero after rounding. set ccodes accordingly.
unf_res4_chkifzero:
	clr.l		%d0
	tst.l		FTEMP_HI(%a0)		# is value now a zero?
	bne.b		unf_res4_cont		# no
	tst.l		FTEMP_LO(%a0)
	bne.b		unf_res4_cont		# no
#	bset		&z_bit,FPSR_CC(%a6)	# yes; set zero ccode bit
	bset		&z_bit,%d0		# yes; set zero ccode bit

unf_res4_cont:

#
# can inex1 also be set along with unfl and inex2???
#
# we know that underflow has occurred. aunfl should be set if INEX2 is also set.
#
	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
	beq.b		unf_res4_end		# no
	bset		&aunfl_bit,FPSR_AEXCEPT(%a6) # yes; set aunfl

unf_res4_end:
	add.l		&0x4,%sp		# clear stack (saved d1)
	rts
6953
6954 #########################################################################
6955 # XDEF **************************************************************** #
6956 # ovf_res(): routine to produce the default overflow result of #
6957 # an overflowing number. #
6958 # ovf_res2(): same as above but the rnd mode/prec are passed #
6959 # differently. #
6960 # #
6961 # XREF **************************************************************** #
6962 # none #
6963 # #
6964 # INPUT *************************************************************** #
6965 # d1.b = '-1' => (-); '0' => (+) #
6966 # ovf_res(): #
6967 # d0 = rnd mode/prec #
6968 # ovf_res2(): #
6969 # hi(d0) = rnd prec #
6970 # lo(d0) = rnd mode #
6971 # #
6972 # OUTPUT ************************************************************** #
6973 # a0 = points to extended precision result #
6974 # d0.b = condition code bits #
6975 # #
6976 # ALGORITHM *********************************************************** #
6977 # The default overflow result can be determined by the sign of #
6978 # the result and the rounding mode/prec in effect. These bits are #
6979 # concatenated together to create an index into the default result #
6980 # table. A pointer to the correct result is returned in a0. The #
6981 # resulting condition codes are returned in d0 in case the caller #
6982 # doesn't want FPSR_cc altered (as is the case for fmove out). #
6983 # #
6984 #########################################################################
6985
	global		ovf_res
ovf_res:
# build index = {sign(bit 4), prec(bits 3:2), mode(bits 1:0)}
	andi.w		&0x10,%d1		# keep result sign
	lsr.b		&0x4,%d0		# shift prec/mode down
	or.b		%d0,%d1			# concat the two
	mov.w		%d1,%d0			# make a copy for cc table
	lsl.b		&0x1,%d1		# multiply d1 by 2
	bra.b		ovf_res_load

	global		ovf_res2
ovf_res2:
# same index, but prec arrives in hi(d0) and mode in lo(d0)
	and.w		&0x10, %d1		# keep result sign
	or.b		%d0, %d1		# insert rnd mode
	swap		%d0
	or.b		%d0, %d1		# insert rnd prec
	mov.w		%d1, %d0		# make a copy for cc table
	lsl.b		&0x1, %d1		# shift left by 1

#
# use the rounding mode, precision, and result sign as an index into the
# two tables below to fetch the default result and the result ccodes.
# ccode table is indexed by bytes; result table entries are 16 bytes
# (4 longs) each, hence (2*index)*8.
#
ovf_res_load:
	mov.b		(tbl_ovfl_cc.b,%pc,%d0.w*1), %d0 # fetch result ccodes
	lea		(tbl_ovfl_result.b,%pc,%d1.w*8), %a0 # return result ptr

	rts

# condition codes per {sign,prec,mode}: 0x2 = INF bit, 0x8 = N bit
tbl_ovfl_cc:
	byte		0x2, 0x0, 0x0, 0x2
	byte		0x2, 0x0, 0x0, 0x2
	byte		0x2, 0x0, 0x0, 0x2
	byte		0x0, 0x0, 0x0, 0x0
	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8
	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8
	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8

# default overflow results in extended format; rows are mode RN/RZ/RM/RP
# within each {sign,prec} group. RZ and round-toward-sign-of-zero modes
# yield the largest finite number for the precision; the rest yield INF.
tbl_ovfl_result:
	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
	long		0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RZ
	long		0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RM
	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP

	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
	long		0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RZ
	long		0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RM
	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP

	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
	long		0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RZ
	long		0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RM
	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP

	long		0x00000000,0x00000000,0x00000000,0x00000000
	long		0x00000000,0x00000000,0x00000000,0x00000000
	long		0x00000000,0x00000000,0x00000000,0x00000000
	long		0x00000000,0x00000000,0x00000000,0x00000000

	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
	long		0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RZ
	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
	long		0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RP

	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
	long		0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RZ
	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
	long		0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RP

	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
	long		0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RZ
	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
	long		0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RP
7058
7059 #########################################################################
7060 # XDEF **************************************************************** #
7061 # fout(): move from fp register to memory or data register #
7062 # #
7063 # XREF **************************************************************** #
7064 # _round() - needed to create EXOP for sgl/dbl precision #
7065 # norm() - needed to create EXOP for extended precision #
7066 # ovf_res() - create default overflow result for sgl/dbl precision#
7067 # unf_res() - create default underflow result for sgl/dbl prec. #
7068 # dst_dbl() - create rounded dbl precision result. #
7069 # dst_sgl() - create rounded sgl precision result. #
7070 # fetch_dreg() - fetch dynamic k-factor reg for packed. #
7071 # bindec() - convert FP binary number to packed number. #
7072 # _mem_write() - write data to memory. #
7073 # _mem_write2() - write data to memory unless supv mode -(a7) exc.#
7074 # _dmem_write_{byte,word,long}() - write data to memory. #
7075 # store_dreg_{b,w,l}() - store data to data register file. #
7076 # facc_out_{b,w,l,d,x}() - data access error occurred. #
7077 # #
7078 # INPUT *************************************************************** #
7079 # a0 = pointer to extended precision source operand #
7080 # d0 = round prec,mode #
7081 # #
7082 # OUTPUT ************************************************************** #
7083 # fp0 : intermediate underflow or overflow result if #
7084 # OVFL/UNFL occurred for a sgl or dbl operand #
7085 # #
7086 # ALGORITHM *********************************************************** #
7087 # This routine is accessed by many handlers that need to do an #
7088 # opclass three move of an operand out to memory. #
7089 # Decode an fmove out (opclass 3) instruction to determine if #
7090 # it's b,w,l,s,d,x, or p in size. b,w,l can be stored to either a data #
7091 # register or memory. The algorithm uses a standard "fmove" to create #
7092 # the rounded result. Also, since exceptions are disabled, this also #
# creates the correct OPERR default result if appropriate.		#
7094 # For sgl or dbl precision, overflow or underflow can occur. If #
# either occurs and is enabled, the EXOP is created.			#
7096 # For extended precision, the stacked <ea> must be fixed along #
7097 # w/ the address index register as appropriate w/ _calc_ea_fout(). If #
7098 # the source is a denorm and if underflow is enabled, an EXOP must be #
7099 # created. #
7100 # For packed, the k-factor must be fetched from the instruction #
7101 # word or a data register. The <ea> must be fixed as w/ extended #
7102 # precision. Then, bindec() is called to create the appropriate #
7103 # packed result. #
7104 # If at any time an access error is flagged by one of the move- #
7105 # to-memory routines, then a special exit must be made so that the #
7106 # access error can be handled properly. #
7107 # #
7108 #########################################################################
7109
	global		fout
fout:
# dispatch on the 3-bit destination-format field of the instruction
	bfextu		EXC_CMDREG(%a6){&3:&3},%d1 # extract dst fmt
	mov.w		(tbl_fout.b,%pc,%d1.w*2),%a1 # fetch word offset
	jmp		(tbl_fout.b,%pc,%a1)	# jump to routine

	swbeg		&0x8
# jump table of pc-relative word offsets, indexed by dst format code
tbl_fout:
	short		fout_long	-	tbl_fout
	short		fout_sgl	-	tbl_fout
	short		fout_ext	-	tbl_fout
	short		fout_pack	-	tbl_fout
	short		fout_word	-	tbl_fout
	short		fout_dbl	-	tbl_fout
	short		fout_byte	-	tbl_fout
	short		fout_pack	-	tbl_fout
7126
7127 #################################################################
7128 # fmove.b out ###################################################
7129 #################################################################
7130
7131 # Only "Unimplemented Data Type" exceptions enter here. The operand
7132 # is either a DENORM or a NORM.
fout_byte:
	tst.b		STAG(%a6)		# is operand normalized?
	bne.b		fout_byte_denorm	# no

	fmovm.x		SRC(%a0),&0x80		# load value into fp0

fout_byte_norm:
	fmov.l		%d0,%fpcr		# insert rnd prec,mode

# let the FPU do the convert-and-round; exceptions are recorded in FPSR
	fmov.b		%fp0,%d0		# exec move out w/ correct rnd mode

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# fetch FPSR
	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits

	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
	beq.b		fout_byte_dn		# must save to integer regfile

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	bsr.l		_dmem_write_byte	# write byte

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_b		# yes

	rts

fout_byte_dn:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
	andi.w		&0x7,%d1
	bsr.l		store_dreg_b
	rts

# DENORM source: substitute the smallest-magnitude sgl NORM with the
# same sign so the fmov.b above produces the correct (zero) result
# and exception bits
fout_byte_denorm:
	mov.l		SRC_EX(%a0),%d1
	andi.l		&0x80000000,%d1		# keep DENORM sign
	ori.l		&0x00800000,%d1		# make smallest sgl
	fmov.s		%d1,%fp0
	bra.b		fout_byte_norm
7172
7173 #################################################################
7174 # fmove.w out ###################################################
7175 #################################################################
7176
7177 # Only "Unimplemented Data Type" exceptions enter here. The operand
7178 # is either a DENORM or a NORM.
fout_word:
	tst.b		STAG(%a6)		# is operand normalized?
	bne.b		fout_word_denorm	# no

	fmovm.x		SRC(%a0),&0x80		# load value into fp0

fout_word_norm:
	fmov.l		%d0,%fpcr		# insert rnd prec:mode

# let the FPU do the convert-and-round; exceptions are recorded in FPSR
	fmov.w		%fp0,%d0		# exec move out w/ correct rnd mode

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# fetch FPSR
	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits

	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
	beq.b		fout_word_dn		# must save to integer regfile

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	bsr.l		_dmem_write_word	# write word

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_w		# yes

	rts

fout_word_dn:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
	andi.w		&0x7,%d1
	bsr.l		store_dreg_w
	rts

# DENORM source: substitute the smallest-magnitude sgl NORM with the
# same sign so the fmov.w above produces the correct result/exceptions
fout_word_denorm:
	mov.l		SRC_EX(%a0),%d1
	andi.l		&0x80000000,%d1		# keep DENORM sign
	ori.l		&0x00800000,%d1		# make smallest sgl
	fmov.s		%d1,%fp0
	bra.b		fout_word_norm
7218
7219 #################################################################
7220 # fmove.l out ###################################################
7221 #################################################################
7222
7223 # Only "Unimplemented Data Type" exceptions enter here. The operand
7224 # is either a DENORM or a NORM.
fout_long:
	tst.b		STAG(%a6)		# is operand normalized?
	bne.b		fout_long_denorm	# no

	fmovm.x		SRC(%a0),&0x80		# load value into fp0

fout_long_norm:
	fmov.l		%d0,%fpcr		# insert rnd prec:mode

# let the FPU do the convert-and-round; exceptions are recorded in FPSR
	fmov.l		%fp0,%d0		# exec move out w/ correct rnd mode

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# fetch FPSR
	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits

fout_long_write:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
	beq.b		fout_long_dn		# must save to integer regfile

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	bsr.l		_dmem_write_long	# write long

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_l		# yes

	rts

fout_long_dn:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
	andi.w		&0x7,%d1
	bsr.l		store_dreg_l
	rts

# DENORM source: substitute the smallest-magnitude sgl NORM with the
# same sign so the fmov.l above produces the correct result/exceptions
fout_long_denorm:
	mov.l		SRC_EX(%a0),%d1
	andi.l		&0x80000000,%d1		# keep DENORM sign
	ori.l		&0x00800000,%d1		# make smallest sgl
	fmov.s		%d1,%fp0
	bra.b		fout_long_norm
7265
7266 #################################################################
7267 # fmove.x out ###################################################
7268 #################################################################
7269
7270 # Only "Unimplemented Data Type" exceptions enter here. The operand
7271 # is either a DENORM or a NORM.
7272 # The DENORM causes an Underflow exception.
fout_ext:

# we copy the extended precision result to FP_SCR0 so that the reserved
# 16-bit field gets zeroed. we do this since we promise not to disturb
# what's at SRC(a0).
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	clr.w		2+FP_SCR0_EX(%a6)	# clear reserved field
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	fmovm.x		SRC(%a0),&0x80		# return result in fp0

	bsr.l		_calc_ea_fout		# fix stacked <ea>

	mov.l		%a0,%a1			# pass: dst addr
	lea		FP_SCR0(%a6),%a0	# pass: src addr
	mov.l		&0xc,%d0		# pass: opsize is 12 bytes

# we must not yet write the extended precision data to the stack
# in the pre-decrement case from supervisor mode or else we'll corrupt
# the stack frame. so, leave it in FP_SRC for now and deal with it later...
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
	beq.b		fout_ext_a7

	bsr.l		_dmem_write		# write ext prec number to memory

	tst.l		%d1			# did dstore fail?
	bne.w		fout_ext_err		# yes

	tst.b		STAG(%a6)		# is operand normalized?
	bne.b		fout_ext_denorm		# no
	rts

# the number is a DENORM. must set the underflow exception bit
fout_ext_denorm:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set underflow exc bit

	mov.b		FPCR_ENABLE(%a6),%d0
	andi.b		&0x0a,%d0		# is UNFL or INEX enabled?
	bne.b		fout_ext_exc		# yes
	rts

# we don't want to do the write if the exception occurred in supervisor mode
# so _mem_write2() handles this for us.
fout_ext_a7:
	bsr.l		_mem_write2		# write ext prec number to memory

	tst.l		%d1			# did dstore fail?
	bne.w		fout_ext_err		# yes

	tst.b		STAG(%a6)		# is operand normalized?
	bne.b		fout_ext_denorm		# no
	rts

# build the EXOP: normalize the DENORM mantissa and set the exponent to
# the negated shift count (mod 0x8000), keeping the original sign
fout_ext_exc:
	lea		FP_SCR0(%a6),%a0
	bsr.l		norm			# normalize the mantissa
	neg.w		%d0			# new exp = -(shft amt)
	andi.w		&0x7fff,%d0
	andi.w		&0x8000,FP_SCR0_EX(%a6)	# keep only old sign
	or.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	rts

fout_ext_err:
	mov.l		EXC_A6(%a6),(%a6)	# fix stacked a6
	bra.l		facc_out_x
7340
7341 #########################################################################
7342 # fmove.s out ###########################################################
7343 #########################################################################
fout_sgl:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl prec
	mov.l		%d0,L_SCR3(%a6)		# save rnd prec,mode on stack

#
# operand is a normalized number. first, we check to see if the move out
# would cause either an underflow or overflow. these cases are handled
# separately. otherwise, set the FPCR to the proper rounding mode and
# execute the move.
#
	mov.w		SRC_EX(%a0),%d0		# extract exponent
	andi.w		&0x7fff,%d0		# strip sign

	cmpi.w		%d0,&SGL_HI		# will operand overflow?
	bgt.w		fout_sgl_ovfl		# yes; go handle OVFL
	beq.w		fout_sgl_may_ovfl	# maybe; go handle possible OVFL
	cmpi.w		%d0,&SGL_LO		# will operand underflow?
	blt.w		fout_sgl_unfl		# yes; go handle underflow

#
# NORMs(in range) can be stored out by a simple "fmov.s"
# Unnormalized inputs can come through this point.
#
fout_sgl_exg:
	fmovm.x		SRC(%a0),&0x80		# fetch fop from stack

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fmov.s		%fp0,%d0		# store does convert and round

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# save FPSR

	or.w		%d1,2+USER_FPSR(%a6)	# set possible inex2/ainex

# store the 32-bit sgl image either to memory or to Dn
fout_sgl_exg_write:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
	beq.b		fout_sgl_exg_write_dn	# must save to integer regfile

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	bsr.l		_dmem_write_long	# write long

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_l		# yes

	rts

fout_sgl_exg_write_dn:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
	andi.w		&0x7,%d1
	bsr.l		store_dreg_l
	rts
7399
7400 #
7401 # here, we know that the operand would UNFL if moved out to single prec,
7402 # so, denorm and round and then use generic store single routine to
7403 # write the value to memory.
7404 #
7405 fout_sgl_unfl:
7406 	bset &unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
7407 
# copy the operand to FP_SCR0 as a scratch area; a0 is pushed so the
# exception path (fout_sd_exc_unfl) can recover the original operand.
7408 	mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
7409 	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
7410 	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
7411 	mov.l %a0,-(%sp)
7412 
7413 	clr.l %d0 # pass: S.F. = 0
7414 
7415 	cmpi.b STAG(%a6),&DENORM # fetch src optype tag
7416 	bne.b fout_sgl_unfl_cont # let DENORMs fall through
7417 
7418 	lea FP_SCR0(%a6),%a0
7419 	bsr.l norm # normalize the DENORM
7420 
7421 fout_sgl_unfl_cont:
7422 	lea FP_SCR0(%a6),%a0 # pass: ptr to operand
7423 	mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
7424 	bsr.l unf_res # calc default underflow result
7425 
7426 	lea FP_SCR0(%a6),%a0 # pass: ptr to fop
7427 	bsr.l dst_sgl # convert to single prec
7428 
# write the single result out: memory <ea> or Dn per opword dst mode
7429 	mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
7430 	andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
7431 	beq.b fout_sgl_unfl_dn # must save to integer regfile
7432 
7433 	mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
7434 	bsr.l _dmem_write_long # write long
7435 
7436 	tst.l %d1 # did dstore fail?
7437 	bne.l facc_out_l # yes
7438 
7439 	bra.b fout_sgl_unfl_chkexc
7440 
7441 fout_sgl_unfl_dn:
7442 	mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
7443 	andi.w &0x7,%d1
7444 	bsr.l store_dreg_l
7445 
# if UNFL or INEX traps are enabled, build the EXOP (saved a0 still on
# stack for fout_sd_exc_unfl); else pop the saved a0 and return.
7446 fout_sgl_unfl_chkexc:
7447 	mov.b FPCR_ENABLE(%a6),%d1
7448 	andi.b &0x0a,%d1 # is UNFL or INEX enabled?
7449 	bne.w fout_sd_exc_unfl # yes
7450 	addq.l &0x4,%sp
7451 	rts
7452
7453 #
7454 # it's definitely an overflow so call ovf_res to get the correct answer
7455 #
7456 fout_sgl_ovfl:
# inexact if any mantissa bits below single precision are non-zero
7457 	tst.b 3+SRC_HI(%a0) # is result inexact?
7458 	bne.b fout_sgl_ovfl_inex2
7459 	tst.l SRC_LO(%a0) # is result inexact?
7460 	bne.b fout_sgl_ovfl_inex2
7461 	ori.w &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
7462 	bra.b fout_sgl_ovfl_cont
7463 fout_sgl_ovfl_inex2:
7464 	ori.w &ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
7465 
7466 fout_sgl_ovfl_cont:
# save a0 for the enabled-exception path (fout_sd_exc_ovfl pops it)
7467 	mov.l %a0,-(%sp)
7468 
7469 # call ovf_res() w/ sgl prec and the correct rnd mode to create the default
7470 # overflow result. DON'T save the returned ccodes from ovf_res() since
7471 # fmove out doesn't alter them.
7472 	tst.b SRC_EX(%a0) # is operand negative?
7473 	smi %d1 # set if so
7474 	mov.l L_SCR3(%a6),%d0 # pass: sgl prec,rnd mode
7475 	bsr.l ovf_res # calc OVFL result
7476 	fmovm.x (%a0),&0x80 # load default overflow result
7477 	fmov.s %fp0,%d0 # store to single
7478 
# write the single result out: memory <ea> or Dn per opword dst mode
7479 	mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
7480 	andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
7481 	beq.b fout_sgl_ovfl_dn # must save to integer regfile
7482 
7483 	mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
7484 	bsr.l _dmem_write_long # write long
7485 
7486 	tst.l %d1 # did dstore fail?
7487 	bne.l facc_out_l # yes
7488 
7489 	bra.b fout_sgl_ovfl_chkexc
7490 
7491 fout_sgl_ovfl_dn:
7492 	mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
7493 	andi.w &0x7,%d1
7494 	bsr.l store_dreg_l
7495 
# if UNFL or INEX traps are enabled, build the EXOP; else pop saved a0
7496 fout_sgl_ovfl_chkexc:
7497 	mov.b FPCR_ENABLE(%a6),%d1
7498 	andi.b &0x0a,%d1 # is UNFL or INEX enabled?
7499 	bne.w fout_sd_exc_ovfl # yes
7500 	addq.l &0x4,%sp
7501 	rts
7502
7503 #
7504 # move out MAY overflow:
7505 # (1) force the exp to 0x3fff
7506 # (2) do a move w/ appropriate rnd mode
7507 # (3) if exp still equals zero, then insert original exponent
7508 # for the correct result.
7509 # if exp now equals one, then it overflowed so call ovf_res.
7510 #
7511 fout_sgl_may_ovfl:
7512 	mov.w SRC_EX(%a0),%d1 # fetch current sign
7513 	andi.w &0x8000,%d1 # keep it,clear exp
7514 	ori.w &0x3fff,%d1 # insert exp = 0
7515 	mov.w %d1,FP_SCR0_EX(%a6) # insert scaled exp
7516 	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
7517 	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
7518 
7519 	fmov.l L_SCR3(%a6),%fpcr # set FPCR
7520 
7521 	fmov.x FP_SCR0(%a6),%fp0 # force fop to be rounded
7522 	fmov.l &0x0,%fpcr # clear FPCR
7523 
# with the exponent biased to 0x3fff (true exp 0), |value| is in [1,2);
# rounding can only overflow if |result| reached 2.0
7524 	fabs.x %fp0 # need absolute value
7525 	fcmp.b %fp0,&0x2 # did exponent increase?
7526 	fblt.w fout_sgl_exg # no; go finish NORM
7527 	bra.w fout_sgl_ovfl # yes; go handle overflow
7528
7529 ################
7530 
# enabled-trap exception paths shared by the single and double fmove-out
# underflow/overflow handlers. on entry, the original operand pointer
# was pushed by the caller and is popped into a0 here. the routines
# build the exceptional operand (EXOP) and return it in fp1.
7531 fout_sd_exc_unfl:
7532 	mov.l (%sp)+,%a0
7533 
7534 	mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
7535 	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
7536 	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
7537 
7538 	cmpi.b STAG(%a6),&DENORM # was src a DENORM?
7539 	bne.b fout_sd_exc_cont # no
7540 
# DENORM source: normalize it and write the corrected (negated shift
# count) exponent into bits 14:0 of the extended exponent word
7541 	lea FP_SCR0(%a6),%a0
7542 	bsr.l norm
7543 	neg.l %d0
7544 	andi.w &0x7fff,%d0
7545 	bfins %d0,FP_SCR0_EX(%a6){&1:&15}
7546 	bra.b fout_sd_exc_cont
7547 
7548 fout_sd_exc:
7549 fout_sd_exc_ovfl:
7550 	mov.l (%sp)+,%a0 # restore a0
7551 
7552 	mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
7553 	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
7554 	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
7555 
7556 fout_sd_exc_cont:
# move the sign out of the exponent word into the internal sign byte
# expected by _round(), then round FP_SCR0 at the caller's precision
7557 	bclr &0x7,FP_SCR0_EX(%a6) # clear sign bit
7558 	sne.b 2+FP_SCR0_EX(%a6) # set internal sign bit
7559 	lea FP_SCR0(%a6),%a0 # pass: ptr to DENORM
7560 
# build _round()'s d1 argument: precision in the upper word, rnd mode in
# the lower word, both extracted from the mode/prec byte in L_SCR3
7561 	mov.b 3+L_SCR3(%a6),%d1
7562 	lsr.b &0x4,%d1
7563 	andi.w &0x0c,%d1
7564 	swap %d1
7565 	mov.b 3+L_SCR3(%a6),%d1
7566 	lsr.b &0x4,%d1
7567 	andi.w &0x03,%d1
7568 	clr.l %d0 # pass: zero g,r,s
7569 	bsr.l _round # round the DENORM
7570 
# restore the external sign bit from the internal sign byte
7571 	tst.b 2+FP_SCR0_EX(%a6) # is EXOP negative?
7572 	beq.b fout_sd_exc_done # no
7573 	bset &0x7,FP_SCR0_EX(%a6) # yes
7574 
7575 fout_sd_exc_done:
7576 	fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
7577 	rts
7578
7579 #################################################################
7580 # fmove.d out ###################################################
7581 #################################################################
# fmove.d out: d0 enters holding rnd mode bits; force double precision.
7582 fout_dbl:
7583 	andi.b &0x30,%d0 # clear rnd prec
7584 	ori.b &d_mode*0x10,%d0 # insert dbl prec
7585 	mov.l %d0,L_SCR3(%a6) # save rnd prec,mode on stack
7586 
7587 #
7588 # operand is a normalized number. first, we check to see if the move out
7589 # would cause either an underflow or overflow. these cases are handled
7590 # separately. otherwise, set the FPCR to the proper rounding mode and
7591 # execute the move.
7592 #
7593 	mov.w SRC_EX(%a0),%d0 # extract exponent
7594 	andi.w &0x7fff,%d0 # strip sign
7595 
# classify against double-precision exponent limits (DBL_HI/DBL_LO)
7596 	cmpi.w %d0,&DBL_HI # will operand overflow?
7597 	bgt.w fout_dbl_ovfl # yes; go handle OVFL
7598 	beq.w fout_dbl_may_ovfl # maybe; go handle possible OVFL
7599 	cmpi.w %d0,&DBL_LO # will operand underflow?
7600 	blt.w fout_dbl_unfl # yes; go handle underflow
7601 
7602 #
7603 # NORMs(in range) can be stored out by a simple "fmov.d"
7604 # Unnormalized inputs can come through this point.
7605 #
7606 fout_dbl_exg:
7607 	fmovm.x SRC(%a0),&0x80 # fetch fop from stack
7608 
7609 	fmov.l L_SCR3(%a6),%fpcr # set FPCR
7610 	fmov.l &0x0,%fpsr # clear FPSR
7611 
# round/convert to double into the L_SCR1/L_SCR2 scratch area
7612 	fmov.d %fp0,L_SCR1(%a6) # store does convert and round
7613 
7614 	fmov.l &0x0,%fpcr # clear FPCR
7615 	fmov.l %fpsr,%d0 # save FPSR
7616 
7617 	or.w %d0,2+USER_FPSR(%a6) # set possible inex2/ainex
7618 
7619 	mov.l EXC_EA(%a6),%a1 # pass: dst addr
7620 	lea L_SCR1(%a6),%a0 # pass: src addr
7621 	movq.l &0x8,%d0 # pass: opsize is 8 bytes
7622 	bsr.l _dmem_write # store dbl fop to memory
7623 
7624 	tst.l %d1 # did dstore fail?
7625 	bne.l facc_out_d # yes
7626 
7627 	rts # no; so we're finished
7628
7629 #
7630 # here, we know that the operand would UNFL if moved out to double prec,
7631 # so, denorm and round and then use generic store double routine to
7632 # write the value to memory.
7633 #
7634 fout_dbl_unfl:
7635 	bset &unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
7636 
# copy the operand to FP_SCR0 as a scratch area; a0 is pushed so the
# exception path (fout_sd_exc_unfl) can recover the original operand.
7637 	mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
7638 	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
7639 	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
7640 	mov.l %a0,-(%sp)
7641 
7642 	clr.l %d0 # pass: S.F. = 0
7643 
7644 	cmpi.b STAG(%a6),&DENORM # fetch src optype tag
7645 	bne.b fout_dbl_unfl_cont # let DENORMs fall through
7646 
7647 	lea FP_SCR0(%a6),%a0
7648 	bsr.l norm # normalize the DENORM
7649 
7650 fout_dbl_unfl_cont:
7651 	lea FP_SCR0(%a6),%a0 # pass: ptr to operand
7652 	mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
7653 	bsr.l unf_res # calc default underflow result
7654 
7655 	lea FP_SCR0(%a6),%a0 # pass: ptr to fop
7656 	bsr.l dst_dbl # convert to double prec
7657 	mov.l %d0,L_SCR1(%a6)
7658 	mov.l %d1,L_SCR2(%a6)
7659 
7660 	mov.l EXC_EA(%a6),%a1 # pass: dst addr
7661 	lea L_SCR1(%a6),%a0 # pass: src addr
7662 	movq.l &0x8,%d0 # pass: opsize is 8 bytes
7663 	bsr.l _dmem_write # store dbl fop to memory
7664 
7665 	tst.l %d1 # did dstore fail?
7666 	bne.l facc_out_d # yes
7667 
# if UNFL or INEX traps are enabled, build the EXOP (saved a0 still on
# stack for fout_sd_exc_unfl); else pop the saved a0 and return.
7668 	mov.b FPCR_ENABLE(%a6),%d1
7669 	andi.b &0x0a,%d1 # is UNFL or INEX enabled?
7670 	bne.w fout_sd_exc_unfl # yes
7671 	addq.l &0x4,%sp
7672 	rts
7673
7674 #
7675 # it's definitely an overflow so call ovf_res to get the correct answer
7676 #
7677 fout_dbl_ovfl:
# inexact if any mantissa bits below double precision (low 11 bits of
# the extended mantissa) are non-zero
7678 	mov.w 2+SRC_LO(%a0),%d0
7679 	andi.w &0x7ff,%d0
7680 	bne.b fout_dbl_ovfl_inex2
7681 
7682 	ori.w &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
7683 	bra.b fout_dbl_ovfl_cont
7684 fout_dbl_ovfl_inex2:
7685 	ori.w &ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
7686 
7687 fout_dbl_ovfl_cont:
# save a0 for the enabled-exception path (fout_sd_exc_ovfl pops it)
7688 	mov.l %a0,-(%sp)
7689 
7690 # call ovf_res() w/ dbl prec and the correct rnd mode to create the default
7691 # overflow result. DON'T save the returned ccodes from ovf_res() since
7692 # fmove out doesn't alter them.
7693 	tst.b SRC_EX(%a0) # is operand negative?
7694 	smi %d1 # set if so
7695 	mov.l L_SCR3(%a6),%d0 # pass: dbl prec,rnd mode
7696 	bsr.l ovf_res # calc OVFL result
7697 	fmovm.x (%a0),&0x80 # load default overflow result
7698 	fmov.d %fp0,L_SCR1(%a6) # store to double
7699 
7700 	mov.l EXC_EA(%a6),%a1 # pass: dst addr
7701 	lea L_SCR1(%a6),%a0 # pass: src addr
7702 	movq.l &0x8,%d0 # pass: opsize is 8 bytes
7703 	bsr.l _dmem_write # store dbl fop to memory
7704 
7705 	tst.l %d1 # did dstore fail?
7706 	bne.l facc_out_d # yes
7707 
# if UNFL or INEX traps are enabled, build the EXOP; else pop saved a0
7708 	mov.b FPCR_ENABLE(%a6),%d1
7709 	andi.b &0x0a,%d1 # is UNFL or INEX enabled?
7710 	bne.w fout_sd_exc_ovfl # yes
7711 	addq.l &0x4,%sp
7712 	rts
7713
7714 #
7715 # move out MAY overflow:
7716 # (1) force the exp to 0x3fff
7717 # (2) do a move w/ appropriate rnd mode
7718 # (3) if exp still equals zero, then insert original exponent
7719 # for the correct result.
7720 # if exp now equals one, then it overflowed so call ovf_res.
7721 #
7722 fout_dbl_may_ovfl:
7723 	mov.w SRC_EX(%a0),%d1 # fetch current sign
7724 	andi.w &0x8000,%d1 # keep it,clear exp
7725 	ori.w &0x3fff,%d1 # insert exp = 0
7726 	mov.w %d1,FP_SCR0_EX(%a6) # insert scaled exp
7727 	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
7728 	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
7729 
7730 	fmov.l L_SCR3(%a6),%fpcr # set FPCR
7731 
7732 	fmov.x FP_SCR0(%a6),%fp0 # force fop to be rounded
7733 	fmov.l &0x0,%fpcr # clear FPCR
7734 
# with the exponent biased to 0x3fff (true exp 0), |value| is in [1,2);
# rounding can only overflow if |result| reached 2.0
7735 	fabs.x %fp0 # need absolute value
7736 	fcmp.b %fp0,&0x2 # did exponent increase?
7737 	fblt.w fout_dbl_exg # no; go finish NORM
7738 	bra.w fout_dbl_ovfl # yes; go handle overflow
7739
7740 #########################################################################
7741 # XDEF **************************************************************** #
7742 # dst_dbl(): create double precision value from extended prec. #
7743 # #
7744 # XREF **************************************************************** #
7745 # None #
7746 # #
7747 # INPUT *************************************************************** #
7748 # a0 = pointer to source operand in extended precision #
7749 # #
7750 # OUTPUT ************************************************************** #
7751 # d0 = hi(double precision result) #
7752 # d1 = lo(double precision result) #
7753 # #
7754 # ALGORITHM *********************************************************** #
7755 # #
7756 # Changes extended precision to double precision. #
7757 # Note: no attempt is made to round the extended value to double. #
7758 # dbl_sign = ext_sign #
7759 # dbl_exp = ext_exp - $3fff(ext bias) + $7ff(dbl bias) #
7760 # get rid of ext integer bit #
7761 # dbl_mant = ext_mant{62:12} #
7762 # #
7763 # --------------- --------------- --------------- #
7764 # extended -> |s| exp | |1| ms mant | | ls mant | #
7765 # --------------- --------------- --------------- #
7766 # 95 64 63 62 32 31 11 0 #
7767 # | | #
7768 # | | #
7769 # | | #
7770 # v v #
7771 # --------------- --------------- #
7772 # double -> |s|exp| mant | | mant | #
7773 # --------------- --------------- #
7774 # 63 51 32 31 0 #
7775 # #
7776 #########################################################################
7777
# dst_dbl(): repack an extended-precision operand (at (a0)) as a double.
# in:  a0 = ptr to extended operand   out: d0 = hi lword, d1 = lo lword
# no rounding is performed (see header above); uses L_SCR1/L_SCR2 scratch.
7778 dst_dbl:
7779 	clr.l %d0 # clear d0
7780 	mov.w FTEMP_EX(%a0),%d0 # get exponent
7781 	subi.w &EXT_BIAS,%d0 # subtract extended precision bias
7782 	addi.w &DBL_BIAS,%d0 # add double precision bias
# MSB of FTEMP_HI is the explicit integer bit: clear => denorm
7783 	tst.b FTEMP_HI(%a0) # is number a denorm?
7784 	bmi.b dst_get_dupper # no
7785 	subq.w &0x1,%d0 # yes; denorm bias = DBL_BIAS - 1
7786 dst_get_dupper:
7787 	swap %d0 # d0 now in upper word
7788 	lsl.l &0x4,%d0 # d0 in proper place for dbl prec exp
7789 	tst.b FTEMP_EX(%a0) # test sign
7790 	bpl.b dst_get_dman # if postive, go process mantissa
7791 	bset &0x1f,%d0 # if negative, set sign
7792 dst_get_dman:
# hi lword: 20 mantissa bits from ms mantissa (integer bit dropped)
7793 	mov.l FTEMP_HI(%a0),%d1 # get ms mantissa
7794 	bfextu %d1{&1:&20},%d1 # get upper 20 bits of ms
7795 	or.l %d1,%d0 # put these bits in ms word of double
7796 	mov.l %d0,L_SCR1(%a6) # put the new exp back on the stack
# lo lword: remaining 11 bits of ms mantissa + top 21 bits of ls mantissa
7797 	mov.l FTEMP_HI(%a0),%d1 # get ms mantissa
7798 	mov.l &21,%d0 # load shift count
7799 	lsl.l %d0,%d1 # put lower 11 bits in upper bits
7800 	mov.l %d1,L_SCR2(%a6) # build lower lword in memory
7801 	mov.l FTEMP_LO(%a0),%d1 # get ls mantissa
7802 	bfextu %d1{&0:&21},%d0 # get ls 21 bits of double
7803 	mov.l L_SCR2(%a6),%d1
7804 	or.l %d0,%d1 # put them in double result
7805 	mov.l L_SCR1(%a6),%d0
7806 	rts
7807
7808 #########################################################################
7809 # XDEF **************************************************************** #
7810 # dst_sgl(): create single precision value from extended prec #
7811 # #
7812 # XREF **************************************************************** #
7813 # #
7814 # INPUT *************************************************************** #
7815 # a0 = pointer to source operand in extended precision #
7816 # #
7817 # OUTPUT ************************************************************** #
7818 # d0 = single precision result #
7819 # #
7820 # ALGORITHM *********************************************************** #
7821 # #
7822 # Changes extended precision to single precision. #
7823 # sgl_sign = ext_sign #
7824 # sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias) #
7825 # get rid of ext integer bit #
7826 # sgl_mant = ext_mant{62:12} #
7827 # #
7828 # --------------- --------------- --------------- #
7829 # extended -> |s| exp | |1| ms mant | | ls mant | #
7830 # --------------- --------------- --------------- #
7831 # 95 64 63 62 40 32 31 12 0 #
7832 # | | #
7833 # | | #
7834 # | | #
7835 # v v #
7836 # --------------- #
7837 # single -> |s|exp| mant | #
7838 # --------------- #
7839 # 31 22 0 #
7840 # #
7841 #########################################################################
7842
# dst_sgl(): repack an extended-precision operand (at (a0)) as a single.
# in:  a0 = ptr to extended operand   out: d0 = single precision value
# no rounding is performed (see header above).
7843 dst_sgl:
7844 	clr.l %d0
7845 	mov.w FTEMP_EX(%a0),%d0 # get exponent
7846 	subi.w &EXT_BIAS,%d0 # subtract extended precision bias
7847 	addi.w &SGL_BIAS,%d0 # add single precision bias
# MSB of FTEMP_HI is the explicit integer bit: clear => denorm
7848 	tst.b FTEMP_HI(%a0) # is number a denorm?
7849 	bmi.b dst_get_supper # no
7850 	subq.w &0x1,%d0 # yes; denorm bias = SGL_BIAS - 1
7851 dst_get_supper:
7852 	swap %d0 # put exp in upper word of d0
7853 	lsl.l &0x7,%d0 # shift it into single exp bits
7854 	tst.b FTEMP_EX(%a0) # test sign
7855 	bpl.b dst_get_sman # if positive, continue
7856 	bset &0x1f,%d0 # if negative, put in sign first
7857 dst_get_sman:
# 23 mantissa bits from ms mantissa (integer bit dropped by the andi)
7858 	mov.l FTEMP_HI(%a0),%d1 # get ms mantissa
7859 	andi.l &0x7fffff00,%d1 # get upper 23 bits of ms
7860 	lsr.l &0x8,%d1 # and put them flush right
7861 	or.l %d1,%d0 # put these bits in ms word of single
7862 	rts
7863
7864 ##############################################################################
# fmove.p out (packed decimal). calculates the <ea>, extracts the static
# or dynamic k-factor, converts the extended operand to packed via
# bindec(), then writes the 12-byte result.
7865 fout_pack:
7866 	bsr.l _calc_ea_fout # fetch the <ea>
# save dst addr; fout_pack_write pops it into a1
7867 	mov.l %a0,-(%sp)
7868 
7869 	mov.b STAG(%a6),%d0 # fetch input type
7870 	bne.w fout_pack_not_norm # input is not NORM
7871 
7872 fout_pack_norm:
# cmdreg bit 4 selects static k-factor (in opword) vs dynamic (in Dn)
7873 	btst &0x4,EXC_CMDREG(%a6) # static or dynamic?
7874 	beq.b fout_pack_s # static
7875 
7876 fout_pack_d:
7877 	mov.b 1+EXC_CMDREG(%a6),%d1 # fetch dynamic reg
7878 	lsr.b &0x4,%d1
7879 	andi.w &0x7,%d1
7880 
7881 	bsr.l fetch_dreg # fetch Dn w/ k-factor
7882 
7883 	bra.b fout_pack_type
7884 fout_pack_s:
7885 	mov.b 1+EXC_CMDREG(%a6),%d0 # fetch static field
7886 
7887 fout_pack_type:
7888 	bfexts %d0{&25:&7},%d0 # extract k-factor
# save k-factor across bindec(); popped at fout_pack_norm's zero test
7889 	mov.l %d0,-(%sp)
7890 
7891 	lea FP_SRC(%a6),%a0 # pass: ptr to input
7892 
7893 # bindec is currently scrambling FP_SRC for denorm inputs.
7894 # we'll have to change this, but for now, tough luck!!!
7895 	bsr.l bindec # convert xprec to packed
7896 
7897 # andi.l &0xcfff000f,FP_SCR0(%a6) # clear unused fields
7898 	andi.l &0xcffff00f,FP_SCR0(%a6) # clear unused fields
7899 
7900 	mov.l (%sp)+,%d0
7901 
# test whether the packed mantissa is entirely zero
7902 	tst.b 3+FP_SCR0_EX(%a6)
7903 	bne.b fout_pack_set
7904 	tst.l FP_SCR0_HI(%a6)
7905 	bne.b fout_pack_set
7906 	tst.l FP_SCR0_LO(%a6)
7907 	bne.b fout_pack_set
7908 
7909 # add the extra condition that only if the k-factor was zero, too, should
7910 # we zero the exponent
7911 	tst.l %d0
7912 	bne.b fout_pack_set
7913 # "mantissa" is all zero which means that the answer is zero. but, the '040
7914 # algorithm allows the exponent to be non-zero. the 881/2 do not. therefore,
7915 # if the mantissa is zero, I will zero the exponent, too.
7916 # the question now is whether the exponents sign bit is allowed to be non-zero
7917 # for a zero, also...
7918 	andi.w &0xf000,FP_SCR0(%a6)
7919 
7920 fout_pack_set:
7921 
7922 	lea FP_SCR0(%a6),%a0 # pass: src addr
7923 
7924 fout_pack_write:
7925 	mov.l (%sp)+,%a1 # pass: dst addr
7926 	mov.l &0xc,%d0 # pass: opsize is 12 bytes
7927 
# -(a7) destination in supervisor mode needs the special write path
7928 	cmpi.b SPCOND_FLG(%a6),&mda7_flg
7929 	beq.b fout_pack_a7
7930 
7931 	bsr.l _dmem_write # write ext prec number to memory
7932 
7933 	tst.l %d1 # did dstore fail?
7934 	bne.w fout_ext_err # yes
7935 
7936 	rts
7937 
7938 # we don't want to do the write if the exception occurred in supervisor mode
7939 # so _mem_write2() handles this for us.
7940 fout_pack_a7:
7941 	bsr.l _mem_write2 # write ext prec number to memory
7942 
7943 	tst.l %d1 # did dstore fail?
7944 	bne.w fout_ext_err # yes
7945 
7946 	rts
7947 
# non-NORM inputs: DENORMs go through the normal path; ZERO/INF/NANs are
# written out directly from FP_SRC (with the unused exponent word cleared)
7948 fout_pack_not_norm:
7949 	cmpi.b %d0,&DENORM # is it a DENORM?
7950 	beq.w fout_pack_norm # yes
7951 	lea FP_SRC(%a6),%a0
7952 	clr.w 2+FP_SRC_EX(%a6)
7953 	cmpi.b %d0,&SNAN # is it an SNAN?
7954 	beq.b fout_pack_snan # yes
7955 	bra.b fout_pack_write # no
7956 
7957 fout_pack_snan:
7958 	ori.w &snaniop2_mask,FPSR_EXCEPT(%a6) # set SNAN/AIOP
7959 	bset &0x6,FP_SRC_HI(%a6) # set snan bit
7960 	bra.b fout_pack_write
7961
7962 #########################################################################
7963 # XDEF **************************************************************** #
7964 # fmul(): emulates the fmul instruction #
7965 # fsmul(): emulates the fsmul instruction #
7966 # fdmul(): emulates the fdmul instruction #
7967 # #
7968 # XREF **************************************************************** #
7969 # scale_to_zero_src() - scale src exponent to zero #
7970 # scale_to_zero_dst() - scale dst exponent to zero #
7971 # unf_res() - return default underflow result #
7972 # ovf_res() - return default overflow result #
7973 # res_qnan() - return QNAN result #
7974 # res_snan() - return SNAN result #
7975 # #
7976 # INPUT *************************************************************** #
7977 # a0 = pointer to extended precision source operand #
7978 # a1 = pointer to extended precision destination operand #
7979 # d0 rnd prec,mode #
7980 # #
7981 # OUTPUT ************************************************************** #
7982 # fp0 = result #
7983 # fp1 = EXOP (if exception occurred) #
7984 # #
7985 # ALGORITHM *********************************************************** #
7986 # Handle NANs, infinities, and zeroes as special cases. Divide #
7987 # norms/denorms into ext/sgl/dbl precision. #
7988 # For norms/denorms, scale the exponents such that a multiply #
7989 # instruction won't cause an exception. Use the regular fmul to #
7990 # compute a result. Check if the regular operands would have taken #
7991 # an exception. If so, return the default overflow/underflow result #
7992 # and return the EXOP if exceptions are enabled. Else, scale the #
7993 # result operand to the proper exponent. #
7994 # #
7995 #########################################################################
7996
7997 	align 0x10
# scale-factor thresholds, indexed by rnd precision (0=ext,1=sgl,2=dbl)
# via (tbl_fmul_ovfl.w,%pc,%d1.w*4) in fmul below
7998 tbl_fmul_ovfl:
7999 	long 0x3fff - 0x7ffe # ext_max
8000 	long 0x3fff - 0x407e # sgl_max
8001 	long 0x3fff - 0x43fe # dbl_max
8002 tbl_fmul_unfl:
8003 	long 0x3fff + 0x0001 # ext_unfl
8004 	long 0x3fff - 0x3f80 # sgl_unfl
8005 	long 0x3fff - 0x3c00 # dbl_unfl
8006 
# fsmul/fdmul force the rounding precision, then share the fmul body
8007 	global fsmul
8008 fsmul:
8009 	andi.b &0x30,%d0 # clear rnd prec
8010 	ori.b &s_mode*0x10,%d0 # insert sgl prec
8011 	bra.b fmul
8012 
8013 	global fdmul
8014 fdmul:
8015 	andi.b &0x30,%d0
8016 	ori.b &d_mode*0x10,%d0 # insert dbl prec
# note: falls through into fmul
8017 
8018 	global fmul
8019 fmul:
8020 	mov.l %d0,L_SCR3(%a6) # store rnd info
8021 
# combine DTAG and STAG into one index; non-zero means at least one
# operand is not a NORM, so dispatch through tbl_fmul_op
8022 	clr.w %d1
8023 	mov.b DTAG(%a6),%d1
8024 	lsl.b &0x3,%d1
8025 	or.b STAG(%a6),%d1 # combine src tags
8026 	bne.w fmul_not_norm # optimize on non-norm input
8027
# both operands are NORMs (or DENORMs routed back here). copy them to
# FP_SCR0/FP_SCR1, scale both exponents to zero, and use the summed
# scale factor to predict overflow/underflow before multiplying.
8028 fmul_norm:
8029 	mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
8030 	mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
8031 	mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
8032 
8033 	mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
8034 	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
8035 	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
8036 
8037 	bsr.l scale_to_zero_src # scale src exponent
8038 	mov.l %d0,-(%sp) # save scale factor 1
8039 
8040 	bsr.l scale_to_zero_dst # scale dst exponent
8041 
8042 	add.l %d0,(%sp) # SCALE_FACTOR = scale1 + scale2
8043 
# index the ovfl/unfl threshold tables by rounding precision
8044 	mov.w 2+L_SCR3(%a6),%d1 # fetch precision
8045 	lsr.b &0x6,%d1 # shift to lo bits
8046 	mov.l (%sp)+,%d0 # load S.F.
8047 	cmp.l %d0,(tbl_fmul_ovfl.w,%pc,%d1.w*4) # would result ovfl?
8048 	beq.w fmul_may_ovfl # result may rnd to overflow
8049 	blt.w fmul_ovfl # result will overflow
8050 
8051 	cmp.l %d0,(tbl_fmul_unfl.w,%pc,%d1.w*4) # would result unfl?
8052 	beq.w fmul_may_unfl # result may rnd to no unfl
8053 	bgt.w fmul_unfl # result will underflow
8054
8055 #
8056 # NORMAL:
8057 # - the result of the multiply operation will neither overflow nor underflow.
8058 # - do the multiply to the proper precision and rounding mode.
8059 # - scale the result exponent using the scale factor. if both operands were
8060 # normalized then we really don't need to go through this scaling. but for now,
8061 # this will do.
8062 #
8063 fmul_normal:
8064 	fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
8065 
8066 	fmov.l L_SCR3(%a6),%fpcr # set FPCR
8067 	fmov.l &0x0,%fpsr # clear FPSR
8068 
8069 	fmul.x FP_SCR0(%a6),%fp0 # execute multiply
8070 
8071 	fmov.l %fpsr,%d1 # save status
8072 	fmov.l &0x0,%fpcr # clear FPCR
8073 
8074 	or.l %d1,USER_FPSR(%a6) # save INEX2,N
8075 
# undo the pre-multiply scaling: subtract the scale factor (d0) from the
# result exponent, keeping the result's sign bit intact
8076 fmul_normal_exit:
8077 	fmovm.x &0x80,FP_SCR0(%a6) # store out result
8078 	mov.l %d2,-(%sp) # save d2
8079 	mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
8080 	mov.l %d1,%d2 # make a copy
8081 	andi.l &0x7fff,%d1 # strip sign
8082 	sub.l %d0,%d1 # add scale factor
8083 	or.w %d2,%d1 # concat old sign,new exp
8084 	mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
8085 	mov.l (%sp)+,%d2 # restore d2
8086 	fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
8087 	rts
8089
8090 #
8091 # OVERFLOW:
8092 # - the result of the multiply operation is an overflow.
8093 # - do the multiply to the proper precision and rounding mode in order to
8094 # set the inexact bits.
8095 # - calculate the default result and return it in fp0.
8096 # - if overflow or inexact is enabled, we need a multiply result rounded to
8097 # extended precision. if the original operation was extended, then we have this
8098 # result. if the original operation was single or double, we have to do another
8099 # multiply using extended precision and the correct rounding mode. the result
8100 # of this operation then has its exponent scaled by -0x6000 to create the
8101 # exceptional operand.
8102 #
8103 fmul_ovfl:
8104 	fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
8105 
8106 	fmov.l L_SCR3(%a6),%fpcr # set FPCR
8107 	fmov.l &0x0,%fpsr # clear FPSR
8108 
8109 	fmul.x FP_SCR0(%a6),%fp0 # execute multiply
8110 
8111 	fmov.l %fpsr,%d1 # save status
8112 	fmov.l &0x0,%fpcr # clear FPCR
8113 
8114 	or.l %d1,USER_FPSR(%a6) # save INEX2,N
8115 
8116 # save setting this until now because this is where fmul_may_ovfl may jump in
8117 fmul_ovfl_tst:
8118 	or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
8119 
8120 	mov.b FPCR_ENABLE(%a6),%d1
8121 	andi.b &0x13,%d1 # is OVFL or INEX enabled?
8122 	bne.b fmul_ovfl_ena # yes
8123 
8124 # calculate the default result
8125 fmul_ovfl_dis:
8126 	btst &neg_bit,FPSR_CC(%a6) # is result negative?
8127 	sne %d1 # set sign param accordingly
8128 	mov.l L_SCR3(%a6),%d0 # pass rnd prec,mode
8129 	bsr.l ovf_res # calculate default result
8130 	or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
8131 	fmovm.x (%a0),&0x80 # return default result in fp0
8132 	rts
8133
8134 #
8135 # OVFL is enabled; Create EXOP:
8136 # - if precision is extended, then we have the EXOP. simply bias the exponent
8137 # with an extra -0x6000. if the precision is single or double, we need to
8138 # calculate a result rounded to extended precision.
8139 #
8140 fmul_ovfl_ena:
8141 	mov.l L_SCR3(%a6),%d1
8142 	andi.b &0xc0,%d1 # test the rnd prec
8143 	bne.b fmul_ovfl_ena_sd # it's sgl or dbl
8144 
# fp0 holds an extended-precision result here; rebuild the exponent:
# undo the scale factor, subtract the 0x6000 EXOP bias, restore the sign
8145 fmul_ovfl_ena_cont:
8146 	fmovm.x &0x80,FP_SCR0(%a6) # move result to stack
8147 
8148 	mov.l %d2,-(%sp) # save d2
8149 	mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
8150 	mov.w %d1,%d2 # make a copy
8151 	andi.l &0x7fff,%d1 # strip sign
8152 	sub.l %d0,%d1 # add scale factor
8153 	subi.l &0x6000,%d1 # subtract bias
8154 	andi.w &0x7fff,%d1 # clear sign bit
8155 	andi.w &0x8000,%d2 # keep old sign
8156 	or.w %d2,%d1 # concat old sign,new exp
8157 	mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
8158 	mov.l (%sp)+,%d2 # restore d2
8159 	fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
8160 	bra.b fmul_ovfl_dis
8161 
# sgl/dbl precision: redo the multiply in extended precision (rnd mode
# only) to get the EXOP mantissa, then continue above
8162 fmul_ovfl_ena_sd:
8163 	fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
8164 
8165 	mov.l L_SCR3(%a6),%d1
8166 	andi.b &0x30,%d1 # keep rnd mode only
8167 	fmov.l %d1,%fpcr # set FPCR
8168 
8169 	fmul.x FP_SCR0(%a6),%fp0 # execute multiply
8170 
8171 	fmov.l &0x0,%fpcr # clear FPCR
8172 	bra.b fmul_ovfl_ena_cont
8173
8174 #
8175 # may OVERFLOW:
8176 # - the result of the multiply operation MAY overflow.
8177 # - do the multiply to the proper precision and rounding mode in order to
8178 # set the inexact bits.
8179 # - calculate the default result and return it in fp0.
8180 #
8181 fmul_may_ovfl:
8182 	fmovm.x FP_SCR1(%a6),&0x80 # load dst op
8183 
8184 	fmov.l L_SCR3(%a6),%fpcr # set FPCR
8185 	fmov.l &0x0,%fpsr # clear FPSR
8186 
8187 	fmul.x FP_SCR0(%a6),%fp0 # execute multiply
8188 
8189 	fmov.l %fpsr,%d1 # save status
8190 	fmov.l &0x0,%fpcr # clear FPCR
8191 
8192 	or.l %d1,USER_FPSR(%a6) # save INEX2,N
8193 
# both operands were scaled into [1,2), so |product| < 4; it overflowed
# exactly when the rounded |result| reached 2.0
8194 	fabs.x %fp0,%fp1 # make a copy of result
8195 	fcmp.b %fp1,&0x2 # is |result| >= 2.b?
8196 	fbge.w fmul_ovfl_tst # yes; overflow has occurred
8197 
8198 # no, it didn't overflow; we have correct result
8199 	bra.w fmul_normal_exit
8200
8201 #
8202 # UNDERFLOW:
8203 # - the result of the multiply operation is an underflow.
8204 # - do the multiply to the proper precision and rounding mode in order to
8205 # set the inexact bits.
8206 # - calculate the default result and return it in fp0.
8207 # - if overflow or inexact is enabled, we need a multiply result rounded to
8208 # extended precision. if the original operation was extended, then we have this
8209 # result. if the original operation was single or double, we have to do another
8210 # multiply using extended precision and the correct rounding mode. the result
8211 # of this operation then has its exponent scaled by -0x6000 to create the
8212 # exceptional operand.
8213 #
8214 fmul_unfl:
8215 	bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8216 
8217 # for fun, let's use only extended precision, round to zero. then, let
8218 # the unf_res() routine figure out all the rest.
8219 # will we get the correct answer.
8220 	fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
8221 
8222 	fmov.l &rz_mode*0x10,%fpcr # set FPCR
8223 	fmov.l &0x0,%fpsr # clear FPSR
8224 
8225 	fmul.x FP_SCR0(%a6),%fp0 # execute multiply
8226 
8227 	fmov.l %fpsr,%d1 # save status
8228 	fmov.l &0x0,%fpcr # clear FPCR
8229 
8230 	or.l %d1,USER_FPSR(%a6) # save INEX2,N
8231 
8232 	mov.b FPCR_ENABLE(%a6),%d1
8233 	andi.b &0x0b,%d1 # is UNFL or INEX enabled?
8234 	bne.b fmul_unfl_ena # yes
8235 
# traps disabled: let unf_res() denormalize/round the RZ result into the
# user's precision and mode, then return it in fp0
8236 fmul_unfl_dis:
8237 	fmovm.x &0x80,FP_SCR0(%a6) # store out result
8238 
8239 	lea FP_SCR0(%a6),%a0 # pass: result addr
8240 	mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
8241 	bsr.l unf_res # calculate default result
8242 	or.b %d0,FPSR_CC(%a6) # unf_res2 may have set 'Z'
8243 	fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
8244 	rts
8245
8246 #
8247 # UNFL is enabled.
8248 #
# create the EXOP in fp1: redo the multiply with the user's rnd mode
# (and extended precision), then bias the exponent by +0x6000.
8249 fmul_unfl_ena:
8250 	fmovm.x FP_SCR1(%a6),&0x40 # load dst op
8251 
8252 	mov.l L_SCR3(%a6),%d1
8253 	andi.b &0xc0,%d1 # is precision extended?
8254 	bne.b fmul_unfl_ena_sd # no, sgl or dbl
8255 
8256 # if the rnd mode is anything but RZ, then we have to re-do the above
8257 # multiplication because we used RZ for all.
8258 	fmov.l L_SCR3(%a6),%fpcr # set FPCR
8259 
8260 fmul_unfl_ena_cont:
8261 	fmov.l &0x0,%fpsr # clear FPSR
8262 
8263 	fmul.x FP_SCR0(%a6),%fp1 # execute multiply
8264 
8265 	fmov.l &0x0,%fpcr # clear FPCR
8266 
# rebuild the EXOP exponent: undo the scale factor, add the 0x6000
# underflow bias, restore the sign
8267 	fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
8268 	mov.l %d2,-(%sp) # save d2
8269 	mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
8270 	mov.l %d1,%d2 # make a copy
8271 	andi.l &0x7fff,%d1 # strip sign
8272 	andi.w &0x8000,%d2 # keep old sign
8273 	sub.l %d0,%d1 # add scale factor
8274 	addi.l &0x6000,%d1 # add bias
8275 	andi.w &0x7fff,%d1
8276 	or.w %d2,%d1 # concat old sign,new exp
8277 	mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
8278 	mov.l (%sp)+,%d2 # restore d2
8279 	fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
8280 	bra.w fmul_unfl_dis
8281 
# sgl/dbl precision: the EXOP multiply uses the rnd mode only
8282 fmul_unfl_ena_sd:
8283 	mov.l L_SCR3(%a6),%d1
8284 	andi.b &0x30,%d1 # use only rnd mode
8285 	fmov.l %d1,%fpcr # set FPCR
8286 
8287 	bra.b fmul_unfl_ena_cont
8288
8289 # MAY UNDERFLOW:
8290 # -use the correct rounding mode and precision. this code favors operations
8291 # that do not underflow.
8292 fmul_may_unfl:
8293 	fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
8294 
8295 	fmov.l L_SCR3(%a6),%fpcr # set FPCR
8296 	fmov.l &0x0,%fpsr # clear FPSR
8297 
8298 	fmul.x FP_SCR0(%a6),%fp0 # execute multiply
8299 
8300 	fmov.l %fpsr,%d1 # save status
8301 	fmov.l &0x0,%fpcr # clear FPCR
8302 
8303 	or.l %d1,USER_FPSR(%a6) # save INEX2,N
8304 
# the scaled result underflowed iff |result| < 2.0; exactly 2.0 is
# ambiguous (could be an underflow rounded up), so fall through to check
8305 	fabs.x %fp0,%fp1 # make a copy of result
8306 	fcmp.b %fp1,&0x2 # is |result| > 2.b?
8307 	fbgt.w fmul_normal_exit # no; no underflow occurred
8308 	fblt.w fmul_unfl # yes; underflow occurred
8309 
8310 #
8311 # we still don't know if underflow occurred. result is ~ equal to 2. but,
8312 # we don't know if the result was an underflow that rounded up to a 2 or
8313 # a normalized number that rounded down to a 2. so, redo the entire operation
8314 # using RZ as the rounding mode to see what the pre-rounded result is.
8315 # this case should be relatively rare.
8316 #
8317 	fmovm.x FP_SCR1(%a6),&0x40 # load dst operand
8318 
8319 	mov.l L_SCR3(%a6),%d1
8320 	andi.b &0xc0,%d1 # keep rnd prec
8321 	ori.b &rz_mode*0x10,%d1 # insert RZ
8322 
8323 	fmov.l %d1,%fpcr # set FPCR
8324 	fmov.l &0x0,%fpsr # clear FPSR
8325 
8326 	fmul.x FP_SCR0(%a6),%fp1 # execute multiply
8327 
8328 	fmov.l &0x0,%fpcr # clear FPCR
8329 	fabs.x %fp1 # make absolute value
8330 	fcmp.b %fp1,&0x2 # is |result| < 2.b?
8331 	fbge.w fmul_normal_exit # no; no underflow occurred
8332 	bra.w fmul_unfl # yes, underflow occurred
8333
8334 ################################################################################
8335
8336 #
8337 # Multiply: inputs are not both normalized; dispatch on the combined
8338 # operand tags (index = DTAG*8 + STAG, pre-combined by the caller in d1).
8339 fmul_not_norm:
8340 	mov.w	(tbl_fmul_op.b,%pc,%d1.w*2),%d1	# fetch handler displacement
8341 	jmp	(tbl_fmul_op.b,%pc,%d1.w)	# jump to handler
8342 
8343 	swbeg	&48
8344 tbl_fmul_op:
8345 	short	fmul_norm	- tbl_fmul_op	# NORM x NORM
8346 	short	fmul_zero	- tbl_fmul_op	# NORM x ZERO
8347 	short	fmul_inf_src	- tbl_fmul_op	# NORM x INF
8348 	short	fmul_res_qnan	- tbl_fmul_op	# NORM x QNAN
8349 	short	fmul_norm	- tbl_fmul_op	# NORM x DENORM
8350 	short	fmul_res_snan	- tbl_fmul_op	# NORM x SNAN
8351 	short	tbl_fmul_op	- tbl_fmul_op	#
8352 	short	tbl_fmul_op	- tbl_fmul_op	#
8353 
8354 	short	fmul_zero	- tbl_fmul_op	# ZERO x NORM
8355 	short	fmul_zero	- tbl_fmul_op	# ZERO x ZERO
8356 	short	fmul_res_operr	- tbl_fmul_op	# ZERO x INF
8357 	short	fmul_res_qnan	- tbl_fmul_op	# ZERO x QNAN
8358 	short	fmul_zero	- tbl_fmul_op	# ZERO x DENORM
8359 	short	fmul_res_snan	- tbl_fmul_op	# ZERO x SNAN
8360 	short	tbl_fmul_op	- tbl_fmul_op	#
8361 	short	tbl_fmul_op	- tbl_fmul_op	#
8362 
8363 	short	fmul_inf_dst	- tbl_fmul_op	# INF x NORM
8364 	short	fmul_res_operr	- tbl_fmul_op	# INF x ZERO
8365 	short	fmul_inf_dst	- tbl_fmul_op	# INF x INF
8366 	short	fmul_res_qnan	- tbl_fmul_op	# INF x QNAN
8367 	short	fmul_inf_dst	- tbl_fmul_op	# INF x DENORM
8368 	short	fmul_res_snan	- tbl_fmul_op	# INF x SNAN
8369 	short	tbl_fmul_op	- tbl_fmul_op	#
8370 	short	tbl_fmul_op	- tbl_fmul_op	#
8371 
8372 	short	fmul_res_qnan	- tbl_fmul_op	# QNAN x NORM
8373 	short	fmul_res_qnan	- tbl_fmul_op	# QNAN x ZERO
8374 	short	fmul_res_qnan	- tbl_fmul_op	# QNAN x INF
8375 	short	fmul_res_qnan	- tbl_fmul_op	# QNAN x QNAN
8376 	short	fmul_res_qnan	- tbl_fmul_op	# QNAN x DENORM
8377 	short	fmul_res_snan	- tbl_fmul_op	# QNAN x SNAN
8378 	short	tbl_fmul_op	- tbl_fmul_op	#
8379 	short	tbl_fmul_op	- tbl_fmul_op	#
8380 
8381 	short	fmul_norm	- tbl_fmul_op	# DENORM x NORM
8382 	short	fmul_zero	- tbl_fmul_op	# DENORM x ZERO
8383 	short	fmul_inf_src	- tbl_fmul_op	# DENORM x INF
8384 	short	fmul_res_qnan	- tbl_fmul_op	# DENORM x QNAN
8385 	short	fmul_norm	- tbl_fmul_op	# DENORM x DENORM
8386 	short	fmul_res_snan	- tbl_fmul_op	# DENORM x SNAN
8387 	short	tbl_fmul_op	- tbl_fmul_op	#
8388 	short	tbl_fmul_op	- tbl_fmul_op	#
8389 
8390 	short	fmul_res_snan	- tbl_fmul_op	# SNAN x NORM
8391 	short	fmul_res_snan	- tbl_fmul_op	# SNAN x ZERO
8392 	short	fmul_res_snan	- tbl_fmul_op	# SNAN x INF
8393 	short	fmul_res_snan	- tbl_fmul_op	# SNAN x QNAN
8394 	short	fmul_res_snan	- tbl_fmul_op	# SNAN x DENORM
8395 	short	fmul_res_snan	- tbl_fmul_op	# SNAN x SNAN
8396 	short	tbl_fmul_op	- tbl_fmul_op	#
8397 	short	tbl_fmul_op	- tbl_fmul_op	#
8398 
8399 fmul_res_operr:
8400 	bra.l	res_operr	# result is OPERR default
8401 fmul_res_snan:
8402 	bra.l	res_snan	# result is SNAN default
8403 fmul_res_qnan:
8404 	bra.l	res_qnan	# result is QNAN default
8405
8406 #
8407 # Multiply: (Zero x Zero) || (Zero x norm) || (Zero x denorm)
8408 # Result is a ZERO whose sign is the XOR of the input signs.
8409 	global	fmul_zero	# global for fsglmul
8410 fmul_zero:
8411 	mov.b	SRC_EX(%a0),%d0	# exclusive or the signs
8412 	mov.b	DST_EX(%a1),%d1
8413 	eor.b	%d0,%d1
8414 	bpl.b	fmul_zero_p	# result ZERO is pos.
8415 fmul_zero_n:
8416 	fmov.s	&0x80000000,%fp0	# load -ZERO
8417 	mov.b	&z_bmask+neg_bmask,FPSR_CC(%a6)	# set Z/N
8418 	rts
8419 fmul_zero_p:
8420 	fmov.s	&0x00000000,%fp0	# load +ZERO
8421 	mov.b	&z_bmask,FPSR_CC(%a6)	# set Z
8422 	rts
8423 
8424 #
8425 # Multiply: (inf x inf) || (inf x norm) || (inf x denorm)
8426 #
8427 # Note: The j-bit for an infinity is a don't-care. However, to be
8428 # strictly compatible w/ the 68881/882, we make sure to return an
8429 # INF w/ the j-bit set if the input INF j-bit was set. Destination
8430 # INFs take priority.
8431 #
8432 	global	fmul_inf_dst	# global for fsglmul
8433 fmul_inf_dst:
8434 	fmovm.x	DST(%a1),&0x80	# return INF result in fp0
8435 	mov.b	SRC_EX(%a0),%d0	# exclusive or the signs
8436 	mov.b	DST_EX(%a1),%d1
8437 	eor.b	%d0,%d1
8438 	bpl.b	fmul_inf_dst_p	# result INF is pos.
8439 fmul_inf_dst_n:
8440 	fabs.x	%fp0	# clear result sign
8441 	fneg.x	%fp0	# set result sign
8442 	mov.b	&inf_bmask+neg_bmask,FPSR_CC(%a6)	# set INF/N
8443 	rts
8444 fmul_inf_dst_p:
8445 	fabs.x	%fp0	# clear result sign
8446 	mov.b	&inf_bmask,FPSR_CC(%a6)	# set INF
8447 	rts
8448 
8449 	global	fmul_inf_src	# global for fsglmul
8450 fmul_inf_src:
8451 	fmovm.x	SRC(%a0),&0x80	# return INF result in fp0
8452 	mov.b	SRC_EX(%a0),%d0	# exclusive or the signs
8453 	mov.b	DST_EX(%a1),%d1
8454 	eor.b	%d0,%d1
8455 	bpl.b	fmul_inf_dst_p	# result INF is pos.
8456 	bra.b	fmul_inf_dst_n	# result INF is neg.
8457
8458 #########################################################################
8459 # XDEF **************************************************************** #
8460 # fin(): emulates the fmove instruction #
8461 # fsin(): emulates the fsmove instruction #
8462 # fdin(): emulates the fdmove instruction #
8463 # #
8464 # XREF **************************************************************** #
8465 # norm() - normalize mantissa for EXOP on denorm #
8466 # scale_to_zero_src() - scale src exponent to zero #
8467 # ovf_res() - return default overflow result #
8468 # unf_res() - return default underflow result #
8469 # res_qnan_1op() - return QNAN result #
8470 # res_snan_1op() - return SNAN result #
8471 # #
8472 # INPUT *************************************************************** #
8473 # a0 = pointer to extended precision source operand #
8474 # d0 = round prec/mode #
8475 # #
8476 # OUTPUT ************************************************************** #
8477 # fp0 = result #
8478 # fp1 = EXOP (if exception occurred) #
8479 # #
8480 # ALGORITHM *********************************************************** #
8481 # Handle NANs, infinities, and zeroes as special cases. Divide #
8482 # norms into extended, single, and double precision. #
8483 # Norms can be emulated w/ a regular fmove instruction. For #
8484 # sgl/dbl, must scale exponent and perform an "fmove". Check to see #
8485 # if the result would have overflowed/underflowed. If so, use unf_res() #
8486 # or ovf_res() to return the default result. Also return EXOP if #
8487 # exception is enabled. If no exception, return the default result. #
8488 # Unnorms don't pass through here. #
8489 # #
8490 #########################################################################
8491
8492 	global	fsin
8493 fsin:
8494 	andi.b	&0x30,%d0	# clear rnd prec
8495 	ori.b	&s_mode*0x10,%d0	# insert sgl precision
8496 	bra.b	fin
8497 
8498 	global	fdin
8499 fdin:
8500 	andi.b	&0x30,%d0	# clear rnd prec
8501 	ori.b	&d_mode*0x10,%d0	# insert dbl precision; fall through to fin
8502 
8503 	global	fin
8504 fin:
8505 	mov.l	%d0,L_SCR3(%a6)	# store rnd info
8506 
8507 	mov.b	STAG(%a6),%d1	# fetch src optype tag
8508 	bne.w	fin_not_norm	# optimize on non-norm input
8509 
8510 #
8511 # FP MOVE IN: NORMs and DENORMs ONLY!
8512 #
8513 fin_norm:
8514 	andi.b	&0xc0,%d0	# is precision extended?
8515 	bne.w	fin_not_ext	# no, so go handle dbl or sgl
8516 
8517 #
8518 # precision selected is extended. so...we cannot get an underflow
8519 # or overflow because of rounding to the correct precision. so...
8520 # skip the scaling and unscaling...
8521 #
8522 	tst.b	SRC_EX(%a0)	# is the operand negative?
8523 	bpl.b	fin_norm_done	# no
8524 	bset	&neg_bit,FPSR_CC(%a6)	# yes, so set 'N' ccode bit
8525 fin_norm_done:
8526 	fmovm.x	SRC(%a0),&0x80	# return result in fp0
8527 	rts
8528
8529 #
8530 # for an extended precision DENORM, the UNFL exception bit is set
8531 # the accrued bit is NOT set in this instance(no inexactness!)
8532 #
8533 fin_denorm:
8534 	andi.b	&0xc0,%d0	# is precision extended?
8535 	bne.w	fin_not_ext	# no, so go handle dbl or sgl
8536 
8537 	bset	&unfl_bit,FPSR_EXCEPT(%a6)	# set unfl exc bit
8538 	tst.b	SRC_EX(%a0)	# is the operand negative?
8539 	bpl.b	fin_denorm_done	# no
8540 	bset	&neg_bit,FPSR_CC(%a6)	# yes, so set 'N' ccode bit
8541 fin_denorm_done:
8542 	fmovm.x	SRC(%a0),&0x80	# return result in fp0
8543 	btst	&unfl_bit,FPCR_ENABLE(%a6)	# is UNFL enabled?
8544 	bne.b	fin_denorm_unfl_ena	# yes
8545 	rts
8546 
8547 #
8548 # the input is an extended DENORM and underflow is enabled in the FPCR.
8549 # normalize the mantissa and add the bias of 0x6000 to the resulting negative
8550 # exponent and insert back into the operand; this becomes the EXOP in fp1.
8551 #
8552 fin_denorm_unfl_ena:
8553 	mov.w	SRC_EX(%a0),FP_SCR0_EX(%a6)
8554 	mov.l	SRC_HI(%a0),FP_SCR0_HI(%a6)
8555 	mov.l	SRC_LO(%a0),FP_SCR0_LO(%a6)
8556 	lea	FP_SCR0(%a6),%a0	# pass: ptr to operand
8557 	bsr.l	norm	# normalize result
8558 	neg.w	%d0	# new exponent = -(shft val)
8559 	addi.w	&0x6000,%d0	# add new bias to exponent
8560 	mov.w	FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
8561 	andi.w	&0x8000,%d1	# keep old sign
8562 	andi.w	&0x7fff,%d0	# clear sign position
8563 	or.w	%d1,%d0	# concat new exp,old sign
8564 	mov.w	%d0,FP_SCR0_EX(%a6)	# insert new exponent
8565 	fmovm.x	FP_SCR0(%a6),&0x40	# return EXOP in fp1
8566 	rts
8567
8568 #
8569 # operand is to be rounded to single or double precision
8570 #
8571 fin_not_ext:
8572 	cmpi.b	%d0,&s_mode*0x10	# separate sgl/dbl prec
8573 	bne.b	fin_dbl
8574 
8575 #
8576 # operand is to be rounded to single precision
8577 #
8578 fin_sgl:
8579 	mov.w	SRC_EX(%a0),FP_SCR0_EX(%a6)
8580 	mov.l	SRC_HI(%a0),FP_SCR0_HI(%a6)
8581 	mov.l	SRC_LO(%a0),FP_SCR0_LO(%a6)
8582 	bsr.l	scale_to_zero_src	# calculate scale factor
8583 
8584 	cmpi.l	%d0,&0x3fff-0x3f80	# will move in underflow?
8585 	bge.w	fin_sd_unfl	# yes; go handle underflow
8586 	cmpi.l	%d0,&0x3fff-0x407e	# will move in overflow?
8587 	beq.w	fin_sd_may_ovfl	# maybe; go check
8588 	blt.w	fin_sd_ovfl	# yes; go handle overflow
8589 
8590 #
8591 # operand will NOT overflow or underflow when moved into the fp reg file
8592 #
8593 fin_sd_normal:
8594 	fmov.l	&0x0,%fpsr	# clear FPSR
8595 	fmov.l	L_SCR3(%a6),%fpcr	# set FPCR
8596 
8597 	fmov.x	FP_SCR0(%a6),%fp0	# perform move
8598 
8599 	fmov.l	%fpsr,%d1	# save FPSR
8600 	fmov.l	&0x0,%fpcr	# clear FPCR
8601 
8602 	or.l	%d1,USER_FPSR(%a6)	# save INEX2,N
8603 
8604 fin_sd_normal_exit:
8605 	mov.l	%d2,-(%sp)	# save d2
8606 	fmovm.x	&0x80,FP_SCR0(%a6)	# store out result
8607 	mov.w	FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
8608 	mov.w	%d1,%d2	# make a copy
8609 	andi.l	&0x7fff,%d1	# strip sign
8610 	sub.l	%d0,%d1	# subtract scale factor
8611 	andi.w	&0x8000,%d2	# keep old sign
8612 	or.w	%d1,%d2	# concat old sign,new exponent
8613 	mov.w	%d2,FP_SCR0_EX(%a6)	# insert new exponent
8614 	mov.l	(%sp)+,%d2	# restore d2
8615 	fmovm.x	FP_SCR0(%a6),&0x80	# return result in fp0
8616 	rts
8617 
8618 #
8619 # operand is to be rounded to double precision
8620 #
8621 fin_dbl:
8622 	mov.w	SRC_EX(%a0),FP_SCR0_EX(%a6)
8623 	mov.l	SRC_HI(%a0),FP_SCR0_HI(%a6)
8624 	mov.l	SRC_LO(%a0),FP_SCR0_LO(%a6)
8625 	bsr.l	scale_to_zero_src	# calculate scale factor
8626 
8627 	cmpi.l	%d0,&0x3fff-0x3c00	# will move in underflow?
8628 	bge.w	fin_sd_unfl	# yes; go handle underflow
8629 	cmpi.l	%d0,&0x3fff-0x43fe	# will move in overflow?
8630 	beq.w	fin_sd_may_ovfl	# maybe; go check
8631 	blt.w	fin_sd_ovfl	# yes; go handle overflow
8632 	bra.w	fin_sd_normal	# no; go handle normalized op
8633
8634 #
8635 # operand WILL underflow when moved in to the fp register file
8636 #
8637 fin_sd_unfl:
8638 	bset	&unfl_bit,FPSR_EXCEPT(%a6)	# set unfl exc bit
8639 
8640 	tst.b	FP_SCR0_EX(%a6)	# is operand negative?
8641 	bpl.b	fin_sd_unfl_tst	# no
8642 	bset	&neg_bit,FPSR_CC(%a6)	# set 'N' ccode bit
8643 
8644 # if underflow or inexact is enabled, then go calculate the EXOP first.
8645 fin_sd_unfl_tst:
8646 	mov.b	FPCR_ENABLE(%a6),%d1
8647 	andi.b	&0x0b,%d1	# is UNFL or INEX enabled?
8648 	bne.b	fin_sd_unfl_ena	# yes
8649 
8650 fin_sd_unfl_dis:
8651 	lea	FP_SCR0(%a6),%a0	# pass: result addr
8652 	mov.l	L_SCR3(%a6),%d1	# pass: rnd prec,mode
8653 	bsr.l	unf_res	# calculate default result
8654 	or.b	%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
8655 	fmovm.x	FP_SCR0(%a6),&0x80	# return default result in fp0
8656 	rts
8657 
8658 #
8659 # operand will underflow AND underflow or inexact is enabled.
8660 # therefore, we must return the result rounded to extended precision
8661 # (the EXOP, with exponent re-biased by +0x6000) in fp1.
8662 fin_sd_unfl_ena:
8663 	mov.l	FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
8664 	mov.l	FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
8665 	mov.w	FP_SCR0_EX(%a6),%d1	# load current exponent
8666 
8667 	mov.l	%d2,-(%sp)	# save d2
8668 	mov.w	%d1,%d2	# make a copy
8669 	andi.l	&0x7fff,%d1	# strip sign
8670 	sub.l	%d0,%d1	# subtract scale factor
8671 	andi.w	&0x8000,%d2	# extract old sign
8672 	addi.l	&0x6000,%d1	# add new bias
8673 	andi.w	&0x7fff,%d1	# clear sign position
8674 	or.w	%d1,%d2	# concat old sign,new exp
8675 	mov.w	%d2,FP_SCR1_EX(%a6)	# insert new exponent
8676 	fmovm.x	FP_SCR1(%a6),&0x40	# return EXOP in fp1
8677 	mov.l	(%sp)+,%d2	# restore d2
8678 	bra.b	fin_sd_unfl_dis
8679
8680 #
8681 # operand WILL overflow.
8682 #
8683 fin_sd_ovfl:
8684 	fmov.l	&0x0,%fpsr	# clear FPSR
8685 	fmov.l	L_SCR3(%a6),%fpcr	# set FPCR
8686 
8687 	fmov.x	FP_SCR0(%a6),%fp0	# perform move
8688 
8689 	fmov.l	&0x0,%fpcr	# clear FPCR
8690 	fmov.l	%fpsr,%d1	# save FPSR
8691 
8692 	or.l	%d1,USER_FPSR(%a6)	# save INEX2,N
8693 
8694 fin_sd_ovfl_tst:
8695 	or.l	&ovfl_inx_mask,USER_FPSR(%a6)	# set ovfl/aovfl/ainex
8696 
8697 	mov.b	FPCR_ENABLE(%a6),%d1
8698 	andi.b	&0x13,%d1	# is OVFL or INEX enabled?
8699 	bne.b	fin_sd_ovfl_ena	# yes
8700 
8701 #
8702 # OVFL is not enabled; therefore, we must create the default result by
8703 # calling ovf_res().
8704 #
8705 fin_sd_ovfl_dis:
8706 	btst	&neg_bit,FPSR_CC(%a6)	# is result negative?
8707 	sne	%d1	# set sign param accordingly
8708 	mov.l	L_SCR3(%a6),%d0	# pass: prec,mode
8709 	bsr.l	ovf_res	# calculate default result
8710 	or.b	%d0,FPSR_CC(%a6)	# set INF,N if applicable
8711 	fmovm.x	(%a0),&0x80	# return default result in fp0
8712 	rts
8713 
8714 #
8715 # OVFL is enabled.
8716 # the INEX2 bit has already been updated by the round to the correct precision.
8717 # now, round to extended(and don't alter the FPSR); EXOP goes to fp1.
8718 #
8719 fin_sd_ovfl_ena:
8720 	mov.l	%d2,-(%sp)	# save d2
8721 	mov.w	FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
8722 	mov.l	%d1,%d2	# make a copy
8723 	andi.l	&0x7fff,%d1	# strip sign
8724 	andi.w	&0x8000,%d2	# keep old sign
8725 	sub.l	%d0,%d1	# subtract scale factor
8726 	sub.l	&0x6000,%d1	# subtract bias
8727 	andi.w	&0x7fff,%d1	# clear sign position
8728 	or.w	%d2,%d1	# concat old sign,new exp
8729 	mov.w	%d1,FP_SCR0_EX(%a6)	# insert new exponent
8730 	mov.l	(%sp)+,%d2	# restore d2
8731 	fmovm.x	FP_SCR0(%a6),&0x40	# return EXOP in fp1
8732 	bra.b	fin_sd_ovfl_dis
8733 
8734 #
8735 # the move in MAY overflow. so...
8736 #
8737 fin_sd_may_ovfl:
8738 	fmov.l	&0x0,%fpsr	# clear FPSR
8739 	fmov.l	L_SCR3(%a6),%fpcr	# set FPCR
8740 
8741 	fmov.x	FP_SCR0(%a6),%fp0	# perform the move
8742 
8743 	fmov.l	%fpsr,%d1	# save status
8744 	fmov.l	&0x0,%fpcr	# clear FPCR
8745 
8746 	or.l	%d1,USER_FPSR(%a6)	# save INEX2,N
8747 
8748 	fabs.x	%fp0,%fp1	# make a copy of result
8749 	fcmp.b	%fp1,&0x2	# is |result| >= 2.b?
8750 	fbge.w	fin_sd_ovfl_tst	# yes; overflow has occurred
8751 
8752 # no, it didn't overflow; we have correct result
8753 	bra.w	fin_sd_normal_exit
8754
8755 ##########################################################################
8756 
8757 #
8758 # operand is not a NORM: check its optype and branch accordingly
8759 #
8760 fin_not_norm:
8761 	cmpi.b	%d1,&DENORM	# weed out DENORM
8762 	beq.w	fin_denorm
8763 	cmpi.b	%d1,&SNAN	# weed out SNANs
8764 	beq.l	res_snan_1op
8765 	cmpi.b	%d1,&QNAN	# weed out QNANs
8766 	beq.l	res_qnan_1op
8767 
8768 #
8769 # do the fmove in; at this point, only possible ops are ZERO and INF.
8770 # use fmov to determine ccodes.
8771 # prec:mode should be zero at this point but it won't affect answer anyways.
8772 #
8773 	fmov.x	SRC(%a0),%fp0	# do fmove in
8774 	fmov.l	%fpsr,%d0	# no exceptions possible
8775 	rol.l	&0x8,%d0	# put ccodes in lo byte
8776 	mov.b	%d0,FPSR_CC(%a6)	# insert correct ccodes
8777 	rts
8778
8779 #########################################################################
8780 # XDEF **************************************************************** #
8781 # fdiv(): emulates the fdiv instruction #
8782 # fsdiv(): emulates the fsdiv instruction #
8783 # fddiv(): emulates the fddiv instruction #
8784 # #
8785 # XREF **************************************************************** #
8786 # scale_to_zero_src() - scale src exponent to zero #
8787 # scale_to_zero_dst() - scale dst exponent to zero #
8788 # unf_res() - return default underflow result #
8789 # ovf_res() - return default overflow result #
8790 # res_qnan() - return QNAN result #
8791 # res_snan() - return SNAN result #
8792 # #
8793 # INPUT *************************************************************** #
8794 # a0 = pointer to extended precision source operand #
8795 # a1 = pointer to extended precision destination operand #
8796 #	d0 = rnd prec,mode						#
8797 # #
8798 # OUTPUT ************************************************************** #
8799 # fp0 = result #
8800 # fp1 = EXOP (if exception occurred) #
8801 # #
8802 # ALGORITHM *********************************************************** #
8803 # Handle NANs, infinities, and zeroes as special cases. Divide #
8804 # norms/denorms into ext/sgl/dbl precision. #
8805 # For norms/denorms, scale the exponents such that a divide #
8806 # instruction won't cause an exception. Use the regular fdiv to #
8807 # compute a result. Check if the regular operands would have taken #
8808 # an exception. If so, return the default overflow/underflow result #
8809 # and return the EXOP if exceptions are enabled. Else, scale the #
8810 # result operand to the proper exponent. #
8811 # #
8812 #########################################################################
8813
8814 	align	0x10
8815 tbl_fdiv_unfl:
8816 	long	0x3fff - 0x0000	# ext underflow exponent
8817 	long	0x3fff - 0x3f81	# sgl underflow exponent
8818 	long	0x3fff - 0x3c01	# dbl underflow exponent
8819 
8820 tbl_fdiv_ovfl:
8821 	long	0x3fff - 0x7ffe	# ext overflow exponent
8822 	long	0x3fff - 0x407e	# sgl overflow exponent
8823 	long	0x3fff - 0x43fe	# dbl overflow exponent
8824 
8825 	global	fsdiv
8826 fsdiv:
8827 	andi.b	&0x30,%d0	# clear rnd prec
8828 	ori.b	&s_mode*0x10,%d0	# insert sgl prec
8829 	bra.b	fdiv
8830 
8831 	global	fddiv
8832 fddiv:
8833 	andi.b	&0x30,%d0	# clear rnd prec
8834 	ori.b	&d_mode*0x10,%d0	# insert dbl prec; fall through to fdiv
8835
8836 	global	fdiv
8837 fdiv:
8838 	mov.l	%d0,L_SCR3(%a6)	# store rnd info
8839 
8840 	clr.w	%d1
8841 	mov.b	DTAG(%a6),%d1	# fetch dst optype tag
8842 	lsl.b	&0x3,%d1	# index = DTAG*8 + STAG
8843 	or.b	STAG(%a6),%d1	# combine src tags
8844 
8845 	bne.w	fdiv_not_norm	# optimize on non-norm input
8846 
8847 #
8848 # DIVIDE: NORMs and DENORMs ONLY!
8849 #
8850 fdiv_norm:
8851 	mov.w	DST_EX(%a1),FP_SCR1_EX(%a6)
8852 	mov.l	DST_HI(%a1),FP_SCR1_HI(%a6)
8853 	mov.l	DST_LO(%a1),FP_SCR1_LO(%a6)
8854 
8855 	mov.w	SRC_EX(%a0),FP_SCR0_EX(%a6)
8856 	mov.l	SRC_HI(%a0),FP_SCR0_HI(%a6)
8857 	mov.l	SRC_LO(%a0),FP_SCR0_LO(%a6)
8858 
8859 	bsr.l	scale_to_zero_src	# scale src exponent
8860 	mov.l	%d0,-(%sp)	# save scale factor 1
8861 
8862 	bsr.l	scale_to_zero_dst	# scale dst exponent
8863 
8864 	neg.l	(%sp)	# SCALE FACTOR = scale1 - scale2
8865 	add.l	%d0,(%sp)
8866 
8867 	mov.w	2+L_SCR3(%a6),%d1	# fetch precision
8868 	lsr.b	&0x6,%d1	# shift to lo bits
8869 	mov.l	(%sp)+,%d0	# load S.F.
8870 	cmp.l	%d0,(tbl_fdiv_ovfl.b,%pc,%d1.w*4)	# will result overflow?
8871 	ble.w	fdiv_may_ovfl	# result may overflow; go check
8872 
8873 	cmp.l	%d0,(tbl_fdiv_unfl.w,%pc,%d1.w*4)	# will result underflow?
8874 	beq.w	fdiv_may_unfl	# maybe
8875 	bgt.w	fdiv_unfl	# yes; go handle underflow
8876
8877 fdiv_normal:
8878 	fmovm.x	FP_SCR1(%a6),&0x80	# load dst op into fp0
8879 
8880 	fmov.l	L_SCR3(%a6),%fpcr	# set FPCR
8881 	fmov.l	&0x0,%fpsr	# clear FPSR
8882 
8883 	fdiv.x	FP_SCR0(%a6),%fp0	# perform divide
8884 
8885 	fmov.l	%fpsr,%d1	# save FPSR
8886 	fmov.l	&0x0,%fpcr	# clear FPCR
8887 
8888 	or.l	%d1,USER_FPSR(%a6)	# save INEX2,N
8889 
8890 fdiv_normal_exit:
8891 	fmovm.x	&0x80,FP_SCR0(%a6)	# store result on stack
8892 	mov.l	%d2,-(%sp)	# store d2
8893 	mov.w	FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
8894 	mov.l	%d1,%d2	# make a copy
8895 	andi.l	&0x7fff,%d1	# strip sign
8896 	andi.w	&0x8000,%d2	# keep old sign
8897 	sub.l	%d0,%d1	# subtract scale factor
8898 	or.w	%d2,%d1	# concat old sign,new exp
8899 	mov.w	%d1,FP_SCR0_EX(%a6)	# insert new exponent
8900 	mov.l	(%sp)+,%d2	# restore d2
8901 	fmovm.x	FP_SCR0(%a6),&0x80	# return result in fp0
8902 	rts
8903 
8904 tbl_fdiv_ovfl2:
8905 	long	0x7fff	# ext overflow exponent
8906 	long	0x407f	# sgl overflow exponent
8907 	long	0x43ff	# dbl overflow exponent
8908 
8909 fdiv_no_ovfl:
8910 	mov.l	(%sp)+,%d0	# restore scale factor
8911 	bra.b	fdiv_normal_exit
8912
8913 fdiv_may_ovfl:
8914 	mov.l	%d0,-(%sp)	# save scale factor
8915 
8916 	fmovm.x	FP_SCR1(%a6),&0x80	# load dst op into fp0
8917 
8918 	fmov.l	L_SCR3(%a6),%fpcr	# set FPCR
8919 	fmov.l	&0x0,%fpsr	# clear FPSR
8920 
8921 	fdiv.x	FP_SCR0(%a6),%fp0	# execute divide
8922 
8923 	fmov.l	%fpsr,%d0	# save status
8924 	fmov.l	&0x0,%fpcr	# clear FPCR
8925 
8926 	or.l	%d0,USER_FPSR(%a6)	# save INEX,N
8927 
8928 	fmovm.x	&0x01,-(%sp)	# save result to stack
8929 	mov.w	(%sp),%d0	# fetch new exponent
8930 	add.l	&0xc,%sp	# clear result from stack
8931 	andi.l	&0x7fff,%d0	# strip sign
8932 	sub.l	(%sp),%d0	# subtract scale factor
8933 	cmp.l	%d0,(tbl_fdiv_ovfl2.b,%pc,%d1.w*4)	# did result overflow?
8934 	blt.b	fdiv_no_ovfl	# no
8935 	mov.l	(%sp)+,%d0	# yes; restore scale factor
8936 
8937 fdiv_ovfl_tst:
8938 	or.l	&ovfl_inx_mask,USER_FPSR(%a6)	# set ovfl/aovfl/ainex
8939 
8940 	mov.b	FPCR_ENABLE(%a6),%d1
8941 	andi.b	&0x13,%d1	# is OVFL or INEX enabled?
8942 	bne.b	fdiv_ovfl_ena	# yes
8943 
8944 fdiv_ovfl_dis:
8945 	btst	&neg_bit,FPSR_CC(%a6)	# is result negative?
8946 	sne	%d1	# set sign param accordingly
8947 	mov.l	L_SCR3(%a6),%d0	# pass prec:rnd
8948 	bsr.l	ovf_res	# calculate default result
8949 	or.b	%d0,FPSR_CC(%a6)	# set INF if applicable
8950 	fmovm.x	(%a0),&0x80	# return default result in fp0
8951 	rts
8952 
8953 fdiv_ovfl_ena:
8954 	mov.l	L_SCR3(%a6),%d1
8955 	andi.b	&0xc0,%d1	# is precision extended?
8956 	bne.b	fdiv_ovfl_ena_sd	# no, do sgl or dbl
8957 
8958 fdiv_ovfl_ena_cont:
8959 	fmovm.x	&0x80,FP_SCR0(%a6)	# move result to stack
8960 
8961 	mov.l	%d2,-(%sp)	# save d2
8962 	mov.w	FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
8963 	mov.w	%d1,%d2	# make a copy
8964 	andi.l	&0x7fff,%d1	# strip sign
8965 	sub.l	%d0,%d1	# subtract scale factor
8966 	subi.l	&0x6000,%d1	# subtract bias
8967 	andi.w	&0x7fff,%d1	# clear sign bit
8968 	andi.w	&0x8000,%d2	# keep old sign
8969 	or.w	%d2,%d1	# concat old sign,new exp
8970 	mov.w	%d1,FP_SCR0_EX(%a6)	# insert new exponent
8971 	mov.l	(%sp)+,%d2	# restore d2
8972 	fmovm.x	FP_SCR0(%a6),&0x40	# return EXOP in fp1
8973 	bra.b	fdiv_ovfl_dis
8974 
8975 fdiv_ovfl_ena_sd:
8976 	fmovm.x	FP_SCR1(%a6),&0x80	# load dst operand
8977 
8978 	mov.l	L_SCR3(%a6),%d1
8979 	andi.b	&0x30,%d1	# keep rnd mode (ext precision)
8980 	fmov.l	%d1,%fpcr	# set FPCR
8981 
8982 	fdiv.x	FP_SCR0(%a6),%fp0	# execute divide
8983 
8984 	fmov.l	&0x0,%fpcr	# clear FPCR
8985 	bra.b	fdiv_ovfl_ena_cont
8986
8987 fdiv_unfl:
8988 	bset	&unfl_bit,FPSR_EXCEPT(%a6)	# set unfl exc bit
8989 
8990 	fmovm.x	FP_SCR1(%a6),&0x80	# load dst op into fp0
8991 
8992 	fmov.l	&rz_mode*0x10,%fpcr	# set FPCR (round-to-zero)
8993 	fmov.l	&0x0,%fpsr	# clear FPSR
8994 
8995 	fdiv.x	FP_SCR0(%a6),%fp0	# execute divide
8996 
8997 	fmov.l	%fpsr,%d1	# save status
8998 	fmov.l	&0x0,%fpcr	# clear FPCR
8999 
9000 	or.l	%d1,USER_FPSR(%a6)	# save INEX2,N
9001 
9002 	mov.b	FPCR_ENABLE(%a6),%d1
9003 	andi.b	&0x0b,%d1	# is UNFL or INEX enabled?
9004 	bne.b	fdiv_unfl_ena	# yes
9005 
9006 fdiv_unfl_dis:
9007 	fmovm.x	&0x80,FP_SCR0(%a6)	# store out result
9008 
9009 	lea	FP_SCR0(%a6),%a0	# pass: result addr
9010 	mov.l	L_SCR3(%a6),%d1	# pass: rnd prec,mode
9011 	bsr.l	unf_res	# calculate default result
9012 	or.b	%d0,FPSR_CC(%a6)	# 'Z' may have been set
9013 	fmovm.x	FP_SCR0(%a6),&0x80	# return default result in fp0
9014 	rts
9015 
9016 #
9017 # UNFL is enabled: also build the EXOP (exponent re-biased by +0x6000)
9018 # and return it in fp1.
9019 fdiv_unfl_ena:
9020 	fmovm.x	FP_SCR1(%a6),&0x40	# load dst op into fp1
9021 
9022 	mov.l	L_SCR3(%a6),%d1
9023 	andi.b	&0xc0,%d1	# is precision extended?
9024 	bne.b	fdiv_unfl_ena_sd	# no, sgl or dbl
9025 
9026 	fmov.l	L_SCR3(%a6),%fpcr	# set FPCR
9027 
9028 fdiv_unfl_ena_cont:
9029 	fmov.l	&0x0,%fpsr	# clear FPSR
9030 
9031 	fdiv.x	FP_SCR0(%a6),%fp1	# execute divide
9032 
9033 	fmov.l	&0x0,%fpcr	# clear FPCR
9034 
9035 	fmovm.x	&0x40,FP_SCR0(%a6)	# save result to stack
9036 	mov.l	%d2,-(%sp)	# save d2
9037 	mov.w	FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
9038 	mov.l	%d1,%d2	# make a copy
9039 	andi.l	&0x7fff,%d1	# strip sign
9040 	andi.w	&0x8000,%d2	# keep old sign
9041 	sub.l	%d0,%d1	# subtract scale factor
9042 	addi.l	&0x6000,%d1	# add bias
9043 	andi.w	&0x7fff,%d1	# clear sign position
9044 	or.w	%d2,%d1	# concat old sign,new exp
9045 	mov.w	%d1,FP_SCR0_EX(%a6)	# insert new exp
9046 	mov.l	(%sp)+,%d2	# restore d2
9047 	fmovm.x	FP_SCR0(%a6),&0x40	# return EXOP in fp1
9048 	bra.w	fdiv_unfl_dis
9049 
9050 fdiv_unfl_ena_sd:
9051 	mov.l	L_SCR3(%a6),%d1
9052 	andi.b	&0x30,%d1	# use only rnd mode
9053 	fmov.l	%d1,%fpcr	# set FPCR
9054 
9055 	bra.b	fdiv_unfl_ena_cont
9056
9057 #
9058 # the divide operation MAY underflow:
9059 #
9060 fdiv_may_unfl:
9061 	fmovm.x	FP_SCR1(%a6),&0x80	# load dst op into fp0
9062 
9063 	fmov.l	L_SCR3(%a6),%fpcr	# set FPCR
9064 	fmov.l	&0x0,%fpsr	# clear FPSR
9065 
9066 	fdiv.x	FP_SCR0(%a6),%fp0	# execute divide
9067 
9068 	fmov.l	%fpsr,%d1	# save status
9069 	fmov.l	&0x0,%fpcr	# clear FPCR
9070 
9071 	or.l	%d1,USER_FPSR(%a6)	# save INEX2,N
9072 
9073 	fabs.x	%fp0,%fp1	# make a copy of result
9074 	fcmp.b	%fp1,&0x1	# compare |result| against 1
9075 	fbgt.w	fdiv_normal_exit	# |result| > 1; no underflow occurred
9076 	fblt.w	fdiv_unfl	# |result| < 1; underflow occurred
9077 
9078 #
9079 # we still don't know if underflow occurred. result is ~ equal to 1. but,
9080 # we don't know if the result was an underflow that rounded up to a 1
9081 # or a normalized number that rounded down to a 1. so, redo the entire
9082 # operation using RZ as the rounding mode to see what the pre-rounded
9083 # result is. this case should be relatively rare.
9084 #
9085 	fmovm.x	FP_SCR1(%a6),&0x40	# load dst op into fp1
9086 
9087 	mov.l	L_SCR3(%a6),%d1
9088 	andi.b	&0xc0,%d1	# keep rnd prec
9089 	ori.b	&rz_mode*0x10,%d1	# insert RZ
9090 
9091 	fmov.l	%d1,%fpcr	# set FPCR
9092 	fmov.l	&0x0,%fpsr	# clear FPSR
9093 
9094 	fdiv.x	FP_SCR0(%a6),%fp1	# execute divide
9095 
9096 	fmov.l	&0x0,%fpcr	# clear FPCR
9097 	fabs.x	%fp1	# make absolute value
9098 	fcmp.b	%fp1,&0x1	# compare pre-rounded |result| against 1
9099 	fbge.w	fdiv_normal_exit	# |result| >= 1; no underflow occurred
9100 	bra.w	fdiv_unfl	# |result| < 1; underflow occurred
9101
9102 ############################################################################
9103
9104 #
9105 # Divide: inputs are not both normalized; dispatch on the combined
9106 # operand tags (index = DTAG*8 + STAG, built at fdiv entry in d1).
9107 fdiv_not_norm:
9108 	mov.w	(tbl_fdiv_op.b,%pc,%d1.w*2),%d1	# fetch handler displacement
9109 	jmp	(tbl_fdiv_op.b,%pc,%d1.w*1)	# jump to handler
9110 
9111 	swbeg	&48
9112 tbl_fdiv_op:
9113 	short	fdiv_norm	- tbl_fdiv_op	# NORM / NORM
9114 	short	fdiv_inf_load	- tbl_fdiv_op	# NORM / ZERO
9115 	short	fdiv_zero_load	- tbl_fdiv_op	# NORM / INF
9116 	short	fdiv_res_qnan	- tbl_fdiv_op	# NORM / QNAN
9117 	short	fdiv_norm	- tbl_fdiv_op	# NORM / DENORM
9118 	short	fdiv_res_snan	- tbl_fdiv_op	# NORM / SNAN
9119 	short	tbl_fdiv_op	- tbl_fdiv_op	#
9120 	short	tbl_fdiv_op	- tbl_fdiv_op	#
9121 
9122 	short	fdiv_zero_load	- tbl_fdiv_op	# ZERO / NORM
9123 	short	fdiv_res_operr	- tbl_fdiv_op	# ZERO / ZERO
9124 	short	fdiv_zero_load	- tbl_fdiv_op	# ZERO / INF
9125 	short	fdiv_res_qnan	- tbl_fdiv_op	# ZERO / QNAN
9126 	short	fdiv_zero_load	- tbl_fdiv_op	# ZERO / DENORM
9127 	short	fdiv_res_snan	- tbl_fdiv_op	# ZERO / SNAN
9128 	short	tbl_fdiv_op	- tbl_fdiv_op	#
9129 	short	tbl_fdiv_op	- tbl_fdiv_op	#
9130 
9131 	short	fdiv_inf_dst	- tbl_fdiv_op	# INF / NORM
9132 	short	fdiv_inf_dst	- tbl_fdiv_op	# INF / ZERO
9133 	short	fdiv_res_operr	- tbl_fdiv_op	# INF / INF
9134 	short	fdiv_res_qnan	- tbl_fdiv_op	# INF / QNAN
9135 	short	fdiv_inf_dst	- tbl_fdiv_op	# INF / DENORM
9136 	short	fdiv_res_snan	- tbl_fdiv_op	# INF / SNAN
9137 	short	tbl_fdiv_op	- tbl_fdiv_op	#
9138 	short	tbl_fdiv_op	- tbl_fdiv_op	#
9139 
9140 	short	fdiv_res_qnan	- tbl_fdiv_op	# QNAN / NORM
9141 	short	fdiv_res_qnan	- tbl_fdiv_op	# QNAN / ZERO
9142 	short	fdiv_res_qnan	- tbl_fdiv_op	# QNAN / INF
9143 	short	fdiv_res_qnan	- tbl_fdiv_op	# QNAN / QNAN
9144 	short	fdiv_res_qnan	- tbl_fdiv_op	# QNAN / DENORM
9145 	short	fdiv_res_snan	- tbl_fdiv_op	# QNAN / SNAN
9146 	short	tbl_fdiv_op	- tbl_fdiv_op	#
9147 	short	tbl_fdiv_op	- tbl_fdiv_op	#
9148 
9149 	short	fdiv_norm	- tbl_fdiv_op	# DENORM / NORM
9150 	short	fdiv_inf_load	- tbl_fdiv_op	# DENORM / ZERO
9151 	short	fdiv_zero_load	- tbl_fdiv_op	# DENORM / INF
9152 	short	fdiv_res_qnan	- tbl_fdiv_op	# DENORM / QNAN
9153 	short	fdiv_norm	- tbl_fdiv_op	# DENORM / DENORM
9154 	short	fdiv_res_snan	- tbl_fdiv_op	# DENORM / SNAN
9155 	short	tbl_fdiv_op	- tbl_fdiv_op	#
9156 	short	tbl_fdiv_op	- tbl_fdiv_op	#
9157 
9158 	short	fdiv_res_snan	- tbl_fdiv_op	# SNAN / NORM
9159 	short	fdiv_res_snan	- tbl_fdiv_op	# SNAN / ZERO
9160 	short	fdiv_res_snan	- tbl_fdiv_op	# SNAN / INF
9161 	short	fdiv_res_snan	- tbl_fdiv_op	# SNAN / QNAN
9162 	short	fdiv_res_snan	- tbl_fdiv_op	# SNAN / DENORM
9163 	short	fdiv_res_snan	- tbl_fdiv_op	# SNAN / SNAN
9164 	short	tbl_fdiv_op	- tbl_fdiv_op	#
9165 	short	tbl_fdiv_op	- tbl_fdiv_op	#
9166 
9167 fdiv_res_qnan:
9168 	bra.l	res_qnan	# result is QNAN default
9169 fdiv_res_snan:
9170 	bra.l	res_snan	# result is SNAN default
9171 fdiv_res_operr:
9172 	bra.l	res_operr	# result is OPERR default
9173
9174 	global	fdiv_zero_load	# global for fsgldiv
9175 fdiv_zero_load:
9176 	mov.b	SRC_EX(%a0),%d0	# result sign is exclusive
9177 	mov.b	DST_EX(%a1),%d1	# or of input signs.
9178 	eor.b	%d0,%d1
9179 	bpl.b	fdiv_zero_load_p	# result is positive
9180 	fmov.s	&0x80000000,%fp0	# load a -ZERO
9181 	mov.b	&z_bmask+neg_bmask,FPSR_CC(%a6)	# set Z/N
9182 	rts
9183 fdiv_zero_load_p:
9184 	fmov.s	&0x00000000,%fp0	# load a +ZERO
9185 	mov.b	&z_bmask,FPSR_CC(%a6)	# set Z
9186 	rts
9187 
9188 #
9189 # The destination was In Range and the source was a ZERO. The result,
9190 # therefore, is an INF w/ the proper sign.
9191 # So, determine the sign and return a new INF (w/ the j-bit cleared).
9192 #
9193 	global	fdiv_inf_load	# global for fsgldiv
9194 fdiv_inf_load:
9195 	ori.w	&dz_mask+adz_mask,2+USER_FPSR(%a6)	# no; set DZ/ADZ
9196 	mov.b	SRC_EX(%a0),%d0	# load both signs
9197 	mov.b	DST_EX(%a1),%d1
9198 	eor.b	%d0,%d1
9199 	bpl.b	fdiv_inf_load_p	# result is positive
9200 	fmov.s	&0xff800000,%fp0	# make result -INF
9201 	mov.b	&inf_bmask+neg_bmask,FPSR_CC(%a6)	# set INF/N
9202 	rts
9203 fdiv_inf_load_p:
9204 	fmov.s	&0x7f800000,%fp0	# make result +INF
9205 	mov.b	&inf_bmask,FPSR_CC(%a6)	# set INF
9206 	rts
9207 
9208 #
9209 # The destination was an INF w/ an In Range or ZERO source, the result is
9210 # an INF w/ the proper sign.
9211 # The 68881/882 returns the destination INF w/ the new sign(if the j-bit of the
9212 # dst INF is set, then the j-bit of the result INF is also set).
9213 #
9214 	global	fdiv_inf_dst	# global for fsgldiv
9215 fdiv_inf_dst:
9216 	mov.b	DST_EX(%a1),%d0	# load both signs
9217 	mov.b	SRC_EX(%a0),%d1
9218 	eor.b	%d0,%d1
9219 	bpl.b	fdiv_inf_dst_p	# result is positive
9220 
9221 	fmovm.x	DST(%a1),&0x80	# return result in fp0
9222 	fabs.x	%fp0	# clear sign bit
9223 	fneg.x	%fp0	# set sign bit
9224 	mov.b	&inf_bmask+neg_bmask,FPSR_CC(%a6)	# set INF/NEG
9225 	rts
9226 
9227 fdiv_inf_dst_p:
9228 	fmovm.x	DST(%a1),&0x80	# return result in fp0
9229 	fabs.x	%fp0	# return positive INF
9230 	mov.b	&inf_bmask,FPSR_CC(%a6)	# set INF
9231 	rts
9232
9233 #########################################################################
9234 # XDEF **************************************************************** #
9235 # fneg(): emulates the fneg instruction #
9236 # fsneg(): emulates the fsneg instruction #
9237 # fdneg(): emulates the fdneg instruction #
9238 # #
9239 # XREF **************************************************************** #
9240 # norm() - normalize a denorm to provide EXOP #
9241 # scale_to_zero_src() - scale sgl/dbl source exponent #
9242 # ovf_res() - return default overflow result #
9243 # unf_res() - return default underflow result #
9244 # res_qnan_1op() - return QNAN result #
9245 # res_snan_1op() - return SNAN result #
9246 # #
9247 # INPUT *************************************************************** #
9248 # a0 = pointer to extended precision source operand #
9249 # d0 = rnd prec,mode #
9250 # #
9251 # OUTPUT ************************************************************** #
9252 # fp0 = result #
9253 # fp1 = EXOP (if exception occurred) #
9254 # #
9255 # ALGORITHM *********************************************************** #
9256 # Handle NANs, zeroes, and infinities as special cases. Separate #
9257 # norms/denorms into ext/sgl/dbl precisions. Extended precision can be #
9258 # emulated by simply setting sign bit. Sgl/dbl operands must be scaled #
9259 # and an actual fneg performed to see if overflow/underflow would have #
9260 # occurred. If so, return default underflow/overflow result. Else, #
9261 # scale the result exponent and return result. FPSR gets set based on #
9262 # the result value. #
9263 # #
9264 #########################################################################
9265
9266 	global		fsneg
9267 fsneg:
9268 	andi.b		&0x30,%d0		# clear rnd prec
9269 	ori.b		&s_mode*0x10,%d0	# insert sgl precision
9270 	bra.b		fneg
9271 
9272 	global		fdneg
9273 fdneg:
9274 	andi.b		&0x30,%d0		# clear rnd prec
9275 	ori.b		&d_mode*0x10,%d0	# insert dbl prec
9276 					# fall through into fneg below
9277 	global		fneg
9278 fneg:
9279 	mov.l		%d0,L_SCR3(%a6)		# store rnd info
9280 	mov.b		STAG(%a6),%d1
9281 	bne.w		fneg_not_norm		# optimize on non-norm input
9282 
9283 #
9284 # NEGATE SIGN : norms and denorms ONLY!
9285 #
9286 fneg_norm:
9287 	andi.b		&0xc0,%d0		# is precision extended?
9288 	bne.w		fneg_not_ext		# no; go handle sgl or dbl
9289 
9290 #
9291 # precision selected is extended. so...we can not get an underflow
9292 # or overflow because of rounding to the correct precision. so...
9293 # skip the scaling and unscaling...
9294 #
9295 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
9296 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
9297 	mov.w		SRC_EX(%a0),%d0
9298 	eori.w		&0x8000,%d0		# negate sign
9299 	bpl.b		fneg_norm_load		# sign is positive
9300 	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
9301 fneg_norm_load:
9302 	mov.w		%d0,FP_SCR0_EX(%a6)
9303 	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
9304 	rts
9305 
9306 #
9307 # for an extended precision DENORM, the UNFL exception bit is set
9308 # the accrued bit is NOT set in this instance(no inexactness!)
9309 #
9310 fneg_denorm:
9311 	andi.b		&0xc0,%d0		# is precision extended?
9312 	bne.b		fneg_not_ext		# no; go handle sgl or dbl
9313 
9314 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
9315 
9316 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
9317 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
9318 	mov.w		SRC_EX(%a0),%d0
9319 	eori.w		&0x8000,%d0		# negate sign
9320 	bpl.b		fneg_denorm_done	# sign is positive; skip 'N'
9321 	mov.b		&neg_bmask,FPSR_CC(%a6)	# yes, set 'N' ccode bit
9322 fneg_denorm_done:
9323 	mov.w		%d0,FP_SCR0_EX(%a6)
9324 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
9325 
9326 	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
9327 	bne.b		fneg_ext_unfl_ena	# yes
9328 	rts
9329 
9330 #
9331 # the input is an extended DENORM and underflow is enabled in the FPCR.
9332 # normalize the mantissa and add the bias of 0x6000 to the resulting negative
9333 # exponent and insert back into the operand.
9334 #
9335 fneg_ext_unfl_ena:
9336 	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
9337 	bsr.l		norm			# normalize result
9338 	neg.w		%d0			# new exponent = -(shft val)
9339 	addi.w		&0x6000,%d0		# add new bias to exponent
9340 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
9341 	andi.w		&0x8000,%d1		# keep old sign
9342 	andi.w		&0x7fff,%d0		# clear sign position
9343 	or.w		%d1,%d0			# concat old sign, new exponent
9344 	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
9345 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
9346 	rts
9347 
9348 #
9349 # operand is either single or double
9350 #
9351 fneg_not_ext:
9352 	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
9353 	bne.b		fneg_dbl
9354 
9355 #
9356 # operand is to be rounded to single precision
9357 #
9358 fneg_sgl:
9359 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
9360 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
9361 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
9362 	bsr.l		scale_to_zero_src	# calculate scale factor
9363 
9364 	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
9365 	bge.w		fneg_sd_unfl		# yes; go handle underflow
9366 	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
9367 	beq.w		fneg_sd_may_ovfl	# maybe; go check
9368 	blt.w		fneg_sd_ovfl		# yes; go handle overflow
9369 
9370 #
9371 # operand will NOT overflow or underflow when moved in to the fp reg file
9372 #
9373 fneg_sd_normal:
9374 	fmov.l		&0x0,%fpsr		# clear FPSR
9375 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
9376 
9377 	fneg.x		FP_SCR0(%a6),%fp0	# perform negation
9378 
9379 	fmov.l		%fpsr,%d1		# save FPSR
9380 	fmov.l		&0x0,%fpcr		# clear FPCR
9381 
9382 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
9383 
9384 fneg_sd_normal_exit:
9385 	mov.l		%d2,-(%sp)		# save d2
9386 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
9387 	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
9388 	mov.w		%d1,%d2			# make a copy
9389 	andi.l		&0x7fff,%d1		# strip sign
9390 	sub.l		%d0,%d1			# subtract scale factor (restore true exp)
9391 	andi.w		&0x8000,%d2		# keep old sign
9392 	or.w		%d1,%d2			# concat old sign,new exp
9393 	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
9394 	mov.l		(%sp)+,%d2		# restore d2
9395 	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
9396 	rts
9397 
9398 #
9399 # operand is to be rounded to double precision
9400 #
9401 fneg_dbl:
9402 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
9403 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
9404 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
9405 	bsr.l		scale_to_zero_src	# calculate scale factor
9406 
9407 	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
9408 	bge.b		fneg_sd_unfl		# yes; go handle underflow
9409 	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
9410 	beq.w		fneg_sd_may_ovfl	# maybe; go check
9411 	blt.w		fneg_sd_ovfl		# yes; go handle overflow
9412 	bra.w		fneg_sd_normal		# no; go handle normalized op
9413 
9414 #
9415 # operand WILL underflow when moved in to the fp register file
9416 #
9417 fneg_sd_unfl:
9418 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
9419 
9420 	eori.b		&0x80,FP_SCR0_EX(%a6)	# negate sign
9421 	bpl.b		fneg_sd_unfl_tst
9422 	bset		&neg_bit,FPSR_CC(%a6)	# set 'N' ccode bit
9423 
9424 # if underflow or inexact is enabled, go calculate EXOP first.
9425 fneg_sd_unfl_tst:
9426 	mov.b		FPCR_ENABLE(%a6),%d1
9427 	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
9428 	bne.b		fneg_sd_unfl_ena	# yes
9429 
9430 fneg_sd_unfl_dis:
9431 	lea		FP_SCR0(%a6),%a0	# pass: result addr
9432 	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
9433 	bsr.l		unf_res			# calculate default result
9434 	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
9435 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
9436 	rts
9437 
9438 #
9439 # operand will underflow AND underflow is enabled.
9440 # therefore, we must return the result rounded to extended precision.
9441 #
9442 fneg_sd_unfl_ena:
9443 	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
9444 	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
9445 	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent
9446 
9447 	mov.l		%d2,-(%sp)		# save d2
9448 	mov.l		%d1,%d2			# make a copy
9449 	andi.l		&0x7fff,%d1		# strip sign
9450 	andi.w		&0x8000,%d2		# keep old sign
9451 	sub.l		%d0,%d1			# subtract scale factor
9452 	addi.l		&0x6000,%d1		# add new bias
9453 	andi.w		&0x7fff,%d1
9454 	or.w		%d2,%d1			# concat new sign,new exp
9455 	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
9456 	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
9457 	mov.l		(%sp)+,%d2		# restore d2
9458 	bra.b		fneg_sd_unfl_dis
9459 
9460 #
9461 # operand WILL overflow.
9462 #
9463 fneg_sd_ovfl:
9464 	fmov.l		&0x0,%fpsr		# clear FPSR
9465 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
9466 
9467 	fneg.x		FP_SCR0(%a6),%fp0	# perform negation
9468 
9469 	fmov.l		&0x0,%fpcr		# clear FPCR
9470 	fmov.l		%fpsr,%d1		# save FPSR
9471 
9472 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
9473 
9474 fneg_sd_ovfl_tst:
9475 	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
9476 
9477 	mov.b		FPCR_ENABLE(%a6),%d1
9478 	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
9479 	bne.b		fneg_sd_ovfl_ena	# yes
9480 
9481 #
9482 # OVFL is not enabled; therefore, we must create the default result by
9483 # calling ovf_res().
9484 #
9485 fneg_sd_ovfl_dis:
9486 	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
9487 	sne		%d1			# set sign param accordingly
9488 	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
9489 	bsr.l		ovf_res			# calculate default result
9490 	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
9491 	fmovm.x		(%a0),&0x80		# return default result in fp0
9492 	rts
9493 
9494 #
9495 # OVFL is enabled.
9496 # the INEX2 bit has already been updated by the round to the correct precision.
9497 # now, round to extended(and don't alter the FPSR).
9498 #
9499 fneg_sd_ovfl_ena:
9500 	mov.l		%d2,-(%sp)		# save d2
9501 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
9502 	mov.l		%d1,%d2			# make a copy
9503 	andi.l		&0x7fff,%d1		# strip sign
9504 	andi.w		&0x8000,%d2		# keep old sign
9505 	sub.l		%d0,%d1			# subtract scale factor
9506 	subi.l		&0x6000,%d1		# subtract bias
9507 	andi.w		&0x7fff,%d1
9508 	or.w		%d2,%d1			# concat sign,exp
9509 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
9510 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
9511 	mov.l		(%sp)+,%d2		# restore d2
9512 	bra.b		fneg_sd_ovfl_dis
9513 
9514 #
9515 # the move in MAY overflow. so...
9516 #
9517 fneg_sd_may_ovfl:
9518 	fmov.l		&0x0,%fpsr		# clear FPSR
9519 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
9520 
9521 	fneg.x		FP_SCR0(%a6),%fp0	# perform negation
9522 
9523 	fmov.l		%fpsr,%d1		# save status
9524 	fmov.l		&0x0,%fpcr		# clear FPCR
9525 
9526 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
9527 
9528 	fabs.x		%fp0,%fp1		# make a copy of result
9529 	fcmp.b		%fp1,&0x2		# is |result| >= 2.0?
9530 	fbge.w		fneg_sd_ovfl_tst	# yes; overflow has occurred
9531 
9532 # no, it didn't overflow; we have correct result
9533 	bra.w		fneg_sd_normal_exit
9534 
9535 ##########################################################################
9536 
9537 #
9538 # input is not normalized; what is it?
9539 #
9540 fneg_not_norm:
9541 	cmpi.b		%d1,&DENORM		# weed out DENORM
9542 	beq.w		fneg_denorm
9543 	cmpi.b		%d1,&SNAN		# weed out SNAN
9544 	beq.l		res_snan_1op
9545 	cmpi.b		%d1,&QNAN		# weed out QNAN
9546 	beq.l		res_qnan_1op
9547 
9548 #
9549 # do the fneg; at this point, only possible ops are ZERO and INF.
9550 # use fneg to determine ccodes.
9551 # prec:mode should be zero at this point but it won't affect answer anyways.
9552 #
9553 	fneg.x		SRC_EX(%a0),%fp0	# do fneg
9554 	fmov.l		%fpsr,%d0
9555 	rol.l		&0x8,%d0		# put ccodes in lo byte
9556 	mov.b		%d0,FPSR_CC(%a6)	# insert correct ccodes
9557 	rts
9558
9559 #########################################################################
9560 # XDEF **************************************************************** #
9561 #	ftst(): emulates the ftst instruction			#
9562 # #
9563 # XREF **************************************************************** #
9564 # res{s,q}nan_1op() - set NAN result for monadic instruction #
9565 # #
9566 # INPUT *************************************************************** #
9567 # a0 = pointer to extended precision source operand #
9568 # #
9569 # OUTPUT ************************************************************** #
9570 # none #
9571 # #
9572 # ALGORITHM *********************************************************** #
9573 #	Check the source operand tag (STAG) and set the FPSR according	#
9574 # to the operand type and sign. #
9575 # #
9576 #########################################################################
9577
9578 	global		ftst
9579 ftst:
9580 	mov.b		STAG(%a6),%d1
9581 	bne.b		ftst_not_norm		# optimize on non-norm input
9582 
9583 #
9584 # Norm:
9585 #
9586 ftst_norm:
9587 	tst.b		SRC_EX(%a0)		# is operand negative?
9588 	bmi.b		ftst_norm_m		# yes
9589 	rts
9590 ftst_norm_m:
9591 	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
9592 	rts
9593 
9594 #
9595 # input is not normalized; what is it?
9596 #
9597 ftst_not_norm:
9598 	cmpi.b		%d1,&ZERO		# weed out ZERO
9599 	beq.b		ftst_zero
9600 	cmpi.b		%d1,&INF		# weed out INF
9601 	beq.b		ftst_inf
9602 	cmpi.b		%d1,&SNAN		# weed out SNAN
9603 	beq.l		res_snan_1op
9604 	cmpi.b		%d1,&QNAN		# weed out QNAN
9605 	beq.l		res_qnan_1op
9606 
9607 #
9608 # Denorm: falls through from the tag dispatch above.
9609 #
9610 ftst_denorm:
9611 	tst.b		SRC_EX(%a0)		# is operand negative?
9612 	bmi.b		ftst_denorm_m		# yes
9613 	rts
9614 ftst_denorm_m:
9615 	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
9616 	rts
9617 
9618 #
9619 # Infinity:
9620 #
9621 ftst_inf:
9622 	tst.b		SRC_EX(%a0)		# is operand negative?
9623 	bmi.b		ftst_inf_m		# yes
9624 ftst_inf_p:
9625 	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
9626 	rts
9627 ftst_inf_m:
9628 	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'I','N' ccode bits
9629 	rts
9630 
9631 #
9632 # Zero:
9633 #
9634 ftst_zero:
9635 	tst.b		SRC_EX(%a0)		# is operand negative?
9636 	bmi.b		ftst_zero_m		# yes
9637 ftst_zero_p:
9638 	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
9639 	rts
9640 ftst_zero_m:
9641 	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
9642 	rts
9643
9644 #########################################################################
9645 # XDEF **************************************************************** #
9646 # fint(): emulates the fint instruction #
9647 # #
9648 # XREF **************************************************************** #
9649 # res_{s,q}nan_1op() - set NAN result for monadic operation #
9650 # #
9651 # INPUT *************************************************************** #
9652 # a0 = pointer to extended precision source operand #
9653 # d0 = round precision/mode #
9654 # #
9655 # OUTPUT ************************************************************** #
9656 # fp0 = result #
9657 # #
9658 # ALGORITHM *********************************************************** #
9659 # Separate according to operand type. Unnorms don't pass through #
9660 # here. For norms, load the rounding mode/prec, execute a "fint", then #
9661 # store the resulting FPSR bits. #
9662 # For denorms, force the j-bit to a one and do the same as for #
9663 # norms. Denorms are so low that the answer will either be a zero or a #
9664 # one. #
9665 # For zeroes/infs/NANs, return the same while setting the FPSR #
9666 # as appropriate. #
9667 # #
9668 #########################################################################
9669
9670 	global		fint
9671 fint:
9672 	mov.b		STAG(%a6),%d1
9673 	bne.b		fint_not_norm		# optimize on non-norm input
9674 
9675 #
9676 # Norm:
9677 #
9678 fint_norm:
9679 	andi.b		&0x30,%d0		# clear prec bits (=ext); keep rnd mode
9680 
9681 	fmov.l		%d0,%fpcr		# set FPCR
9682 	fmov.l		&0x0,%fpsr		# clear FPSR
9683 
9684 	fint.x		SRC(%a0),%fp0		# execute fint
9685 
9686 	fmov.l		&0x0,%fpcr		# clear FPCR
9687 	fmov.l		%fpsr,%d0		# save FPSR
9688 	or.l		%d0,USER_FPSR(%a6)	# set exception bits
9689 
9690 	rts
9691 
9692 #
9693 # input is not normalized; what is it?
9694 #
9695 fint_not_norm:
9696 	cmpi.b		%d1,&ZERO		# weed out ZERO
9697 	beq.b		fint_zero
9698 	cmpi.b		%d1,&INF		# weed out INF
9699 	beq.b		fint_inf
9700 	cmpi.b		%d1,&DENORM		# weed out DENORM
9701 	beq.b		fint_denorm
9702 	cmpi.b		%d1,&SNAN		# weed out SNAN
9703 	beq.l		res_snan_1op
9704 	bra.l		res_qnan_1op		# weed out QNAN
9705 
9706 #
9707 # Denorm:
9708 #
9709 # for DENORMs, the result will be either (+/-)ZERO or (+/-)1.
9710 # also, the INEX2 and AINEX exception bits will be set.
9711 # so, we could either set these manually or force the DENORM
9712 # to a very small NORM and ship it to the NORM routine.
9713 # I do the latter.
9714 #
9715 fint_denorm:
9716 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
9717 	mov.b		&0x80,FP_SCR0_HI(%a6)	# force DENORM ==> small NORM (set j-bit)
9718 	lea		FP_SCR0(%a6),%a0	# a0 now points at the faked NORM
9719 	bra.b		fint_norm
9720 
9721 #
9722 # Zero: return the same-signed zero and set Z (and N if negative).
9723 #
9724 fint_zero:
9725 	tst.b		SRC_EX(%a0)		# is ZERO negative?
9726 	bmi.b		fint_zero_m		# yes
9727 fint_zero_p:
9728 	fmov.s		&0x00000000,%fp0	# return +ZERO in fp0
9729 	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
9730 	rts
9731 fint_zero_m:
9732 	fmov.s		&0x80000000,%fp0	# return -ZERO in fp0
9733 	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
9734 	rts
9735 
9736 #
9737 # Infinity: return the operand unchanged and set I (and N if negative).
9738 #
9739 fint_inf:
9740 	fmovm.x		SRC(%a0),&0x80		# return result in fp0
9741 	tst.b		SRC_EX(%a0)		# is INF negative?
9742 	bmi.b		fint_inf_m		# yes
9743 fint_inf_p:
9744 	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
9745 	rts
9746 fint_inf_m:
9747 	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
9748 	rts
9749
9750 #########################################################################
9751 # XDEF **************************************************************** #
9752 # fintrz(): emulates the fintrz instruction #
9753 # #
9754 # XREF **************************************************************** #
9755 # res_{s,q}nan_1op() - set NAN result for monadic operation #
9756 # #
9757 # INPUT *************************************************************** #
9758 # a0 = pointer to extended precision source operand #
9759 # d0 = round precision/mode #
9760 # #
9761 # OUTPUT ************************************************************** #
9762 # fp0 = result #
9763 # #
9764 # ALGORITHM *********************************************************** #
9765 # Separate according to operand type. Unnorms don't pass through #
9766 # here. For norms, load the rounding mode/prec, execute a "fintrz", #
9767 # then store the resulting FPSR bits. #
9768 # For denorms, force the j-bit to a one and do the same as for #
9769 # norms. Denorms are so low that the answer will either be a zero or a #
9770 # one. #
9771 # For zeroes/infs/NANs, return the same while setting the FPSR #
9772 # as appropriate. #
9773 # #
9774 #########################################################################
9775
9776 	global		fintrz
9777 fintrz:
9778 	mov.b		STAG(%a6),%d1
9779 	bne.b		fintrz_not_norm		# optimize on non-norm input
9780 
9781 #
9782 # Norm: fintrz always chops toward zero, so the FPCR rounding
9783 # mode need not be loaded here (unlike fint above).
9784 #
9785 fintrz_norm:
9786 	fmov.l		&0x0,%fpsr		# clear FPSR
9787 
9788 	fintrz.x	SRC(%a0),%fp0		# execute fintrz
9789 
9790 	fmov.l		%fpsr,%d0		# save FPSR
9791 	or.l		%d0,USER_FPSR(%a6)	# set exception bits
9792 
9793 	rts
9794 
9795 #
9796 # input is not normalized; what is it?
9797 #
9798 fintrz_not_norm:
9799 	cmpi.b		%d1,&ZERO		# weed out ZERO
9800 	beq.b		fintrz_zero
9801 	cmpi.b		%d1,&INF		# weed out INF
9802 	beq.b		fintrz_inf
9803 	cmpi.b		%d1,&DENORM		# weed out DENORM
9804 	beq.b		fintrz_denorm
9805 	cmpi.b		%d1,&SNAN		# weed out SNAN
9806 	beq.l		res_snan_1op
9807 	bra.l		res_qnan_1op		# weed out QNAN
9808 
9809 #
9810 # Denorm:
9811 #
9812 # for DENORMs, the result will be (+/-)ZERO.
9813 # also, the INEX2 and AINEX exception bits will be set.
9814 # so, we could either set these manually or force the DENORM
9815 # to a very small NORM and ship it to the NORM routine.
9816 # I do the latter.
9817 #
9818 fintrz_denorm:
9819 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
9820 	mov.b		&0x80,FP_SCR0_HI(%a6)	# force DENORM ==> small NORM (set j-bit)
9821 	lea		FP_SCR0(%a6),%a0	# a0 now points at the faked NORM
9822 	bra.b		fintrz_norm
9823 
9824 #
9825 # Zero: return the same-signed zero and set Z (and N if negative).
9826 #
9827 fintrz_zero:
9828 	tst.b		SRC_EX(%a0)		# is ZERO negative?
9829 	bmi.b		fintrz_zero_m		# yes
9830 fintrz_zero_p:
9831 	fmov.s		&0x00000000,%fp0	# return +ZERO in fp0
9832 	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
9833 	rts
9834 fintrz_zero_m:
9835 	fmov.s		&0x80000000,%fp0	# return -ZERO in fp0
9836 	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
9837 	rts
9838 
9839 #
9840 # Infinity: return the operand unchanged and set I (and N if negative).
9841 #
9842 fintrz_inf:
9843 	fmovm.x		SRC(%a0),&0x80		# return result in fp0
9844 	tst.b		SRC_EX(%a0)		# is INF negative?
9845 	bmi.b		fintrz_inf_m		# yes
9846 fintrz_inf_p:
9847 	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
9848 	rts
9849 fintrz_inf_m:
9850 	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
9851
9852 #########################################################################
9853 # XDEF **************************************************************** #
9854 # fabs(): emulates the fabs instruction #
9855 # fsabs(): emulates the fsabs instruction #
9856 # fdabs(): emulates the fdabs instruction #
9857 # #
9858 # XREF **************************************************************** #
9859 # norm() - normalize denorm mantissa to provide EXOP #
9860 # scale_to_zero_src() - make exponent. = 0; get scale factor #
9861 # unf_res() - calculate underflow result #
9862 # ovf_res() - calculate overflow result #
9863 # res_{s,q}nan_1op() - set NAN result for monadic operation #
9864 # #
9865 # INPUT *************************************************************** #
9866 # a0 = pointer to extended precision source operand #
9867 # d0 = rnd precision/mode #
9868 # #
9869 # OUTPUT ************************************************************** #
9870 # fp0 = result #
9871 # fp1 = EXOP (if exception occurred) #
9872 # #
9873 # ALGORITHM *********************************************************** #
9874 # Handle NANs, infinities, and zeroes as special cases. Divide #
9875 # norms into extended, single, and double precision. #
9876 # Simply clear sign for extended precision norm. Ext prec denorm #
9877 # gets an EXOP created for it since it's an underflow. #
9878 # Double and single precision can overflow and underflow. First, #
9879 # scale the operand such that the exponent is zero. Perform an "fabs" #
9880 # using the correct rnd mode/prec. Check to see if the original #
9881 # exponent would take an exception. If so, use unf_res() or ovf_res() #
9882 # to calculate the default result. Also, create the EXOP for the #
9883 # exceptional case. If no exception should occur, insert the correct #
9884 # result exponent and return. #
9885 # Unnorms don't pass through here. #
9886 # #
9887 #########################################################################
9888
9889 	global		fsabs
9890 fsabs:
9891 	andi.b		&0x30,%d0		# clear rnd prec
9892 	ori.b		&s_mode*0x10,%d0	# insert sgl precision
9893 	bra.b		fabs
9894 
9895 	global		fdabs
9896 fdabs:
9897 	andi.b		&0x30,%d0		# clear rnd prec
9898 	ori.b		&d_mode*0x10,%d0	# insert dbl precision
9899 					# fall through into fabs below
9900 	global		fabs
9901 fabs:
9902 	mov.l		%d0,L_SCR3(%a6)		# store rnd info
9903 	mov.b		STAG(%a6),%d1
9904 	bne.w		fabs_not_norm		# optimize on non-norm input
9905 
9906 #
9907 # ABSOLUTE VALUE: norms and denorms ONLY!
9908 #
9909 fabs_norm:
9910 	andi.b		&0xc0,%d0		# is precision extended?
9911 	bne.b		fabs_not_ext		# no; go handle sgl or dbl
9912 
9913 #
9914 # precision selected is extended. so...we can not get an underflow
9915 # or overflow because of rounding to the correct precision. so...
9916 # skip the scaling and unscaling...
9917 #
9918 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
9919 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
9920 	mov.w		SRC_EX(%a0),%d1
9921 	bclr		&15,%d1			# force absolute value
9922 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert exponent
9923 	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
9924 	rts
9925 
9926 #
9927 # for an extended precision DENORM, the UNFL exception bit is set
9928 # the accrued bit is NOT set in this instance(no inexactness!)
9929 #
9930 fabs_denorm:
9931 	andi.b		&0xc0,%d0		# is precision extended?
9932 	bne.b		fabs_not_ext		# no
9933 
9934 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
9935 
9936 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
9937 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
9938 	mov.w		SRC_EX(%a0),%d0
9939 	bclr		&15,%d0			# clear sign
9940 	mov.w		%d0,FP_SCR0_EX(%a6)	# insert exponent
9941 
9942 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
9943 
9944 	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
9945 	bne.b		fabs_ext_unfl_ena	# yes; go build the EXOP
9946 	rts
9947 
9948 #
9949 # the input is an extended DENORM and underflow is enabled in the FPCR.
9950 # normalize the mantissa and add the bias of 0x6000 to the resulting negative
9951 # exponent and insert back into the operand.
9952 #
9953 fabs_ext_unfl_ena:
9954 	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
9955 	bsr.l		norm			# normalize result
9956 	neg.w		%d0			# new exponent = -(shft val)
9957 	addi.w		&0x6000,%d0		# add new bias to exponent
9958 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
9959 	andi.w		&0x8000,%d1		# keep old sign
9960 	andi.w		&0x7fff,%d0		# clear sign position
9961 	or.w		%d1,%d0			# concat old sign, new exponent
9962 	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
9963 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
9964 	rts
9965 
9966 #
9967 # operand is either single or double
9968 #
9969 fabs_not_ext:
9970 	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
9971 	bne.b		fabs_dbl
9972 
9973 #
9974 # operand is to be rounded to single precision
9975 #
9976 fabs_sgl:
9977 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
9978 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
9979 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
9980 	bsr.l		scale_to_zero_src	# calculate scale factor
9981 
9982 	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
9983 	bge.w		fabs_sd_unfl		# yes; go handle underflow
9984 	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
9985 	beq.w		fabs_sd_may_ovfl	# maybe; go check
9986 	blt.w		fabs_sd_ovfl		# yes; go handle overflow
9987 
9988 #
9989 # operand will NOT overflow or underflow when moved in to the fp reg file
9990 #
9991 fabs_sd_normal:
9992 	fmov.l		&0x0,%fpsr		# clear FPSR
9993 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
9994 
9995 	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute
9996 
9997 	fmov.l		%fpsr,%d1		# save FPSR
9998 	fmov.l		&0x0,%fpcr		# clear FPCR
9999 
10000 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
10001 
10002 fabs_sd_normal_exit:
10003 	mov.l		%d2,-(%sp)		# save d2
10004 	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
10005 	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
10006 	mov.l		%d1,%d2			# make a copy
10007 	andi.l		&0x7fff,%d1		# strip sign
10008 	sub.l		%d0,%d1			# subtract scale factor (restore true exp)
10009 	andi.w		&0x8000,%d2		# keep old sign
10010 	or.w		%d1,%d2			# concat old sign,new exp
10011 	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
10012 	mov.l		(%sp)+,%d2		# restore d2
10013 	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
10014 	rts
10015 
10016 #
10017 # operand is to be rounded to double precision
10018 #
10019 fabs_dbl:
10020 	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
10021 	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
10022 	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
10023 	bsr.l		scale_to_zero_src	# calculate scale factor
10024 
10025 	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
10026 	bge.b		fabs_sd_unfl		# yes; go handle underflow
10027 	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
10028 	beq.w		fabs_sd_may_ovfl	# maybe; go check
10029 	blt.w		fabs_sd_ovfl		# yes; go handle overflow
10030 	bra.w		fabs_sd_normal		# no; go handle normalized op
10031 
10032 #
10033 # operand WILL underflow when moved in to the fp register file
10034 #
10035 fabs_sd_unfl:
10036 	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
10037 
10038 	bclr		&0x7,FP_SCR0_EX(%a6)	# force absolute value
10039 
10040 # if underflow or inexact is enabled, go calculate EXOP first.
10041 	mov.b		FPCR_ENABLE(%a6),%d1
10042 	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
10043 	bne.b		fabs_sd_unfl_ena	# yes
10044 
10045 fabs_sd_unfl_dis:
10046 	lea		FP_SCR0(%a6),%a0	# pass: result addr
10047 	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
10048 	bsr.l		unf_res			# calculate default result
10049 	or.b		%d0,FPSR_CC(%a6)	# set possible 'Z' ccode
10050 	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
10051 	rts
10052 
10053 #
10054 # operand will underflow AND underflow is enabled.
10055 # therefore, we must return the result rounded to extended precision.
10056 #
10057 fabs_sd_unfl_ena:
10058 	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
10059 	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
10060 	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent
10061 
10062 	mov.l		%d2,-(%sp)		# save d2
10063 	mov.l		%d1,%d2			# make a copy
10064 	andi.l		&0x7fff,%d1		# strip sign
10065 	andi.w		&0x8000,%d2		# keep old sign
10066 	sub.l		%d0,%d1			# subtract scale factor
10067 	addi.l		&0x6000,%d1		# add new bias
10068 	andi.w		&0x7fff,%d1
10069 	or.w		%d2,%d1			# concat new sign,new exp
10070 	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
10071 	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
10072 	mov.l		(%sp)+,%d2		# restore d2
10073 	bra.b		fabs_sd_unfl_dis
10074 
10075 #
10076 # operand WILL overflow.
10077 #
10078 fabs_sd_ovfl:
10079 	fmov.l		&0x0,%fpsr		# clear FPSR
10080 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
10081 
10082 	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute
10083 
10084 	fmov.l		&0x0,%fpcr		# clear FPCR
10085 	fmov.l		%fpsr,%d1		# save FPSR
10086 
10087 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
10088 
10089 fabs_sd_ovfl_tst:
10090 	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
10091 
10092 	mov.b		FPCR_ENABLE(%a6),%d1
10093 	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
10094 	bne.b		fabs_sd_ovfl_ena	# yes
10095 
10096 #
10097 # OVFL is not enabled; therefore, we must create the default result by
10098 # calling ovf_res().
10099 #
10100 fabs_sd_ovfl_dis:
10101 	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
10102 	sne		%d1			# set sign param accordingly
10103 	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
10104 	bsr.l		ovf_res			# calculate default result
10105 	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
10106 	fmovm.x		(%a0),&0x80		# return default result in fp0
10107 	rts
10108 
10109 #
10110 # OVFL is enabled.
10111 # the INEX2 bit has already been updated by the round to the correct precision.
10112 # now, round to extended(and don't alter the FPSR).
10113 #
10114 fabs_sd_ovfl_ena:
10115 	mov.l		%d2,-(%sp)		# save d2
10116 	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
10117 	mov.l		%d1,%d2			# make a copy
10118 	andi.l		&0x7fff,%d1		# strip sign
10119 	andi.w		&0x8000,%d2		# keep old sign
10120 	sub.l		%d0,%d1			# subtract scale factor
10121 	subi.l		&0x6000,%d1		# subtract bias
10122 	andi.w		&0x7fff,%d1
10123 	or.w		%d2,%d1			# concat sign,exp
10124 	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
10125 	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
10126 	mov.l		(%sp)+,%d2		# restore d2
10127 	bra.b		fabs_sd_ovfl_dis
10128 
10129 #
10130 # the move in MAY overflow. so...
10131 #
10132 fabs_sd_may_ovfl:
10133 	fmov.l		&0x0,%fpsr		# clear FPSR
10134 	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
10135 
10136 	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute
10137 
10138 	fmov.l		%fpsr,%d1		# save status
10139 	fmov.l		&0x0,%fpcr		# clear FPCR
10140 
10141 	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
10142 
10143 	fabs.x		%fp0,%fp1		# make a copy of result
10144 	fcmp.b		%fp1,&0x2		# is |result| >= 2.0?
10145 	fbge.w		fabs_sd_ovfl_tst	# yes; overflow has occurred
10146 
10147 # no, it didn't overflow; we have correct result
10148 	bra.w		fabs_sd_normal_exit
10149 
10150 ##########################################################################
10151 
10152 #
10153 # input is not normalized; what is it?
10154 #
10155 fabs_not_norm:
10156 	cmpi.b		%d1,&DENORM		# weed out DENORM
10157 	beq.w		fabs_denorm
10158 	cmpi.b		%d1,&SNAN		# weed out SNAN
10159 	beq.l		res_snan_1op
10160 	cmpi.b		%d1,&QNAN		# weed out QNAN
10161 	beq.l		res_qnan_1op
10162 
10163 	fabs.x		SRC(%a0),%fp0		# force absolute value
10164 
10165 	cmpi.b		%d1,&INF		# weed out INF
10166 	beq.b		fabs_inf
10167 fabs_zero:
10168 	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
10169 	rts
10170 fabs_inf:
10171 	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
10172 	rts
10173
10174 #########################################################################
10175 # XDEF **************************************************************** #
10176 # fcmp(): fp compare op routine #
10177 # #
10178 # XREF **************************************************************** #
10179 # res_qnan() - return QNAN result #
10180 # res_snan() - return SNAN result #
10181 # #
10182 # INPUT *************************************************************** #
10183 # a0 = pointer to extended precision source operand #
10184 # a1 = pointer to extended precision destination operand #
10185 # d0 = round prec/mode #
10186 # #
10187 # OUTPUT ************************************************************** #
10188 # None #
10189 # #
10190 # ALGORITHM *********************************************************** #
10191 # Handle NANs and denorms as special cases. For everything else, #
10192 # just use the actual fcmp instruction to produce the correct condition #
10193 # codes. #
10194 # #
10195 #########################################################################
10196
10197 	global fcmp
# fcmp: emulate the fcmp instruction.
# In:  a0 = ptr to extended-precision src, a1 = ptr to ext-prec dst,
#      d0 = round prec/mode.  Out: FPSR ccodes only (no result operand).
10198 fcmp:
# build combined tag: d1 = (DTAG << 3) | STAG; zero means both NORM
10199 	clr.w %d1
10200 	mov.b DTAG(%a6),%d1
10201 	lsl.b &0x3,%d1
10202 	or.b STAG(%a6),%d1
10203 	bne.b fcmp_not_norm # optimize on non-norm input
10204 
10205 #
10206 # COMPARE FP OPs : NORMs, ZEROs, INFs, and "corrected" DENORMs
10207 #
10208 fcmp_norm:
10209 	fmovm.x DST(%a1),&0x80 # load dst op
10210 
10211 	fcmp.x %fp0,SRC(%a0) # do compare
10212 
# the ccodes live in the top byte of the FPSR; rotate them into the
# low byte and store them as the user-visible condition codes
10213 	fmov.l %fpsr,%d0 # save FPSR
10214 	rol.l &0x8,%d0 # extract ccode bits
10215 	mov.b %d0,FPSR_CC(%a6) # set ccode bits(no exc bits are set)
10216 
10217 	rts
10218
10219 #
10220 # fcmp: inputs are not both normalized; what are they?
10221 #
#
# fcmp: inputs are not both normalized; dispatch through the jump
# table below.  d1 = (DTAG<<3)|STAG, so each 8-entry row is one dst
# type and each column one src type (word offsets relative to the
# table base).
#
10222 fcmp_not_norm:
10223 	mov.w (tbl_fcmp_op.b,%pc,%d1.w*2),%d1
10224 	jmp (tbl_fcmp_op.b,%pc,%d1.w*1)
10225 
10226 	swbeg &48
10227 tbl_fcmp_op:
10228 	short fcmp_norm - tbl_fcmp_op # NORM - NORM
10229 	short fcmp_norm - tbl_fcmp_op # NORM - ZERO
10230 	short fcmp_norm - tbl_fcmp_op # NORM - INF
10231 	short fcmp_res_qnan - tbl_fcmp_op # NORM - QNAN
10232 	short fcmp_nrm_dnrm - tbl_fcmp_op # NORM - DENORM
10233 	short fcmp_res_snan - tbl_fcmp_op # NORM - SNAN
10234 	short tbl_fcmp_op - tbl_fcmp_op #
10235 	short tbl_fcmp_op - tbl_fcmp_op #
10236 
10237 	short fcmp_norm - tbl_fcmp_op # ZERO - NORM
10238 	short fcmp_norm - tbl_fcmp_op # ZERO - ZERO
10239 	short fcmp_norm - tbl_fcmp_op # ZERO - INF
10240 	short fcmp_res_qnan - tbl_fcmp_op # ZERO - QNAN
10241 	short fcmp_dnrm_s - tbl_fcmp_op # ZERO - DENORM
10242 	short fcmp_res_snan - tbl_fcmp_op # ZERO - SNAN
10243 	short tbl_fcmp_op - tbl_fcmp_op #
10244 	short tbl_fcmp_op - tbl_fcmp_op #
10245 
10246 	short fcmp_norm - tbl_fcmp_op # INF - NORM
10247 	short fcmp_norm - tbl_fcmp_op # INF - ZERO
10248 	short fcmp_norm - tbl_fcmp_op # INF - INF
10249 	short fcmp_res_qnan - tbl_fcmp_op # INF - QNAN
10250 	short fcmp_dnrm_s - tbl_fcmp_op # INF - DENORM
10251 	short fcmp_res_snan - tbl_fcmp_op # INF - SNAN
10252 	short tbl_fcmp_op - tbl_fcmp_op #
10253 	short tbl_fcmp_op - tbl_fcmp_op #
10254 
10255 	short fcmp_res_qnan - tbl_fcmp_op # QNAN - NORM
10256 	short fcmp_res_qnan - tbl_fcmp_op # QNAN - ZERO
10257 	short fcmp_res_qnan - tbl_fcmp_op # QNAN - INF
10258 	short fcmp_res_qnan - tbl_fcmp_op # QNAN - QNAN
10259 	short fcmp_res_qnan - tbl_fcmp_op # QNAN - DENORM
10260 	short fcmp_res_snan - tbl_fcmp_op # QNAN - SNAN
10261 	short tbl_fcmp_op - tbl_fcmp_op #
10262 	short tbl_fcmp_op - tbl_fcmp_op #
10263 
10264 	short fcmp_dnrm_nrm - tbl_fcmp_op # DENORM - NORM
10265 	short fcmp_dnrm_d - tbl_fcmp_op # DENORM - ZERO
10266 	short fcmp_dnrm_d - tbl_fcmp_op # DENORM - INF
10267 	short fcmp_res_qnan - tbl_fcmp_op # DENORM - QNAN
10268 	short fcmp_dnrm_sd - tbl_fcmp_op # DENORM - DENORM
10269 	short fcmp_res_snan - tbl_fcmp_op # DENORM - SNAN
10270 	short tbl_fcmp_op - tbl_fcmp_op #
10271 	short tbl_fcmp_op - tbl_fcmp_op #
10272 
10273 	short fcmp_res_snan - tbl_fcmp_op # SNAN - NORM
10274 	short fcmp_res_snan - tbl_fcmp_op # SNAN - ZERO
10275 	short fcmp_res_snan - tbl_fcmp_op # SNAN - INF
10276 	short fcmp_res_snan - tbl_fcmp_op # SNAN - QNAN
10277 	short fcmp_res_snan - tbl_fcmp_op # SNAN - DENORM
10278 	short fcmp_res_snan - tbl_fcmp_op # SNAN - SNAN
10279 	short tbl_fcmp_op - tbl_fcmp_op #
10280 	short tbl_fcmp_op - tbl_fcmp_op #
10281 
10282 # unlike all other functions for QNAN and SNAN, fcmp does NOT set the
10283 # 'N' bit for a negative QNAN or SNAN input so we must squelch it here.
10284 fcmp_res_qnan:
10285 	bsr.l res_qnan
10286 	andi.b &0xf7,FPSR_CC(%a6) # clear the 'N' ccode bit (bit 3)
10287 	rts
10288 fcmp_res_snan:
10289 	bsr.l res_snan
10290 	andi.b &0xf7,FPSR_CC(%a6) # clear the 'N' ccode bit (bit 3)
10291 	rts
10292
10293 #
10294 # DENORMs are a little more difficult.
10295 # If you have a 2 DENORMs, then you can just force the j-bit to a one
10296 # and use the fcmp_norm routine.
10297 # If you have a DENORM and an INF or ZERO, just force the DENORM's j-bit to a one
10298 # and use the fcmp_norm routine.
10299 # If you have a DENORM and a NORM with opposite signs, then use fcmp_norm, also.
10300 # But with a DENORM and a NORM of the same sign, the neg bit is set if the
10301 # (1) signs are (+) and the DENORM is the dst or
10302 # (2) signs are (-) and the DENORM is the src
10303 #
10304
# fcmp_dnrm_s: src is a DENORM; copy it to FP_SCR0 with its j-bit
# forced to one (turning it into a small norm) and redo the compare.
10305 fcmp_dnrm_s:
10306 	mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
10307 	mov.l SRC_HI(%a0),%d0
10308 	bset &31,%d0 # DENORM src; make into small norm
10309 	mov.l %d0,FP_SCR0_HI(%a6)
10310 	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
10311 	lea FP_SCR0(%a6),%a0
10312 	bra.w fcmp_norm
10313 
# fcmp_dnrm_d: dst is a DENORM; same trick, rewriting a1 instead.
10314 fcmp_dnrm_d:
10315 	mov.l DST_EX(%a1),FP_SCR0_EX(%a6)
10316 	mov.l DST_HI(%a1),%d0
10317 	bset &31,%d0 # DENORM dst; make into small norm
10318 	mov.l %d0,FP_SCR0_HI(%a6)
10319 	mov.l DST_LO(%a1),FP_SCR0_LO(%a6)
10320 	lea FP_SCR0(%a6),%a1
10321 	bra.w fcmp_norm
10322 
# fcmp_dnrm_sd: both operands are DENORMs; force both j-bits.
10323 fcmp_dnrm_sd:
10324 	mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
10325 	mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
10326 	mov.l DST_HI(%a1),%d0
10327 	bset &31,%d0 # DENORM dst; make into small norm
10328 	mov.l %d0,FP_SCR1_HI(%a6)
10329 	mov.l SRC_HI(%a0),%d0
10330 	bset &31,%d0 # DENORM src; make into small norm
10331 	mov.l %d0,FP_SCR0_HI(%a6)
10332 	mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
10333 	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
10334 	lea FP_SCR1(%a6),%a1
10335 	lea FP_SCR0(%a6),%a0
10336 	bra.w fcmp_norm
10337 
# fcmp_nrm_dnrm: dst is NORM, src is DENORM.  Opposite signs: the
# sign alone decides, so normalize the src and use the real compare.
# Same sign: |dst| > |src| always, so we can set the ccodes directly.
10338 fcmp_nrm_dnrm:
10339 	mov.b SRC_EX(%a0),%d0 # determine if like signs
10340 	mov.b DST_EX(%a1),%d1
10341 	eor.b %d0,%d1
10342 	bmi.w fcmp_dnrm_s
10343 
10344 # signs are the same, so must determine the answer ourselves.
10345 	tst.b %d0 # is src op negative?
10346 	bmi.b fcmp_nrm_dnrm_m # yes
10347 	rts
10348 fcmp_nrm_dnrm_m:
10349 	mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
10350 	rts
10351 
# fcmp_dnrm_nrm: dst is DENORM, src is NORM (mirror of the above).
10352 fcmp_dnrm_nrm:
10353 	mov.b SRC_EX(%a0),%d0 # determine if like signs
10354 	mov.b DST_EX(%a1),%d1
10355 	eor.b %d0,%d1
10356 	bmi.w fcmp_dnrm_d
10357 
10358 # signs are the same, so must determine the answer ourselves.
10359 	tst.b %d0 # is src op negative?
10360 	bpl.b fcmp_dnrm_nrm_m # no
10361 	rts
10362 fcmp_dnrm_nrm_m:
10363 	mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
10364 	rts
10365
10366 #########################################################################
10367 # XDEF **************************************************************** #
10368 # fsglmul(): emulates the fsglmul instruction #
10369 # #
10370 # XREF **************************************************************** #
10371 # scale_to_zero_src() - scale src exponent to zero #
10372 # scale_to_zero_dst() - scale dst exponent to zero #
10373 # unf_res4() - return default underflow result for sglop #
10374 # ovf_res() - return default overflow result #
10375 # res_qnan() - return QNAN result #
10376 # res_snan() - return SNAN result #
10377 # #
10378 # INPUT *************************************************************** #
10379 # a0 = pointer to extended precision source operand #
10380 # a1 = pointer to extended precision destination operand #
10381 # d0 rnd prec,mode #
10382 # #
10383 # OUTPUT ************************************************************** #
10384 # fp0 = result #
10385 # fp1 = EXOP (if exception occurred) #
10386 # #
10387 # ALGORITHM *********************************************************** #
10388 # Handle NANs, infinities, and zeroes as special cases. Divide #
10389 # norms/denorms into ext/sgl/dbl precision. #
10390 # For norms/denorms, scale the exponents such that a multiply #
10391 # instruction won't cause an exception. Use the regular fsglmul to #
10392 # compute a result. Check if the regular operands would have taken #
10393 # an exception. If so, return the default overflow/underflow result #
10394 # and return the EXOP if exceptions are enabled. Else, scale the #
10395 # result operand to the proper exponent. #
10396 # #
10397 #########################################################################
10398
10399 	global fsglmul
# fsglmul: emulate the single-precision multiply.
# In:  a0 = ptr to ext-prec src, a1 = ptr to ext-prec dst, d0 = rnd
#      prec/mode.  Out: fp0 = result, fp1 = EXOP if an exc occurred.
10400 fsglmul:
10401 	mov.l %d0,L_SCR3(%a6) # store rnd info
10402 
# combined tag: d1 = (DTAG<<3)|STAG; zero means NORM x NORM
10403 	clr.w %d1
10404 	mov.b DTAG(%a6),%d1
10405 	lsl.b &0x3,%d1
10406 	or.b STAG(%a6),%d1
10407 
10408 	bne.w fsglmul_not_norm # optimize on non-norm input
10409 
# copy both operands to the scratch area, scale each exponent to
# zero (so the hardware multiply can't trap), and keep the combined
# scale factor in d0 for classifying the true result exponent.
10410 fsglmul_norm:
10411 	mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
10412 	mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
10413 	mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
10414 
10415 	mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
10416 	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
10417 	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
10418 
10419 	bsr.l scale_to_zero_src # scale exponent
10420 	mov.l %d0,-(%sp) # save scale factor 1
10421 
10422 	bsr.l scale_to_zero_dst # scale dst exponent
10423 
10424 	add.l (%sp)+,%d0 # SCALE_FACTOR = scale1 + scale2
10425 
10426 	cmpi.l %d0,&0x3fff-0x7ffe # would result ovfl?
10427 	beq.w fsglmul_may_ovfl # result may rnd to overflow
10428 	blt.w fsglmul_ovfl # result will overflow
10429 
10430 	cmpi.l %d0,&0x3fff+0x0001 # would result unfl?
10431 	beq.w fsglmul_may_unfl # result may rnd to no unfl
10432 	bgt.w fsglmul_unfl # result will underflow
10433 
# no exception possible: do the multiply with the user's rnd mode
10434 fsglmul_normal:
10435 	fmovm.x FP_SCR1(%a6),&0x80 # load dst op
10436 
10437 	fmov.l L_SCR3(%a6),%fpcr # set FPCR
10438 	fmov.l &0x0,%fpsr # clear FPSR
10439 
10440 	fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
10441 
10442 	fmov.l %fpsr,%d1 # save status
10443 	fmov.l &0x0,%fpcr # clear FPCR
10444 
10445 	or.l %d1,USER_FPSR(%a6) # save INEX2,N
10446 
# rebuild the true exponent by removing the scale factor, then
# return the corrected result in fp0
10447 fsglmul_normal_exit:
10448 	fmovm.x &0x80,FP_SCR0(%a6) # store out result
10449 	mov.l %d2,-(%sp) # save d2
10450 	mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
10451 	mov.l %d1,%d2 # make a copy
10452 	andi.l &0x7fff,%d1 # strip sign
10453 	andi.w &0x8000,%d2 # keep old sign
10454 	sub.l %d0,%d1 # remove scale factor
10455 	or.w %d2,%d1 # concat old sign,new exp
10456 	mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
10457 	mov.l (%sp)+,%d2 # restore d2
10458 	fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
10459 	rts
10460
# fsglmul_ovfl: the true result is known to overflow.  Do the scaled
# multiply to collect INEX2/N, then build the default overflow result
# (and the EXOP in fp1 if OVFL/INEX traps are enabled).
10461 fsglmul_ovfl:
10462 	fmovm.x FP_SCR1(%a6),&0x80 # load dst op
10463 
10464 	fmov.l L_SCR3(%a6),%fpcr # set FPCR
10465 	fmov.l &0x0,%fpsr # clear FPSR
10466 
10467 	fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
10468 
10469 	fmov.l %fpsr,%d1 # save status
10470 	fmov.l &0x0,%fpcr # clear FPCR
10471 
10472 	or.l %d1,USER_FPSR(%a6) # save INEX2,N
10473 
10474 fsglmul_ovfl_tst:
10475 
10476 # save setting this until now because this is where fsglmul_may_ovfl may jump in
10477 	or.l &ovfl_inx_mask, USER_FPSR(%a6) # set ovfl/aovfl/ainex
10478 
10479 	mov.b FPCR_ENABLE(%a6),%d1
10480 	andi.b &0x13,%d1 # is OVFL or INEX enabled?
10481 	bne.b fsglmul_ovfl_ena # yes
10482 
# traps disabled: return the default overflow result only
10483 fsglmul_ovfl_dis:
10484 	btst &neg_bit,FPSR_CC(%a6) # is result negative?
10485 	sne %d1 # set sign param accordingly
10486 	mov.l L_SCR3(%a6),%d0 # pass prec:rnd
10487 	andi.b &0x30,%d0 # force prec = ext
10488 	bsr.l ovf_res # calculate default result
10489 	or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
10490 	fmovm.x (%a0),&0x80 # return default result in fp0
10491 	rts
10492 
# traps enabled: build the EXOP (exponent rebiased by -0x6000) in
# fp1, then fall back to the default-result path
10493 fsglmul_ovfl_ena:
10494 	fmovm.x &0x80,FP_SCR0(%a6) # move result to stack
10495 
10496 	mov.l %d2,-(%sp) # save d2
10497 	mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
10498 	mov.l %d1,%d2 # make a copy
10499 	andi.l &0x7fff,%d1 # strip sign
10500 	sub.l %d0,%d1 # remove scale factor
10501 	subi.l &0x6000,%d1 # subtract bias
10502 	andi.w &0x7fff,%d1
10503 	andi.w &0x8000,%d2 # keep old sign
10504 	or.w %d2,%d1 # concat old sign,new exp
10505 	mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
10506 	mov.l (%sp)+,%d2 # restore d2
10507 	fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
10508 	bra.b fsglmul_ovfl_dis
10509 
# fsglmul_may_ovfl: result exponent is on the overflow boundary;
# do the multiply and check whether rounding pushed it over.
10510 fsglmul_may_ovfl:
10511 	fmovm.x FP_SCR1(%a6),&0x80 # load dst op
10512 
10513 	fmov.l L_SCR3(%a6),%fpcr # set FPCR
10514 	fmov.l &0x0,%fpsr # clear FPSR
10515 
10516 	fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
10517 
10518 	fmov.l %fpsr,%d1 # save status
10519 	fmov.l &0x0,%fpcr # clear FPCR
10520 
10521 	or.l %d1,USER_FPSR(%a6) # save INEX2,N
10522 
10523 	fabs.x %fp0,%fp1 # make a copy of result
10524 	fcmp.b %fp1,&0x2 # is |result| >= 2.b?
10525 	fbge.w fsglmul_ovfl_tst # yes; overflow has occurred
10526 
10527 # no, it didn't overflow; we have correct result
10528 	bra.w fsglmul_normal_exit
10529
# fsglmul_unfl: the true result is known to underflow.  Redo the
# multiply in round-to-zero so the default underflow result can be
# derived, then trap-enabled callers also get the EXOP in fp1.
10530 fsglmul_unfl:
10531 	bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
10532 
10533 	fmovm.x FP_SCR1(%a6),&0x80 # load dst op
10534 
10535 	fmov.l &rz_mode*0x10,%fpcr # set FPCR
10536 	fmov.l &0x0,%fpsr # clear FPSR
10537 
10538 	fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
10539 
10540 	fmov.l %fpsr,%d1 # save status
10541 	fmov.l &0x0,%fpcr # clear FPCR
10542 
10543 	or.l %d1,USER_FPSR(%a6) # save INEX2,N
10544 
10545 	mov.b FPCR_ENABLE(%a6),%d1
10546 	andi.b &0x0b,%d1 # is UNFL or INEX enabled?
10547 	bne.b fsglmul_unfl_ena # yes
10548 
# traps disabled: hand the RZ result to unf_res4 for the default
# single-precision underflow result
10549 fsglmul_unfl_dis:
10550 	fmovm.x &0x80,FP_SCR0(%a6) # store out result
10551 
10552 	lea FP_SCR0(%a6),%a0 # pass: result addr
10553 	mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
10554 	bsr.l unf_res4 # calculate default result
10555 	or.b %d0,FPSR_CC(%a6) # 'Z' bit may have been set
10556 	fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
10557 	rts
10558 
10559 #
10560 # UNFL is enabled.
10561 #
# redo the multiply with the user's rnd mode into fp1, rebias the
# exponent by +0x6000 to form the EXOP, then join the disabled path
10562 fsglmul_unfl_ena:
10563 	fmovm.x FP_SCR1(%a6),&0x40 # load dst op
10564 
10565 	fmov.l L_SCR3(%a6),%fpcr # set FPCR
10566 	fmov.l &0x0,%fpsr # clear FPSR
10567 
10568 	fsglmul.x FP_SCR0(%a6),%fp1 # execute sgl multiply
10569 
10570 	fmov.l &0x0,%fpcr # clear FPCR
10571 
10572 	fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
10573 	mov.l %d2,-(%sp) # save d2
10574 	mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
10575 	mov.l %d1,%d2 # make a copy
10576 	andi.l &0x7fff,%d1 # strip sign
10577 	andi.w &0x8000,%d2 # keep old sign
10578 	sub.l %d0,%d1 # remove scale factor
10579 	addi.l &0x6000,%d1 # add bias
10580 	andi.w &0x7fff,%d1
10581 	or.w %d2,%d1 # concat old sign,new exp
10582 	mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
10583 	mov.l (%sp)+,%d2 # restore d2
10584 	fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
10585 	bra.w fsglmul_unfl_dis
10586
# fsglmul_may_unfl: result exponent is on the underflow boundary;
# multiply and inspect the magnitude to decide which path to take.
10587 fsglmul_may_unfl:
10588 	fmovm.x FP_SCR1(%a6),&0x80 # load dst op
10589 
10590 	fmov.l L_SCR3(%a6),%fpcr # set FPCR
10591 	fmov.l &0x0,%fpsr # clear FPSR
10592 
10593 	fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
10594 
10595 	fmov.l %fpsr,%d1 # save status
10596 	fmov.l &0x0,%fpcr # clear FPCR
10597 
10598 	or.l %d1,USER_FPSR(%a6) # save INEX2,N
10599 
10600 	fabs.x %fp0,%fp1 # make a copy of result
10601 	fcmp.b %fp1,&0x2 # is |result| > 2.b?
10602 	fbgt.w fsglmul_normal_exit # no; no underflow occurred
10603 	fblt.w fsglmul_unfl # yes; underflow occurred
10604 
10605 #
10606 # we still don't know if underflow occurred. result is ~ equal to 2. but,
10607 # we don't know if the result was an underflow that rounded up to a 2 or
10608 # a normalized number that rounded down to a 2. so, redo the entire operation
10609 # using RZ as the rounding mode to see what the pre-rounded result is.
10610 # this case should be relatively rare.
10611 #
10612 	fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
10613 
10614 	mov.l L_SCR3(%a6),%d1
10615 	andi.b &0xc0,%d1 # keep rnd prec
10616 	ori.b &rz_mode*0x10,%d1 # insert RZ
10617 
10618 	fmov.l %d1,%fpcr # set FPCR
10619 	fmov.l &0x0,%fpsr # clear FPSR
10620 
10621 	fsglmul.x FP_SCR0(%a6),%fp1 # execute sgl multiply
10622 
10623 	fmov.l &0x0,%fpcr # clear FPCR
10624 	fabs.x %fp1 # make absolute value
10625 	fcmp.b %fp1,&0x2 # is |result| < 2.b?
10626 	fbge.w fsglmul_normal_exit # no; no underflow occurred
10627 	bra.w fsglmul_unfl # yes, underflow occurred
10628
10629 ##############################################################################
10630
10631 #
10632 # Single Precision Multiply: inputs are not both normalized; what are they?
10633 #
# fsglmul_not_norm: dispatch via jump table.  d1 = (DTAG<<3)|STAG,
# so each 8-entry row is one dst type and each column one src type.
10634 fsglmul_not_norm:
10635 	mov.w (tbl_fsglmul_op.b,%pc,%d1.w*2),%d1
10636 	jmp (tbl_fsglmul_op.b,%pc,%d1.w*1)
10637 
10638 	swbeg &48
10639 tbl_fsglmul_op:
10640 	short fsglmul_norm - tbl_fsglmul_op # NORM x NORM
10641 	short fsglmul_zero - tbl_fsglmul_op # NORM x ZERO
10642 	short fsglmul_inf_src - tbl_fsglmul_op # NORM x INF
10643 	short fsglmul_res_qnan - tbl_fsglmul_op # NORM x QNAN
10644 	short fsglmul_norm - tbl_fsglmul_op # NORM x DENORM
10645 	short fsglmul_res_snan - tbl_fsglmul_op # NORM x SNAN
10646 	short tbl_fsglmul_op - tbl_fsglmul_op #
10647 	short tbl_fsglmul_op - tbl_fsglmul_op #
10648 
10649 	short fsglmul_zero - tbl_fsglmul_op # ZERO x NORM
10650 	short fsglmul_zero - tbl_fsglmul_op # ZERO x ZERO
10651 	short fsglmul_res_operr - tbl_fsglmul_op # ZERO x INF
10652 	short fsglmul_res_qnan - tbl_fsglmul_op # ZERO x QNAN
10653 	short fsglmul_zero - tbl_fsglmul_op # ZERO x DENORM
10654 	short fsglmul_res_snan - tbl_fsglmul_op # ZERO x SNAN
10655 	short tbl_fsglmul_op - tbl_fsglmul_op #
10656 	short tbl_fsglmul_op - tbl_fsglmul_op #
10657 
10658 	short fsglmul_inf_dst - tbl_fsglmul_op # INF x NORM
10659 	short fsglmul_res_operr - tbl_fsglmul_op # INF x ZERO
10660 	short fsglmul_inf_dst - tbl_fsglmul_op # INF x INF
10661 	short fsglmul_res_qnan - tbl_fsglmul_op # INF x QNAN
10662 	short fsglmul_inf_dst - tbl_fsglmul_op # INF x DENORM
10663 	short fsglmul_res_snan - tbl_fsglmul_op # INF x SNAN
10664 	short tbl_fsglmul_op - tbl_fsglmul_op #
10665 	short tbl_fsglmul_op - tbl_fsglmul_op #
10666 
10667 	short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x NORM
10668 	short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x ZERO
10669 	short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x INF
10670 	short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x QNAN
10671 	short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x DENORM
10672 	short fsglmul_res_snan - tbl_fsglmul_op # QNAN x SNAN
10673 	short tbl_fsglmul_op - tbl_fsglmul_op #
10674 	short tbl_fsglmul_op - tbl_fsglmul_op #
10675 
10676 	short fsglmul_norm - tbl_fsglmul_op # DENORM x NORM
10677 	short fsglmul_zero - tbl_fsglmul_op # DENORM x ZERO
10678 	short fsglmul_inf_src - tbl_fsglmul_op # DENORM x INF
10679 	short fsglmul_res_qnan - tbl_fsglmul_op # DENORM x QNAN
10680 	short fsglmul_norm - tbl_fsglmul_op # DENORM x DENORM
10681 	short fsglmul_res_snan - tbl_fsglmul_op # DENORM x SNAN
10682 	short tbl_fsglmul_op - tbl_fsglmul_op #
10683 	short tbl_fsglmul_op - tbl_fsglmul_op #
10684 
10685 	short fsglmul_res_snan - tbl_fsglmul_op # SNAN x NORM
10686 	short fsglmul_res_snan - tbl_fsglmul_op # SNAN x ZERO
10687 	short fsglmul_res_snan - tbl_fsglmul_op # SNAN x INF
10688 	short fsglmul_res_snan - tbl_fsglmul_op # SNAN x QNAN
10689 	short fsglmul_res_snan - tbl_fsglmul_op # SNAN x DENORM
10690 	short fsglmul_res_snan - tbl_fsglmul_op # SNAN x SNAN
10691 	short tbl_fsglmul_op - tbl_fsglmul_op #
10692 	short tbl_fsglmul_op - tbl_fsglmul_op #
10693 
# special-case tails: share the common fmul handlers
10694 fsglmul_res_operr:
10695 	bra.l res_operr
10696 fsglmul_res_snan:
10697 	bra.l res_snan
10698 fsglmul_res_qnan:
10699 	bra.l res_qnan
10700 fsglmul_zero:
10701 	bra.l fmul_zero
10702 fsglmul_inf_src:
10703 	bra.l fmul_inf_src
10704 fsglmul_inf_dst:
10705 	bra.l fmul_inf_dst
10706
10707 #########################################################################
10708 # XDEF **************************************************************** #
10709 # fsgldiv(): emulates the fsgldiv instruction #
10710 # #
10711 # XREF **************************************************************** #
10712 # scale_to_zero_src() - scale src exponent to zero #
10713 # scale_to_zero_dst() - scale dst exponent to zero #
10714 # unf_res4() - return default underflow result for sglop #
10715 # ovf_res() - return default overflow result #
10716 # res_qnan() - return QNAN result #
10717 # res_snan() - return SNAN result #
10718 # #
10719 # INPUT *************************************************************** #
10720 # a0 = pointer to extended precision source operand #
10721 # a1 = pointer to extended precision destination operand #
10722 # d0 rnd prec,mode #
10723 # #
10724 # OUTPUT ************************************************************** #
10725 # fp0 = result #
10726 # fp1 = EXOP (if exception occurred) #
10727 # #
10728 # ALGORITHM *********************************************************** #
10729 # Handle NANs, infinities, and zeroes as special cases. Divide #
10730 # norms/denorms into ext/sgl/dbl precision. #
10731 # For norms/denorms, scale the exponents such that a divide #
10732 # instruction won't cause an exception. Use the regular fsgldiv to #
10733 # compute a result. Check if the regular operands would have taken #
10734 # an exception. If so, return the default overflow/underflow result #
10735 # and return the EXOP if exceptions are enabled. Else, scale the #
10736 # result operand to the proper exponent. #
10737 # #
10738 #########################################################################
10739
10740 	global fsgldiv
# fsgldiv: emulate the single-precision divide.
# In:  a0 = ptr to ext-prec src, a1 = ptr to ext-prec dst, d0 = rnd
#      prec/mode.  Out: fp0 = result, fp1 = EXOP if an exc occurred.
10741 fsgldiv:
10742 	mov.l %d0,L_SCR3(%a6) # store rnd info
10743 
10744 	clr.w %d1
10745 	mov.b DTAG(%a6),%d1
10746 	lsl.b &0x3,%d1
10747 	or.b STAG(%a6),%d1 # combine src tags
10748 
10749 	bne.w fsgldiv_not_norm # optimize on non-norm input
10750 
10751 #
10752 # DIVIDE: NORMs and DENORMs ONLY!
10753 #
# copy both operands to the scratch area and scale both exponents to
# zero; the divide scale factor is scale1 - scale2 (src minus dst)
10754 fsgldiv_norm:
10755 	mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
10756 	mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
10757 	mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
10758 
10759 	mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
10760 	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
10761 	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
10762 
10763 	bsr.l scale_to_zero_src # calculate scale factor 1
10764 	mov.l %d0,-(%sp) # save scale factor 1
10765 
10766 	bsr.l scale_to_zero_dst # calculate scale factor 2
10767 
10768 	neg.l (%sp) # S.F. = scale1 - scale2
10769 	add.l %d0,(%sp)
10770 
10771 	mov.w 2+L_SCR3(%a6),%d1 # fetch precision,mode
10772 	lsr.b &0x6,%d1
10773 	mov.l (%sp)+,%d0
10774 	cmpi.l %d0,&0x3fff-0x7ffe
10775 	ble.w fsgldiv_may_ovfl
10776 
10777 	cmpi.l %d0,&0x3fff-0x0000 # will result underflow?
10778 	beq.w fsgldiv_may_unfl # maybe
10779 	bgt.w fsgldiv_unfl # yes; go handle underflow
10780 
# no exception possible: divide with the user's rnd mode
10781 fsgldiv_normal:
10782 	fmovm.x FP_SCR1(%a6),&0x80 # load dst op
10783 
10784 	fmov.l L_SCR3(%a6),%fpcr # save FPCR
10785 	fmov.l &0x0,%fpsr # clear FPSR
10786 
10787 	fsgldiv.x FP_SCR0(%a6),%fp0 # perform sgl divide
10788 
10789 	fmov.l %fpsr,%d1 # save FPSR
10790 	fmov.l &0x0,%fpcr # clear FPCR
10791 
10792 	or.l %d1,USER_FPSR(%a6) # save INEX2,N
10793 
# rebuild the true exponent by removing the scale factor and return
10794 fsgldiv_normal_exit:
10795 	fmovm.x &0x80,FP_SCR0(%a6) # store result on stack
10796 	mov.l %d2,-(%sp) # save d2
10797 	mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
10798 	mov.l %d1,%d2 # make a copy
10799 	andi.l &0x7fff,%d1 # strip sign
10800 	andi.w &0x8000,%d2 # keep old sign
10801 	sub.l %d0,%d1 # remove scale factor
10802 	or.w %d2,%d1 # concat old sign,new exp
10803 	mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
10804 	mov.l (%sp)+,%d2 # restore d2
10805 	fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
10806 	rts
10807
# fsgldiv_may_ovfl: result might overflow; divide, then check the
# unscaled exponent explicitly to decide.
10808 fsgldiv_may_ovfl:
10809 	fmovm.x FP_SCR1(%a6),&0x80 # load dst op
10810 
10811 	fmov.l L_SCR3(%a6),%fpcr # set FPCR
10812 	fmov.l &0x0,%fpsr # set FPSR
10813 
10814 	fsgldiv.x FP_SCR0(%a6),%fp0 # execute divide
10815 
10816 	fmov.l %fpsr,%d1
10817 	fmov.l &0x0,%fpcr
10818 
10819 	or.l %d1,USER_FPSR(%a6) # save INEX,N
10820 
# spill fp0 to the stack just to read back its {sgn,exp} word
10821 	fmovm.x &0x01,-(%sp) # save result to stack
10822 	mov.w (%sp),%d1 # fetch new exponent
10823 	add.l &0xc,%sp # clear result
10824 	andi.l &0x7fff,%d1 # strip sign
10825 	sub.l %d0,%d1 # remove scale factor
10826 	cmp.l %d1,&0x7fff # did divide overflow?
10827 	blt.b fsgldiv_normal_exit
10828 
10829 fsgldiv_ovfl_tst:
10830 	or.w &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
10831 
10832 	mov.b FPCR_ENABLE(%a6),%d1
10833 	andi.b &0x13,%d1 # is OVFL or INEX enabled?
10834 	bne.b fsgldiv_ovfl_ena # yes
10835 
# traps disabled: return the default overflow result only
10836 fsgldiv_ovfl_dis:
10837 	btst &neg_bit,FPSR_CC(%a6) # is result negative
10838 	sne %d1 # set sign param accordingly
10839 	mov.l L_SCR3(%a6),%d0 # pass prec:rnd
10840 	andi.b &0x30,%d0 # kill precision
10841 	bsr.l ovf_res # calculate default result
10842 	or.b %d0,FPSR_CC(%a6) # set INF if applicable
10843 	fmovm.x (%a0),&0x80 # return default result in fp0
10844 	rts
10845 
# traps enabled: build the EXOP (exponent rebiased by -0x6000) in
# fp1, then join the disabled path
10846 fsgldiv_ovfl_ena:
10847 	fmovm.x &0x80,FP_SCR0(%a6) # move result to stack
10848 
10849 	mov.l %d2,-(%sp) # save d2
10850 	mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
10851 	mov.l %d1,%d2 # make a copy
10852 	andi.l &0x7fff,%d1 # strip sign
10853 	andi.w &0x8000,%d2 # keep old sign
10854 	sub.l %d0,%d1 # remove scale factor
10855 	subi.l &0x6000,%d1 # subtract new bias
10856 	andi.w &0x7fff,%d1 # clear ms bit
10857 	or.w %d2,%d1 # concat old sign,new exp
10858 	mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
10859 	mov.l (%sp)+,%d2 # restore d2
10860 	fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
10861 	bra.b fsgldiv_ovfl_dis
10862
# fsgldiv_unfl: the true result is known to underflow.  Divide in
# round-to-zero to seed the default underflow result; trap-enabled
# callers also get the EXOP in fp1.
10863 fsgldiv_unfl:
10864 	bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
10865 
10866 	fmovm.x FP_SCR1(%a6),&0x80 # load dst op
10867 
10868 	fmov.l &rz_mode*0x10,%fpcr # set FPCR
10869 	fmov.l &0x0,%fpsr # clear FPSR
10870 
10871 	fsgldiv.x FP_SCR0(%a6),%fp0 # execute sgl divide
10872 
10873 	fmov.l %fpsr,%d1 # save status
10874 	fmov.l &0x0,%fpcr # clear FPCR
10875 
10876 	or.l %d1,USER_FPSR(%a6) # save INEX2,N
10877 
10878 	mov.b FPCR_ENABLE(%a6),%d1
10879 	andi.b &0x0b,%d1 # is UNFL or INEX enabled?
10880 	bne.b fsgldiv_unfl_ena # yes
10881 
# traps disabled: hand the RZ result to unf_res4 for the default
# single-precision underflow result
10882 fsgldiv_unfl_dis:
10883 	fmovm.x &0x80,FP_SCR0(%a6) # store out result
10884 
10885 	lea FP_SCR0(%a6),%a0 # pass: result addr
10886 	mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
10887 	bsr.l unf_res4 # calculate default result
10888 	or.b %d0,FPSR_CC(%a6) # 'Z' bit may have been set
10889 	fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
10890 	rts
10891 
10892 #
10893 # UNFL is enabled.
10894 #
# redo the divide with the user's rnd mode into fp1, rebias the
# exponent by +0x6000 to form the EXOP, then join the disabled path
10895 fsgldiv_unfl_ena:
10896 	fmovm.x FP_SCR1(%a6),&0x40 # load dst op
10897 
10898 	fmov.l L_SCR3(%a6),%fpcr # set FPCR
10899 	fmov.l &0x0,%fpsr # clear FPSR
10900 
10901 	fsgldiv.x FP_SCR0(%a6),%fp1 # execute sgl divide
10902 
10903 	fmov.l &0x0,%fpcr # clear FPCR
10904 
10905 	fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
10906 	mov.l %d2,-(%sp) # save d2
10907 	mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
10908 	mov.l %d1,%d2 # make a copy
10909 	andi.l &0x7fff,%d1 # strip sign
10910 	andi.w &0x8000,%d2 # keep old sign
10911 	sub.l %d0,%d1 # remove scale factor
10912 	addi.l &0x6000,%d1 # add bias
10913 	andi.w &0x7fff,%d1 # clear top bit
10914 	or.w %d2,%d1 # concat old sign, new exp
10915 	mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
10916 	mov.l (%sp)+,%d2 # restore d2
10917 	fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
10918 	bra.b fsgldiv_unfl_dis
10919
10920 #
10921 # the divide operation MAY underflow:
10922 #
#
# the divide operation MAY underflow:
#
10923 fsgldiv_may_unfl:
10924 	fmovm.x FP_SCR1(%a6),&0x80 # load dst op
10925 
10926 	fmov.l L_SCR3(%a6),%fpcr # set FPCR
10927 	fmov.l &0x0,%fpsr # clear FPSR
10928 
10929 	fsgldiv.x FP_SCR0(%a6),%fp0 # execute sgl divide
10930 
10931 	fmov.l %fpsr,%d1 # save status
10932 	fmov.l &0x0,%fpcr # clear FPCR
10933 
10934 	or.l %d1,USER_FPSR(%a6) # save INEX2,N
10935 
# |result| > 1 means no underflow; < 1 means underflow; == 1 is
# ambiguous and must be re-derived below
10936 	fabs.x %fp0,%fp1 # make a copy of result
10937 	fcmp.b %fp1,&0x1 # is |result| > 1.b?
10938 	fbgt.w fsgldiv_normal_exit # no; no underflow occurred
10939 	fblt.w fsgldiv_unfl # yes; underflow occurred
10940 
10941 #
10942 # we still don't know if underflow occurred. result is ~ equal to 1. but,
10943 # we don't know if the result was an underflow that rounded up to a 1
10944 # or a normalized number that rounded down to a 1. so, redo the entire
10945 # operation using RZ as the rounding mode to see what the pre-rounded
10946 # result is. this case should be relatively rare.
10947 #
10948 	fmovm.x FP_SCR1(%a6),&0x40 # load dst op into %fp1
10949 
10950 	clr.l %d1 # clear scratch register
10951 	ori.b &rz_mode*0x10,%d1 # force RZ rnd mode
10952 
10953 	fmov.l %d1,%fpcr # set FPCR
10954 	fmov.l &0x0,%fpsr # clear FPSR
10955 
10956 	fsgldiv.x FP_SCR0(%a6),%fp1 # execute sgl divide
10957 
10958 	fmov.l &0x0,%fpcr # clear FPCR
10959 	fabs.x %fp1 # make absolute value
10960 	fcmp.b %fp1,&0x1 # is |result| < 1.b?
10961 	fbge.w fsgldiv_normal_exit # no; no underflow occurred
10962 	bra.w fsgldiv_unfl # yes; underflow occurred
10963
10964 ############################################################################
10965
10966 #
10967 # Divide: inputs are not both normalized; what are they?
10968 #
# fsgldiv_not_norm: dispatch via jump table.  d1 = (DTAG<<3)|STAG,
# so each 8-entry row is one dst type and each column one src type.
10969 fsgldiv_not_norm:
10970 	mov.w (tbl_fsgldiv_op.b,%pc,%d1.w*2),%d1
10971 	jmp (tbl_fsgldiv_op.b,%pc,%d1.w*1)
10972 
10973 	swbeg &48
10974 tbl_fsgldiv_op:
10975 	short fsgldiv_norm - tbl_fsgldiv_op # NORM / NORM
10976 	short fsgldiv_inf_load - tbl_fsgldiv_op # NORM / ZERO
10977 	short fsgldiv_zero_load - tbl_fsgldiv_op # NORM / INF
10978 	short fsgldiv_res_qnan - tbl_fsgldiv_op # NORM / QNAN
10979 	short fsgldiv_norm - tbl_fsgldiv_op # NORM / DENORM
10980 	short fsgldiv_res_snan - tbl_fsgldiv_op # NORM / SNAN
10981 	short tbl_fsgldiv_op - tbl_fsgldiv_op #
10982 	short tbl_fsgldiv_op - tbl_fsgldiv_op #
10983 
10984 	short fsgldiv_zero_load - tbl_fsgldiv_op # ZERO / NORM
10985 	short fsgldiv_res_operr - tbl_fsgldiv_op # ZERO / ZERO
10986 	short fsgldiv_zero_load - tbl_fsgldiv_op # ZERO / INF
10987 	short fsgldiv_res_qnan - tbl_fsgldiv_op # ZERO / QNAN
10988 	short fsgldiv_zero_load - tbl_fsgldiv_op # ZERO / DENORM
10989 	short fsgldiv_res_snan - tbl_fsgldiv_op # ZERO / SNAN
10990 	short tbl_fsgldiv_op - tbl_fsgldiv_op #
10991 	short tbl_fsgldiv_op - tbl_fsgldiv_op #
10992 
10993 	short fsgldiv_inf_dst - tbl_fsgldiv_op # INF / NORM
10994 	short fsgldiv_inf_dst - tbl_fsgldiv_op # INF / ZERO
10995 	short fsgldiv_res_operr - tbl_fsgldiv_op # INF / INF
10996 	short fsgldiv_res_qnan - tbl_fsgldiv_op # INF / QNAN
10997 	short fsgldiv_inf_dst - tbl_fsgldiv_op # INF / DENORM
10998 	short fsgldiv_res_snan - tbl_fsgldiv_op # INF / SNAN
10999 	short tbl_fsgldiv_op - tbl_fsgldiv_op #
11000 	short tbl_fsgldiv_op - tbl_fsgldiv_op #
11001 
11002 	short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / NORM
11003 	short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / ZERO
11004 	short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / INF
11005 	short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / QNAN
11006 	short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / DENORM
11007 	short fsgldiv_res_snan - tbl_fsgldiv_op # QNAN / SNAN
11008 	short tbl_fsgldiv_op - tbl_fsgldiv_op #
11009 	short tbl_fsgldiv_op - tbl_fsgldiv_op #
11010 
11011 	short fsgldiv_norm - tbl_fsgldiv_op # DENORM / NORM
11012 	short fsgldiv_inf_load - tbl_fsgldiv_op # DENORM / ZERO
11013 	short fsgldiv_zero_load - tbl_fsgldiv_op # DENORM / INF
11014 	short fsgldiv_res_qnan - tbl_fsgldiv_op # DENORM / QNAN
11015 	short fsgldiv_norm - tbl_fsgldiv_op # DENORM / DENORM
11016 	short fsgldiv_res_snan - tbl_fsgldiv_op # DENORM / SNAN
11017 	short tbl_fsgldiv_op - tbl_fsgldiv_op #
11018 	short tbl_fsgldiv_op - tbl_fsgldiv_op #
11019 
11020 	short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / NORM
11021 	short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / ZERO
11022 	short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / INF
11023 	short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / QNAN
11024 	short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / DENORM
11025 	short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / SNAN
11026 	short tbl_fsgldiv_op - tbl_fsgldiv_op #
11027 	short tbl_fsgldiv_op - tbl_fsgldiv_op #
11028 
# special-case tails: share the common fdiv handlers
11029 fsgldiv_res_qnan:
11030 	bra.l res_qnan
11031 fsgldiv_res_snan:
11032 	bra.l res_snan
11033 fsgldiv_res_operr:
11034 	bra.l res_operr
11035 fsgldiv_inf_load:
11036 	bra.l fdiv_inf_load
11037 fsgldiv_zero_load:
11038 	bra.l fdiv_zero_load
11039 fsgldiv_inf_dst:
11040 	bra.l fdiv_inf_dst
11041
11042 #########################################################################
11043 # XDEF **************************************************************** #
11044 # fadd(): emulates the fadd instruction #
11045 # fsadd(): emulates the fadd instruction #
11046 # fdadd(): emulates the fdadd instruction #
11047 # #
11048 # XREF **************************************************************** #
11049 # addsub_scaler2() - scale the operands so they won't take exc #
11050 # ovf_res() - return default overflow result #
11051 # unf_res() - return default underflow result #
11052 # res_qnan() - set QNAN result #
11053 # res_snan() - set SNAN result #
11054 # res_operr() - set OPERR result #
11055 # scale_to_zero_src() - set src operand exponent equal to zero #
11056 # scale_to_zero_dst() - set dst operand exponent equal to zero #
11057 # #
11058 # INPUT *************************************************************** #
11059 # a0 = pointer to extended precision source operand #
11060 # a1 = pointer to extended precision destination operand #
11061 # #
11062 # OUTPUT ************************************************************** #
11063 # fp0 = result #
11064 # fp1 = EXOP (if exception occurred) #
11065 # #
11066 # ALGORITHM *********************************************************** #
11067 # Handle NANs, infinities, and zeroes as special cases. Divide #
11068 # norms into extended, single, and double precision. #
11069 # Do addition after scaling exponents such that exception won't #
11070 # occur. Then, check result exponent to see if exception would have #
11071 # occurred. If so, return default result and maybe EXOP. Else, insert #
11072 # the correct result exponent and return. Set FPSR bits as appropriate. #
11073 # #
11074 #########################################################################
11075
11076 global fsadd
11077 fsadd:
11078 andi.b &0x30,%d0 # clear rnd prec
11079 ori.b &s_mode*0x10,%d0 # insert sgl prec
11080 bra.b fadd
11081
11082 global fdadd
11083 fdadd:
11084 andi.b &0x30,%d0 # clear rnd prec
11085 ori.b &d_mode*0x10,%d0 # insert dbl prec; fall through to fadd
11086
11087 global fadd
11088 fadd:
11089 mov.l %d0,L_SCR3(%a6) # store rnd info
11090
# build operand-class index: d1 = (dst tag << 3) | src tag
11091 clr.w %d1
11092 mov.b DTAG(%a6),%d1
11093 lsl.b &0x3,%d1
11094 or.b STAG(%a6),%d1 # combine src tags
11095
11096 bne.w fadd_not_norm # optimize on non-norm input
11097
11098 #
11099 # ADD: norms and denorms
11100 #
11101 fadd_norm:
11102 bsr.l addsub_scaler2 # scale exponents; d0 = scale factor
11103
11104 fadd_zero_entry:
11105 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
11106
11107 fmov.l &0x0,%fpsr # clear FPSR
11108 fmov.l L_SCR3(%a6),%fpcr # set FPCR
11109
11110 fadd.x FP_SCR0(%a6),%fp0 # execute add
11111
11112 fmov.l &0x0,%fpcr # clear FPCR
11113 fmov.l %fpsr,%d1 # fetch INEX2,N,Z
11114
11115 or.l %d1,USER_FPSR(%a6) # save exc and ccode bits
11116
11117 fbeq.w fadd_zero_exit # if result is zero, end now
11118
11119 mov.l %d2,-(%sp) # save d2
11120
11121 fmovm.x &0x01,-(%sp) # save result to stack
11122
11123 mov.w 2+L_SCR3(%a6),%d1
11124 lsr.b &0x6,%d1 # d1 = rnd prec index (0=ext,1=sgl,2=dbl)
11125
11126 mov.w (%sp),%d2 # fetch new sign, exp
11127 andi.l &0x7fff,%d2 # strip sign
11128 sub.l %d0,%d2 # apply scale factor
11129
11130 cmp.l %d2,(tbl_fadd_ovfl.b,%pc,%d1.w*4) # is it an overflow?
11131 bge.b fadd_ovfl # yes
11132
11133 cmp.l %d2,(tbl_fadd_unfl.b,%pc,%d1.w*4) # is it an underflow?
11134 blt.w fadd_unfl # yes
11135 beq.w fadd_may_unfl # maybe; go find out
11136
11137 fadd_normal:
11138 mov.w (%sp),%d1
11139 andi.w &0x8000,%d1 # keep sign
11140 or.w %d2,%d1 # concat sign,new exp
11141 mov.w %d1,(%sp) # insert new exponent
11142
11143 fmovm.x (%sp)+,&0x80 # return result in fp0
11144
11145 mov.l (%sp)+,%d2 # restore d2
11146 rts
11147
11148 fadd_zero_exit:
11149 # fmov.s &0x00000000,%fp0 # return zero in fp0
11150 rts
11151
# exponent thresholds, indexed by rounding precision (ext/sgl/dbl)
11152 tbl_fadd_ovfl:
11153 long 0x7fff # ext ovfl
11154 long 0x407f # sgl ovfl
11155 long 0x43ff # dbl ovfl
11156
11157 tbl_fadd_unfl:
11158 long 0x0000 # ext unfl
11159 long 0x3f81 # sgl unfl
11160 long 0x3c01 # dbl unfl
11161
# result overflowed for the selected rounding precision
11162 fadd_ovfl:
11163 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
11164
11165 mov.b FPCR_ENABLE(%a6),%d1
11166 andi.b &0x13,%d1 # is OVFL or INEX enabled?
11167 bne.b fadd_ovfl_ena # yes
11168
11169 add.l &0xc,%sp # discard result copy from stack
11170 fadd_ovfl_dis:
11171 btst &neg_bit,FPSR_CC(%a6) # is result negative?
11172 sne %d1 # set sign param accordingly
11173 mov.l L_SCR3(%a6),%d0 # pass prec:rnd
11174 bsr.l ovf_res # calculate default result
11175 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
11176 fmovm.x (%a0),&0x80 # return default result in fp0
11177 mov.l (%sp)+,%d2 # restore d2
11178 rts
11179
# OVFL/INEX trap enabled: also build the EXOP (exceptional operand) in fp1
11180 fadd_ovfl_ena:
11181 mov.b L_SCR3(%a6),%d1
11182 andi.b &0xc0,%d1 # is precision extended?
11183 bne.b fadd_ovfl_ena_sd # no; prec = sgl or dbl
11184
11185 fadd_ovfl_ena_cont:
11186 mov.w (%sp),%d1
11187 andi.w &0x8000,%d1 # keep sign
11188 subi.l &0x6000,%d2 # bias exponent down by 0x6000 for EXOP
11189 andi.w &0x7fff,%d2
11190 or.w %d2,%d1 # concat sign,new exp
11191 mov.w %d1,(%sp) # insert new exponent
11192
11193 fmovm.x (%sp)+,&0x40 # return EXOP in fp1
11194 bra.b fadd_ovfl_dis
11195
# sgl/dbl precision: redo the add rounded to extended so the EXOP mantissa
# keeps full precision, then rejoin the common EXOP path
11196 fadd_ovfl_ena_sd:
11197 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
11198
11199 mov.l L_SCR3(%a6),%d1
11200 andi.b &0x30,%d1 # keep rnd mode
11201 fmov.l %d1,%fpcr # set FPCR
11202
11203 fadd.x FP_SCR0(%a6),%fp0 # execute add
11204
11205 fmov.l &0x0,%fpcr # clear FPCR
11206
11207 add.l &0xc,%sp # discard old result copy
11208 fmovm.x &0x01,-(%sp) # save new result to stack
11209 bra.b fadd_ovfl_ena_cont
11210
# result underflowed for the selected rounding precision
11211 fadd_unfl:
11212 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
11213
11214 add.l &0xc,%sp # discard result copy from stack
11215
11216 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
11217
11218 fmov.l &rz_mode*0x10,%fpcr # set FPCR: round-to-zero
11219 fmov.l &0x0,%fpsr # clear FPSR
11220
11221 fadd.x FP_SCR0(%a6),%fp0 # execute add
11222
11223 fmov.l &0x0,%fpcr # clear FPCR
11224 fmov.l %fpsr,%d1 # save status
11225
11226 or.l %d1,USER_FPSR(%a6) # save INEX,N
11227
11228 mov.b FPCR_ENABLE(%a6),%d1
11229 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
11230 bne.b fadd_unfl_ena # yes
11231
11232 fadd_unfl_dis:
11233 fmovm.x &0x80,FP_SCR0(%a6) # store out result
11234
11235 lea FP_SCR0(%a6),%a0 # pass: result addr
11236 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
11237 bsr.l unf_res # calculate default result
11238 or.b %d0,FPSR_CC(%a6) # 'Z' bit may have been set
11239 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
11240 mov.l (%sp)+,%d2 # restore d2
11241 rts
11242
# UNFL/INEX trap enabled: also build the EXOP in fp1
11243 fadd_unfl_ena:
11244 fmovm.x FP_SCR1(%a6),&0x40 # load dst op
11245
11246 mov.l L_SCR3(%a6),%d1
11247 andi.b &0xc0,%d1 # is precision extended?
11248 bne.b fadd_unfl_ena_sd # no; sgl or dbl
11249
11250 fmov.l L_SCR3(%a6),%fpcr # set FPCR
11251
11252 fadd_unfl_ena_cont:
11253 fmov.l &0x0,%fpsr # clear FPSR
11254
11255 fadd.x FP_SCR0(%a6),%fp1 # execute add
11256
11257 fmov.l &0x0,%fpcr # clear FPCR
11258
11259 fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
11260 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
11261 mov.l %d1,%d2 # make a copy
11262 andi.l &0x7fff,%d1 # strip sign
11263 andi.w &0x8000,%d2 # keep old sign
11264 sub.l %d0,%d1 # apply scale factor
11265 addi.l &0x6000,%d1 # add new bias
11266 andi.w &0x7fff,%d1 # clear top bit
11267 or.w %d2,%d1 # concat sign,new exp
11268 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
11269 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
11270 bra.w fadd_unfl_dis
11271
11272 fadd_unfl_ena_sd:
11273 mov.l L_SCR3(%a6),%d1
11274 andi.b &0x30,%d1 # use only rnd mode
11275 fmov.l %d1,%fpcr # set FPCR
11276
11277 bra.b fadd_unfl_ena_cont
11278
11279 #
11280 # result is equal to the smallest normalized number in the selected precision.
11281 # if the precision is extended, this result could not have come from an
11282 # underflow that rounded up.
11283 #
11284 fadd_may_unfl:
11285 mov.l L_SCR3(%a6),%d1
11286 andi.b &0xc0,%d1 # extended precision?
11287 beq.w fadd_normal # yes; no underflow occurred
11288
11289 mov.l 0x4(%sp),%d1 # extract hi(man)
11290 cmpi.l %d1,&0x80000000 # is hi(man) = 0x80000000?
11291 bne.w fadd_normal # no; no underflow occurred
11292
11293 tst.l 0x8(%sp) # is lo(man) = 0x0?
11294 bne.w fadd_normal # no; no underflow occurred
11295
11296 btst &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
11297 beq.w fadd_normal # no; no underflow occurred
11298
11299 #
11300 # ok, so now the result has an exponent equal to the smallest normalized
11301 # exponent for the selected precision. also, the mantissa is equal to
11302 # 0x8000000000000000 and this mantissa is the result of rounding non-zero
11303 # g,r,s.
11304 # now, we must determine whether the pre-rounded result was an underflow
11305 # rounded "up" or a normalized number rounded "down".
11306 # so, we do this by re-executing the add using RZ as the rounding mode and
11307 # seeing if the new result is smaller or equal to the current result.
11308 #
11309 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
11310
11311 mov.l L_SCR3(%a6),%d1
11312 andi.b &0xc0,%d1 # keep rnd prec
11313 ori.b &rz_mode*0x10,%d1 # insert rnd mode
11314 fmov.l %d1,%fpcr # set FPCR
11315 fmov.l &0x0,%fpsr # clear FPSR
11316
11317 fadd.x FP_SCR0(%a6),%fp1 # execute add
11318
11319 fmov.l &0x0,%fpcr # clear FPCR
11320
11321 fabs.x %fp0 # compare absolute values
11322 fabs.x %fp1
11323 fcmp.x %fp0,%fp1 # is first result > second?
11324
11325 fbgt.w fadd_unfl # yes; it's an underflow
11326 bra.w fadd_normal # no; it's not an underflow
11327
11328 ##########################################################################
11329
11330 #
11331 # Add: inputs are not both normalized; what are they?
11332 # dispatch on d1 = (dst tag << 3) | src tag; rows = dst class, cols = src class
11333 fadd_not_norm:
11334 mov.w (tbl_fadd_op.b,%pc,%d1.w*2),%d1
11335 jmp (tbl_fadd_op.b,%pc,%d1.w*1)
11336
11337 swbeg &48
11338 tbl_fadd_op:
11339 short fadd_norm - tbl_fadd_op # NORM + NORM
11340 short fadd_zero_src - tbl_fadd_op # NORM + ZERO
11341 short fadd_inf_src - tbl_fadd_op # NORM + INF
11342 short fadd_res_qnan - tbl_fadd_op # NORM + QNAN
11343 short fadd_norm - tbl_fadd_op # NORM + DENORM
11344 short fadd_res_snan - tbl_fadd_op # NORM + SNAN
11345 short tbl_fadd_op - tbl_fadd_op #
11346 short tbl_fadd_op - tbl_fadd_op #
11347
11348 short fadd_zero_dst - tbl_fadd_op # ZERO + NORM
11349 short fadd_zero_2 - tbl_fadd_op # ZERO + ZERO
11350 short fadd_inf_src - tbl_fadd_op # ZERO + INF
11351 short fadd_res_qnan - tbl_fadd_op # ZERO + QNAN
11352 short fadd_zero_dst - tbl_fadd_op # ZERO + DENORM
11353 short fadd_res_snan - tbl_fadd_op # ZERO + SNAN
11354 short tbl_fadd_op - tbl_fadd_op #
11355 short tbl_fadd_op - tbl_fadd_op #
11356
11357 short fadd_inf_dst - tbl_fadd_op # INF + NORM
11358 short fadd_inf_dst - tbl_fadd_op # INF + ZERO
11359 short fadd_inf_2 - tbl_fadd_op # INF + INF
11360 short fadd_res_qnan - tbl_fadd_op # INF + QNAN
11361 short fadd_inf_dst - tbl_fadd_op # INF + DENORM
11362 short fadd_res_snan - tbl_fadd_op # INF + SNAN
11363 short tbl_fadd_op - tbl_fadd_op #
11364 short tbl_fadd_op - tbl_fadd_op #
11365
11366 short fadd_res_qnan - tbl_fadd_op # QNAN + NORM
11367 short fadd_res_qnan - tbl_fadd_op # QNAN + ZERO
11368 short fadd_res_qnan - tbl_fadd_op # QNAN + INF
11369 short fadd_res_qnan - tbl_fadd_op # QNAN + QNAN
11370 short fadd_res_qnan - tbl_fadd_op # QNAN + DENORM
11371 short fadd_res_snan - tbl_fadd_op # QNAN + SNAN
11372 short tbl_fadd_op - tbl_fadd_op #
11373 short tbl_fadd_op - tbl_fadd_op #
11374
11375 short fadd_norm - tbl_fadd_op # DENORM + NORM
11376 short fadd_zero_src - tbl_fadd_op # DENORM + ZERO
11377 short fadd_inf_src - tbl_fadd_op # DENORM + INF
11378 short fadd_res_qnan - tbl_fadd_op # DENORM + QNAN
11379 short fadd_norm - tbl_fadd_op # DENORM + DENORM
11380 short fadd_res_snan - tbl_fadd_op # DENORM + SNAN
11381 short tbl_fadd_op - tbl_fadd_op #
11382 short tbl_fadd_op - tbl_fadd_op #
11383
11384 short fadd_res_snan - tbl_fadd_op # SNAN + NORM
11385 short fadd_res_snan - tbl_fadd_op # SNAN + ZERO
11386 short fadd_res_snan - tbl_fadd_op # SNAN + INF
11387 short fadd_res_snan - tbl_fadd_op # SNAN + QNAN
11388 short fadd_res_snan - tbl_fadd_op # SNAN + DENORM
11389 short fadd_res_snan - tbl_fadd_op # SNAN + SNAN
11390 short tbl_fadd_op - tbl_fadd_op #
11391 short tbl_fadd_op - tbl_fadd_op #
11392
11393 fadd_res_qnan:
11394 bra.l res_qnan
11395 fadd_res_snan:
11396 bra.l res_snan
11397
11398 #
11399 # both operands are ZEROes
11400 #
11401 fadd_zero_2:
11402 mov.b SRC_EX(%a0),%d0 # are the signs opposite
11403 mov.b DST_EX(%a1),%d1
11404 eor.b %d0,%d1 # d1 = src sign ^ dst sign
11405 bmi.w fadd_zero_2_chk_rm # weed out (-ZERO)+(+ZERO)
11406
11407 # the signs are the same. so determine whether they are positive or negative
11408 # and return the appropriately signed zero.
11409 tst.b %d0 # are ZEROes positive or negative?
11410 bmi.b fadd_zero_rm # negative
11411 fmov.s &0x00000000,%fp0 # return +ZERO
11412 mov.b &z_bmask,FPSR_CC(%a6) # set Z
11413 rts
11414
11415 #
11416 # the ZEROes have opposite signs:
11417 # - therefore, we return +ZERO if the rounding modes are RN,RZ, or RP.
11418 # - -ZERO is returned in the case of RM.
11419 #
11420 fadd_zero_2_chk_rm:
11421 mov.b 3+L_SCR3(%a6),%d1
11422 andi.b &0x30,%d1 # extract rnd mode
11423 cmpi.b %d1,&rm_mode*0x10 # is rnd mode == RM?
11424 beq.b fadd_zero_rm # yes
11425 fmov.s &0x00000000,%fp0 # return +ZERO
11426 mov.b &z_bmask,FPSR_CC(%a6) # set Z
11427 rts
11428
11429 fadd_zero_rm:
11430 fmov.s &0x80000000,%fp0 # return -ZERO
11431 mov.b &neg_bmask+z_bmask,FPSR_CC(%a6) # set NEG/Z
11432 rts
11433
11434 #
11435 # one operand is a ZERO and the other is a DENORM or NORM. scale
11436 # the DENORM or NORM and jump to the regular fadd routine.
11437 #
11438 fadd_zero_dst:
11439 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
11440 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
11441 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
11442 bsr.l scale_to_zero_src # scale the operand
11443 clr.w FP_SCR1_EX(%a6) # dst = +0.0 in extended format
11444 clr.l FP_SCR1_HI(%a6)
11445 clr.l FP_SCR1_LO(%a6)
11446 bra.w fadd_zero_entry # go execute fadd
11447
11448 fadd_zero_src:
11449 mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
11450 mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
11451 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
11452 bsr.l scale_to_zero_dst # scale the operand
11453 clr.w FP_SCR0_EX(%a6) # src = +0.0 in extended format
11454 clr.l FP_SCR0_HI(%a6)
11455 clr.l FP_SCR0_LO(%a6)
11456 bra.w fadd_zero_entry # go execute fadd
11457
11458 #
11459 # both operands are INFs. an OPERR will result if the INFs have
11460 # different signs. else, an INF of the same sign is returned
11461 #
11462 fadd_inf_2:
11463 mov.b SRC_EX(%a0),%d0 # exclusive or the signs
11464 mov.b DST_EX(%a1),%d1
11465 eor.b %d1,%d0
11466 bmi.l res_operr # weed out (-INF)+(+INF)
11467
11468 # ok, so it's not an OPERR. but, we do have to remember to return the
11469 # src INF since that's where the 881/882 gets the j-bit from...
11470
11471 #
11472 # operands are INF and one of {ZERO, INF, DENORM, NORM}; src is the INF
11473 #
11474 fadd_inf_src:
11475 fmovm.x SRC(%a0),&0x80 # return src INF
11476 tst.b SRC_EX(%a0) # is INF positive?
11477 bpl.b fadd_inf_done # yes; we're done
11478 mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11479 rts
11480
11481 #
11482 # operands are INF and one of {ZERO, INF, DENORM, NORM}; dst is the INF
11483 #
11484 fadd_inf_dst:
11485 fmovm.x DST(%a1),&0x80 # return dst INF
11486 tst.b DST_EX(%a1) # is INF positive?
11487 bpl.b fadd_inf_done # yes; we're done
11488 mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11489 rts
11490
11491 fadd_inf_done:
11492 mov.b &inf_bmask,FPSR_CC(%a6) # set INF
11493 rts
11494
11495 #########################################################################
11496 # XDEF **************************************************************** #
11497 # fsub(): emulates the fsub instruction #
11498 # fssub(): emulates the fssub instruction #
11499 # fdsub(): emulates the fdsub instruction #
11500 # #
11501 # XREF **************************************************************** #
11502 # addsub_scaler2() - scale the operands so they won't take exc #
11503 # ovf_res() - return default overflow result #
11504 # unf_res() - return default underflow result #
11505 # res_qnan() - set QNAN result #
11506 # res_snan() - set SNAN result #
11507 # res_operr() - set OPERR result #
11508 # scale_to_zero_src() - set src operand exponent equal to zero #
11509 # scale_to_zero_dst() - set dst operand exponent equal to zero #
11510 # #
11511 # INPUT *************************************************************** #
11512 # a0 = pointer to extended precision source operand #
11513 # a1 = pointer to extended precision destination operand #
11514 # #
11515 # OUTPUT ************************************************************** #
11516 # fp0 = result #
11517 # fp1 = EXOP (if exception occurred) #
11518 # #
11519 # ALGORITHM *********************************************************** #
11520 # Handle NANs, infinities, and zeroes as special cases. Divide #
11521 # norms into extended, single, and double precision. #
11522 # Do subtraction after scaling exponents such that exception won't#
11523 # occur. Then, check result exponent to see if exception would have #
11524 # occurred. If so, return default result and maybe EXOP. Else, insert #
11525 # the correct result exponent and return. Set FPSR bits as appropriate. #
11526 # #
11527 #########################################################################
11528
11529 global fssub
11530 fssub:
11531 andi.b &0x30,%d0 # clear rnd prec
11532 ori.b &s_mode*0x10,%d0 # insert sgl prec
11533 bra.b fsub
11534
11535 global fdsub
11536 fdsub:
11537 andi.b &0x30,%d0 # clear rnd prec
11538 ori.b &d_mode*0x10,%d0 # insert dbl prec; fall through to fsub
11539
11540 global fsub
11541 fsub:
11542 mov.l %d0,L_SCR3(%a6) # store rnd info
11543
# build operand-class index: d1 = (dst tag << 3) | src tag
11544 clr.w %d1
11545 mov.b DTAG(%a6),%d1
11546 lsl.b &0x3,%d1
11547 or.b STAG(%a6),%d1 # combine src tags
11548
11549 bne.w fsub_not_norm # optimize on non-norm input
11550
11551 #
11552 # SUB: norms and denorms
11553 #
11554 fsub_norm:
11555 bsr.l addsub_scaler2 # scale exponents; d0 = scale factor
11556
11557 fsub_zero_entry:
11558 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
11559
11560 fmov.l &0x0,%fpsr # clear FPSR
11561 fmov.l L_SCR3(%a6),%fpcr # set FPCR
11562
11563 fsub.x FP_SCR0(%a6),%fp0 # execute subtract
11564
11565 fmov.l &0x0,%fpcr # clear FPCR
11566 fmov.l %fpsr,%d1 # fetch INEX2, N, Z
11567
11568 or.l %d1,USER_FPSR(%a6) # save exc and ccode bits
11569
11570 fbeq.w fsub_zero_exit # if result zero, end now
11571
11572 mov.l %d2,-(%sp) # save d2
11573
11574 fmovm.x &0x01,-(%sp) # save result to stack
11575
11576 mov.w 2+L_SCR3(%a6),%d1
11577 lsr.b &0x6,%d1 # d1 = rnd prec index (0=ext,1=sgl,2=dbl)
11578
11579 mov.w (%sp),%d2 # fetch new exponent
11580 andi.l &0x7fff,%d2 # strip sign
11581 sub.l %d0,%d2 # apply scale factor
11582
11583 cmp.l %d2,(tbl_fsub_ovfl.b,%pc,%d1.w*4) # is it an overflow?
11584 bge.b fsub_ovfl # yes
11585
11586 cmp.l %d2,(tbl_fsub_unfl.b,%pc,%d1.w*4) # is it an underflow?
11587 blt.w fsub_unfl # yes
11588 beq.w fsub_may_unfl # maybe; go find out
11589
11590 fsub_normal:
11591 mov.w (%sp),%d1
11592 andi.w &0x8000,%d1 # keep sign
11593 or.w %d2,%d1 # concat sign,new exp
11594 mov.w %d1,(%sp) # insert new exponent
11595
11596 fmovm.x (%sp)+,&0x80 # return result in fp0
11597
11598 mov.l (%sp)+,%d2 # restore d2
11599 rts
11600
11601 fsub_zero_exit:
11602 # fmov.s &0x00000000,%fp0 # return zero in fp0
11603 rts
11604
# exponent thresholds, indexed by rounding precision (ext/sgl/dbl)
11605 tbl_fsub_ovfl:
11606 long 0x7fff # ext ovfl
11607 long 0x407f # sgl ovfl
11608 long 0x43ff # dbl ovfl
11609
11610 tbl_fsub_unfl:
11611 long 0x0000 # ext unfl
11612 long 0x3f81 # sgl unfl
11613 long 0x3c01 # dbl unfl
11614
# result overflowed for the selected rounding precision
11615 fsub_ovfl:
11616 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
11617
11618 mov.b FPCR_ENABLE(%a6),%d1
11619 andi.b &0x13,%d1 # is OVFL or INEX enabled?
11620 bne.b fsub_ovfl_ena # yes
11621
11622 add.l &0xc,%sp # discard result copy from stack
11623 fsub_ovfl_dis:
11624 btst &neg_bit,FPSR_CC(%a6) # is result negative?
11625 sne %d1 # set sign param accordingly
11626 mov.l L_SCR3(%a6),%d0 # pass prec:rnd
11627 bsr.l ovf_res # calculate default result
11628 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
11629 fmovm.x (%a0),&0x80 # return default result in fp0
11630 mov.l (%sp)+,%d2 # restore d2
11631 rts
11632
# OVFL/INEX trap enabled: also build the EXOP (exceptional operand) in fp1
11633 fsub_ovfl_ena:
11634 mov.b L_SCR3(%a6),%d1
11635 andi.b &0xc0,%d1 # is precision extended?
11636 bne.b fsub_ovfl_ena_sd # no
11637
11638 fsub_ovfl_ena_cont:
11639 mov.w (%sp),%d1 # fetch {sgn,exp}
11640 andi.w &0x8000,%d1 # keep sign
11641 subi.l &0x6000,%d2 # subtract new bias
11642 andi.w &0x7fff,%d2 # clear top bit
11643 or.w %d2,%d1 # concat sign,exp
11644 mov.w %d1,(%sp) # insert new exponent
11645
11646 fmovm.x (%sp)+,&0x40 # return EXOP in fp1
11647 bra.b fsub_ovfl_dis
11648
# sgl/dbl precision: redo the sub rounded to extended so the EXOP mantissa
# keeps full precision, then rejoin the common EXOP path
11649 fsub_ovfl_ena_sd:
11650 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
11651
11652 mov.l L_SCR3(%a6),%d1
11653 andi.b &0x30,%d1 # clear rnd prec
11654 fmov.l %d1,%fpcr # set FPCR
11655
11656 fsub.x FP_SCR0(%a6),%fp0 # execute subtract
11657
11658 fmov.l &0x0,%fpcr # clear FPCR
11659
11660 add.l &0xc,%sp # discard old result copy
11661 fmovm.x &0x01,-(%sp) # save new result to stack
11662 bra.b fsub_ovfl_ena_cont
11663
# result underflowed for the selected rounding precision
11664 fsub_unfl:
11665 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
11666
11667 add.l &0xc,%sp # discard result copy from stack
11668
11669 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
11670
11671 fmov.l &rz_mode*0x10,%fpcr # set FPCR: round-to-zero
11672 fmov.l &0x0,%fpsr # clear FPSR
11673
11674 fsub.x FP_SCR0(%a6),%fp0 # execute subtract
11675
11676 fmov.l &0x0,%fpcr # clear FPCR
11677 fmov.l %fpsr,%d1 # save status
11678
11679 or.l %d1,USER_FPSR(%a6) # save INEX,N
11680
11681 mov.b FPCR_ENABLE(%a6),%d1
11682 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
11683 bne.b fsub_unfl_ena # yes
11684
11685 fsub_unfl_dis:
11686 fmovm.x &0x80,FP_SCR0(%a6) # store out result
11687
11688 lea FP_SCR0(%a6),%a0 # pass: result addr
11689 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
11690 bsr.l unf_res # calculate default result
11691 or.b %d0,FPSR_CC(%a6) # 'Z' may have been set
11692 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
11693 mov.l (%sp)+,%d2 # restore d2
11694 rts
11695
# UNFL/INEX trap enabled: also build the EXOP in fp1
11696 fsub_unfl_ena:
11697 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
11698
11699 mov.l L_SCR3(%a6),%d1
11700 andi.b &0xc0,%d1 # is precision extended?
11701 bne.b fsub_unfl_ena_sd # no
11702
11703 fmov.l L_SCR3(%a6),%fpcr # set FPCR
11704
11705 fsub_unfl_ena_cont:
11706 fmov.l &0x0,%fpsr # clear FPSR
11707
11708 fsub.x FP_SCR0(%a6),%fp1 # execute subtract
11709
11710 fmov.l &0x0,%fpcr # clear FPCR
11711
11712 fmovm.x &0x40,FP_SCR0(%a6) # store result to stack
11713 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
11714 mov.l %d1,%d2 # make a copy
11715 andi.l &0x7fff,%d1 # strip sign
11716 andi.w &0x8000,%d2 # keep old sign
11717 sub.l %d0,%d1 # apply scale factor
11718 addi.l &0x6000,%d1 # add new bias
11719 andi.w &0x7fff,%d1 # clear top bit
11720 or.w %d2,%d1 # concat sgn,exp
11721 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
11722 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
11723 bra.w fsub_unfl_dis
11724
11725 fsub_unfl_ena_sd:
11726 mov.l L_SCR3(%a6),%d1
11727 andi.b &0x30,%d1 # clear rnd prec
11728 fmov.l %d1,%fpcr # set FPCR
11729
11730 bra.b fsub_unfl_ena_cont
11731
11732 #
11733 # result is equal to the smallest normalized number in the selected precision.
11734 # if the precision is extended, this result could not have come from an
11735 # underflow that rounded up.
11736 #
11737 fsub_may_unfl:
11738 mov.l L_SCR3(%a6),%d1
11739 andi.b &0xc0,%d1 # fetch rnd prec
11740 beq.w fsub_normal # yes; no underflow occurred
11741
11742 mov.l 0x4(%sp),%d1 # extract hi(man)
11743 cmpi.l %d1,&0x80000000 # is hi(man) = 0x80000000?
11744 bne.w fsub_normal # no; no underflow occurred
11745
11746 tst.l 0x8(%sp) # is lo(man) = 0x0?
11747 bne.w fsub_normal # no; no underflow occurred
11748
11749 btst &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
11750 beq.w fsub_normal # no; no underflow occurred
11751
11752 #
11753 # ok, so now the result has an exponent equal to the smallest normalized
11754 # exponent for the selected precision. also, the mantissa is equal to
11755 # 0x8000000000000000 and this mantissa is the result of rounding non-zero
11756 # g,r,s.
11757 # now, we must determine whether the pre-rounded result was an underflow
11758 # rounded "up" or a normalized number rounded "down".
11759 # so, we do this by re-executing the sub using RZ as the rounding mode and
11760 # seeing if the new result is smaller or equal to the current result.
11761 #
11762 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
11763
11764 mov.l L_SCR3(%a6),%d1
11765 andi.b &0xc0,%d1 # keep rnd prec
11766 ori.b &rz_mode*0x10,%d1 # insert rnd mode
11767 fmov.l %d1,%fpcr # set FPCR
11768 fmov.l &0x0,%fpsr # clear FPSR
11769
11770 fsub.x FP_SCR0(%a6),%fp1 # execute subtract
11771
11772 fmov.l &0x0,%fpcr # clear FPCR
11773
11774 fabs.x %fp0 # compare absolute values
11775 fabs.x %fp1
11776 fcmp.x %fp0,%fp1 # is first result > second?
11777
11778 fbgt.w fsub_unfl # yes; it's an underflow
11779 bra.w fsub_normal # no; it's not an underflow
11780
11781 ##########################################################################
11782
11783 #
11784 # Sub: inputs are not both normalized; what are they?
11785 # dispatch on d1 = (dst tag << 3) | src tag; rows = dst class, cols = src class
11786 fsub_not_norm:
11787 mov.w (tbl_fsub_op.b,%pc,%d1.w*2),%d1
11788 jmp (tbl_fsub_op.b,%pc,%d1.w*1)
11789
11790 swbeg &48
11791 tbl_fsub_op:
11792 short fsub_norm - tbl_fsub_op # NORM - NORM
11793 short fsub_zero_src - tbl_fsub_op # NORM - ZERO
11794 short fsub_inf_src - tbl_fsub_op # NORM - INF
11795 short fsub_res_qnan - tbl_fsub_op # NORM - QNAN
11796 short fsub_norm - tbl_fsub_op # NORM - DENORM
11797 short fsub_res_snan - tbl_fsub_op # NORM - SNAN
11798 short tbl_fsub_op - tbl_fsub_op #
11799 short tbl_fsub_op - tbl_fsub_op #
11800
11801 short fsub_zero_dst - tbl_fsub_op # ZERO - NORM
11802 short fsub_zero_2 - tbl_fsub_op # ZERO - ZERO
11803 short fsub_inf_src - tbl_fsub_op # ZERO - INF
11804 short fsub_res_qnan - tbl_fsub_op # ZERO - QNAN
11805 short fsub_zero_dst - tbl_fsub_op # ZERO - DENORM
11806 short fsub_res_snan - tbl_fsub_op # ZERO - SNAN
11807 short tbl_fsub_op - tbl_fsub_op #
11808 short tbl_fsub_op - tbl_fsub_op #
11809
11810 short fsub_inf_dst - tbl_fsub_op # INF - NORM
11811 short fsub_inf_dst - tbl_fsub_op # INF - ZERO
11812 short fsub_inf_2 - tbl_fsub_op # INF - INF
11813 short fsub_res_qnan - tbl_fsub_op # INF - QNAN
11814 short fsub_inf_dst - tbl_fsub_op # INF - DENORM
11815 short fsub_res_snan - tbl_fsub_op # INF - SNAN
11816 short tbl_fsub_op - tbl_fsub_op #
11817 short tbl_fsub_op - tbl_fsub_op #
11818
11819 short fsub_res_qnan - tbl_fsub_op # QNAN - NORM
11820 short fsub_res_qnan - tbl_fsub_op # QNAN - ZERO
11821 short fsub_res_qnan - tbl_fsub_op # QNAN - INF
11822 short fsub_res_qnan - tbl_fsub_op # QNAN - QNAN
11823 short fsub_res_qnan - tbl_fsub_op # QNAN - DENORM
11824 short fsub_res_snan - tbl_fsub_op # QNAN - SNAN
11825 short tbl_fsub_op - tbl_fsub_op #
11826 short tbl_fsub_op - tbl_fsub_op #
11827
11828 short fsub_norm - tbl_fsub_op # DENORM - NORM
11829 short fsub_zero_src - tbl_fsub_op # DENORM - ZERO
11830 short fsub_inf_src - tbl_fsub_op # DENORM - INF
11831 short fsub_res_qnan - tbl_fsub_op # DENORM - QNAN
11832 short fsub_norm - tbl_fsub_op # DENORM - DENORM
11833 short fsub_res_snan - tbl_fsub_op # DENORM - SNAN
11834 short tbl_fsub_op - tbl_fsub_op #
11835 short tbl_fsub_op - tbl_fsub_op #
11836
11837 short fsub_res_snan - tbl_fsub_op # SNAN - NORM
11838 short fsub_res_snan - tbl_fsub_op # SNAN - ZERO
11839 short fsub_res_snan - tbl_fsub_op # SNAN - INF
11840 short fsub_res_snan - tbl_fsub_op # SNAN - QNAN
11841 short fsub_res_snan - tbl_fsub_op # SNAN - DENORM
11842 short fsub_res_snan - tbl_fsub_op # SNAN - SNAN
11843 short tbl_fsub_op - tbl_fsub_op #
11844 short tbl_fsub_op - tbl_fsub_op #
11845
11846 fsub_res_qnan:
11847 bra.l res_qnan
11848 fsub_res_snan:
11849 bra.l res_snan
11850
11851 #
11852 # both operands are ZEROes
11853 #
11854 fsub_zero_2:
11855 mov.b SRC_EX(%a0),%d0 # d0 = src sign byte
11856 mov.b DST_EX(%a1),%d1 # d1 = dst sign byte
11857 eor.b %d1,%d0 # d0 = src ^ dst
11858 bpl.b fsub_zero_2_chk_rm # same signs; resolve via rnd mode
11859
11860 # the signs are opposite, so, return a ZERO w/ the sign of the dst ZERO.
# note: d0 now holds src^dst, which always has its msb set on this path, so
# test the dst sign still held in d1 (was: tst.b %d0, which made the bmi
# unconditional and returned -ZERO even for (+0)-(-0)).
11861 tst.b %d1 # is dst negative?
11862 bmi.b fsub_zero_2_rm # yes; return -ZERO
11863 fmov.s &0x00000000,%fp0 # no; return +ZERO
11864 mov.b &z_bmask,FPSR_CC(%a6) # set Z
11865 rts
11866
11867 #
11868 # the ZEROes have the same signs:
11869 # - therefore, we return +ZERO if the rounding mode is RN,RZ, or RP
11870 # - -ZERO is returned in the case of RM.
11871 #
11872 fsub_zero_2_chk_rm:
11873 mov.b 3+L_SCR3(%a6),%d1
11874 andi.b &0x30,%d1 # extract rnd mode
11875 cmpi.b %d1,&rm_mode*0x10 # is rnd mode = RM?
11876 beq.b fsub_zero_2_rm # yes
11877 fmov.s &0x00000000,%fp0 # no; return +ZERO
11878 mov.b &z_bmask,FPSR_CC(%a6) # set Z
11879 rts
11880
11881 fsub_zero_2_rm:
11882 fmov.s &0x80000000,%fp0 # return -ZERO
11883 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/NEG
11884 rts
11885
11886 #
11887 # one operand is a ZERO and the other is a DENORM or a NORM.
11888 # scale the DENORM or NORM and jump to the regular fsub routine.
11889 #
11890 fsub_zero_dst:
11891 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
11892 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
11893 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
11894 bsr.l scale_to_zero_src # scale the operand
11895 clr.w FP_SCR1_EX(%a6) # dst = +0.0 in extended format
11896 clr.l FP_SCR1_HI(%a6)
11897 clr.l FP_SCR1_LO(%a6)
11898 bra.w fsub_zero_entry # go execute fsub
11899
11900 fsub_zero_src:
11901 mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
11902 mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
11903 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
11904 bsr.l scale_to_zero_dst # scale the operand
11905 clr.w FP_SCR0_EX(%a6) # src = +0.0 in extended format
11906 clr.l FP_SCR0_HI(%a6)
11907 clr.l FP_SCR0_LO(%a6)
11908 bra.w fsub_zero_entry # go execute fsub
11909
11910 #
11911 # both operands are INFs. an OPERR will result if the INFs have the
11912 # same signs. else, an INF w/ the sign of the dst (the negated src INF)
11913 # is returned.
11914 fsub_inf_2:
11915 mov.b SRC_EX(%a0),%d0 # exclusive or the signs
11916 mov.b DST_EX(%a1),%d1
11917 eor.b %d1,%d0
11918 bpl.l res_operr # weed out (+INF)-(+INF)
11919
11920 # ok, so it's not an OPERR. but we do have to remember to return
11921 # the src INF since that's where the 881/882 gets the j-bit.
11922
11923 fsub_inf_src:
11924 fmovm.x SRC(%a0),&0x80 # return src INF
11925 fneg.x %fp0 # invert sign
11926 fbge.w fsub_inf_done # sign is now positive
11927 mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11928 rts
11929
11930 fsub_inf_dst:
11931 fmovm.x DST(%a1),&0x80 # return dst INF
11932 tst.b DST_EX(%a1) # is INF negative?
11933 bpl.b fsub_inf_done # no
11934 mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11935 rts
11936
11937 fsub_inf_done:
11938 mov.b &inf_bmask,FPSR_CC(%a6) # set INF
11939 rts
11940
11941 #########################################################################
11942 # XDEF **************************************************************** #
11943 # fsqrt(): emulates the fsqrt instruction #
11944 # fssqrt(): emulates the fssqrt instruction #
11945 # fdsqrt(): emulates the fdsqrt instruction #
11946 # #
11947 # XREF **************************************************************** #
11948 # scale_sqrt() - scale the source operand #
11949 # unf_res() - return default underflow result #
11950 # ovf_res() - return default overflow result #
11951 # res_qnan_1op() - return QNAN result #
11952 # res_snan_1op() - return SNAN result #
11953 # #
11954 # INPUT *************************************************************** #
11955 # a0 = pointer to extended precision source operand #
11956 # d0 rnd prec,mode #
11957 # #
11958 # OUTPUT ************************************************************** #
11959 # fp0 = result #
11960 # fp1 = EXOP (if exception occurred) #
11961 # #
11962 # ALGORITHM *********************************************************** #
11963 # Handle NANs, infinities, and zeroes as special cases. Divide #
11964 # norms/denorms into ext/sgl/dbl precision. #
11965 # For norms/denorms, scale the exponents such that a sqrt #
11966 # instruction won't cause an exception. Use the regular fsqrt to #
11967 # compute a result. Check if the regular operands would have taken #
11968 # an exception. If so, return the default overflow/underflow result #
11969 # and return the EXOP if exceptions are enabled. Else, scale the #
11970 # result operand to the proper exponent. #
11971 # #
11972 #########################################################################
11973
11974 global fssqrt
11975 fssqrt:
11976 andi.b &0x30,%d0 # clear rnd prec
11977 ori.b &s_mode*0x10,%d0 # insert sgl precision
11978 bra.b fsqrt
11979
11980 global fdsqrt
11981 fdsqrt:
11982 andi.b &0x30,%d0 # clear rnd prec
11983 ori.b &d_mode*0x10,%d0 # insert dbl precision
11984
# fsqrt: main square-root emulation entry.
# In:  a0 = ptr to extended-precision source operand
#      d0 = rnd prec,mode control word
# Out: fp0 = result; fp1 = EXOP if an exception occurred
	global		fsqrt
fsqrt:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info
	clr.w		%d1
	mov.b		STAG(%a6),%d1		# fetch source type tag
	bne.w		fsqrt_not_norm		# optimize on non-norm input

#
# SQUARE ROOT: norms and denorms ONLY!
#
fsqrt_norm:
	tst.b		SRC_EX(%a0)		# is operand negative?
	bmi.l		res_operr		# yes; sqrt of neg is OPERR

	andi.b		&0xc0,%d0		# is precision extended?
	bne.b		fsqrt_not_ext		# no; go handle sgl or dbl

# extended precision: a NORM source can neither overflow nor underflow,
# so just execute the sqrt directly with the user's rounding controls.
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsqrt.x		(%a0),%fp0		# execute square root

	fmov.l		%fpsr,%d1
	or.l		%d1,USER_FPSR(%a6)	# set N,INEX

	rts

fsqrt_denorm:
	tst.b		SRC_EX(%a0)		# is operand negative?
	bmi.l		res_operr		# yes; sqrt of neg is OPERR

	andi.b		&0xc0,%d0		# is precision extended?
	bne.b		fsqrt_not_ext		# no; go handle sgl or dbl

# copy the denorm into the scratch operand so it can be scaled in place
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l		scale_sqrt		# calculate scale factor -> d0

	bra.w		fsqrt_sd_normal
12026
#
# operand is either single or double
#
fsqrt_not_ext:
	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
	bne.w		fsqrt_dbl

#
# operand is to be rounded to single precision.
# copy operand to scratch, scale it, then classify the scaled exponent
# against the single-precision under/overflow thresholds.
#
fsqrt_sgl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l		scale_sqrt		# calculate scale factor -> d0

	cmpi.l		%d0,&0x3fff-0x3f81	# will move in underflow?
	beq.w		fsqrt_sd_may_unfl	# on the boundary; go check
	bgt.w		fsqrt_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x407f	# will move in overflow?
	beq.w		fsqrt_sd_may_ovfl	# maybe; go check
	blt.w		fsqrt_sd_ovfl		# yes; go handle overflow
12050
#
# operand will NOT overflow or underflow when moved in to the fp reg file
#
fsqrt_sd_normal:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fsqrt.x		FP_SCR0(%a6),%fp0	# perform square root

	fmov.l		%fpsr,%d1		# save FPSR
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

# un-scale the result: rebuild the exponent by backing out the scale
# factor computed by scale_sqrt, keeping the original sign bit.
fsqrt_sd_normal_exit:
	mov.l		%d2,-(%sp)		# save d2
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# subtract scale factor
	andi.w		&0x8000,%d2		# keep old sign
	or.w		%d1,%d2			# concat old sign,new exp
	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts
12078
#
# operand is to be rounded to double precision.
# same scheme as fsqrt_sgl but with the double-precision
# under/overflow exponent thresholds.
#
fsqrt_dbl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l		scale_sqrt		# calculate scale factor -> d0

	cmpi.l		%d0,&0x3fff-0x3c01	# will move in underflow?
	beq.w		fsqrt_sd_may_unfl	# on the boundary; go check
	bgt.b		fsqrt_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x43ff	# will move in overflow?
	beq.w		fsqrt_sd_may_ovfl	# maybe; go check
	blt.w		fsqrt_sd_ovfl		# yes; go handle overflow
	bra.w		fsqrt_sd_normal		# no; go handle normalized op

# we're on the line here and the distinguishing characteristic is whether
# the exponent is 0x3fff or 0x3ffe. if it's 0x3ffe, then it's a safe number
# elsewise fall through to underflow.
fsqrt_sd_may_unfl:
	btst		&0x0,1+FP_SCR0_EX(%a6)	# is exponent 0x3fff?
	bne.w		fsqrt_sd_normal		# yes, so no underflow
12103
#
# operand WILL underflow when moved in to the fp register file
#
fsqrt_sd_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

# compute in round-to-zero so the intermediate result can later be
# re-rounded correctly by unf_res.
	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsqrt.x		FP_SCR0(%a6),%fp0	# execute square root

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

# if underflow or inexact is enabled, go calculate EXOP first.
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fsqrt_sd_unfl_ena	# yes

# no exception enabled: build the default (possibly denormalized or
# zero) underflow result via unf_res and return it in fp0.
fsqrt_sd_unfl_dis:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result

	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set possible 'Z' ccode
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts
12134
#
# operand will underflow AND underflow is enabled.
# therefore, we must return the result rounded to extended precision.
# the EXOP exponent is re-biased by +0x6000 per the 68060 exception model.
#
fsqrt_sd_unfl_ena:
	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent

	mov.l		%d2,-(%sp)		# save d2
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# subtract scale factor
	addi.l		&0x6000,%d1		# add new bias
	andi.w		&0x7fff,%d1		# clear any carry into sign bit
	or.w		%d2,%d1			# concat new sign,new exp
	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
	mov.l		(%sp)+,%d2		# restore d2
	bra.b		fsqrt_sd_unfl_dis	# go produce default result too
12156
#
# operand WILL overflow.
#
fsqrt_sd_ovfl:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fsqrt.x		FP_SCR0(%a6),%fp0	# perform square root

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# save FPSR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

fsqrt_sd_ovfl_tst:
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fsqrt_sd_ovfl_ena	# yes

#
# OVFL is not enabled; therefore, we must create the default result by
# calling ovf_res().
#
fsqrt_sd_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts
12190
#
# OVFL is enabled.
# the INEX2 bit has already been updated by the round to the correct precision.
# now, round to extended(and don't alter the FPSR).
# the EXOP exponent is re-biased by -0x6000 per the 68060 exception model.
#
fsqrt_sd_ovfl_ena:
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# subtract scale factor
	subi.l		&0x6000,%d1		# subtract bias
	andi.w		&0x7fff,%d1		# clear any borrow into sign bit
	or.w		%d2,%d1			# concat sign,exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	mov.l		(%sp)+,%d2		# restore d2
	bra.b		fsqrt_sd_ovfl_dis	# go produce default result too
12210
#
# the move in MAY overflow. so...
#
fsqrt_sd_may_ovfl:
	btst		&0x0,1+FP_SCR0_EX(%a6)	# is exponent 0x3fff?
	bne.w		fsqrt_sd_ovfl		# yes, so overflow

	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fsqrt.x		FP_SCR0(%a6),%fp0	# perform square root

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

# compare the scaled result against 1.0 to decide whether rounding
# pushed it over the overflow threshold.
	fmov.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x1		# is |result| >= 1.b?
	fbge.w		fsqrt_sd_ovfl_tst	# yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w		fsqrt_sd_normal_exit
12234
12235 ##########################################################################
12236
#
# input is not normalized; what is it?
# dispatch on the source type tag in d1 set at fsqrt entry.
#
fsqrt_not_norm:
	cmpi.b		%d1,&DENORM		# weed out DENORM
	beq.w		fsqrt_denorm
	cmpi.b		%d1,&ZERO		# weed out ZERO
	beq.b		fsqrt_zero
	cmpi.b		%d1,&INF		# weed out INF
	beq.b		fsqrt_inf
	cmpi.b		%d1,&SNAN		# weed out SNAN
	beq.l		res_snan_1op
	bra.l		res_qnan_1op		# last possibility: QNAN
12250
#
# fsqrt(+0) = +0
# fsqrt(-0) = -0
# fsqrt(+INF) = +INF
# fsqrt(-INF) = OPERR
#
fsqrt_zero:
	tst.b		SRC_EX(%a0)		# is ZERO positive or negative?
	bmi.b		fsqrt_zero_m		# negative
fsqrt_zero_p:
	fmov.s		&0x00000000,%fp0	# return +ZERO
	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
	rts
fsqrt_zero_m:
	fmov.s		&0x80000000,%fp0	# return -ZERO
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
	rts

fsqrt_inf:
	tst.b		SRC_EX(%a0)		# is INF positive or negative?
	bmi.l		res_operr		# negative; sqrt(-INF) is OPERR
fsqrt_inf_p:
	fmovm.x		SRC(%a0),&0x80		# return +INF in fp0
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
	rts
12276
12277 #########################################################################
12278 # XDEF **************************************************************** #
12279 # fetch_dreg(): fetch register according to index in d1 #
12280 # #
12281 # XREF **************************************************************** #
12282 # None #
12283 # #
12284 # INPUT *************************************************************** #
12285 # d1 = index of register to fetch from #
12286 # #
12287 # OUTPUT ************************************************************** #
12288 # d0 = value of register fetched #
12289 # #
12290 # ALGORITHM *********************************************************** #
12291 # According to the index value in d1 which can range from zero #
12292 # to fifteen, load the corresponding register file value (where #
12293 # address register indexes start at 8). D0/D1/A0/A1/A6/A7 are on the #
12294 # stack. The rest should still be in their original places. #
12295 # #
12296 #########################################################################
12297
# this routine leaves d1 intact for subsequent store_dreg calls.
# jump-table dispatch: index d1 (0-15) selects the handler that loads
# the requested integer register value into d0. d0/d1/a0/a1/a6/a7 live
# in the exception stack frame; the others are still in their registers.
	global		fetch_dreg
fetch_dreg:
	mov.w		(tbl_fdreg.b,%pc,%d1.w*2),%d0 # fetch handler offset
	jmp		(tbl_fdreg.b,%pc,%d0.w*1) # jump to handler

tbl_fdreg:
	short		fdreg0 - tbl_fdreg
	short		fdreg1 - tbl_fdreg
	short		fdreg2 - tbl_fdreg
	short		fdreg3 - tbl_fdreg
	short		fdreg4 - tbl_fdreg
	short		fdreg5 - tbl_fdreg
	short		fdreg6 - tbl_fdreg
	short		fdreg7 - tbl_fdreg
	short		fdreg8 - tbl_fdreg
	short		fdreg9 - tbl_fdreg
	short		fdrega - tbl_fdreg
	short		fdregb - tbl_fdreg
	short		fdregc - tbl_fdreg
	short		fdregd - tbl_fdreg
	short		fdrege - tbl_fdreg
	short		fdregf - tbl_fdreg

fdreg0:
	mov.l		EXC_DREGS+0x0(%a6),%d0	# d0 is on the stack frame
	rts
fdreg1:
	mov.l		EXC_DREGS+0x4(%a6),%d0	# d1 is on the stack frame
	rts
fdreg2:
	mov.l		%d2,%d0
	rts
fdreg3:
	mov.l		%d3,%d0
	rts
fdreg4:
	mov.l		%d4,%d0
	rts
fdreg5:
	mov.l		%d5,%d0
	rts
fdreg6:
	mov.l		%d6,%d0
	rts
fdreg7:
	mov.l		%d7,%d0
	rts
fdreg8:
	mov.l		EXC_DREGS+0x8(%a6),%d0	# a0 is on the stack frame
	rts
fdreg9:
	mov.l		EXC_DREGS+0xc(%a6),%d0	# a1 is on the stack frame
	rts
fdrega:
	mov.l		%a2,%d0
	rts
fdregb:
	mov.l		%a3,%d0
	rts
fdregc:
	mov.l		%a4,%d0
	rts
fdregd:
	mov.l		%a5,%d0
	rts
fdrege:
	mov.l		(%a6),%d0		# saved a6 (frame link) — NOTE(review): presumes LINK-style frame
	rts
fdregf:
	mov.l		EXC_A7(%a6),%d0		# a7 is on the stack frame
	rts
12370
12371 #########################################################################
12372 # XDEF **************************************************************** #
12373 # store_dreg_l(): store longword to data register specified by d1 #
12374 # #
12375 # XREF **************************************************************** #
12376 # None #
12377 # #
12378 # INPUT *************************************************************** #
#	d0 = longword value to store					#
12380 # d1 = index of register to fetch from #
12381 # #
12382 # OUTPUT ************************************************************** #
12383 # (data register is updated) #
12384 # #
12385 # ALGORITHM *********************************************************** #
12386 # According to the index value in d1, store the longword value #
12387 # in d0 to the corresponding data register. D0/D1 are on the stack #
12388 # while the rest are in their initial places. #
12389 # #
12390 #########################################################################
12391
# jump-table dispatch: index d1 (0-7) selects the data register that
# receives the longword in d0. d0/d1 live in the exception stack frame.
	global		store_dreg_l
store_dreg_l:
	mov.w		(tbl_sdregl.b,%pc,%d1.w*2),%d1 # fetch handler offset
	jmp		(tbl_sdregl.b,%pc,%d1.w*1) # jump to handler

tbl_sdregl:
	short		sdregl0 - tbl_sdregl
	short		sdregl1 - tbl_sdregl
	short		sdregl2 - tbl_sdregl
	short		sdregl3 - tbl_sdregl
	short		sdregl4 - tbl_sdregl
	short		sdregl5 - tbl_sdregl
	short		sdregl6 - tbl_sdregl
	short		sdregl7 - tbl_sdregl

sdregl0:
	mov.l		%d0,EXC_DREGS+0x0(%a6)	# d0 is on the stack frame
	rts
sdregl1:
	mov.l		%d0,EXC_DREGS+0x4(%a6)	# d1 is on the stack frame
	rts
sdregl2:
	mov.l		%d0,%d2
	rts
sdregl3:
	mov.l		%d0,%d3
	rts
sdregl4:
	mov.l		%d0,%d4
	rts
sdregl5:
	mov.l		%d0,%d5
	rts
sdregl6:
	mov.l		%d0,%d6
	rts
sdregl7:
	mov.l		%d0,%d7
	rts
12431
12432 #########################################################################
12433 # XDEF **************************************************************** #
12434 # store_dreg_w(): store word to data register specified by d1 #
12435 # #
12436 # XREF **************************************************************** #
12437 # None #
12438 # #
12439 # INPUT *************************************************************** #
12440 # d0 = word value to store #
12441 # d1 = index of register to fetch from #
12442 # #
12443 # OUTPUT ************************************************************** #
12444 # (data register is updated) #
12445 # #
12446 # ALGORITHM *********************************************************** #
12447 # According to the index value in d1, store the word value #
12448 # in d0 to the corresponding data register. D0/D1 are on the stack #
12449 # while the rest are in their initial places. #
12450 # #
12451 #########################################################################
12452
# jump-table dispatch: index d1 (0-7) selects the data register whose
# low word receives the word in d0; upper word is left untouched.
# for d0/d1 the stacked copies are updated at offset +2 (low word).
	global		store_dreg_w
store_dreg_w:
	mov.w		(tbl_sdregw.b,%pc,%d1.w*2),%d1 # fetch handler offset
	jmp		(tbl_sdregw.b,%pc,%d1.w*1) # jump to handler

tbl_sdregw:
	short		sdregw0 - tbl_sdregw
	short		sdregw1 - tbl_sdregw
	short		sdregw2 - tbl_sdregw
	short		sdregw3 - tbl_sdregw
	short		sdregw4 - tbl_sdregw
	short		sdregw5 - tbl_sdregw
	short		sdregw6 - tbl_sdregw
	short		sdregw7 - tbl_sdregw

sdregw0:
	mov.w		%d0,2+EXC_DREGS+0x0(%a6) # d0 is on the stack frame
	rts
sdregw1:
	mov.w		%d0,2+EXC_DREGS+0x4(%a6) # d1 is on the stack frame
	rts
sdregw2:
	mov.w		%d0,%d2
	rts
sdregw3:
	mov.w		%d0,%d3
	rts
sdregw4:
	mov.w		%d0,%d4
	rts
sdregw5:
	mov.w		%d0,%d5
	rts
sdregw6:
	mov.w		%d0,%d6
	rts
sdregw7:
	mov.w		%d0,%d7
	rts
12492
12493 #########################################################################
12494 # XDEF **************************************************************** #
12495 # store_dreg_b(): store byte to data register specified by d1 #
12496 # #
12497 # XREF **************************************************************** #
12498 # None #
12499 # #
12500 # INPUT *************************************************************** #
12501 # d0 = byte value to store #
12502 # d1 = index of register to fetch from #
12503 # #
12504 # OUTPUT ************************************************************** #
12505 # (data register is updated) #
12506 # #
12507 # ALGORITHM *********************************************************** #
12508 # According to the index value in d1, store the byte value #
12509 # in d0 to the corresponding data register. D0/D1 are on the stack #
12510 # while the rest are in their initial places. #
12511 # #
12512 #########################################################################
12513
# jump-table dispatch: index d1 (0-7) selects the data register whose
# low byte receives the byte in d0; upper bits are left untouched.
# for d0/d1 the stacked copies are updated at offset +3 (low byte).
	global		store_dreg_b
store_dreg_b:
	mov.w		(tbl_sdregb.b,%pc,%d1.w*2),%d1 # fetch handler offset
	jmp		(tbl_sdregb.b,%pc,%d1.w*1) # jump to handler

tbl_sdregb:
	short		sdregb0 - tbl_sdregb
	short		sdregb1 - tbl_sdregb
	short		sdregb2 - tbl_sdregb
	short		sdregb3 - tbl_sdregb
	short		sdregb4 - tbl_sdregb
	short		sdregb5 - tbl_sdregb
	short		sdregb6 - tbl_sdregb
	short		sdregb7 - tbl_sdregb

sdregb0:
	mov.b		%d0,3+EXC_DREGS+0x0(%a6) # d0 is on the stack frame
	rts
sdregb1:
	mov.b		%d0,3+EXC_DREGS+0x4(%a6) # d1 is on the stack frame
	rts
sdregb2:
	mov.b		%d0,%d2
	rts
sdregb3:
	mov.b		%d0,%d3
	rts
sdregb4:
	mov.b		%d0,%d4
	rts
sdregb5:
	mov.b		%d0,%d5
	rts
sdregb6:
	mov.b		%d0,%d6
	rts
sdregb7:
	mov.b		%d0,%d7
	rts
12553
12554 #########################################################################
12555 # XDEF **************************************************************** #
12556 # inc_areg(): increment an address register by the value in d0 #
12557 # #
12558 # XREF **************************************************************** #
12559 # None #
12560 # #
12561 # INPUT *************************************************************** #
12562 # d0 = amount to increment by #
12563 # d1 = index of address register to increment #
12564 # #
12565 # OUTPUT ************************************************************** #
12566 # (address register is updated) #
12567 # #
12568 # ALGORITHM *********************************************************** #
12569 # Typically used for an instruction w/ a post-increment <ea>, #
12570 # this routine adds the increment value in d0 to the address register #
12571 # specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside #
12572 # in their original places. #
12573 # For a7, if the increment amount is one, then we have to #
12574 # increment by two. For any a7 update, set the mia7_flag so that if #
12575 # an access error exception occurs later in emulation, this address #
12576 # register update can be undone. #
12577 # #
12578 #########################################################################
12579
# jump-table dispatch: index d1 (0-7) selects the address register to
# which the increment in d0 is added. a0/a1/a6/a7 live in the stack
# frame; a7 byte increments are promoted to 2 to keep sp word-aligned.
	global		inc_areg
inc_areg:
	mov.w		(tbl_iareg.b,%pc,%d1.w*2),%d1 # fetch handler offset
	jmp		(tbl_iareg.b,%pc,%d1.w*1) # jump to handler

tbl_iareg:
	short		iareg0 - tbl_iareg
	short		iareg1 - tbl_iareg
	short		iareg2 - tbl_iareg
	short		iareg3 - tbl_iareg
	short		iareg4 - tbl_iareg
	short		iareg5 - tbl_iareg
	short		iareg6 - tbl_iareg
	short		iareg7 - tbl_iareg

iareg0:	add.l		%d0,EXC_DREGS+0x8(%a6)	# a0 is on the stack frame
	rts
iareg1:	add.l		%d0,EXC_DREGS+0xc(%a6)	# a1 is on the stack frame
	rts
iareg2:	add.l		%d0,%a2
	rts
iareg3:	add.l		%d0,%a3
	rts
iareg4:	add.l		%d0,%a4
	rts
iareg5:	add.l		%d0,%a5
	rts
iareg6:	add.l		%d0,(%a6)		# saved a6 (frame link) — NOTE(review): presumes LINK-style frame
	rts
iareg7:	mov.b		&mia7_flg,SPCOND_FLG(%a6) # flag a7 update for possible undo
	cmpi.b		%d0,&0x1		# byte-sized increment?
	beq.b		iareg7b			# yes; bump by 2 instead
	add.l		%d0,EXC_A7(%a6)
	rts
iareg7b:
	addq.l		&0x2,EXC_A7(%a6)	# keep a7 word-aligned
	rts
12617
12618 #########################################################################
12619 # XDEF **************************************************************** #
12620 # dec_areg(): decrement an address register by the value in d0 #
12621 # #
12622 # XREF **************************************************************** #
12623 # None #
12624 # #
12625 # INPUT *************************************************************** #
12626 # d0 = amount to decrement by #
12627 # d1 = index of address register to decrement #
12628 # #
12629 # OUTPUT ************************************************************** #
12630 # (address register is updated) #
12631 # #
12632 # ALGORITHM *********************************************************** #
12633 # Typically used for an instruction w/ a pre-decrement <ea>, #
#	this routine subtracts the decrement value in d0 from the address	#
12635 # specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside #
12636 # in their original places. #
12637 # For a7, if the decrement amount is one, then we have to #
12638 # decrement by two. For any a7 update, set the mda7_flag so that if #
12639 # an access error exception occurs later in emulation, this address #
12640 # register update can be undone. #
12641 # #
12642 #########################################################################
12643
# jump-table dispatch: index d1 (0-7) selects the address register from
# which the decrement in d0 is subtracted. a0/a1/a6/a7 live in the stack
# frame; a7 byte decrements are promoted to 2 to keep sp word-aligned.
	global		dec_areg
dec_areg:
	mov.w		(tbl_dareg.b,%pc,%d1.w*2),%d1 # fetch handler offset
	jmp		(tbl_dareg.b,%pc,%d1.w*1) # jump to handler

tbl_dareg:
	short		dareg0 - tbl_dareg
	short		dareg1 - tbl_dareg
	short		dareg2 - tbl_dareg
	short		dareg3 - tbl_dareg
	short		dareg4 - tbl_dareg
	short		dareg5 - tbl_dareg
	short		dareg6 - tbl_dareg
	short		dareg7 - tbl_dareg

dareg0:	sub.l		%d0,EXC_DREGS+0x8(%a6)	# a0 is on the stack frame
	rts
dareg1:	sub.l		%d0,EXC_DREGS+0xc(%a6)	# a1 is on the stack frame
	rts
dareg2:	sub.l		%d0,%a2
	rts
dareg3:	sub.l		%d0,%a3
	rts
dareg4:	sub.l		%d0,%a4
	rts
dareg5:	sub.l		%d0,%a5
	rts
dareg6:	sub.l		%d0,(%a6)		# saved a6 (frame link) — NOTE(review): presumes LINK-style frame
	rts
dareg7:	mov.b		&mda7_flg,SPCOND_FLG(%a6) # flag a7 update for possible undo
	cmpi.b		%d0,&0x1		# byte-sized decrement?
	beq.b		dareg7b			# yes; drop by 2 instead
	sub.l		%d0,EXC_A7(%a6)
	rts
dareg7b:
	subq.l		&0x2,EXC_A7(%a6)	# keep a7 word-aligned
	rts
12681
12682 ##############################################################################
12683
12684 #########################################################################
12685 # XDEF **************************************************************** #
12686 # load_fpn1(): load FP register value into FP_SRC(a6). #
12687 # #
12688 # XREF **************************************************************** #
12689 # None #
12690 # #
12691 # INPUT *************************************************************** #
12692 # d0 = index of FP register to load #
12693 # #
12694 # OUTPUT ************************************************************** #
12695 # FP_SRC(a6) = value loaded from FP register file #
12696 # #
12697 # ALGORITHM *********************************************************** #
12698 # Using the index in d0, load FP_SRC(a6) with a number from the #
12699 # FP register file. #
12700 # #
12701 #########################################################################
12702
# jump-table dispatch: index d0 (0-7) selects the FP register copied
# into FP_SRC(a6). fp0/fp1 are saved in the stack frame and copied as
# three longwords; fp2-fp7 are still live and saved with fmovm.
# On return a0 points to FP_SRC(a6).
	global		load_fpn1
load_fpn1:
	mov.w		(tbl_load_fpn1.b,%pc,%d0.w*2), %d0 # fetch handler offset
	jmp		(tbl_load_fpn1.b,%pc,%d0.w*1) # jump to handler

tbl_load_fpn1:
	short		load_fpn1_0 - tbl_load_fpn1
	short		load_fpn1_1 - tbl_load_fpn1
	short		load_fpn1_2 - tbl_load_fpn1
	short		load_fpn1_3 - tbl_load_fpn1
	short		load_fpn1_4 - tbl_load_fpn1
	short		load_fpn1_5 - tbl_load_fpn1
	short		load_fpn1_6 - tbl_load_fpn1
	short		load_fpn1_7 - tbl_load_fpn1

load_fpn1_0:
	mov.l		0+EXC_FP0(%a6), 0+FP_SRC(%a6)
	mov.l		4+EXC_FP0(%a6), 4+FP_SRC(%a6)
	mov.l		8+EXC_FP0(%a6), 8+FP_SRC(%a6)
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_1:
	mov.l		0+EXC_FP1(%a6), 0+FP_SRC(%a6)
	mov.l		4+EXC_FP1(%a6), 4+FP_SRC(%a6)
	mov.l		8+EXC_FP1(%a6), 8+FP_SRC(%a6)
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_2:
	fmovm.x		&0x20, FP_SRC(%a6)	# store fp2
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_3:
	fmovm.x		&0x10, FP_SRC(%a6)	# store fp3
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_4:
	fmovm.x		&0x08, FP_SRC(%a6)	# store fp4
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_5:
	fmovm.x		&0x04, FP_SRC(%a6)	# store fp5
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_6:
	fmovm.x		&0x02, FP_SRC(%a6)	# store fp6
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_7:
	fmovm.x		&0x01, FP_SRC(%a6)	# store fp7
	lea		FP_SRC(%a6), %a0
	rts
12754
12755 #############################################################################
12756
12757 #########################################################################
12758 # XDEF **************************************************************** #
12759 # load_fpn2(): load FP register value into FP_DST(a6). #
12760 # #
12761 # XREF **************************************************************** #
12762 # None #
12763 # #
12764 # INPUT *************************************************************** #
12765 # d0 = index of FP register to load #
12766 # #
12767 # OUTPUT ************************************************************** #
12768 # FP_DST(a6) = value loaded from FP register file #
12769 # #
12770 # ALGORITHM *********************************************************** #
12771 # Using the index in d0, load FP_DST(a6) with a number from the #
12772 # FP register file. #
12773 # #
12774 #########################################################################
12775
# jump-table dispatch: index d0 (0-7) selects the FP register copied
# into FP_DST(a6). identical scheme to load_fpn1 but for the
# destination operand. On return a0 points to FP_DST(a6).
	global		load_fpn2
load_fpn2:
	mov.w		(tbl_load_fpn2.b,%pc,%d0.w*2), %d0 # fetch handler offset
	jmp		(tbl_load_fpn2.b,%pc,%d0.w*1) # jump to handler

tbl_load_fpn2:
	short		load_fpn2_0 - tbl_load_fpn2
	short		load_fpn2_1 - tbl_load_fpn2
	short		load_fpn2_2 - tbl_load_fpn2
	short		load_fpn2_3 - tbl_load_fpn2
	short		load_fpn2_4 - tbl_load_fpn2
	short		load_fpn2_5 - tbl_load_fpn2
	short		load_fpn2_6 - tbl_load_fpn2
	short		load_fpn2_7 - tbl_load_fpn2

load_fpn2_0:
	mov.l		0+EXC_FP0(%a6), 0+FP_DST(%a6)
	mov.l		4+EXC_FP0(%a6), 4+FP_DST(%a6)
	mov.l		8+EXC_FP0(%a6), 8+FP_DST(%a6)
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_1:
	mov.l		0+EXC_FP1(%a6), 0+FP_DST(%a6)
	mov.l		4+EXC_FP1(%a6), 4+FP_DST(%a6)
	mov.l		8+EXC_FP1(%a6), 8+FP_DST(%a6)
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_2:
	fmovm.x		&0x20, FP_DST(%a6)	# store fp2
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_3:
	fmovm.x		&0x10, FP_DST(%a6)	# store fp3
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_4:
	fmovm.x		&0x08, FP_DST(%a6)	# store fp4
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_5:
	fmovm.x		&0x04, FP_DST(%a6)	# store fp5
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_6:
	fmovm.x		&0x02, FP_DST(%a6)	# store fp6
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_7:
	fmovm.x		&0x01, FP_DST(%a6)	# store fp7
	lea		FP_DST(%a6), %a0
	rts
12827
12828 #############################################################################
12829
12830 #########################################################################
12831 # XDEF **************************************************************** #
12832 # store_fpreg(): store an fp value to the fpreg designated d0. #
12833 # #
12834 # XREF **************************************************************** #
12835 # None #
12836 # #
12837 # INPUT *************************************************************** #
12838 # fp0 = extended precision value to store #
12839 # d0 = index of floating-point register #
12840 # #
12841 # OUTPUT ************************************************************** #
12842 # None #
12843 # #
12844 # ALGORITHM *********************************************************** #
12845 # Store the value in fp0 to the FP register designated by the #
12846 # value in d0. The FP number can be DENORM or SNAN so we have to be #
12847 # careful that we don't take an exception here. #
12848 # #
12849 #########################################################################
12850
# jump-table dispatch: index d0 (0-7) selects the FP register that
# receives fp0. fp0/fp1 are written to their stack-frame save slots;
# fp2-fp7 are loaded via a push/pop through the stack so that DENORM
# or SNAN values move without raising an exception (fmovm does not
# perform conversion).
	global		store_fpreg
store_fpreg:
	mov.w		(tbl_store_fpreg.b,%pc,%d0.w*2), %d0 # fetch handler offset
	jmp		(tbl_store_fpreg.b,%pc,%d0.w*1) # jump to handler

tbl_store_fpreg:
	short		store_fpreg_0 - tbl_store_fpreg
	short		store_fpreg_1 - tbl_store_fpreg
	short		store_fpreg_2 - tbl_store_fpreg
	short		store_fpreg_3 - tbl_store_fpreg
	short		store_fpreg_4 - tbl_store_fpreg
	short		store_fpreg_5 - tbl_store_fpreg
	short		store_fpreg_6 - tbl_store_fpreg
	short		store_fpreg_7 - tbl_store_fpreg

store_fpreg_0:
	fmovm.x		&0x80, EXC_FP0(%a6)	# store to stacked fp0 slot
	rts
store_fpreg_1:
	fmovm.x		&0x80, EXC_FP1(%a6)	# store to stacked fp1 slot
	rts
store_fpreg_2:
	fmovm.x		&0x01, -(%sp)		# push fp0 image
	fmovm.x		(%sp)+, &0x20		# pop into fp2
	rts
store_fpreg_3:
	fmovm.x		&0x01, -(%sp)		# push fp0 image
	fmovm.x		(%sp)+, &0x10		# pop into fp3
	rts
store_fpreg_4:
	fmovm.x		&0x01, -(%sp)		# push fp0 image
	fmovm.x		(%sp)+, &0x08		# pop into fp4
	rts
store_fpreg_5:
	fmovm.x		&0x01, -(%sp)		# push fp0 image
	fmovm.x		(%sp)+, &0x04		# pop into fp5
	rts
store_fpreg_6:
	fmovm.x		&0x01, -(%sp)		# push fp0 image
	fmovm.x		(%sp)+, &0x02		# pop into fp6
	rts
store_fpreg_7:
	fmovm.x		&0x01, -(%sp)		# push fp0 image
	fmovm.x		(%sp)+, &0x01		# pop into fp7
	rts
12896
12897 #########################################################################
12898 # XDEF **************************************************************** #
12899 # get_packed(): fetch a packed operand from memory and then #
12900 # convert it to a floating-point binary number. #
12901 # #
12902 # XREF **************************************************************** #
12903 # _dcalc_ea() - calculate the correct <ea> #
12904 # _mem_read() - fetch the packed operand from memory #
12905 # facc_in_x() - the fetch failed so jump to special exit code #
12906 # decbin() - convert packed to binary extended precision #
12907 # #
12908 # INPUT *************************************************************** #
12909 # None #
12910 # #
12911 # OUTPUT ************************************************************** #
12912 # If no failure on _mem_read(): #
12913 # FP_SRC(a6) = packed operand now as a binary FP number #
12914 # #
12915 # ALGORITHM *********************************************************** #
#	Get the correct <ea> which is the value on the exception stack	#
12917 # frame w/ maybe a correction factor if the <ea> is -(an) or (an)+. #
12918 # Then, fetch the operand from memory. If the fetch fails, exit #
12919 # through facc_in_x(). #
12920 # If the packed operand is a ZERO,NAN, or INF, convert it to #
12921 # its binary representation here. Else, call decbin() which will #
12922 # convert the packed value to an extended precision binary value. #
12923 # #
12924 #########################################################################
12925
# the stacked <ea> for packed is correct except for -(An).
# the base reg must be updated for both -(An) and (An)+.
	global		get_packed
get_packed:
	mov.l		&0xc,%d0		# packed is 12 bytes
	bsr.l		_dcalc_ea		# fetch <ea>; correct An

	lea		FP_SRC(%a6),%a1		# pass: ptr to super dst
	mov.l		&0xc,%d0		# pass: 12 bytes
	bsr.l		_dmem_read		# read packed operand

	tst.l		%d1			# did dfetch fail?
	bne.l		facc_in_x		# yes; exit through access error

# The packed operand is an INF or a NAN if the exponent field is all ones.
	bfextu		FP_SRC(%a6){&1:&15},%d0	# get exp
	cmpi.w		%d0,&0x7fff		# INF or NAN?
	bne.b		gp_try_zero		# no
	rts					# operand is an INF or NAN

# The packed operand is a zero if the mantissa is all zero, else it's
# a normal packed op.
gp_try_zero:
	mov.b		3+FP_SRC(%a6),%d0	# get byte 4
	andi.b		&0x0f,%d0		# clear all but last nybble
	bne.b		gp_not_spec		# not a zero
	tst.l		FP_SRC_HI(%a6)		# is lw 2 zero?
	bne.b		gp_not_spec		# not a zero
	tst.l		FP_SRC_LO(%a6)		# is lw 3 zero?
	bne.b		gp_not_spec		# not a zero
	rts					# operand is a ZERO
gp_not_spec:
	lea		FP_SRC(%a6),%a0		# pass: ptr to packed op
	bsr.l		decbin			# convert to extended in fp0
	fmovm.x		&0x80,FP_SRC(%a6)	# make this the srcop
	rts
12962
12963 #########################################################################
12964 # decbin(): Converts normalized packed bcd value pointed to by register #
12965 # a0 to extended-precision value in fp0. #
12966 # #
12967 # INPUT *************************************************************** #
12968 # a0 = pointer to normalized packed bcd value #
12969 # #
12970 # OUTPUT ************************************************************** #
12971 # fp0 = exact fp representation of the packed bcd value. #
12972 # #
12973 # ALGORITHM *********************************************************** #
12974 # Expected is a normal bcd (i.e. non-exceptional; all inf, zero, #
12975 # and NaN operands are dispatched without entering this routine) #
12976 # value in 68881/882 format at location (a0). #
12977 # #
12978 # A1. Convert the bcd exponent to binary by successive adds and #
12979 # muls. Set the sign according to SE. Subtract 16 to compensate #
12980 # for the mantissa which is to be interpreted as 17 integer #
12981 # digits, rather than 1 integer and 16 fraction digits. #
12982 # Note: this operation can never overflow. #
12983 # #
12984 # A2. Convert the bcd mantissa to binary by successive #
12985 # adds and muls in FP0. Set the sign according to SM. #
12986 # The mantissa digits will be converted with the decimal point #
12987 # assumed following the least-significant digit. #
12988 # Note: this operation can never overflow. #
12989 # #
12990 # A3. Count the number of leading/trailing zeros in the #
12991 # bcd string. If SE is positive, count the leading zeros; #
12992 # if negative, count the trailing zeros. Set the adjusted #
12993 # exponent equal to the exponent from A1 and the zero count #
12994 # added if SM = 1 and subtracted if SM = 0. Scale the #
12995 # mantissa the equivalent of forcing in the bcd value: #
12996 # #
12997 # SM = 0 a non-zero digit in the integer position #
12998 # SM = 1 a non-zero digit in Mant0, lsd of the fraction #
12999 # #
13000 # this will insure that any value, regardless of its #
13001 # representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted #
13002 # consistently. #
13003 # #
13004 # A4. Calculate the factor 10^exp in FP1 using a table of #
13005 # 10^(2^n) values. To reduce the error in forming factors #
13006 # greater than 10^27, a directed rounding scheme is used with #
13007 # tables rounded to RN, RM, and RP, according to the table #
13008 # in the comments of the pwrten section. #
13009 # #
13010 # A5. Form the final binary number by scaling the mantissa by #
13011 # the exponent factor. This is done by multiplying the #
13012 # mantissa in FP0 by the factor in FP1 if the adjusted #
13013 # exponent sign is positive, and dividing FP0 by FP1 if #
13014 # it is negative. #
13015 # #
13016 # Clean up and return. Check if the final mul or div was inexact. #
13017 # If so, set INEX1 in USER_FPSR. #
13018 # #
13019 #########################################################################
13020
13021 #
13022 # PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded
13023 # to nearest, minus, and plus, respectively. The tables include
13024 # 10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}. No rounding
13025 # is required until the power is greater than 27, however, all
13026 # tables include the first 5 for ease of indexing.
13027 #
# RTABLE: replacement rounding-mode byte for the pwrten calculation,
# indexed by {FPCR rounding-mode bits, SM, SE} (four bytes per rounding
# mode; see the table in the pwrten comments below).  Byte values are
# rounding-mode encodings: 0 = RN, 2 = RM, 3 = RP.
RTABLE:
	byte		0,0,0,0
	byte		2,3,2,3
	byte		2,3,3,2
	byte		3,2,2,3

	set		FNIBS,7			# digits per mantissa lword - 1 (dbf count for 8)
	set		FSTRT,0			# bit offset of first mantissa digit in an lword

	set		ESTRT,4			# bit offset of first exponent digit in lword 1
	set		EDIGITS,2		# exponent digits - 1 (dbf count for 3)
13039
# decbin entry: convert the normalized packed bcd value at (a0) to an
# exact extended-precision value in fp0 (algorithm described above).
	global		decbin
decbin:
	mov.l		0x0(%a0),FP_SCR0_EX(%a6) # make a copy of input
	mov.l		0x4(%a0),FP_SCR0_HI(%a6) # so we don't alter it
	mov.l		0x8(%a0),FP_SCR0_LO(%a6)

	lea		FP_SCR0(%a6),%a0	# a0 -> working copy

	movm.l		&0x3c00,-(%sp)		# save d2-d5
	fmovm.x		&0x1,-(%sp)		# save fp1
#
# Calculate exponent:
# 1. Copy bcd value in memory for use as a working copy.
# 2. Calculate absolute value of exponent in d1 by mul and add.
# 3. Correct for exponent sign.
# 4. Subtract 16 to compensate for interpreting the mant as all integer digits.
#    (i.e., all digits assumed left of the decimal point.)
#
# Register usage:
#
#  calc_e:
#	(*)  d0: temp digit storage
#	(*)  d1: accumulator for binary exponent
#	(*)  d2: digit count
#	(*)  d3: offset pointer
#	( )  d4: first word of bcd
#	( )  a0: pointer to working bcd value
#	( )  a6: pointer to original bcd value
#	(*)  FP_SCR1: working copy of original bcd value
#	(*)  L_SCR1: copy of original exponent word
#
calc_e:
	mov.l		&EDIGITS,%d2		# # of exponent digits - 1 (dbf count)
	mov.l		&ESTRT,%d3		# bit offset used to pick up digits
	mov.l		(%a0),%d4		# get first word of bcd
	clr.l		%d1			# zero d1 for accumulator
e_gd:
	mulu.l		&0xa,%d1		# mul partial product by one digit place
	bfextu		%d4{%d3:&4},%d0		# get the digit and zero extend into d0
	add.l		%d0,%d1			# d1 = d1 * 10 + digit
	addq.b		&4,%d3			# advance d3 to the next digit
	dbf.w		%d2,e_gd		# loop until all 3 digits converted
	btst		&30,%d4			# test SE, the exponent sign bit
	beq.b		e_pos			# don't negate if pos
	neg.l		%d1			# negate before subtracting
e_pos:
	sub.l		&16,%d1			# sub 16 to compensate for shift of mant
	bge.b		e_save			# if still pos, do not neg
	neg.l		%d1			# now negative, make pos and set SE
	or.l		&0x40000000,%d4		# set SE in d4,
	or.l		&0x40000000,(%a0)	# and in working bcd
e_save:
	mov.l		%d1,-(%sp)		# save adjusted exponent on stack
13093 #
13094 #
13095 # Calculate mantissa:
13096 # 1. Calculate absolute value of mantissa in fp0 by mul and add.
13097 # 2. Correct for mantissa sign.
13098 # (i.e., all digits assumed left of the decimal point.)
13099 #
13100 # Register usage:
13101 #
13102 # calc_m:
13103 # (*) d0: temp digit storage
13104 # (*) d1: lword counter
13105 # (*) d2: digit count
13106 # (*) d3: offset pointer
13107 # ( ) d4: words 2 and 3 of bcd
13108 # ( ) a0: pointer to working bcd value
13109 # ( ) a6: pointer to original bcd value
13110 # (*) fp0: mantissa accumulator
13111 # ( ) FP_SCR1: working copy of original bcd value
13112 # ( ) L_SCR1: copy of original exponent word
13113 #
calc_m:
	mov.l		&1,%d1			# lword counter, init to 1 (lword 2)
	fmov.s		&0x00000000,%fp0	# accumulator, start at 0.0
#
#
# Since the packed number has a long word between the first & second parts,
# get the integer digit then skip down & get the rest of the
# mantissa.  We will unroll the loop once.
#
	bfextu		(%a0){&28:&4},%d0	# integer part is ls digit in long word 1
	fadd.b		%d0,%fp0		# add digit to sum in fp0
#
#
# Get the rest of the mantissa (16 fraction digits in lwords 2 and 3).
#
loadlw:
	mov.l		(%a0,%d1.L*4),%d4	# load mantissa longword into d4
	mov.l		&FSTRT,%d3		# bit offset used to pick up digits
	mov.l		&FNIBS,%d2		# reset digit count for this lword
md2b:
	fmul.s		&0x41200000,%fp0	# fp0 = fp0 * 10
	bfextu		%d4{%d3:&4},%d0		# get the digit and zero extend
	fadd.b		%d0,%fp0		# fp0 = fp0 + digit
#
#
# If all the digits (8) in that long word have been converted (d2=0),
# then inc d1 (=2) to point to the next long word and reset d3 to 0
# to initialize the digit offset, and set d2 to 7 for the digit count;
# else continue with this long word.
#
	addq.b		&4,%d3			# advance d3 to the next digit
	dbf.w		%d2,md2b		# check for last digit in this lw
nextlw:
	addq.l		&1,%d1			# inc lw pointer in mantissa
	cmp.l		%d1,&2			# test for last lw
	ble.b		loadlw			# if not, get last one
#
# Check the sign of the mant and make the value in fp0 the same sign.
#
m_sign:
	btst		&31,(%a0)		# test SM, the mantissa sign bit
	beq.b		ap_st_z			# if clear, go to append/strip zeros
	fneg.x		%fp0			# if set, negate fp0
13157 #
13158 # Append/strip zeros:
13159 #
13160 # For adjusted exponents which have an absolute value greater than 27*,
13161 # this routine calculates the amount needed to normalize the mantissa
13162 # for the adjusted exponent. That number is subtracted from the exp
13163 # if the exp was positive, and added if it was negative. The purpose
13164 # of this is to reduce the value of the exponent and the possibility
13165 # of error in calculation of pwrten.
13166 #
13167 # 1. Branch on the sign of the adjusted exponent.
13168 # 2p.(positive exp)
# 2. Check M16 and the digits in lwords 2 and 3 in descending order.
13170 # 3. Add one for each zero encountered until a non-zero digit.
13171 # 4. Subtract the count from the exp.
13172 # 5. Check if the exp has crossed zero in #3 above; make the exp abs
13173 # and set SE.
13174 # 6. Multiply the mantissa by 10**count.
13175 # 2n.(negative exp)
# 2. Check the digits in lwords 3 and 2 in descending order.
13177 # 3. Add one for each zero encountered until a non-zero digit.
13178 # 4. Add the count to the exp.
13179 # 5. Check if the exp has crossed zero in #3 above; clear SE.
13180 # 6. Divide the mantissa by 10**count.
13181 #
# *Why 27?  If the adjusted exponent is within -28 < expA < 28, then
# any adjustment due to append/strip zeros will drive the resultant
13184 # exponent towards zero. Since all pwrten constants with a power
13185 # of 27 or less are exact, there is no need to use this routine to
13186 # attempt to lessen the resultant exponent.
13187 #
13188 # Register usage:
13189 #
13190 # ap_st_z:
13191 # (*) d0: temp digit storage
13192 # (*) d1: zero count
13193 # (*) d2: digit count
13194 # (*) d3: offset pointer
13195 # ( ) d4: first word of bcd
13196 # (*) d5: lword counter
13197 # ( ) a0: pointer to working bcd value
13198 # ( ) FP_SCR1: working copy of original bcd value
13199 # ( ) L_SCR1: copy of original exponent word
13200 #
13201 #
13202 # First check the absolute value of the exponent to see if this
13203 # routine is necessary. If so, then check the sign of the exponent
13204 # and do append (+) or strip (-) zeros accordingly.
13205 # This section handles a positive adjusted exponent.
13206 #
ap_st_z:
	mov.l		(%sp),%d1		# load expA for range test
	cmp.l		%d1,&27			# test against 27
	ble.w		pwrten			# if abs(expA) <28, skip ap/st zeros
	btst		&30,(%a0)		# check sign of exp
	bne.b		ap_st_n			# if neg, go to neg side
	clr.l		%d1			# zero count reg
	mov.l		(%a0),%d4		# load lword 1 to d4
	bfextu		%d4{&28:&4},%d0		# get M16 (integer digit) in d0
	bne.b		ap_p_fx			# if M16 is non-zero, go fix exp
	addq.l		&1,%d1			# inc zero count
	mov.l		&1,%d5			# init lword counter
	mov.l		(%a0,%d5.L*4),%d4	# get lword 2 to d4
	bne.b		ap_p_cl			# if lw 2 is non-zero, scan its digits
	addq.l		&8,%d1			# lw 2 all zero: count its 8 digits
	addq.l		&1,%d5			# inc lword counter
	mov.l		(%a0,%d5.L*4),%d4	# get lword 3 to d4
ap_p_cl:
	clr.l		%d3			# init bit offset to msd
	mov.l		&7,%d2			# init digit counter (dbf count for 8)
ap_p_gd:
	bfextu		%d4{%d3:&4},%d0		# get digit
	bne.b		ap_p_fx			# if non-zero, go to fix exp
	addq.l		&4,%d3			# point to next digit
	addq.l		&1,%d1			# inc zero count
	dbf.w		%d2,ap_p_gd		# get next digit
ap_p_fx:
	mov.l		%d1,%d0			# copy zero count to d0
	mov.l		(%sp),%d1		# get adjusted exp from memory
	sub.l		%d0,%d1			# subtract count from exp
	bge.b		ap_p_fm			# if still pos, go scale mantissa
	neg.l		%d1			# now its neg; get abs
	mov.l		(%a0),%d4		# load lword 1 to d4
	or.l		&0x40000000,%d4		# and set SE in d4
	or.l		&0x40000000,(%a0)	# and in memory
#
# Calculate the mantissa multiplier to compensate for the stripping of
# zeros from the mantissa.
#
ap_p_fm:
	lea.l		PTENRN(%pc),%a1		# get address of power-of-ten table
	clr.l		%d3			# init table index
	fmov.s		&0x3f800000,%fp1	# init fp1 to 1.0
	mov.l		&3,%d2			# init d2 to count bits in counter
ap_p_el:
	asr.l		&1,%d0			# shift next count bit into carry
	bcc.b		ap_p_en			# if bit clear, skip the mul
	fmul.x		(%a1,%d3),%fp1		# mul by 10**(2**d3_bit_no)
ap_p_en:
	add.l		&12,%d3			# inc d3 to next pwrten table entry
	tst.l		%d0			# check if d0 is zero
	bne.b		ap_p_el			# if not, get next bit
	fmul.x		%fp1,%fp0		# mul mantissa by 10**(zero_count)
	bra.b		pwrten			# go calc pwrten
13261 #
13262 # This section handles a negative adjusted exponent.
13263 #
ap_st_n:
	clr.l		%d1			# clr zero counter
	mov.l		&2,%d5			# set up d5 to point to lword 3
	mov.l		(%a0,%d5.L*4),%d4	# get lword 3
	bne.b		ap_n_cl			# if not zero, check digits
	sub.l		&1,%d5			# dec d5 to point to lword 2
	addq.l		&8,%d1			# lw 3 all zero: count its 8 digits
	mov.l		(%a0,%d5.L*4),%d4	# get lword 2
ap_n_cl:
	mov.l		&28,%d3			# point to last (least significant) digit
	mov.l		&7,%d2			# init digit counter (dbf count for 8)
ap_n_gd:
	bfextu		%d4{%d3:&4},%d0		# get digit
	bne.b		ap_n_fx			# if non-zero, go to exp fix
	subq.l		&4,%d3			# point to previous digit
	addq.l		&1,%d1			# inc zero count
	dbf.w		%d2,ap_n_gd		# get next digit
ap_n_fx:
	mov.l		%d1,%d0			# copy zero count to d0
	mov.l		(%sp),%d1		# get adjusted exp from memory
	sub.l		%d0,%d1			# subtract count from exp
	bgt.b		ap_n_fm			# if still pos, go fix mantissa
	neg.l		%d1			# take abs of exp and clr SE
	mov.l		(%a0),%d4		# load lword 1 to d4
	and.l		&0xbfffffff,%d4		# and clr SE in d4
	and.l		&0xbfffffff,(%a0)	# and in memory
#
# Calculate the mantissa divisor to compensate for the appending of
# zeros to the mantissa.
#
ap_n_fm:
	lea.l		PTENRN(%pc),%a1		# get address of power-of-ten table
	clr.l		%d3			# init table index
	fmov.s		&0x3f800000,%fp1	# init fp1 to 1.0
	mov.l		&3,%d2			# init d2 to count bits in counter
ap_n_el:
	asr.l		&1,%d0			# shift next count bit into carry
	bcc.b		ap_n_en			# if bit clear, skip the mul
	fmul.x		(%a1,%d3),%fp1		# mul by 10**(2**d3_bit_no)
ap_n_en:
	add.l		&12,%d3			# inc d3 to next pwrten table entry
	tst.l		%d0			# check if d0 is zero
	bne.b		ap_n_el			# if not, get next bit
	fdiv.x		%fp1,%fp0		# div mantissa by 10**(zero_count)
13308 #
13309 #
13310 # Calculate power-of-ten factor from adjusted and shifted exponent.
13311 #
13312 # Register usage:
13313 #
13314 # pwrten:
13315 # (*) d0: temp
13316 # ( ) d1: exponent
13317 # (*) d2: {FPCR[6:5],SM,SE} as index in RTABLE; temp
13318 # (*) d3: FPCR work copy
13319 # ( ) d4: first word of bcd
13320 # (*) a1: RTABLE pointer
13321 # calc_p:
13322 # (*) d0: temp
13323 # ( ) d1: exponent
13324 # (*) d3: PWRTxx table index
13325 # ( ) a0: pointer to working copy of bcd
13326 # (*) a1: PWRTxx pointer
13327 # (*) fp1: power-of-ten accumulator
13328 #
13329 # Pwrten calculates the exponent factor in the selected rounding mode
13330 # according to the following table:
13331 #
13332 # Sign of Mant Sign of Exp Rounding Mode PWRTEN Rounding Mode
13333 #
13334 # ANY ANY RN RN
13335 #
13336 # + + RP RP
13337 # - + RP RM
13338 # + - RP RM
13339 # - - RP RP
13340 #
13341 # + + RM RM
13342 # - + RM RP
13343 # + - RM RP
13344 # - - RM RM
13345 #
13346 # + + RZ RM
13347 # - + RZ RM
13348 # + - RZ RP
13349 # - - RZ RP
13350 #
13351 #
pwrten:
	mov.l		USER_FPCR(%a6),%d3	# get user's FPCR
	bfextu		%d3{&26:&2},%d2		# isolate rounding mode bits
	mov.l		(%a0),%d4		# reload 1st bcd word to d4
	asl.l		&2,%d2			# format d2 to be
	bfextu		%d4{&0:&2},%d0		# {FPCR[6],FPCR[5],SM,SE}
	add.l		%d0,%d2			# in d2 as index into RTABLE
	lea.l		RTABLE(%pc),%a1		# load rtable base
	mov.b		(%a1,%d2),%d0		# load new rounding bits from table
	clr.l		%d3			# clear d3 to force no exc and extended
	bfins		%d0,%d3{&26:&2}		# stuff new rounding bits in FPCR
	fmov.l		%d3,%fpcr		# write new FPCR
	asr.l		&1,%d0			# decode new mode: lsb into carry
	bcc.b		not_rp			# if clear, mode is not RP (3)
	lea.l		PTENRP(%pc),%a1		# it is RP
	bra.b		calc_p			# go to init section
not_rp:
	asr.l		&1,%d0			# next bit into carry
	bcc.b		not_rm			# if clear, mode is not RM (2)
	lea.l		PTENRM(%pc),%a1		# it is RM
	bra.b		calc_p			# go to init section
not_rm:
	lea.l		PTENRN(%pc),%a1		# it is RN
# calc_p: accumulate 10**abs(exp) in fp1 by multiplying the table
# entries selected by the set bits of the exponent magnitude.
calc_p:
	mov.l		%d1,%d0			# copy exp to d0; use d0
	bpl.b		no_neg			# if exp is negative,
	neg.l		%d0			# invert it
	or.l		&0x40000000,(%a0)	# and set SE bit
no_neg:
	clr.l		%d3			# table index
	fmov.s		&0x3f800000,%fp1	# init fp1 to 1.0
e_loop:
	asr.l		&1,%d0			# shift next bit into carry
	bcc.b		e_next			# if zero, skip the mul
	fmul.x		(%a1,%d3),%fp1		# mul by 10**(2**d3_bit_no)
e_next:
	add.l		&12,%d3			# inc d3 to next pwrten table entry
	tst.l		%d0			# check if d0 is zero
	bne.b		e_loop			# not zero, continue shifting
13391 #
13392 #
13393 # Check the sign of the adjusted exp and make the value in fp0 the
13394 # same sign. If the exp was pos then multiply fp1*fp0;
13395 # else divide fp0/fp1.
13396 #
13397 # Register Usage:
13398 # norm:
13399 # ( ) a0: pointer to working bcd value
13400 # (*) fp0: mantissa accumulator
13401 # ( ) fp1: scaling factor - 10**(abs(exp))
13402 #
pnorm:
	btst		&30,(%a0)		# test SE, the sign of the exponent
	beq.b		mul			# if clear, go to multiply
div:
	fdiv.x		%fp1,%fp0		# exp is negative, so divide mant by factor
	bra.b		end_dec
mul:
	fmul.x		%fp1,%fp0		# exp is positive, so multiply by factor
#
#
# Clean up and return with result in fp0.
#
# If the final mul/div in decbin incurred an inex exception,
# it will be inex2, but will be reported as inex1 by get_op.
#
end_dec:
	fmov.l		%fpsr,%d0		# get status register
	bclr		&inex2_bit+8,%d0	# test for inex2 and clear it
	beq.b		no_exc			# skip this if no exc
	ori.w		&inx1a_mask,2+USER_FPSR(%a6) # set INEX1/AINEX
no_exc:
	add.l		&0x4,%sp		# pop saved exponent lword
	fmovm.x		(%sp)+,&0x40		# restore fp1
	movm.l		(%sp)+,&0x3c		# restore d2-d5
	fmov.l		&0x0,%fpcr		# clear FPCR (ext precision, RN)
	fmov.l		&0x0,%fpsr		# clear FPSR
	rts
13430
13431 #########################################################################
13432 # bindec(): Converts an input in extended precision format to bcd format#
13433 # #
13434 # INPUT *************************************************************** #
13435 # a0 = pointer to the input extended precision value in memory. #
13436 # the input may be either normalized, unnormalized, or #
13437 # denormalized. #
13438 # d0 = contains the k-factor sign-extended to 32-bits. #
13439 # #
13440 # OUTPUT ************************************************************** #
13441 # FP_SCR0(a6) = bcd format result on the stack. #
13442 # #
13443 # ALGORITHM *********************************************************** #
13444 # #
13445 # A1. Set RM and size ext; Set SIGMA = sign of input. #
13446 # The k-factor is saved for use in d7. Clear the #
13447 # BINDEC_FLG for separating normalized/denormalized #
13448 # input. If input is unnormalized or denormalized, #
13449 # normalize it. #
13450 # #
13451 # A2. Set X = abs(input). #
13452 # #
13453 # A3. Compute ILOG. #
13454 # ILOG is the log base 10 of the input value. It is #
13455 # approximated by adding e + 0.f when the original #
13456 # value is viewed as 2^^e * 1.f in extended precision. #
13457 # This value is stored in d6. #
13458 # #
13459 # A4. Clr INEX bit. #
13460 # The operation in A3 above may have set INEX2. #
13461 # #
13462 # A5. Set ICTR = 0; #
13463 # ICTR is a flag used in A13. It must be set before the #
13464 # loop entry A6. #
13465 # #
13466 # A6. Calculate LEN. #
13467 # LEN is the number of digits to be displayed. The #
13468 # k-factor can dictate either the total number of digits, #
13469 # if it is a positive number, or the number of digits #
13470 # after the decimal point which are to be included as #
13471 # significant. See the 68882 manual for examples. #
13472 # If LEN is computed to be greater than 17, set OPERR in #
13473 # USER_FPSR. LEN is stored in d4. #
13474 # #
13475 # A7. Calculate SCALE. #
13476 # SCALE is equal to 10^ISCALE, where ISCALE is the number #
13477 # of decimal places needed to insure LEN integer digits #
13478 # in the output before conversion to bcd. LAMBDA is the #
13479 # sign of ISCALE, used in A9. Fp1 contains #
13480 # 10^^(abs(ISCALE)) using a rounding mode which is a #
13481 # function of the original rounding mode and the signs #
13482 # of ISCALE and X. A table is given in the code. #
13483 # #
13484 # A8. Clr INEX; Force RZ. #
13485 # The operation in A3 above may have set INEX2. #
13486 # RZ mode is forced for the scaling operation to insure #
13487 # only one rounding error. The grs bits are collected in #
13488 # the INEX flag for use in A10. #
13489 # #
13490 # A9. Scale X -> Y. #
13491 # The mantissa is scaled to the desired number of #
13492 # significant digits. The excess digits are collected #
13493 # in INEX2. #
13494 # #
13495 # A10. Or in INEX. #
13496 # If INEX is set, round error occurred. This is #
13497 # compensated for by 'or-ing' in the INEX2 flag to #
13498 # the lsb of Y. #
13499 # #
13500 # A11. Restore original FPCR; set size ext. #
13501 # Perform FINT operation in the user's rounding mode. #
13502 # Keep the size to extended. #
13503 # #
13504 # A12. Calculate YINT = FINT(Y) according to user's rounding #
13505 # mode. The FPSP routine sintd0 is used. The output #
13506 # is in fp0. #
13507 # #
13508 # A13. Check for LEN digits. #
13509 # If the int operation results in more than LEN digits, #
13510 # or less than LEN -1 digits, adjust ILOG and repeat from #
13511 # A6. This test occurs only on the first pass. If the #
13512 # result is exactly 10^LEN, decrement ILOG and divide #
13513 # the mantissa by 10. #
13514 # #
13515 # A14. Convert the mantissa to bcd. #
13516 # The binstr routine is used to convert the LEN digit #
13517 # mantissa to bcd in memory. The input to binstr is #
13518 # to be a fraction; i.e. (mantissa)/10^LEN and adjusted #
13519 # such that the decimal point is to the left of bit 63. #
13520 # The bcd digits are stored in the correct position in #
13521 # the final string area in memory. #
13522 # #
13523 # A15. Convert the exponent to bcd. #
13524 # As in A14 above, the exp is converted to bcd and the #
13525 # digits are stored in the final string. #
13526 # Test the length of the final exponent string. If the #
13527 # length is 4, set operr. #
13528 # #
13529 # A16. Write sign bits to final string. #
13530 # #
13531 #########################################################################
13532
	set		BINDEC_FLG, EXC_TEMP	# DENORM flag

# Constants in extended precision
PLOG2:						# log10(2) rounded down
	long		0x3FFD0000,0x9A209A84,0xFBCFF798,0x00000000
PLOG2UP1:					# log10(2) rounded up (last bit +1)
	long		0x3FFD0000,0x9A209A84,0xFBCFF799,0x00000000

# Constants in single precision (each padded to extended-precision size)
FONE:						# 1.0
	long		0x3F800000,0x00000000,0x00000000,0x00000000
FTWO:						# 2.0
	long		0x40000000,0x00000000,0x00000000,0x00000000
FTEN:						# 10.0
	long		0x41200000,0x00000000,0x00000000,0x00000000
F4933:						# 4933.0 (max decimal exp magnitude)
	long		0x459A2800,0x00000000,0x00000000,0x00000000

# RBDTBL: rounding-mode byte for the bindec scaling operation, indexed
# by {initial FPCR rounding mode, LAMBDA, sign(X)}; see the table in
# the A7 comments.  Values: 0 = RN, 2 = RM, 3 = RP.
RBDTBL:
	byte		0,0,0,0
	byte		3,3,2,2
	byte		3,2,2,3
	byte		2,3,3,2
13556
13557 # Implementation Notes:
13558 #
13559 # The registers are used as follows:
13560 #
13561 # d0: scratch; LEN input to binstr
13562 # d1: scratch
13563 # d2: upper 32-bits of mantissa for binstr
13564 # d3: scratch;lower 32-bits of mantissa for binstr
13565 # d4: LEN
13566 # d5: LAMBDA/ICTR
13567 # d6: ILOG
13568 # d7: k-factor
13569 # a0: ptr for original operand/final result
13570 # a1: scratch pointer
13571 # a2: pointer to FP_X; abs(original value) in ext
13572 # fp0: scratch
13573 # fp1: scratch
13574 # fp2: scratch
13575 # F_SCR1:
13576 # F_SCR2:
13577 # L_SCR1:
13578 # L_SCR2:
13579
13580 global bindec
13581 bindec:
13582 movm.l &0x3f20,-(%sp) # {%d2-%d7/%a2}
13583 fmovm.x &0x7,-(%sp) # {%fp0-%fp2}
13584
13585 # A1. Set RM and size ext. Set SIGMA = sign input;
13586 # The k-factor is saved for use in d7. Clear BINDEC_FLG for
13587 # separating normalized/denormalized input. If the input
13588 # is a denormalized number, set the BINDEC_FLG memory word
13589 # to signal denorm. If the input is unnormalized, normalize
13590 # the input and test for denormalized result.
13591 #
13592 fmov.l &rm_mode*0x10,%fpcr # set RM and ext
13593 mov.l (%a0),L_SCR2(%a6) # save exponent for sign check
13594 mov.l %d0,%d7 # move k-factor to d7
13595
13596 clr.b BINDEC_FLG(%a6) # clr norm/denorm flag
13597 cmpi.b STAG(%a6),&DENORM # is input a DENORM?
13598 bne.w A2_str # no; input is a NORM
13599
13600 #
13601 # Normalize the denorm
13602 #
13603 un_de_norm:
13604 mov.w (%a0),%d0
13605 and.w &0x7fff,%d0 # strip sign of normalized exp
13606 mov.l 4(%a0),%d1
13607 mov.l 8(%a0),%d2
13608 norm_loop:
13609 sub.w &1,%d0
13610 lsl.l &1,%d2
13611 roxl.l &1,%d1
13612 tst.l %d1
13613 bge.b norm_loop
13614 #
13615 # Test if the normalized input is denormalized
13616 #
13617 tst.w %d0
13618 bgt.b pos_exp # if greater than zero, it is a norm
13619 st BINDEC_FLG(%a6) # set flag for denorm
13620 pos_exp:
13621 and.w &0x7fff,%d0 # strip sign of normalized exp
13622 mov.w %d0,(%a0)
13623 mov.l %d1,4(%a0)
13624 mov.l %d2,8(%a0)
13625
13626 # A2. Set X = abs(input).
13627 #
13628 A2_str:
13629 mov.l (%a0),FP_SCR1(%a6) # move input to work space
13630 mov.l 4(%a0),FP_SCR1+4(%a6) # move input to work space
13631 mov.l 8(%a0),FP_SCR1+8(%a6) # move input to work space
13632 and.l &0x7fffffff,FP_SCR1(%a6) # create abs(X)
13633
13634 # A3. Compute ILOG.
13635 # ILOG is the log base 10 of the input value. It is approx-
13636 # imated by adding e + 0.f when the original value is viewed
13637 # as 2^^e * 1.f in extended precision. This value is stored
13638 # in d6.
13639 #
13640 # Register usage:
13641 # Input/Output
13642 # d0: k-factor/exponent
13643 # d2: x/x
13644 # d3: x/x
13645 # d4: x/x
13646 # d5: x/x
13647 # d6: x/ILOG
13648 # d7: k-factor/Unchanged
13649 # a0: ptr for original operand/final result
13650 # a1: x/x
13651 # a2: x/x
13652 # fp0: x/float(ILOG)
13653 # fp1: x/x
13654 # fp2: x/x
13655 # F_SCR1:x/x
13656 # F_SCR2:Abs(X)/Abs(X) with $3fff exponent
13657 # L_SCR1:x/x
13658 # L_SCR2:first word of X packed/Unchanged
13659
13660 tst.b BINDEC_FLG(%a6) # check for denorm
13661 beq.b A3_cont # if clr, continue with norm
13662 mov.l &-4933,%d6 # force ILOG = -4933
13663 bra.b A4_str
13664 A3_cont:
13665 mov.w FP_SCR1(%a6),%d0 # move exp to d0
13666 mov.w &0x3fff,FP_SCR1(%a6) # replace exponent with 0x3fff
13667 fmov.x FP_SCR1(%a6),%fp0 # now fp0 has 1.f
13668 sub.w &0x3fff,%d0 # strip off bias
13669 fadd.w %d0,%fp0 # add in exp
13670 fsub.s FONE(%pc),%fp0 # subtract off 1.0
13671 fbge.w pos_res # if pos, branch
13672 fmul.x PLOG2UP1(%pc),%fp0 # if neg, mul by LOG2UP1
13673 fmov.l %fp0,%d6 # put ILOG in d6 as a lword
13674 bra.b A4_str # go move out ILOG
13675 pos_res:
13676 fmul.x PLOG2(%pc),%fp0 # if pos, mul by LOG2
13677 fmov.l %fp0,%d6 # put ILOG in d6 as a lword
13678
13679
13680 # A4. Clr INEX bit.
13681 # The operation in A3 above may have set INEX2.
13682
13683 A4_str:
13684 fmov.l &0,%fpsr # zero all of fpsr - nothing needed
13685
13686
13687 # A5. Set ICTR = 0;
13688 # ICTR is a flag used in A13. It must be set before the
13689 # loop entry A6. The lower word of d5 is used for ICTR.
13690
13691 clr.w %d5 # clear ICTR
13692
13693 # A6. Calculate LEN.
13694 # LEN is the number of digits to be displayed. The k-factor
13695 # can dictate either the total number of digits, if it is
13696 # a positive number, or the number of digits after the
13697 # original decimal point which are to be included as
13698 # significant. See the 68882 manual for examples.
13699 # If LEN is computed to be greater than 17, set OPERR in
13700 # USER_FPSR. LEN is stored in d4.
13701 #
13702 # Register usage:
13703 # Input/Output
13704 # d0: exponent/Unchanged
13705 # d2: x/x/scratch
13706 # d3: x/x
13707 # d4: exc picture/LEN
13708 # d5: ICTR/Unchanged
13709 # d6: ILOG/Unchanged
13710 # d7: k-factor/Unchanged
13711 # a0: ptr for original operand/final result
13712 # a1: x/x
13713 # a2: x/x
13714 # fp0: float(ILOG)/Unchanged
13715 # fp1: x/x
13716 # fp2: x/x
13717 # F_SCR1:x/x
13718 # F_SCR2:Abs(X) with $3fff exponent/Unchanged
13719 # L_SCR1:x/x
13720 # L_SCR2:first word of X packed/Unchanged
13721
13722 A6_str:
13723 tst.l %d7 # branch on sign of k
13724 ble.b k_neg # if k <= 0, LEN = ILOG + 1 - k
13725 mov.l %d7,%d4 # if k > 0, LEN = k
13726 bra.b len_ck # skip to LEN check
13727 k_neg:
13728 mov.l %d6,%d4 # first load ILOG to d4
13729 sub.l %d7,%d4 # subtract off k
13730 addq.l &1,%d4 # add in the 1
13731 len_ck:
13732 tst.l %d4 # LEN check: branch on sign of LEN
13733 ble.b LEN_ng # if neg, set LEN = 1
13734 cmp.l %d4,&17 # test if LEN > 17
13735 ble.b A7_str # if not, forget it
13736 mov.l &17,%d4 # set max LEN = 17
13737 tst.l %d7 # if negative, never set OPERR
13738 ble.b A7_str # if positive, continue
13739 or.l &opaop_mask,USER_FPSR(%a6) # set OPERR & AIOP in USER_FPSR
13740 bra.b A7_str # finished here
13741 LEN_ng:
13742 mov.l &1,%d4 # min LEN is 1
13743
13744
13745 # A7. Calculate SCALE.
13746 # SCALE is equal to 10^ISCALE, where ISCALE is the number
13747 # of decimal places needed to insure LEN integer digits
13748 # in the output before conversion to bcd. LAMBDA is the sign
13749 # of ISCALE, used in A9. Fp1 contains 10^^(abs(ISCALE)) using
13750 # the rounding mode as given in the following table (see
13751 # Coonen, p. 7.23 as ref.; however, the SCALE variable is
13752 # of opposite sign in bindec.sa from Coonen).
13753 #
13754 # Initial USE
13755 # FPCR[6:5] LAMBDA SIGN(X) FPCR[6:5]
13756 # ----------------------------------------------
13757 # RN 00 0 0 00/0 RN
13758 # RN 00 0 1 00/0 RN
13759 # RN 00 1 0 00/0 RN
13760 # RN 00 1 1 00/0 RN
13761 # RZ 01 0 0 11/3 RP
13762 # RZ 01 0 1 11/3 RP
13763 # RZ 01 1 0 10/2 RM
13764 # RZ 01 1 1 10/2 RM
13765 # RM 10 0 0 11/3 RP
13766 # RM 10 0 1 10/2 RM
13767 # RM 10 1 0 10/2 RM
13768 # RM 10 1 1 11/3 RP
13769 # RP 11 0 0 10/2 RM
13770 # RP 11 0 1 11/3 RP
13771 # RP 11 1 0 11/3 RP
13772 # RP 11 1 1 10/2 RM
13773 #
13774 # Register usage:
13775 # Input/Output
13776 # d0: exponent/scratch - final is 0
13777 # d2: x/0 or 24 for A9
13778 # d3: x/scratch - offset ptr into PTENRM array
13779 # d4: LEN/Unchanged
13780 # d5: 0/ICTR:LAMBDA
13781 # d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
13782 # d7: k-factor/Unchanged
13783 # a0: ptr for original operand/final result
13784 # a1: x/ptr to PTENRM array
13785 # a2: x/x
13786 # fp0: float(ILOG)/Unchanged
13787 # fp1: x/10^ISCALE
13788 # fp2: x/x
13789 # F_SCR1:x/x
13790 # F_SCR2:Abs(X) with $3fff exponent/Unchanged
13791 # L_SCR1:x/x
13792 # L_SCR2:first word of X packed/Unchanged
13793
13794 A7_str:
13795 tst.l %d7 # test sign of k
13796 bgt.b k_pos # if pos and > 0, skip this
13797 cmp.l %d7,%d6 # test k - ILOG
13798 blt.b k_pos # if ILOG >= k, skip this
13799 mov.l %d7,%d6 # if ((k<0) & (ILOG < k)) ILOG = k
13800 k_pos:
13801 mov.l %d6,%d0 # calc ILOG + 1 - LEN in d0
13802 addq.l &1,%d0 # add the 1
13803 sub.l %d4,%d0 # sub off LEN
13804 swap %d5 # use upper word of d5 for LAMBDA
13805 clr.w %d5 # set it zero initially
13806 clr.w %d2 # set up d2 for very small case
13807 tst.l %d0 # test sign of ISCALE
13808 bge.b iscale # if pos, skip next inst
13809 addq.w &1,%d5 # if neg, set LAMBDA true
13810 cmp.l %d0,&0xffffecd4 # test iscale <= -4908
13811 bgt.b no_inf # if false, skip rest
13812 add.l &24,%d0 # add in 24 to iscale
13813 mov.l &24,%d2 # put 24 in d2 for A9
13814 no_inf:
13815 neg.l %d0 # and take abs of ISCALE
13816 iscale:
13817 fmov.s FONE(%pc),%fp1 # init fp1 to 1
13818 bfextu USER_FPCR(%a6){&26:&2},%d1 # get initial rmode bits
13819 lsl.w &1,%d1 # put them in bits 2:1
13820 add.w %d5,%d1 # add in LAMBDA
13821 lsl.w &1,%d1 # put them in bits 3:1
13822 tst.l L_SCR2(%a6) # test sign of original x
13823 bge.b x_pos # if pos, don't set bit 0
13824 addq.l &1,%d1 # if neg, set bit 0
13825 x_pos:
13826 lea.l RBDTBL(%pc),%a2 # load rbdtbl base
13827 mov.b (%a2,%d1),%d3 # load d3 with new rmode
13828 lsl.l &4,%d3 # put bits in proper position
13829 fmov.l %d3,%fpcr # load bits into fpu
13830 lsr.l &4,%d3 # put bits in proper position
13831 tst.b %d3 # decode new rmode for pten table
13832 bne.b not_rn # if zero, it is RN
13833 lea.l PTENRN(%pc),%a1 # load a1 with RN table base
13834 bra.b rmode # exit decode
13835 not_rn:
13836 lsr.b &1,%d3 # get lsb in carry
13837 bcc.b not_rp2 # if carry clear, it is RM
13838 lea.l PTENRP(%pc),%a1 # load a1 with RP table base
13839 bra.b rmode # exit decode
13840 not_rp2:
13841 lea.l PTENRM(%pc),%a1 # load a1 with RM table base
13842 rmode:
13843 clr.l %d3 # clr table index
13844 e_loop2:
13845 lsr.l &1,%d0 # shift next bit into carry
13846 bcc.b e_next2 # if zero, skip the mul
13847 fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no)
13848 e_next2:
13849 add.l &12,%d3 # inc d3 to next pwrten table entry
13850 tst.l %d0 # test if ISCALE is zero
13851 bne.b e_loop2 # if not, loop
13852
13853 # A8. Clr INEX; Force RZ.
13854 # The operation in A3 above may have set INEX2.
13855 # RZ mode is forced for the scaling operation to insure
13856 # only one rounding error. The grs bits are collected in
13857 # the INEX flag for use in A10.
13858 #
13859 # Register usage:
13860 # Input/Output
# (no data registers are touched here; only FPSR is cleared and
# FPCR is set to round-to-zero)
13861
13862 fmov.l &0,%fpsr # clr INEX
13863 fmov.l &rz_mode*0x10,%fpcr # set RZ rounding mode
13864
13865 # A9. Scale X -> Y.
13866 # The mantissa is scaled to the desired number of significant
13867 # digits. The excess digits are collected in INEX2. If mul,
13868 # Check d2 for excess 10 exponential value. If not zero,
13869 # the iscale value would have caused the pwrten calculation
13870 # to overflow. Only a negative iscale can cause this, so
13871 # multiply by 10^(d2), which is now only allowed to be 24,
13872 # with a multiply by 10^8 and 10^16, which is exact since
13873 # 10^24 is exact. If the input was denormalized, we must
13874 # create a busy stack frame with the mul command and the
13875 # two operands, and allow the fpu to complete the multiply.
13876 #
13877 # Register usage:
13878 # Input/Output
13879 # d0: FPCR with RZ mode/Unchanged
13880 # d2: 0 or 24/unchanged
13881 # d3: x/x
13882 # d4: LEN/Unchanged
13883 # d5: ICTR:LAMBDA
13884 # d6: ILOG/Unchanged
13885 # d7: k-factor/Unchanged
13886 # a0: ptr for original operand/final result
13887 # a1: ptr to PTENRM array/Unchanged
13888 # a2: x/x
13889 # fp0: float(ILOG)/X adjusted for SCALE (Y)
13890 # fp1: 10^ISCALE/Unchanged
13891 # fp2: x/x
13892 # F_SCR1:x/x
13893 # F_SCR2:Abs(X) with $3fff exponent/Unchanged
13894 # L_SCR1:x/x
13895 # L_SCR2:first word of X packed/Unchanged
13896
13897 A9_str:
13898 fmov.x (%a0),%fp0 # load X from memory
13899 fabs.x %fp0 # use abs(X)
13900 tst.w %d5 # LAMBDA is in lower word of d5
13901 bne.b sc_mul # if neg (LAMBDA = 1), scale by mul
13902 fdiv.x %fp1,%fp0 # calculate X / SCALE -> Y to fp0
13903 bra.w A10_st # branch to A10
13904
# Multiply path of A9 (LAMBDA = 1, i.e. ISCALE was negative).
# For a DENORM input the exponent and mantissa products are formed
# separately so no intermediate stage can underflow.
13905 sc_mul:
13906 tst.b BINDEC_FLG(%a6) # check for denorm
13907 beq.w A9_norm # if norm, continue with mul
13908
13909 # for DENORM, we must calculate:
13910 # fp0 = input_op * 10^ISCALE * 10^24
13911 # since the input operand is a DENORM, we can't multiply it directly.
13912 # so, we do the multiplication of the exponents and mantissas separately.
13913 # in this way, we avoid underflow on intermediate stages of the
13914 # multiplication and guarantee a result without exception.
13915 fmovm.x &0x2,-(%sp) # save 10^ISCALE to stack
13916
# Sum the (biased) exponents of 10^ISCALE, the DENORM input, 10^8 and
# 10^16 (table entries at 36(%a1) and 48(%a1)), removing one BIAS per
# extra factor so the result stays a single biased exponent.
13917 mov.w (%sp),%d3 # grab exponent
13918 andi.w &0x7fff,%d3 # clear sign
13919 ori.w &0x8000,(%a0) # make DENORM exp negative
13920 add.w (%a0),%d3 # add DENORM exp to 10^ISCALE exp
13921 subi.w &0x3fff,%d3 # subtract BIAS
13922 add.w 36(%a1),%d3 # add in 10^8 exponent
13923 subi.w &0x3fff,%d3 # subtract BIAS
13924 add.w 48(%a1),%d3 # add in 10^16 exponent
13925 subi.w &0x3fff,%d3 # subtract BIAS
13926
13927 bmi.w sc_mul_err # if result is DENORM, punt!!!
13928
13929 andi.w &0x8000,(%sp) # keep sign
13930 or.w %d3,(%sp) # insert new exponent
13931 andi.w &0x7fff,(%a0) # clear sign bit on DENORM again
# Normalize the DENORM mantissa by giving it a forced $3fff exponent,
# then multiply by the (re-exponented) 10^ISCALE pushed above.
13932 mov.l 0x8(%a0),-(%sp) # put input op mantissa on stk
13933 mov.l 0x4(%a0),-(%sp)
13934 mov.l &0x3fff0000,-(%sp) # force exp to zero
13935 fmovm.x (%sp)+,&0x80 # load normalized DENORM into fp0
13936 fmul.x (%sp)+,%fp0
13937
13938 # fmul.x 36(%a1),%fp0 # multiply fp0 by 10^8
13939 # fmul.x 48(%a1),%fp0 # multiply fp0 by 10^16
# The 10^8 and 10^16 mantissas are used with forced-zero exponents
# (their true exponents were already folded into d3 above).
13940 mov.l 36+8(%a1),-(%sp) # get 10^8 mantissa
13941 mov.l 36+4(%a1),-(%sp)
13942 mov.l &0x3fff0000,-(%sp) # force exp to zero
13943 mov.l 48+8(%a1),-(%sp) # get 10^16 mantissa
13944 mov.l 48+4(%a1),-(%sp)
13945 mov.l &0x3fff0000,-(%sp)# force exp to zero
13946 fmul.x (%sp)+,%fp0 # multiply fp0 by 10^8
13947 fmul.x (%sp)+,%fp0 # multiply fp0 by 10^16
13948 bra.b A10_st
13949
13950 sc_mul_err:
13951 bra.b sc_mul_err # deliberate hang: scaled result out of range
13952
# Normalized-input multiply path: if d2 = 24 (set in A7 for very
# negative ISCALE), first multiply by the exact factors 10^8 and 10^16.
13953 A9_norm:
13954 tst.w %d2 # test for small exp case
13955 beq.b A9_con # if zero, continue as normal
13956 fmul.x 36(%a1),%fp0 # multiply fp0 by 10^8
13957 fmul.x 48(%a1),%fp0 # multiply fp0 by 10^16
13958 A9_con:
13959 fmul.x %fp1,%fp0 # calculate X * SCALE -> Y to fp0
13960
13961 # A10. Or in INEX.
13962 # If INEX is set, round error occurred. This is compensated
13963 # for by 'or-ing' in the INEX2 flag to the lsb of Y.
13964 #
13965 # Register usage:
13966 # Input/Output
13967 # d0: FPCR with RZ mode/FPSR with INEX2 isolated
13968 # d2: x/x
13969 # d3: x/x
13970 # d4: LEN/Unchanged
13971 # d5: ICTR:LAMBDA
13972 # d6: ILOG/Unchanged
13973 # d7: k-factor/Unchanged
13974 # a0: ptr for original operand/final result
13975 # a1: ptr to PTENxx array/Unchanged
13976 # a2: x/ptr to FP_SCR1(a6)
13977 # fp0: Y/Y with lsb adjusted
13978 # fp1: 10^ISCALE/Unchanged
13979 # fp2: x/x
13980
13981 A10_st:
13982 fmov.l %fpsr,%d0 # get FPSR
13983 fmov.x %fp0,FP_SCR1(%a6) # move Y to memory
13984 lea.l FP_SCR1(%a6),%a2 # load a2 with ptr to FP_SCR1
13985 btst &9,%d0 # check if INEX2 set (FPSR bit 9)
13986 beq.b A11_st # if clear, skip rest
13987 or.l &1,8(%a2) # or in 1 to lsb of mantissa
13988 fmov.x FP_SCR1(%a6),%fp0 # write adjusted Y back to fpu
13989
13990
13991 # A11. Restore original FPCR; set size ext.
13992 # Perform FINT operation in the user's rounding mode. Keep
13993 # the size to extended. The sintdo entry point in the sint
13994 # routine expects the FPCR value to be in USER_FPCR for
13995 # mode and precision. The original FPCR is saved in L_SCR1.
13996
13997 A11_st:
13998 mov.l USER_FPCR(%a6),L_SCR1(%a6) # save it for later
13999 and.l &0x00000030,USER_FPCR(%a6) # set size to ext,
14000 # ;block exceptions
14001
14002
14003 # A12. Calculate YINT = FINT(Y) according to user's rounding mode.
14004 # The FPSP routine sintd0 is used. The output is in fp0.
14005 #
14006 # Register usage:
14007 # Input/Output
14008 # d0: FPSR with AINEX cleared/FPCR with size set to ext
14009 # d2: x/x/scratch
14010 # d3: x/x
14011 # d4: LEN/Unchanged
14012 # d5: ICTR:LAMBDA/Unchanged
14013 # d6: ILOG/Unchanged
14014 # d7: k-factor/Unchanged
14015 # a0: ptr for original operand/src ptr for sintdo
14016 # a1: ptr to PTENxx array/Unchanged
14017 # a2: ptr to FP_SCR1(a6)/Unchanged
14018 # a6: temp pointer to FP_SCR1(a6) - orig value saved and restored
14019 # fp0: Y/YINT
14020 # fp1: 10^ISCALE/Unchanged
14021 # fp2: x/x
14022 # F_SCR1:x/x
14023 # F_SCR2:Y adjusted for inex/Y with original exponent
14024 # L_SCR1:x/original USER_FPCR
14025 # L_SCR2:first word of X packed/Unchanged
14026
14027 A12_st:
14028 movm.l &0xc0c0,-(%sp) # save regs used by sintd0 {%d0-%d1/%a0-%a1}
14029 mov.l L_SCR1(%a6),-(%sp)
14030 mov.l L_SCR2(%a6),-(%sp)
14031
14032 lea.l FP_SCR1(%a6),%a0 # a0 is ptr to FP_SCR1(a6)
14033 fmov.x %fp0,(%a0) # move Y to memory at FP_SCR1(a6)
14034 tst.l L_SCR2(%a6) # test sign of original operand
14035 bge.b do_fint12 # if pos, use Y
14036 or.l &0x80000000,(%a0) # if neg, use -Y (set sign bit)
14037 do_fint12:
14038 mov.l USER_FPSR(%a6),-(%sp) # preserve user FPSR around fint
14039 # bsr sintdo # sint routine returns int in fp0
14040
14041 fmov.l USER_FPCR(%a6),%fpcr # user's rmode, ext precision (A11)
14042 fmov.l &0x0,%fpsr # clear the AEXC bits!!!
14043 ## mov.l USER_FPCR(%a6),%d0 # ext prec/keep rnd mode
14044 ## andi.l &0x00000030,%d0
14045 ## fmov.l %d0,%fpcr
14046 fint.x FP_SCR1(%a6),%fp0 # do fint()
14047 fmov.l %fpsr,%d0 # collect exceptions raised by fint
14048 or.w %d0,FPSR_EXCEPT(%a6) # accumulate them into user FPSR
14049 ## fmov.l &0x0,%fpcr
14050 ## fmov.l %fpsr,%d0 # don't keep ccodes
14051 ## or.w %d0,FPSR_EXCEPT(%a6)
14052
14053 mov.b (%sp),USER_FPSR(%a6) # restore first byte of saved FPSR
14054 add.l &4,%sp # pop saved FPSR
14055
14056 mov.l (%sp)+,L_SCR2(%a6)
14057 mov.l (%sp)+,L_SCR1(%a6)
14058 movm.l (%sp)+,&0x303 # restore regs used by sint {%d0-%d1/%a0-%a1}
14059
14060 mov.l L_SCR2(%a6),FP_SCR1(%a6) # restore original exponent
14061 mov.l L_SCR1(%a6),USER_FPCR(%a6) # restore user's FPCR
14062
14063 # A13. Check for LEN digits.
14064 # If the int operation results in more than LEN digits,
14065 # or less than LEN -1 digits, adjust ILOG and repeat from
14066 # A6. This test occurs only on the first pass. If the
14067 # result is exactly 10^LEN, decrement ILOG and divide
14068 # the mantissa by 10. The calculation of 10^LEN cannot
14069 # be inexact, since all powers of ten up to 10^27 are exact
14070 # in extended precision, so the use of a previous power-of-ten
14071 # table will introduce no error.
14072 #
14073 #
14074 # Register usage:
14075 # Input/Output
14076 # d0: FPCR with size set to ext/scratch final = 0
14077 # d2: x/x
14078 # d3: x/scratch final = x
14079 # d4: LEN/LEN adjusted
14080 # d5: ICTR:LAMBDA/LAMBDA:ICTR
14081 # d6: ILOG/ILOG adjusted
14082 # d7: k-factor/Unchanged
14083 # a0: pointer into memory for packed bcd string formation
14084 # a1: ptr to PTENxx array/Unchanged
14085 # a2: ptr to FP_SCR1(a6)/Unchanged
14086 # fp0: int portion of Y/abs(YINT) adjusted
14087 # fp1: 10^ISCALE/Unchanged
14088 # fp2: x/10^LEN
14089 # F_SCR1:x/x
14090 # F_SCR2:Y with original exponent/Unchanged
14091 # L_SCR1:original USER_FPCR/Unchanged
14092 # L_SCR2:first word of X packed/Unchanged
14093
14094 A13_st:
14095 swap %d5 # put ICTR in lower word of d5
14096 tst.w %d5 # check if ICTR = 0
14097 bne not_zr # if non-zero, go to second test
14098 #
14099 # Compute 10^(LEN-1)
14100 #
14101 fmov.s FONE(%pc),%fp2 # init fp2 to 1.0
14102 mov.l %d4,%d0 # put LEN in d0
14103 subq.l &1,%d0 # d0 = LEN -1
14104 clr.l %d3 # clr table index
# Binary-decomposition loop as in A7: fp2 = 10^(LEN-1).
14105 l_loop:
14106 lsr.l &1,%d0 # shift next bit into carry
14107 bcc.b l_next # if zero, skip the mul
14108 fmul.x (%a1,%d3),%fp2 # mul by 10**(d3_bit_no)
14109 l_next:
14110 add.l &12,%d3 # inc d3 to next pwrten table entry
14111 tst.l %d0 # test if LEN is zero
14112 bne.b l_loop # if not, loop
14113 #
14114 # 10^(LEN-1) is computed for this test and A14. If the input was
14115 # denormalized, check only the case in which YINT > 10^LEN.
14116 #
14117 tst.b BINDEC_FLG(%a6) # check if input was norm
14118 beq.b A13_con # if norm, continue with checking
14119 fabs.x %fp0 # take abs of YINT
14120 bra test_2
14121 #
14122 # Compare abs(YINT) to 10^(LEN-1) and 10^LEN
14123 #
14124 A13_con:
14125 fabs.x %fp0 # take abs of YINT
14126 fcmp.x %fp0,%fp2 # compare abs(YINT) with 10^(LEN-1)
14127 fbge.w test_2 # if greater, do next test
# Too few digits: decrement ILOG, set ICTR so this pass happens only
# once, force round-to-minus-infinity, and retry from A6.
14128 subq.l &1,%d6 # subtract 1 from ILOG
14129 mov.w &1,%d5 # set ICTR
14130 fmov.l &rm_mode*0x10,%fpcr # set rmode to RM
14131 fmul.s FTEN(%pc),%fp2 # compute 10^LEN
14132 bra.w A6_str # return to A6 and recompute YINT
14133 test_2:
14134 fmul.s FTEN(%pc),%fp2 # compute 10^LEN
14135 fcmp.x %fp0,%fp2 # compare abs(YINT) with 10^LEN
14136 fblt.w A14_st # if less, all is ok, go to A14
14137 fbgt.w fix_ex # if greater, fix and redo
14138 fdiv.s FTEN(%pc),%fp0 # if equal, divide by 10
14139 addq.l &1,%d6 # and inc ILOG
14140 bra.b A14_st # and continue elsewhere
14141 fix_ex:
14142 addq.l &1,%d6 # increment ILOG by 1
14143 mov.w &1,%d5 # set ICTR
14144 fmov.l &rm_mode*0x10,%fpcr # set rmode to RM
14145 bra.w A6_str # return to A6 and recompute YINT
14146 #
14147 # Since ICTR <> 0, we have already been through one adjustment,
14148 # and shouldn't have another; this is to check if abs(YINT) = 10^LEN
14149 # 10^LEN is again computed using whatever table is in a1 since the
14150 # value calculated cannot be inexact.
14151 #
14152 not_zr:
14153 fmov.s FONE(%pc),%fp2 # init fp2 to 1.0
14154 mov.l %d4,%d0 # put LEN in d0
14155 clr.l %d3 # clr table index
# Binary-decomposition loop: fp2 = 10^LEN.
14156 z_loop:
14157 lsr.l &1,%d0 # shift next bit into carry
14158 bcc.b z_next # if zero, skip the mul
14159 fmul.x (%a1,%d3),%fp2 # mul by 10**(d3_bit_no)
14160 z_next:
14161 add.l &12,%d3 # inc d3 to next pwrten table entry
14162 tst.l %d0 # test if LEN is zero
14163 bne.b z_loop # if not, loop
14164 fabs.x %fp0 # get abs(YINT)
14165 fcmp.x %fp0,%fp2 # check if abs(YINT) = 10^LEN
14166 fbneq.w A14_st # if not, skip this
14167 fdiv.s FTEN(%pc),%fp0 # divide abs(YINT) by 10
14168 addq.l &1,%d6 # and inc ILOG by 1
14169 addq.l &1,%d4 # and inc LEN
14170 fmul.s FTEN(%pc),%fp2 # since LEN changed, get new 10^LEN
14171
14172 # A14. Convert the mantissa to bcd.
14173 # The binstr routine is used to convert the LEN digit
14174 # mantissa to bcd in memory. The input to binstr is
14175 # to be a fraction; i.e. (mantissa)/10^LEN and adjusted
14176 # such that the decimal point is to the left of bit 63.
14177 # The bcd digits are stored in the correct position in
14178 # the final string area in memory.
14179 #
14180 #
14181 # Register usage:
14182 # Input/Output
14183 # d0: x/LEN call to binstr - final is 0
14184 # d1: x/0
14185 # d2: x/ms 32-bits of mant of abs(YINT)
14186 # d3: x/ls 32-bits of mant of abs(YINT)
14187 # d4: LEN/Unchanged
14188 # d5: ICTR:LAMBDA/LAMBDA:ICTR
14189 # d6: ILOG
14190 # d7: k-factor/Unchanged
14191 # a0: pointer into memory for packed bcd string formation
14192 # /ptr to first mantissa byte in result string
14193 # a1: ptr to PTENxx array/Unchanged
14194 # a2: ptr to FP_SCR1(a6)/Unchanged
14195 # fp0: int portion of Y/abs(YINT) adjusted
14196 # fp1: 10^ISCALE/Unchanged
14197 # fp2: 10^LEN/Unchanged
14198 # F_SCR1:x/Work area for final result
14199 # F_SCR2:Y with original exponent/Unchanged
14200 # L_SCR1:original USER_FPCR/Unchanged
14201 # L_SCR2:first word of X packed/Unchanged
14202
14203 A14_st:
14204 fmov.l &rz_mode*0x10,%fpcr # force rz for conversion
14205 fdiv.x %fp2,%fp0 # divide abs(YINT) by 10^LEN
14206 lea.l FP_SCR0(%a6),%a0
14207 fmov.x %fp0,(%a0) # move abs(YINT)/10^LEN to memory
14208 mov.l 4(%a0),%d2 # move 2nd word of FP_RES to d2
14209 mov.l 8(%a0),%d3 # move 3rd word of FP_RES to d3
14210 clr.l 4(%a0) # zero word 2 of FP_RES
14211 clr.l 8(%a0) # zero word 3 of FP_RES
14212 mov.l (%a0),%d0 # move exponent to d0
14213 swap %d0 # put exponent in lower word
14214 beq.b no_sft # if zero, don't shift
14215 sub.l &0x3ffd,%d0 # sub bias less 2 to make fract
14216 tst.l %d0 # check if > 1
14217 bgt.b no_sft # if so, don't shift
14218 neg.l %d0 # make exp positive
# Shift the 64-bit mantissa d2:d3 right so the fraction's binary point
# sits to the left of bit 63, as binstr requires.
14219 m_loop:
14220 lsr.l &1,%d2 # shift d2:d3 right, add 0s
14221 roxr.l &1,%d3 # the number of places
14222 dbf.w %d0,m_loop # given in d0
14223 no_sft:
14224 tst.l %d2 # check for mantissa of zero
14225 bne.b no_zr # if not, go on
14226 tst.l %d3 # continue zero check
14227 beq.b zer_m # if zero, go directly to binstr
# Round the 64-bit fraction by adding 1 at bit 7, then clear the low
# 7 bits, which are below the 881/882 packed-decimal precision.
14228 no_zr:
14229 clr.l %d1 # put zero in d1 for addx
14230 add.l &0x00000080,%d3 # inc at bit 7
14231 addx.l %d1,%d2 # continue inc
14232 and.l &0xffffff80,%d3 # strip off lsb not used by 882
14233 zer_m:
14234 mov.l %d4,%d0 # put LEN in d0 for binstr call
14235 addq.l &3,%a0 # a0 points to M16 byte in result
14236 bsr binstr # call binstr to convert mant
14237
14238
14239 # A15. Convert the exponent to bcd.
14240 # As in A14 above, the exp is converted to bcd and the
14241 # digits are stored in the final string.
14242 #
14243 # Digits are stored in L_SCR1(a6) on return from BINDEC as:
14244 #
14245 # 32 16 15 0
14246 # -----------------------------------------
14247 # | 0 | e3 | e2 | e1 | e4 | X | X | X |
14248 # -----------------------------------------
14249 #
14250 # And are moved into their proper places in FP_SCR0. If digit e4
14251 # is non-zero, OPERR is signaled. In all cases, all 4 digits are
14252 # written as specified in the 881/882 manual for packed decimal.
14253 #
14254 # Register usage:
14255 # Input/Output
14256 # d0: x/LEN call to binstr - final is 0
14257 # d1: x/scratch (0);shift count for final exponent packing
14258 # d2: x/ms 32-bits of exp fraction/scratch
14259 # d3: x/ls 32-bits of exp fraction
14260 # d4: LEN/Unchanged
14261 # d5: ICTR:LAMBDA/LAMBDA:ICTR
14262 # d6: ILOG
14263 # d7: k-factor/Unchanged
14264 # a0: ptr to result string/ptr to L_SCR1(a6)
14265 # a1: ptr to PTENxx array/Unchanged
14266 # a2: ptr to FP_SCR1(a6)/Unchanged
14267 # fp0: abs(YINT) adjusted/float(ILOG)
14268 # fp1: 10^ISCALE/Unchanged
14269 # fp2: 10^LEN/Unchanged
14270 # F_SCR1:Work area for final result/BCD result
14271 # F_SCR2:Y with original exponent/ILOG/10^4
14272 # L_SCR1:original USER_FPCR/Exponent digits on return from binstr
14273 # L_SCR2:first word of X packed/Unchanged
14274
14275 A15_st:
14276 tst.b BINDEC_FLG(%a6) # check for denorm
14277 beq.b not_denorm
14278 ftest.x %fp0 # test for zero
14279 fbeq.w den_zero # if zero, use k-factor or 4933
14280 fmov.l %d6,%fp0 # float ILOG
14281 fabs.x %fp0 # get abs of ILOG
14282 bra.b convrt
14283 den_zero:
14284 tst.l %d7 # check sign of the k-factor
14285 blt.b use_ilog # if negative, use ILOG
14286 fmov.s F4933(%pc),%fp0 # force exponent to 4933
14287 bra.b convrt # do it
14288 use_ilog:
14289 fmov.l %d6,%fp0 # float ILOG
14290 fabs.x %fp0 # get abs of ILOG
14291 bra.b convrt
14292 not_denorm:
14293 ftest.x %fp0 # test for zero
14294 fbneq.w not_zero # if zero, force exponent
14295 fmov.s FONE(%pc),%fp0 # force exponent to 1
14296 bra.b convrt # do it
14297 not_zero:
14298 fmov.l %d6,%fp0 # float ILOG
14299 fabs.x %fp0 # get abs of ILOG
# fp0 now holds abs(exponent); convert it to a binary fraction for
# binstr exactly as A14 did for the mantissa (24(%a1) is 10^4).
14300 convrt:
14301 fdiv.x 24(%a1),%fp0 # compute ILOG/10^4
14302 fmov.x %fp0,FP_SCR1(%a6) # store fp0 in memory
14303 mov.l 4(%a2),%d2 # move word 2 to d2
14304 mov.l 8(%a2),%d3 # move word 3 to d3
14305 mov.w (%a2),%d0 # move exp to d0
14306 beq.b x_loop_fin # if zero, skip the shift
14307 sub.w &0x3ffd,%d0 # subtract off bias
14308 neg.w %d0 # make exp positive
14309 x_loop:
14310 lsr.l &1,%d2 # shift d2:d3 right
14311 roxr.l &1,%d3 # the number of places
14312 dbf.w %d0,x_loop # given in d0
14313 x_loop_fin:
14314 clr.l %d1 # put zero in d1 for addx
14315 add.l &0x00000080,%d3 # inc at bit 7
14316 addx.l %d1,%d2 # continue inc
14317 and.l &0xffffff80,%d3 # strip off lsb not used by 882
14318 mov.l &4,%d0 # put 4 in d0 for binstr call
14319 lea.l L_SCR1(%a6),%a0 # a0 is ptr to L_SCR1 for exp digits
14320 bsr binstr # call binstr to convert exp
14321 mov.l L_SCR1(%a6),%d0 # load L_SCR1 lword to d0
14322 mov.l &12,%d1 # use d1 for shift count
14323 lsr.l %d1,%d0 # shift d0 right by 12
14324 bfins %d0,FP_SCR0(%a6){&4:&12} # put e3:e2:e1 in FP_SCR0
14325 lsr.l %d1,%d0 # shift d0 right by 12
14326 bfins %d0,FP_SCR0(%a6){&16:&4} # put e4 in FP_SCR0
14327 tst.b %d0 # check if e4 is zero
14328 beq.b A16_st # if zero, skip rest
14329 or.l &opaop_mask,USER_FPSR(%a6) # set OPERR & AIOP in USER_FPSR
14330
14331
14332 # A16. Write sign bits to final string.
14333 # Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
14334 #
14335 # Register usage:
14336 # Input/Output
14337 # d0: x/scratch - final is x
14338 # d2: x/x
14339 # d3: x/x
14340 # d4: LEN/Unchanged
14341 # d5: ICTR:LAMBDA/LAMBDA:ICTR
14342 # d6: ILOG/ILOG adjusted
14343 # d7: k-factor/Unchanged
14344 # a0: ptr to L_SCR1(a6)/Unchanged
14345 # a1: ptr to PTENxx array/Unchanged
14346 # a2: ptr to FP_SCR1(a6)/Unchanged
14347 # fp0: float(ILOG)/Unchanged
14348 # fp1: 10^ISCALE/Unchanged
14349 # fp2: 10^LEN/Unchanged
14350 # F_SCR1:BCD result with correct signs
14351 # F_SCR2:ILOG/10^4
14352 # L_SCR1:Exponent digits on return from binstr
14353 # L_SCR2:first word of X packed/Unchanged
14354
14355 A16_st:
14356 clr.l %d0 # clr d0 for collection of signs
14357 and.b &0x0f,FP_SCR0(%a6) # clear first nibble of FP_SCR0
14358 tst.l L_SCR2(%a6) # check sign of original mantissa
14359 bge.b mant_p # if pos, don't set SM
14360 mov.l &2,%d0 # move 2 in to d0 for SM (bit 1)
14361 mant_p:
14362 tst.l %d6 # check sign of ILOG
14363 bge.b wr_sgn # if pos, don't set SE
14364 addq.l &1,%d0 # set bit 0 in d0 for SE
14365 wr_sgn:
14366 bfins %d0,FP_SCR0(%a6){&0:&2} # insert SM and SE into FP_SCR0
14367
14368 # Clean up and restore all registers used.
14369
14370 fmov.l &0,%fpsr # clear possible inex2/ainex bits
14371 fmovm.x (%sp)+,&0xe0 # {%fp0-%fp2}
14372 movm.l (%sp)+,&0x4fc # {%d2-%d7/%a2}
14373 rts
14374
# Table of 10^(2^i), i = 0..12, one 12-byte extended-precision entry
# per power, pre-rounded to nearest (RN).  Indexed by the
# binary-decomposition loops above (12 bytes per step).
14375 global PTENRN
14376 PTENRN:
14377 long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
14378 long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
14379 long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
14380 long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
14381 long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
14382 long 0x40690000,0x9DC5ADA8,0x2B70B59E # 10 ^ 32
14383 long 0x40D30000,0xC2781F49,0xFFCFA6D5 # 10 ^ 64
14384 long 0x41A80000,0x93BA47C9,0x80E98CE0 # 10 ^ 128
14385 long 0x43510000,0xAA7EEBFB,0x9DF9DE8E # 10 ^ 256
14386 long 0x46A30000,0xE319A0AE,0xA60E91C7 # 10 ^ 512
14387 long 0x4D480000,0xC9767586,0x81750C17 # 10 ^ 1024
14388 long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 # 10 ^ 2048
14389 long 0x75250000,0xC4605202,0x8A20979B # 10 ^ 4096
14390
# Same powers of ten as PTENRN, pre-rounded toward plus infinity (RP);
# entries differ from PTENRN only in the last mantissa digit.
14391 global PTENRP
14392 PTENRP:
14393 long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
14394 long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
14395 long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
14396 long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
14397 long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
14398 long 0x40690000,0x9DC5ADA8,0x2B70B59E # 10 ^ 32
14399 long 0x40D30000,0xC2781F49,0xFFCFA6D6 # 10 ^ 64
14400 long 0x41A80000,0x93BA47C9,0x80E98CE0 # 10 ^ 128
14401 long 0x43510000,0xAA7EEBFB,0x9DF9DE8E # 10 ^ 256
14402 long 0x46A30000,0xE319A0AE,0xA60E91C7 # 10 ^ 512
14403 long 0x4D480000,0xC9767586,0x81750C18 # 10 ^ 1024
14404 long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 # 10 ^ 2048
14405 long 0x75250000,0xC4605202,0x8A20979B # 10 ^ 4096
14406
# Same powers of ten as PTENRN, pre-rounded toward minus infinity (RM);
# entries differ from PTENRN only in the last mantissa digit.
14407 global PTENRM
14408 PTENRM:
14409 long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
14410 long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
14411 long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
14412 long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
14413 long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
14414 long 0x40690000,0x9DC5ADA8,0x2B70B59D # 10 ^ 32
14415 long 0x40D30000,0xC2781F49,0xFFCFA6D5 # 10 ^ 64
14416 long 0x41A80000,0x93BA47C9,0x80E98CDF # 10 ^ 128
14417 long 0x43510000,0xAA7EEBFB,0x9DF9DE8D # 10 ^ 256
14418 long 0x46A30000,0xE319A0AE,0xA60E91C6 # 10 ^ 512
14419 long 0x4D480000,0xC9767586,0x81750C17 # 10 ^ 1024
14420 long 0x5A920000,0x9E8B3B5D,0xC53D5DE4 # 10 ^ 2048
14421 long 0x75250000,0xC4605202,0x8A20979A # 10 ^ 4096
14422
14423 #########################################################################
14424 # binstr(): Converts a 64-bit binary integer to bcd. #
14425 # #
14426 # INPUT *************************************************************** #
14427 # d2:d3 = 64-bit binary integer #
14428 # d0 = desired length (LEN) #
14429 # a0 = pointer to start in memory for bcd characters #
14430 # (This pointer must point to byte 4 of the first #
14431 # lword of the packed decimal memory string.) #
14432 # #
14433 # OUTPUT ************************************************************** #
14434 # a0 = pointer to LEN bcd digits representing the 64-bit integer. #
14435 # #
14436 # ALGORITHM *********************************************************** #
14437 # The 64-bit binary is assumed to have a decimal point before #
14438 # bit 63. The fraction is multiplied by 10 using a mul by 2 #
14439 # shift and a mul by 8 shift. The bits shifted out of the #
14440 # msb form a decimal digit. This process is iterated until #
14441 # LEN digits are formed. #
14442 # #
14443 # A1. Init d7 to 1. D7 is the byte digit counter, and if 1, the #
14444 # digit formed will be assumed the least significant. This is #
14445 # to force the first byte formed to have a 0 in the upper 4 bits. #
14446 # #
14447 # A2. Beginning of the loop: #
14448 # Copy the fraction in d2:d3 to d4:d5. #
14449 # #
14450 # A3. Multiply the fraction in d2:d3 by 8 using bit-field #
14451 # extracts and shifts. The three msbs from d2 will go into d1. #
14452 # #
14453 # A4. Multiply the fraction in d4:d5 by 2 using shifts. The msb #
14454 # will be collected by the carry. #
14455 # #
14456 # A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5 #
14457 # into d2:d3. D1 will contain the bcd digit formed. #
14458 # #
14459 # A6. Test d7. If zero, the digit formed is the ms digit. If non- #
14460 # zero, it is the ls digit. Put the digit in its place in the #
14461 # upper word of d0. If it is the ls digit, write the word #
14462 # from d0 to memory. #
14463 # #
14464 # A7. Decrement d0 (LEN counter) and repeat the loop until zero. #
14465 # #
14466 #########################################################################
14467
14468 # Implementation Notes:
14469 #
14470 # The registers are used as follows:
14471 #
14472 # d0: LEN counter
14473 # d1: temp used to form the digit
14474 # d2: upper 32-bits of fraction for mul by 8
14475 # d3: lower 32-bits of fraction for mul by 8
14476 # d4: upper 32-bits of fraction for mul by 2
14477 # d5: lower 32-bits of fraction for mul by 2
14478 # d6: temp for bit-field extracts
14479 # d7: byte digit formation word;digit count {0,1}
14480 # a0: pointer into memory for packed bcd string formation
14481 #
14482
# binstr: convert the 64-bit binary fraction in d2:d3 to d0 packed-bcd
# digits at (a0).  Each iteration computes frac*10 as frac*8 + frac*2
# and peels the integer part (one decimal digit) off the top.
# d0-d7 are saved/restored; a0 is advanced past the digits written.
14483 global binstr
14484 binstr:
14485 movm.l &0xff00,-(%sp) # {%d0-%d7}
14486
14487 #
14488 # A1: Init d7
14489 #
14490 mov.l &1,%d7 # init d7 for second digit
14491 subq.l &1,%d0 # for dbf d0 would have LEN+1 passes
14492 #
14493 # A2. Copy d2:d3 to d4:d5. Start loop.
14494 #
14495 loop:
14496 mov.l %d2,%d4 # copy the fraction before muls
14497 mov.l %d3,%d5 # to d4:d5
14498 #
14499 # A3. Multiply d2:d3 by 8; extract msbs into d1.
14500 #
14501 bfextu %d2{&0:&3},%d1 # copy 3 msbs of d2 into d1
14502 asl.l &3,%d2 # shift d2 left by 3 places
14503 bfextu %d3{&0:&3},%d6 # copy 3 msbs of d3 into d6
14504 asl.l &3,%d3 # shift d3 left by 3 places
14505 or.l %d6,%d2 # or in msbs from d3 into d2
14506 #
14507 # A4. Multiply d4:d5 by 2; add carry out to d1.
14508 #
14509 asl.l &1,%d5 # mul d5 by 2
14510 roxl.l &1,%d4 # mul d4 by 2
14511 swap %d6 # put 0 in d6 lower word
14512 addx.w %d6,%d1 # add in extend from mul by 2
14513 #
14514 # A5. Add mul by 8 to mul by 2. D1 contains the digit formed.
14515 #
14516 add.l %d5,%d3 # add lower 32 bits
14517 nop # ERRATA FIX #13 (Rev. 1.2 6/6/90)
14518 addx.l %d4,%d2 # add with extend upper 32 bits
14519 nop # ERRATA FIX #13 (Rev. 1.2 6/6/90)
14520 addx.w %d6,%d1 # add in extend from add to d1
14521 swap %d6 # with d6 = 0; put 0 in upper word
14522 #
14523 # A6. Test d7 and branch.
14524 #
14525 tst.w %d7 # if zero, store digit & to loop
14526 beq.b first_d # if non-zero, form byte & write
14527 sec_d:
14528 swap %d7 # bring first digit to word d7b
14529 asl.w &4,%d7 # first digit in upper 4 bits d7b
14530 add.w %d1,%d7 # add in ls digit to d7b
14531 mov.b %d7,(%a0)+ # store d7b byte in memory
14532 swap %d7 # put LEN counter in word d7a
14533 clr.w %d7 # set d7a to signal no digits done
14534 dbf.w %d0,loop # do loop some more!
14535 bra.b end_bstr # finished, so exit
14536 first_d:
14537 swap %d7 # put digit word in d7b
14538 mov.w %d1,%d7 # put new digit in d7b
14539 swap %d7 # put LEN counter in word d7a
14540 addq.w &1,%d7 # set d7a to signal first digit done
14541 dbf.w %d0,loop # do loop some more!
# Odd digit count: the pending digit is flushed as the high nibble of
# a final byte.
14542 swap %d7 # put last digit in string
14543 lsl.w &4,%d7 # move it to upper 4 bits
14544 mov.b %d7,(%a0)+ # store it in memory string
14545 #
14546 # Clean up and return with result in fp0.
14547 #
14548 end_bstr:
14549 movm.l (%sp)+,&0xff # {%d0-%d7}
14550 rts
14551
14552 #########################################################################
14553 # XDEF **************************************************************** #
14554 # facc_in_b(): dmem_read_byte failed #
14555 # facc_in_w(): dmem_read_word failed #
14556 # facc_in_l(): dmem_read_long failed #
14557 # facc_in_d(): dmem_read of dbl prec failed #
14558 # facc_in_x(): dmem_read of ext prec failed #
14559 # #
14560 # facc_out_b(): dmem_write_byte failed #
14561 # facc_out_w(): dmem_write_word failed #
14562 # facc_out_l(): dmem_write_long failed #
14563 # facc_out_d(): dmem_write of dbl prec failed #
14564 # facc_out_x(): dmem_write of ext prec failed #
14565 # #
14566 # XREF **************************************************************** #
14567 # _real_access() - exit through access error handler #
14568 # #
14569 # INPUT *************************************************************** #
14570 # None #
14571 # #
14572 # OUTPUT ************************************************************** #
14573 # None #
14574 # #
14575 # ALGORITHM *********************************************************** #
14576 # Flow jumps here when an FP data fetch call gets an error #
14577 # result. This means the operating system wants an access error frame #
14578 # made out of the current exception stack frame. #
14579 # So, we first call restore() which makes sure that any updated #
14580 # -(an)+ register gets returned to its pre-exception value and then #
14581 # we change the stack to an access error stack frame. #
14582 # #
14583 #########################################################################
14584
# Data-memory READ fault entry points.  Each one sets d0 = access size
# in bytes, calls restore() to back out any -(an)/(an)+ side effect,
# stores the fault-status longword (FSLW) for that size in EXC_VOFF,
# and joins facc_finish to build the access-error frame.
14585 facc_in_b:
14586 movq.l &0x1,%d0 # one byte
14587 bsr.w restore # fix An
14588
14589 mov.w &0x0121,EXC_VOFF(%a6) # set FSLW
14590 bra.w facc_finish
14591
14592 facc_in_w:
14593 movq.l &0x2,%d0 # two bytes
14594 bsr.w restore # fix An
14595
14596 mov.w &0x0141,EXC_VOFF(%a6) # set FSLW
14597 bra.b facc_finish
14598
14599 facc_in_l:
14600 movq.l &0x4,%d0 # four bytes
14601 bsr.w restore # fix An
14602
14603 mov.w &0x0101,EXC_VOFF(%a6) # set FSLW
14604 bra.b facc_finish
14605
14606 facc_in_d:
14607 movq.l &0x8,%d0 # eight bytes
14608 bsr.w restore # fix An
14609
14610 mov.w &0x0161,EXC_VOFF(%a6) # set FSLW
14611 bra.b facc_finish
14612
14613 facc_in_x:
14614 movq.l &0xc,%d0 # twelve bytes
14615 bsr.w restore # fix An
14616
14617 mov.w &0x0161,EXC_VOFF(%a6) # set FSLW
14618 bra.b facc_finish
14619
14620 ################################################################
14621
# Data-memory WRITE fault entry points; same pattern as the read
# entries above, with write-style FSLW values.
14622 facc_out_b:
14623 movq.l &0x1,%d0 # one byte
14624 bsr.w restore # restore An
14625
14626 mov.w &0x00a1,EXC_VOFF(%a6) # set FSLW
14627 bra.b facc_finish
14628
14629 facc_out_w:
14630 movq.l &0x2,%d0 # two bytes
14631 bsr.w restore # restore An
14632
14633 mov.w &0x00c1,EXC_VOFF(%a6) # set FSLW
14634 bra.b facc_finish
14635
14636 facc_out_l:
14637 movq.l &0x4,%d0 # four bytes
14638 bsr.w restore # restore An
14639
14640 mov.w &0x0081,EXC_VOFF(%a6) # set FSLW
14641 bra.b facc_finish
14642
14643 facc_out_d:
14644 movq.l &0x8,%d0 # eight bytes
14645 bsr.w restore # restore An
14646
14647 mov.w &0x00e1,EXC_VOFF(%a6) # set FSLW
14648 bra.b facc_finish
14649
# dmem_write of extended precision failed: d0 = 12 bytes, back out any
# -(an)/(an)+ side effect, set the write-fault FSLW, fall into
# facc_finish (no branch needed; it follows immediately).
14650 facc_out_x:
14651 movq.l &0xc,%d0 # twelve bytes (movq: same value as mov.l,
# # smaller encoding, consistent w/ facc_in_x)
14652 bsr.w restore # restore An
14653
14654 mov.w &0x00e1,EXC_VOFF(%a6) # set FSLW
14655
14656 # here's where we actually create the access error frame from the
14657 # current exception stack frame.
# Restores the user-visible FP and integer register state, unlinks the
# a6 frame, then rewrites the exception frame in place as an access
# error frame: {SR, PC, EA, FSLW} with vector offset 0x4008.
14658 facc_finish:
14659 mov.l USER_FPIAR(%a6),EXC_PC(%a6) # store current PC
14660
14661 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
14662 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
14663 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
14664
14665 unlk %a6
14666
14667 mov.l (%sp),-(%sp) # store SR, hi(PC)
14668 mov.l 0x8(%sp),0x4(%sp) # store lo(PC)
14669 mov.l 0xc(%sp),0x8(%sp) # store EA
14670 mov.l &0x00000001,0xc(%sp) # store FSLW
14671 mov.w 0x6(%sp),0xc(%sp) # fix FSLW (size): copy saved EXC_VOFF word
14672 mov.w &0x4008,0x6(%sp) # store voff
14673
14674 btst &0x5,(%sp) # supervisor or user mode?
14675 beq.b facc_out2 # user
14676 bset &0x2,0xd(%sp) # set supervisor TM bit
14677
14678 facc_out2:
14679 bra.l _real_access
14680
14681 ##################################################################
14682
14683 # if the effective addressing mode was predecrement or postincrement,
14684 # the emulation has already changed its value to the correct post-
14685 # instruction value. but since we're exiting to the access error
14686 # handler, then AN must be returned to its pre-instruction value.
14687 # we do that here.
# In: d0 = access size in bytes.  Dispatches on the opmode bits of the
# faulting instruction's opword; plain rts for any other <ea> mode.
14688 restore:
14689 mov.b EXC_OPWORD+0x1(%a6),%d1
14690 andi.b &0x38,%d1 # extract opmode
14691 cmpi.b %d1,&0x18 # postinc?
14692 beq.w rest_inc
14693 cmpi.b %d1,&0x20 # predec?
14694 beq.w rest_dec
14695 rts
14696
# Undo an (an)+ update: subtract d0 (access size) from the proper An.
# Jump table of 16-bit offsets relative to tbl_rest_inc, one per
# register number 0-7 taken from the opword.
14697 rest_inc:
14698 mov.b EXC_OPWORD+0x1(%a6),%d1
14699 andi.w &0x0007,%d1 # fetch An
14700
14701 mov.w (tbl_rest_inc.b,%pc,%d1.w*2),%d1
14702 jmp (tbl_rest_inc.b,%pc,%d1.w*1)
14703
14704 tbl_rest_inc:
14705 short ri_a0 - tbl_rest_inc
14706 short ri_a1 - tbl_rest_inc
14707 short ri_a2 - tbl_rest_inc
14708 short ri_a3 - tbl_rest_inc
14709 short ri_a4 - tbl_rest_inc
14710 short ri_a5 - tbl_rest_inc
14711 short ri_a6 - tbl_rest_inc
14712 short ri_a7 - tbl_rest_inc
14713
# Per-register fixups.  a0/a1 live in the stacked EXC_DREGS save area,
# a6 is the stacked frame pointer, a2-a5 are still live registers.
14714 ri_a0:
14715 sub.l %d0,EXC_DREGS+0x8(%a6) # fix stacked a0
14716 rts
14717 ri_a1:
14718 sub.l %d0,EXC_DREGS+0xc(%a6) # fix stacked a1
14719 rts
14720 ri_a2:
14721 sub.l %d0,%a2 # fix a2
14722 rts
14723 ri_a3:
14724 sub.l %d0,%a3 # fix a3
14725 rts
14726 ri_a4:
14727 sub.l %d0,%a4 # fix a4
14728 rts
14729 ri_a5:
14730 sub.l %d0,%a5 # fix a5
14731 rts
14732 ri_a6:
14733 sub.l %d0,(%a6) # fix stacked a6
14734 rts
14735 # if it's a fmove out instruction, we don't have to fix a7
14736 # because we hadn't changed it yet. if it's an opclass two
14737 # instruction (data moved in) and the exception was in supervisor
14738 # mode, then a7 also wasn't updated. if it was user mode, then
14739 # restore the correct a7 which is in the USP currently.
14740 ri_a7:
14741 cmpi.b EXC_VOFF(%a6),&0x30 # move in or out?
14742 bne.b ri_a7_done # out
14743
14744 btst &0x5,EXC_SR(%a6) # user or supervisor?
14745 bne.b ri_a7_done # supervisor
14746 movc %usp,%a0 # restore USP
14747 sub.l %d0,%a0
14748 movc %a0,%usp
14749 ri_a7_done:
14750 rts
14751
14752 # need to invert adjustment value if the <ea> was predec
# -(an) was decremented, so ADD the size back: negate d0 and reuse
# the rest_inc (subtract) path.
14753 rest_dec:
14754 neg.l %d0
14755 bra.b rest_inc
14756