res_func.sa revision 1.1
1*	MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
2*	M68000 Hi-Performance Microprocessor Division
3*	M68040 Software Package 
4*
5*	M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
6*	All rights reserved.
7*
8*	THE SOFTWARE is provided on an "AS IS" basis and without warranty.
9*	To the maximum extent permitted by applicable law,
10*	MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
11*	INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
12*	PARTICULAR PURPOSE and any warranty against infringement with
13*	regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
14*	and any accompanying written materials. 
15*
16*	To the maximum extent permitted by applicable law,
17*	IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
18*	(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
19*	PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
20*	OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
21*	SOFTWARE.  Motorola assumes no responsibility for the maintenance
22*	and support of the SOFTWARE.  
23*
24*	You are hereby granted a copyright license to use, modify, and
25*	distribute the SOFTWARE so long as this entire notice is retained
26*	without alteration in any modified and/or redistributed versions,
27*	and that such modified versions are clearly identified as such.
28*	No licenses are granted by implication, estoppel or otherwise
29*	under any patents or trademarks of Motorola, Inc.
30
31*
32*	res_func.sa 3.9 7/29/91
33*
34* Normalizes denormalized numbers if necessary and updates the
35* stack frame.  The function is then restored back into the
36* machine and the 040 completes the operation.  This routine
37* is only used by the unsupported data type/format handler.
38* (Exception vector 55).
39*
40* For packed move out (fmove.p fpm,<ea>) the operation is
41* completed here; data is packed and moved to user memory. 
42* The stack is restored to the 040 only in the case of a
43* reportable exception in the conversion.
44*
45
46RES_FUNC    IDNT    2,1 Motorola 040 Floating Point Software Package
47
48	section	8
49
50	include	fpsp.h
51
52sp_bnds:	dc.w	$3f81,$407e
53		dc.w	$3f6a,$0000
54dp_bnds:	dc.w	$3c01,$43fe
55		dc.w	$3bcd,$0000
56
57	xref	mem_write
58	xref	bindec
59	xref	get_fline
60	xref	round
61	xref	denorm
62	xref	dest_ext
63	xref	dest_dbl
64	xref	dest_sgl
65	xref	unf_sub
66	xref	nrm_set
67	xref	dnrm_lp
68	xref	ovf_res
69	xref	reg_dest
70	xref	t_ovfl
71	xref	t_unfl
72
73	xdef	res_func
74	xdef 	p_move
75
76res_func:
77	clr.b	DNRM_FLG(a6)
78	clr.b	RES_FLG(a6)
79	clr.b	CU_ONLY(a6)
80	tst.b	DY_MO_FLG(a6)
81	beq.b	monadic
82dyadic:
83	btst.b	#7,DTAG(a6)	;if dop = norm=000, zero=001,
84*				;inf=010 or nan=011
85	beq.b	monadic		;then branch
86*				;else denorm
87* HANDLE DESTINATION DENORM HERE
88*				;set dtag to norm
89*				;write the tag & fpte15 to the fstack
90	lea.l	FPTEMP(a6),a0
91
92	bclr.b	#sign_bit,LOCAL_EX(a0)
93	sne	LOCAL_SGN(a0)
94
95	bsr	nrm_set		;normalize number (exp will go negative)
96	bclr.b	#sign_bit,LOCAL_EX(a0) ;get rid of false sign
97	bfclr	LOCAL_SGN(a0){0:8}	;change back to IEEE ext format
98	beq.b	dpos
99	bset.b	#sign_bit,LOCAL_EX(a0)
100dpos:
101	bfclr	DTAG(a6){0:4}	;set tag to normalized, FPTE15 = 0
102	bset.b	#4,DTAG(a6)	;set FPTE15
103	or.b	#$0f,DNRM_FLG(a6)
104monadic:
105	lea.l	ETEMP(a6),a0
106	btst.b	#direction_bit,CMDREG1B(a6)	;check direction
107	bne.w	opclass3			;it is a mv out
108*
109* At this point, only oplcass 0 and 2 possible
110*
111	btst.b	#7,STAG(a6)	;if sop = norm=000, zero=001,
112*				;inf=010 or nan=011
113	bne.w	mon_dnrm	;else denorm
114	tst.b	DY_MO_FLG(a6)	;all cases of dyadic instructions would
115	bne.w	normal		;require normalization of denorm
116
117* At this point:
118*	monadic instructions:	fabs  = $18  fneg   = $1a  ftst   = $3a
119*				fmove = $00  fsmove = $40  fdmove = $44
120*				fsqrt = $05* fssqrt = $41  fdsqrt = $45
121*				(*fsqrt reencoded to $05)
122*
123	move.w	CMDREG1B(a6),d0	;get command register
124	andi.l	#$7f,d0			;strip to only command word
125*
126* At this point, fabs, fneg, fsmove, fdmove, ftst, fsqrt, fssqrt, and 
127* fdsqrt are possible.
128* For cases fabs, fneg, fsmove, and fdmove goto spos (do not normalize)
129* For cases fsqrt, fssqrt, and fdsqrt goto nrm_src (do normalize)
130*
131	btst.l	#0,d0
132	bne.w	normal			;weed out fsqrt instructions
133*
134* cu_norm handles fmove in instructions with normalized inputs.
135* The routine round is used to correctly round the input for the
136* destination precision and mode.
137*
138cu_norm:
139	st	CU_ONLY(a6)		;set cu-only inst flag
140	move.w	CMDREG1B(a6),d0
141	andi.b	#$3b,d0		;isolate bits to select inst
142	tst.b	d0
143	beq.l	cu_nmove	;if zero, it is an fmove
144	cmpi.b	#$18,d0
145	beq.l	cu_nabs		;if $18, it is fabs
146	cmpi.b	#$1a,d0
147	beq.l	cu_nneg		;if $1a, it is fneg
148*
149* Inst is ftst.  Check the source operand and set the cc's accordingly.
150* No write is done, so simply rts.
151*
152cu_ntst:
153	move.w	LOCAL_EX(a0),d0
154	bclr.l	#15,d0
155	sne	LOCAL_SGN(a0)
156	beq.b	cu_ntpo
157	or.l	#neg_mask,USER_FPSR(a6) ;set N
158cu_ntpo:
159	cmpi.w	#$7fff,d0	;test for inf/nan
160	bne.b	cu_ntcz
161	tst.l	LOCAL_HI(a0)
162	bne.b	cu_ntn
163	tst.l	LOCAL_LO(a0)
164	bne.b	cu_ntn
165	or.l	#inf_mask,USER_FPSR(a6)
166	rts
167cu_ntn:
168	or.l	#nan_mask,USER_FPSR(a6)
169	move.l	ETEMP_EX(a6),FPTEMP_EX(a6)	;set up fptemp sign for 
170*						;snan handler
171
172	rts
173cu_ntcz:
174	tst.l	LOCAL_HI(a0)
175	bne.l	cu_ntsx
176	tst.l	LOCAL_LO(a0)
177	bne.l	cu_ntsx
178	or.l	#z_mask,USER_FPSR(a6)
179cu_ntsx:
180	rts
181*
182* Inst is fabs.  Execute the absolute value function on the input.
183* Branch to the fmove code.  If the operand is NaN, do nothing.
184*
185cu_nabs:
186	move.b	STAG(a6),d0
187	btst.l	#5,d0			;test for NaN or zero
188	bne	wr_etemp		;if either, simply write it
189	bclr.b	#7,LOCAL_EX(a0)		;do abs
190	bra.b	cu_nmove		;fmove code will finish
191*
192* Inst is fneg.  Execute the negate value function on the input.
193* Fall though to the fmove code.  If the operand is NaN, do nothing.
194*
195cu_nneg:
196	move.b	STAG(a6),d0
197	btst.l	#5,d0			;test for NaN or zero
198	bne	wr_etemp		;if either, simply write it
199	bchg.b	#7,LOCAL_EX(a0)		;do neg
200*
201* Inst is fmove.  This code also handles all result writes.
202* If bit 2 is set, round is forced to double.  If it is clear,
203* and bit 6 is set, round is forced to single.  If both are clear,
204* the round precision is found in the fpcr.  If the rounding precision
205* is double or single, round the result before the write.
206*
207cu_nmove:
208	move.b	STAG(a6),d0
209	andi.b	#$e0,d0			;isolate stag bits
210	bne	wr_etemp		;if not norm, simply write it
211	btst.b	#2,CMDREG1B+1(a6)	;check for rd
212	bne	cu_nmrd
213	btst.b	#6,CMDREG1B+1(a6)	;check for rs
214	bne	cu_nmrs
215*
216* The move or operation is not with forced precision.  Test for
217* nan or inf as the input; if so, simply write it to FPn.  Use the
218* FPCR_MODE byte to get rounding on norms and zeros.
219*
220cu_nmnr:
221	bfextu	FPCR_MODE(a6){0:2},d0
222	tst.b	d0			;check for extended
223	beq	cu_wrexn		;if so, just write result
224	cmpi.b	#1,d0			;check for single
225	beq	cu_nmrs			;fall through to double
226*
227* The move is fdmove or round precision is double.
228*
229cu_nmrd:
230	move.l	#2,d0			;set up the size for denorm
231	move.w	LOCAL_EX(a0),d1		;compare exponent to double threshold
232	and.w	#$7fff,d1	
233	cmp.w	#$3c01,d1
234	bls	cu_nunfl
235	bfextu	FPCR_MODE(a6){2:2},d1	;get rmode
236	or.l	#$00020000,d1		;or in rprec (double)
237	clr.l	d0			;clear g,r,s for round
238	bclr.b	#sign_bit,LOCAL_EX(a0)	;convert to internal format
239	sne	LOCAL_SGN(a0)
240	bsr.l	round
241	bfclr	LOCAL_SGN(a0){0:8}
242	beq.b	cu_nmrdc
243	bset.b	#sign_bit,LOCAL_EX(a0)
244cu_nmrdc:
245	move.w	LOCAL_EX(a0),d1		;check for overflow
246	and.w	#$7fff,d1
247	cmp.w	#$43ff,d1
248	bge	cu_novfl		;take care of overflow case
249	bra.w	cu_wrexn
250*
251* The move is fsmove or round precision is single.
252*
253cu_nmrs:
254	move.l	#1,d0
255	move.w	LOCAL_EX(a0),d1
256	and.w	#$7fff,d1
257	cmp.w	#$3f81,d1
258	bls	cu_nunfl
259	bfextu	FPCR_MODE(a6){2:2},d1
260	or.l	#$00010000,d1
261	clr.l	d0
262	bclr.b	#sign_bit,LOCAL_EX(a0)
263	sne	LOCAL_SGN(a0)
264	bsr.l	round
265	bfclr	LOCAL_SGN(a0){0:8}
266	beq.b	cu_nmrsc
267	bset.b	#sign_bit,LOCAL_EX(a0)
268cu_nmrsc:
269	move.w	LOCAL_EX(a0),d1
270	and.w	#$7FFF,d1
271	cmp.w	#$407f,d1
272	blt	cu_wrexn
273*
274* The operand is above precision boundaries.  Use t_ovfl to
275* generate the correct value.
276*
277cu_novfl:
278	bsr	t_ovfl
279	bra	cu_wrexn
280*
281* The operand is below precision boundaries.  Use denorm to
282* generate the correct value.
283*
284cu_nunfl:
285	bclr.b	#sign_bit,LOCAL_EX(a0)
286	sne	LOCAL_SGN(a0)
287	bsr	denorm
288	bfclr	LOCAL_SGN(a0){0:8}	;change back to IEEE ext format
289	beq.b	cu_nucont
290	bset.b	#sign_bit,LOCAL_EX(a0)
291cu_nucont:
292	bfextu	FPCR_MODE(a6){2:2},d1
293	btst.b	#2,CMDREG1B+1(a6)	;check for rd
294	bne	inst_d
295	btst.b	#6,CMDREG1B+1(a6)	;check for rs
296	bne	inst_s
297	swap	d1
298	move.b	FPCR_MODE(a6),d1
299	lsr.b	#6,d1
300	swap	d1
301	bra	inst_sd
302inst_d:
303	or.l	#$00020000,d1
304	bra	inst_sd
305inst_s:
306	or.l	#$00010000,d1
307inst_sd:
308	bclr.b	#sign_bit,LOCAL_EX(a0)
309	sne	LOCAL_SGN(a0)
310	bsr.l	round
311	bfclr	LOCAL_SGN(a0){0:8}
312	beq.b	cu_nuflp
313	bset.b	#sign_bit,LOCAL_EX(a0)
314cu_nuflp:
315	btst.b	#inex2_bit,FPSR_EXCEPT(a6)
316	beq.b	cu_nuninx
317	or.l	#aunfl_mask,USER_FPSR(a6) ;if the round was inex, set AUNFL
318cu_nuninx:
319	tst.l	LOCAL_HI(a0)		;test for zero
320	bne.b	cu_nunzro
321	tst.l	LOCAL_LO(a0)
322	bne.b	cu_nunzro
323*
324* The mantissa is zero from the denorm loop.  Check sign and rmode
325* to see if rounding should have occured which would leave the lsb.
326*
327	move.l	USER_FPCR(a6),d0
328	andi.l	#$30,d0		;isolate rmode
329	cmpi.l	#$20,d0
330	blt.b	cu_nzro
331	bne.b	cu_nrp
332cu_nrm:
333	tst.w	LOCAL_EX(a0)	;if positive, set lsb
334	bge.b	cu_nzro
335	btst.b	#7,FPCR_MODE(a6) ;check for double
336	beq.b	cu_nincs
337	bra.b	cu_nincd
338cu_nrp:
339	tst.w	LOCAL_EX(a0)	;if positive, set lsb
340	blt.b	cu_nzro
341	btst.b	#7,FPCR_MODE(a6) ;check for double
342	beq.b	cu_nincs
343cu_nincd:
344	or.l	#$800,LOCAL_LO(a0) ;inc for double
345	bra	cu_nunzro
346cu_nincs:
347	or.l	#$100,LOCAL_HI(a0) ;inc for single
348	bra	cu_nunzro
349cu_nzro:
350	or.l	#z_mask,USER_FPSR(a6)
351	move.b	STAG(a6),d0
352	andi.b	#$e0,d0
353	cmpi.b	#$40,d0		;check if input was tagged zero
354	beq.b	cu_numv
355cu_nunzro:
356	or.l	#unfl_mask,USER_FPSR(a6) ;set unfl
357cu_numv:
358	move.l	(a0),ETEMP(a6)
359	move.l	4(a0),ETEMP_HI(a6)
360	move.l	8(a0),ETEMP_LO(a6)
361*
362* Write the result to memory, setting the fpsr cc bits.  NaN and Inf
363* bypass cu_wrexn.
364*
365cu_wrexn:
366	tst.w	LOCAL_EX(a0)		;test for zero
367	beq.b	cu_wrzero
368	cmp.w	#$8000,LOCAL_EX(a0)	;test for zero
369	bne.b	cu_wreon
370cu_wrzero:
371	or.l	#z_mask,USER_FPSR(a6)	;set Z bit
372cu_wreon:
373	tst.w	LOCAL_EX(a0)
374	bpl	wr_etemp
375	or.l	#neg_mask,USER_FPSR(a6)
376	bra	wr_etemp
377
378*
379* HANDLE SOURCE DENORM HERE
380*
381*				;clear denorm stag to norm
382*				;write the new tag & ete15 to the fstack
383mon_dnrm:
384*
385* At this point, check for the cases in which normalizing the 
386* denorm produces incorrect results.
387*
388	tst.b	DY_MO_FLG(a6)	;all cases of dyadic instructions would
389	bne.b	nrm_src		;require normalization of denorm
390
391* At this point:
392*	monadic instructions:	fabs  = $18  fneg   = $1a  ftst   = $3a
393*				fmove = $00  fsmove = $40  fdmove = $44
394*				fsqrt = $05* fssqrt = $41  fdsqrt = $45
395*				(*fsqrt reencoded to $05)
396*
397	move.w	CMDREG1B(a6),d0	;get command register
398	andi.l	#$7f,d0			;strip to only command word
399*
400* At this point, fabs, fneg, fsmove, fdmove, ftst, fsqrt, fssqrt, and 
401* fdsqrt are possible.
402* For cases fabs, fneg, fsmove, and fdmove goto spos (do not normalize)
403* For cases fsqrt, fssqrt, and fdsqrt goto nrm_src (do normalize)
404*
405	btst.l	#0,d0
406	bne.b	nrm_src		;weed out fsqrt instructions
407	st	CU_ONLY(a6)	;set cu-only inst flag
408	bra	cu_dnrm		;fmove, fabs, fneg, ftst 
409*				;cases go to cu_dnrm
410nrm_src:
411	bclr.b	#sign_bit,LOCAL_EX(a0)
412	sne	LOCAL_SGN(a0)
413	bsr	nrm_set		;normalize number (exponent will go 
414*				; negative)
415	bclr.b	#sign_bit,LOCAL_EX(a0) ;get rid of false sign
416
417	bfclr	LOCAL_SGN(a0){0:8}	;change back to IEEE ext format
418	beq.b	spos
419	bset.b	#sign_bit,LOCAL_EX(a0)
420spos:
421	bfclr	STAG(a6){0:4}	;set tag to normalized, FPTE15 = 0
422	bset.b	#4,STAG(a6)	;set ETE15
423	or.b	#$f0,DNRM_FLG(a6)
424normal:
425	tst.b	DNRM_FLG(a6)	;check if any of the ops were denorms
426	bne	ck_wrap		;if so, check if it is a potential
427*				;wrap-around case
428fix_stk:
429	move.b	#$fe,CU_SAVEPC(a6)
430	bclr.b	#E1,E_BYTE(a6)
431
432	clr.w	NMNEXC(a6)
433
434	st.b	RES_FLG(a6)	;indicate that a restore is needed
435	rts
436
437*
438* cu_dnrm handles all cu-only instructions (fmove, fabs, fneg, and
439* ftst) completly in software without an frestore to the 040. 
440*
441cu_dnrm:
442	st.b	CU_ONLY(a6)
443	move.w	CMDREG1B(a6),d0
444	andi.b	#$3b,d0		;isolate bits to select inst
445	tst.b	d0
446	beq.l	cu_dmove	;if zero, it is an fmove
447	cmpi.b	#$18,d0
448	beq.l	cu_dabs		;if $18, it is fabs
449	cmpi.b	#$1a,d0
450	beq.l	cu_dneg		;if $1a, it is fneg
451*
452* Inst is ftst.  Check the source operand and set the cc's accordingly.
453* No write is done, so simply rts.
454*
455cu_dtst:
456	move.w	LOCAL_EX(a0),d0
457	bclr.l	#15,d0
458	sne	LOCAL_SGN(a0)
459	beq.b	cu_dtpo
460	or.l	#neg_mask,USER_FPSR(a6) ;set N
461cu_dtpo:
462	cmpi.w	#$7fff,d0	;test for inf/nan
463	bne.b	cu_dtcz
464	tst.l	LOCAL_HI(a0)
465	bne.b	cu_dtn
466	tst.l	LOCAL_LO(a0)
467	bne.b	cu_dtn
468	or.l	#inf_mask,USER_FPSR(a6)
469	rts
470cu_dtn:
471	or.l	#nan_mask,USER_FPSR(a6)
472	move.l	ETEMP_EX(a6),FPTEMP_EX(a6)	;set up fptemp sign for 
473*						;snan handler
474	rts
475cu_dtcz:
476	tst.l	LOCAL_HI(a0)
477	bne.l	cu_dtsx
478	tst.l	LOCAL_LO(a0)
479	bne.l	cu_dtsx
480	or.l	#z_mask,USER_FPSR(a6)
481cu_dtsx:
482	rts
483*
484* Inst is fabs.  Execute the absolute value function on the input.
485* Branch to the fmove code.
486*
487cu_dabs:
488	bclr.b	#7,LOCAL_EX(a0)		;do abs
489	bra.b	cu_dmove		;fmove code will finish
490*
491* Inst is fneg.  Execute the negate value function on the input.
492* Fall though to the fmove code.
493*
494cu_dneg:
495	bchg.b	#7,LOCAL_EX(a0)		;do neg
496*
497* Inst is fmove.  This code also handles all result writes.
498* If bit 2 is set, round is forced to double.  If it is clear,
499* and bit 6 is set, round is forced to single.  If both are clear,
500* the round precision is found in the fpcr.  If the rounding precision
501* is double or single, the result is zero, and the mode is checked
502* to determine if the lsb of the result should be set.
503*
504cu_dmove:
505	btst.b	#2,CMDREG1B+1(a6)	;check for rd
506	bne	cu_dmrd
507	btst.b	#6,CMDREG1B+1(a6)	;check for rs
508	bne	cu_dmrs
509*
510* The move or operation is not with forced precision.  Use the
511* FPCR_MODE byte to get rounding.
512*
513cu_dmnr:
514	bfextu	FPCR_MODE(a6){0:2},d0
515	tst.b	d0			;check for extended
516	beq	cu_wrexd		;if so, just write result
517	cmpi.b	#1,d0			;check for single
518	beq	cu_dmrs			;fall through to double
519*
520* The move is fdmove or round precision is double.  Result is zero.
521* Check rmode for rp or rm and set lsb accordingly.
522*
523cu_dmrd:
524	bfextu	FPCR_MODE(a6){2:2},d1	;get rmode
525	tst.w	LOCAL_EX(a0)		;check sign
526	blt.b	cu_dmdn
527	cmpi.b	#3,d1			;check for rp
528	bne	cu_dpd			;load double pos zero
529	bra	cu_dpdr			;load double pos zero w/lsb
530cu_dmdn:
531	cmpi.b	#2,d1			;check for rm
532	bne	cu_dnd			;load double neg zero
533	bra	cu_dndr			;load double neg zero w/lsb
534*
535* The move is fsmove or round precision is single.  Result is zero.
536* Check for rp or rm and set lsb accordingly.
537*
538cu_dmrs:
539	bfextu	FPCR_MODE(a6){2:2},d1	;get rmode
540	tst.w	LOCAL_EX(a0)		;check sign
541	blt.b	cu_dmsn
542	cmpi.b	#3,d1			;check for rp
543	bne	cu_spd			;load single pos zero
544	bra	cu_spdr			;load single pos zero w/lsb
545cu_dmsn:
546	cmpi.b	#2,d1			;check for rm
547	bne	cu_snd			;load single neg zero
548	bra	cu_sndr			;load single neg zero w/lsb
549*
550* The precision is extended, so the result in etemp is correct.
551* Simply set unfl (not inex2 or aunfl) and write the result to 
552* the correct fp register.
553cu_wrexd:
554	or.l	#unfl_mask,USER_FPSR(a6)
555	tst.w	LOCAL_EX(a0)
556	beq	wr_etemp
557	or.l	#neg_mask,USER_FPSR(a6)
558	bra	wr_etemp
559*
560* These routines write +/- zero in double format.  The routines
561* cu_dpdr and cu_dndr set the double lsb.
562*
563cu_dpd:
564	move.l	#$3c010000,LOCAL_EX(a0)	;force pos double zero
565	clr.l	LOCAL_HI(a0)
566	clr.l	LOCAL_LO(a0)
567	or.l	#z_mask,USER_FPSR(a6)
568	or.l	#unfinx_mask,USER_FPSR(a6)
569	bra	wr_etemp
570cu_dpdr:
571	move.l	#$3c010000,LOCAL_EX(a0)	;force pos double zero
572	clr.l	LOCAL_HI(a0)
573	move.l	#$800,LOCAL_LO(a0)	;with lsb set
574	or.l	#unfinx_mask,USER_FPSR(a6)
575	bra	wr_etemp
576cu_dnd:
577	move.l	#$bc010000,LOCAL_EX(a0)	;force pos double zero
578	clr.l	LOCAL_HI(a0)
579	clr.l	LOCAL_LO(a0)
580	or.l	#z_mask,USER_FPSR(a6)
581	or.l	#neg_mask,USER_FPSR(a6)
582	or.l	#unfinx_mask,USER_FPSR(a6)
583	bra	wr_etemp
584cu_dndr:
585	move.l	#$bc010000,LOCAL_EX(a0)	;force pos double zero
586	clr.l	LOCAL_HI(a0)
587	move.l	#$800,LOCAL_LO(a0)	;with lsb set
588	or.l	#neg_mask,USER_FPSR(a6)
589	or.l	#unfinx_mask,USER_FPSR(a6)
590	bra	wr_etemp
591*
592* These routines write +/- zero in single format.  The routines
593* cu_dpdr and cu_dndr set the single lsb.
594*
595cu_spd:
596	move.l	#$3f810000,LOCAL_EX(a0)	;force pos single zero
597	clr.l	LOCAL_HI(a0)
598	clr.l	LOCAL_LO(a0)
599	or.l	#z_mask,USER_FPSR(a6)
600	or.l	#unfinx_mask,USER_FPSR(a6)
601	bra	wr_etemp
602cu_spdr:
603	move.l	#$3f810000,LOCAL_EX(a0)	;force pos single zero
604	move.l	#$100,LOCAL_HI(a0)	;with lsb set
605	clr.l	LOCAL_LO(a0)
606	or.l	#unfinx_mask,USER_FPSR(a6)
607	bra	wr_etemp
608cu_snd:
609	move.l	#$bf810000,LOCAL_EX(a0)	;force pos single zero
610	clr.l	LOCAL_HI(a0)
611	clr.l	LOCAL_LO(a0)
612	or.l	#z_mask,USER_FPSR(a6)
613	or.l	#neg_mask,USER_FPSR(a6)
614	or.l	#unfinx_mask,USER_FPSR(a6)
615	bra	wr_etemp
616cu_sndr:
617	move.l	#$bf810000,LOCAL_EX(a0)	;force pos single zero
618	move.l	#$100,LOCAL_HI(a0)	;with lsb set
619	clr.l	LOCAL_LO(a0)
620	or.l	#neg_mask,USER_FPSR(a6)
621	or.l	#unfinx_mask,USER_FPSR(a6)
622	bra	wr_etemp
623	
624*
625* This code checks for 16-bit overflow conditions on dyadic
626* operations which are not restorable into the floating-point
627* unit and must be completed in software.  Basically, this
628* condition exists with a very large norm and a denorm.  One
629* of the operands must be denormalized to enter this code.
630*
631* Flags used:
632*	DY_MO_FLG contains 0 for monadic op, $ff for dyadic
633*	DNRM_FLG contains $00 for neither op denormalized
634*	                  $0f for the destination op denormalized
635*	                  $f0 for the source op denormalized
636*	                  $ff for both ops denormalzed
637*
638* The wrap-around condition occurs for add, sub, div, and cmp
639* when 
640*
641*	abs(dest_exp - src_exp) >= $8000
642*
643* and for mul when
644*
645*	(dest_exp + src_exp) < $0
646*
647* we must process the operation here if this case is true.
648*
649* The rts following the frcfpn routine is the exit from res_func
650* for this condition.  The restore flag (RES_FLG) is left clear.
651* No frestore is done unless an exception is to be reported.
652*
653* For fadd: 
654*	if(sign_of(dest) != sign_of(src))
655*		replace exponent of src with $3fff (keep sign)
656*		use fpu to perform dest+new_src (user's rmode and X)
657*		clr sticky
658*	else
659*		set sticky
660*	call round with user's precision and mode
661*	move result to fpn and wbtemp
662*
663* For fsub:
664*	if(sign_of(dest) == sign_of(src))
665*		replace exponent of src with $3fff (keep sign)
666*		use fpu to perform dest+new_src (user's rmode and X)
667*		clr sticky
668*	else
669*		set sticky
670*	call round with user's precision and mode
671*	move result to fpn and wbtemp
672*
673* For fdiv/fsgldiv:
674*	if(both operands are denorm)
675*		restore_to_fpu;
676*	if(dest is norm)
677*		force_ovf;
678*	else(dest is denorm)
679*		force_unf:
680*
681* For fcmp:
682*	if(dest is norm)
683*		N = sign_of(dest);
684*	else(dest is denorm)
685*		N = sign_of(src);
686*
687* For fmul:
688*	if(both operands are denorm)
689*		force_unf;
690*	if((dest_exp + src_exp) < 0)
691*		force_unf:
692*	else
693*		restore_to_fpu;
694*
695* local equates:
696addcode	equ	$22
697subcode	equ	$28
698mulcode	equ	$23
699divcode	equ	$20
700cmpcode	equ	$38
701ck_wrap:
702	tst.b	DY_MO_FLG(a6)	;check for fsqrt
703	beq	fix_stk		;if zero, it is fsqrt
704	move.w	CMDREG1B(a6),d0
705	andi.w	#$3b,d0		;strip to command bits
706	cmpi.w	#addcode,d0
707	beq	wrap_add
708	cmpi.w	#subcode,d0
709	beq	wrap_sub
710	cmpi.w	#mulcode,d0
711	beq	wrap_mul
712	cmpi.w	#cmpcode,d0
713	beq	wrap_cmp
714*
715* Inst is fdiv.  
716*
717wrap_div:
718	cmp.b	#$ff,DNRM_FLG(a6) ;if both ops denorm, 
719	beq	fix_stk		 ;restore to fpu
720*
721* One of the ops is denormalized.  Test for wrap condition
722* and force the result.
723*
724	cmp.b	#$0f,DNRM_FLG(a6) ;check for dest denorm
725	bne.b	div_srcd
726div_destd:
727	bsr.l	ckinf_ns
728	bne	fix_stk
729	bfextu	ETEMP_EX(a6){1:15},d0	;get src exp (always pos)
730	bfexts	FPTEMP_EX(a6){1:15},d1	;get dest exp (always neg)
731	sub.l	d1,d0			;subtract dest from src
732	cmp.l	#$7fff,d0
733	blt	fix_stk			;if less, not wrap case
734	clr.b	WBTEMP_SGN(a6)
735	move.w	ETEMP_EX(a6),d0		;find the sign of the result
736	move.w	FPTEMP_EX(a6),d1
737	eor.w	d1,d0
738	andi.w	#$8000,d0
739	beq	force_unf
740	st.b	WBTEMP_SGN(a6)
741	bra	force_unf
742
743ckinf_ns:
744	move.b	STAG(a6),d0		;check source tag for inf or nan
745	bra	ck_in_com
746ckinf_nd:
747	move.b	DTAG(a6),d0		;check destination tag for inf or nan
748ck_in_com:	
749	andi.b	#$60,d0			;isolate tag bits
750	cmp.b	#$40,d0			;is it inf?
751	beq	nan_or_inf		;not wrap case
752	cmp.b	#$60,d0			;is it nan?
753	beq	nan_or_inf		;yes, not wrap case?
754	cmp.b	#$20,d0			;is it a zero?
755	beq	nan_or_inf		;yes
756	clr.l	d0
757	rts				;then it is either a zero of norm,
758*					;check wrap case
759nan_or_inf:
760	moveq.l	#-1,d0
761	rts
762
763
764
765div_srcd:
766	bsr.l	ckinf_nd
767	bne	fix_stk
768	bfextu	FPTEMP_EX(a6){1:15},d0	;get dest exp (always pos)
769	bfexts	ETEMP_EX(a6){1:15},d1	;get src exp (always neg)
770	sub.l	d1,d0			;subtract src from dest
771	cmp.l	#$8000,d0
772	blt	fix_stk			;if less, not wrap case
773	clr.b	WBTEMP_SGN(a6)
774	move.w	ETEMP_EX(a6),d0		;find the sign of the result
775	move.w	FPTEMP_EX(a6),d1
776	eor.w	d1,d0
777	andi.w	#$8000,d0
778	beq.b	force_ovf
779	st.b	WBTEMP_SGN(a6)
780*
781* This code handles the case of the instruction resulting in 
782* an overflow condition.
783*
784force_ovf:
785	bclr.b	#E1,E_BYTE(a6)
786	or.l	#ovfl_inx_mask,USER_FPSR(a6)
787	clr.w	NMNEXC(a6)
788	lea.l	WBTEMP(a6),a0		;point a0 to memory location
789	move.w	CMDREG1B(a6),d0
790	btst.l	#6,d0			;test for forced precision
791	beq.b	frcovf_fpcr
792	btst.l	#2,d0			;check for double
793	bne.b	frcovf_dbl
794	move.l	#$1,d0			;inst is forced single
795	bra.b	frcovf_rnd
796frcovf_dbl:
797	move.l	#$2,d0			;inst is forced double
798	bra.b	frcovf_rnd
799frcovf_fpcr:
800	bfextu	FPCR_MODE(a6){0:2},d0	;inst not forced - use fpcr prec
801frcovf_rnd:
802
803* The 881/882 does not set inex2 for the following case, so the 
804* line is commented out to be compatible with 881/882
805*	tst.b	d0
806*	beq.b	frcovf_x
807*	or.l	#inex2_mask,USER_FPSR(a6) ;if prec is s or d, set inex2
808
809*frcovf_x:
810	bsr.l	ovf_res			;get correct result based on
811*					;round precision/mode.  This 
812*					;sets FPSR_CC correctly
813*					;returns in external format
814	bfclr	WBTEMP_SGN(a6){0:8}
815	beq	frcfpn
816	bset.b	#sign_bit,WBTEMP_EX(a6)
817	bra	frcfpn
818*
819* Inst is fadd.
820*
821wrap_add:
822	cmp.b	#$ff,DNRM_FLG(a6) ;if both ops denorm, 
823	beq	fix_stk		 ;restore to fpu
824*
825* One of the ops is denormalized.  Test for wrap condition
826* and complete the instruction.
827*
828	cmp.b	#$0f,DNRM_FLG(a6) ;check for dest denorm
829	bne.b	add_srcd
830add_destd:
831	bsr.l	ckinf_ns
832	bne	fix_stk
833	bfextu	ETEMP_EX(a6){1:15},d0	;get src exp (always pos)
834	bfexts	FPTEMP_EX(a6){1:15},d1	;get dest exp (always neg)
835	sub.l	d1,d0			;subtract dest from src
836	cmp.l	#$8000,d0
837	blt	fix_stk			;if less, not wrap case
838	bra	add_wrap
839add_srcd:
840	bsr.l	ckinf_nd
841	bne	fix_stk
842	bfextu	FPTEMP_EX(a6){1:15},d0	;get dest exp (always pos)
843	bfexts	ETEMP_EX(a6){1:15},d1	;get src exp (always neg)
844	sub.l	d1,d0			;subtract src from dest
845	cmp.l	#$8000,d0
846	blt	fix_stk			;if less, not wrap case
847*
848* Check the signs of the operands.  If they are unlike, the fpu
849* can be used to add the norm and 1.0 with the sign of the
850* denorm and it will correctly generate the result in extended
851* precision.  We can then call round with no sticky and the result
852* will be correct for the user's rounding mode and precision.  If
853* the signs are the same, we call round with the sticky bit set
854* and the result will be correctfor the user's rounding mode and
855* precision.
856*
857add_wrap:
858	move.w	ETEMP_EX(a6),d0
859	move.w	FPTEMP_EX(a6),d1
860	eor.w	d1,d0
861	andi.w	#$8000,d0
862	beq	add_same
863*
864* The signs are unlike.
865*
866	cmp.b	#$0f,DNRM_FLG(a6) ;is dest the denorm?
867	bne.b	add_u_srcd
868	move.w	FPTEMP_EX(a6),d0
869	andi.w	#$8000,d0
870	or.w	#$3fff,d0	;force the exponent to +/- 1
871	move.w	d0,FPTEMP_EX(a6) ;in the denorm
872	move.l	USER_FPCR(a6),d0
873	andi.l	#$30,d0
874	fmove.l	d0,fpcr		;set up users rmode and X
875	fmove.x	ETEMP(a6),fp0
876	fadd.x	FPTEMP(a6),fp0
877	lea.l	WBTEMP(a6),a0	;point a0 to wbtemp in frame
878	fmove.l	fpsr,d1
879	or.l	d1,USER_FPSR(a6) ;capture cc's and inex from fadd
880	fmove.x	fp0,WBTEMP(a6)	;write result to memory
881	lsr.l	#4,d0		;put rmode in lower 2 bits
882	move.l	USER_FPCR(a6),d1
883	andi.l	#$c0,d1
884	lsr.l	#6,d1		;put precision in upper word
885	swap	d1
886	or.l	d0,d1		;set up for round call
887	clr.l	d0		;force sticky to zero
888	bclr.b	#sign_bit,WBTEMP_EX(a6)
889	sne	WBTEMP_SGN(a6)
890	bsr.l	round		;round result to users rmode & prec
891	bfclr	WBTEMP_SGN(a6){0:8}	;convert back to IEEE ext format
892	beq	frcfpnr
893	bset.b	#sign_bit,WBTEMP_EX(a6)
894	bra	frcfpnr
895add_u_srcd:
896	move.w	ETEMP_EX(a6),d0
897	andi.w	#$8000,d0
898	or.w	#$3fff,d0	;force the exponent to +/- 1
899	move.w	d0,ETEMP_EX(a6) ;in the denorm
900	move.l	USER_FPCR(a6),d0
901	andi.l	#$30,d0
902	fmove.l	d0,fpcr		;set up users rmode and X
903	fmove.x	ETEMP(a6),fp0
904	fadd.x	FPTEMP(a6),fp0
905	fmove.l	fpsr,d1
906	or.l	d1,USER_FPSR(a6) ;capture cc's and inex from fadd
907	lea.l	WBTEMP(a6),a0	;point a0 to wbtemp in frame
908	fmove.x	fp0,WBTEMP(a6)	;write result to memory
909	lsr.l	#4,d0		;put rmode in lower 2 bits
910	move.l	USER_FPCR(a6),d1
911	andi.l	#$c0,d1
912	lsr.l	#6,d1		;put precision in upper word
913	swap	d1
914	or.l	d0,d1		;set up for round call
915	clr.l	d0		;force sticky to zero
916	bclr.b	#sign_bit,WBTEMP_EX(a6)
917	sne	WBTEMP_SGN(a6)	;use internal format for round
918	bsr.l	round		;round result to users rmode & prec
919	bfclr	WBTEMP_SGN(a6){0:8}	;convert back to IEEE ext format
920	beq	frcfpnr
921	bset.b	#sign_bit,WBTEMP_EX(a6)
922	bra	frcfpnr
923*
924* Signs are alike:
925*
926add_same:
927	cmp.b	#$0f,DNRM_FLG(a6) ;is dest the denorm?
928	bne.b	add_s_srcd
929add_s_destd:
930	lea.l	ETEMP(a6),a0
931	move.l	USER_FPCR(a6),d0
932	andi.l	#$30,d0
933	lsr.l	#4,d0		;put rmode in lower 2 bits
934	move.l	USER_FPCR(a6),d1
935	andi.l	#$c0,d1
936	lsr.l	#6,d1		;put precision in upper word
937	swap	d1
938	or.l	d0,d1		;set up for round call
939	move.l	#$20000000,d0	;set sticky for round
940	bclr.b	#sign_bit,ETEMP_EX(a6)
941	sne	ETEMP_SGN(a6)
942	bsr.l	round		;round result to users rmode & prec
943	bfclr	ETEMP_SGN(a6){0:8}	;convert back to IEEE ext format
944	beq.b	add_s_dclr
945	bset.b	#sign_bit,ETEMP_EX(a6)
946add_s_dclr:
947	lea.l	WBTEMP(a6),a0
948	move.l	ETEMP(a6),(a0)	;write result to wbtemp
949	move.l	ETEMP_HI(a6),4(a0)
950	move.l	ETEMP_LO(a6),8(a0)
951	tst.w	ETEMP_EX(a6)
952	bgt	add_ckovf
953	or.l	#neg_mask,USER_FPSR(a6)
954	bra	add_ckovf
955add_s_srcd:
956	lea.l	FPTEMP(a6),a0
957	move.l	USER_FPCR(a6),d0
958	andi.l	#$30,d0
959	lsr.l	#4,d0		;put rmode in lower 2 bits
960	move.l	USER_FPCR(a6),d1
961	andi.l	#$c0,d1
962	lsr.l	#6,d1		;put precision in upper word
963	swap	d1
964	or.l	d0,d1		;set up for round call
965	move.l	#$20000000,d0	;set sticky for round
966	bclr.b	#sign_bit,FPTEMP_EX(a6)
967	sne	FPTEMP_SGN(a6)
968	bsr.l	round		;round result to users rmode & prec
969	bfclr	FPTEMP_SGN(a6){0:8}	;convert back to IEEE ext format
970	beq.b	add_s_sclr
971	bset.b	#sign_bit,FPTEMP_EX(a6)
972add_s_sclr:
973	lea.l	WBTEMP(a6),a0
974	move.l	FPTEMP(a6),(a0)	;write result to wbtemp
975	move.l	FPTEMP_HI(a6),4(a0)
976	move.l	FPTEMP_LO(a6),8(a0)
977	tst.w	FPTEMP_EX(a6)
978	bgt	add_ckovf
979	or.l	#neg_mask,USER_FPSR(a6)
980add_ckovf:
981	move.w	WBTEMP_EX(a6),d0
982	andi.w	#$7fff,d0
983	cmpi.w	#$7fff,d0
984	bne	frcfpnr
985*
986* The result has overflowed to $7fff exponent.  Set I, ovfl,
987* and aovfl, and clr the mantissa (incorrectly set by the
988* round routine.)
989*
990	or.l	#inf_mask+ovfl_inx_mask,USER_FPSR(a6)	
991	clr.l	4(a0)
992	bra	frcfpnr
993*
994* Inst is fsub.
995*
996wrap_sub:
997	cmp.b	#$ff,DNRM_FLG(a6) ;if both ops denorm, 
998	beq	fix_stk		 ;restore to fpu
999*
1000* One of the ops is denormalized.  Test for wrap condition
1001* and complete the instruction.
1002*
1003	cmp.b	#$0f,DNRM_FLG(a6) ;check for dest denorm
1004	bne.b	sub_srcd
1005sub_destd:
1006	bsr.l	ckinf_ns
1007	bne	fix_stk
1008	bfextu	ETEMP_EX(a6){1:15},d0	;get src exp (always pos)
1009	bfexts	FPTEMP_EX(a6){1:15},d1	;get dest exp (always neg)
1010	sub.l	d1,d0			;subtract src from dest
1011	cmp.l	#$8000,d0
1012	blt	fix_stk			;if less, not wrap case
1013	bra	sub_wrap
1014sub_srcd:
1015	bsr.l	ckinf_nd
1016	bne	fix_stk
1017	bfextu	FPTEMP_EX(a6){1:15},d0	;get dest exp (always pos)
1018	bfexts	ETEMP_EX(a6){1:15},d1	;get src exp (always neg)
1019	sub.l	d1,d0			;subtract dest from src
1020	cmp.l	#$8000,d0
1021	blt	fix_stk			;if less, not wrap case
1022*
1023* Check the signs of the operands.  If they are alike, the fpu
1024* can be used to subtract from the norm 1.0 with the sign of the
1025* denorm and it will correctly generate the result in extended
1026* precision.  We can then call round with no sticky and the result
1027* will be correct for the user's rounding mode and precision.  If
1028* the signs are unlike, we call round with the sticky bit set
1029* and the result will be correctfor the user's rounding mode and
1030* precision.
1031*
1032sub_wrap:
1033	move.w	ETEMP_EX(a6),d0
1034	move.w	FPTEMP_EX(a6),d1
1035	eor.w	d1,d0
1036	andi.w	#$8000,d0
1037	bne	sub_diff
1038*
1039* The signs are alike.
1040*
1041	cmp.b	#$0f,DNRM_FLG(a6) ;is dest the denorm?
1042	bne.b	sub_u_srcd
1043	move.w	FPTEMP_EX(a6),d0
1044	andi.w	#$8000,d0
1045	or.w	#$3fff,d0	;force the exponent to +/- 1
1046	move.w	d0,FPTEMP_EX(a6) ;in the denorm
1047	move.l	USER_FPCR(a6),d0
1048	andi.l	#$30,d0
1049	fmove.l	d0,fpcr		;set up users rmode and X
1050	fmove.x	FPTEMP(a6),fp0
1051	fsub.x	ETEMP(a6),fp0
1052	fmove.l	fpsr,d1
1053	or.l	d1,USER_FPSR(a6) ;capture cc's and inex from fadd
1054	lea.l	WBTEMP(a6),a0	;point a0 to wbtemp in frame
1055	fmove.x	fp0,WBTEMP(a6)	;write result to memory
1056	lsr.l	#4,d0		;put rmode in lower 2 bits
1057	move.l	USER_FPCR(a6),d1
1058	andi.l	#$c0,d1
1059	lsr.l	#6,d1		;put precision in upper word
1060	swap	d1
1061	or.l	d0,d1		;set up for round call
1062	clr.l	d0		;force sticky to zero
1063	bclr.b	#sign_bit,WBTEMP_EX(a6)
1064	sne	WBTEMP_SGN(a6)
1065	bsr.l	round		;round result to users rmode & prec
1066	bfclr	WBTEMP_SGN(a6){0:8}	;convert back to IEEE ext format
1067	beq	frcfpnr
1068	bset.b	#sign_bit,WBTEMP_EX(a6)
1069	bra	frcfpnr
1070sub_u_srcd:
1071	move.w	ETEMP_EX(a6),d0
1072	andi.w	#$8000,d0
1073	or.w	#$3fff,d0	;force the exponent to +/- 1
1074	move.w	d0,ETEMP_EX(a6) ;in the denorm
1075	move.l	USER_FPCR(a6),d0
1076	andi.l	#$30,d0
1077	fmove.l	d0,fpcr		;set up users rmode and X
1078	fmove.x	FPTEMP(a6),fp0
1079	fsub.x	ETEMP(a6),fp0
1080	fmove.l	fpsr,d1
1081	or.l	d1,USER_FPSR(a6) ;capture cc's and inex from fadd
1082	lea.l	WBTEMP(a6),a0	;point a0 to wbtemp in frame
1083	fmove.x	fp0,WBTEMP(a6)	;write result to memory
1084	lsr.l	#4,d0		;put rmode in lower 2 bits
1085	move.l	USER_FPCR(a6),d1
1086	andi.l	#$c0,d1
1087	lsr.l	#6,d1		;put precision in upper word
1088	swap	d1
1089	or.l	d0,d1		;set up for round call
1090	clr.l	d0		;force sticky to zero
1091	bclr.b	#sign_bit,WBTEMP_EX(a6)
1092	sne	WBTEMP_SGN(a6)
1093	bsr.l	round		;round result to users rmode & prec
1094	bfclr	WBTEMP_SGN(a6){0:8}	;convert back to IEEE ext format
1095	beq	frcfpnr
1096	bset.b	#sign_bit,WBTEMP_EX(a6)
1097	bra	frcfpnr
1098*
1099* Signs are unlike:
1100*
1101sub_diff:
1102	cmp.b	#$0f,DNRM_FLG(a6) ;is dest the denorm?
1103	bne.b	sub_s_srcd
1104sub_s_destd:
1105	lea.l	ETEMP(a6),a0
1106	move.l	USER_FPCR(a6),d0
1107	andi.l	#$30,d0
1108	lsr.l	#4,d0		;put rmode in lower 2 bits
1109	move.l	USER_FPCR(a6),d1
1110	andi.l	#$c0,d1
1111	lsr.l	#6,d1		;put precision in upper word
1112	swap	d1
1113	or.l	d0,d1		;set up for round call
1114	move.l	#$20000000,d0	;set sticky for round
1115*
1116* Since the dest is the denorm, the sign is the opposite of the
1117* norm sign.
1118*
1119	eori.w	#$8000,ETEMP_EX(a6)	;flip sign on result
1120	tst.w	ETEMP_EX(a6)
1121	bgt.b	sub_s_dwr
1122	or.l	#neg_mask,USER_FPSR(a6)
1123sub_s_dwr:
1124	bclr.b	#sign_bit,ETEMP_EX(a6)
1125	sne	ETEMP_SGN(a6)
1126	bsr.l	round		;round result to users rmode & prec
1127	bfclr	ETEMP_SGN(a6){0:8}	;convert back to IEEE ext format
1128	beq.b	sub_s_dclr
1129	bset.b	#sign_bit,ETEMP_EX(a6)
1130sub_s_dclr:
1131	lea.l	WBTEMP(a6),a0
1132	move.l	ETEMP(a6),(a0)	;write result to wbtemp
1133	move.l	ETEMP_HI(a6),4(a0)
1134	move.l	ETEMP_LO(a6),8(a0)
1135	bra	sub_ckovf
1136sub_s_srcd:
1137	lea.l	FPTEMP(a6),a0
1138	move.l	USER_FPCR(a6),d0
1139	andi.l	#$30,d0
1140	lsr.l	#4,d0		;put rmode in lower 2 bits
1141	move.l	USER_FPCR(a6),d1
1142	andi.l	#$c0,d1
1143	lsr.l	#6,d1		;put precision in upper word
1144	swap	d1
1145	or.l	d0,d1		;set up for round call
1146	move.l	#$20000000,d0	;set sticky for round
1147	bclr.b	#sign_bit,FPTEMP_EX(a6)
1148	sne	FPTEMP_SGN(a6)
1149	bsr.l	round		;round result to users rmode & prec
1150	bfclr	FPTEMP_SGN(a6){0:8}	;convert back to IEEE ext format
1151	beq.b	sub_s_sclr
1152	bset.b	#sign_bit,FPTEMP_EX(a6)
1153sub_s_sclr:
1154	lea.l	WBTEMP(a6),a0
1155	move.l	FPTEMP(a6),(a0)	;write result to wbtemp
1156	move.l	FPTEMP_HI(a6),4(a0)
1157	move.l	FPTEMP_LO(a6),8(a0)
1158	tst.w	FPTEMP_EX(a6)
1159	bgt	sub_ckovf
1160	or.l	#neg_mask,USER_FPSR(a6)
1161sub_ckovf:
1162	move.w	WBTEMP_EX(a6),d0
1163	andi.w	#$7fff,d0
1164	cmpi.w	#$7fff,d0
1165	bne	frcfpnr
1166*
1167* The result has overflowed to $7fff exponent.  Set I, ovfl,
1168* and aovfl, and clr the mantissa (incorrectly set by the
1169* round routine.)
1170*
1171	or.l	#inf_mask+ovfl_inx_mask,USER_FPSR(a6)	
1172	clr.l	4(a0)
1173	bra	frcfpnr
1174*
1175* Inst is fcmp.
1176*
1177wrap_cmp:
1178	cmp.b	#$ff,DNRM_FLG(a6) ;if both ops denorm, 
1179	beq	fix_stk		 ;restore to fpu
1180*
1181* One of the ops is denormalized.  Test for wrap condition
1182* and complete the instruction.
1183*
1184	cmp.b	#$0f,DNRM_FLG(a6) ;check for dest denorm
1185	bne.b	cmp_srcd
1186cmp_destd:
1187	bsr.l	ckinf_ns
1188	bne	fix_stk
1189	bfextu	ETEMP_EX(a6){1:15},d0	;get src exp (always pos)
1190	bfexts	FPTEMP_EX(a6){1:15},d1	;get dest exp (always neg)
1191	sub.l	d1,d0			;subtract dest from src
1192	cmp.l	#$8000,d0
1193	blt	fix_stk			;if less, not wrap case
1194	tst.w	ETEMP_EX(a6)		;set N to ~sign_of(src)
1195	bge	cmp_setn
1196	rts
1197cmp_srcd:
1198	bsr.l	ckinf_nd
1199	bne	fix_stk
1200	bfextu	FPTEMP_EX(a6){1:15},d0	;get dest exp (always pos)
1201	bfexts	ETEMP_EX(a6){1:15},d1	;get src exp (always neg)
1202	sub.l	d1,d0			;subtract src from dest
1203	cmp.l	#$8000,d0
1204	blt	fix_stk			;if less, not wrap case
1205	tst.w	FPTEMP_EX(a6)		;set N to sign_of(dest)
1206	blt	cmp_setn
1207	rts
1208cmp_setn:
1209	or.l	#neg_mask,USER_FPSR(a6)
1210	rts
1211
1212*
1213* Inst is fmul.
1214*
1215wrap_mul:
1216	cmp.b	#$ff,DNRM_FLG(a6) ;if both ops denorm, 
1217	beq	force_unf	;force an underflow (really!)
1218*
1219* One of the ops is denormalized.  Test for wrap condition
1220* and complete the instruction.
1221*
1222	cmp.b	#$0f,DNRM_FLG(a6) ;check for dest denorm
1223	bne.b	mul_srcd
1224mul_destd:
1225	bsr.l	ckinf_ns
1226	bne	fix_stk
1227	bfextu	ETEMP_EX(a6){1:15},d0	;get src exp (always pos)
1228	bfexts	FPTEMP_EX(a6){1:15},d1	;get dest exp (always neg)
1229	add.l	d1,d0			;subtract dest from src
1230	bgt	fix_stk
1231	bra	force_unf
1232mul_srcd:
1233	bsr.l	ckinf_nd
1234	bne	fix_stk
1235	bfextu	FPTEMP_EX(a6){1:15},d0	;get dest exp (always pos)
1236	bfexts	ETEMP_EX(a6){1:15},d1	;get src exp (always neg)
1237	add.l	d1,d0			;subtract src from dest
1238	bgt	fix_stk
1239	
1240*
1241* This code handles the case of the instruction resulting in 
1242* an underflow condition.
1243*
1244force_unf:
1245	bclr.b	#E1,E_BYTE(a6)
1246	or.l	#unfinx_mask,USER_FPSR(a6)
1247	clr.w	NMNEXC(a6)
1248	clr.b	WBTEMP_SGN(a6)
1249	move.w	ETEMP_EX(a6),d0		;find the sign of the result
1250	move.w	FPTEMP_EX(a6),d1
1251	eor.w	d1,d0
1252	andi.w	#$8000,d0
1253	beq.b	frcunfcont
1254	st.b	WBTEMP_SGN(a6)
1255frcunfcont:
1256	lea	WBTEMP(a6),a0		;point a0 to memory location
1257	move.w	CMDREG1B(a6),d0
1258	btst.l	#6,d0			;test for forced precision
1259	beq.b	frcunf_fpcr
1260	btst.l	#2,d0			;check for double
1261	bne.b	frcunf_dbl
1262	move.l	#$1,d0			;inst is forced single
1263	bra.b	frcunf_rnd
1264frcunf_dbl:
1265	move.l	#$2,d0			;inst is forced double
1266	bra.b	frcunf_rnd
1267frcunf_fpcr:
1268	bfextu	FPCR_MODE(a6){0:2},d0	;inst not forced - use fpcr prec
1269frcunf_rnd:
1270	bsr.l	unf_sub			;get correct result based on
1271*					;round precision/mode.  This 
1272*					;sets FPSR_CC correctly
1273	bfclr	WBTEMP_SGN(a6){0:8}	;convert back to IEEE ext format
1274	beq.b	frcfpn
1275	bset.b	#sign_bit,WBTEMP_EX(a6)
1276	bra	frcfpn
1277
1278*
1279* Write the result to the user's fpn.  All results must be HUGE to be
1280* written; otherwise the results would have overflowed or underflowed.
1281* If the rounding precision is single or double, the ovf_res routine
1282* is needed to correctly supply the max value.
1283*
1284frcfpnr:
1285	move.w	CMDREG1B(a6),d0
1286	btst.l	#6,d0			;test for forced precision
1287	beq.b	frcfpn_fpcr
1288	btst.l	#2,d0			;check for double
1289	bne.b	frcfpn_dbl
1290	move.l	#$1,d0			;inst is forced single
1291	bra.b	frcfpn_rnd
1292frcfpn_dbl:
1293	move.l	#$2,d0			;inst is forced double
1294	bra.b	frcfpn_rnd
1295frcfpn_fpcr:
1296	bfextu	FPCR_MODE(a6){0:2},d0	;inst not forced - use fpcr prec
1297	tst.b	d0
1298	beq.b	frcfpn			;if extended, write what you got
1299frcfpn_rnd:
1300	bclr.b	#sign_bit,WBTEMP_EX(a6)
1301	sne	WBTEMP_SGN(a6)
1302	bsr.l	ovf_res			;get correct result based on
1303*					;round precision/mode.  This 
1304*					;sets FPSR_CC correctly
1305	bfclr	WBTEMP_SGN(a6){0:8}	;convert back to IEEE ext format
1306	beq.b	frcfpn_clr
1307	bset.b	#sign_bit,WBTEMP_EX(a6)
1308frcfpn_clr:
1309	or.l	#ovfinx_mask,USER_FPSR(a6)
1310* 
1311* Perform the write.
1312*
1313frcfpn:
1314	bfextu	CMDREG1B(a6){6:3},d0	;extract fp destination register
1315	cmpi.b	#3,d0
1316	ble.b	frc0123			;check if dest is fp0-fp3
1317	move.l	#7,d1
1318	sub.l	d0,d1
1319	clr.l	d0
1320	bset.l	d1,d0
1321	fmovem.x WBTEMP(a6),d0
1322	rts
1323frc0123:
1324	cmpi.b	#0,d0
1325	beq.b	frc0_dst
1326	cmpi.b	#1,d0
1327	beq.b	frc1_dst 
1328	cmpi.b	#2,d0
1329	beq.b	frc2_dst 
1330frc3_dst:
1331	move.l	WBTEMP_EX(a6),USER_FP3(a6)
1332	move.l	WBTEMP_HI(a6),USER_FP3+4(a6)
1333	move.l	WBTEMP_LO(a6),USER_FP3+8(a6)
1334	rts
1335frc2_dst:
1336	move.l	WBTEMP_EX(a6),USER_FP2(a6)
1337	move.l	WBTEMP_HI(a6),USER_FP2+4(a6)
1338	move.l	WBTEMP_LO(a6),USER_FP2+8(a6)
1339	rts
1340frc1_dst:
1341	move.l	WBTEMP_EX(a6),USER_FP1(a6)
1342	move.l	WBTEMP_HI(a6),USER_FP1+4(a6)
1343	move.l	WBTEMP_LO(a6),USER_FP1+8(a6)
1344	rts
1345frc0_dst:
1346	move.l	WBTEMP_EX(a6),USER_FP0(a6)
1347	move.l	WBTEMP_HI(a6),USER_FP0+4(a6)
1348	move.l	WBTEMP_LO(a6),USER_FP0+8(a6)
1349	rts
1350
1351*
1352* Write etemp to fpn.
1353* A check is made on enabled and signalled snan exceptions,
1354* and the destination is not overwritten if this condition exists.
1355* This code is designed to make fmoveins of unsupported data types
1356* faster.
1357*
1358wr_etemp:
1359	btst.b	#snan_bit,FPSR_EXCEPT(a6)	;if snan is set, and
1360	beq.b	fmoveinc		;enabled, force restore
1361	btst.b	#snan_bit,FPCR_ENABLE(a6) ;and don't overwrite
1362	beq.b	fmoveinc		;the dest
1363	move.l	ETEMP_EX(a6),FPTEMP_EX(a6)	;set up fptemp sign for 
1364*						;snan handler
1365	tst.b	ETEMP(a6)		;check for negative
1366	blt.b	snan_neg
1367	rts
1368snan_neg:
1369	or.l	#neg_bit,USER_FPSR(a6)	;snan is negative; set N
1370	rts
1371fmoveinc:
1372	clr.w	NMNEXC(a6)
1373	bclr.b	#E1,E_BYTE(a6)
1374	move.b	STAG(a6),d0		;check if stag is inf
1375	andi.b	#$e0,d0
1376	cmpi.b	#$40,d0
1377	bne.b	fminc_cnan
1378	or.l	#inf_mask,USER_FPSR(a6) ;if inf, nothing yet has set I
1379	tst.w	LOCAL_EX(a0)		;check sign
1380	bge.b	fminc_con
1381	or.l	#neg_mask,USER_FPSR(a6)
1382	bra	fminc_con
1383fminc_cnan:
1384	cmpi.b	#$60,d0			;check if stag is NaN
1385	bne.b	fminc_czero
1386	or.l	#nan_mask,USER_FPSR(a6) ;if nan, nothing yet has set NaN
1387	move.l	ETEMP_EX(a6),FPTEMP_EX(a6)	;set up fptemp sign for 
1388*						;snan handler
1389	tst.w	LOCAL_EX(a0)		;check sign
1390	bge.b	fminc_con
1391	or.l	#neg_mask,USER_FPSR(a6)
1392	bra	fminc_con
1393fminc_czero:
1394	cmpi.b	#$20,d0			;check if zero
1395	bne.b	fminc_con
1396	or.l	#z_mask,USER_FPSR(a6)	;if zero, set Z
1397	tst.w	LOCAL_EX(a0)		;check sign
1398	bge.b	fminc_con
1399	or.l	#neg_mask,USER_FPSR(a6)
1400fminc_con:
1401	bfextu	CMDREG1B(a6){6:3},d0	;extract fp destination register
1402	cmpi.b	#3,d0
1403	ble.b	fp0123			;check if dest is fp0-fp3
1404	move.l	#7,d1
1405	sub.l	d0,d1
1406	clr.l	d0
1407	bset.l	d1,d0
1408	fmovem.x ETEMP(a6),d0
1409	rts
1410
1411fp0123:
1412	cmpi.b	#0,d0
1413	beq.b	fp0_dst
1414	cmpi.b	#1,d0
1415	beq.b	fp1_dst 
1416	cmpi.b	#2,d0
1417	beq.b	fp2_dst 
1418fp3_dst:
1419	move.l	ETEMP_EX(a6),USER_FP3(a6)
1420	move.l	ETEMP_HI(a6),USER_FP3+4(a6)
1421	move.l	ETEMP_LO(a6),USER_FP3+8(a6)
1422	rts
1423fp2_dst:
1424	move.l	ETEMP_EX(a6),USER_FP2(a6)
1425	move.l	ETEMP_HI(a6),USER_FP2+4(a6)
1426	move.l	ETEMP_LO(a6),USER_FP2+8(a6)
1427	rts
1428fp1_dst:
1429	move.l	ETEMP_EX(a6),USER_FP1(a6)
1430	move.l	ETEMP_HI(a6),USER_FP1+4(a6)
1431	move.l	ETEMP_LO(a6),USER_FP1+8(a6)
1432	rts
1433fp0_dst:
1434	move.l	ETEMP_EX(a6),USER_FP0(a6)
1435	move.l	ETEMP_HI(a6),USER_FP0+4(a6)
1436	move.l	ETEMP_LO(a6),USER_FP0+8(a6)
1437	rts
1438
1439opclass3:
1440	st.b	CU_ONLY(a6)
1441	move.w	CMDREG1B(a6),d0	;check if packed moveout
1442	andi.w	#$0c00,d0	;isolate last 2 bits of size field
1443	cmpi.w	#$0c00,d0	;if size is 011 or 111, it is packed
1444	beq.w	pack_out	;else it is norm or denorm
1445	bra.w	mv_out
1446
1447	
1448*
1449*	MOVE OUT
1450*
1451
1452mv_tbl:
1453	dc.l	li
1454	dc.l 	sgp
1455	dc.l 	xp
1456	dc.l 	mvout_end	;should never be taken
1457	dc.l 	wi
1458	dc.l 	dp
1459	dc.l 	bi
1460	dc.l 	mvout_end	;should never be taken
1461mv_out:
1462	bfextu	CMDREG1B(a6){3:3},d1	;put source specifier in d1
1463	lea.l	mv_tbl,a0
1464	move.l	(a0,d1*4),a0
1465	jmp	(a0)
1466
1467*
1468* This exit is for move-out to memory.  The aunfl bit is 
1469* set if the result is inex and unfl is signalled.
1470*
1471mvout_end:
1472	btst.b	#inex2_bit,FPSR_EXCEPT(a6)
1473	beq.b	no_aufl
1474	btst.b	#unfl_bit,FPSR_EXCEPT(a6)
1475	beq.b	no_aufl
1476	bset.b	#aunfl_bit,FPSR_AEXCEPT(a6)
1477no_aufl:
1478	clr.w	NMNEXC(a6)
1479	bclr.b	#E1,E_BYTE(a6)
1480	fmove.l	#0,FPSR			;clear any cc bits from res_func
1481*
1482* Return ETEMP to extended format from internal extended format so
1483* that gen_except will have a correctly signed value for ovfl/unfl
1484* handlers.
1485*
1486	bfclr	ETEMP_SGN(a6){0:8}
1487	beq.b	mvout_con
1488	bset.b	#sign_bit,ETEMP_EX(a6)
1489mvout_con:
1490	rts
1491*
1492* This exit is for move-out to int register.  The aunfl bit is 
1493* not set in any case for this move.
1494*
1495mvouti_end:
1496	clr.w	NMNEXC(a6)
1497	bclr.b	#E1,E_BYTE(a6)
1498	fmove.l	#0,FPSR			;clear any cc bits from res_func
1499*
1500* Return ETEMP to extended format from internal extended format so
1501* that gen_except will have a correctly signed value for ovfl/unfl
1502* handlers.
1503*
1504	bfclr	ETEMP_SGN(a6){0:8}
1505	beq.b	mvouti_con
1506	bset.b	#sign_bit,ETEMP_EX(a6)
1507mvouti_con:
1508	rts
1509*
1510* li is used to handle a long integer source specifier
1511*
1512
1513li:
1514	moveq.l	#4,d0		;set byte count
1515
1516	btst.b	#7,STAG(a6)	;check for extended denorm
1517	bne.w	int_dnrm	;if so, branch
1518
1519	fmovem.x ETEMP(a6),fp0
1520	fcmp.d	#:41dfffffffc00000,fp0
1521* 41dfffffffc00000 in dbl prec = 401d0000fffffffe00000000 in ext prec
1522	fbge.w	lo_plrg	
1523	fcmp.d	#:c1e0000000000000,fp0
1524* c1e0000000000000 in dbl prec = c01e00008000000000000000 in ext prec
1525	fble.w	lo_nlrg
1526*
1527* at this point, the answer is between the largest pos and neg values
1528*
1529	move.l	USER_FPCR(a6),d1	;use user's rounding mode
1530	andi.l	#$30,d1
1531	fmove.l	d1,fpcr
1532	fmove.l	fp0,L_SCR1(a6)	;let the 040 perform conversion
1533	fmove.l fpsr,d1
1534	or.l	d1,USER_FPSR(a6)	;capture inex2/ainex if set
1535	bra.w	int_wrt
1536
1537
1538lo_plrg:
1539	move.l	#$7fffffff,L_SCR1(a6)	;answer is largest positive int
1540	fbeq.w	int_wrt			;exact answer
1541	fcmp.d	#:41dfffffffe00000,fp0
1542* 41dfffffffe00000 in dbl prec = 401d0000ffffffff00000000 in ext prec
1543	fbge.w	int_operr		;set operr
1544	bra.w	int_inx			;set inexact
1545
1546lo_nlrg:
1547	move.l	#$80000000,L_SCR1(a6)
1548	fbeq.w	int_wrt			;exact answer
1549	fcmp.d	#:c1e0000000100000,fp0
1550* c1e0000000100000 in dbl prec = c01e00008000000080000000 in ext prec
1551	fblt.w	int_operr		;set operr
1552	bra.w	int_inx			;set inexact
1553
1554*
1555* wi is used to handle a word integer source specifier
1556*
1557
1558wi:
1559	moveq.l	#2,d0		;set byte count
1560
1561	btst.b	#7,STAG(a6)	;check for extended denorm
1562	bne.w	int_dnrm	;branch if so
1563
1564	fmovem.x ETEMP(a6),fp0
1565	fcmp.s	#:46fffe00,fp0
1566* 46fffe00 in sgl prec = 400d0000fffe000000000000 in ext prec
1567	fbge.w	wo_plrg	
1568	fcmp.s	#:c7000000,fp0
1569* c7000000 in sgl prec = c00e00008000000000000000 in ext prec
1570	fble.w	wo_nlrg
1571
1572*
1573* at this point, the answer is between the largest pos and neg values
1574*
1575	move.l	USER_FPCR(a6),d1	;use user's rounding mode
1576	andi.l	#$30,d1
1577	fmove.l	d1,fpcr
1578	fmove.w	fp0,L_SCR1(a6)	;let the 040 perform conversion
1579	fmove.l fpsr,d1
1580	or.l	d1,USER_FPSR(a6)	;capture inex2/ainex if set
1581	bra.w	int_wrt
1582
1583wo_plrg:
1584	move.w	#$7fff,L_SCR1(a6)	;answer is largest positive int
1585	fbeq.w	int_wrt			;exact answer
1586	fcmp.s	#:46ffff00,fp0
1587* 46ffff00 in sgl prec = 400d0000ffff000000000000 in ext prec
1588	fbge.w	int_operr		;set operr
1589	bra.w	int_inx			;set inexact
1590
1591wo_nlrg:
1592	move.w	#$8000,L_SCR1(a6)
1593	fbeq.w	int_wrt			;exact answer
1594	fcmp.s	#:c7000080,fp0
1595* c7000080 in sgl prec = c00e00008000800000000000 in ext prec
1596	fblt.w	int_operr		;set operr
1597	bra.w	int_inx			;set inexact
1598
1599*
1600* bi is used to handle a byte integer source specifier
1601*
1602
1603bi:
1604	moveq.l	#1,d0		;set byte count
1605
1606	btst.b	#7,STAG(a6)	;check for extended denorm
1607	bne.w	int_dnrm	;branch if so
1608
1609	fmovem.x ETEMP(a6),fp0
1610	fcmp.s	#:42fe0000,fp0
1611* 42fe0000 in sgl prec = 40050000fe00000000000000 in ext prec
1612	fbge.w	by_plrg	
1613	fcmp.s	#:c3000000,fp0
1614* c3000000 in sgl prec = c00600008000000000000000 in ext prec
1615	fble.w	by_nlrg
1616
1617*
1618* at this point, the answer is between the largest pos and neg values
1619*
1620	move.l	USER_FPCR(a6),d1	;use user's rounding mode
1621	andi.l	#$30,d1
1622	fmove.l	d1,fpcr
1623	fmove.b	fp0,L_SCR1(a6)	;let the 040 perform conversion
1624	fmove.l fpsr,d1
1625	or.l	d1,USER_FPSR(a6)	;capture inex2/ainex if set
1626	bra.w	int_wrt
1627
1628by_plrg:
1629	move.b	#$7f,L_SCR1(a6)		;answer is largest positive int
1630	fbeq.w	int_wrt			;exact answer
1631	fcmp.s	#:42ff0000,fp0
1632* 42ff0000 in sgl prec = 40050000ff00000000000000 in ext prec
1633	fbge.w	int_operr		;set operr
1634	bra.w	int_inx			;set inexact
1635
1636by_nlrg:
1637	move.b	#$80,L_SCR1(a6)
1638	fbeq.w	int_wrt			;exact answer
1639	fcmp.s	#:c3008000,fp0
1640* c3008000 in sgl prec = c00600008080000000000000 in ext prec
1641	fblt.w	int_operr		;set operr
1642	bra.w	int_inx			;set inexact
1643
1644*
1645* Common integer routines
1646*
1647* int_drnrm---account for possible nonzero result for round up with positive
1648* operand and round down for negative answer.  In the first case (result = 1)
1649* byte-width (store in d0) of result must be honored.  In the second case,
1650* -1 in L_SCR1(a6) will cover all contingencies (FMOVE.B/W/L out).
1651
1652int_dnrm:
1653	move.l	#0,L_SCR1(a6)	; initialize result to 0
1654	bfextu	FPCR_MODE(a6){2:2},d1	; d1 is the rounding mode
1655	cmp.b	#2,d1		
1656	bmi.b	int_inx		; if RN or RZ, done
1657	bne.b	int_rp		; if RP, continue below
1658	tst.w	ETEMP(a6)	; RM: store -1 in L_SCR1 if src is negative
1659	bpl.b	int_inx		; otherwise result is 0
1660	move.l	#-1,L_SCR1(a6)
1661	bra.b	int_inx
1662int_rp:
1663	tst.w	ETEMP(a6)	; RP: store +1 of proper width in L_SCR1 if
1664*				; source is greater than 0
1665	bmi.b	int_inx		; otherwise, result is 0
1666	lea	L_SCR1(a6),a1	; a1 is address of L_SCR1
1667	adda.l	d0,a1		; offset by destination width -1
1668	suba.l	#1,a1		
1669	bset.b	#0,(a1)		; set low bit at a1 address
1670int_inx:
1671	ori.l	#inx2a_mask,USER_FPSR(a6)
1672	bra.b	int_wrt
1673int_operr:
1674	fmovem.x fp0,FPTEMP(a6)	;FPTEMP must contain the extended
1675*				;precision source that needs to be
1676*				;converted to integer this is required
1677*				;if the operr exception is enabled.
1678*				;set operr/aiop (no inex2 on int ovfl)
1679
1680	ori.l	#opaop_mask,USER_FPSR(a6)
1681*				;fall through to perform int_wrt
1682int_wrt: 
1683	move.l	EXC_EA(a6),a1	;load destination address
1684	tst.l	a1		;check to see if it is a dest register
1685	beq.b	wrt_dn		;write data register 
1686	lea	L_SCR1(a6),a0	;point to supervisor source address
1687	bsr.l	mem_write
1688	bra.w	mvouti_end
1689
1690wrt_dn:
1691	move.l	d0,-(sp)	;d0 currently contains the size to write
1692	bsr.l	get_fline	;get_fline returns Dn in d0
1693	andi.w	#$7,d0		;isolate register
1694	move.l	(sp)+,d1	;get size
1695	cmpi.l	#4,d1		;most frequent case
1696	beq.b	sz_long
1697	cmpi.l	#2,d1
1698	bne.b	sz_con
1699	or.l	#8,d0		;add 'word' size to register#
1700	bra.b	sz_con
1701sz_long:
1702	or.l	#$10,d0		;add 'long' size to register#
1703sz_con:
1704	move.l	d0,d1		;reg_dest expects size:reg in d1
1705	bsr.l	reg_dest	;load proper data register
1706	bra.w	mvouti_end 
1707xp:
1708	lea	ETEMP(a6),a0
1709	bclr.b	#sign_bit,LOCAL_EX(a0)
1710	sne	LOCAL_SGN(a0)
1711	btst.b	#7,STAG(a6)	;check for extended denorm
1712	bne.w	xdnrm
1713	clr.l	d0
1714	bra.b	do_fp		;do normal case
1715sgp:
1716	lea	ETEMP(a6),a0
1717	bclr.b	#sign_bit,LOCAL_EX(a0)
1718	sne	LOCAL_SGN(a0)
1719	btst.b	#7,STAG(a6)	;check for extended denorm
1720	bne.w	sp_catas	;branch if so
1721	move.w	LOCAL_EX(a0),d0
1722	lea	sp_bnds,a1
1723	cmp.w	(a1),d0
1724	blt.w	sp_under
1725	cmp.w	2(a1),d0
1726	bgt.w	sp_over
1727	move.l	#1,d0		;set destination format to single
1728	bra.b	do_fp		;do normal case
1729dp:
1730	lea	ETEMP(a6),a0
1731	bclr.b	#sign_bit,LOCAL_EX(a0)
1732	sne	LOCAL_SGN(a0)
1733
1734	btst.b	#7,STAG(a6)	;check for extended denorm
1735	bne.w	dp_catas	;branch if so
1736
1737	move.w	LOCAL_EX(a0),d0
1738	lea	dp_bnds,a1
1739
1740	cmp.w	(a1),d0
1741	blt.w	dp_under
1742	cmp.w	2(a1),d0
1743	bgt.w	dp_over
1744	
1745	move.l	#2,d0		;set destination format to double
1746*				;fall through to do_fp
1747*
1748do_fp:
1749	bfextu	FPCR_MODE(a6){2:2},d1	;rnd mode in d1
1750	swap	d0			;rnd prec in upper word
1751	add.l	d0,d1			;d1 has PREC/MODE info
1752	
1753	clr.l	d0			;clear g,r,s 
1754
1755	bsr.l	round			;round 
1756
1757	move.l	a0,a1
1758	move.l	EXC_EA(a6),a0
1759
1760	bfextu	CMDREG1B(a6){3:3},d1	;extract destination format
1761*					;at this point only the dest
1762*					;formats sgl, dbl, ext are
1763*					;possible
1764	cmp.b	#2,d1
1765	bgt.b	ddbl			;double=5, extended=2, single=1
1766	bne.b	dsgl
1767*					;fall through to dext
1768dext:
1769	bsr.l	dest_ext
1770	bra.w	mvout_end
1771dsgl:
1772	bsr.l	dest_sgl
1773	bra.w	mvout_end
1774ddbl:
1775	bsr.l	dest_dbl
1776	bra.w	mvout_end
1777
1778*
1779* Handle possible denorm or catastrophic underflow cases here
1780*
1781xdnrm:
1782	bsr.w	set_xop		;initialize WBTEMP
1783	bset.b	#wbtemp15_bit,WB_BYTE(a6) ;set wbtemp15
1784
1785	move.l	a0,a1
1786	move.l	EXC_EA(a6),a0	;a0 has the destination pointer
1787	bsr.l	dest_ext	;store to memory
1788	bset.b	#unfl_bit,FPSR_EXCEPT(a6)
1789	bra.w	mvout_end
1790	
1791sp_under:
1792	bset.b	#etemp15_bit,STAG(a6)
1793
1794	cmp.w	4(a1),d0
1795	blt.b	sp_catas	;catastrophic underflow case	
1796
1797	move.l	#1,d0		;load in round precision
1798	move.l	#sgl_thresh,d1	;load in single denorm threshold
1799	bsr.l	dpspdnrm	;expects d1 to have the proper
1800*				;denorm threshold
1801	bsr.l	dest_sgl	;stores value to destination
1802	bset.b	#unfl_bit,FPSR_EXCEPT(a6)
1803	bra.w	mvout_end	;exit
1804
1805dp_under:
1806	bset.b	#etemp15_bit,STAG(a6)
1807
1808	cmp.w	4(a1),d0
1809	blt.b	dp_catas	;catastrophic underflow case
1810		
1811	move.l	#dbl_thresh,d1	;load in double precision threshold
1812	move.l	#2,d0		
1813	bsr.l	dpspdnrm	;expects d1 to have proper
1814*				;denorm threshold
1815*				;expects d0 to have round precision
1816	bsr.l	dest_dbl	;store value to destination
1817	bset.b	#unfl_bit,FPSR_EXCEPT(a6)
1818	bra.w	mvout_end	;exit
1819
1820*
1821* Handle catastrophic underflow cases here
1822*
1823sp_catas:
1824* Temp fix for z bit set in unf_sub
1825	move.l	USER_FPSR(a6),-(a7)
1826
1827	move.l	#1,d0		;set round precision to sgl
1828
1829	bsr.l	unf_sub		;a0 points to result
1830
1831	move.l	(a7)+,USER_FPSR(a6)
1832
1833	move.l	#1,d0
1834	sub.w	d0,LOCAL_EX(a0) ;account for difference between
1835*				;denorm/norm bias
1836
1837	move.l	a0,a1		;a1 has the operand input
1838	move.l	EXC_EA(a6),a0	;a0 has the destination pointer
1839	
1840	bsr.l	dest_sgl	;store the result
1841	ori.l	#unfinx_mask,USER_FPSR(a6)
1842	bra.w	mvout_end
1843	
1844dp_catas:
1845* Temp fix for z bit set in unf_sub
1846	move.l	USER_FPSR(a6),-(a7)
1847
1848	move.l	#2,d0		;set round precision to dbl
1849	bsr.l	unf_sub		;a0 points to result
1850
1851	move.l	(a7)+,USER_FPSR(a6)
1852
1853	move.l	#1,d0
1854	sub.w	d0,LOCAL_EX(a0) ;account for difference between 
1855*				;denorm/norm bias
1856
1857	move.l	a0,a1		;a1 has the operand input
1858	move.l	EXC_EA(a6),a0	;a0 has the destination pointer
1859	
1860	bsr.l	dest_dbl	;store the result
1861	ori.l	#unfinx_mask,USER_FPSR(a6)
1862	bra.w	mvout_end
1863
1864*
1865* Handle catastrophic overflow cases here
1866*
1867sp_over:
1868* Temp fix for z bit set in unf_sub
1869	move.l	USER_FPSR(a6),-(a7)
1870
1871	move.l	#1,d0
1872	lea.l	FP_SCR1(a6),a0	;use FP_SCR1 for creating result
1873	move.l	ETEMP_EX(a6),(a0)
1874	move.l	ETEMP_HI(a6),4(a0)
1875	move.l	ETEMP_LO(a6),8(a0)
1876	bsr.l	ovf_res
1877
1878	move.l	(a7)+,USER_FPSR(a6)
1879
1880	move.l	a0,a1
1881	move.l	EXC_EA(a6),a0
1882	bsr.l	dest_sgl
1883	or.l	#ovfinx_mask,USER_FPSR(a6)
1884	bra.w	mvout_end
1885
1886dp_over:
1887* Temp fix for z bit set in ovf_res
1888	move.l	USER_FPSR(a6),-(a7)
1889
1890	move.l	#2,d0
1891	lea.l	FP_SCR1(a6),a0	;use FP_SCR1 for creating result
1892	move.l	ETEMP_EX(a6),(a0)
1893	move.l	ETEMP_HI(a6),4(a0)
1894	move.l	ETEMP_LO(a6),8(a0)
1895	bsr.l	ovf_res
1896
1897	move.l	(a7)+,USER_FPSR(a6)
1898
1899	move.l	a0,a1
1900	move.l	EXC_EA(a6),a0
1901	bsr.l	dest_dbl
1902	or.l	#ovfinx_mask,USER_FPSR(a6)
1903	bra.w	mvout_end
1904
1905*
1906* 	DPSPDNRM
1907*
1908* This subroutine takes an extended normalized number and denormalizes
1909* it to the given round precision. This subroutine also decrements
1910* the input operand's exponent by 1 to account for the fact that
1911* dest_sgl or dest_dbl expects a normalized number's bias.
1912*
1913* Input: a0  points to a normalized number in internal extended format
1914*	 d0  is the round precision (=1 for sgl; =2 for dbl)
1915*	 d1  is the the single precision or double precision
1916*	     denorm threshold
1917*
1918* Output: (In the format for dest_sgl or dest_dbl)
1919*	 a0   points to the destination
1920*   	 a1   points to the operand
1921*
1922* Exceptions: Reports inexact 2 exception by setting USER_FPSR bits
1923*
1924dpspdnrm:
1925	move.l	d0,-(a7)	;save round precision
1926	clr.l	d0		;clear initial g,r,s
1927	bsr.l	dnrm_lp		;careful with d0, it's needed by round
1928
1929	bfextu	FPCR_MODE(a6){2:2},d1 ;get rounding mode
1930	swap	d1
1931	move.w	2(a7),d1	;set rounding precision 
1932	swap	d1		;at this point d1 has PREC/MODE info
1933	bsr.l	round		;round result, sets the inex bit in
1934*				;USER_FPSR if needed
1935
1936	move.w	#1,d0
1937	sub.w	d0,LOCAL_EX(a0) ;account for difference in denorm
1938*				;vs norm bias
1939
1940	move.l	a0,a1		;a1 has the operand input
1941	move.l	EXC_EA(a6),a0	;a0 has the destination pointer
1942	add.w	#4,a7		;pop stack
1943	rts
1944*
1945* SET_XOP initialized WBTEMP with the value pointed to by a0
1946* input: a0 points to input operand in the internal extended format
1947*
1948set_xop:
1949	move.l	LOCAL_EX(a0),WBTEMP_EX(a6)
1950	move.l	LOCAL_HI(a0),WBTEMP_HI(a6)
1951	move.l	LOCAL_LO(a0),WBTEMP_LO(a6)
1952	bfclr	WBTEMP_SGN(a6){0:8}
1953	beq.b	sxop
1954	bset.b	#sign_bit,WBTEMP_EX(a6)
1955sxop:
1956	bfclr	STAG(a6){5:4}	;clear wbtm66,wbtm1,wbtm0,sbit
1957	rts
1958*
1959*	P_MOVE
1960*
1961p_movet:
1962	dc.l	p_move
1963	dc.l	p_movez
1964	dc.l	p_movei
1965	dc.l	p_moven
1966	dc.l	p_move
1967p_regd:
1968	dc.l	p_dyd0
1969	dc.l	p_dyd1
1970	dc.l	p_dyd2
1971	dc.l	p_dyd3
1972	dc.l	p_dyd4
1973	dc.l	p_dyd5
1974	dc.l	p_dyd6
1975	dc.l	p_dyd7
1976
1977pack_out:
1978 	lea.l	p_movet,a0	;load jmp table address
1979	move.w	STAG(a6),d0	;get source tag
1980	bfextu	d0{16:3},d0	;isolate source bits
1981	move.l	(a0,d0.w*4),a0	;load a0 with routine label for tag
1982	jmp	(a0)		;go to the routine
1983
1984p_write:
1985	move.l	#$0c,d0 	;get byte count
1986	move.l	EXC_EA(a6),a1	;get the destination address
1987	bsr 	mem_write	;write the user's destination
1988	move.b	#0,CU_SAVEPC(a6) ;set the cu save pc to all 0's
1989
1990*
1991* Also note that the dtag must be set to norm here - this is because 
1992* the 040 uses the dtag to execute the correct microcode.
1993*
1994        bfclr    DTAG(a6){0:3}  ;set dtag to norm
1995
1996	rts
1997
1998* Notes on handling of special case (zero, inf, and nan) inputs:
1999*	1. Operr is not signalled if the k-factor is greater than 18.
2000*	2. Per the manual, status bits are not set.
2001*
2002
2003p_move:
2004	move.w	CMDREG1B(a6),d0
2005	btst.l	#kfact_bit,d0	;test for dynamic k-factor
2006	beq.b	statick		;if clear, k-factor is static
2007dynamick:
2008	bfextu	d0{25:3},d0	;isolate register for dynamic k-factor
2009	lea	p_regd,a0
2010	move.l	(a0,d0*4),a0
2011	jmp	(a0)
2012statick:
2013	andi.w	#$007f,d0	;get k-factor
2014	bfexts	d0{25:7},d0	;sign extend d0 for bindec
2015	lea.l	ETEMP(a6),a0	;a0 will point to the packed decimal
2016	bsr.l	bindec		;perform the convert; data at a6
2017	lea.l	FP_SCR1(a6),a0	;load a0 with result address
2018	bra.l	p_write
2019p_movez:
2020	lea.l	ETEMP(a6),a0	;a0 will point to the packed decimal
2021	clr.w	2(a0)		;clear lower word of exp
2022	clr.l	4(a0)		;load second lword of ZERO
2023	clr.l	8(a0)		;load third lword of ZERO
2024	bra.w	p_write		;go write results
2025p_movei:
2026	fmove.l	#0,FPSR		;clear aiop
2027	lea.l	ETEMP(a6),a0	;a0 will point to the packed decimal
2028	clr.w	2(a0)		;clear lower word of exp
2029	bra.w	p_write		;go write the result
2030p_moven:
2031	lea.l	ETEMP(a6),a0	;a0 will point to the packed decimal
2032	clr.w	2(a0)		;clear lower word of exp
2033	bra.w	p_write		;go write the result
2034
2035*
2036* Routines to read the dynamic k-factor from Dn.
2037*
2038p_dyd0:
2039	move.l	USER_D0(a6),d0
2040	bra.b	statick
2041p_dyd1:
2042	move.l	USER_D1(a6),d0
2043	bra.b	statick
2044p_dyd2:
2045	move.l	d2,d0
2046	bra.b	statick
2047p_dyd3:
2048	move.l	d3,d0
2049	bra.b	statick
2050p_dyd4:
2051	move.l	d4,d0
2052	bra.b	statick
2053p_dyd5:
2054	move.l	d5,d0
2055	bra.b	statick
2056p_dyd6:
2057	move.l	d6,d0
2058	bra.w	statick
2059p_dyd7:
2060	move.l	d7,d0
2061	bra.w	statick
2062
2063	end
2064