fpu_implode.c revision 1.1 1 /*
2 * Copyright (c) 1992, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This software was developed by the Computer Systems Engineering group
6 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
7 * contributed to Berkeley.
8 *
9 * All advertising materials mentioning features or use of this software
10 * must display the following acknowledgement:
11 * This product includes software developed by the University of
12 * California, Lawrence Berkeley Laboratory.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * 1. Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in the
21 * documentation and/or other materials provided with the distribution.
22 * 3. All advertising materials mentioning features or use of this software
23 * must display the following acknowledgement:
24 * This product includes software developed by the University of
25 * California, Berkeley and its contributors.
26 * 4. Neither the name of the University nor the names of its contributors
27 * may be used to endorse or promote products derived from this software
28 * without specific prior written permission.
29 *
30 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
31 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
34 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
36 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
38 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40 * SUCH DAMAGE.
41 *
42 * @(#)fpu_implode.c 8.1 (Berkeley) 6/11/93
43 *
44 * from: Header: fpu_implode.c,v 1.5 92/11/26 01:39:49 torek Exp
45 * $Id: fpu_implode.c,v 1.1 1993/10/02 10:22:58 deraadt Exp $
46 */
47
48 /*
49 * FPU subroutines: `implode' internal format numbers into the machine's
50 * `packed binary' format.
51 */
52
53 #include <sys/types.h>
54
55 #include <machine/ieee.h>
56 #include <machine/instr.h>
57 #include <machine/reg.h>
58
59 #include <sparc/fpu/fpu_arith.h>
60 #include <sparc/fpu/fpu_emu.h>
61
62 /*
63 * Round a number (algorithm from Motorola MC68882 manual, modified for
64 * our internal format). Set inexact exception if rounding is required.
65 * Return true iff we rounded up.
66 *
67 * After rounding, we discard the guard and round bits by shifting right
68 * 2 bits (a la fpu_shr(), but we do not bother with fp->fp_sticky).
69 * This saves effort later.
70 *
71 * Note that we may leave the value 2.0 in fp->fp_mant; it is the caller's
72 * responsibility to fix this if necessary.
73 */
74 static int
75 round(register struct fpemu *fe, register struct fpn *fp)
76 {
77 register u_int m0, m1, m2, m3;
78 register int gr, s, ret;
79
80 m0 = fp->fp_mant[0];
81 m1 = fp->fp_mant[1];
82 m2 = fp->fp_mant[2];
83 m3 = fp->fp_mant[3];
84 gr = m3 & 3;
85 s = fp->fp_sticky;
86
87 /* mant >>= FP_NG */
88 m3 = (m3 >> FP_NG) | (m2 << (32 - FP_NG));
89 m2 = (m2 >> FP_NG) | (m1 << (32 - FP_NG));
90 m1 = (m1 >> FP_NG) | (m0 << (32 - FP_NG));
91 m0 >>= FP_NG;
92
93 if ((gr | s) == 0) /* result is exact: no rounding needed */
94 goto rounddown;
95
96 fe->fe_cx |= FSR_NX; /* inexact */
97
98 /* Go to rounddown to round down; break to round up. */
99 switch ((fe->fe_fsr >> FSR_RD_SHIFT) & FSR_RD_MASK) {
100
101 case FSR_RD_RN:
102 default:
103 /*
104 * Round only if guard is set (gr & 2). If guard is set,
105 * but round & sticky both clear, then we want to round
106 * but have a tie, so round to even, i.e., add 1 iff odd.
107 */
108 if ((gr & 2) == 0)
109 goto rounddown;
110 if ((gr & 1) || fp->fp_sticky || (m3 & 1))
111 break;
112 goto rounddown;
113
114 case FSR_RD_RZ:
115 /* Round towards zero, i.e., down. */
116 goto rounddown;
117
118 case FSR_RD_RM:
119 /* Round towards -Inf: up if negative, down if positive. */
120 if (fp->fp_sign)
121 break;
122 goto rounddown;
123
124 case FSR_RD_RP:
125 /* Round towards +Inf: up if positive, down otherwise. */
126 if (!fp->fp_sign)
127 break;
128 goto rounddown;
129 }
130
131 /* Bump low bit of mantissa, with carry. */
132 #ifdef sparc /* ``cheating'' (left out FPU_DECL_CARRY; know this is faster) */
133 FPU_ADDS(m3, m3, 1);
134 FPU_ADDCS(m2, m2, 0);
135 FPU_ADDCS(m1, m1, 0);
136 FPU_ADDC(m0, m0, 0);
137 #else
138 if (++m3 == 0 && ++m2 == 0 && ++m1 == 0)
139 m0++;
140 #endif
141 fp->fp_mant[0] = m0;
142 fp->fp_mant[1] = m1;
143 fp->fp_mant[2] = m2;
144 fp->fp_mant[3] = m3;
145 return (1);
146
147 rounddown:
148 fp->fp_mant[0] = m0;
149 fp->fp_mant[1] = m1;
150 fp->fp_mant[2] = m2;
151 fp->fp_mant[3] = m3;
152 return (0);
153 }
154
155 /*
156 * For overflow: return true if overflow is to go to +/-Inf, according
157 * to the sign of the overflowing result. If false, overflow is to go
158 * to the largest magnitude value instead.
159 */
160 static int
161 toinf(struct fpemu *fe, int sign)
162 {
163 int inf;
164
165 /* look at rounding direction */
166 switch ((fe->fe_fsr >> FSR_RD_SHIFT) & FSR_RD_MASK) {
167
168 default:
169 case FSR_RD_RN: /* the nearest value is always Inf */
170 inf = 1;
171 break;
172
173 case FSR_RD_RZ: /* toward 0 => never towards Inf */
174 inf = 0;
175 break;
176
177 case FSR_RD_RP: /* toward +Inf iff positive */
178 inf = sign == 0;
179 break;
180
181 case FSR_RD_RM: /* toward -Inf iff negative */
182 inf = sign;
183 break;
184 }
185 return (inf);
186 }
187
/*
 * fpn -> int (int value returned as return value).
 *
 * N.B.: this conversion always rounds towards zero (this is a peculiarity
 * of the SPARC instruction set).
 */
u_int
fpu_ftoi(fe, fp)
	struct fpemu *fe;
	register struct fpn *fp;
{
	register u_int i;
	register int sign, exp;

	sign = fp->fp_sign;
	switch (fp->fp_class) {

	case FPC_ZERO:
		/* +/-0 converts to 0 exactly. */
		return (0);

	case FPC_NUM:
		/*
		 * If exp >= 2^32, overflow.  Otherwise shift value right
		 * into last mantissa word (this will not exceed 0xffffffff),
		 * shifting any guard and round bits out into the sticky
		 * bit.  Then ``round'' towards zero, i.e., just set an
		 * inexact exception if sticky is set (see round()).
		 * If the result is > 0x80000000, or is positive and equals
		 * 0x80000000, overflow; otherwise the last fraction word
		 * is the result.
		 */
		if ((exp = fp->fp_exp) >= 32)
			break;		/* cannot fit: fall into overflow */
		/* NB: the following includes exp < 0 cases */
		if (fpu_shr(fp, FP_NMANT - 1 - exp) != 0)
			fe->fe_cx |= FSR_NX;	/* shifted-out bits => inexact */
		i = fp->fp_mant[3];
		/*
		 * Range check: allow up to 0x7fffffff when positive,
		 * up to 0x80000000 when negative (sign is 0 or 1 here).
		 */
		if (i >= ((u_int)0x80000000 + sign))
			break;
		return (sign ? -i : i);

	default:			/* Inf, qNaN, sNaN */
		break;
	}
	/* overflow: replace any inexact exception with invalid */
	fe->fe_cx = (fe->fe_cx & ~FSR_NX) | FSR_NV;
	/* deliver INT_MAX for positive, INT_MIN for negative overflow */
	return (0x7fffffff + sign);
}
236
/*
 * fpn -> single (32 bit single returned as return value).
 * We assume <= 29 bits in a single-precision fraction (1.f part).
 */
u_int
fpu_ftos(fe, fp)
	struct fpemu *fe;
	register struct fpn *fp;
{
	register u_int sign = fp->fp_sign << 31;
	register int exp;

#define	SNG_EXP(e)	((e) << SNG_FRACBITS)	/* makes e an exponent */
#define	SNG_MASK	(SNG_EXP(1) - 1)	/* mask for fraction */

	/* Take care of non-numbers first. */
	if (ISNAN(fp)) {
		/*
		 * Preserve upper bits of NaN, per SPARC V8 appendix N.
		 * Note that fp->fp_mant[0] has the quiet bit set,
		 * even if it is classified as a signalling NaN.
		 */
		(void) fpu_shr(fp, FP_NMANT - 1 - SNG_FRACBITS);
		exp = SNG_EXP_INFNAN;
		goto done;
	}
	if (ISINF(fp))
		return (sign | SNG_EXP(SNG_EXP_INFNAN));
	if (ISZERO(fp))
		return (sign);

	/*
	 * Normals (including subnormals).  Drop all the fraction bits
	 * (including the explicit ``implied'' 1 bit) down into the
	 * single-precision range.  If the number is subnormal, move
	 * the ``implied'' 1 into the explicit range as well, and shift
	 * right to introduce leading zeroes.  Rounding then acts
	 * differently for normals and subnormals: the largest subnormal
	 * may round to the smallest normal (1.0 x 2^minexp), or may
	 * remain subnormal.  In the latter case, signal an underflow
	 * if the result was inexact or if underflow traps are enabled.
	 *
	 * Rounding a normal, on the other hand, always produces another
	 * normal (although either way the result might be too big for
	 * single precision, and cause an overflow).  If rounding a
	 * normal produces 2.0 in the fraction, we need not adjust that
	 * fraction at all, since both 1.0 and 2.0 are zero under the
	 * fraction mask.
	 *
	 * Note that the guard and round bits vanish from the number after
	 * rounding.
	 */
	if ((exp = fp->fp_exp + SNG_EXP_BIAS) <= 0) {	/* subnormal */
		/* -NG for g,r; -SNG_FRACBITS-exp for fraction */
		(void) fpu_shr(fp, FP_NMANT - FP_NG - SNG_FRACBITS - exp);
		/* rounding up to exactly 1.0 => smallest normal */
		if (round(fe, fp) && fp->fp_mant[3] == SNG_EXP(1))
			return (sign | SNG_EXP(1) | 0);
		/* still subnormal: underflow if inexact or UF trap enabled */
		if ((fe->fe_cx & FSR_NX) ||
		    (fe->fe_fsr & (FSR_UF << FSR_TEM_SHIFT)))
			fe->fe_cx |= FSR_UF;
		return (sign | SNG_EXP(0) | fp->fp_mant[3]);
	}
	/* -FP_NG for g,r; -1 for implied 1; -SNG_FRACBITS for fraction */
	(void) fpu_shr(fp, FP_NMANT - FP_NG - 1 - SNG_FRACBITS);
#ifdef DIAGNOSTIC
	/* the implied 1 bit must be set for a normal number */
	if ((fp->fp_mant[3] & SNG_EXP(1 << FP_NG)) == 0)
		panic("fpu_ftos");
#endif
	/* rounding up to 2.0 bumps the exponent; fraction masks to 0 */
	if (round(fe, fp) && fp->fp_mant[3] == SNG_EXP(2))
		exp++;
	if (exp >= SNG_EXP_INFNAN) {
		/* overflow to inf or to max single */
		fe->fe_cx |= FSR_OF | FSR_NX;
		if (toinf(fe, sign))
			return (sign | SNG_EXP(SNG_EXP_INFNAN));
		return (sign | SNG_EXP(SNG_EXP_INFNAN - 1) | SNG_MASK);
	}
done:
	/* phew, made it: assemble sign | exponent | fraction */
	return (sign | SNG_EXP(exp) | (fp->fp_mant[3] & SNG_MASK));
}
318
/*
 * fpn -> double (32 bit high-order result returned; 32-bit low order result
 * left in res[1]).  Assumes <= 61 bits in double precision fraction.
 *
 * This code mimics fpu_ftos; see it for comments.
 */
u_int
fpu_ftod(fe, fp, res)
	struct fpemu *fe;
	register struct fpn *fp;
	u_int *res;
{
	register u_int sign = fp->fp_sign << 31;
	register int exp;

#define	DBL_EXP(e)	((e) << (DBL_FRACBITS & 31))
#define	DBL_MASK	(DBL_EXP(1) - 1)

	if (ISNAN(fp)) {
		/* preserve upper NaN bits (see fpu_ftos) */
		(void) fpu_shr(fp, FP_NMANT - 1 - DBL_FRACBITS);
		exp = DBL_EXP_INFNAN;
		goto done;
	}
	if (ISINF(fp)) {
		/* Inf: infinity exponent, zero fraction */
		sign |= DBL_EXP(DBL_EXP_INFNAN);
		goto zero;
	}
	if (ISZERO(fp)) {
zero:		res[1] = 0;
		return (sign);
	}

	if ((exp = fp->fp_exp + DBL_EXP_BIAS) <= 0) {	/* subnormal */
		(void) fpu_shr(fp, FP_NMANT - FP_NG - DBL_FRACBITS - exp);
		/* rounding up to exactly 1.0 => smallest normal */
		if (round(fe, fp) && fp->fp_mant[2] == DBL_EXP(1)) {
			res[1] = 0;
			return (sign | DBL_EXP(1) | 0);
		}
		/* still subnormal: underflow if inexact or UF trap enabled */
		if ((fe->fe_cx & FSR_NX) ||
		    (fe->fe_fsr & (FSR_UF << FSR_TEM_SHIFT)))
			fe->fe_cx |= FSR_UF;
		exp = 0;
		goto done;
	}
	(void) fpu_shr(fp, FP_NMANT - FP_NG - 1 - DBL_FRACBITS);
	/* rounding up to 2.0 bumps the exponent; fraction masks to 0 */
	if (round(fe, fp) && fp->fp_mant[2] == DBL_EXP(2))
		exp++;
	if (exp >= DBL_EXP_INFNAN) {
		/* overflow to inf or to max double, per rounding direction */
		fe->fe_cx |= FSR_OF | FSR_NX;
		if (toinf(fe, sign)) {
			res[1] = 0;
			return (sign | DBL_EXP(DBL_EXP_INFNAN) | 0);
		}
		res[1] = ~0;
		return (sign | DBL_EXP(DBL_EXP_INFNAN) | DBL_MASK);
	}
done:
	/* low word of fraction to res[1]; return sign|exp|high fraction */
	res[1] = fp->fp_mant[3];
	return (sign | DBL_EXP(exp) | (fp->fp_mant[2] & DBL_MASK));
}
379
/*
 * fpn -> extended (32 bit high-order result returned; low-order fraction
 * words left in res[1]..res[3]).  Like ftod, which is like ftos ... but
 * our internal format *is* extended precision, plus 2 bits for guard/round,
 * so we can avoid a small bit of work.
 */
u_int
fpu_ftox(fe, fp, res)
	struct fpemu *fe;
	register struct fpn *fp;
	u_int *res;
{
	register u_int sign = fp->fp_sign << 31;
	register int exp;

#define	EXT_EXP(e)	((e) << (EXT_FRACBITS & 31))
#define	EXT_MASK	(EXT_EXP(1) - 1)

	if (ISNAN(fp)) {
		/* just drop the g,r bits; no rounding is done for NaNs */
		(void) fpu_shr(fp, 2);	/* since we are not rounding */
		exp = EXT_EXP_INFNAN;
		goto done;
	}
	if (ISINF(fp)) {
		/* Inf: infinity exponent, zero fraction */
		sign |= EXT_EXP(EXT_EXP_INFNAN);
		goto zero;
	}
	if (ISZERO(fp)) {
zero:		res[1] = res[2] = res[3] = 0;
		return (sign);
	}

	if ((exp = fp->fp_exp + EXT_EXP_BIAS) <= 0) {	/* subnormal */
		(void) fpu_shr(fp, FP_NMANT - FP_NG - EXT_FRACBITS - exp);
		/* rounding up to exactly 1.0 => smallest normal */
		if (round(fe, fp) && fp->fp_mant[0] == EXT_EXP(1)) {
			res[1] = res[2] = res[3] = 0;
			return (sign | EXT_EXP(1) | 0);
		}
		/* still subnormal: underflow if inexact or UF trap enabled */
		if ((fe->fe_cx & FSR_NX) ||
		    (fe->fe_fsr & (FSR_UF << FSR_TEM_SHIFT)))
			fe->fe_cx |= FSR_UF;
		exp = 0;
		goto done;
	}
	/* Since internal == extended, no need to shift here. */
	/* rounding up to 2.0 bumps the exponent; fraction masks to 0 */
	if (round(fe, fp) && fp->fp_mant[0] == EXT_EXP(2))
		exp++;
	if (exp >= EXT_EXP_INFNAN) {
		/* overflow to inf or to max extended, per rounding direction */
		fe->fe_cx |= FSR_OF | FSR_NX;
		if (toinf(fe, sign)) {
			res[1] = res[2] = res[3] = 0;
			return (sign | EXT_EXP(EXT_EXP_INFNAN) | 0);
		}
		res[1] = res[2] = res[3] = ~0;
		return (sign | EXT_EXP(EXT_EXP_INFNAN) | EXT_MASK);
	}
done:
	/* low fraction words to res[1..3]; return sign|exp|high fraction */
	res[1] = fp->fp_mant[1];
	res[2] = fp->fp_mant[2];
	res[3] = fp->fp_mant[3];
	return (sign | EXT_EXP(exp) | (fp->fp_mant[0] & EXT_MASK));
}
442
443 /*
444 * Implode an fpn, writing the result into the given space.
445 */
446 void
447 fpu_implode(fe, fp, type, space)
448 struct fpemu *fe;
449 register struct fpn *fp;
450 int type;
451 register u_int *space;
452 {
453
454 switch (type) {
455
456 case FTYPE_INT:
457 space[0] = fpu_ftoi(fe, fp);
458 break;
459
460 case FTYPE_SNG:
461 space[0] = fpu_ftos(fe, fp);
462 break;
463
464 case FTYPE_DBL:
465 space[0] = fpu_ftod(fe, fp, space);
466 break;
467
468 case FTYPE_EXT:
469 /* funky rounding precision options ?? */
470 space[0] = fpu_ftox(fe, fp, space);
471 break;
472
473 default:
474 panic("fpu_implode");
475 }
476 }
477