softfloat-specialize revision 1.2
1
2/*
3===============================================================================
4
5This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
6Arithmetic Package, Release 2a.
7
8Written by John R. Hauser.  This work was made possible in part by the
9International Computer Science Institute, located at Suite 600, 1947 Center
10Street, Berkeley, California 94704.  Funding was partially provided by the
11National Science Foundation under grant MIP-9311980.  The original version
12of this code was written as part of a project to build a fixed-point vector
13processor in collaboration with the University of California at Berkeley,
14overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
15is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
16arithmetic/SoftFloat.html'.
17
18THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
19has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
20TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
21PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
22AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
23
24Derivative works are acceptable, even for commercial purposes, so long as
25(1) they include prominent notice that the work is derivative, and (2) they
26include prominent notice akin to these four paragraphs for those parts of
27this code that are retained.
28
29===============================================================================
30*/
31
32/*
33-------------------------------------------------------------------------------
34Underflow tininess-detection mode, statically initialized to default value.
35(The declaration in `softfloat.h' must match the `int8' type here.)
36-------------------------------------------------------------------------------
37*/
38#ifdef SOFTFLOAT_FOR_GCC
39static
40#endif
41int8 float_detect_tininess = float_tininess_after_rounding;
42
43/*
44-------------------------------------------------------------------------------
45Raises the exceptions specified by `flags'.  Floating-point traps can be
46defined here if desired.  It is currently not possible for such a trap to
47substitute a result value.  If traps are not implemented, this routine
48should be simply `float_exception_flags |= flags;'.
49-------------------------------------------------------------------------------
50*/
51void float_raise( int8 flags )
52{
53
54    float_exception_flags |= flags;
55
56}
57
58/*
59-------------------------------------------------------------------------------
60Internal canonical NaN format.
61-------------------------------------------------------------------------------
62*/
63typedef struct {
64    flag sign;
65    bits64 high, low;
66} commonNaNT;
67
/*
-------------------------------------------------------------------------------
Bit pattern of the single-precision NaN that is generated by default: every
bit of the 32-bit word is set.
-------------------------------------------------------------------------------
*/
#define float32_default_nan 0xFFFFFFFF
74
75/*
76-------------------------------------------------------------------------------
77Returns 1 if the single-precision floating-point value `a' is a NaN;
78otherwise returns 0.
79-------------------------------------------------------------------------------
80*/
81#ifdef SOFTFLOAT_FOR_GCC
82static
83#endif
84flag float32_is_nan( float32 a )
85{
86
87    return ( 0xFF000000 < (bits32) ( a<<1 ) );
88
89}
90
91/*
92-------------------------------------------------------------------------------
93Returns 1 if the single-precision floating-point value `a' is a signaling
94NaN; otherwise returns 0.
95-------------------------------------------------------------------------------
96*/
97#if defined(SOFTFLOAT_FOR_GCC) && !defined(SOFTFLOATSPARC64_FOR_GCC)
98static
99#endif
100flag float32_is_signaling_nan( float32 a )
101{
102
103    return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF );
104
105}
106
107/*
108-------------------------------------------------------------------------------
109Returns the result of converting the single-precision floating-point NaN
110`a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
111exception is raised.
112-------------------------------------------------------------------------------
113*/
114static commonNaNT float32ToCommonNaN( float32 a )
115{
116    commonNaNT z;
117
118    if ( float32_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
119    z.sign = a>>31;
120    z.low = 0;
121    z.high = ( (bits64) a )<<41;
122    return z;
123
124}
125
126/*
127-------------------------------------------------------------------------------
128Returns the result of converting the canonical NaN `a' to the single-
129precision floating-point format.
130-------------------------------------------------------------------------------
131*/
132static float32 commonNaNToFloat32( commonNaNT a )
133{
134
135    return ( ( (bits32) a.sign )<<31 ) | 0x7FC00000 | ( a.high>>41 );
136
137}
138
139/*
140-------------------------------------------------------------------------------
141Takes two single-precision floating-point values `a' and `b', one of which
142is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is a
143signaling NaN, the invalid exception is raised.
144-------------------------------------------------------------------------------
145*/
146static float32 propagateFloat32NaN( float32 a, float32 b )
147{
148    flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
149
150    aIsNaN = float32_is_nan( a );
151    aIsSignalingNaN = float32_is_signaling_nan( a );
152    bIsNaN = float32_is_nan( b );
153    bIsSignalingNaN = float32_is_signaling_nan( b );
154    a |= 0x00400000;
155    b |= 0x00400000;
156    if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
157    if ( aIsNaN ) {
158        return ( aIsSignalingNaN & bIsNaN ) ? b : a;
159    }
160    else {
161        return b;
162    }
163
164}
165
/*
-------------------------------------------------------------------------------
Bit pattern of the double-precision NaN that is generated by default: every
bit of the 64-bit word is set.
-------------------------------------------------------------------------------
*/
#define float64_default_nan LIT64( 0xFFFFFFFFFFFFFFFF )
172
173/*
174-------------------------------------------------------------------------------
175Returns 1 if the double-precision floating-point value `a' is a NaN;
176otherwise returns 0.
177-------------------------------------------------------------------------------
178*/
179#ifdef SOFTFLOAT_FOR_GCC
180static
181#endif
182flag float64_is_nan( float64 a )
183{
184
185    return ( LIT64( 0xFFE0000000000000 ) <
186	     (bits64) ( FLOAT64_DEMANGLE(a)<<1 ) );
187
188}
189
190/*
191-------------------------------------------------------------------------------
192Returns 1 if the double-precision floating-point value `a' is a signaling
193NaN; otherwise returns 0.
194-------------------------------------------------------------------------------
195*/
196#if defined(SOFTFLOAT_FOR_GCC) && !defined(SOFTFLOATSPARC64_FOR_GCC)
197static
198#endif
199flag float64_is_signaling_nan( float64 a )
200{
201
202    return
203           ( ( ( FLOAT64_DEMANGLE(a)>>51 ) & 0xFFF ) == 0xFFE )
204        && ( FLOAT64_DEMANGLE(a) & LIT64( 0x0007FFFFFFFFFFFF ) );
205
206}
207
208/*
209-------------------------------------------------------------------------------
210Returns the result of converting the double-precision floating-point NaN
211`a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
212exception is raised.
213-------------------------------------------------------------------------------
214*/
215static commonNaNT float64ToCommonNaN( float64 a )
216{
217    commonNaNT z;
218
219    if ( float64_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
220    z.sign = FLOAT64_DEMANGLE(a)>>63;
221    z.low = 0;
222    z.high = FLOAT64_DEMANGLE(a)<<12;
223    return z;
224
225}
226
227/*
228-------------------------------------------------------------------------------
229Returns the result of converting the canonical NaN `a' to the double-
230precision floating-point format.
231-------------------------------------------------------------------------------
232*/
233static float64 commonNaNToFloat64( commonNaNT a )
234{
235
236    return FLOAT64_MANGLE(
237	( ( (bits64) a.sign )<<63 )
238        | LIT64( 0x7FF8000000000000 )
239        | ( a.high>>12 ) );
240
241}
242
243/*
244-------------------------------------------------------------------------------
245Takes two double-precision floating-point values `a' and `b', one of which
246is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is a
247signaling NaN, the invalid exception is raised.
248-------------------------------------------------------------------------------
249*/
250static float64 propagateFloat64NaN( float64 a, float64 b )
251{
252    flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
253
254    aIsNaN = float64_is_nan( a );
255    aIsSignalingNaN = float64_is_signaling_nan( a );
256    bIsNaN = float64_is_nan( b );
257    bIsSignalingNaN = float64_is_signaling_nan( b );
258    a |= FLOAT64_MANGLE(LIT64( 0x0008000000000000 ));
259    b |= FLOAT64_MANGLE(LIT64( 0x0008000000000000 ));
260    if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
261    if ( aIsNaN ) {
262        return ( aIsSignalingNaN & bIsNaN ) ? b : a;
263    }
264    else {
265        return b;
266    }
267
268}
269
270#ifdef FLOATX80
271
/*
-------------------------------------------------------------------------------
Bit pattern of the extended double-precision NaN that is generated by
default, split into two parts: `high' is the most-significant 16 bits and
`low' is the least-significant 64 bits.  Every bit of both parts is set.
-------------------------------------------------------------------------------
*/
#define floatx80_default_nan_high 0xFFFF
#define floatx80_default_nan_low  LIT64( 0xFFFFFFFFFFFFFFFF )
281
282/*
283-------------------------------------------------------------------------------
284Returns 1 if the extended double-precision floating-point value `a' is a
285NaN; otherwise returns 0.
286-------------------------------------------------------------------------------
287*/
288flag floatx80_is_nan( floatx80 a )
289{
290
291    return ( ( a.high & 0x7FFF ) == 0x7FFF ) && (bits64) ( a.low<<1 );
292
293}
294
295/*
296-------------------------------------------------------------------------------
297Returns 1 if the extended double-precision floating-point value `a' is a
298signaling NaN; otherwise returns 0.
299-------------------------------------------------------------------------------
300*/
301flag floatx80_is_signaling_nan( floatx80 a )
302{
303    bits64 aLow;
304
305    aLow = a.low & ~ LIT64( 0x4000000000000000 );
306    return
307           ( ( a.high & 0x7FFF ) == 0x7FFF )
308        && (bits64) ( aLow<<1 )
309        && ( a.low == aLow );
310
311}
312
313/*
314-------------------------------------------------------------------------------
315Returns the result of converting the extended double-precision floating-
316point NaN `a' to the canonical NaN format.  If `a' is a signaling NaN, the
317invalid exception is raised.
318-------------------------------------------------------------------------------
319*/
320static commonNaNT floatx80ToCommonNaN( floatx80 a )
321{
322    commonNaNT z;
323
324    if ( floatx80_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
325    z.sign = a.high>>15;
326    z.low = 0;
327    z.high = a.low<<1;
328    return z;
329
330}
331
332/*
333-------------------------------------------------------------------------------
334Returns the result of converting the canonical NaN `a' to the extended
335double-precision floating-point format.
336-------------------------------------------------------------------------------
337*/
338static floatx80 commonNaNToFloatx80( commonNaNT a )
339{
340    floatx80 z;
341
342    z.low = LIT64( 0xC000000000000000 ) | ( a.high>>1 );
343    z.high = ( ( (bits16) a.sign )<<15 ) | 0x7FFF;
344    return z;
345
346}
347
348/*
349-------------------------------------------------------------------------------
350Takes two extended double-precision floating-point values `a' and `b', one
351of which is a NaN, and returns the appropriate NaN result.  If either `a' or
352`b' is a signaling NaN, the invalid exception is raised.
353-------------------------------------------------------------------------------
354*/
355static floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b )
356{
357    flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
358
359    aIsNaN = floatx80_is_nan( a );
360    aIsSignalingNaN = floatx80_is_signaling_nan( a );
361    bIsNaN = floatx80_is_nan( b );
362    bIsSignalingNaN = floatx80_is_signaling_nan( b );
363    a.low |= LIT64( 0xC000000000000000 );
364    b.low |= LIT64( 0xC000000000000000 );
365    if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
366    if ( aIsNaN ) {
367        return ( aIsSignalingNaN & bIsNaN ) ? b : a;
368    }
369    else {
370        return b;
371    }
372
373}
374
375#endif
376
377#ifdef FLOAT128
378
/*
-------------------------------------------------------------------------------
Bit pattern of the quadruple-precision NaN that is generated by default,
split into two 64-bit parts: `high' is the most-significant half and `low'
is the least-significant half.  Every bit of both halves is set.
-------------------------------------------------------------------------------
*/
#define float128_default_nan_high LIT64( 0xFFFFFFFFFFFFFFFF )
#define float128_default_nan_low  LIT64( 0xFFFFFFFFFFFFFFFF )
387
388/*
389-------------------------------------------------------------------------------
390Returns 1 if the quadruple-precision floating-point value `a' is a NaN;
391otherwise returns 0.
392-------------------------------------------------------------------------------
393*/
394flag float128_is_nan( float128 a )
395{
396
397    return
398           ( LIT64( 0xFFFE000000000000 ) <= (bits64) ( a.high<<1 ) )
399        && ( a.low || ( a.high & LIT64( 0x0000FFFFFFFFFFFF ) ) );
400
401}
402
403/*
404-------------------------------------------------------------------------------
405Returns 1 if the quadruple-precision floating-point value `a' is a
406signaling NaN; otherwise returns 0.
407-------------------------------------------------------------------------------
408*/
409flag float128_is_signaling_nan( float128 a )
410{
411
412    return
413           ( ( ( a.high>>47 ) & 0xFFFF ) == 0xFFFE )
414        && ( a.low || ( a.high & LIT64( 0x00007FFFFFFFFFFF ) ) );
415
416}
417
418/*
419-------------------------------------------------------------------------------
420Returns the result of converting the quadruple-precision floating-point NaN
421`a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
422exception is raised.
423-------------------------------------------------------------------------------
424*/
425static commonNaNT float128ToCommonNaN( float128 a )
426{
427    commonNaNT z;
428
429    if ( float128_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
430    z.sign = a.high>>63;
431    shortShift128Left( a.high, a.low, 16, &z.high, &z.low );
432    return z;
433
434}
435
436/*
437-------------------------------------------------------------------------------
438Returns the result of converting the canonical NaN `a' to the quadruple-
439precision floating-point format.
440-------------------------------------------------------------------------------
441*/
442static float128 commonNaNToFloat128( commonNaNT a )
443{
444    float128 z;
445
446    shift128Right( a.high, a.low, 16, &z.high, &z.low );
447    z.high |= ( ( (bits64) a.sign )<<63 ) | LIT64( 0x7FFF800000000000 );
448    return z;
449
450}
451
452/*
453-------------------------------------------------------------------------------
454Takes two quadruple-precision floating-point values `a' and `b', one of
455which is a NaN, and returns the appropriate NaN result.  If either `a' or
456`b' is a signaling NaN, the invalid exception is raised.
457-------------------------------------------------------------------------------
458*/
459static float128 propagateFloat128NaN( float128 a, float128 b )
460{
461    flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
462
463    aIsNaN = float128_is_nan( a );
464    aIsSignalingNaN = float128_is_signaling_nan( a );
465    bIsNaN = float128_is_nan( b );
466    bIsSignalingNaN = float128_is_signaling_nan( b );
467    a.high |= LIT64( 0x0000800000000000 );
468    b.high |= LIT64( 0x0000800000000000 );
469    if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
470    if ( aIsNaN ) {
471        return ( aIsSignalingNaN & bIsNaN ) ? b : a;
472    }
473    else {
474        return b;
475    }
476
477}
478
479#endif
480
481