softfloat.c revision 1.1.2.3 1 1.1.2.3 thorpej /* $NetBSD: softfloat.c,v 1.1.2.3 2002/12/11 06:46:23 thorpej Exp $ */
2 1.1.2.2 nathanw
3 1.1.2.2 nathanw /*
4 1.1.2.2 nathanw * This version hacked for use with gcc -msoft-float by bjh21.
5 1.1.2.2 nathanw * (Mostly a case of #ifdefing out things GCC doesn't need or provides
6 1.1.2.2 nathanw * itself).
7 1.1.2.2 nathanw */
8 1.1.2.2 nathanw
9 1.1.2.2 nathanw /*
10 1.1.2.2 nathanw * Things you may want to define:
11 1.1.2.2 nathanw *
12 1.1.2.2 nathanw * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with
13 1.1.2.2 nathanw * -msoft-float) to work. Include "softfloat-for-gcc.h" to get them
14 1.1.2.2 nathanw * properly renamed.
15 1.1.2.2 nathanw */
16 1.1.2.2 nathanw
17 1.1.2.2 nathanw /*
18 1.1.2.2 nathanw ===============================================================================
19 1.1.2.2 nathanw
20 1.1.2.2 nathanw This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 1.1.2.2 nathanw Arithmetic Package, Release 2a.
22 1.1.2.2 nathanw
23 1.1.2.2 nathanw Written by John R. Hauser. This work was made possible in part by the
24 1.1.2.2 nathanw International Computer Science Institute, located at Suite 600, 1947 Center
25 1.1.2.2 nathanw Street, Berkeley, California 94704. Funding was partially provided by the
26 1.1.2.2 nathanw National Science Foundation under grant MIP-9311980. The original version
27 1.1.2.2 nathanw of this code was written as part of a project to build a fixed-point vector
28 1.1.2.2 nathanw processor in collaboration with the University of California at Berkeley,
29 1.1.2.2 nathanw overseen by Profs. Nelson Morgan and John Wawrzynek. More information
30 1.1.2.2 nathanw is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 1.1.2.2 nathanw arithmetic/SoftFloat.html'.
32 1.1.2.2 nathanw
33 1.1.2.2 nathanw THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34 1.1.2.2 nathanw has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 1.1.2.2 nathanw TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36 1.1.2.2 nathanw PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 1.1.2.2 nathanw AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
38 1.1.2.2 nathanw
39 1.1.2.2 nathanw Derivative works are acceptable, even for commercial purposes, so long as
40 1.1.2.2 nathanw (1) they include prominent notice that the work is derivative, and (2) they
41 1.1.2.2 nathanw include prominent notice akin to these four paragraphs for those parts of
42 1.1.2.2 nathanw this code that are retained.
43 1.1.2.2 nathanw
44 1.1.2.2 nathanw ===============================================================================
45 1.1.2.2 nathanw */
46 1.1.2.2 nathanw
47 1.1.2.3 thorpej /* If you need this in a boot program, you have bigger problems... */
48 1.1.2.3 thorpej #ifndef _STANDALONE
49 1.1.2.3 thorpej
50 1.1.2.2 nathanw #include <sys/cdefs.h>
51 1.1.2.2 nathanw #if defined(LIBC_SCCS) && !defined(lint)
52 1.1.2.3 thorpej __RCSID("$NetBSD: softfloat.c,v 1.1.2.3 2002/12/11 06:46:23 thorpej Exp $");
53 1.1.2.2 nathanw #endif /* LIBC_SCCS and not lint */
54 1.1.2.2 nathanw
55 1.1.2.2 nathanw #ifdef SOFTFLOAT_FOR_GCC
56 1.1.2.2 nathanw #include "softfloat-for-gcc.h"
57 1.1.2.2 nathanw #endif
58 1.1.2.2 nathanw
59 1.1.2.2 nathanw #include "milieu.h"
60 1.1.2.2 nathanw #include "softfloat.h"
61 1.1.2.2 nathanw
/*
 * Hooks for translating between a float64 as it is stored in memory and
 * the form SoftFloat works on.  Each one defaults to the identity unless a
 * definition has already been supplied before this point.
 */
#if !defined(FLOAT64_DEMANGLE)
#define FLOAT64_DEMANGLE(a)	(a)
#endif
#if !defined(FLOAT64_MANGLE)
#define FLOAT64_MANGLE(a)	(a)
#endif
72 1.1.2.2 nathanw
73 1.1.2.2 nathanw /*
74 1.1.2.2 nathanw -------------------------------------------------------------------------------
75 1.1.2.2 nathanw Floating-point rounding mode, extended double-precision rounding precision,
76 1.1.2.2 nathanw and exception flags.
77 1.1.2.2 nathanw -------------------------------------------------------------------------------
78 1.1.2.2 nathanw */
79 1.1.2.2 nathanw
80 1.1.2.2 nathanw /*
81 1.1.2.2 nathanw * XXX: This may cause options-MULTIPROCESSOR or thread problems someday.
82 1.1.2.2 nathanw * Right now, it does not. I've removed all other dynamic global
83 1.1.2.2 nathanw * variables. [ross]
84 1.1.2.2 nathanw */
#if defined(FLOATX80)
/* Extended double-precision rounding precision; starts out at 80. */
int8 floatx80_rounding_precision = 80;
#endif
88 1.1.2.2 nathanw
89 1.1.2.2 nathanw /*
90 1.1.2.2 nathanw -------------------------------------------------------------------------------
91 1.1.2.2 nathanw Primitive arithmetic functions, including multi-word arithmetic, and
92 1.1.2.2 nathanw division and square root approximations. (Can be specialized to target if
93 1.1.2.2 nathanw desired.)
94 1.1.2.2 nathanw -------------------------------------------------------------------------------
95 1.1.2.2 nathanw */
96 1.1.2.2 nathanw #include "softfloat-macros.h"
97 1.1.2.2 nathanw
98 1.1.2.2 nathanw /*
99 1.1.2.2 nathanw -------------------------------------------------------------------------------
100 1.1.2.2 nathanw Functions and definitions to determine: (1) whether tininess for underflow
101 1.1.2.2 nathanw is detected before or after rounding by default, (2) what (if anything)
102 1.1.2.2 nathanw happens when exceptions are raised, (3) how signaling NaNs are distinguished
103 1.1.2.2 nathanw from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
104 1.1.2.2 nathanw are propagated from function inputs to output. These details are target-
105 1.1.2.2 nathanw specific.
106 1.1.2.2 nathanw -------------------------------------------------------------------------------
107 1.1.2.2 nathanw */
108 1.1.2.2 nathanw #include "softfloat-specialize.h"
109 1.1.2.2 nathanw
110 1.1.2.2 nathanw #ifndef SOFTFLOAT_FOR_GCC /* Not used */
111 1.1.2.2 nathanw /*
112 1.1.2.2 nathanw -------------------------------------------------------------------------------
113 1.1.2.2 nathanw Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
114 1.1.2.2 nathanw and 7, and returns the properly rounded 32-bit integer corresponding to the
115 1.1.2.2 nathanw input. If `zSign' is 1, the input is negated before being converted to an
116 1.1.2.2 nathanw integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
117 1.1.2.2 nathanw is simply rounded to an integer, with the inexact exception raised if the
118 1.1.2.2 nathanw input cannot be represented exactly as an integer. However, if the fixed-
119 1.1.2.2 nathanw point input is too large, the invalid exception is raised and the largest
120 1.1.2.2 nathanw positive or negative integer is returned.
121 1.1.2.2 nathanw -------------------------------------------------------------------------------
122 1.1.2.2 nathanw */
123 1.1.2.2 nathanw static int32 roundAndPackInt32( flag zSign, bits64 absZ )
124 1.1.2.2 nathanw {
125 1.1.2.2 nathanw int8 roundingMode;
126 1.1.2.2 nathanw flag roundNearestEven;
127 1.1.2.2 nathanw int8 roundIncrement, roundBits;
128 1.1.2.2 nathanw int32 z;
129 1.1.2.2 nathanw
130 1.1.2.2 nathanw roundingMode = float_rounding_mode();
131 1.1.2.2 nathanw roundNearestEven = ( roundingMode == float_round_nearest_even );
132 1.1.2.2 nathanw roundIncrement = 0x40;
133 1.1.2.2 nathanw if ( ! roundNearestEven ) {
134 1.1.2.2 nathanw if ( roundingMode == float_round_to_zero ) {
135 1.1.2.2 nathanw roundIncrement = 0;
136 1.1.2.2 nathanw }
137 1.1.2.2 nathanw else {
138 1.1.2.2 nathanw roundIncrement = 0x7F;
139 1.1.2.2 nathanw if ( zSign ) {
140 1.1.2.2 nathanw if ( roundingMode == float_round_up ) roundIncrement = 0;
141 1.1.2.2 nathanw }
142 1.1.2.2 nathanw else {
143 1.1.2.2 nathanw if ( roundingMode == float_round_down ) roundIncrement = 0;
144 1.1.2.2 nathanw }
145 1.1.2.2 nathanw }
146 1.1.2.2 nathanw }
147 1.1.2.2 nathanw roundBits = absZ & 0x7F;
148 1.1.2.2 nathanw absZ = ( absZ + roundIncrement )>>7;
149 1.1.2.2 nathanw absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
150 1.1.2.2 nathanw z = absZ;
151 1.1.2.2 nathanw if ( zSign ) z = - z;
152 1.1.2.2 nathanw if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
153 1.1.2.2 nathanw float_raise( float_flag_invalid );
154 1.1.2.2 nathanw return zSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
155 1.1.2.2 nathanw }
156 1.1.2.2 nathanw if ( roundBits ) float_set_inexact();
157 1.1.2.2 nathanw return z;
158 1.1.2.2 nathanw
159 1.1.2.2 nathanw }
160 1.1.2.2 nathanw
161 1.1.2.2 nathanw /*
162 1.1.2.2 nathanw -------------------------------------------------------------------------------
163 1.1.2.2 nathanw Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
164 1.1.2.2 nathanw `absZ1', with binary point between bits 63 and 64 (between the input words),
165 1.1.2.2 nathanw and returns the properly rounded 64-bit integer corresponding to the input.
166 1.1.2.2 nathanw If `zSign' is 1, the input is negated before being converted to an integer.
167 1.1.2.2 nathanw Ordinarily, the fixed-point input is simply rounded to an integer, with
168 1.1.2.2 nathanw the inexact exception raised if the input cannot be represented exactly as
169 1.1.2.2 nathanw an integer. However, if the fixed-point input is too large, the invalid
170 1.1.2.2 nathanw exception is raised and the largest positive or negative integer is
171 1.1.2.2 nathanw returned.
172 1.1.2.2 nathanw -------------------------------------------------------------------------------
173 1.1.2.2 nathanw */
174 1.1.2.2 nathanw static int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 )
175 1.1.2.2 nathanw {
176 1.1.2.2 nathanw int8 roundingMode;
177 1.1.2.2 nathanw flag roundNearestEven, increment;
178 1.1.2.2 nathanw int64 z;
179 1.1.2.2 nathanw
180 1.1.2.2 nathanw roundingMode = float_rounding_mode();
181 1.1.2.2 nathanw roundNearestEven = ( roundingMode == float_round_nearest_even );
182 1.1.2.2 nathanw increment = ( (sbits64) absZ1 < 0 );
183 1.1.2.2 nathanw if ( ! roundNearestEven ) {
184 1.1.2.2 nathanw if ( roundingMode == float_round_to_zero ) {
185 1.1.2.2 nathanw increment = 0;
186 1.1.2.2 nathanw }
187 1.1.2.2 nathanw else {
188 1.1.2.2 nathanw if ( zSign ) {
189 1.1.2.2 nathanw increment = ( roundingMode == float_round_down ) && absZ1;
190 1.1.2.2 nathanw }
191 1.1.2.2 nathanw else {
192 1.1.2.2 nathanw increment = ( roundingMode == float_round_up ) && absZ1;
193 1.1.2.2 nathanw }
194 1.1.2.2 nathanw }
195 1.1.2.2 nathanw }
196 1.1.2.2 nathanw if ( increment ) {
197 1.1.2.2 nathanw ++absZ0;
198 1.1.2.2 nathanw if ( absZ0 == 0 ) goto overflow;
199 1.1.2.2 nathanw absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven );
200 1.1.2.2 nathanw }
201 1.1.2.2 nathanw z = absZ0;
202 1.1.2.2 nathanw if ( zSign ) z = - z;
203 1.1.2.2 nathanw if ( z && ( ( z < 0 ) ^ zSign ) ) {
204 1.1.2.2 nathanw overflow:
205 1.1.2.2 nathanw float_raise( float_flag_invalid );
206 1.1.2.2 nathanw return
207 1.1.2.2 nathanw zSign ? (sbits64) LIT64( 0x8000000000000000 )
208 1.1.2.2 nathanw : LIT64( 0x7FFFFFFFFFFFFFFF );
209 1.1.2.2 nathanw }
210 1.1.2.2 nathanw if ( absZ1 ) float_set_inexact();
211 1.1.2.2 nathanw return z;
212 1.1.2.2 nathanw
213 1.1.2.2 nathanw }
214 1.1.2.2 nathanw #endif
215 1.1.2.2 nathanw
216 1.1.2.2 nathanw /*
217 1.1.2.2 nathanw -------------------------------------------------------------------------------
218 1.1.2.2 nathanw Returns the fraction bits of the single-precision floating-point value `a'.
219 1.1.2.2 nathanw -------------------------------------------------------------------------------
220 1.1.2.2 nathanw */
221 1.1.2.2 nathanw INLINE bits32 extractFloat32Frac( float32 a )
222 1.1.2.2 nathanw {
223 1.1.2.2 nathanw
224 1.1.2.2 nathanw return a & 0x007FFFFF;
225 1.1.2.2 nathanw
226 1.1.2.2 nathanw }
227 1.1.2.2 nathanw
228 1.1.2.2 nathanw /*
229 1.1.2.2 nathanw -------------------------------------------------------------------------------
230 1.1.2.2 nathanw Returns the exponent bits of the single-precision floating-point value `a'.
231 1.1.2.2 nathanw -------------------------------------------------------------------------------
232 1.1.2.2 nathanw */
233 1.1.2.2 nathanw INLINE int16 extractFloat32Exp( float32 a )
234 1.1.2.2 nathanw {
235 1.1.2.2 nathanw
236 1.1.2.2 nathanw return ( a>>23 ) & 0xFF;
237 1.1.2.2 nathanw
238 1.1.2.2 nathanw }
239 1.1.2.2 nathanw
240 1.1.2.2 nathanw /*
241 1.1.2.2 nathanw -------------------------------------------------------------------------------
242 1.1.2.2 nathanw Returns the sign bit of the single-precision floating-point value `a'.
243 1.1.2.2 nathanw -------------------------------------------------------------------------------
244 1.1.2.2 nathanw */
245 1.1.2.2 nathanw INLINE flag extractFloat32Sign( float32 a )
246 1.1.2.2 nathanw {
247 1.1.2.2 nathanw
248 1.1.2.2 nathanw return a>>31;
249 1.1.2.2 nathanw
250 1.1.2.2 nathanw }
251 1.1.2.2 nathanw
252 1.1.2.2 nathanw /*
253 1.1.2.2 nathanw -------------------------------------------------------------------------------
254 1.1.2.2 nathanw Normalizes the subnormal single-precision floating-point value represented
255 1.1.2.2 nathanw by the denormalized significand `aSig'. The normalized exponent and
256 1.1.2.2 nathanw significand are stored at the locations pointed to by `zExpPtr' and
257 1.1.2.2 nathanw `zSigPtr', respectively.
258 1.1.2.2 nathanw -------------------------------------------------------------------------------
259 1.1.2.2 nathanw */
260 1.1.2.2 nathanw static void
261 1.1.2.2 nathanw normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
262 1.1.2.2 nathanw {
263 1.1.2.2 nathanw int8 shiftCount;
264 1.1.2.2 nathanw
265 1.1.2.2 nathanw shiftCount = countLeadingZeros32( aSig ) - 8;
266 1.1.2.2 nathanw *zSigPtr = aSig<<shiftCount;
267 1.1.2.2 nathanw *zExpPtr = 1 - shiftCount;
268 1.1.2.2 nathanw
269 1.1.2.2 nathanw }
270 1.1.2.2 nathanw
271 1.1.2.2 nathanw /*
272 1.1.2.2 nathanw -------------------------------------------------------------------------------
273 1.1.2.2 nathanw Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
274 1.1.2.2 nathanw single-precision floating-point value, returning the result. After being
275 1.1.2.2 nathanw shifted into the proper positions, the three fields are simply added
276 1.1.2.2 nathanw together to form the result. This means that any integer portion of `zSig'
277 1.1.2.2 nathanw will be added into the exponent. Since a properly normalized significand
278 1.1.2.2 nathanw will have an integer portion equal to 1, the `zExp' input should be 1 less
279 1.1.2.2 nathanw than the desired result exponent whenever `zSig' is a complete, normalized
280 1.1.2.2 nathanw significand.
281 1.1.2.2 nathanw -------------------------------------------------------------------------------
282 1.1.2.2 nathanw */
283 1.1.2.2 nathanw INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
284 1.1.2.2 nathanw {
285 1.1.2.2 nathanw
286 1.1.2.2 nathanw return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
287 1.1.2.2 nathanw
288 1.1.2.2 nathanw }
289 1.1.2.2 nathanw
290 1.1.2.2 nathanw /*
291 1.1.2.2 nathanw -------------------------------------------------------------------------------
292 1.1.2.2 nathanw Takes an abstract floating-point value having sign `zSign', exponent `zExp',
293 1.1.2.2 nathanw and significand `zSig', and returns the proper single-precision floating-
294 1.1.2.2 nathanw point value corresponding to the abstract input. Ordinarily, the abstract
295 1.1.2.2 nathanw value is simply rounded and packed into the single-precision format, with
296 1.1.2.2 nathanw the inexact exception raised if the abstract input cannot be represented
297 1.1.2.2 nathanw exactly. However, if the abstract value is too large, the overflow and
298 1.1.2.2 nathanw inexact exceptions are raised and an infinity or maximal finite value is
299 1.1.2.2 nathanw returned. If the abstract value is too small, the input value is rounded to
300 1.1.2.2 nathanw a subnormal number, and the underflow and inexact exceptions are raised if
301 1.1.2.2 nathanw the abstract input cannot be represented exactly as a subnormal single-
302 1.1.2.2 nathanw precision floating-point number.
303 1.1.2.2 nathanw The input significand `zSig' has its binary point between bits 30
304 1.1.2.2 nathanw and 29, which is 7 bits to the left of the usual location. This shifted
305 1.1.2.2 nathanw significand must be normalized or smaller. If `zSig' is not normalized,
306 1.1.2.2 nathanw `zExp' must be 0; in that case, the result returned is a subnormal number,
307 1.1.2.2 nathanw and it must not require rounding. In the usual case that `zSig' is
308 1.1.2.2 nathanw normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
309 1.1.2.2 nathanw The handling of underflow and overflow follows the IEC/IEEE Standard for
310 1.1.2.2 nathanw Binary Floating-Point Arithmetic.
311 1.1.2.2 nathanw -------------------------------------------------------------------------------
312 1.1.2.2 nathanw */
313 1.1.2.2 nathanw static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
314 1.1.2.2 nathanw {
315 1.1.2.2 nathanw int8 roundingMode;
316 1.1.2.2 nathanw flag roundNearestEven;
317 1.1.2.2 nathanw int8 roundIncrement, roundBits;
318 1.1.2.2 nathanw flag isTiny;
319 1.1.2.2 nathanw
320 1.1.2.2 nathanw roundingMode = float_rounding_mode();
321 1.1.2.2 nathanw roundNearestEven = ( roundingMode == float_round_nearest_even );
322 1.1.2.2 nathanw roundIncrement = 0x40;
323 1.1.2.2 nathanw if ( ! roundNearestEven ) {
324 1.1.2.2 nathanw if ( roundingMode == float_round_to_zero ) {
325 1.1.2.2 nathanw roundIncrement = 0;
326 1.1.2.2 nathanw }
327 1.1.2.2 nathanw else {
328 1.1.2.2 nathanw roundIncrement = 0x7F;
329 1.1.2.2 nathanw if ( zSign ) {
330 1.1.2.2 nathanw if ( roundingMode == float_round_up ) roundIncrement = 0;
331 1.1.2.2 nathanw }
332 1.1.2.2 nathanw else {
333 1.1.2.2 nathanw if ( roundingMode == float_round_down ) roundIncrement = 0;
334 1.1.2.2 nathanw }
335 1.1.2.2 nathanw }
336 1.1.2.2 nathanw }
337 1.1.2.2 nathanw roundBits = zSig & 0x7F;
338 1.1.2.2 nathanw if ( 0xFD <= (bits16) zExp ) {
339 1.1.2.2 nathanw if ( ( 0xFD < zExp )
340 1.1.2.2 nathanw || ( ( zExp == 0xFD )
341 1.1.2.2 nathanw && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
342 1.1.2.2 nathanw ) {
343 1.1.2.2 nathanw float_raise( float_flag_overflow | float_flag_inexact );
344 1.1.2.2 nathanw return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
345 1.1.2.2 nathanw }
346 1.1.2.2 nathanw if ( zExp < 0 ) {
347 1.1.2.2 nathanw isTiny =
348 1.1.2.2 nathanw ( float_detect_tininess == float_tininess_before_rounding )
349 1.1.2.2 nathanw || ( zExp < -1 )
350 1.1.2.2 nathanw || ( zSig + roundIncrement < 0x80000000 );
351 1.1.2.2 nathanw shift32RightJamming( zSig, - zExp, &zSig );
352 1.1.2.2 nathanw zExp = 0;
353 1.1.2.2 nathanw roundBits = zSig & 0x7F;
354 1.1.2.2 nathanw if ( isTiny && roundBits ) float_raise( float_flag_underflow );
355 1.1.2.2 nathanw }
356 1.1.2.2 nathanw }
357 1.1.2.2 nathanw if ( roundBits ) float_set_inexact();
358 1.1.2.2 nathanw zSig = ( zSig + roundIncrement )>>7;
359 1.1.2.2 nathanw zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
360 1.1.2.2 nathanw if ( zSig == 0 ) zExp = 0;
361 1.1.2.2 nathanw return packFloat32( zSign, zExp, zSig );
362 1.1.2.2 nathanw
363 1.1.2.2 nathanw }
364 1.1.2.2 nathanw
365 1.1.2.2 nathanw /*
366 1.1.2.2 nathanw -------------------------------------------------------------------------------
367 1.1.2.2 nathanw Takes an abstract floating-point value having sign `zSign', exponent `zExp',
368 1.1.2.2 nathanw and significand `zSig', and returns the proper single-precision floating-
369 1.1.2.2 nathanw point value corresponding to the abstract input. This routine is just like
370 1.1.2.2 nathanw `roundAndPackFloat32' except that `zSig' does not have to be normalized.
371 1.1.2.2 nathanw Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
372 1.1.2.2 nathanw floating-point exponent.
373 1.1.2.2 nathanw -------------------------------------------------------------------------------
374 1.1.2.2 nathanw */
375 1.1.2.2 nathanw static float32
376 1.1.2.2 nathanw normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
377 1.1.2.2 nathanw {
378 1.1.2.2 nathanw int8 shiftCount;
379 1.1.2.2 nathanw
380 1.1.2.2 nathanw shiftCount = countLeadingZeros32( zSig ) - 1;
381 1.1.2.2 nathanw return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
382 1.1.2.2 nathanw
383 1.1.2.2 nathanw }
384 1.1.2.2 nathanw
385 1.1.2.2 nathanw /*
386 1.1.2.2 nathanw -------------------------------------------------------------------------------
387 1.1.2.2 nathanw Returns the fraction bits of the double-precision floating-point value `a'.
388 1.1.2.2 nathanw -------------------------------------------------------------------------------
389 1.1.2.2 nathanw */
390 1.1.2.2 nathanw INLINE bits64 extractFloat64Frac( float64 a )
391 1.1.2.2 nathanw {
392 1.1.2.2 nathanw
393 1.1.2.2 nathanw return FLOAT64_DEMANGLE(a) & LIT64( 0x000FFFFFFFFFFFFF );
394 1.1.2.2 nathanw
395 1.1.2.2 nathanw }
396 1.1.2.2 nathanw
397 1.1.2.2 nathanw /*
398 1.1.2.2 nathanw -------------------------------------------------------------------------------
399 1.1.2.2 nathanw Returns the exponent bits of the double-precision floating-point value `a'.
400 1.1.2.2 nathanw -------------------------------------------------------------------------------
401 1.1.2.2 nathanw */
402 1.1.2.2 nathanw INLINE int16 extractFloat64Exp( float64 a )
403 1.1.2.2 nathanw {
404 1.1.2.2 nathanw
405 1.1.2.2 nathanw return ( FLOAT64_DEMANGLE(a)>>52 ) & 0x7FF;
406 1.1.2.2 nathanw
407 1.1.2.2 nathanw }
408 1.1.2.2 nathanw
409 1.1.2.2 nathanw /*
410 1.1.2.2 nathanw -------------------------------------------------------------------------------
411 1.1.2.2 nathanw Returns the sign bit of the double-precision floating-point value `a'.
412 1.1.2.2 nathanw -------------------------------------------------------------------------------
413 1.1.2.2 nathanw */
414 1.1.2.2 nathanw INLINE flag extractFloat64Sign( float64 a )
415 1.1.2.2 nathanw {
416 1.1.2.2 nathanw
417 1.1.2.2 nathanw return FLOAT64_DEMANGLE(a)>>63;
418 1.1.2.2 nathanw
419 1.1.2.2 nathanw }
420 1.1.2.2 nathanw
421 1.1.2.2 nathanw /*
422 1.1.2.2 nathanw -------------------------------------------------------------------------------
423 1.1.2.2 nathanw Normalizes the subnormal double-precision floating-point value represented
424 1.1.2.2 nathanw by the denormalized significand `aSig'. The normalized exponent and
425 1.1.2.2 nathanw significand are stored at the locations pointed to by `zExpPtr' and
426 1.1.2.2 nathanw `zSigPtr', respectively.
427 1.1.2.2 nathanw -------------------------------------------------------------------------------
428 1.1.2.2 nathanw */
429 1.1.2.2 nathanw static void
430 1.1.2.2 nathanw normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )
431 1.1.2.2 nathanw {
432 1.1.2.2 nathanw int8 shiftCount;
433 1.1.2.2 nathanw
434 1.1.2.2 nathanw shiftCount = countLeadingZeros64( aSig ) - 11;
435 1.1.2.2 nathanw *zSigPtr = aSig<<shiftCount;
436 1.1.2.2 nathanw *zExpPtr = 1 - shiftCount;
437 1.1.2.2 nathanw
438 1.1.2.2 nathanw }
439 1.1.2.2 nathanw
440 1.1.2.2 nathanw /*
441 1.1.2.2 nathanw -------------------------------------------------------------------------------
442 1.1.2.2 nathanw Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
443 1.1.2.2 nathanw double-precision floating-point value, returning the result. After being
444 1.1.2.2 nathanw shifted into the proper positions, the three fields are simply added
445 1.1.2.2 nathanw together to form the result. This means that any integer portion of `zSig'
446 1.1.2.2 nathanw will be added into the exponent. Since a properly normalized significand
447 1.1.2.2 nathanw will have an integer portion equal to 1, the `zExp' input should be 1 less
448 1.1.2.2 nathanw than the desired result exponent whenever `zSig' is a complete, normalized
449 1.1.2.2 nathanw significand.
450 1.1.2.2 nathanw -------------------------------------------------------------------------------
451 1.1.2.2 nathanw */
452 1.1.2.2 nathanw INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
453 1.1.2.2 nathanw {
454 1.1.2.2 nathanw
455 1.1.2.2 nathanw return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) +
456 1.1.2.2 nathanw ( ( (bits64) zExp )<<52 ) + zSig );
457 1.1.2.2 nathanw
458 1.1.2.2 nathanw }
459 1.1.2.2 nathanw
460 1.1.2.2 nathanw /*
461 1.1.2.2 nathanw -------------------------------------------------------------------------------
462 1.1.2.2 nathanw Takes an abstract floating-point value having sign `zSign', exponent `zExp',
463 1.1.2.2 nathanw and significand `zSig', and returns the proper double-precision floating-
464 1.1.2.2 nathanw point value corresponding to the abstract input. Ordinarily, the abstract
465 1.1.2.2 nathanw value is simply rounded and packed into the double-precision format, with
466 1.1.2.2 nathanw the inexact exception raised if the abstract input cannot be represented
467 1.1.2.2 nathanw exactly. However, if the abstract value is too large, the overflow and
468 1.1.2.2 nathanw inexact exceptions are raised and an infinity or maximal finite value is
469 1.1.2.2 nathanw returned. If the abstract value is too small, the input value is rounded to
470 1.1.2.2 nathanw a subnormal number, and the underflow and inexact exceptions are raised if
471 1.1.2.2 nathanw the abstract input cannot be represented exactly as a subnormal double-
472 1.1.2.2 nathanw precision floating-point number.
473 1.1.2.2 nathanw The input significand `zSig' has its binary point between bits 62
474 1.1.2.2 nathanw and 61, which is 10 bits to the left of the usual location. This shifted
475 1.1.2.2 nathanw significand must be normalized or smaller. If `zSig' is not normalized,
476 1.1.2.2 nathanw `zExp' must be 0; in that case, the result returned is a subnormal number,
477 1.1.2.2 nathanw and it must not require rounding. In the usual case that `zSig' is
478 1.1.2.2 nathanw normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
479 1.1.2.2 nathanw The handling of underflow and overflow follows the IEC/IEEE Standard for
480 1.1.2.2 nathanw Binary Floating-Point Arithmetic.
481 1.1.2.2 nathanw -------------------------------------------------------------------------------
482 1.1.2.2 nathanw */
483 1.1.2.2 nathanw static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
484 1.1.2.2 nathanw {
485 1.1.2.2 nathanw int8 roundingMode;
486 1.1.2.2 nathanw flag roundNearestEven;
487 1.1.2.2 nathanw int16 roundIncrement, roundBits;
488 1.1.2.2 nathanw flag isTiny;
489 1.1.2.2 nathanw
490 1.1.2.2 nathanw roundingMode = float_rounding_mode();
491 1.1.2.2 nathanw roundNearestEven = ( roundingMode == float_round_nearest_even );
492 1.1.2.2 nathanw roundIncrement = 0x200;
493 1.1.2.2 nathanw if ( ! roundNearestEven ) {
494 1.1.2.2 nathanw if ( roundingMode == float_round_to_zero ) {
495 1.1.2.2 nathanw roundIncrement = 0;
496 1.1.2.2 nathanw }
497 1.1.2.2 nathanw else {
498 1.1.2.2 nathanw roundIncrement = 0x3FF;
499 1.1.2.2 nathanw if ( zSign ) {
500 1.1.2.2 nathanw if ( roundingMode == float_round_up ) roundIncrement = 0;
501 1.1.2.2 nathanw }
502 1.1.2.2 nathanw else {
503 1.1.2.2 nathanw if ( roundingMode == float_round_down ) roundIncrement = 0;
504 1.1.2.2 nathanw }
505 1.1.2.2 nathanw }
506 1.1.2.2 nathanw }
507 1.1.2.2 nathanw roundBits = zSig & 0x3FF;
508 1.1.2.2 nathanw if ( 0x7FD <= (bits16) zExp ) {
509 1.1.2.2 nathanw if ( ( 0x7FD < zExp )
510 1.1.2.2 nathanw || ( ( zExp == 0x7FD )
511 1.1.2.2 nathanw && ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
512 1.1.2.2 nathanw ) {
513 1.1.2.2 nathanw float_raise( float_flag_overflow | float_flag_inexact );
514 1.1.2.2 nathanw return FLOAT64_MANGLE(
515 1.1.2.2 nathanw FLOAT64_DEMANGLE(packFloat64( zSign, 0x7FF, 0 )) -
516 1.1.2.2 nathanw ( roundIncrement == 0 ));
517 1.1.2.2 nathanw }
518 1.1.2.2 nathanw if ( zExp < 0 ) {
519 1.1.2.2 nathanw isTiny =
520 1.1.2.2 nathanw ( float_detect_tininess == float_tininess_before_rounding )
521 1.1.2.2 nathanw || ( zExp < -1 )
522 1.1.2.2 nathanw || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
523 1.1.2.2 nathanw shift64RightJamming( zSig, - zExp, &zSig );
524 1.1.2.2 nathanw zExp = 0;
525 1.1.2.2 nathanw roundBits = zSig & 0x3FF;
526 1.1.2.2 nathanw if ( isTiny && roundBits ) float_raise( float_flag_underflow );
527 1.1.2.2 nathanw }
528 1.1.2.2 nathanw }
529 1.1.2.2 nathanw if ( roundBits ) float_set_inexact();
530 1.1.2.2 nathanw zSig = ( zSig + roundIncrement )>>10;
531 1.1.2.2 nathanw zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
532 1.1.2.2 nathanw if ( zSig == 0 ) zExp = 0;
533 1.1.2.2 nathanw return packFloat64( zSign, zExp, zSig );
534 1.1.2.2 nathanw
535 1.1.2.2 nathanw }
536 1.1.2.2 nathanw
537 1.1.2.2 nathanw /*
538 1.1.2.2 nathanw -------------------------------------------------------------------------------
539 1.1.2.2 nathanw Takes an abstract floating-point value having sign `zSign', exponent `zExp',
540 1.1.2.2 nathanw and significand `zSig', and returns the proper double-precision floating-
541 1.1.2.2 nathanw point value corresponding to the abstract input. This routine is just like
542 1.1.2.2 nathanw `roundAndPackFloat64' except that `zSig' does not have to be normalized.
543 1.1.2.2 nathanw Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
544 1.1.2.2 nathanw floating-point exponent.
545 1.1.2.2 nathanw -------------------------------------------------------------------------------
546 1.1.2.2 nathanw */
547 1.1.2.2 nathanw static float64
548 1.1.2.2 nathanw normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
549 1.1.2.2 nathanw {
550 1.1.2.2 nathanw int8 shiftCount;
551 1.1.2.2 nathanw
552 1.1.2.2 nathanw shiftCount = countLeadingZeros64( zSig ) - 1;
553 1.1.2.2 nathanw return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount );
554 1.1.2.2 nathanw
555 1.1.2.2 nathanw }
556 1.1.2.2 nathanw
557 1.1.2.2 nathanw #ifdef FLOATX80
558 1.1.2.2 nathanw
559 1.1.2.2 nathanw /*
560 1.1.2.2 nathanw -------------------------------------------------------------------------------
561 1.1.2.2 nathanw Returns the fraction bits of the extended double-precision floating-point
562 1.1.2.2 nathanw value `a'.
563 1.1.2.2 nathanw -------------------------------------------------------------------------------
564 1.1.2.2 nathanw */
565 1.1.2.2 nathanw INLINE bits64 extractFloatx80Frac( floatx80 a )
566 1.1.2.2 nathanw {
567 1.1.2.2 nathanw
568 1.1.2.2 nathanw return a.low;
569 1.1.2.2 nathanw
570 1.1.2.2 nathanw }
571 1.1.2.2 nathanw
572 1.1.2.2 nathanw /*
573 1.1.2.2 nathanw -------------------------------------------------------------------------------
574 1.1.2.2 nathanw Returns the exponent bits of the extended double-precision floating-point
575 1.1.2.2 nathanw value `a'.
576 1.1.2.2 nathanw -------------------------------------------------------------------------------
577 1.1.2.2 nathanw */
578 1.1.2.2 nathanw INLINE int32 extractFloatx80Exp( floatx80 a )
579 1.1.2.2 nathanw {
580 1.1.2.2 nathanw
581 1.1.2.2 nathanw return a.high & 0x7FFF;
582 1.1.2.2 nathanw
583 1.1.2.2 nathanw }
584 1.1.2.2 nathanw
585 1.1.2.2 nathanw /*
586 1.1.2.2 nathanw -------------------------------------------------------------------------------
587 1.1.2.2 nathanw Returns the sign bit of the extended double-precision floating-point value
588 1.1.2.2 nathanw `a'.
589 1.1.2.2 nathanw -------------------------------------------------------------------------------
590 1.1.2.2 nathanw */
591 1.1.2.2 nathanw INLINE flag extractFloatx80Sign( floatx80 a )
592 1.1.2.2 nathanw {
593 1.1.2.2 nathanw
594 1.1.2.2 nathanw return a.high>>15;
595 1.1.2.2 nathanw
596 1.1.2.2 nathanw }
597 1.1.2.2 nathanw
598 1.1.2.2 nathanw /*
599 1.1.2.2 nathanw -------------------------------------------------------------------------------
600 1.1.2.2 nathanw Normalizes the subnormal extended double-precision floating-point value
601 1.1.2.2 nathanw represented by the denormalized significand `aSig'. The normalized exponent
602 1.1.2.2 nathanw and significand are stored at the locations pointed to by `zExpPtr' and
603 1.1.2.2 nathanw `zSigPtr', respectively.
604 1.1.2.2 nathanw -------------------------------------------------------------------------------
605 1.1.2.2 nathanw */
606 1.1.2.2 nathanw static void
607 1.1.2.2 nathanw normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr )
608 1.1.2.2 nathanw {
609 1.1.2.2 nathanw int8 shiftCount;
610 1.1.2.2 nathanw
611 1.1.2.2 nathanw shiftCount = countLeadingZeros64( aSig );
612 1.1.2.2 nathanw *zSigPtr = aSig<<shiftCount;
613 1.1.2.2 nathanw *zExpPtr = 1 - shiftCount;
614 1.1.2.2 nathanw
615 1.1.2.2 nathanw }
616 1.1.2.2 nathanw
617 1.1.2.2 nathanw /*
618 1.1.2.2 nathanw -------------------------------------------------------------------------------
619 1.1.2.2 nathanw Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
620 1.1.2.2 nathanw extended double-precision floating-point value, returning the result.
621 1.1.2.2 nathanw -------------------------------------------------------------------------------
622 1.1.2.2 nathanw */
623 1.1.2.2 nathanw INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
624 1.1.2.2 nathanw {
625 1.1.2.2 nathanw floatx80 z;
626 1.1.2.2 nathanw
627 1.1.2.2 nathanw z.low = zSig;
628 1.1.2.2 nathanw z.high = ( ( (bits16) zSign )<<15 ) + zExp;
629 1.1.2.2 nathanw return z;
630 1.1.2.2 nathanw
631 1.1.2.2 nathanw }
632 1.1.2.2 nathanw
633 1.1.2.2 nathanw /*
634 1.1.2.2 nathanw -------------------------------------------------------------------------------
635 1.1.2.2 nathanw Takes an abstract floating-point value having sign `zSign', exponent `zExp',
636 1.1.2.2 nathanw and extended significand formed by the concatenation of `zSig0' and `zSig1',
637 1.1.2.2 nathanw and returns the proper extended double-precision floating-point value
638 1.1.2.2 nathanw corresponding to the abstract input. Ordinarily, the abstract value is
639 1.1.2.2 nathanw rounded and packed into the extended double-precision format, with the
640 1.1.2.2 nathanw inexact exception raised if the abstract input cannot be represented
641 1.1.2.2 nathanw exactly. However, if the abstract value is too large, the overflow and
642 1.1.2.2 nathanw inexact exceptions are raised and an infinity or maximal finite value is
643 1.1.2.2 nathanw returned. If the abstract value is too small, the input value is rounded to
644 1.1.2.2 nathanw a subnormal number, and the underflow and inexact exceptions are raised if
645 1.1.2.2 nathanw the abstract input cannot be represented exactly as a subnormal extended
646 1.1.2.2 nathanw double-precision floating-point number.
647 1.1.2.2 nathanw If `roundingPrecision' is 32 or 64, the result is rounded to the same
648 1.1.2.2 nathanw number of bits as single or double precision, respectively. Otherwise, the
649 1.1.2.2 nathanw result is rounded to the full precision of the extended double-precision
650 1.1.2.2 nathanw format.
651 1.1.2.2 nathanw The input significand must be normalized or smaller. If the input
652 1.1.2.2 nathanw significand is not normalized, `zExp' must be 0; in that case, the result
653 1.1.2.2 nathanw returned is a subnormal number, and it must not require rounding. The
654 1.1.2.2 nathanw handling of underflow and overflow follows the IEC/IEEE Standard for Binary
655 1.1.2.2 nathanw Floating-Point Arithmetic.
656 1.1.2.2 nathanw -------------------------------------------------------------------------------
657 1.1.2.2 nathanw */
658 1.1.2.2 nathanw static floatx80
659 1.1.2.2 nathanw roundAndPackFloatx80(
660 1.1.2.2 nathanw int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
661 1.1.2.2 nathanw )
662 1.1.2.2 nathanw {
663 1.1.2.2 nathanw int8 roundingMode;
664 1.1.2.2 nathanw flag roundNearestEven, increment, isTiny;
665 1.1.2.2 nathanw int64 roundIncrement, roundMask, roundBits;
666 1.1.2.2 nathanw
667 1.1.2.2 nathanw roundingMode = float_rounding_mode(); /* read the rounding mode once up front */
668 1.1.2.2 nathanw roundNearestEven = ( roundingMode == float_round_nearest_even );
669 1.1.2.2 nathanw if ( roundingPrecision == 80 ) goto precision80; /* full precision is handled in the second half */
670 1.1.2.2 nathanw if ( roundingPrecision == 64 ) {
671 1.1.2.2 nathanw roundIncrement = LIT64( 0x0000000000000400 ); /* half of the discarded range (bit 10) */
672 1.1.2.2 nathanw roundMask = LIT64( 0x00000000000007FF ); /* low 11 bits of zSig0 are discarded */
673 1.1.2.2 nathanw }
674 1.1.2.2 nathanw else if ( roundingPrecision == 32 ) {
675 1.1.2.2 nathanw roundIncrement = LIT64( 0x0000008000000000 ); /* half of the discarded range (bit 39) */
676 1.1.2.2 nathanw roundMask = LIT64( 0x000000FFFFFFFFFF ); /* low 40 bits of zSig0 are discarded */
677 1.1.2.2 nathanw }
678 1.1.2.2 nathanw else {
679 1.1.2.2 nathanw goto precision80; /* any other precision value is treated as 80 */
680 1.1.2.2 nathanw }
681 1.1.2.2 nathanw zSig0 |= ( zSig1 != 0 ); /* fold any nonzero zSig1 into bit 0 of zSig0 */
682 1.1.2.2 nathanw if ( ! roundNearestEven ) {
683 1.1.2.2 nathanw if ( roundingMode == float_round_to_zero ) {
684 1.1.2.2 nathanw roundIncrement = 0; /* truncation: add nothing before masking */
685 1.1.2.2 nathanw }
686 1.1.2.2 nathanw else {
687 1.1.2.2 nathanw roundIncrement = roundMask; /* directed: any nonzero discarded bit carries upward */
688 1.1.2.2 nathanw if ( zSign ) {
689 1.1.2.2 nathanw if ( roundingMode == float_round_up ) roundIncrement = 0; /* negative value, round up: truncate magnitude */
690 1.1.2.2 nathanw }
691 1.1.2.2 nathanw else {
692 1.1.2.2 nathanw if ( roundingMode == float_round_down ) roundIncrement = 0; /* positive value, round down: truncate magnitude */
693 1.1.2.2 nathanw }
694 1.1.2.2 nathanw }
695 1.1.2.2 nathanw }
696 1.1.2.2 nathanw roundBits = zSig0 & roundMask;
697 1.1.2.2 nathanw if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) { /* assuming bits32 is unsigned: true when zExp <= 0 or zExp >= 0x7FFE */
698 1.1.2.2 nathanw if ( ( 0x7FFE < zExp )
699 1.1.2.2 nathanw || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) ) /* sum wrapped: rounding carries out of 64 bits */
700 1.1.2.2 nathanw ) {
701 1.1.2.2 nathanw goto overflow; /* shares the overflow code below; roundMask keeps its reduced-precision value */
702 1.1.2.2 nathanw }
703 1.1.2.2 nathanw if ( zExp <= 0 ) {
704 1.1.2.2 nathanw isTiny =
705 1.1.2.2 nathanw ( float_detect_tininess == float_tininess_before_rounding )
706 1.1.2.2 nathanw || ( zExp < 0 )
707 1.1.2.2 nathanw || ( zSig0 <= zSig0 + roundIncrement ); /* adding the increment does not wrap */
708 1.1.2.2 nathanw shift64RightJamming( zSig0, 1 - zExp, &zSig0 ); /* shift distance is 1 - zExp */
709 1.1.2.2 nathanw zExp = 0;
710 1.1.2.2 nathanw roundBits = zSig0 & roundMask; /* recompute on the shifted significand */
711 1.1.2.2 nathanw if ( isTiny && roundBits ) float_raise( float_flag_underflow );
712 1.1.2.2 nathanw if ( roundBits ) float_set_inexact();
713 1.1.2.2 nathanw zSig0 += roundIncrement;
714 1.1.2.2 nathanw if ( (sbits64) zSig0 < 0 ) zExp = 1; /* top bit became set: exponent field becomes 1 */
715 1.1.2.2 nathanw roundIncrement = roundMask + 1; /* from here on: the lowest kept bit */
716 1.1.2.2 nathanw if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { /* discarded bits were exactly one half */
717 1.1.2.2 nathanw roundMask |= roundIncrement; /* also clear the lowest kept bit (ties to even) */
718 1.1.2.2 nathanw }
719 1.1.2.2 nathanw zSig0 &= ~ roundMask;
720 1.1.2.2 nathanw return packFloatx80( zSign, zExp, zSig0 );
721 1.1.2.2 nathanw }
722 1.1.2.2 nathanw }
723 1.1.2.2 nathanw if ( roundBits ) float_set_inexact();
724 1.1.2.2 nathanw zSig0 += roundIncrement;
725 1.1.2.2 nathanw if ( zSig0 < roundIncrement ) { /* addition wrapped past 64 bits */
726 1.1.2.2 nathanw ++zExp;
727 1.1.2.2 nathanw zSig0 = LIT64( 0x8000000000000000 );
728 1.1.2.2 nathanw }
729 1.1.2.2 nathanw roundIncrement = roundMask + 1; /* from here on: the lowest kept bit */
730 1.1.2.2 nathanw if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { /* discarded bits were exactly one half */
731 1.1.2.2 nathanw roundMask |= roundIncrement; /* also clear the lowest kept bit (ties to even) */
732 1.1.2.2 nathanw }
733 1.1.2.2 nathanw zSig0 &= ~ roundMask;
734 1.1.2.2 nathanw if ( zSig0 == 0 ) zExp = 0; /* zero significand is packed with a zero exponent */
735 1.1.2.2 nathanw return packFloatx80( zSign, zExp, zSig0 );
736 1.1.2.2 nathanw precision80:
737 1.1.2.2 nathanw increment = ( (sbits64) zSig1 < 0 ); /* nearest-even default: top bit of zSig1 set */
738 1.1.2.2 nathanw if ( ! roundNearestEven ) {
739 1.1.2.2 nathanw if ( roundingMode == float_round_to_zero ) {
740 1.1.2.2 nathanw increment = 0;
741 1.1.2.2 nathanw }
742 1.1.2.2 nathanw else {
743 1.1.2.2 nathanw if ( zSign ) {
744 1.1.2.2 nathanw increment = ( roundingMode == float_round_down ) && zSig1; /* negative: grow magnitude only when rounding down */
745 1.1.2.2 nathanw }
746 1.1.2.2 nathanw else {
747 1.1.2.2 nathanw increment = ( roundingMode == float_round_up ) && zSig1; /* positive: grow magnitude only when rounding up */
748 1.1.2.2 nathanw }
749 1.1.2.2 nathanw }
750 1.1.2.2 nathanw }
751 1.1.2.2 nathanw if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) { /* assuming bits32 is unsigned: true when zExp <= 0 or zExp >= 0x7FFE */
752 1.1.2.2 nathanw if ( ( 0x7FFE < zExp )
753 1.1.2.2 nathanw || ( ( zExp == 0x7FFE )
754 1.1.2.2 nathanw && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
755 1.1.2.2 nathanw && increment
756 1.1.2.2 nathanw )
757 1.1.2.2 nathanw ) {
758 1.1.2.2 nathanw roundMask = 0; /* so that ~ roundMask below is all ones */
759 1.1.2.2 nathanw overflow: /* also entered by goto from the reduced-precision path above */
760 1.1.2.2 nathanw float_raise( float_flag_overflow | float_flag_inexact );
761 1.1.2.2 nathanw if ( ( roundingMode == float_round_to_zero )
762 1.1.2.2 nathanw || ( zSign && ( roundingMode == float_round_up ) )
763 1.1.2.2 nathanw || ( ! zSign && ( roundingMode == float_round_down ) )
764 1.1.2.2 nathanw ) {
765 1.1.2.2 nathanw return packFloatx80( zSign, 0x7FFE, ~ roundMask ); /* exponent 0x7FFE with every kept significand bit set */
766 1.1.2.2 nathanw }
767 1.1.2.2 nathanw return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); /* exponent 0x7FFF, only the top significand bit set */
768 1.1.2.2 nathanw }
769 1.1.2.2 nathanw if ( zExp <= 0 ) {
770 1.1.2.2 nathanw isTiny =
771 1.1.2.2 nathanw ( float_detect_tininess == float_tininess_before_rounding )
772 1.1.2.2 nathanw || ( zExp < 0 )
773 1.1.2.2 nathanw || ! increment
774 1.1.2.2 nathanw || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
775 1.1.2.2 nathanw shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 ); /* shift distance is 1 - zExp */
776 1.1.2.2 nathanw zExp = 0;
777 1.1.2.2 nathanw if ( isTiny && zSig1 ) float_raise( float_flag_underflow );
778 1.1.2.2 nathanw if ( zSig1 ) float_set_inexact();
779 1.1.2.2 nathanw if ( roundNearestEven ) { /* recompute the increment from the shifted zSig1 */
780 1.1.2.2 nathanw increment = ( (sbits64) zSig1 < 0 );
781 1.1.2.2 nathanw }
782 1.1.2.2 nathanw else {
783 1.1.2.2 nathanw if ( zSign ) {
784 1.1.2.2 nathanw increment = ( roundingMode == float_round_down ) && zSig1;
785 1.1.2.2 nathanw }
786 1.1.2.2 nathanw else {
787 1.1.2.2 nathanw increment = ( roundingMode == float_round_up ) && zSig1;
788 1.1.2.2 nathanw }
789 1.1.2.2 nathanw }
790 1.1.2.2 nathanw if ( increment ) {
791 1.1.2.2 nathanw ++zSig0;
792 1.1.2.2 nathanw zSig0 &=
793 1.1.2.2 nathanw ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven ); /* exact tie under nearest-even: clear bit 0 */
794 1.1.2.2 nathanw if ( (sbits64) zSig0 < 0 ) zExp = 1; /* top bit became set: exponent field becomes 1 */
795 1.1.2.2 nathanw }
796 1.1.2.2 nathanw return packFloatx80( zSign, zExp, zSig0 );
797 1.1.2.2 nathanw }
798 1.1.2.2 nathanw }
799 1.1.2.2 nathanw if ( zSig1 ) float_set_inexact();
800 1.1.2.2 nathanw if ( increment ) {
801 1.1.2.2 nathanw ++zSig0;
802 1.1.2.2 nathanw if ( zSig0 == 0 ) { /* increment wrapped past 64 bits */
803 1.1.2.2 nathanw ++zExp;
804 1.1.2.2 nathanw zSig0 = LIT64( 0x8000000000000000 );
805 1.1.2.2 nathanw }
806 1.1.2.2 nathanw else {
807 1.1.2.2 nathanw zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven ); /* exact tie under nearest-even: clear bit 0 */
808 1.1.2.2 nathanw }
809 1.1.2.2 nathanw }
810 1.1.2.2 nathanw else {
811 1.1.2.2 nathanw if ( zSig0 == 0 ) zExp = 0; /* zero significand is packed with a zero exponent */
812 1.1.2.2 nathanw }
813 1.1.2.2 nathanw return packFloatx80( zSign, zExp, zSig0 );
814 1.1.2.2 nathanw
815 1.1.2.2 nathanw }
816 1.1.2.2 nathanw
817 1.1.2.2 nathanw /*
818 1.1.2.2 nathanw -------------------------------------------------------------------------------
819 1.1.2.2 nathanw Takes an abstract floating-point value having sign `zSign', exponent
820 1.1.2.2 nathanw `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
821 1.1.2.2 nathanw and returns the proper extended double-precision floating-point value
822 1.1.2.2 nathanw corresponding to the abstract input. This routine is just like
823 1.1.2.2 nathanw `roundAndPackFloatx80' except that the input significand does not have to be
824 1.1.2.2 nathanw normalized.
825 1.1.2.2 nathanw -------------------------------------------------------------------------------
826 1.1.2.2 nathanw */
827 1.1.2.2 nathanw static floatx80
828 1.1.2.2 nathanw normalizeRoundAndPackFloatx80(
829 1.1.2.2 nathanw int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
830 1.1.2.2 nathanw )
831 1.1.2.2 nathanw {
832 1.1.2.2 nathanw int8 shiftCount;
833 1.1.2.2 nathanw
834 1.1.2.2 nathanw if ( zSig0 == 0 ) {
835 1.1.2.2 nathanw zSig0 = zSig1;
836 1.1.2.2 nathanw zSig1 = 0;
837 1.1.2.2 nathanw zExp -= 64;
838 1.1.2.2 nathanw }
839 1.1.2.2 nathanw shiftCount = countLeadingZeros64( zSig0 );
840 1.1.2.2 nathanw shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
841 1.1.2.2 nathanw zExp -= shiftCount;
842 1.1.2.2 nathanw return
843 1.1.2.2 nathanw roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 );
844 1.1.2.2 nathanw
845 1.1.2.2 nathanw }
846 1.1.2.2 nathanw
847 1.1.2.2 nathanw #endif
848 1.1.2.2 nathanw
849 1.1.2.2 nathanw #ifdef FLOAT128
850 1.1.2.2 nathanw
851 1.1.2.2 nathanw /*
852 1.1.2.2 nathanw -------------------------------------------------------------------------------
853 1.1.2.2 nathanw Returns the least-significant 64 fraction bits of the quadruple-precision
854 1.1.2.2 nathanw floating-point value `a'.
855 1.1.2.2 nathanw -------------------------------------------------------------------------------
856 1.1.2.2 nathanw */
857 1.1.2.2 nathanw INLINE bits64 extractFloat128Frac1( float128 a )
858 1.1.2.2 nathanw {
859 1.1.2.2 nathanw
860 1.1.2.2 nathanw return a.low;
861 1.1.2.2 nathanw
862 1.1.2.2 nathanw }
863 1.1.2.2 nathanw
864 1.1.2.2 nathanw /*
865 1.1.2.2 nathanw -------------------------------------------------------------------------------
866 1.1.2.2 nathanw Returns the most-significant 48 fraction bits of the quadruple-precision
867 1.1.2.2 nathanw floating-point value `a'.
868 1.1.2.2 nathanw -------------------------------------------------------------------------------
869 1.1.2.2 nathanw */
870 1.1.2.2 nathanw INLINE bits64 extractFloat128Frac0( float128 a )
871 1.1.2.2 nathanw {
872 1.1.2.2 nathanw
873 1.1.2.2 nathanw return a.high & LIT64( 0x0000FFFFFFFFFFFF );
874 1.1.2.2 nathanw
875 1.1.2.2 nathanw }
876 1.1.2.2 nathanw
877 1.1.2.2 nathanw /*
878 1.1.2.2 nathanw -------------------------------------------------------------------------------
879 1.1.2.2 nathanw Returns the exponent bits of the quadruple-precision floating-point value
880 1.1.2.2 nathanw `a'.
881 1.1.2.2 nathanw -------------------------------------------------------------------------------
882 1.1.2.2 nathanw */
883 1.1.2.2 nathanw INLINE int32 extractFloat128Exp( float128 a )
884 1.1.2.2 nathanw {
885 1.1.2.2 nathanw
886 1.1.2.2 nathanw return ( a.high>>48 ) & 0x7FFF;
887 1.1.2.2 nathanw
888 1.1.2.2 nathanw }
889 1.1.2.2 nathanw
890 1.1.2.2 nathanw /*
891 1.1.2.2 nathanw -------------------------------------------------------------------------------
892 1.1.2.2 nathanw Returns the sign bit of the quadruple-precision floating-point value `a'.
893 1.1.2.2 nathanw -------------------------------------------------------------------------------
894 1.1.2.2 nathanw */
895 1.1.2.2 nathanw INLINE flag extractFloat128Sign( float128 a )
896 1.1.2.2 nathanw {
897 1.1.2.2 nathanw
898 1.1.2.2 nathanw return a.high>>63;
899 1.1.2.2 nathanw
900 1.1.2.2 nathanw }
901 1.1.2.2 nathanw
902 1.1.2.2 nathanw /*
903 1.1.2.2 nathanw -------------------------------------------------------------------------------
904 1.1.2.2 nathanw Normalizes the subnormal quadruple-precision floating-point value
905 1.1.2.2 nathanw represented by the denormalized significand formed by the concatenation of
906 1.1.2.2 nathanw `aSig0' and `aSig1'. The normalized exponent is stored at the location
907 1.1.2.2 nathanw pointed to by `zExpPtr'. The most significant 49 bits of the normalized
908 1.1.2.2 nathanw significand are stored at the location pointed to by `zSig0Ptr', and the
909 1.1.2.2 nathanw least significant 64 bits of the normalized significand are stored at the
910 1.1.2.2 nathanw location pointed to by `zSig1Ptr'.
911 1.1.2.2 nathanw -------------------------------------------------------------------------------
912 1.1.2.2 nathanw */
913 1.1.2.2 nathanw static void
914 1.1.2.2 nathanw normalizeFloat128Subnormal(
915 1.1.2.2 nathanw bits64 aSig0,
916 1.1.2.2 nathanw bits64 aSig1,
917 1.1.2.2 nathanw int32 *zExpPtr,
918 1.1.2.2 nathanw bits64 *zSig0Ptr,
919 1.1.2.2 nathanw bits64 *zSig1Ptr
920 1.1.2.2 nathanw )
921 1.1.2.2 nathanw {
922 1.1.2.2 nathanw int8 shiftCount;
923 1.1.2.2 nathanw
924 1.1.2.2 nathanw if ( aSig0 == 0 ) {
925 1.1.2.2 nathanw shiftCount = countLeadingZeros64( aSig1 ) - 15;
926 1.1.2.2 nathanw if ( shiftCount < 0 ) {
927 1.1.2.2 nathanw *zSig0Ptr = aSig1>>( - shiftCount );
928 1.1.2.2 nathanw *zSig1Ptr = aSig1<<( shiftCount & 63 );
929 1.1.2.2 nathanw }
930 1.1.2.2 nathanw else {
931 1.1.2.2 nathanw *zSig0Ptr = aSig1<<shiftCount;
932 1.1.2.2 nathanw *zSig1Ptr = 0;
933 1.1.2.2 nathanw }
934 1.1.2.2 nathanw *zExpPtr = - shiftCount - 63;
935 1.1.2.2 nathanw }
936 1.1.2.2 nathanw else {
937 1.1.2.2 nathanw shiftCount = countLeadingZeros64( aSig0 ) - 15;
938 1.1.2.2 nathanw shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
939 1.1.2.2 nathanw *zExpPtr = 1 - shiftCount;
940 1.1.2.2 nathanw }
941 1.1.2.2 nathanw
942 1.1.2.2 nathanw }
943 1.1.2.2 nathanw
944 1.1.2.2 nathanw /*
945 1.1.2.2 nathanw -------------------------------------------------------------------------------
946 1.1.2.2 nathanw Packs the sign `zSign', the exponent `zExp', and the significand formed
947 1.1.2.2 nathanw by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
948 1.1.2.2 nathanw floating-point value, returning the result. After being shifted into the
949 1.1.2.2 nathanw proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
950 1.1.2.2 nathanw added together to form the most significant 32 bits of the result. This
951 1.1.2.2 nathanw means that any integer portion of `zSig0' will be added into the exponent.
952 1.1.2.2 nathanw Since a properly normalized significand will have an integer portion equal
953 1.1.2.2 nathanw to 1, the `zExp' input should be 1 less than the desired result exponent
954 1.1.2.2 nathanw whenever `zSig0' and `zSig1' concatenated form a complete, normalized
955 1.1.2.2 nathanw significand.
956 1.1.2.2 nathanw -------------------------------------------------------------------------------
957 1.1.2.2 nathanw */
958 1.1.2.2 nathanw INLINE float128
959 1.1.2.2 nathanw packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
960 1.1.2.2 nathanw {
961 1.1.2.2 nathanw float128 z;
962 1.1.2.2 nathanw
963 1.1.2.2 nathanw z.low = zSig1;
964 1.1.2.2 nathanw z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0;
965 1.1.2.2 nathanw return z;
966 1.1.2.2 nathanw
967 1.1.2.2 nathanw }
968 1.1.2.2 nathanw
969 1.1.2.2 nathanw /*
970 1.1.2.2 nathanw -------------------------------------------------------------------------------
971 1.1.2.2 nathanw Takes an abstract floating-point value having sign `zSign', exponent `zExp',
972 1.1.2.2 nathanw and extended significand formed by the concatenation of `zSig0', `zSig1',
973 1.1.2.2 nathanw and `zSig2', and returns the proper quadruple-precision floating-point value
974 1.1.2.2 nathanw corresponding to the abstract input. Ordinarily, the abstract value is
975 1.1.2.2 nathanw simply rounded and packed into the quadruple-precision format, with the
976 1.1.2.2 nathanw inexact exception raised if the abstract input cannot be represented
977 1.1.2.2 nathanw exactly. However, if the abstract value is too large, the overflow and
978 1.1.2.2 nathanw inexact exceptions are raised and an infinity or maximal finite value is
979 1.1.2.2 nathanw returned. If the abstract value is too small, the input value is rounded to
980 1.1.2.2 nathanw a subnormal number, and the underflow and inexact exceptions are raised if
981 1.1.2.2 nathanw the abstract input cannot be represented exactly as a subnormal quadruple-
982 1.1.2.2 nathanw precision floating-point number.
983 1.1.2.2 nathanw The input significand must be normalized or smaller. If the input
984 1.1.2.2 nathanw significand is not normalized, `zExp' must be 0; in that case, the result
985 1.1.2.2 nathanw returned is a subnormal number, and it must not require rounding. In the
986 1.1.2.2 nathanw usual case that the input significand is normalized, `zExp' must be 1 less
987 1.1.2.2 nathanw than the ``true'' floating-point exponent. The handling of underflow and
988 1.1.2.2 nathanw overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
989 1.1.2.2 nathanw -------------------------------------------------------------------------------
990 1.1.2.2 nathanw */
991 1.1.2.2 nathanw static float128
992 1.1.2.2 nathanw roundAndPackFloat128(
993 1.1.2.2 nathanw flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 )
994 1.1.2.2 nathanw {
995 1.1.2.2 nathanw int8 roundingMode;
996 1.1.2.2 nathanw flag roundNearestEven, increment, isTiny;
997 1.1.2.2 nathanw
998 1.1.2.2 nathanw roundingMode = float_rounding_mode(); /* read the rounding mode once up front */
999 1.1.2.2 nathanw roundNearestEven = ( roundingMode == float_round_nearest_even );
1000 1.1.2.2 nathanw increment = ( (sbits64) zSig2 < 0 ); /* nearest-even default: top bit of zSig2 set */
1001 1.1.2.2 nathanw if ( ! roundNearestEven ) {
1002 1.1.2.2 nathanw if ( roundingMode == float_round_to_zero ) {
1003 1.1.2.2 nathanw increment = 0;
1004 1.1.2.2 nathanw }
1005 1.1.2.2 nathanw else {
1006 1.1.2.2 nathanw if ( zSign ) {
1007 1.1.2.2 nathanw increment = ( roundingMode == float_round_down ) && zSig2; /* negative: grow magnitude only when rounding down */
1008 1.1.2.2 nathanw }
1009 1.1.2.2 nathanw else {
1010 1.1.2.2 nathanw increment = ( roundingMode == float_round_up ) && zSig2; /* positive: grow magnitude only when rounding up */
1011 1.1.2.2 nathanw }
1012 1.1.2.2 nathanw }
1013 1.1.2.2 nathanw }
1014 1.1.2.2 nathanw if ( 0x7FFD <= (bits32) zExp ) { /* assuming bits32 is unsigned: true when zExp < 0 or zExp >= 0x7FFD */
1015 1.1.2.2 nathanw if ( ( 0x7FFD < zExp )
1016 1.1.2.2 nathanw || ( ( zExp == 0x7FFD )
1017 1.1.2.2 nathanw && eq128( /* significand is at its maximum, so an increment would carry out */
1018 1.1.2.2 nathanw LIT64( 0x0001FFFFFFFFFFFF ),
1019 1.1.2.2 nathanw LIT64( 0xFFFFFFFFFFFFFFFF ),
1020 1.1.2.2 nathanw zSig0,
1021 1.1.2.2 nathanw zSig1
1022 1.1.2.2 nathanw )
1023 1.1.2.2 nathanw && increment
1024 1.1.2.2 nathanw )
1025 1.1.2.2 nathanw ) {
1026 1.1.2.2 nathanw float_raise( float_flag_overflow | float_flag_inexact );
1027 1.1.2.2 nathanw if ( ( roundingMode == float_round_to_zero )
1028 1.1.2.2 nathanw || ( zSign && ( roundingMode == float_round_up ) )
1029 1.1.2.2 nathanw || ( ! zSign && ( roundingMode == float_round_down ) )
1030 1.1.2.2 nathanw ) {
1031 1.1.2.2 nathanw return /* exponent 0x7FFE with all fraction bits set */
1032 1.1.2.2 nathanw packFloat128(
1033 1.1.2.2 nathanw zSign,
1034 1.1.2.2 nathanw 0x7FFE,
1035 1.1.2.2 nathanw LIT64( 0x0000FFFFFFFFFFFF ),
1036 1.1.2.2 nathanw LIT64( 0xFFFFFFFFFFFFFFFF )
1037 1.1.2.2 nathanw );
1038 1.1.2.2 nathanw }
1039 1.1.2.2 nathanw return packFloat128( zSign, 0x7FFF, 0, 0 ); /* exponent 0x7FFF with a zero fraction */
1040 1.1.2.2 nathanw }
1041 1.1.2.2 nathanw if ( zExp < 0 ) {
1042 1.1.2.2 nathanw isTiny =
1043 1.1.2.2 nathanw ( float_detect_tininess == float_tininess_before_rounding )
1044 1.1.2.2 nathanw || ( zExp < -1 )
1045 1.1.2.2 nathanw || ! increment
1046 1.1.2.2 nathanw || lt128(
1047 1.1.2.2 nathanw zSig0,
1048 1.1.2.2 nathanw zSig1,
1049 1.1.2.2 nathanw LIT64( 0x0001FFFFFFFFFFFF ),
1050 1.1.2.2 nathanw LIT64( 0xFFFFFFFFFFFFFFFF )
1051 1.1.2.2 nathanw );
1052 1.1.2.2 nathanw shift128ExtraRightJamming( /* shift distance is - zExp */
1053 1.1.2.2 nathanw zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
1054 1.1.2.2 nathanw zExp = 0;
1055 1.1.2.2 nathanw if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
1056 1.1.2.2 nathanw if ( roundNearestEven ) { /* recompute the increment from the shifted zSig2 */
1057 1.1.2.2 nathanw increment = ( (sbits64) zSig2 < 0 );
1058 1.1.2.2 nathanw }
1059 1.1.2.2 nathanw else {
1060 1.1.2.2 nathanw if ( zSign ) {
1061 1.1.2.2 nathanw increment = ( roundingMode == float_round_down ) && zSig2;
1062 1.1.2.2 nathanw }
1063 1.1.2.2 nathanw else {
1064 1.1.2.2 nathanw increment = ( roundingMode == float_round_up ) && zSig2;
1065 1.1.2.2 nathanw }
1066 1.1.2.2 nathanw }
1067 1.1.2.2 nathanw }
1068 1.1.2.2 nathanw }
1069 1.1.2.2 nathanw if ( zSig2 ) float_set_inexact();
1070 1.1.2.2 nathanw if ( increment ) {
1071 1.1.2.2 nathanw add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 ); /* add 1 to the 128-bit significand */
1072 1.1.2.2 nathanw zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven ); /* exact tie under nearest-even: clear bit 0 */
1073 1.1.2.2 nathanw }
1074 1.1.2.2 nathanw else {
1075 1.1.2.2 nathanw if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0; /* zero significand is packed with a zero exponent */
1076 1.1.2.2 nathanw }
1077 1.1.2.2 nathanw return packFloat128( zSign, zExp, zSig0, zSig1 );
1078 1.1.2.2 nathanw
1079 1.1.2.2 nathanw }
1080 1.1.2.2 nathanw
1081 1.1.2.2 nathanw /*
1082 1.1.2.2 nathanw -------------------------------------------------------------------------------
1083 1.1.2.2 nathanw Takes an abstract floating-point value having sign `zSign', exponent `zExp',
1084 1.1.2.2 nathanw and significand formed by the concatenation of `zSig0' and `zSig1', and
1085 1.1.2.2 nathanw returns the proper quadruple-precision floating-point value corresponding
1086 1.1.2.2 nathanw to the abstract input. This routine is just like `roundAndPackFloat128'
1087 1.1.2.2 nathanw except that the input significand has fewer bits and does not have to be
1088 1.1.2.2 nathanw normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
1089 1.1.2.2 nathanw point exponent.
1090 1.1.2.2 nathanw -------------------------------------------------------------------------------
1091 1.1.2.2 nathanw */
1092 1.1.2.2 nathanw static float128
1093 1.1.2.2 nathanw normalizeRoundAndPackFloat128(
1094 1.1.2.2 nathanw flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
1095 1.1.2.2 nathanw {
1096 1.1.2.2 nathanw int8 shiftCount;
1097 1.1.2.2 nathanw bits64 zSig2;
1098 1.1.2.2 nathanw
1099 1.1.2.2 nathanw if ( zSig0 == 0 ) {
1100 1.1.2.2 nathanw zSig0 = zSig1;
1101 1.1.2.2 nathanw zSig1 = 0;
1102 1.1.2.2 nathanw zExp -= 64;
1103 1.1.2.2 nathanw }
1104 1.1.2.2 nathanw shiftCount = countLeadingZeros64( zSig0 ) - 15;
1105 1.1.2.2 nathanw if ( 0 <= shiftCount ) {
1106 1.1.2.2 nathanw zSig2 = 0;
1107 1.1.2.2 nathanw shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1108 1.1.2.2 nathanw }
1109 1.1.2.2 nathanw else {
1110 1.1.2.2 nathanw shift128ExtraRightJamming(
1111 1.1.2.2 nathanw zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
1112 1.1.2.2 nathanw }
1113 1.1.2.2 nathanw zExp -= shiftCount;
1114 1.1.2.2 nathanw return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
1115 1.1.2.2 nathanw
1116 1.1.2.2 nathanw }
1117 1.1.2.2 nathanw
1118 1.1.2.2 nathanw #endif
1119 1.1.2.2 nathanw
1120 1.1.2.2 nathanw /*
1121 1.1.2.2 nathanw -------------------------------------------------------------------------------
1122 1.1.2.2 nathanw Returns the result of converting the 32-bit two's complement integer `a'
1123 1.1.2.2 nathanw to the single-precision floating-point format. The conversion is performed
1124 1.1.2.2 nathanw according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1125 1.1.2.2 nathanw -------------------------------------------------------------------------------
1126 1.1.2.2 nathanw */
1127 1.1.2.2 nathanw float32 int32_to_float32( int32 a )
1128 1.1.2.2 nathanw {
1129 1.1.2.2 nathanw flag zSign;
1130 1.1.2.2 nathanw
1131 1.1.2.2 nathanw if ( a == 0 ) return 0;
1132 1.1.2.2 nathanw if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
1133 1.1.2.2 nathanw zSign = ( a < 0 );
1134 1.1.2.2 nathanw return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a );
1135 1.1.2.2 nathanw
1136 1.1.2.2 nathanw }
1137 1.1.2.2 nathanw
1138 1.1.2.2 nathanw /*
1139 1.1.2.2 nathanw -------------------------------------------------------------------------------
1140 1.1.2.2 nathanw Returns the result of converting the 32-bit two's complement integer `a'
1141 1.1.2.2 nathanw to the double-precision floating-point format. The conversion is performed
1142 1.1.2.2 nathanw according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1143 1.1.2.2 nathanw -------------------------------------------------------------------------------
1144 1.1.2.2 nathanw */
1145 1.1.2.2 nathanw float64 int32_to_float64( int32 a )
1146 1.1.2.2 nathanw {
1147 1.1.2.2 nathanw flag zSign;
1148 1.1.2.2 nathanw uint32 absA;
1149 1.1.2.2 nathanw int8 shiftCount;
1150 1.1.2.2 nathanw bits64 zSig;
1151 1.1.2.2 nathanw
1152 1.1.2.2 nathanw if ( a == 0 ) return 0;
1153 1.1.2.2 nathanw zSign = ( a < 0 );
1154 1.1.2.2 nathanw absA = zSign ? - a : a;
1155 1.1.2.2 nathanw shiftCount = countLeadingZeros32( absA ) + 21;
1156 1.1.2.2 nathanw zSig = absA;
1157 1.1.2.2 nathanw return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
1158 1.1.2.2 nathanw
1159 1.1.2.2 nathanw }
1160 1.1.2.2 nathanw
1161 1.1.2.2 nathanw #ifdef FLOATX80
1162 1.1.2.2 nathanw
1163 1.1.2.2 nathanw /*
1164 1.1.2.2 nathanw -------------------------------------------------------------------------------
1165 1.1.2.2 nathanw Returns the result of converting the 32-bit two's complement integer `a'
1166 1.1.2.2 nathanw to the extended double-precision floating-point format. The conversion
1167 1.1.2.2 nathanw is performed according to the IEC/IEEE Standard for Binary Floating-Point
1168 1.1.2.2 nathanw Arithmetic.
1169 1.1.2.2 nathanw -------------------------------------------------------------------------------
1170 1.1.2.2 nathanw */
1171 1.1.2.2 nathanw floatx80 int32_to_floatx80( int32 a )
1172 1.1.2.2 nathanw {
1173 1.1.2.2 nathanw flag zSign;
1174 1.1.2.2 nathanw uint32 absA;
1175 1.1.2.2 nathanw int8 shiftCount;
1176 1.1.2.2 nathanw bits64 zSig;
1177 1.1.2.2 nathanw
1178 1.1.2.2 nathanw if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1179 1.1.2.2 nathanw zSign = ( a < 0 );
1180 1.1.2.2 nathanw absA = zSign ? - a : a;
1181 1.1.2.2 nathanw shiftCount = countLeadingZeros32( absA ) + 32;
1182 1.1.2.2 nathanw zSig = absA;
1183 1.1.2.2 nathanw return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
1184 1.1.2.2 nathanw
1185 1.1.2.2 nathanw }
1186 1.1.2.2 nathanw
1187 1.1.2.2 nathanw #endif
1188 1.1.2.2 nathanw
1189 1.1.2.2 nathanw #ifdef FLOAT128
1190 1.1.2.2 nathanw
1191 1.1.2.2 nathanw /*
1192 1.1.2.2 nathanw -------------------------------------------------------------------------------
1193 1.1.2.2 nathanw Returns the result of converting the 32-bit two's complement integer `a' to
1194 1.1.2.2 nathanw the quadruple-precision floating-point format. The conversion is performed
1195 1.1.2.2 nathanw according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1196 1.1.2.2 nathanw -------------------------------------------------------------------------------
1197 1.1.2.2 nathanw */
1198 1.1.2.2 nathanw float128 int32_to_float128( int32 a )
1199 1.1.2.2 nathanw {
1200 1.1.2.2 nathanw flag zSign;
1201 1.1.2.2 nathanw uint32 absA;
1202 1.1.2.2 nathanw int8 shiftCount;
1203 1.1.2.2 nathanw bits64 zSig0;
1204 1.1.2.2 nathanw
1205 1.1.2.2 nathanw if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1206 1.1.2.2 nathanw zSign = ( a < 0 );
1207 1.1.2.2 nathanw absA = zSign ? - a : a;
1208 1.1.2.2 nathanw shiftCount = countLeadingZeros32( absA ) + 17;
1209 1.1.2.2 nathanw zSig0 = absA;
1210 1.1.2.2 nathanw return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
1211 1.1.2.2 nathanw
1212 1.1.2.2 nathanw }
1213 1.1.2.2 nathanw
1214 1.1.2.2 nathanw #endif
1215 1.1.2.2 nathanw
1216 1.1.2.2 nathanw #ifndef SOFTFLOAT_FOR_GCC /* __floatdi?f is in libgcc2.c */
1217 1.1.2.2 nathanw /*
1218 1.1.2.2 nathanw -------------------------------------------------------------------------------
1219 1.1.2.2 nathanw Returns the result of converting the 64-bit two's complement integer `a'
1220 1.1.2.2 nathanw to the single-precision floating-point format. The conversion is performed
1221 1.1.2.2 nathanw according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1222 1.1.2.2 nathanw -------------------------------------------------------------------------------
1223 1.1.2.2 nathanw */
1224 1.1.2.2 nathanw float32 int64_to_float32( int64 a )
1225 1.1.2.2 nathanw {
1226 1.1.2.2 nathanw flag zSign;
1227 1.1.2.2 nathanw uint64 absA;
1228 1.1.2.2 nathanw int8 shiftCount;
1229 1.1.2.2 nathanw
1230 1.1.2.2 nathanw if ( a == 0 ) return 0;
1231 1.1.2.2 nathanw zSign = ( a < 0 );
1232 1.1.2.2 nathanw absA = zSign ? - a : a;
1233 1.1.2.2 nathanw shiftCount = countLeadingZeros64( absA ) - 40;
1234 1.1.2.2 nathanw if ( 0 <= shiftCount ) {
1235 1.1.2.2 nathanw return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
1236 1.1.2.2 nathanw }
1237 1.1.2.2 nathanw else {
1238 1.1.2.2 nathanw shiftCount += 7;
1239 1.1.2.2 nathanw if ( shiftCount < 0 ) {
1240 1.1.2.2 nathanw shift64RightJamming( absA, - shiftCount, &absA );
1241 1.1.2.2 nathanw }
1242 1.1.2.2 nathanw else {
1243 1.1.2.2 nathanw absA <<= shiftCount;
1244 1.1.2.2 nathanw }
1245 1.1.2.2 nathanw return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA );
1246 1.1.2.2 nathanw }
1247 1.1.2.2 nathanw
1248 1.1.2.2 nathanw }
1249 1.1.2.2 nathanw
1250 1.1.2.2 nathanw /*
1251 1.1.2.2 nathanw -------------------------------------------------------------------------------
1252 1.1.2.2 nathanw Returns the result of converting the 64-bit two's complement integer `a'
1253 1.1.2.2 nathanw to the double-precision floating-point format. The conversion is performed
1254 1.1.2.2 nathanw according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1255 1.1.2.2 nathanw -------------------------------------------------------------------------------
1256 1.1.2.2 nathanw */
1257 1.1.2.2 nathanw float64 int64_to_float64( int64 a )
1258 1.1.2.2 nathanw {
1259 1.1.2.2 nathanw flag zSign;
1260 1.1.2.2 nathanw
1261 1.1.2.2 nathanw if ( a == 0 ) return 0;
1262 1.1.2.2 nathanw if ( a == (sbits64) LIT64( 0x8000000000000000 ) ) {
1263 1.1.2.2 nathanw return packFloat64( 1, 0x43E, 0 );
1264 1.1.2.2 nathanw }
1265 1.1.2.2 nathanw zSign = ( a < 0 );
1266 1.1.2.2 nathanw return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a );
1267 1.1.2.2 nathanw
1268 1.1.2.2 nathanw }
1269 1.1.2.2 nathanw
1270 1.1.2.2 nathanw #ifdef FLOATX80
1271 1.1.2.2 nathanw
1272 1.1.2.2 nathanw /*
1273 1.1.2.2 nathanw -------------------------------------------------------------------------------
1274 1.1.2.2 nathanw Returns the result of converting the 64-bit two's complement integer `a'
1275 1.1.2.2 nathanw to the extended double-precision floating-point format. The conversion
1276 1.1.2.2 nathanw is performed according to the IEC/IEEE Standard for Binary Floating-Point
1277 1.1.2.2 nathanw Arithmetic.
1278 1.1.2.2 nathanw -------------------------------------------------------------------------------
1279 1.1.2.2 nathanw */
1280 1.1.2.2 nathanw floatx80 int64_to_floatx80( int64 a )
1281 1.1.2.2 nathanw {
1282 1.1.2.2 nathanw flag zSign;
1283 1.1.2.2 nathanw uint64 absA;
1284 1.1.2.2 nathanw int8 shiftCount;
1285 1.1.2.2 nathanw
1286 1.1.2.2 nathanw if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1287 1.1.2.2 nathanw zSign = ( a < 0 );
1288 1.1.2.2 nathanw absA = zSign ? - a : a;
1289 1.1.2.2 nathanw shiftCount = countLeadingZeros64( absA );
1290 1.1.2.2 nathanw return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
1291 1.1.2.2 nathanw
1292 1.1.2.2 nathanw }
1293 1.1.2.2 nathanw
1294 1.1.2.2 nathanw #endif
1295 1.1.2.2 nathanw
1296 1.1.2.2 nathanw #ifdef FLOAT128
1297 1.1.2.2 nathanw
1298 1.1.2.2 nathanw /*
1299 1.1.2.2 nathanw -------------------------------------------------------------------------------
1300 1.1.2.2 nathanw Returns the result of converting the 64-bit two's complement integer `a' to
1301 1.1.2.2 nathanw the quadruple-precision floating-point format. The conversion is performed
1302 1.1.2.2 nathanw according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1303 1.1.2.2 nathanw -------------------------------------------------------------------------------
1304 1.1.2.2 nathanw */
1305 1.1.2.2 nathanw float128 int64_to_float128( int64 a )
1306 1.1.2.2 nathanw {
1307 1.1.2.2 nathanw flag zSign;
1308 1.1.2.2 nathanw uint64 absA;
1309 1.1.2.2 nathanw int8 shiftCount;
1310 1.1.2.2 nathanw int32 zExp;
1311 1.1.2.2 nathanw bits64 zSig0, zSig1;
1312 1.1.2.2 nathanw
1313 1.1.2.2 nathanw if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1314 1.1.2.2 nathanw zSign = ( a < 0 );
1315 1.1.2.2 nathanw absA = zSign ? - a : a;
1316 1.1.2.2 nathanw shiftCount = countLeadingZeros64( absA ) + 49;
1317 1.1.2.2 nathanw zExp = 0x406E - shiftCount;
1318 1.1.2.2 nathanw if ( 64 <= shiftCount ) {
1319 1.1.2.2 nathanw zSig1 = 0;
1320 1.1.2.2 nathanw zSig0 = absA;
1321 1.1.2.2 nathanw shiftCount -= 64;
1322 1.1.2.2 nathanw }
1323 1.1.2.2 nathanw else {
1324 1.1.2.2 nathanw zSig1 = absA;
1325 1.1.2.2 nathanw zSig0 = 0;
1326 1.1.2.2 nathanw }
1327 1.1.2.2 nathanw shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1328 1.1.2.2 nathanw return packFloat128( zSign, zExp, zSig0, zSig1 );
1329 1.1.2.2 nathanw
1330 1.1.2.2 nathanw }
1331 1.1.2.2 nathanw
1332 1.1.2.2 nathanw #endif
1333 1.1.2.2 nathanw #endif /* !SOFTFLOAT_FOR_GCC */
1334 1.1.2.2 nathanw
1335 1.1.2.2 nathanw #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
1336 1.1.2.2 nathanw /*
1337 1.1.2.2 nathanw -------------------------------------------------------------------------------
1338 1.1.2.2 nathanw Returns the result of converting the single-precision floating-point value
1339 1.1.2.2 nathanw `a' to the 32-bit two's complement integer format. The conversion is
1340 1.1.2.2 nathanw performed according to the IEC/IEEE Standard for Binary Floating-Point
1341 1.1.2.2 nathanw Arithmetic---which means in particular that the conversion is rounded
1342 1.1.2.2 nathanw according to the current rounding mode. If `a' is a NaN, the largest
1343 1.1.2.2 nathanw positive integer is returned. Otherwise, if the conversion overflows, the
1344 1.1.2.2 nathanw largest integer with the same sign as `a' is returned.
1345 1.1.2.2 nathanw -------------------------------------------------------------------------------
1346 1.1.2.2 nathanw */
1347 1.1.2.2 nathanw int32 float32_to_int32( float32 a )
1348 1.1.2.2 nathanw {
1349 1.1.2.2 nathanw flag aSign;
1350 1.1.2.2 nathanw int16 aExp, shiftCount;
1351 1.1.2.2 nathanw bits32 aSig;
1352 1.1.2.2 nathanw bits64 aSig64;
1353 1.1.2.2 nathanw
1354 1.1.2.2 nathanw aSig = extractFloat32Frac( a );
1355 1.1.2.2 nathanw aExp = extractFloat32Exp( a );
1356 1.1.2.2 nathanw aSign = extractFloat32Sign( a );
1357 1.1.2.2 nathanw if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
1358 1.1.2.2 nathanw if ( aExp ) aSig |= 0x00800000;
1359 1.1.2.2 nathanw shiftCount = 0xAF - aExp;
1360 1.1.2.2 nathanw aSig64 = aSig;
1361 1.1.2.2 nathanw aSig64 <<= 32;
1362 1.1.2.2 nathanw if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
1363 1.1.2.2 nathanw return roundAndPackInt32( aSign, aSig64 );
1364 1.1.2.2 nathanw
1365 1.1.2.2 nathanw }
1366 1.1.2.2 nathanw #endif /* !SOFTFLOAT_FOR_GCC */
1367 1.1.2.2 nathanw
1368 1.1.2.2 nathanw /*
1369 1.1.2.2 nathanw -------------------------------------------------------------------------------
1370 1.1.2.2 nathanw Returns the result of converting the single-precision floating-point value
1371 1.1.2.2 nathanw `a' to the 32-bit two's complement integer format. The conversion is
1372 1.1.2.2 nathanw performed according to the IEC/IEEE Standard for Binary Floating-Point
1373 1.1.2.2 nathanw Arithmetic, except that the conversion is always rounded toward zero.
1374 1.1.2.2 nathanw If `a' is a NaN, the largest positive integer is returned. Otherwise, if
1375 1.1.2.2 nathanw the conversion overflows, the largest integer with the same sign as `a' is
1376 1.1.2.2 nathanw returned.
1377 1.1.2.2 nathanw -------------------------------------------------------------------------------
1378 1.1.2.2 nathanw */
1379 1.1.2.2 nathanw int32 float32_to_int32_round_to_zero( float32 a )
1380 1.1.2.2 nathanw {
1381 1.1.2.2 nathanw flag aSign;
1382 1.1.2.2 nathanw int16 aExp, shiftCount;
1383 1.1.2.2 nathanw bits32 aSig;
1384 1.1.2.2 nathanw int32 z;
1385 1.1.2.2 nathanw
1386 1.1.2.2 nathanw aSig = extractFloat32Frac( a );
1387 1.1.2.2 nathanw aExp = extractFloat32Exp( a );
1388 1.1.2.2 nathanw aSign = extractFloat32Sign( a );
1389 1.1.2.2 nathanw shiftCount = aExp - 0x9E;
1390 1.1.2.2 nathanw if ( 0 <= shiftCount ) {
1391 1.1.2.2 nathanw if ( a != 0xCF000000 ) {
1392 1.1.2.2 nathanw float_raise( float_flag_invalid );
1393 1.1.2.2 nathanw if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
1394 1.1.2.2 nathanw }
1395 1.1.2.2 nathanw return (sbits32) 0x80000000;
1396 1.1.2.2 nathanw }
1397 1.1.2.2 nathanw else if ( aExp <= 0x7E ) {
1398 1.1.2.2 nathanw if ( aExp | aSig ) float_set_inexact();
1399 1.1.2.2 nathanw return 0;
1400 1.1.2.2 nathanw }
1401 1.1.2.2 nathanw aSig = ( aSig | 0x00800000 )<<8;
1402 1.1.2.2 nathanw z = aSig>>( - shiftCount );
1403 1.1.2.2 nathanw if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
1404 1.1.2.2 nathanw float_set_inexact();
1405 1.1.2.2 nathanw }
1406 1.1.2.2 nathanw if ( aSign ) z = - z;
1407 1.1.2.2 nathanw return z;
1408 1.1.2.2 nathanw
1409 1.1.2.2 nathanw }
1410 1.1.2.2 nathanw
1411 1.1.2.2 nathanw #ifndef SOFTFLOAT_FOR_GCC /* __fix?fdi provided by libgcc2.c */
1412 1.1.2.2 nathanw /*
1413 1.1.2.2 nathanw -------------------------------------------------------------------------------
1414 1.1.2.2 nathanw Returns the result of converting the single-precision floating-point value
1415 1.1.2.2 nathanw `a' to the 64-bit two's complement integer format. The conversion is
1416 1.1.2.2 nathanw performed according to the IEC/IEEE Standard for Binary Floating-Point
1417 1.1.2.2 nathanw Arithmetic---which means in particular that the conversion is rounded
1418 1.1.2.2 nathanw according to the current rounding mode. If `a' is a NaN, the largest
1419 1.1.2.2 nathanw positive integer is returned. Otherwise, if the conversion overflows, the
1420 1.1.2.2 nathanw largest integer with the same sign as `a' is returned.
1421 1.1.2.2 nathanw -------------------------------------------------------------------------------
1422 1.1.2.2 nathanw */
1423 1.1.2.2 nathanw int64 float32_to_int64( float32 a )
1424 1.1.2.2 nathanw {
1425 1.1.2.2 nathanw flag aSign;
1426 1.1.2.2 nathanw int16 aExp, shiftCount;
1427 1.1.2.2 nathanw bits32 aSig;
1428 1.1.2.2 nathanw bits64 aSig64, aSigExtra;
1429 1.1.2.2 nathanw
1430 1.1.2.2 nathanw aSig = extractFloat32Frac( a );
1431 1.1.2.2 nathanw aExp = extractFloat32Exp( a );
1432 1.1.2.2 nathanw aSign = extractFloat32Sign( a );
1433 1.1.2.2 nathanw shiftCount = 0xBE - aExp;
1434 1.1.2.2 nathanw if ( shiftCount < 0 ) {
1435 1.1.2.2 nathanw float_raise( float_flag_invalid );
1436 1.1.2.2 nathanw if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1437 1.1.2.2 nathanw return LIT64( 0x7FFFFFFFFFFFFFFF );
1438 1.1.2.2 nathanw }
1439 1.1.2.2 nathanw return (sbits64) LIT64( 0x8000000000000000 );
1440 1.1.2.2 nathanw }
1441 1.1.2.2 nathanw if ( aExp ) aSig |= 0x00800000;
1442 1.1.2.2 nathanw aSig64 = aSig;
1443 1.1.2.2 nathanw aSig64 <<= 40;
1444 1.1.2.2 nathanw shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
1445 1.1.2.2 nathanw return roundAndPackInt64( aSign, aSig64, aSigExtra );
1446 1.1.2.2 nathanw
1447 1.1.2.2 nathanw }
1448 1.1.2.2 nathanw
1449 1.1.2.2 nathanw /*
1450 1.1.2.2 nathanw -------------------------------------------------------------------------------
1451 1.1.2.2 nathanw Returns the result of converting the single-precision floating-point value
1452 1.1.2.2 nathanw `a' to the 64-bit two's complement integer format. The conversion is
1453 1.1.2.2 nathanw performed according to the IEC/IEEE Standard for Binary Floating-Point
1454 1.1.2.2 nathanw Arithmetic, except that the conversion is always rounded toward zero. If
1455 1.1.2.2 nathanw `a' is a NaN, the largest positive integer is returned. Otherwise, if the
1456 1.1.2.2 nathanw conversion overflows, the largest integer with the same sign as `a' is
1457 1.1.2.2 nathanw returned.
1458 1.1.2.2 nathanw -------------------------------------------------------------------------------
1459 1.1.2.2 nathanw */
1460 1.1.2.2 nathanw int64 float32_to_int64_round_to_zero( float32 a )
1461 1.1.2.2 nathanw {
1462 1.1.2.2 nathanw flag aSign;
1463 1.1.2.2 nathanw int16 aExp, shiftCount;
1464 1.1.2.2 nathanw bits32 aSig;
1465 1.1.2.2 nathanw bits64 aSig64;
1466 1.1.2.2 nathanw int64 z;
1467 1.1.2.2 nathanw
1468 1.1.2.2 nathanw aSig = extractFloat32Frac( a );
1469 1.1.2.2 nathanw aExp = extractFloat32Exp( a );
1470 1.1.2.2 nathanw aSign = extractFloat32Sign( a );
1471 1.1.2.2 nathanw shiftCount = aExp - 0xBE;
1472 1.1.2.2 nathanw if ( 0 <= shiftCount ) {
1473 1.1.2.2 nathanw if ( a != 0xDF000000 ) {
1474 1.1.2.2 nathanw float_raise( float_flag_invalid );
1475 1.1.2.2 nathanw if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1476 1.1.2.2 nathanw return LIT64( 0x7FFFFFFFFFFFFFFF );
1477 1.1.2.2 nathanw }
1478 1.1.2.2 nathanw }
1479 1.1.2.2 nathanw return (sbits64) LIT64( 0x8000000000000000 );
1480 1.1.2.2 nathanw }
1481 1.1.2.2 nathanw else if ( aExp <= 0x7E ) {
1482 1.1.2.2 nathanw if ( aExp | aSig ) float_set_inexact();
1483 1.1.2.2 nathanw return 0;
1484 1.1.2.2 nathanw }
1485 1.1.2.2 nathanw aSig64 = aSig | 0x00800000;
1486 1.1.2.2 nathanw aSig64 <<= 40;
1487 1.1.2.2 nathanw z = aSig64>>( - shiftCount );
1488 1.1.2.2 nathanw if ( (bits64) ( aSig64<<( shiftCount & 63 ) ) ) {
1489 1.1.2.2 nathanw float_set_inexact();
1490 1.1.2.2 nathanw }
1491 1.1.2.2 nathanw if ( aSign ) z = - z;
1492 1.1.2.2 nathanw return z;
1493 1.1.2.2 nathanw
1494 1.1.2.2 nathanw }
1495 1.1.2.2 nathanw #endif /* !SOFTFLOAT_FOR_GCC */
1496 1.1.2.2 nathanw
1497 1.1.2.2 nathanw /*
1498 1.1.2.2 nathanw -------------------------------------------------------------------------------
1499 1.1.2.2 nathanw Returns the result of converting the single-precision floating-point value
1500 1.1.2.2 nathanw `a' to the double-precision floating-point format. The conversion is
1501 1.1.2.2 nathanw performed according to the IEC/IEEE Standard for Binary Floating-Point
1502 1.1.2.2 nathanw Arithmetic.
1503 1.1.2.2 nathanw -------------------------------------------------------------------------------
1504 1.1.2.2 nathanw */
1505 1.1.2.2 nathanw float64 float32_to_float64( float32 a )
1506 1.1.2.2 nathanw {
1507 1.1.2.2 nathanw flag aSign;
1508 1.1.2.2 nathanw int16 aExp;
1509 1.1.2.2 nathanw bits32 aSig;
1510 1.1.2.2 nathanw
1511 1.1.2.2 nathanw aSig = extractFloat32Frac( a );
1512 1.1.2.2 nathanw aExp = extractFloat32Exp( a );
1513 1.1.2.2 nathanw aSign = extractFloat32Sign( a );
1514 1.1.2.2 nathanw if ( aExp == 0xFF ) {
1515 1.1.2.2 nathanw if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
1516 1.1.2.2 nathanw return packFloat64( aSign, 0x7FF, 0 );
1517 1.1.2.2 nathanw }
1518 1.1.2.2 nathanw if ( aExp == 0 ) {
1519 1.1.2.2 nathanw if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
1520 1.1.2.2 nathanw normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1521 1.1.2.2 nathanw --aExp;
1522 1.1.2.2 nathanw }
1523 1.1.2.2 nathanw return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );
1524 1.1.2.2 nathanw
1525 1.1.2.2 nathanw }
1526 1.1.2.2 nathanw
1527 1.1.2.2 nathanw #ifdef FLOATX80
1528 1.1.2.2 nathanw
1529 1.1.2.2 nathanw /*
1530 1.1.2.2 nathanw -------------------------------------------------------------------------------
1531 1.1.2.2 nathanw Returns the result of converting the single-precision floating-point value
1532 1.1.2.2 nathanw `a' to the extended double-precision floating-point format. The conversion
1533 1.1.2.2 nathanw is performed according to the IEC/IEEE Standard for Binary Floating-Point
1534 1.1.2.2 nathanw Arithmetic.
1535 1.1.2.2 nathanw -------------------------------------------------------------------------------
1536 1.1.2.2 nathanw */
1537 1.1.2.2 nathanw floatx80 float32_to_floatx80( float32 a )
1538 1.1.2.2 nathanw {
1539 1.1.2.2 nathanw flag aSign;
1540 1.1.2.2 nathanw int16 aExp;
1541 1.1.2.2 nathanw bits32 aSig;
1542 1.1.2.2 nathanw
1543 1.1.2.2 nathanw aSig = extractFloat32Frac( a );
1544 1.1.2.2 nathanw aExp = extractFloat32Exp( a );
1545 1.1.2.2 nathanw aSign = extractFloat32Sign( a );
1546 1.1.2.2 nathanw if ( aExp == 0xFF ) {
1547 1.1.2.2 nathanw if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) );
1548 1.1.2.2 nathanw return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
1549 1.1.2.2 nathanw }
1550 1.1.2.2 nathanw if ( aExp == 0 ) {
1551 1.1.2.2 nathanw if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
1552 1.1.2.2 nathanw normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1553 1.1.2.2 nathanw }
1554 1.1.2.2 nathanw aSig |= 0x00800000;
1555 1.1.2.2 nathanw return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
1556 1.1.2.2 nathanw
1557 1.1.2.2 nathanw }
1558 1.1.2.2 nathanw
1559 1.1.2.2 nathanw #endif
1560 1.1.2.2 nathanw
1561 1.1.2.2 nathanw #ifdef FLOAT128
1562 1.1.2.2 nathanw
1563 1.1.2.2 nathanw /*
1564 1.1.2.2 nathanw -------------------------------------------------------------------------------
1565 1.1.2.2 nathanw Returns the result of converting the single-precision floating-point value
1566 1.1.2.2 nathanw `a' to the double-precision floating-point format. The conversion is
1567 1.1.2.2 nathanw performed according to the IEC/IEEE Standard for Binary Floating-Point
1568 1.1.2.2 nathanw Arithmetic.
1569 1.1.2.2 nathanw -------------------------------------------------------------------------------
1570 1.1.2.2 nathanw */
1571 1.1.2.2 nathanw float128 float32_to_float128( float32 a )
1572 1.1.2.2 nathanw {
1573 1.1.2.2 nathanw flag aSign;
1574 1.1.2.2 nathanw int16 aExp;
1575 1.1.2.2 nathanw bits32 aSig;
1576 1.1.2.2 nathanw
1577 1.1.2.2 nathanw aSig = extractFloat32Frac( a );
1578 1.1.2.2 nathanw aExp = extractFloat32Exp( a );
1579 1.1.2.2 nathanw aSign = extractFloat32Sign( a );
1580 1.1.2.2 nathanw if ( aExp == 0xFF ) {
1581 1.1.2.2 nathanw if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a ) );
1582 1.1.2.2 nathanw return packFloat128( aSign, 0x7FFF, 0, 0 );
1583 1.1.2.2 nathanw }
1584 1.1.2.2 nathanw if ( aExp == 0 ) {
1585 1.1.2.2 nathanw if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
1586 1.1.2.2 nathanw normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1587 1.1.2.2 nathanw --aExp;
1588 1.1.2.2 nathanw }
1589 1.1.2.2 nathanw return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 );
1590 1.1.2.2 nathanw
1591 1.1.2.2 nathanw }
1592 1.1.2.2 nathanw
1593 1.1.2.2 nathanw #endif
1594 1.1.2.2 nathanw
1595 1.1.2.2 nathanw #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
1596 1.1.2.2 nathanw /*
1597 1.1.2.2 nathanw -------------------------------------------------------------------------------
1598 1.1.2.2 nathanw Rounds the single-precision floating-point value `a' to an integer, and
1599 1.1.2.2 nathanw returns the result as a single-precision floating-point value. The
1600 1.1.2.2 nathanw operation is performed according to the IEC/IEEE Standard for Binary
1601 1.1.2.2 nathanw Floating-Point Arithmetic.
1602 1.1.2.2 nathanw -------------------------------------------------------------------------------
1603 1.1.2.2 nathanw */
1604 1.1.2.2 nathanw float32 float32_round_to_int( float32 a )
1605 1.1.2.2 nathanw {
1606 1.1.2.2 nathanw flag aSign;
1607 1.1.2.2 nathanw int16 aExp;
1608 1.1.2.2 nathanw bits32 lastBitMask, roundBitsMask;
1609 1.1.2.2 nathanw int8 roundingMode;
1610 1.1.2.2 nathanw float32 z;
1611 1.1.2.2 nathanw
1612 1.1.2.2 nathanw aExp = extractFloat32Exp( a );
1613 1.1.2.2 nathanw if ( 0x96 <= aExp ) {
1614 1.1.2.2 nathanw if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
1615 1.1.2.2 nathanw return propagateFloat32NaN( a, a );
1616 1.1.2.2 nathanw }
1617 1.1.2.2 nathanw return a;
1618 1.1.2.2 nathanw }
1619 1.1.2.2 nathanw if ( aExp <= 0x7E ) {
1620 1.1.2.2 nathanw if ( (bits32) ( a<<1 ) == 0 ) return a;
1621 1.1.2.2 nathanw float_set_inexact();
1622 1.1.2.2 nathanw aSign = extractFloat32Sign( a );
1623 1.1.2.2 nathanw switch ( float_rounding_mode() ) {
1624 1.1.2.2 nathanw case float_round_nearest_even:
1625 1.1.2.2 nathanw if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
1626 1.1.2.2 nathanw return packFloat32( aSign, 0x7F, 0 );
1627 1.1.2.2 nathanw }
1628 1.1.2.2 nathanw break;
1629 1.1.2.2 nathanw case float_round_down:
1630 1.1.2.2 nathanw return aSign ? 0xBF800000 : 0;
1631 1.1.2.2 nathanw case float_round_up:
1632 1.1.2.2 nathanw return aSign ? 0x80000000 : 0x3F800000;
1633 1.1.2.2 nathanw }
1634 1.1.2.2 nathanw return packFloat32( aSign, 0, 0 );
1635 1.1.2.2 nathanw }
1636 1.1.2.2 nathanw lastBitMask = 1;
1637 1.1.2.2 nathanw lastBitMask <<= 0x96 - aExp;
1638 1.1.2.2 nathanw roundBitsMask = lastBitMask - 1;
1639 1.1.2.2 nathanw z = a;
1640 1.1.2.2 nathanw roundingMode = float_rounding_mode();
1641 1.1.2.2 nathanw if ( roundingMode == float_round_nearest_even ) {
1642 1.1.2.2 nathanw z += lastBitMask>>1;
1643 1.1.2.2 nathanw if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
1644 1.1.2.2 nathanw }
1645 1.1.2.2 nathanw else if ( roundingMode != float_round_to_zero ) {
1646 1.1.2.2 nathanw if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
1647 1.1.2.2 nathanw z += roundBitsMask;
1648 1.1.2.2 nathanw }
1649 1.1.2.2 nathanw }
1650 1.1.2.2 nathanw z &= ~ roundBitsMask;
1651 1.1.2.2 nathanw if ( z != a ) float_set_inexact();
1652 1.1.2.2 nathanw return z;
1653 1.1.2.2 nathanw
1654 1.1.2.2 nathanw }
1655 1.1.2.2 nathanw #endif /* !SOFTFLOAT_FOR_GCC */
1656 1.1.2.2 nathanw
1657 1.1.2.2 nathanw /*
1658 1.1.2.2 nathanw -------------------------------------------------------------------------------
1659 1.1.2.2 nathanw Returns the result of adding the absolute values of the single-precision
1660 1.1.2.2 nathanw floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
1661 1.1.2.2 nathanw before being returned. `zSign' is ignored if the result is a NaN.
1662 1.1.2.2 nathanw The addition is performed according to the IEC/IEEE Standard for Binary
1663 1.1.2.2 nathanw Floating-Point Arithmetic.
1664 1.1.2.2 nathanw -------------------------------------------------------------------------------
1665 1.1.2.2 nathanw */
1666 1.1.2.2 nathanw static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
1667 1.1.2.2 nathanw {
1668 1.1.2.2 nathanw int16 aExp, bExp, zExp;
1669 1.1.2.2 nathanw bits32 aSig, bSig, zSig;
1670 1.1.2.2 nathanw int16 expDiff;
1671 1.1.2.2 nathanw
1672 1.1.2.2 nathanw aSig = extractFloat32Frac( a );
1673 1.1.2.2 nathanw aExp = extractFloat32Exp( a );
1674 1.1.2.2 nathanw bSig = extractFloat32Frac( b );
1675 1.1.2.2 nathanw bExp = extractFloat32Exp( b );
1676 1.1.2.2 nathanw expDiff = aExp - bExp;
1677 1.1.2.2 nathanw aSig <<= 6;
1678 1.1.2.2 nathanw bSig <<= 6;
1679 1.1.2.2 nathanw if ( 0 < expDiff ) {
1680 1.1.2.2 nathanw if ( aExp == 0xFF ) {
1681 1.1.2.2 nathanw if ( aSig ) return propagateFloat32NaN( a, b );
1682 1.1.2.2 nathanw return a;
1683 1.1.2.2 nathanw }
1684 1.1.2.2 nathanw if ( bExp == 0 ) {
1685 1.1.2.2 nathanw --expDiff;
1686 1.1.2.2 nathanw }
1687 1.1.2.2 nathanw else {
1688 1.1.2.2 nathanw bSig |= 0x20000000;
1689 1.1.2.2 nathanw }
1690 1.1.2.2 nathanw shift32RightJamming( bSig, expDiff, &bSig );
1691 1.1.2.2 nathanw zExp = aExp;
1692 1.1.2.2 nathanw }
1693 1.1.2.2 nathanw else if ( expDiff < 0 ) {
1694 1.1.2.2 nathanw if ( bExp == 0xFF ) {
1695 1.1.2.2 nathanw if ( bSig ) return propagateFloat32NaN( a, b );
1696 1.1.2.2 nathanw return packFloat32( zSign, 0xFF, 0 );
1697 1.1.2.2 nathanw }
1698 1.1.2.2 nathanw if ( aExp == 0 ) {
1699 1.1.2.2 nathanw ++expDiff;
1700 1.1.2.2 nathanw }
1701 1.1.2.2 nathanw else {
1702 1.1.2.2 nathanw aSig |= 0x20000000;
1703 1.1.2.2 nathanw }
1704 1.1.2.2 nathanw shift32RightJamming( aSig, - expDiff, &aSig );
1705 1.1.2.2 nathanw zExp = bExp;
1706 1.1.2.2 nathanw }
1707 1.1.2.2 nathanw else {
1708 1.1.2.2 nathanw if ( aExp == 0xFF ) {
1709 1.1.2.2 nathanw if ( aSig | bSig ) return propagateFloat32NaN( a, b );
1710 1.1.2.2 nathanw return a;
1711 1.1.2.2 nathanw }
1712 1.1.2.2 nathanw if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
1713 1.1.2.2 nathanw zSig = 0x40000000 + aSig + bSig;
1714 1.1.2.2 nathanw zExp = aExp;
1715 1.1.2.2 nathanw goto roundAndPack;
1716 1.1.2.2 nathanw }
1717 1.1.2.2 nathanw aSig |= 0x20000000;
1718 1.1.2.2 nathanw zSig = ( aSig + bSig )<<1;
1719 1.1.2.2 nathanw --zExp;
1720 1.1.2.2 nathanw if ( (sbits32) zSig < 0 ) {
1721 1.1.2.2 nathanw zSig = aSig + bSig;
1722 1.1.2.2 nathanw ++zExp;
1723 1.1.2.2 nathanw }
1724 1.1.2.2 nathanw roundAndPack:
1725 1.1.2.2 nathanw return roundAndPackFloat32( zSign, zExp, zSig );
1726 1.1.2.2 nathanw
1727 1.1.2.2 nathanw }
1728 1.1.2.2 nathanw
1729 1.1.2.2 nathanw /*
1730 1.1.2.2 nathanw -------------------------------------------------------------------------------
1731 1.1.2.2 nathanw Returns the result of subtracting the absolute values of the single-
1732 1.1.2.2 nathanw precision floating-point values `a' and `b'. If `zSign' is 1, the
1733 1.1.2.2 nathanw difference is negated before being returned. `zSign' is ignored if the
1734 1.1.2.2 nathanw result is a NaN. The subtraction is performed according to the IEC/IEEE
1735 1.1.2.2 nathanw Standard for Binary Floating-Point Arithmetic.
1736 1.1.2.2 nathanw -------------------------------------------------------------------------------
1737 1.1.2.2 nathanw */
1738 1.1.2.2 nathanw static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
1739 1.1.2.2 nathanw {
1740 1.1.2.2 nathanw int16 aExp, bExp, zExp;
1741 1.1.2.2 nathanw bits32 aSig, bSig, zSig;
1742 1.1.2.2 nathanw int16 expDiff;
1743 1.1.2.2 nathanw
1744 1.1.2.2 nathanw aSig = extractFloat32Frac( a );
1745 1.1.2.2 nathanw aExp = extractFloat32Exp( a );
1746 1.1.2.2 nathanw bSig = extractFloat32Frac( b );
1747 1.1.2.2 nathanw bExp = extractFloat32Exp( b );
1748 1.1.2.2 nathanw expDiff = aExp - bExp;
1749 1.1.2.2 nathanw aSig <<= 7;
1750 1.1.2.2 nathanw bSig <<= 7;
1751 1.1.2.2 nathanw if ( 0 < expDiff ) goto aExpBigger;
1752 1.1.2.2 nathanw if ( expDiff < 0 ) goto bExpBigger;
1753 1.1.2.2 nathanw if ( aExp == 0xFF ) {
1754 1.1.2.2 nathanw if ( aSig | bSig ) return propagateFloat32NaN( a, b );
1755 1.1.2.2 nathanw float_raise( float_flag_invalid );
1756 1.1.2.2 nathanw return float32_default_nan;
1757 1.1.2.2 nathanw }
1758 1.1.2.2 nathanw if ( aExp == 0 ) {
1759 1.1.2.2 nathanw aExp = 1;
1760 1.1.2.2 nathanw bExp = 1;
1761 1.1.2.2 nathanw }
1762 1.1.2.2 nathanw if ( bSig < aSig ) goto aBigger;
1763 1.1.2.2 nathanw if ( aSig < bSig ) goto bBigger;
1764 1.1.2.2 nathanw return packFloat32( float_rounding_mode() == float_round_down, 0, 0 );
1765 1.1.2.2 nathanw bExpBigger:
1766 1.1.2.2 nathanw if ( bExp == 0xFF ) {
1767 1.1.2.2 nathanw if ( bSig ) return propagateFloat32NaN( a, b );
1768 1.1.2.2 nathanw return packFloat32( zSign ^ 1, 0xFF, 0 );
1769 1.1.2.2 nathanw }
1770 1.1.2.2 nathanw if ( aExp == 0 ) {
1771 1.1.2.2 nathanw ++expDiff;
1772 1.1.2.2 nathanw }
1773 1.1.2.2 nathanw else {
1774 1.1.2.2 nathanw aSig |= 0x40000000;
1775 1.1.2.2 nathanw }
1776 1.1.2.2 nathanw shift32RightJamming( aSig, - expDiff, &aSig );
1777 1.1.2.2 nathanw bSig |= 0x40000000;
1778 1.1.2.2 nathanw bBigger:
1779 1.1.2.2 nathanw zSig = bSig - aSig;
1780 1.1.2.2 nathanw zExp = bExp;
1781 1.1.2.2 nathanw zSign ^= 1;
1782 1.1.2.2 nathanw goto normalizeRoundAndPack;
1783 1.1.2.2 nathanw aExpBigger:
1784 1.1.2.2 nathanw if ( aExp == 0xFF ) {
1785 1.1.2.2 nathanw if ( aSig ) return propagateFloat32NaN( a, b );
1786 1.1.2.2 nathanw return a;
1787 1.1.2.2 nathanw }
1788 1.1.2.2 nathanw if ( bExp == 0 ) {
1789 1.1.2.2 nathanw --expDiff;
1790 1.1.2.2 nathanw }
1791 1.1.2.2 nathanw else {
1792 1.1.2.2 nathanw bSig |= 0x40000000;
1793 1.1.2.2 nathanw }
1794 1.1.2.2 nathanw shift32RightJamming( bSig, expDiff, &bSig );
1795 1.1.2.2 nathanw aSig |= 0x40000000;
1796 1.1.2.2 nathanw aBigger:
1797 1.1.2.2 nathanw zSig = aSig - bSig;
1798 1.1.2.2 nathanw zExp = aExp;
1799 1.1.2.2 nathanw normalizeRoundAndPack:
1800 1.1.2.2 nathanw --zExp;
1801 1.1.2.2 nathanw return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
1802 1.1.2.2 nathanw
1803 1.1.2.2 nathanw }
1804 1.1.2.2 nathanw
1805 1.1.2.2 nathanw /*
1806 1.1.2.2 nathanw -------------------------------------------------------------------------------
1807 1.1.2.2 nathanw Returns the result of adding the single-precision floating-point values `a'
1808 1.1.2.2 nathanw and `b'. The operation is performed according to the IEC/IEEE Standard for
1809 1.1.2.2 nathanw Binary Floating-Point Arithmetic.
1810 1.1.2.2 nathanw -------------------------------------------------------------------------------
1811 1.1.2.2 nathanw */
1812 1.1.2.2 nathanw float32 float32_add( float32 a, float32 b )
1813 1.1.2.2 nathanw {
1814 1.1.2.2 nathanw flag aSign, bSign;
1815 1.1.2.2 nathanw
1816 1.1.2.2 nathanw aSign = extractFloat32Sign( a );
1817 1.1.2.2 nathanw bSign = extractFloat32Sign( b );
1818 1.1.2.2 nathanw if ( aSign == bSign ) {
1819 1.1.2.2 nathanw return addFloat32Sigs( a, b, aSign );
1820 1.1.2.2 nathanw }
1821 1.1.2.2 nathanw else {
1822 1.1.2.2 nathanw return subFloat32Sigs( a, b, aSign );
1823 1.1.2.2 nathanw }
1824 1.1.2.2 nathanw
1825 1.1.2.2 nathanw }
1826 1.1.2.2 nathanw
1827 1.1.2.2 nathanw /*
1828 1.1.2.2 nathanw -------------------------------------------------------------------------------
1829 1.1.2.2 nathanw Returns the result of subtracting the single-precision floating-point values
1830 1.1.2.2 nathanw `a' and `b'. The operation is performed according to the IEC/IEEE Standard
1831 1.1.2.2 nathanw for Binary Floating-Point Arithmetic.
1832 1.1.2.2 nathanw -------------------------------------------------------------------------------
1833 1.1.2.2 nathanw */
1834 1.1.2.2 nathanw float32 float32_sub( float32 a, float32 b )
1835 1.1.2.2 nathanw {
1836 1.1.2.2 nathanw flag aSign, bSign;
1837 1.1.2.2 nathanw
1838 1.1.2.2 nathanw aSign = extractFloat32Sign( a );
1839 1.1.2.2 nathanw bSign = extractFloat32Sign( b );
1840 1.1.2.2 nathanw if ( aSign == bSign ) {
1841 1.1.2.2 nathanw return subFloat32Sigs( a, b, aSign );
1842 1.1.2.2 nathanw }
1843 1.1.2.2 nathanw else {
1844 1.1.2.2 nathanw return addFloat32Sigs( a, b, aSign );
1845 1.1.2.2 nathanw }
1846 1.1.2.2 nathanw
1847 1.1.2.2 nathanw }
1848 1.1.2.2 nathanw
1849 1.1.2.2 nathanw /*
1850 1.1.2.2 nathanw -------------------------------------------------------------------------------
1851 1.1.2.2 nathanw Returns the result of multiplying the single-precision floating-point values
1852 1.1.2.2 nathanw `a' and `b'. The operation is performed according to the IEC/IEEE Standard
1853 1.1.2.2 nathanw for Binary Floating-Point Arithmetic.
1854 1.1.2.2 nathanw -------------------------------------------------------------------------------
1855 1.1.2.2 nathanw */
1856 1.1.2.2 nathanw float32 float32_mul( float32 a, float32 b )
1857 1.1.2.2 nathanw {
1858 1.1.2.2 nathanw flag aSign, bSign, zSign;
1859 1.1.2.2 nathanw int16 aExp, bExp, zExp;
1860 1.1.2.2 nathanw bits32 aSig, bSig;
1861 1.1.2.2 nathanw bits64 zSig64;
1862 1.1.2.2 nathanw bits32 zSig;
1863 1.1.2.2 nathanw
1864 1.1.2.2 nathanw aSig = extractFloat32Frac( a );
1865 1.1.2.2 nathanw aExp = extractFloat32Exp( a );
1866 1.1.2.2 nathanw aSign = extractFloat32Sign( a );
1867 1.1.2.2 nathanw bSig = extractFloat32Frac( b );
1868 1.1.2.2 nathanw bExp = extractFloat32Exp( b );
1869 1.1.2.2 nathanw bSign = extractFloat32Sign( b );
1870 1.1.2.2 nathanw zSign = aSign ^ bSign;
1871 1.1.2.2 nathanw if ( aExp == 0xFF ) {
1872 1.1.2.2 nathanw if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1873 1.1.2.2 nathanw return propagateFloat32NaN( a, b );
1874 1.1.2.2 nathanw }
1875 1.1.2.2 nathanw if ( ( bExp | bSig ) == 0 ) {
1876 1.1.2.2 nathanw float_raise( float_flag_invalid );
1877 1.1.2.2 nathanw return float32_default_nan;
1878 1.1.2.2 nathanw }
1879 1.1.2.2 nathanw return packFloat32( zSign, 0xFF, 0 );
1880 1.1.2.2 nathanw }
1881 1.1.2.2 nathanw if ( bExp == 0xFF ) {
1882 1.1.2.2 nathanw if ( bSig ) return propagateFloat32NaN( a, b );
1883 1.1.2.2 nathanw if ( ( aExp | aSig ) == 0 ) {
1884 1.1.2.2 nathanw float_raise( float_flag_invalid );
1885 1.1.2.2 nathanw return float32_default_nan;
1886 1.1.2.2 nathanw }
1887 1.1.2.2 nathanw return packFloat32( zSign, 0xFF, 0 );
1888 1.1.2.2 nathanw }
1889 1.1.2.2 nathanw if ( aExp == 0 ) {
1890 1.1.2.2 nathanw if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1891 1.1.2.2 nathanw normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1892 1.1.2.2 nathanw }
1893 1.1.2.2 nathanw if ( bExp == 0 ) {
1894 1.1.2.2 nathanw if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
1895 1.1.2.2 nathanw normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1896 1.1.2.2 nathanw }
1897 1.1.2.2 nathanw zExp = aExp + bExp - 0x7F;
1898 1.1.2.2 nathanw aSig = ( aSig | 0x00800000 )<<7;
1899 1.1.2.2 nathanw bSig = ( bSig | 0x00800000 )<<8;
1900 1.1.2.2 nathanw shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
1901 1.1.2.2 nathanw zSig = zSig64;
1902 1.1.2.2 nathanw if ( 0 <= (sbits32) ( zSig<<1 ) ) {
1903 1.1.2.2 nathanw zSig <<= 1;
1904 1.1.2.2 nathanw --zExp;
1905 1.1.2.2 nathanw }
1906 1.1.2.2 nathanw return roundAndPackFloat32( zSign, zExp, zSig );
1907 1.1.2.2 nathanw
1908 1.1.2.2 nathanw }
1909 1.1.2.2 nathanw
1910 1.1.2.2 nathanw /*
1911 1.1.2.2 nathanw -------------------------------------------------------------------------------
1912 1.1.2.2 nathanw Returns the result of dividing the single-precision floating-point value `a'
1913 1.1.2.2 nathanw by the corresponding value `b'. The operation is performed according to the
1914 1.1.2.2 nathanw IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1915 1.1.2.2 nathanw -------------------------------------------------------------------------------
1916 1.1.2.2 nathanw */
1917 1.1.2.2 nathanw float32 float32_div( float32 a, float32 b )
1918 1.1.2.2 nathanw {
1919 1.1.2.2 nathanw flag aSign, bSign, zSign;
1920 1.1.2.2 nathanw int16 aExp, bExp, zExp;
1921 1.1.2.2 nathanw bits32 aSig, bSig, zSig;
1922 1.1.2.2 nathanw
1923 1.1.2.2 nathanw aSig = extractFloat32Frac( a );
1924 1.1.2.2 nathanw aExp = extractFloat32Exp( a );
1925 1.1.2.2 nathanw aSign = extractFloat32Sign( a );
1926 1.1.2.2 nathanw bSig = extractFloat32Frac( b );
1927 1.1.2.2 nathanw bExp = extractFloat32Exp( b );
1928 1.1.2.2 nathanw bSign = extractFloat32Sign( b );
1929 1.1.2.2 nathanw zSign = aSign ^ bSign;
1930 1.1.2.2 nathanw if ( aExp == 0xFF ) {
1931 1.1.2.2 nathanw if ( aSig ) return propagateFloat32NaN( a, b );
1932 1.1.2.2 nathanw if ( bExp == 0xFF ) {
1933 1.1.2.2 nathanw if ( bSig ) return propagateFloat32NaN( a, b );
1934 1.1.2.2 nathanw float_raise( float_flag_invalid );
1935 1.1.2.2 nathanw return float32_default_nan;
1936 1.1.2.2 nathanw }
1937 1.1.2.2 nathanw return packFloat32( zSign, 0xFF, 0 );
1938 1.1.2.2 nathanw }
1939 1.1.2.2 nathanw if ( bExp == 0xFF ) {
1940 1.1.2.2 nathanw if ( bSig ) return propagateFloat32NaN( a, b );
1941 1.1.2.2 nathanw return packFloat32( zSign, 0, 0 );
1942 1.1.2.2 nathanw }
1943 1.1.2.2 nathanw if ( bExp == 0 ) {
1944 1.1.2.2 nathanw if ( bSig == 0 ) {
1945 1.1.2.2 nathanw if ( ( aExp | aSig ) == 0 ) {
1946 1.1.2.2 nathanw float_raise( float_flag_invalid );
1947 1.1.2.2 nathanw return float32_default_nan;
1948 1.1.2.2 nathanw }
1949 1.1.2.2 nathanw float_raise( float_flag_divbyzero );
1950 1.1.2.2 nathanw return packFloat32( zSign, 0xFF, 0 );
1951 1.1.2.2 nathanw }
1952 1.1.2.2 nathanw normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1953 1.1.2.2 nathanw }
1954 1.1.2.2 nathanw if ( aExp == 0 ) {
1955 1.1.2.2 nathanw if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1956 1.1.2.2 nathanw normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1957 1.1.2.2 nathanw }
1958 1.1.2.2 nathanw zExp = aExp - bExp + 0x7D;
1959 1.1.2.2 nathanw aSig = ( aSig | 0x00800000 )<<7;
1960 1.1.2.2 nathanw bSig = ( bSig | 0x00800000 )<<8;
1961 1.1.2.2 nathanw if ( bSig <= ( aSig + aSig ) ) {
1962 1.1.2.2 nathanw aSig >>= 1;
1963 1.1.2.2 nathanw ++zExp;
1964 1.1.2.2 nathanw }
1965 1.1.2.2 nathanw zSig = ( ( (bits64) aSig )<<32 ) / bSig;
1966 1.1.2.2 nathanw if ( ( zSig & 0x3F ) == 0 ) {
1967 1.1.2.2 nathanw zSig |= ( (bits64) bSig * zSig != ( (bits64) aSig )<<32 );
1968 1.1.2.2 nathanw }
1969 1.1.2.2 nathanw return roundAndPackFloat32( zSign, zExp, zSig );
1970 1.1.2.2 nathanw
1971 1.1.2.2 nathanw }
1972 1.1.2.2 nathanw
1973 1.1.2.2 nathanw #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
1974 1.1.2.2 nathanw /*
1975 1.1.2.2 nathanw -------------------------------------------------------------------------------
1976 1.1.2.2 nathanw Returns the remainder of the single-precision floating-point value `a'
1977 1.1.2.2 nathanw with respect to the corresponding value `b'. The operation is performed
1978 1.1.2.2 nathanw according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1979 1.1.2.2 nathanw -------------------------------------------------------------------------------
1980 1.1.2.2 nathanw */
1981 1.1.2.2 nathanw float32 float32_rem( float32 a, float32 b )
1982 1.1.2.2 nathanw {
1983 1.1.2.2 nathanw flag aSign, bSign, zSign;
1984 1.1.2.2 nathanw int16 aExp, bExp, expDiff;
1985 1.1.2.2 nathanw bits32 aSig, bSig;
1986 1.1.2.2 nathanw bits32 q;
1987 1.1.2.2 nathanw bits64 aSig64, bSig64, q64;
1988 1.1.2.2 nathanw bits32 alternateASig;
1989 1.1.2.2 nathanw sbits32 sigMean;
1990 1.1.2.2 nathanw
/* Unpack the fraction, biased exponent and sign of both operands. */
1991 1.1.2.2 nathanw aSig = extractFloat32Frac( a );
1992 1.1.2.2 nathanw aExp = extractFloat32Exp( a );
1993 1.1.2.2 nathanw aSign = extractFloat32Sign( a );
1994 1.1.2.2 nathanw bSig = extractFloat32Frac( b );
1995 1.1.2.2 nathanw bExp = extractFloat32Exp( b );
1996 1.1.2.2 nathanw bSign = extractFloat32Sign( b );
/* `a' has exponent 0xFF: a NaN in either operand is propagated; otherwise
   the invalid exception is raised and the default NaN is returned. */
1997 1.1.2.2 nathanw if ( aExp == 0xFF ) {
1998 1.1.2.2 nathanw if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1999 1.1.2.2 nathanw return propagateFloat32NaN( a, b );
2000 1.1.2.2 nathanw }
2001 1.1.2.2 nathanw float_raise( float_flag_invalid );
2002 1.1.2.2 nathanw return float32_default_nan;
2003 1.1.2.2 nathanw }
/* `b' has exponent 0xFF: a NaN `b' is propagated, otherwise `a' is returned
   unchanged. */
2004 1.1.2.2 nathanw if ( bExp == 0xFF ) {
2005 1.1.2.2 nathanw if ( bSig ) return propagateFloat32NaN( a, b );
2006 1.1.2.2 nathanw return a;
2007 1.1.2.2 nathanw }
/* `b' has exponent 0: a zero fraction (b == 0) is an invalid operation; a
   nonzero fraction is passed through normalizeFloat32Subnormal. */
2008 1.1.2.2 nathanw if ( bExp == 0 ) {
2009 1.1.2.2 nathanw if ( bSig == 0 ) {
2010 1.1.2.2 nathanw float_raise( float_flag_invalid );
2011 1.1.2.2 nathanw return float32_default_nan;
2012 1.1.2.2 nathanw }
2013 1.1.2.2 nathanw normalizeFloat32Subnormal( bSig, &bExp, &bSig );
2014 1.1.2.2 nathanw }
/* `a' has exponent 0: a zero `a' is returned as is. */
2015 1.1.2.2 nathanw if ( aExp == 0 ) {
2016 1.1.2.2 nathanw if ( aSig == 0 ) return a;
2017 1.1.2.2 nathanw normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2018 1.1.2.2 nathanw }
2019 1.1.2.2 nathanw expDiff = aExp - bExp;
/* Set bit 23 in both significands. */
2020 1.1.2.2 nathanw aSig |= 0x00800000;
2021 1.1.2.2 nathanw bSig |= 0x00800000;
/* Exponent difference below 32: work on 32-bit significands, using a single
   64-by-32-bit division when expDiff is positive. */
2022 1.1.2.2 nathanw if ( expDiff < 32 ) {
2023 1.1.2.2 nathanw aSig <<= 8;
2024 1.1.2.2 nathanw bSig <<= 8;
2025 1.1.2.2 nathanw if ( expDiff < 0 ) {
/* The exponent of `a' is at least two below that of `b': `a' is returned
   unchanged. */
2026 1.1.2.2 nathanw if ( expDiff < -1 ) return a;
2027 1.1.2.2 nathanw aSig >>= 1;
2028 1.1.2.2 nathanw }
/* q starts as 0 or 1 depending on whether bSig fits into aSig once. */
2029 1.1.2.2 nathanw q = ( bSig <= aSig );
2030 1.1.2.2 nathanw if ( q ) aSig -= bSig;
2031 1.1.2.2 nathanw if ( 0 < expDiff ) {
2032 1.1.2.2 nathanw q = ( ( (bits64) aSig )<<32 ) / bSig;
2033 1.1.2.2 nathanw q >>= 32 - expDiff;
2034 1.1.2.2 nathanw bSig >>= 2;
2035 1.1.2.2 nathanw aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
2036 1.1.2.2 nathanw }
2037 1.1.2.2 nathanw else {
2038 1.1.2.2 nathanw aSig >>= 2;
2039 1.1.2.2 nathanw bSig >>= 2;
2040 1.1.2.2 nathanw }
2041 1.1.2.2 nathanw }
/* Exponent difference of 32 or more: reduce in 64-bit arithmetic.  Each loop
   pass lowers expDiff by 62 using a quotient from estimateDiv128To64. */
2042 1.1.2.2 nathanw else {
2043 1.1.2.2 nathanw if ( bSig <= aSig ) aSig -= bSig;
2044 1.1.2.2 nathanw aSig64 = ( (bits64) aSig )<<40;
2045 1.1.2.2 nathanw bSig64 = ( (bits64) bSig )<<40;
2046 1.1.2.2 nathanw expDiff -= 64;
2047 1.1.2.2 nathanw while ( 0 < expDiff ) {
2048 1.1.2.2 nathanw q64 = estimateDiv128To64( aSig64, 0, bSig64 );
/* The estimate is lowered by 2, clamped at 0 (presumably so that it never
   exceeds the true quotient -- confirm against estimateDiv128To64). */
2049 1.1.2.2 nathanw q64 = ( 2 < q64 ) ? q64 - 2 : 0;
2050 1.1.2.2 nathanw aSig64 = - ( ( bSig * q64 )<<38 );
2051 1.1.2.2 nathanw expDiff -= 62;
2052 1.1.2.2 nathanw }
2053 1.1.2.2 nathanw expDiff += 64;
2054 1.1.2.2 nathanw q64 = estimateDiv128To64( aSig64, 0, bSig64 );
2055 1.1.2.2 nathanw q64 = ( 2 < q64 ) ? q64 - 2 : 0;
2056 1.1.2.2 nathanw q = q64>>( 64 - expDiff );
2057 1.1.2.2 nathanw bSig <<= 6;
2058 1.1.2.2 nathanw aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
2059 1.1.2.2 nathanw }
/* Keep subtracting bSig until aSig, read as a signed 32-bit value, turns
   negative.  alternateASig holds the value before the last subtraction and
   q counts the subtractions. */
2060 1.1.2.2 nathanw do {
2061 1.1.2.2 nathanw alternateASig = aSig;
2062 1.1.2.2 nathanw ++q;
2063 1.1.2.2 nathanw aSig -= bSig;
2064 1.1.2.2 nathanw } while ( 0 <= (sbits32) aSig );
/* Choose between the two candidates using their sum: a negative sum selects
   alternateASig, and a zero sum (a tie) selects it only when q is odd. */
2065 1.1.2.2 nathanw sigMean = aSig + alternateASig;
2066 1.1.2.2 nathanw if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
2067 1.1.2.2 nathanw aSig = alternateASig;
2068 1.1.2.2 nathanw }
/* A negative remainder is negated, and that flips the sign of `a' in the
   packed result. */
2069 1.1.2.2 nathanw zSign = ( (sbits32) aSig < 0 );
2070 1.1.2.2 nathanw if ( zSign ) aSig = - aSig;
2071 1.1.2.2 nathanw return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
2072 1.1.2.2 nathanw
2073 1.1.2.2 nathanw }
2074 1.1.2.2 nathanw #endif /* !SOFTFLOAT_FOR_GCC */
2075 1.1.2.2 nathanw
2076 1.1.2.2 nathanw #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
2077 1.1.2.2 nathanw /*
2078 1.1.2.2 nathanw -------------------------------------------------------------------------------
2079 1.1.2.2 nathanw Returns the square root of the single-precision floating-point value `a'.
2080 1.1.2.2 nathanw The operation is performed according to the IEC/IEEE Standard for Binary
2081 1.1.2.2 nathanw Floating-Point Arithmetic.
2082 1.1.2.2 nathanw -------------------------------------------------------------------------------
2083 1.1.2.2 nathanw */
2084 1.1.2.2 nathanw float32 float32_sqrt( float32 a )
2085 1.1.2.2 nathanw {
2086 1.1.2.2 nathanw flag aSign;
2087 1.1.2.2 nathanw int16 aExp, zExp;
2088 1.1.2.2 nathanw bits32 aSig, zSig;
2089 1.1.2.2 nathanw bits64 rem, term;
2090 1.1.2.2 nathanw
2091 1.1.2.2 nathanw aSig = extractFloat32Frac( a );
2092 1.1.2.2 nathanw aExp = extractFloat32Exp( a );
2093 1.1.2.2 nathanw aSign = extractFloat32Sign( a );
2094 1.1.2.2 nathanw if ( aExp == 0xFF ) {
2095 1.1.2.2 nathanw if ( aSig ) return propagateFloat32NaN( a, 0 );
2096 1.1.2.2 nathanw if ( ! aSign ) return a;
2097 1.1.2.2 nathanw float_raise( float_flag_invalid );
2098 1.1.2.2 nathanw return float32_default_nan;
2099 1.1.2.2 nathanw }
2100 1.1.2.2 nathanw if ( aSign ) {
2101 1.1.2.2 nathanw if ( ( aExp | aSig ) == 0 ) return a;
2102 1.1.2.2 nathanw float_raise( float_flag_invalid );
2103 1.1.2.2 nathanw return float32_default_nan;
2104 1.1.2.2 nathanw }
2105 1.1.2.2 nathanw if ( aExp == 0 ) {
2106 1.1.2.2 nathanw if ( aSig == 0 ) return 0;
2107 1.1.2.2 nathanw normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2108 1.1.2.2 nathanw }
2109 1.1.2.2 nathanw zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
2110 1.1.2.2 nathanw aSig = ( aSig | 0x00800000 )<<8;
2111 1.1.2.2 nathanw zSig = estimateSqrt32( aExp, aSig ) + 2;
2112 1.1.2.2 nathanw if ( ( zSig & 0x7F ) <= 5 ) {
2113 1.1.2.2 nathanw if ( zSig < 2 ) {
2114 1.1.2.2 nathanw zSig = 0x7FFFFFFF;
2115 1.1.2.2 nathanw goto roundAndPack;
2116 1.1.2.2 nathanw }
2117 1.1.2.2 nathanw aSig >>= aExp & 1;
2118 1.1.2.2 nathanw term = ( (bits64) zSig ) * zSig;
2119 1.1.2.2 nathanw rem = ( ( (bits64) aSig )<<32 ) - term;
2120 1.1.2.2 nathanw while ( (sbits64) rem < 0 ) {
2121 1.1.2.2 nathanw --zSig;
2122 1.1.2.2 nathanw rem += ( ( (bits64) zSig )<<1 ) | 1;
2123 1.1.2.2 nathanw }
2124 1.1.2.2 nathanw zSig |= ( rem != 0 );
2125 1.1.2.2 nathanw }
2126 1.1.2.2 nathanw shift32RightJamming( zSig, 1, &zSig );
2127 1.1.2.2 nathanw roundAndPack:
2128 1.1.2.2 nathanw return roundAndPackFloat32( 0, zExp, zSig );
2129 1.1.2.2 nathanw
2130 1.1.2.2 nathanw }
2131 1.1.2.2 nathanw #endif /* !SOFTFLOAT_FOR_GCC */
2132 1.1.2.2 nathanw
2133 1.1.2.2 nathanw /*
2134 1.1.2.2 nathanw -------------------------------------------------------------------------------
2135 1.1.2.2 nathanw Returns 1 if the single-precision floating-point value `a' is equal to
2136 1.1.2.2 nathanw the corresponding value `b', and 0 otherwise. The comparison is performed
2137 1.1.2.2 nathanw according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2138 1.1.2.2 nathanw -------------------------------------------------------------------------------
2139 1.1.2.2 nathanw */
2140 1.1.2.2 nathanw flag float32_eq( float32 a, float32 b )
2141 1.1.2.2 nathanw {
2142 1.1.2.2 nathanw
2143 1.1.2.2 nathanw if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2144 1.1.2.2 nathanw || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2145 1.1.2.2 nathanw ) {
2146 1.1.2.2 nathanw if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2147 1.1.2.2 nathanw float_raise( float_flag_invalid );
2148 1.1.2.2 nathanw }
2149 1.1.2.2 nathanw return 0;
2150 1.1.2.2 nathanw }
2151 1.1.2.2 nathanw return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
2152 1.1.2.2 nathanw
2153 1.1.2.2 nathanw }
2154 1.1.2.2 nathanw
2155 1.1.2.2 nathanw /*
2156 1.1.2.2 nathanw -------------------------------------------------------------------------------
2157 1.1.2.2 nathanw Returns 1 if the single-precision floating-point value `a' is less than
2158 1.1.2.2 nathanw or equal to the corresponding value `b', and 0 otherwise. The comparison
2159 1.1.2.2 nathanw is performed according to the IEC/IEEE Standard for Binary Floating-Point
2160 1.1.2.2 nathanw Arithmetic.
2161 1.1.2.2 nathanw -------------------------------------------------------------------------------
2162 1.1.2.2 nathanw */
2163 1.1.2.2 nathanw flag float32_le( float32 a, float32 b )
2164 1.1.2.2 nathanw {
2165 1.1.2.2 nathanw flag aSign, bSign;
2166 1.1.2.2 nathanw
2167 1.1.2.2 nathanw if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2168 1.1.2.2 nathanw || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2169 1.1.2.2 nathanw ) {
2170 1.1.2.2 nathanw float_raise( float_flag_invalid );
2171 1.1.2.2 nathanw return 0;
2172 1.1.2.2 nathanw }
2173 1.1.2.2 nathanw aSign = extractFloat32Sign( a );
2174 1.1.2.2 nathanw bSign = extractFloat32Sign( b );
2175 1.1.2.2 nathanw if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
2176 1.1.2.2 nathanw return ( a == b ) || ( aSign ^ ( a < b ) );
2177 1.1.2.2 nathanw
2178 1.1.2.2 nathanw }
2179 1.1.2.2 nathanw
2180 1.1.2.2 nathanw /*
2181 1.1.2.2 nathanw -------------------------------------------------------------------------------
2182 1.1.2.2 nathanw Returns 1 if the single-precision floating-point value `a' is less than
2183 1.1.2.2 nathanw the corresponding value `b', and 0 otherwise. The comparison is performed
2184 1.1.2.2 nathanw according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2185 1.1.2.2 nathanw -------------------------------------------------------------------------------
2186 1.1.2.2 nathanw */
2187 1.1.2.2 nathanw flag float32_lt( float32 a, float32 b )
2188 1.1.2.2 nathanw {
2189 1.1.2.2 nathanw flag aSign, bSign;
2190 1.1.2.2 nathanw
2191 1.1.2.2 nathanw if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2192 1.1.2.2 nathanw || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2193 1.1.2.2 nathanw ) {
2194 1.1.2.2 nathanw float_raise( float_flag_invalid );
2195 1.1.2.2 nathanw return 0;
2196 1.1.2.2 nathanw }
2197 1.1.2.2 nathanw aSign = extractFloat32Sign( a );
2198 1.1.2.2 nathanw bSign = extractFloat32Sign( b );
2199 1.1.2.2 nathanw if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
2200 1.1.2.2 nathanw return ( a != b ) && ( aSign ^ ( a < b ) );
2201 1.1.2.2 nathanw
2202 1.1.2.2 nathanw }
2203 1.1.2.2 nathanw
2204 1.1.2.2 nathanw #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
2205 1.1.2.2 nathanw /*
2206 1.1.2.2 nathanw -------------------------------------------------------------------------------
2207 1.1.2.2 nathanw Returns 1 if the single-precision floating-point value `a' is equal to
2208 1.1.2.2 nathanw the corresponding value `b', and 0 otherwise. The invalid exception is
2209 1.1.2.2 nathanw raised if either operand is a NaN. Otherwise, the comparison is performed
2210 1.1.2.2 nathanw according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2211 1.1.2.2 nathanw -------------------------------------------------------------------------------
2212 1.1.2.2 nathanw */
2213 1.1.2.2 nathanw flag float32_eq_signaling( float32 a, float32 b )
2214 1.1.2.2 nathanw {
2215 1.1.2.2 nathanw
2216 1.1.2.2 nathanw if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2217 1.1.2.2 nathanw || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2218 1.1.2.2 nathanw ) {
2219 1.1.2.2 nathanw float_raise( float_flag_invalid );
2220 1.1.2.2 nathanw return 0;
2221 1.1.2.2 nathanw }
2222 1.1.2.2 nathanw return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
2223 1.1.2.2 nathanw
2224 1.1.2.2 nathanw }
2225 1.1.2.2 nathanw
2226 1.1.2.2 nathanw /*
2227 1.1.2.2 nathanw -------------------------------------------------------------------------------
2228 1.1.2.2 nathanw Returns 1 if the single-precision floating-point value `a' is less than or
2229 1.1.2.2 nathanw equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
2230 1.1.2.2 nathanw cause an exception. Otherwise, the comparison is performed according to the
2231 1.1.2.2 nathanw IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2232 1.1.2.2 nathanw -------------------------------------------------------------------------------
2233 1.1.2.2 nathanw */
2234 1.1.2.2 nathanw flag float32_le_quiet( float32 a, float32 b )
2235 1.1.2.2 nathanw {
2236 1.1.2.2 nathanw flag aSign, bSign;
2237 1.1.2.2 nathanw
2238 1.1.2.2 nathanw if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2239 1.1.2.2 nathanw || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2240 1.1.2.2 nathanw ) {
2241 1.1.2.2 nathanw if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2242 1.1.2.2 nathanw float_raise( float_flag_invalid );
2243 1.1.2.2 nathanw }
2244 1.1.2.2 nathanw return 0;
2245 1.1.2.2 nathanw }
2246 1.1.2.2 nathanw aSign = extractFloat32Sign( a );
2247 1.1.2.2 nathanw bSign = extractFloat32Sign( b );
2248 1.1.2.2 nathanw if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
2249 1.1.2.2 nathanw return ( a == b ) || ( aSign ^ ( a < b ) );
2250 1.1.2.2 nathanw
2251 1.1.2.2 nathanw }
2252 1.1.2.2 nathanw
2253 1.1.2.2 nathanw /*
2254 1.1.2.2 nathanw -------------------------------------------------------------------------------
2255 1.1.2.2 nathanw Returns 1 if the single-precision floating-point value `a' is less than
2256 1.1.2.2 nathanw the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
2257 1.1.2.2 nathanw exception. Otherwise, the comparison is performed according to the IEC/IEEE
2258 1.1.2.2 nathanw Standard for Binary Floating-Point Arithmetic.
2259 1.1.2.2 nathanw -------------------------------------------------------------------------------
2260 1.1.2.2 nathanw */
2261 1.1.2.2 nathanw flag float32_lt_quiet( float32 a, float32 b )
2262 1.1.2.2 nathanw {
2263 1.1.2.2 nathanw flag aSign, bSign;
2264 1.1.2.2 nathanw
2265 1.1.2.2 nathanw if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2266 1.1.2.2 nathanw || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2267 1.1.2.2 nathanw ) {
2268 1.1.2.2 nathanw if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2269 1.1.2.2 nathanw float_raise( float_flag_invalid );
2270 1.1.2.2 nathanw }
2271 1.1.2.2 nathanw return 0;
2272 1.1.2.2 nathanw }
2273 1.1.2.2 nathanw aSign = extractFloat32Sign( a );
2274 1.1.2.2 nathanw bSign = extractFloat32Sign( b );
2275 1.1.2.2 nathanw if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
2276 1.1.2.2 nathanw return ( a != b ) && ( aSign ^ ( a < b ) );
2277 1.1.2.2 nathanw
2278 1.1.2.2 nathanw }
2279 1.1.2.2 nathanw #endif /* !SOFTFLOAT_FOR_GCC */
2280 1.1.2.2 nathanw
2281 1.1.2.2 nathanw #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
2282 1.1.2.2 nathanw /*
2283 1.1.2.2 nathanw -------------------------------------------------------------------------------
2284 1.1.2.2 nathanw Returns the result of converting the double-precision floating-point value
2285 1.1.2.2 nathanw `a' to the 32-bit two's complement integer format. The conversion is
2286 1.1.2.2 nathanw performed according to the IEC/IEEE Standard for Binary Floating-Point
2287 1.1.2.2 nathanw Arithmetic---which means in particular that the conversion is rounded
2288 1.1.2.2 nathanw according to the current rounding mode. If `a' is a NaN, the largest
2289 1.1.2.2 nathanw positive integer is returned. Otherwise, if the conversion overflows, the
2290 1.1.2.2 nathanw largest integer with the same sign as `a' is returned.
2291 1.1.2.2 nathanw -------------------------------------------------------------------------------
2292 1.1.2.2 nathanw */
2293 1.1.2.2 nathanw int32 float64_to_int32( float64 a )
2294 1.1.2.2 nathanw {
2295 1.1.2.2 nathanw flag aSign;
2296 1.1.2.2 nathanw int16 aExp, shiftCount;
2297 1.1.2.2 nathanw bits64 aSig;
2298 1.1.2.2 nathanw
2299 1.1.2.2 nathanw aSig = extractFloat64Frac( a );
2300 1.1.2.2 nathanw aExp = extractFloat64Exp( a );
2301 1.1.2.2 nathanw aSign = extractFloat64Sign( a );
2302 1.1.2.2 nathanw if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
2303 1.1.2.2 nathanw if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2304 1.1.2.2 nathanw shiftCount = 0x42C - aExp;
2305 1.1.2.2 nathanw if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
2306 1.1.2.2 nathanw return roundAndPackInt32( aSign, aSig );
2307 1.1.2.2 nathanw
2308 1.1.2.2 nathanw }
2309 1.1.2.2 nathanw #endif /* !SOFTFLOAT_FOR_GCC */
2310 1.1.2.2 nathanw
2311 1.1.2.2 nathanw /*
2312 1.1.2.2 nathanw -------------------------------------------------------------------------------
2313 1.1.2.2 nathanw Returns the result of converting the double-precision floating-point value
2314 1.1.2.2 nathanw `a' to the 32-bit two's complement integer format. The conversion is
2315 1.1.2.2 nathanw performed according to the IEC/IEEE Standard for Binary Floating-Point
2316 1.1.2.2 nathanw Arithmetic, except that the conversion is always rounded toward zero.
2317 1.1.2.2 nathanw If `a' is a NaN, the largest positive integer is returned. Otherwise, if
2318 1.1.2.2 nathanw the conversion overflows, the largest integer with the same sign as `a' is
2319 1.1.2.2 nathanw returned.
2320 1.1.2.2 nathanw -------------------------------------------------------------------------------
2321 1.1.2.2 nathanw */
2322 1.1.2.2 nathanw int32 float64_to_int32_round_to_zero( float64 a )
2323 1.1.2.2 nathanw {
2324 1.1.2.2 nathanw flag aSign;
2325 1.1.2.2 nathanw int16 aExp, shiftCount;
2326 1.1.2.2 nathanw bits64 aSig, savedASig;
2327 1.1.2.2 nathanw int32 z;
2328 1.1.2.2 nathanw
2329 1.1.2.2 nathanw aSig = extractFloat64Frac( a );
2330 1.1.2.2 nathanw aExp = extractFloat64Exp( a );
2331 1.1.2.2 nathanw aSign = extractFloat64Sign( a );
2332 1.1.2.2 nathanw if ( 0x41E < aExp ) {
2333 1.1.2.2 nathanw if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
2334 1.1.2.2 nathanw goto invalid;
2335 1.1.2.2 nathanw }
2336 1.1.2.2 nathanw else if ( aExp < 0x3FF ) {
2337 1.1.2.2 nathanw if ( aExp || aSig ) float_set_inexact();
2338 1.1.2.2 nathanw return 0;
2339 1.1.2.2 nathanw }
2340 1.1.2.2 nathanw aSig |= LIT64( 0x0010000000000000 );
2341 1.1.2.2 nathanw shiftCount = 0x433 - aExp;
2342 1.1.2.2 nathanw savedASig = aSig;
2343 1.1.2.2 nathanw aSig >>= shiftCount;
2344 1.1.2.2 nathanw z = aSig;
2345 1.1.2.2 nathanw if ( aSign ) z = - z;
2346 1.1.2.2 nathanw if ( ( z < 0 ) ^ aSign ) {
2347 1.1.2.2 nathanw invalid:
2348 1.1.2.2 nathanw float_raise( float_flag_invalid );
2349 1.1.2.2 nathanw return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
2350 1.1.2.2 nathanw }
2351 1.1.2.2 nathanw if ( ( aSig<<shiftCount ) != savedASig ) {
2352 1.1.2.2 nathanw float_set_inexact();
2353 1.1.2.2 nathanw }
2354 1.1.2.2 nathanw return z;
2355 1.1.2.2 nathanw
2356 1.1.2.2 nathanw }
2357 1.1.2.2 nathanw
2358 1.1.2.2 nathanw #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
2359 1.1.2.2 nathanw /*
2360 1.1.2.2 nathanw -------------------------------------------------------------------------------
2361 1.1.2.2 nathanw Returns the result of converting the double-precision floating-point value
2362 1.1.2.2 nathanw `a' to the 64-bit two's complement integer format. The conversion is
2363 1.1.2.2 nathanw performed according to the IEC/IEEE Standard for Binary Floating-Point
2364 1.1.2.2 nathanw Arithmetic---which means in particular that the conversion is rounded
2365 1.1.2.2 nathanw according to the current rounding mode. If `a' is a NaN, the largest
2366 1.1.2.2 nathanw positive integer is returned. Otherwise, if the conversion overflows, the
2367 1.1.2.2 nathanw largest integer with the same sign as `a' is returned.
2368 1.1.2.2 nathanw -------------------------------------------------------------------------------
2369 1.1.2.2 nathanw */
2370 1.1.2.2 nathanw int64 float64_to_int64( float64 a )
2371 1.1.2.2 nathanw {
2372 1.1.2.2 nathanw flag aSign;
2373 1.1.2.2 nathanw int16 aExp, shiftCount;
2374 1.1.2.2 nathanw bits64 aSig, aSigExtra;
2375 1.1.2.2 nathanw
2376 1.1.2.2 nathanw aSig = extractFloat64Frac( a );
2377 1.1.2.2 nathanw aExp = extractFloat64Exp( a );
2378 1.1.2.2 nathanw aSign = extractFloat64Sign( a );
2379 1.1.2.2 nathanw if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2380 1.1.2.2 nathanw shiftCount = 0x433 - aExp;
2381 1.1.2.2 nathanw if ( shiftCount <= 0 ) {
2382 1.1.2.2 nathanw if ( 0x43E < aExp ) {
2383 1.1.2.2 nathanw float_raise( float_flag_invalid );
2384 1.1.2.2 nathanw if ( ! aSign
2385 1.1.2.2 nathanw || ( ( aExp == 0x7FF )
2386 1.1.2.2 nathanw && ( aSig != LIT64( 0x0010000000000000 ) ) )
2387 1.1.2.2 nathanw ) {
2388 1.1.2.2 nathanw return LIT64( 0x7FFFFFFFFFFFFFFF );
2389 1.1.2.2 nathanw }
2390 1.1.2.2 nathanw return (sbits64) LIT64( 0x8000000000000000 );
2391 1.1.2.2 nathanw }
2392 1.1.2.2 nathanw aSigExtra = 0;
2393 1.1.2.2 nathanw aSig <<= - shiftCount;
2394 1.1.2.2 nathanw }
2395 1.1.2.2 nathanw else {
2396 1.1.2.2 nathanw shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
2397 1.1.2.2 nathanw }
2398 1.1.2.2 nathanw return roundAndPackInt64( aSign, aSig, aSigExtra );
2399 1.1.2.2 nathanw
2400 1.1.2.2 nathanw }
2401 1.1.2.2 nathanw
2402 1.1.2.2 nathanw /*
2403 1.1.2.2 nathanw -------------------------------------------------------------------------------
2404 1.1.2.2 nathanw Returns the result of converting the double-precision floating-point value
2405 1.1.2.2 nathanw `a' to the 64-bit two's complement integer format. The conversion is
2406 1.1.2.2 nathanw performed according to the IEC/IEEE Standard for Binary Floating-Point
2407 1.1.2.2 nathanw Arithmetic, except that the conversion is always rounded toward zero.
2408 1.1.2.2 nathanw If `a' is a NaN, the largest positive integer is returned. Otherwise, if
2409 1.1.2.2 nathanw the conversion overflows, the largest integer with the same sign as `a' is
2410 1.1.2.2 nathanw returned.
2411 1.1.2.2 nathanw -------------------------------------------------------------------------------
2412 1.1.2.2 nathanw */
2413 1.1.2.2 nathanw int64 float64_to_int64_round_to_zero( float64 a )
2414 1.1.2.2 nathanw {
2415 1.1.2.2 nathanw flag aSign;
2416 1.1.2.2 nathanw int16 aExp, shiftCount;
2417 1.1.2.2 nathanw bits64 aSig;
2418 1.1.2.2 nathanw int64 z;
2419 1.1.2.2 nathanw
2420 1.1.2.2 nathanw aSig = extractFloat64Frac( a );
2421 1.1.2.2 nathanw aExp = extractFloat64Exp( a );
2422 1.1.2.2 nathanw aSign = extractFloat64Sign( a );
2423 1.1.2.2 nathanw if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2424 1.1.2.2 nathanw shiftCount = aExp - 0x433;
2425 1.1.2.2 nathanw if ( 0 <= shiftCount ) {
2426 1.1.2.2 nathanw if ( 0x43E <= aExp ) {
2427 1.1.2.2 nathanw if ( a != LIT64( 0xC3E0000000000000 ) ) {
2428 1.1.2.2 nathanw float_raise( float_flag_invalid );
2429 1.1.2.2 nathanw if ( ! aSign
2430 1.1.2.2 nathanw || ( ( aExp == 0x7FF )
2431 1.1.2.2 nathanw && ( aSig != LIT64( 0x0010000000000000 ) ) )
2432 1.1.2.2 nathanw ) {
2433 1.1.2.2 nathanw return LIT64( 0x7FFFFFFFFFFFFFFF );
2434 1.1.2.2 nathanw }
2435 1.1.2.2 nathanw }
2436 1.1.2.2 nathanw return (sbits64) LIT64( 0x8000000000000000 );
2437 1.1.2.2 nathanw }
2438 1.1.2.2 nathanw z = aSig<<shiftCount;
2439 1.1.2.2 nathanw }
2440 1.1.2.2 nathanw else {
2441 1.1.2.2 nathanw if ( aExp < 0x3FE ) {
2442 1.1.2.2 nathanw if ( aExp | aSig ) float_set_inexact();
2443 1.1.2.2 nathanw return 0;
2444 1.1.2.2 nathanw }
2445 1.1.2.2 nathanw z = aSig>>( - shiftCount );
2446 1.1.2.2 nathanw if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
2447 1.1.2.2 nathanw float_set_inexact();
2448 1.1.2.2 nathanw }
2449 1.1.2.2 nathanw }
2450 1.1.2.2 nathanw if ( aSign ) z = - z;
2451 1.1.2.2 nathanw return z;
2452 1.1.2.2 nathanw
2453 1.1.2.2 nathanw }
2454 1.1.2.2 nathanw #endif /* !SOFTFLOAT_FOR_GCC */
2455 1.1.2.2 nathanw
2456 1.1.2.2 nathanw /*
2457 1.1.2.2 nathanw -------------------------------------------------------------------------------
2458 1.1.2.2 nathanw Returns the result of converting the double-precision floating-point value
2459 1.1.2.2 nathanw `a' to the single-precision floating-point format. The conversion is
2460 1.1.2.2 nathanw performed according to the IEC/IEEE Standard for Binary Floating-Point
2461 1.1.2.2 nathanw Arithmetic.
2462 1.1.2.2 nathanw -------------------------------------------------------------------------------
2463 1.1.2.2 nathanw */
2464 1.1.2.2 nathanw float32 float64_to_float32( float64 a )
2465 1.1.2.2 nathanw {
2466 1.1.2.2 nathanw flag aSign;
2467 1.1.2.2 nathanw int16 aExp;
2468 1.1.2.2 nathanw bits64 aSig;
2469 1.1.2.2 nathanw bits32 zSig;
2470 1.1.2.2 nathanw
2471 1.1.2.2 nathanw aSig = extractFloat64Frac( a );
2472 1.1.2.2 nathanw aExp = extractFloat64Exp( a );
2473 1.1.2.2 nathanw aSign = extractFloat64Sign( a );
2474 1.1.2.2 nathanw if ( aExp == 0x7FF ) {
2475 1.1.2.2 nathanw if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a ) );
2476 1.1.2.2 nathanw return packFloat32( aSign, 0xFF, 0 );
2477 1.1.2.2 nathanw }
2478 1.1.2.2 nathanw shift64RightJamming( aSig, 22, &aSig );
2479 1.1.2.2 nathanw zSig = aSig;
2480 1.1.2.2 nathanw if ( aExp || zSig ) {
2481 1.1.2.2 nathanw zSig |= 0x40000000;
2482 1.1.2.2 nathanw aExp -= 0x381;
2483 1.1.2.2 nathanw }
2484 1.1.2.2 nathanw return roundAndPackFloat32( aSign, aExp, zSig );
2485 1.1.2.2 nathanw
2486 1.1.2.2 nathanw }
2487 1.1.2.2 nathanw
2488 1.1.2.2 nathanw #ifdef FLOATX80
2489 1.1.2.2 nathanw
2490 1.1.2.2 nathanw /*
2491 1.1.2.2 nathanw -------------------------------------------------------------------------------
2492 1.1.2.2 nathanw Returns the result of converting the double-precision floating-point value
2493 1.1.2.2 nathanw `a' to the extended double-precision floating-point format. The conversion
2494 1.1.2.2 nathanw is performed according to the IEC/IEEE Standard for Binary Floating-Point
2495 1.1.2.2 nathanw Arithmetic.
2496 1.1.2.2 nathanw -------------------------------------------------------------------------------
2497 1.1.2.2 nathanw */
2498 1.1.2.2 nathanw floatx80 float64_to_floatx80( float64 a )
2499 1.1.2.2 nathanw {
2500 1.1.2.2 nathanw flag aSign;
2501 1.1.2.2 nathanw int16 aExp;
2502 1.1.2.2 nathanw bits64 aSig;
2503 1.1.2.2 nathanw
2504 1.1.2.2 nathanw aSig = extractFloat64Frac( a );
2505 1.1.2.2 nathanw aExp = extractFloat64Exp( a );
2506 1.1.2.2 nathanw aSign = extractFloat64Sign( a );
2507 1.1.2.2 nathanw if ( aExp == 0x7FF ) {
2508 1.1.2.2 nathanw if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) );
2509 1.1.2.2 nathanw return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
2510 1.1.2.2 nathanw }
2511 1.1.2.2 nathanw if ( aExp == 0 ) {
2512 1.1.2.2 nathanw if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
2513 1.1.2.2 nathanw normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2514 1.1.2.2 nathanw }
2515 1.1.2.2 nathanw return
2516 1.1.2.2 nathanw packFloatx80(
2517 1.1.2.2 nathanw aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
2518 1.1.2.2 nathanw
2519 1.1.2.2 nathanw }
2520 1.1.2.2 nathanw
2521 1.1.2.2 nathanw #endif
2522 1.1.2.2 nathanw
2523 1.1.2.2 nathanw #ifdef FLOAT128
2524 1.1.2.2 nathanw
2525 1.1.2.2 nathanw /*
2526 1.1.2.2 nathanw -------------------------------------------------------------------------------
2527 1.1.2.2 nathanw Returns the result of converting the double-precision floating-point value
2528 1.1.2.2 nathanw `a' to the quadruple-precision floating-point format. The conversion is
2529 1.1.2.2 nathanw performed according to the IEC/IEEE Standard for Binary Floating-Point
2530 1.1.2.2 nathanw Arithmetic.
2531 1.1.2.2 nathanw -------------------------------------------------------------------------------
2532 1.1.2.2 nathanw */
2533 1.1.2.2 nathanw float128 float64_to_float128( float64 a )
2534 1.1.2.2 nathanw { /* Splits `a' into fields, handles the special exponents, then packs directly; no rounding routine is called. */
2535 1.1.2.2 nathanw flag aSign;
2536 1.1.2.2 nathanw int16 aExp;
2537 1.1.2.2 nathanw bits64 aSig, zSig0, zSig1;
2538 1.1.2.2 nathanw
2539 1.1.2.2 nathanw aSig = extractFloat64Frac( a );
2540 1.1.2.2 nathanw aExp = extractFloat64Exp( a );
2541 1.1.2.2 nathanw aSign = extractFloat64Sign( a );
2542 1.1.2.2 nathanw if ( aExp == 0x7FF ) { /* all-ones exponent field: NaN when the fraction is nonzero, infinity otherwise */
2543 1.1.2.2 nathanw if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a ) ); /* NaN is converted through the common-NaN helpers (not shown here) */
2544 1.1.2.2 nathanw return packFloat128( aSign, 0x7FFF, 0, 0 ); /* infinity: exponent 0x7FFF, both significand words zero */
2545 1.1.2.2 nathanw }
2546 1.1.2.2 nathanw if ( aExp == 0 ) { /* zero exponent field: signed zero or subnormal */
2547 1.1.2.2 nathanw if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 ); /* signed zero keeps its sign */
2548 1.1.2.2 nathanw normalizeFloat64Subnormal( aSig, &aExp, &aSig ); /* rewrites aExp and aSig through the out-pointers; helper not shown */
2549 1.1.2.2 nathanw --aExp; /* NOTE(review): the normalized aSig presumably has its leading 1 at bit 52, which the >>4 below moves to bit 48 of zSig0; this decrement looks like compensation for packFloat128 adding that bit into the exponent -- confirm against packFloat128 */
2550 1.1.2.2 nathanw }
2551 1.1.2.2 nathanw shift128Right( aSig, 0, 4, &zSig0, &zSig1 ); /* treats (aSig, 0) as a two-word value and shifts it by 4 into zSig0/zSig1 (right shift per the helper's name) */
2552 1.1.2.2 nathanw return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 ); /* 0x3C00 == 0x3FFF - 0x3FF, presumably the exponent-bias difference */
2553 1.1.2.2 nathanw
2554 1.1.2.2 nathanw }
2555 1.1.2.2 nathanw
2556 1.1.2.2 nathanw #endif
2557 1.1.2.2 nathanw
2558 1.1.2.2 nathanw #ifndef SOFTFLOAT_FOR_GCC
2559 1.1.2.2 nathanw /*
2560 1.1.2.2 nathanw -------------------------------------------------------------------------------
2561 1.1.2.2 nathanw Rounds the double-precision floating-point value `a' to an integer, and
2562 1.1.2.2 nathanw returns the result as a double-precision floating-point value. The
2563 1.1.2.2 nathanw operation is performed according to the IEC/IEEE Standard for Binary
2564 1.1.2.2 nathanw Floating-Point Arithmetic.
2565 1.1.2.2 nathanw -------------------------------------------------------------------------------
2566 1.1.2.2 nathanw */
2567 1.1.2.2 nathanw float64 float64_round_to_int( float64 a )
2568 1.1.2.2 nathanw { /* Works on the float64 encoding with integer operations; three ranges of the exponent field are handled separately. */
2569 1.1.2.2 nathanw flag aSign;
2570 1.1.2.2 nathanw int16 aExp;
2571 1.1.2.2 nathanw bits64 lastBitMask, roundBitsMask;
2572 1.1.2.2 nathanw int8 roundingMode;
2573 1.1.2.2 nathanw float64 z;
2574 1.1.2.2 nathanw
2575 1.1.2.2 nathanw aExp = extractFloat64Exp( a );
2576 1.1.2.2 nathanw if ( 0x433 <= aExp ) { /* 0x433 == 0x3FF + 52: from here up the input is returned unchanged unless it is a NaN */
2577 1.1.2.2 nathanw if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
2578 1.1.2.2 nathanw return propagateFloat64NaN( a, a ); /* NaN input: same operand passed twice to the two-operand propagation helper */
2579 1.1.2.2 nathanw }
2580 1.1.2.2 nathanw return a;
2581 1.1.2.2 nathanw }
2582 1.1.2.2 nathanw if ( aExp < 0x3FF ) { /* exponent field below 0x3FF (the value packed for +/-1 below): magnitude is less than 1 */
2583 1.1.2.2 nathanw if ( (bits64) ( a<<1 ) == 0 ) return a; /* every bit except the sign is zero: +/-0 is returned as is, without setting inexact */
2584 1.1.2.2 nathanw float_set_inexact(); /* any nonzero input in this range sets inexact before the rounding mode is examined */
2585 1.1.2.2 nathanw aSign = extractFloat64Sign( a );
2586 1.1.2.2 nathanw switch ( float_rounding_mode() ) { /* no default case: modes not listed fall out to the signed-zero return below */
2587 1.1.2.2 nathanw case float_round_nearest_even:
2588 1.1.2.2 nathanw if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) { /* exponent 0x3FE with a nonzero fraction: magnitude strictly between 1/2 and 1 */
2589 1.1.2.2 nathanw return packFloat64( aSign, 0x3FF, 0 ); /* rounds to +/-1 */
2590 1.1.2.2 nathanw }
2591 1.1.2.2 nathanw break; /* everything else in this mode becomes signed zero */
2592 1.1.2.2 nathanw case float_round_down:
2593 1.1.2.2 nathanw return aSign ? LIT64( 0xBFF0000000000000 ) : 0; /* negative input -> encoding 0xBFF0... (-1); non-negative -> all-zero encoding (+0) */
2594 1.1.2.2 nathanw case float_round_up:
2595 1.1.2.2 nathanw return
2596 1.1.2.2 nathanw aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 ); /* negative input -> sign bit only (-0); non-negative -> encoding 0x3FF0... (+1) */
2597 1.1.2.2 nathanw }
2598 1.1.2.2 nathanw return packFloat64( aSign, 0, 0 ); /* zero carrying the input's sign */
2599 1.1.2.2 nathanw }
2600 1.1.2.2 nathanw lastBitMask = 1;
2601 1.1.2.2 nathanw lastBitMask <<= 0x433 - aExp; /* here 0x3FF <= aExp < 0x433, so the shift count is 1..52 */
2602 1.1.2.2 nathanw roundBitsMask = lastBitMask - 1; /* all bits below lastBitMask; these are cleared at the end */
2603 1.1.2.2 nathanw z = a;
2604 1.1.2.2 nathanw roundingMode = float_rounding_mode();
2605 1.1.2.2 nathanw if ( roundingMode == float_round_nearest_even ) {
2606 1.1.2.2 nathanw z += lastBitMask>>1; /* add half of lastBitMask directly to the encoding */
2607 1.1.2.2 nathanw if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; /* low bits all zero after the add (an exact tie): clear the lastBitMask bit so the result is even */
2608 1.1.2.2 nathanw }
2609 1.1.2.2 nathanw else if ( roundingMode != float_round_to_zero ) {
2610 1.1.2.2 nathanw if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) { /* true when exactly one of "z is negative" and "mode is round_up" holds */
2611 1.1.2.2 nathanw z += roundBitsMask; /* bumps the encoding so the truncation below moves the magnitude up */
2612 1.1.2.2 nathanw }
2613 1.1.2.2 nathanw }
2614 1.1.2.2 nathanw z &= ~ roundBitsMask; /* drop the bits below lastBitMask */
2615 1.1.2.2 nathanw if ( z != a ) float_set_inexact(); /* inexact only when the encoding actually changed */
2616 1.1.2.2 nathanw return z;
2617 1.1.2.2 nathanw
2618 1.1.2.2 nathanw }
2619 1.1.2.2 nathanw #endif
2620 1.1.2.2 nathanw
2621 1.1.2.2 nathanw /*
2622 1.1.2.2 nathanw -------------------------------------------------------------------------------
2623 1.1.2.2 nathanw Returns the result of adding the absolute values of the double-precision
2624 1.1.2.2 nathanw floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
2625 1.1.2.2 nathanw before being returned. `zSign' is ignored if the result is a NaN.
2626 1.1.2.2 nathanw The addition is performed according to the IEC/IEEE Standard for Binary
2627 1.1.2.2 nathanw Floating-Point Arithmetic.
2628 1.1.2.2 nathanw -------------------------------------------------------------------------------
2629 1.1.2.2 nathanw */
2630 1.1.2.2 nathanw static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
2631 1.1.2.2 nathanw { /* Three cases by exponent difference: a larger, b larger, equal. The first two share the tail after the if/else chain. */
2632 1.1.2.2 nathanw int16 aExp, bExp, zExp;
2633 1.1.2.2 nathanw bits64 aSig, bSig, zSig;
2634 1.1.2.2 nathanw int16 expDiff;
2635 1.1.2.2 nathanw
2636 1.1.2.2 nathanw aSig = extractFloat64Frac( a );
2637 1.1.2.2 nathanw aExp = extractFloat64Exp( a );
2638 1.1.2.2 nathanw bSig = extractFloat64Frac( b );
2639 1.1.2.2 nathanw bExp = extractFloat64Exp( b );
2640 1.1.2.2 nathanw expDiff = aExp - bExp;
2641 1.1.2.2 nathanw aSig <<= 9; /* both fractions are moved up 9 bits, so the implicit-bit position becomes bit 61 (0x2000000000000000) */
2642 1.1.2.2 nathanw bSig <<= 9;
2643 1.1.2.2 nathanw if ( 0 < expDiff ) { /* a has the larger exponent */
2644 1.1.2.2 nathanw if ( aExp == 0x7FF ) {
2645 1.1.2.2 nathanw if ( aSig ) return propagateFloat64NaN( a, b ); /* a is a NaN */
2646 1.1.2.2 nathanw return a; /* a is an infinity; returned as is */
2647 1.1.2.2 nathanw }
2648 1.1.2.2 nathanw if ( bExp == 0 ) {
2649 1.1.2.2 nathanw --expDiff; /* b has a zero exponent field: no implicit bit is ORed in and the alignment shift is one less */
2650 1.1.2.2 nathanw }
2651 1.1.2.2 nathanw else {
2652 1.1.2.2 nathanw bSig |= LIT64( 0x2000000000000000 ); /* implicit leading bit of b */
2653 1.1.2.2 nathanw }
2654 1.1.2.2 nathanw shift64RightJamming( bSig, expDiff, &bSig ); /* aligns b to a's exponent; presumably shifted-out bits are kept as a sticky bit -- helper not shown, confirm */
2655 1.1.2.2 nathanw zExp = aExp;
2656 1.1.2.2 nathanw }
2657 1.1.2.2 nathanw else if ( expDiff < 0 ) { /* b has the larger exponent; mirror image of the branch above */
2658 1.1.2.2 nathanw if ( bExp == 0x7FF ) {
2659 1.1.2.2 nathanw if ( bSig ) return propagateFloat64NaN( a, b ); /* b is a NaN */
2660 1.1.2.2 nathanw return packFloat64( zSign, 0x7FF, 0 ); /* b is an infinity: result is an infinity carrying zSign */
2661 1.1.2.2 nathanw }
2662 1.1.2.2 nathanw if ( aExp == 0 ) {
2663 1.1.2.2 nathanw ++expDiff; /* expDiff is negative here, so this shortens the alignment shift by one */
2664 1.1.2.2 nathanw }
2665 1.1.2.2 nathanw else {
2666 1.1.2.2 nathanw aSig |= LIT64( 0x2000000000000000 ); /* implicit leading bit of a */
2667 1.1.2.2 nathanw }
2668 1.1.2.2 nathanw shift64RightJamming( aSig, - expDiff, &aSig ); /* aligns a to b's exponent */
2669 1.1.2.2 nathanw zExp = bExp;
2670 1.1.2.2 nathanw }
2671 1.1.2.2 nathanw else { /* equal exponents: this branch always returns or jumps to roundAndPack */
2672 1.1.2.2 nathanw if ( aExp == 0x7FF ) {
2673 1.1.2.2 nathanw if ( aSig | bSig ) return propagateFloat64NaN( a, b ); /* at least one operand is a NaN */
2674 1.1.2.2 nathanw return a; /* both are infinities; a is returned */
2675 1.1.2.2 nathanw }
2676 1.1.2.2 nathanw if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 ); /* both exponent fields zero: fractions are summed, shifted back down and packed with exponent 0, with no rounding call */
2677 1.1.2.2 nathanw zSig = LIT64( 0x4000000000000000 ) + aSig + bSig; /* 0x4000... == 2 * 0x2000...: supplies both implicit bits at once */
2678 1.1.2.2 nathanw zExp = aExp;
2679 1.1.2.2 nathanw goto roundAndPack;
2680 1.1.2.2 nathanw }
2681 1.1.2.2 nathanw aSig |= LIT64( 0x2000000000000000 ); /* reached only from the two unequal-exponent branches; sets bit 61 so the sum includes the larger operand's implicit bit (in the b-larger path aSig's own bit 61 is clear after alignment) */
2682 1.1.2.2 nathanw zSig = ( aSig + bSig )<<1; /* try the sum shifted up one bit ... */
2683 1.1.2.2 nathanw --zExp; /* ... with the exponent lowered to match */
2684 1.1.2.2 nathanw if ( (sbits64) zSig < 0 ) { /* top bit set after the shift: undo it */
2685 1.1.2.2 nathanw zSig = aSig + bSig;
2686 1.1.2.2 nathanw ++zExp;
2687 1.1.2.2 nathanw }
2688 1.1.2.2 nathanw roundAndPack:
2689 1.1.2.2 nathanw return roundAndPackFloat64( zSign, zExp, zSig ); /* rounding and packing are delegated (helper not shown) */
2690 1.1.2.2 nathanw
2691 1.1.2.2 nathanw }
2692 1.1.2.2 nathanw
2693 1.1.2.2 nathanw /*
2694 1.1.2.2 nathanw -------------------------------------------------------------------------------
2695 1.1.2.2 nathanw Returns the result of subtracting the absolute values of the double-
2696 1.1.2.2 nathanw precision floating-point values `a' and `b'. If `zSign' is 1, the
2697 1.1.2.2 nathanw difference is negated before being returned. `zSign' is ignored if the
2698 1.1.2.2 nathanw result is a NaN. The subtraction is performed according to the IEC/IEEE
2699 1.1.2.2 nathanw Standard for Binary Floating-Point Arithmetic.
2700 1.1.2.2 nathanw -------------------------------------------------------------------------------
2701 1.1.2.2 nathanw */
2702 1.1.2.2 nathanw static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
2703 1.1.2.2 nathanw { /* Control flow is goto-based: the equal-exponent case runs inline, the unequal cases jump to aExpBigger / bExpBigger, and all paths meet at normalizeRoundAndPack. */
2704 1.1.2.2 nathanw int16 aExp, bExp, zExp;
2705 1.1.2.2 nathanw bits64 aSig, bSig, zSig;
2706 1.1.2.2 nathanw int16 expDiff;
2707 1.1.2.2 nathanw
2708 1.1.2.2 nathanw aSig = extractFloat64Frac( a );
2709 1.1.2.2 nathanw aExp = extractFloat64Exp( a );
2710 1.1.2.2 nathanw bSig = extractFloat64Frac( b );
2711 1.1.2.2 nathanw bExp = extractFloat64Exp( b );
2712 1.1.2.2 nathanw expDiff = aExp - bExp;
2713 1.1.2.2 nathanw aSig <<= 10; /* both fractions are moved up 10 bits, so the implicit-bit position becomes bit 62 (0x4000000000000000) */
2714 1.1.2.2 nathanw bSig <<= 10;
2715 1.1.2.2 nathanw if ( 0 < expDiff ) goto aExpBigger;
2716 1.1.2.2 nathanw if ( expDiff < 0 ) goto bExpBigger;
2717 1.1.2.2 nathanw if ( aExp == 0x7FF ) { /* equal exponents, both all-ones */
2718 1.1.2.2 nathanw if ( aSig | bSig ) return propagateFloat64NaN( a, b ); /* at least one operand is a NaN */
2719 1.1.2.2 nathanw float_raise( float_flag_invalid ); /* infinity minus infinity */
2720 1.1.2.2 nathanw return float64_default_nan;
2721 1.1.2.2 nathanw }
2722 1.1.2.2 nathanw if ( aExp == 0 ) { /* both exponent fields zero: exponent 1 is used for the result computation */
2723 1.1.2.2 nathanw aExp = 1;
2724 1.1.2.2 nathanw bExp = 1;
2725 1.1.2.2 nathanw }
2726 1.1.2.2 nathanw if ( bSig < aSig ) goto aBigger;
2727 1.1.2.2 nathanw if ( aSig < bSig ) goto bBigger;
2728 1.1.2.2 nathanw return packFloat64( float_rounding_mode() == float_round_down, 0, 0 ); /* exact cancellation: zero whose sign bit is set only in round-down mode */
2729 1.1.2.2 nathanw bExpBigger:
2730 1.1.2.2 nathanw if ( bExp == 0x7FF ) {
2731 1.1.2.2 nathanw if ( bSig ) return propagateFloat64NaN( a, b ); /* b is a NaN */
2732 1.1.2.2 nathanw return packFloat64( zSign ^ 1, 0x7FF, 0 ); /* b is an infinity: result is an infinity with the opposite of zSign */
2733 1.1.2.2 nathanw }
2734 1.1.2.2 nathanw if ( aExp == 0 ) {
2735 1.1.2.2 nathanw ++expDiff; /* expDiff is negative here; a has no implicit bit, so the alignment shift is one less */
2736 1.1.2.2 nathanw }
2737 1.1.2.2 nathanw else {
2738 1.1.2.2 nathanw aSig |= LIT64( 0x4000000000000000 ); /* implicit leading bit of a */
2739 1.1.2.2 nathanw }
2740 1.1.2.2 nathanw shift64RightJamming( aSig, - expDiff, &aSig ); /* aligns a to b's exponent; helper not shown */
2741 1.1.2.2 nathanw bSig |= LIT64( 0x4000000000000000 ); /* implicit leading bit of b */
2742 1.1.2.2 nathanw bBigger:
2743 1.1.2.2 nathanw zSig = bSig - aSig;
2744 1.1.2.2 nathanw zExp = bExp;
2745 1.1.2.2 nathanw zSign ^= 1; /* b's magnitude is larger, so the result takes the opposite sign */
2746 1.1.2.2 nathanw goto normalizeRoundAndPack;
2747 1.1.2.2 nathanw aExpBigger:
2748 1.1.2.2 nathanw if ( aExp == 0x7FF ) {
2749 1.1.2.2 nathanw if ( aSig ) return propagateFloat64NaN( a, b ); /* a is a NaN */
2750 1.1.2.2 nathanw return a; /* a is an infinity; returned as is */
2751 1.1.2.2 nathanw }
2752 1.1.2.2 nathanw if ( bExp == 0 ) {
2753 1.1.2.2 nathanw --expDiff; /* b has no implicit bit, so the alignment shift is one less */
2754 1.1.2.2 nathanw }
2755 1.1.2.2 nathanw else {
2756 1.1.2.2 nathanw bSig |= LIT64( 0x4000000000000000 ); /* implicit leading bit of b */
2757 1.1.2.2 nathanw }
2758 1.1.2.2 nathanw shift64RightJamming( bSig, expDiff, &bSig ); /* aligns b to a's exponent */
2759 1.1.2.2 nathanw aSig |= LIT64( 0x4000000000000000 ); /* implicit leading bit of a */
2760 1.1.2.2 nathanw aBigger:
2761 1.1.2.2 nathanw zSig = aSig - bSig;
2762 1.1.2.2 nathanw zExp = aExp;
2763 1.1.2.2 nathanw normalizeRoundAndPack:
2764 1.1.2.2 nathanw --zExp; /* exponent is lowered by one before handing off; NOTE(review): presumably matches the significand position expected by the helper -- confirm */
2765 1.1.2.2 nathanw return normalizeRoundAndPackFloat64( zSign, zExp, zSig ); /* normalization, rounding and packing are delegated (helper not shown) */
2766 1.1.2.2 nathanw
2767 1.1.2.2 nathanw }
2768 1.1.2.2 nathanw
2769 1.1.2.2 nathanw /*
2770 1.1.2.2 nathanw -------------------------------------------------------------------------------
2771 1.1.2.2 nathanw Returns the result of adding the double-precision floating-point values `a'
2772 1.1.2.2 nathanw and `b'. The operation is performed according to the IEC/IEEE Standard for
2773 1.1.2.2 nathanw Binary Floating-Point Arithmetic.
2774 1.1.2.2 nathanw -------------------------------------------------------------------------------
2775 1.1.2.2 nathanw */
2776 1.1.2.2 nathanw float64 float64_add( float64 a, float64 b )
2777 1.1.2.2 nathanw { /* Dispatches on the operand signs to the magnitude add/subtract helpers. */
2778 1.1.2.2 nathanw flag aSign, bSign;
2779 1.1.2.2 nathanw
2780 1.1.2.2 nathanw aSign = extractFloat64Sign( a );
2781 1.1.2.2 nathanw bSign = extractFloat64Sign( b );
2782 1.1.2.2 nathanw if ( aSign == bSign ) {
2783 1.1.2.2 nathanw return addFloat64Sigs( a, b, aSign ); /* same sign: add magnitudes, result carries that sign */
2784 1.1.2.2 nathanw }
2785 1.1.2.2 nathanw else {
2786 1.1.2.2 nathanw return subFloat64Sigs( a, b, aSign ); /* opposite signs: |a| - |b|, with a's sign passed as zSign */
2787 1.1.2.2 nathanw }
2788 1.1.2.2 nathanw
2789 1.1.2.2 nathanw }
2790 1.1.2.2 nathanw
2791 1.1.2.2 nathanw /*
2792 1.1.2.2 nathanw -------------------------------------------------------------------------------
2793 1.1.2.2 nathanw Returns the result of subtracting the double-precision floating-point values
2794 1.1.2.2 nathanw `a' and `b'. The operation is performed according to the IEC/IEEE Standard
2795 1.1.2.2 nathanw for Binary Floating-Point Arithmetic.
2796 1.1.2.2 nathanw -------------------------------------------------------------------------------
2797 1.1.2.2 nathanw */
2798 1.1.2.2 nathanw float64 float64_sub( float64 a, float64 b )
2799 1.1.2.2 nathanw { /* Same dispatch as float64_add with the two helpers swapped. */
2800 1.1.2.2 nathanw flag aSign, bSign;
2801 1.1.2.2 nathanw
2802 1.1.2.2 nathanw aSign = extractFloat64Sign( a );
2803 1.1.2.2 nathanw bSign = extractFloat64Sign( b );
2804 1.1.2.2 nathanw if ( aSign == bSign ) {
2805 1.1.2.2 nathanw return subFloat64Sigs( a, b, aSign ); /* same sign: |a| - |b|, with a's sign passed as zSign */
2806 1.1.2.2 nathanw }
2807 1.1.2.2 nathanw else {
2808 1.1.2.2 nathanw return addFloat64Sigs( a, b, aSign ); /* opposite signs: add magnitudes, result carries a's sign */
2809 1.1.2.2 nathanw }
2810 1.1.2.2 nathanw
2811 1.1.2.2 nathanw }
2812 1.1.2.2 nathanw
2813 1.1.2.2 nathanw /*
2814 1.1.2.2 nathanw -------------------------------------------------------------------------------
2815 1.1.2.2 nathanw Returns the result of multiplying the double-precision floating-point values
2816 1.1.2.2 nathanw `a' and `b'. The operation is performed according to the IEC/IEEE Standard
2817 1.1.2.2 nathanw for Binary Floating-Point Arithmetic.
2818 1.1.2.2 nathanw -------------------------------------------------------------------------------
2819 1.1.2.2 nathanw */
2820 1.1.2.2 nathanw float64 float64_mul( float64 a, float64 b )
2821 1.1.2.2 nathanw { /* Special operands are handled first; otherwise the significands are multiplied to 128 bits and the high word is rounded. */
2822 1.1.2.2 nathanw flag aSign, bSign, zSign;
2823 1.1.2.2 nathanw int16 aExp, bExp, zExp;
2824 1.1.2.2 nathanw bits64 aSig, bSig, zSig0, zSig1;
2825 1.1.2.2 nathanw
2826 1.1.2.2 nathanw aSig = extractFloat64Frac( a );
2827 1.1.2.2 nathanw aExp = extractFloat64Exp( a );
2828 1.1.2.2 nathanw aSign = extractFloat64Sign( a );
2829 1.1.2.2 nathanw bSig = extractFloat64Frac( b );
2830 1.1.2.2 nathanw bExp = extractFloat64Exp( b );
2831 1.1.2.2 nathanw bSign = extractFloat64Sign( b );
2832 1.1.2.2 nathanw zSign = aSign ^ bSign; /* result sign is the XOR of the operand signs */
2833 1.1.2.2 nathanw if ( aExp == 0x7FF ) { /* a is a NaN or an infinity */
2834 1.1.2.2 nathanw if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) { /* either operand is a NaN */
2835 1.1.2.2 nathanw return propagateFloat64NaN( a, b );
2836 1.1.2.2 nathanw }
2837 1.1.2.2 nathanw if ( ( bExp | bSig ) == 0 ) { /* infinity times zero */
2838 1.1.2.2 nathanw float_raise( float_flag_invalid );
2839 1.1.2.2 nathanw return float64_default_nan;
2840 1.1.2.2 nathanw }
2841 1.1.2.2 nathanw return packFloat64( zSign, 0x7FF, 0 ); /* infinity times anything else: signed infinity */
2842 1.1.2.2 nathanw }
2843 1.1.2.2 nathanw if ( bExp == 0x7FF ) { /* b is a NaN or an infinity; a is finite here */
2844 1.1.2.2 nathanw if ( bSig ) return propagateFloat64NaN( a, b );
2845 1.1.2.2 nathanw if ( ( aExp | aSig ) == 0 ) { /* zero times infinity */
2846 1.1.2.2 nathanw float_raise( float_flag_invalid );
2847 1.1.2.2 nathanw return float64_default_nan;
2848 1.1.2.2 nathanw }
2849 1.1.2.2 nathanw return packFloat64( zSign, 0x7FF, 0 );
2850 1.1.2.2 nathanw }
2851 1.1.2.2 nathanw if ( aExp == 0 ) {
2852 1.1.2.2 nathanw if ( aSig == 0 ) return packFloat64( zSign, 0, 0 ); /* a is zero: signed zero result */
2853 1.1.2.2 nathanw normalizeFloat64Subnormal( aSig, &aExp, &aSig ); /* rewrites aExp and aSig through the out-pointers; helper not shown */
2854 1.1.2.2 nathanw }
2855 1.1.2.2 nathanw if ( bExp == 0 ) {
2856 1.1.2.2 nathanw if ( bSig == 0 ) return packFloat64( zSign, 0, 0 ); /* b is zero: signed zero result */
2857 1.1.2.2 nathanw normalizeFloat64Subnormal( bSig, &bExp, &bSig );
2858 1.1.2.2 nathanw }
2859 1.1.2.2 nathanw zExp = aExp + bExp - 0x3FF; /* sum of the exponent fields minus 0x3FF */
2860 1.1.2.2 nathanw aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10; /* OR in bit 52 (implicit leading bit), then shift a up 10 ... */
2861 1.1.2.2 nathanw bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; /* ... and b up 11 */
2862 1.1.2.2 nathanw mul64To128( aSig, bSig, &zSig0, &zSig1 ); /* 128-bit product delivered as two 64-bit words; helper not shown */
2863 1.1.2.2 nathanw zSig0 |= ( zSig1 != 0 ); /* any nonzero bits in zSig1 are folded into bit 0 of zSig0 */
2864 1.1.2.2 nathanw if ( 0 <= (sbits64) ( zSig0<<1 ) ) { /* bit 62 of zSig0 is clear: shift up one and lower the exponent to compensate */
2865 1.1.2.2 nathanw zSig0 <<= 1;
2866 1.1.2.2 nathanw --zExp;
2867 1.1.2.2 nathanw }
2868 1.1.2.2 nathanw return roundAndPackFloat64( zSign, zExp, zSig0 ); /* rounding and packing are delegated (helper not shown) */
2869 1.1.2.2 nathanw
2870 1.1.2.2 nathanw }
2871 1.1.2.2 nathanw
2872 1.1.2.2 nathanw /*
2873 1.1.2.2 nathanw -------------------------------------------------------------------------------
2874 1.1.2.2 nathanw Returns the result of dividing the double-precision floating-point value `a'
2875 1.1.2.2 nathanw by the corresponding value `b'. The operation is performed according to
2876 1.1.2.2 nathanw the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2877 1.1.2.2 nathanw -------------------------------------------------------------------------------
2878 1.1.2.2 nathanw */
2879 1.1.2.2 nathanw float64 float64_div( float64 a, float64 b )
2880 1.1.2.2 nathanw { /* Special operands are handled first; otherwise a quotient estimate is computed and, when it is close to a rounding boundary, corrected against the exact remainder. */
2881 1.1.2.2 nathanw flag aSign, bSign, zSign;
2882 1.1.2.2 nathanw int16 aExp, bExp, zExp;
2883 1.1.2.2 nathanw bits64 aSig, bSig, zSig;
2884 1.1.2.2 nathanw bits64 rem0, rem1;
2885 1.1.2.2 nathanw bits64 term0, term1;
2886 1.1.2.2 nathanw
2887 1.1.2.2 nathanw aSig = extractFloat64Frac( a );
2888 1.1.2.2 nathanw aExp = extractFloat64Exp( a );
2889 1.1.2.2 nathanw aSign = extractFloat64Sign( a );
2890 1.1.2.2 nathanw bSig = extractFloat64Frac( b );
2891 1.1.2.2 nathanw bExp = extractFloat64Exp( b );
2892 1.1.2.2 nathanw bSign = extractFloat64Sign( b );
2893 1.1.2.2 nathanw zSign = aSign ^ bSign; /* result sign is the XOR of the operand signs */
2894 1.1.2.2 nathanw if ( aExp == 0x7FF ) { /* a is a NaN or an infinity */
2895 1.1.2.2 nathanw if ( aSig ) return propagateFloat64NaN( a, b );
2896 1.1.2.2 nathanw if ( bExp == 0x7FF ) {
2897 1.1.2.2 nathanw if ( bSig ) return propagateFloat64NaN( a, b );
2898 1.1.2.2 nathanw float_raise( float_flag_invalid ); /* infinity divided by infinity */
2899 1.1.2.2 nathanw return float64_default_nan;
2900 1.1.2.2 nathanw }
2901 1.1.2.2 nathanw return packFloat64( zSign, 0x7FF, 0 ); /* infinity divided by a finite value: signed infinity */
2902 1.1.2.2 nathanw }
2903 1.1.2.2 nathanw if ( bExp == 0x7FF ) { /* b is a NaN or an infinity; a is finite here */
2904 1.1.2.2 nathanw if ( bSig ) return propagateFloat64NaN( a, b );
2905 1.1.2.2 nathanw return packFloat64( zSign, 0, 0 ); /* finite divided by infinity: signed zero */
2906 1.1.2.2 nathanw }
2907 1.1.2.2 nathanw if ( bExp == 0 ) {
2908 1.1.2.2 nathanw if ( bSig == 0 ) { /* divisor is zero */
2909 1.1.2.2 nathanw if ( ( aExp | aSig ) == 0 ) { /* zero divided by zero */
2910 1.1.2.2 nathanw float_raise( float_flag_invalid );
2911 1.1.2.2 nathanw return float64_default_nan;
2912 1.1.2.2 nathanw }
2913 1.1.2.2 nathanw float_raise( float_flag_divbyzero ); /* nonzero finite divided by zero */
2914 1.1.2.2 nathanw return packFloat64( zSign, 0x7FF, 0 );
2915 1.1.2.2 nathanw }
2916 1.1.2.2 nathanw normalizeFloat64Subnormal( bSig, &bExp, &bSig ); /* rewrites bExp and bSig through the out-pointers; helper not shown */
2917 1.1.2.2 nathanw }
2918 1.1.2.2 nathanw if ( aExp == 0 ) {
2919 1.1.2.2 nathanw if ( aSig == 0 ) return packFloat64( zSign, 0, 0 ); /* zero dividend: signed zero */
2920 1.1.2.2 nathanw normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2921 1.1.2.2 nathanw }
2922 1.1.2.2 nathanw zExp = aExp - bExp + 0x3FD;
2923 1.1.2.2 nathanw aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10; /* OR in bit 52 (implicit leading bit), then shift a up 10 ... */
2924 1.1.2.2 nathanw bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; /* ... and b up 11 */
2925 1.1.2.2 nathanw if ( bSig <= ( aSig + aSig ) ) { /* divisor not larger than twice the dividend: halve the dividend and raise the exponent to compensate */
2926 1.1.2.2 nathanw aSig >>= 1;
2927 1.1.2.2 nathanw ++zExp;
2928 1.1.2.2 nathanw }
2929 1.1.2.2 nathanw zSig = estimateDiv128To64( aSig, 0, bSig ); /* quotient estimate of the two-word value (aSig, 0) by bSig; helper not shown */
2930 1.1.2.2 nathanw if ( ( zSig & 0x1FF ) <= 2 ) { /* exact remainder is computed only when the low 9 bits of the estimate are 0, 1 or 2 */
2931 1.1.2.2 nathanw mul64To128( bSig, zSig, &term0, &term1 );
2932 1.1.2.2 nathanw sub128( aSig, 0, term0, term1, &rem0, &rem1 ); /* remainder = (aSig, 0) - bSig * zSig */
2933 1.1.2.2 nathanw while ( (sbits64) rem0 < 0 ) { /* estimate too large: step the quotient down and add the divisor back */
2934 1.1.2.2 nathanw --zSig;
2935 1.1.2.2 nathanw add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
2936 1.1.2.2 nathanw }
2937 1.1.2.2 nathanw zSig |= ( rem1 != 0 ); /* nonzero rem1 sets bit 0 of the quotient */
2938 1.1.2.2 nathanw }
2939 1.1.2.2 nathanw return roundAndPackFloat64( zSign, zExp, zSig ); /* rounding and packing are delegated (helper not shown) */
2940 1.1.2.2 nathanw
2941 1.1.2.2 nathanw }
2942 1.1.2.2 nathanw
2943 1.1.2.2 nathanw #ifndef SOFTFLOAT_FOR_GCC
2944 1.1.2.2 nathanw /*
2945 1.1.2.2 nathanw -------------------------------------------------------------------------------
2946 1.1.2.2 nathanw Returns the remainder of the double-precision floating-point value `a'
2947 1.1.2.2 nathanw with respect to the corresponding value `b'. The operation is performed
2948 1.1.2.2 nathanw according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2949 1.1.2.2 nathanw -------------------------------------------------------------------------------
2950 1.1.2.2 nathanw */
2951 1.1.2.2 nathanw float64 float64_rem( float64 a, float64 b )
2952 1.1.2.2 nathanw { /* Special operands first; then the dividend significand is reduced in chunks, and a final subtract loop picks between the two candidate remainders. */
2953 1.1.2.2 nathanw flag aSign, bSign, zSign;
2954 1.1.2.2 nathanw int16 aExp, bExp, expDiff;
2955 1.1.2.2 nathanw bits64 aSig, bSig;
2956 1.1.2.2 nathanw bits64 q, alternateASig;
2957 1.1.2.2 nathanw sbits64 sigMean;
2958 1.1.2.2 nathanw
2959 1.1.2.2 nathanw aSig = extractFloat64Frac( a );
2960 1.1.2.2 nathanw aExp = extractFloat64Exp( a );
2961 1.1.2.2 nathanw aSign = extractFloat64Sign( a );
2962 1.1.2.2 nathanw bSig = extractFloat64Frac( b );
2963 1.1.2.2 nathanw bExp = extractFloat64Exp( b );
2964 1.1.2.2 nathanw bSign = extractFloat64Sign( b ); /* bSign is assigned but not read again in this function */
2965 1.1.2.2 nathanw if ( aExp == 0x7FF ) { /* a is a NaN or an infinity */
2966 1.1.2.2 nathanw if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) { /* either operand is a NaN */
2967 1.1.2.2 nathanw return propagateFloat64NaN( a, b );
2968 1.1.2.2 nathanw }
2969 1.1.2.2 nathanw float_raise( float_flag_invalid ); /* remainder of an infinity */
2970 1.1.2.2 nathanw return float64_default_nan;
2971 1.1.2.2 nathanw }
2972 1.1.2.2 nathanw if ( bExp == 0x7FF ) {
2973 1.1.2.2 nathanw if ( bSig ) return propagateFloat64NaN( a, b );
2974 1.1.2.2 nathanw return a; /* finite a with infinite b: a is returned unchanged */
2975 1.1.2.2 nathanw }
2976 1.1.2.2 nathanw if ( bExp == 0 ) {
2977 1.1.2.2 nathanw if ( bSig == 0 ) { /* b is zero */
2978 1.1.2.2 nathanw float_raise( float_flag_invalid );
2979 1.1.2.2 nathanw return float64_default_nan;
2980 1.1.2.2 nathanw }
2981 1.1.2.2 nathanw normalizeFloat64Subnormal( bSig, &bExp, &bSig ); /* rewrites bExp and bSig through the out-pointers; helper not shown */
2982 1.1.2.2 nathanw }
2983 1.1.2.2 nathanw if ( aExp == 0 ) {
2984 1.1.2.2 nathanw if ( aSig == 0 ) return a; /* zero dividend is returned unchanged */
2985 1.1.2.2 nathanw normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2986 1.1.2.2 nathanw }
2987 1.1.2.2 nathanw expDiff = aExp - bExp;
2988 1.1.2.2 nathanw aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11; /* OR in bit 52 and shift up 11, putting the leading bit at bit 63 */
2989 1.1.2.2 nathanw bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
2990 1.1.2.2 nathanw if ( expDiff < 0 ) {
2991 1.1.2.2 nathanw if ( expDiff < -1 ) return a; /* a's exponent is at least two below b's: a is returned unchanged */
2992 1.1.2.2 nathanw aSig >>= 1; /* expDiff == -1: align a one bit down */
2993 1.1.2.2 nathanw }
2994 1.1.2.2 nathanw q = ( bSig <= aSig ); /* first quotient bit */
2995 1.1.2.2 nathanw if ( q ) aSig -= bSig;
2996 1.1.2.2 nathanw expDiff -= 64;
2997 1.1.2.2 nathanw while ( 0 < expDiff ) { /* each pass lowers expDiff by 62 */
2998 1.1.2.2 nathanw q = estimateDiv128To64( aSig, 0, bSig ); /* quotient estimate; helper not shown */
2999 1.1.2.2 nathanw q = ( 2 < q ) ? q - 2 : 0; /* estimate lowered by 2, clamped at 0; presumably guards against an overestimate -- confirm against the helper */
3000 1.1.2.2 nathanw aSig = - ( ( bSig>>2 ) * q ); /* unsigned 64-bit arithmetic, so this is the negated product modulo 2^64 */
3001 1.1.2.2 nathanw expDiff -= 62;
3002 1.1.2.2 nathanw }
3003 1.1.2.2 nathanw expDiff += 64; /* undo the bias applied before the loop */
3004 1.1.2.2 nathanw if ( 0 < expDiff ) {
3005 1.1.2.2 nathanw q = estimateDiv128To64( aSig, 0, bSig );
3006 1.1.2.2 nathanw q = ( 2 < q ) ? q - 2 : 0;
3007 1.1.2.2 nathanw q >>= 64 - expDiff; /* keep only the top expDiff bits of the estimate */
3008 1.1.2.2 nathanw bSig >>= 2;
3009 1.1.2.2 nathanw aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
3010 1.1.2.2 nathanw }
3011 1.1.2.2 nathanw else {
3012 1.1.2.2 nathanw aSig >>= 2; /* both significands are moved down 2 bits, matching the bSig>>2 used in the other branch */
3013 1.1.2.2 nathanw bSig >>= 2;
3014 1.1.2.2 nathanw }
3015 1.1.2.2 nathanw do { /* subtract bSig until aSig goes negative, keeping the previous value in alternateASig and counting in q */
3016 1.1.2.2 nathanw alternateASig = aSig;
3017 1.1.2.2 nathanw ++q;
3018 1.1.2.2 nathanw aSig -= bSig;
3019 1.1.2.2 nathanw } while ( 0 <= (sbits64) aSig );
3020 1.1.2.2 nathanw sigMean = aSig + alternateASig; /* sum of the negative candidate and the last non-negative one */
3021 1.1.2.2 nathanw if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { /* take the non-negative candidate when the sum is negative, or when it is zero and q is odd */
3022 1.1.2.2 nathanw aSig = alternateASig;
3023 1.1.2.2 nathanw }
3024 1.1.2.2 nathanw zSign = ( (sbits64) aSig < 0 );
3025 1.1.2.2 nathanw if ( zSign ) aSig = - aSig; /* convert a negative candidate to sign plus magnitude */
3026 1.1.2.2 nathanw return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig ); /* result sign is a's sign, flipped when the negative candidate was chosen */
3027 1.1.2.2 nathanw
3028 1.1.2.2 nathanw }
3029 1.1.2.2 nathanw
3030 1.1.2.2 nathanw /*
3031 1.1.2.2 nathanw -------------------------------------------------------------------------------
3032 1.1.2.2 nathanw Returns the square root of the double-precision floating-point value `a'.
3033 1.1.2.2 nathanw The operation is performed according to the IEC/IEEE Standard for Binary
3034 1.1.2.2 nathanw Floating-Point Arithmetic.
3035 1.1.2.2 nathanw -------------------------------------------------------------------------------
3036 1.1.2.2 nathanw */
3037 1.1.2.2 nathanw float64 float64_sqrt( float64 a )
3038 1.1.2.2 nathanw { /* Special operands first; then a 32-bit root estimate is refined by one division step and, near a rounding boundary, corrected against the exact remainder. */
3039 1.1.2.2 nathanw flag aSign;
3040 1.1.2.2 nathanw int16 aExp, zExp;
3041 1.1.2.2 nathanw bits64 aSig, zSig, doubleZSig;
3042 1.1.2.2 nathanw bits64 rem0, rem1, term0, term1;
3043 1.1.2.2 nathanw
3044 1.1.2.2 nathanw aSig = extractFloat64Frac( a );
3045 1.1.2.2 nathanw aExp = extractFloat64Exp( a );
3046 1.1.2.2 nathanw aSign = extractFloat64Sign( a );
3047 1.1.2.2 nathanw if ( aExp == 0x7FF ) { /* a is a NaN or an infinity */
3048 1.1.2.2 nathanw if ( aSig ) return propagateFloat64NaN( a, a ); /* NaN input: same operand passed twice to the two-operand helper */
3049 1.1.2.2 nathanw if ( ! aSign ) return a; /* positive infinity is returned as is */
3050 1.1.2.2 nathanw float_raise( float_flag_invalid ); /* negative infinity */
3051 1.1.2.2 nathanw return float64_default_nan;
3052 1.1.2.2 nathanw }
3053 1.1.2.2 nathanw if ( aSign ) {
3054 1.1.2.2 nathanw if ( ( aExp | aSig ) == 0 ) return a; /* negative zero is returned as is */
3055 1.1.2.2 nathanw float_raise( float_flag_invalid ); /* any other negative input */
3056 1.1.2.2 nathanw return float64_default_nan;
3057 1.1.2.2 nathanw }
3058 1.1.2.2 nathanw if ( aExp == 0 ) {
3059 1.1.2.2 nathanw if ( aSig == 0 ) return 0; /* positive zero: all-zero encoding */
3060 1.1.2.2 nathanw normalizeFloat64Subnormal( aSig, &aExp, &aSig ); /* rewrites aExp and aSig through the out-pointers; helper not shown */
3061 1.1.2.2 nathanw }
3062 1.1.2.2 nathanw zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE; /* (exponent field minus 0x3FF) shifted right one, plus 0x3FE */
3063 1.1.2.2 nathanw aSig |= LIT64( 0x0010000000000000 ); /* OR in bit 52, the implicit leading bit */
3064 1.1.2.2 nathanw zSig = estimateSqrt32( aExp, aSig>>21 ); /* estimate from the top 32 bits of the 53-bit significand; helper not shown */
3065 1.1.2.2 nathanw aSig <<= 9 - ( aExp & 1 ); /* shift by 9 when the exponent field is even, 8 when odd */
3066 1.1.2.2 nathanw zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 ); /* NOTE(review): looks like a single refinement step combining a/z with z -- confirm against the helper's contract */
3067 1.1.2.2 nathanw if ( ( zSig & 0x1FF ) <= 5 ) { /* exact remainder is computed only when the low 9 bits of the estimate are at most 5 */
3068 1.1.2.2 nathanw doubleZSig = zSig<<1;
3069 1.1.2.2 nathanw mul64To128( zSig, zSig, &term0, &term1 );
3070 1.1.2.2 nathanw sub128( aSig, 0, term0, term1, &rem0, &rem1 ); /* remainder = (aSig, 0) - zSig * zSig */
3071 1.1.2.2 nathanw while ( (sbits64) rem0 < 0 ) { /* estimate too large: step it down by one */
3072 1.1.2.2 nathanw --zSig;
3073 1.1.2.2 nathanw doubleZSig -= 2;
3074 1.1.2.2 nathanw add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 ); /* adds 2*zSig + 1 for the new zSig, i.e. (zSig+1)^2 - zSig^2; zSig>>63 is the bit carried out of the doubling */
3075 1.1.2.2 nathanw }
3076 1.1.2.2 nathanw zSig |= ( ( rem0 | rem1 ) != 0 ); /* any nonzero remainder sets bit 0 of the root */
3077 1.1.2.2 nathanw }
3078 1.1.2.2 nathanw return roundAndPackFloat64( 0, zExp, zSig ); /* sign argument is always 0 here */
3079 1.1.2.2 nathanw
3080 1.1.2.2 nathanw }
3081 1.1.2.2 nathanw #endif
3082 1.1.2.2 nathanw
3083 1.1.2.2 nathanw /*
3084 1.1.2.2 nathanw -------------------------------------------------------------------------------
3085 1.1.2.2 nathanw Returns 1 if the double-precision floating-point value `a' is equal to the
3086 1.1.2.2 nathanw corresponding value `b', and 0 otherwise. The comparison is performed
3087 1.1.2.2 nathanw according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3088 1.1.2.2 nathanw -------------------------------------------------------------------------------
3089 1.1.2.2 nathanw */
3090 1.1.2.2 nathanw flag float64_eq( float64 a, float64 b )
3091 1.1.2.2 nathanw { /* Returns 0 whenever either operand is a NaN; invalid is raised only for signaling NaNs. */
3092 1.1.2.2 nathanw
3093 1.1.2.2 nathanw if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) /* a is a NaN: all-ones exponent with a nonzero fraction */
3094 1.1.2.2 nathanw || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) /* b is a NaN */
3095 1.1.2.2 nathanw ) {
3096 1.1.2.2 nathanw if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3097 1.1.2.2 nathanw float_raise( float_flag_invalid );
3098 1.1.2.2 nathanw }
3099 1.1.2.2 nathanw return 0;
3100 1.1.2.2 nathanw }
3101 1.1.2.2 nathanw return ( a == b ) || /* identical encodings, or ... */
3102 1.1.2.2 nathanw ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 ); /* ... every bit other than the sign is zero in both operands, so +0 and -0 compare equal */
3103 1.1.2.2 nathanw
3104 1.1.2.2 nathanw }
3105 1.1.2.2 nathanw
3106 1.1.2.2 nathanw /*
3107 1.1.2.2 nathanw -------------------------------------------------------------------------------
3108 1.1.2.2 nathanw Returns 1 if the double-precision floating-point value `a' is less than or
3109 1.1.2.2 nathanw equal to the corresponding value `b', and 0 otherwise. The comparison is
3110 1.1.2.2 nathanw performed according to the IEC/IEEE Standard for Binary Floating-Point
3111 1.1.2.2 nathanw Arithmetic.
3112 1.1.2.2 nathanw -------------------------------------------------------------------------------
3113 1.1.2.2 nathanw */
3114 1.1.2.2 nathanw flag float64_le( float64 a, float64 b )
3115 1.1.2.2 nathanw { /* Raises invalid and returns 0 for any NaN operand, quiet or signaling. */
3116 1.1.2.2 nathanw flag aSign, bSign;
3117 1.1.2.2 nathanw
3118 1.1.2.2 nathanw if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) /* a is a NaN: all-ones exponent with a nonzero fraction */
3119 1.1.2.2 nathanw || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) /* b is a NaN */
3120 1.1.2.2 nathanw ) {
3121 1.1.2.2 nathanw float_raise( float_flag_invalid );
3122 1.1.2.2 nathanw return 0;
3123 1.1.2.2 nathanw }
3124 1.1.2.2 nathanw aSign = extractFloat64Sign( a );
3125 1.1.2.2 nathanw bSign = extractFloat64Sign( b );
3126 1.1.2.2 nathanw if ( aSign != bSign ) /* signs differ: true when a is the negative one, or when both operands are zeros */
3127 1.1.2.2 nathanw return aSign ||
3128 1.1.2.2 nathanw ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) ==
3129 1.1.2.2 nathanw 0 );
3130 1.1.2.2 nathanw return ( a == b ) || /* same sign: identical encodings, or ... */
3131 1.1.2.2 nathanw ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) ); /* ... integer comparison of the demangled encodings, inverted when both operands are negative */
3132 1.1.2.2 nathanw
3133 1.1.2.2 nathanw }
3134 1.1.2.2 nathanw
3135 1.1.2.2 nathanw /*
3136 1.1.2.2 nathanw -------------------------------------------------------------------------------
3137 1.1.2.2 nathanw Returns 1 if the double-precision floating-point value `a' is less than
3138 1.1.2.2 nathanw the corresponding value `b', and 0 otherwise. The comparison is performed
3139 1.1.2.2 nathanw according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3140 1.1.2.2 nathanw -------------------------------------------------------------------------------
3141 1.1.2.2 nathanw */
3142 1.1.2.2 nathanw flag float64_lt( float64 a, float64 b )
3143 1.1.2.2 nathanw { /* Raises invalid and returns 0 for any NaN operand, quiet or signaling. */
3144 1.1.2.2 nathanw flag aSign, bSign;
3145 1.1.2.2 nathanw
3146 1.1.2.2 nathanw if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) /* a is a NaN: all-ones exponent with a nonzero fraction */
3147 1.1.2.2 nathanw || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) /* b is a NaN */
3148 1.1.2.2 nathanw ) {
3149 1.1.2.2 nathanw float_raise( float_flag_invalid );
3150 1.1.2.2 nathanw return 0;
3151 1.1.2.2 nathanw }
3152 1.1.2.2 nathanw aSign = extractFloat64Sign( a );
3153 1.1.2.2 nathanw bSign = extractFloat64Sign( b );
3154 1.1.2.2 nathanw if ( aSign != bSign ) /* signs differ: true only when a is the negative one and the operands are not both zeros */
3155 1.1.2.2 nathanw return aSign &&
3156 1.1.2.2 nathanw ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) !=
3157 1.1.2.2 nathanw 0 );
3158 1.1.2.2 nathanw return ( a != b ) && /* same sign: encodings must differ, and ... */
3159 1.1.2.2 nathanw ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) ); /* ... integer comparison of the demangled encodings, inverted when both operands are negative */
3160 1.1.2.2 nathanw
3161 1.1.2.2 nathanw }
3162 1.1.2.2 nathanw
3163 1.1.2.2 nathanw #ifndef SOFTFLOAT_FOR_GCC
3164 1.1.2.2 nathanw /*
3165 1.1.2.2 nathanw -------------------------------------------------------------------------------
3166 1.1.2.2 nathanw Returns 1 if the double-precision floating-point value `a' is equal to the
3167 1.1.2.2 nathanw corresponding value `b', and 0 otherwise. The invalid exception is raised
3168 1.1.2.2 nathanw if either operand is a NaN. Otherwise, the comparison is performed
3169 1.1.2.2 nathanw according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3170 1.1.2.2 nathanw -------------------------------------------------------------------------------
3171 1.1.2.2 nathanw */
3172 1.1.2.2 nathanw flag float64_eq_signaling( float64 a, float64 b )
3173 1.1.2.2 nathanw { /* Same result as float64_eq for non-NaN operands, but invalid is raised for every NaN operand; the encodings are used directly, without FLOAT64_DEMANGLE. */
3174 1.1.2.2 nathanw
3175 1.1.2.2 nathanw if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) /* a is a NaN: all-ones exponent with a nonzero fraction */
3176 1.1.2.2 nathanw || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) /* b is a NaN */
3177 1.1.2.2 nathanw ) {
3178 1.1.2.2 nathanw float_raise( float_flag_invalid );
3179 1.1.2.2 nathanw return 0;
3180 1.1.2.2 nathanw }
3181 1.1.2.2 nathanw return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 ); /* identical encodings, or both operands are zeros of either sign */
3182 1.1.2.2 nathanw
3183 1.1.2.2 nathanw }
3184 1.1.2.2 nathanw
3185 1.1.2.2 nathanw /*
3186 1.1.2.2 nathanw -------------------------------------------------------------------------------
3187 1.1.2.2 nathanw Returns 1 if the double-precision floating-point value `a' is less than or
3188 1.1.2.2 nathanw equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
3189 1.1.2.2 nathanw cause an exception. Otherwise, the comparison is performed according to the
3190 1.1.2.2 nathanw IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3191 1.1.2.2 nathanw -------------------------------------------------------------------------------
3192 1.1.2.2 nathanw */
3193 1.1.2.2 nathanw flag float64_le_quiet( float64 a, float64 b )
3194 1.1.2.2 nathanw {
3195 1.1.2.2 nathanw flag aSign, bSign;
3196 1.1.2.2 nathanw
3197 1.1.2.2 nathanw if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3198 1.1.2.2 nathanw || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3199 1.1.2.2 nathanw ) {
3200 1.1.2.2 nathanw if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3201 1.1.2.2 nathanw float_raise( float_flag_invalid );
3202 1.1.2.2 nathanw }
3203 1.1.2.2 nathanw return 0;
3204 1.1.2.2 nathanw }
3205 1.1.2.2 nathanw aSign = extractFloat64Sign( a );
3206 1.1.2.2 nathanw bSign = extractFloat64Sign( b );
3207 1.1.2.2 nathanw if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
3208 1.1.2.2 nathanw return ( a == b ) || ( aSign ^ ( a < b ) );
3209 1.1.2.2 nathanw
3210 1.1.2.2 nathanw }
3211 1.1.2.2 nathanw
3212 1.1.2.2 nathanw /*
3213 1.1.2.2 nathanw -------------------------------------------------------------------------------
3214 1.1.2.2 nathanw Returns 1 if the double-precision floating-point value `a' is less than
3215 1.1.2.2 nathanw the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
3216 1.1.2.2 nathanw exception. Otherwise, the comparison is performed according to the IEC/IEEE
3217 1.1.2.2 nathanw Standard for Binary Floating-Point Arithmetic.
3218 1.1.2.2 nathanw -------------------------------------------------------------------------------
3219 1.1.2.2 nathanw */
3220 1.1.2.2 nathanw flag float64_lt_quiet( float64 a, float64 b )
3221 1.1.2.2 nathanw {
3222 1.1.2.2 nathanw flag aSign, bSign;
3223 1.1.2.2 nathanw
3224 1.1.2.2 nathanw if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3225 1.1.2.2 nathanw || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3226 1.1.2.2 nathanw ) {
3227 1.1.2.2 nathanw if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3228 1.1.2.2 nathanw float_raise( float_flag_invalid );
3229 1.1.2.2 nathanw }
3230 1.1.2.2 nathanw return 0;
3231 1.1.2.2 nathanw }
3232 1.1.2.2 nathanw aSign = extractFloat64Sign( a );
3233 1.1.2.2 nathanw bSign = extractFloat64Sign( b );
3234 1.1.2.2 nathanw if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
3235 1.1.2.2 nathanw return ( a != b ) && ( aSign ^ ( a < b ) );
3236 1.1.2.2 nathanw
3237 1.1.2.2 nathanw }
3238 1.1.2.2 nathanw #endif
3239 1.1.2.2 nathanw
3240 1.1.2.2 nathanw #ifdef FLOATX80
3241 1.1.2.2 nathanw
3242 1.1.2.2 nathanw /*
3243 1.1.2.2 nathanw -------------------------------------------------------------------------------
3244 1.1.2.2 nathanw Returns the result of converting the extended double-precision floating-
3245 1.1.2.2 nathanw point value `a' to the 32-bit two's complement integer format. The
3246 1.1.2.2 nathanw conversion is performed according to the IEC/IEEE Standard for Binary
3247 1.1.2.2 nathanw Floating-Point Arithmetic---which means in particular that the conversion
3248 1.1.2.2 nathanw is rounded according to the current rounding mode. If `a' is a NaN, the
3249 1.1.2.2 nathanw largest positive integer is returned. Otherwise, if the conversion
3250 1.1.2.2 nathanw overflows, the largest integer with the same sign as `a' is returned.
3251 1.1.2.2 nathanw -------------------------------------------------------------------------------
3252 1.1.2.2 nathanw */
3253 1.1.2.2 nathanw int32 floatx80_to_int32( floatx80 a )
3254 1.1.2.2 nathanw {
3255 1.1.2.2 nathanw flag aSign;
3256 1.1.2.2 nathanw int32 aExp, shiftCount;
3257 1.1.2.2 nathanw bits64 aSig;
3258 1.1.2.2 nathanw
3259 1.1.2.2 nathanw aSig = extractFloatx80Frac( a );
3260 1.1.2.2 nathanw aExp = extractFloatx80Exp( a );
3261 1.1.2.2 nathanw aSign = extractFloatx80Sign( a );
3262 1.1.2.2 nathanw if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
3263 1.1.2.2 nathanw shiftCount = 0x4037 - aExp;
3264 1.1.2.2 nathanw if ( shiftCount <= 0 ) shiftCount = 1;
3265 1.1.2.2 nathanw shift64RightJamming( aSig, shiftCount, &aSig );
3266 1.1.2.2 nathanw return roundAndPackInt32( aSign, aSig );
3267 1.1.2.2 nathanw
3268 1.1.2.2 nathanw }
3269 1.1.2.2 nathanw
3270 1.1.2.2 nathanw /*
3271 1.1.2.2 nathanw -------------------------------------------------------------------------------
3272 1.1.2.2 nathanw Returns the result of converting the extended double-precision floating-
3273 1.1.2.2 nathanw point value `a' to the 32-bit two's complement integer format. The
3274 1.1.2.2 nathanw conversion is performed according to the IEC/IEEE Standard for Binary
3275 1.1.2.2 nathanw Floating-Point Arithmetic, except that the conversion is always rounded
3276 1.1.2.2 nathanw toward zero. If `a' is a NaN, the largest positive integer is returned.
3277 1.1.2.2 nathanw Otherwise, if the conversion overflows, the largest integer with the same
3278 1.1.2.2 nathanw sign as `a' is returned.
3279 1.1.2.2 nathanw -------------------------------------------------------------------------------
3280 1.1.2.2 nathanw */
3281 1.1.2.2 nathanw int32 floatx80_to_int32_round_to_zero( floatx80 a )
3282 1.1.2.2 nathanw {
3283 1.1.2.2 nathanw flag aSign;
3284 1.1.2.2 nathanw int32 aExp, shiftCount;
3285 1.1.2.2 nathanw bits64 aSig, savedASig;
3286 1.1.2.2 nathanw int32 z;
3287 1.1.2.2 nathanw
3288 1.1.2.2 nathanw aSig = extractFloatx80Frac( a );
3289 1.1.2.2 nathanw aExp = extractFloatx80Exp( a );
3290 1.1.2.2 nathanw aSign = extractFloatx80Sign( a );
3291 1.1.2.2 nathanw if ( 0x401E < aExp ) {
3292 1.1.2.2 nathanw if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
3293 1.1.2.2 nathanw goto invalid;
3294 1.1.2.2 nathanw }
3295 1.1.2.2 nathanw else if ( aExp < 0x3FFF ) {
3296 1.1.2.2 nathanw if ( aExp || aSig ) float_set_inexact();
3297 1.1.2.2 nathanw return 0;
3298 1.1.2.2 nathanw }
3299 1.1.2.2 nathanw shiftCount = 0x403E - aExp;
3300 1.1.2.2 nathanw savedASig = aSig;
3301 1.1.2.2 nathanw aSig >>= shiftCount;
3302 1.1.2.2 nathanw z = aSig;
3303 1.1.2.2 nathanw if ( aSign ) z = - z;
3304 1.1.2.2 nathanw if ( ( z < 0 ) ^ aSign ) {
3305 1.1.2.2 nathanw invalid:
3306 1.1.2.2 nathanw float_raise( float_flag_invalid );
3307 1.1.2.2 nathanw return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
3308 1.1.2.2 nathanw }
3309 1.1.2.2 nathanw if ( ( aSig<<shiftCount ) != savedASig ) {
3310 1.1.2.2 nathanw float_set_inexact();
3311 1.1.2.2 nathanw }
3312 1.1.2.2 nathanw return z;
3313 1.1.2.2 nathanw
3314 1.1.2.2 nathanw }
3315 1.1.2.2 nathanw
3316 1.1.2.2 nathanw /*
3317 1.1.2.2 nathanw -------------------------------------------------------------------------------
3318 1.1.2.2 nathanw Returns the result of converting the extended double-precision floating-
3319 1.1.2.2 nathanw point value `a' to the 64-bit two's complement integer format. The
3320 1.1.2.2 nathanw conversion is performed according to the IEC/IEEE Standard for Binary
3321 1.1.2.2 nathanw Floating-Point Arithmetic---which means in particular that the conversion
3322 1.1.2.2 nathanw is rounded according to the current rounding mode. If `a' is a NaN,
3323 1.1.2.2 nathanw the largest positive integer is returned. Otherwise, if the conversion
3324 1.1.2.2 nathanw overflows, the largest integer with the same sign as `a' is returned.
3325 1.1.2.2 nathanw -------------------------------------------------------------------------------
3326 1.1.2.2 nathanw */
3327 1.1.2.2 nathanw int64 floatx80_to_int64( floatx80 a )
3328 1.1.2.2 nathanw {
3329 1.1.2.2 nathanw flag aSign;
3330 1.1.2.2 nathanw int32 aExp, shiftCount;
3331 1.1.2.2 nathanw bits64 aSig, aSigExtra;
3332 1.1.2.2 nathanw
3333 1.1.2.2 nathanw aSig = extractFloatx80Frac( a );
3334 1.1.2.2 nathanw aExp = extractFloatx80Exp( a );
3335 1.1.2.2 nathanw aSign = extractFloatx80Sign( a );
3336 1.1.2.2 nathanw shiftCount = 0x403E - aExp;
3337 1.1.2.2 nathanw if ( shiftCount <= 0 ) {
3338 1.1.2.2 nathanw if ( shiftCount ) {
3339 1.1.2.2 nathanw float_raise( float_flag_invalid );
3340 1.1.2.2 nathanw if ( ! aSign
3341 1.1.2.2 nathanw || ( ( aExp == 0x7FFF )
3342 1.1.2.2 nathanw && ( aSig != LIT64( 0x8000000000000000 ) ) )
3343 1.1.2.2 nathanw ) {
3344 1.1.2.2 nathanw return LIT64( 0x7FFFFFFFFFFFFFFF );
3345 1.1.2.2 nathanw }
3346 1.1.2.2 nathanw return (sbits64) LIT64( 0x8000000000000000 );
3347 1.1.2.2 nathanw }
3348 1.1.2.2 nathanw aSigExtra = 0;
3349 1.1.2.2 nathanw }
3350 1.1.2.2 nathanw else {
3351 1.1.2.2 nathanw shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
3352 1.1.2.2 nathanw }
3353 1.1.2.2 nathanw return roundAndPackInt64( aSign, aSig, aSigExtra );
3354 1.1.2.2 nathanw
3355 1.1.2.2 nathanw }
3356 1.1.2.2 nathanw
3357 1.1.2.2 nathanw /*
3358 1.1.2.2 nathanw -------------------------------------------------------------------------------
3359 1.1.2.2 nathanw Returns the result of converting the extended double-precision floating-
3360 1.1.2.2 nathanw point value `a' to the 64-bit two's complement integer format. The
3361 1.1.2.2 nathanw conversion is performed according to the IEC/IEEE Standard for Binary
3362 1.1.2.2 nathanw Floating-Point Arithmetic, except that the conversion is always rounded
3363 1.1.2.2 nathanw toward zero. If `a' is a NaN, the largest positive integer is returned.
3364 1.1.2.2 nathanw Otherwise, if the conversion overflows, the largest integer with the same
3365 1.1.2.2 nathanw sign as `a' is returned.
3366 1.1.2.2 nathanw -------------------------------------------------------------------------------
3367 1.1.2.2 nathanw */
3368 1.1.2.2 nathanw int64 floatx80_to_int64_round_to_zero( floatx80 a )
3369 1.1.2.2 nathanw {
3370 1.1.2.2 nathanw flag aSign;
3371 1.1.2.2 nathanw int32 aExp, shiftCount;
3372 1.1.2.2 nathanw bits64 aSig;
3373 1.1.2.2 nathanw int64 z;
3374 1.1.2.2 nathanw
3375 1.1.2.2 nathanw aSig = extractFloatx80Frac( a );
3376 1.1.2.2 nathanw aExp = extractFloatx80Exp( a );
3377 1.1.2.2 nathanw aSign = extractFloatx80Sign( a );
3378 1.1.2.2 nathanw shiftCount = aExp - 0x403E;
3379 1.1.2.2 nathanw if ( 0 <= shiftCount ) {
3380 1.1.2.2 nathanw aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
3381 1.1.2.2 nathanw if ( ( a.high != 0xC03E ) || aSig ) {
3382 1.1.2.2 nathanw float_raise( float_flag_invalid );
3383 1.1.2.2 nathanw if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
3384 1.1.2.2 nathanw return LIT64( 0x7FFFFFFFFFFFFFFF );
3385 1.1.2.2 nathanw }
3386 1.1.2.2 nathanw }
3387 1.1.2.2 nathanw return (sbits64) LIT64( 0x8000000000000000 );
3388 1.1.2.2 nathanw }
3389 1.1.2.2 nathanw else if ( aExp < 0x3FFF ) {
3390 1.1.2.2 nathanw if ( aExp | aSig ) float_set_inexact();
3391 1.1.2.2 nathanw return 0;
3392 1.1.2.2 nathanw }
3393 1.1.2.2 nathanw z = aSig>>( - shiftCount );
3394 1.1.2.2 nathanw if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
3395 1.1.2.2 nathanw float_set_inexact();
3396 1.1.2.2 nathanw }
3397 1.1.2.2 nathanw if ( aSign ) z = - z;
3398 1.1.2.2 nathanw return z;
3399 1.1.2.2 nathanw
3400 1.1.2.2 nathanw }
3401 1.1.2.2 nathanw
3402 1.1.2.2 nathanw /*
3403 1.1.2.2 nathanw -------------------------------------------------------------------------------
3404 1.1.2.2 nathanw Returns the result of converting the extended double-precision floating-
3405 1.1.2.2 nathanw point value `a' to the single-precision floating-point format. The
3406 1.1.2.2 nathanw conversion is performed according to the IEC/IEEE Standard for Binary
3407 1.1.2.2 nathanw Floating-Point Arithmetic.
3408 1.1.2.2 nathanw -------------------------------------------------------------------------------
3409 1.1.2.2 nathanw */
3410 1.1.2.2 nathanw float32 floatx80_to_float32( floatx80 a )
3411 1.1.2.2 nathanw {
3412 1.1.2.2 nathanw flag aSign;
3413 1.1.2.2 nathanw int32 aExp;
3414 1.1.2.2 nathanw bits64 aSig;
3415 1.1.2.2 nathanw
3416 1.1.2.2 nathanw aSig = extractFloatx80Frac( a );
3417 1.1.2.2 nathanw aExp = extractFloatx80Exp( a );
3418 1.1.2.2 nathanw aSign = extractFloatx80Sign( a );
3419 1.1.2.2 nathanw if ( aExp == 0x7FFF ) {
3420 1.1.2.2 nathanw if ( (bits64) ( aSig<<1 ) ) {
3421 1.1.2.2 nathanw return commonNaNToFloat32( floatx80ToCommonNaN( a ) );
3422 1.1.2.2 nathanw }
3423 1.1.2.2 nathanw return packFloat32( aSign, 0xFF, 0 );
3424 1.1.2.2 nathanw }
3425 1.1.2.2 nathanw shift64RightJamming( aSig, 33, &aSig );
3426 1.1.2.2 nathanw if ( aExp || aSig ) aExp -= 0x3F81;
3427 1.1.2.2 nathanw return roundAndPackFloat32( aSign, aExp, aSig );
3428 1.1.2.2 nathanw
3429 1.1.2.2 nathanw }
3430 1.1.2.2 nathanw
3431 1.1.2.2 nathanw /*
3432 1.1.2.2 nathanw -------------------------------------------------------------------------------
3433 1.1.2.2 nathanw Returns the result of converting the extended double-precision floating-
3434 1.1.2.2 nathanw point value `a' to the double-precision floating-point format. The
3435 1.1.2.2 nathanw conversion is performed according to the IEC/IEEE Standard for Binary
3436 1.1.2.2 nathanw Floating-Point Arithmetic.
3437 1.1.2.2 nathanw -------------------------------------------------------------------------------
3438 1.1.2.2 nathanw */
3439 1.1.2.2 nathanw float64 floatx80_to_float64( floatx80 a )
3440 1.1.2.2 nathanw {
3441 1.1.2.2 nathanw flag aSign;
3442 1.1.2.2 nathanw int32 aExp;
3443 1.1.2.2 nathanw bits64 aSig, zSig;
3444 1.1.2.2 nathanw
3445 1.1.2.2 nathanw aSig = extractFloatx80Frac( a );
3446 1.1.2.2 nathanw aExp = extractFloatx80Exp( a );
3447 1.1.2.2 nathanw aSign = extractFloatx80Sign( a );
3448 1.1.2.2 nathanw if ( aExp == 0x7FFF ) {
3449 1.1.2.2 nathanw if ( (bits64) ( aSig<<1 ) ) {
3450 1.1.2.2 nathanw return commonNaNToFloat64( floatx80ToCommonNaN( a ) );
3451 1.1.2.2 nathanw }
3452 1.1.2.2 nathanw return packFloat64( aSign, 0x7FF, 0 );
3453 1.1.2.2 nathanw }
3454 1.1.2.2 nathanw shift64RightJamming( aSig, 1, &zSig );
3455 1.1.2.2 nathanw if ( aExp || aSig ) aExp -= 0x3C01;
3456 1.1.2.2 nathanw return roundAndPackFloat64( aSign, aExp, zSig );
3457 1.1.2.2 nathanw
3458 1.1.2.2 nathanw }
3459 1.1.2.2 nathanw
3460 1.1.2.2 nathanw #ifdef FLOAT128
3461 1.1.2.2 nathanw
3462 1.1.2.2 nathanw /*
3463 1.1.2.2 nathanw -------------------------------------------------------------------------------
3464 1.1.2.2 nathanw Returns the result of converting the extended double-precision floating-
3465 1.1.2.2 nathanw point value `a' to the quadruple-precision floating-point format. The
3466 1.1.2.2 nathanw conversion is performed according to the IEC/IEEE Standard for Binary
3467 1.1.2.2 nathanw Floating-Point Arithmetic.
3468 1.1.2.2 nathanw -------------------------------------------------------------------------------
3469 1.1.2.2 nathanw */
3470 1.1.2.2 nathanw float128 floatx80_to_float128( floatx80 a )
3471 1.1.2.2 nathanw {
3472 1.1.2.2 nathanw flag aSign;
3473 1.1.2.2 nathanw int16 aExp;
3474 1.1.2.2 nathanw bits64 aSig, zSig0, zSig1;
3475 1.1.2.2 nathanw
3476 1.1.2.2 nathanw aSig = extractFloatx80Frac( a );
3477 1.1.2.2 nathanw aExp = extractFloatx80Exp( a );
3478 1.1.2.2 nathanw aSign = extractFloatx80Sign( a );
3479 1.1.2.2 nathanw if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) {
3480 1.1.2.2 nathanw return commonNaNToFloat128( floatx80ToCommonNaN( a ) );
3481 1.1.2.2 nathanw }
3482 1.1.2.2 nathanw shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
3483 1.1.2.2 nathanw return packFloat128( aSign, aExp, zSig0, zSig1 );
3484 1.1.2.2 nathanw
3485 1.1.2.2 nathanw }
3486 1.1.2.2 nathanw
3487 1.1.2.2 nathanw #endif
3488 1.1.2.2 nathanw
3489 1.1.2.2 nathanw /*
3490 1.1.2.2 nathanw -------------------------------------------------------------------------------
3491 1.1.2.2 nathanw Rounds the extended double-precision floating-point value `a' to an integer,
3492 1.1.2.2 nathanw and returns the result as an extended quadruple-precision floating-point
3493 1.1.2.2 nathanw value. The operation is performed according to the IEC/IEEE Standard for
3494 1.1.2.2 nathanw Binary Floating-Point Arithmetic.
3495 1.1.2.2 nathanw -------------------------------------------------------------------------------
3496 1.1.2.2 nathanw */
3497 1.1.2.2 nathanw floatx80 floatx80_round_to_int( floatx80 a )
3498 1.1.2.2 nathanw {
3499 1.1.2.2 nathanw flag aSign;
3500 1.1.2.2 nathanw int32 aExp;
3501 1.1.2.2 nathanw bits64 lastBitMask, roundBitsMask;
3502 1.1.2.2 nathanw int8 roundingMode;
3503 1.1.2.2 nathanw floatx80 z;
3504 1.1.2.2 nathanw
3505 1.1.2.2 nathanw aExp = extractFloatx80Exp( a );
3506 1.1.2.2 nathanw if ( 0x403E <= aExp ) {
3507 1.1.2.2 nathanw if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
3508 1.1.2.2 nathanw return propagateFloatx80NaN( a, a );
3509 1.1.2.2 nathanw }
3510 1.1.2.2 nathanw return a;
3511 1.1.2.2 nathanw }
3512 1.1.2.2 nathanw if ( aExp < 0x3FFF ) {
3513 1.1.2.2 nathanw if ( ( aExp == 0 )
3514 1.1.2.2 nathanw && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
3515 1.1.2.2 nathanw return a;
3516 1.1.2.2 nathanw }
3517 1.1.2.2 nathanw float_set_inexact();
3518 1.1.2.2 nathanw aSign = extractFloatx80Sign( a );
3519 1.1.2.2 nathanw switch ( float_rounding_mode() ) {
3520 1.1.2.2 nathanw case float_round_nearest_even:
3521 1.1.2.2 nathanw if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
3522 1.1.2.2 nathanw ) {
3523 1.1.2.2 nathanw return
3524 1.1.2.2 nathanw packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
3525 1.1.2.2 nathanw }
3526 1.1.2.2 nathanw break;
3527 1.1.2.2 nathanw case float_round_down:
3528 1.1.2.2 nathanw return
3529 1.1.2.2 nathanw aSign ?
3530 1.1.2.2 nathanw packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
3531 1.1.2.2 nathanw : packFloatx80( 0, 0, 0 );
3532 1.1.2.2 nathanw case float_round_up:
3533 1.1.2.2 nathanw return
3534 1.1.2.2 nathanw aSign ? packFloatx80( 1, 0, 0 )
3535 1.1.2.2 nathanw : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
3536 1.1.2.2 nathanw }
3537 1.1.2.2 nathanw return packFloatx80( aSign, 0, 0 );
3538 1.1.2.2 nathanw }
3539 1.1.2.2 nathanw lastBitMask = 1;
3540 1.1.2.2 nathanw lastBitMask <<= 0x403E - aExp;
3541 1.1.2.2 nathanw roundBitsMask = lastBitMask - 1;
3542 1.1.2.2 nathanw z = a;
3543 1.1.2.2 nathanw roundingMode = float_rounding_mode();
3544 1.1.2.2 nathanw if ( roundingMode == float_round_nearest_even ) {
3545 1.1.2.2 nathanw z.low += lastBitMask>>1;
3546 1.1.2.2 nathanw if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
3547 1.1.2.2 nathanw }
3548 1.1.2.2 nathanw else if ( roundingMode != float_round_to_zero ) {
3549 1.1.2.2 nathanw if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
3550 1.1.2.2 nathanw z.low += roundBitsMask;
3551 1.1.2.2 nathanw }
3552 1.1.2.2 nathanw }
3553 1.1.2.2 nathanw z.low &= ~ roundBitsMask;
3554 1.1.2.2 nathanw if ( z.low == 0 ) {
3555 1.1.2.2 nathanw ++z.high;
3556 1.1.2.2 nathanw z.low = LIT64( 0x8000000000000000 );
3557 1.1.2.2 nathanw }
3558 1.1.2.2 nathanw if ( z.low != a.low ) float_set_inexact();
3559 1.1.2.2 nathanw return z;
3560 1.1.2.2 nathanw
3561 1.1.2.2 nathanw }
3562 1.1.2.2 nathanw
3563 1.1.2.2 nathanw /*
3564 1.1.2.2 nathanw -------------------------------------------------------------------------------
3565 1.1.2.2 nathanw Returns the result of adding the absolute values of the extended double-
3566 1.1.2.2 nathanw precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
3567 1.1.2.2 nathanw negated before being returned. `zSign' is ignored if the result is a NaN.
3568 1.1.2.2 nathanw The addition is performed according to the IEC/IEEE Standard for Binary
3569 1.1.2.2 nathanw Floating-Point Arithmetic.
3570 1.1.2.2 nathanw -------------------------------------------------------------------------------
3571 1.1.2.2 nathanw */
3572 1.1.2.2 nathanw static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
3573 1.1.2.2 nathanw {
3574 1.1.2.2 nathanw int32 aExp, bExp, zExp;
3575 1.1.2.2 nathanw bits64 aSig, bSig, zSig0, zSig1;
3576 1.1.2.2 nathanw int32 expDiff;
3577 1.1.2.2 nathanw
3578 1.1.2.2 nathanw aSig = extractFloatx80Frac( a );
3579 1.1.2.2 nathanw aExp = extractFloatx80Exp( a );
3580 1.1.2.2 nathanw bSig = extractFloatx80Frac( b );
3581 1.1.2.2 nathanw bExp = extractFloatx80Exp( b );
3582 1.1.2.2 nathanw expDiff = aExp - bExp;
3583 1.1.2.2 nathanw if ( 0 < expDiff ) {
3584 1.1.2.2 nathanw if ( aExp == 0x7FFF ) {
3585 1.1.2.2 nathanw if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
3586 1.1.2.2 nathanw return a;
3587 1.1.2.2 nathanw }
3588 1.1.2.2 nathanw if ( bExp == 0 ) --expDiff;
3589 1.1.2.2 nathanw shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
3590 1.1.2.2 nathanw zExp = aExp;
3591 1.1.2.2 nathanw }
3592 1.1.2.2 nathanw else if ( expDiff < 0 ) {
3593 1.1.2.2 nathanw if ( bExp == 0x7FFF ) {
3594 1.1.2.2 nathanw if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3595 1.1.2.2 nathanw return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3596 1.1.2.2 nathanw }
3597 1.1.2.2 nathanw if ( aExp == 0 ) ++expDiff;
3598 1.1.2.2 nathanw shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
3599 1.1.2.2 nathanw zExp = bExp;
3600 1.1.2.2 nathanw }
3601 1.1.2.2 nathanw else {
3602 1.1.2.2 nathanw if ( aExp == 0x7FFF ) {
3603 1.1.2.2 nathanw if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
3604 1.1.2.2 nathanw return propagateFloatx80NaN( a, b );
3605 1.1.2.2 nathanw }
3606 1.1.2.2 nathanw return a;
3607 1.1.2.2 nathanw }
3608 1.1.2.2 nathanw zSig1 = 0;
3609 1.1.2.2 nathanw zSig0 = aSig + bSig;
3610 1.1.2.2 nathanw if ( aExp == 0 ) {
3611 1.1.2.2 nathanw normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
3612 1.1.2.2 nathanw goto roundAndPack;
3613 1.1.2.2 nathanw }
3614 1.1.2.2 nathanw zExp = aExp;
3615 1.1.2.2 nathanw goto shiftRight1;
3616 1.1.2.2 nathanw }
3617 1.1.2.2 nathanw zSig0 = aSig + bSig;
3618 1.1.2.2 nathanw if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
3619 1.1.2.2 nathanw shiftRight1:
3620 1.1.2.2 nathanw shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
3621 1.1.2.2 nathanw zSig0 |= LIT64( 0x8000000000000000 );
3622 1.1.2.2 nathanw ++zExp;
3623 1.1.2.2 nathanw roundAndPack:
3624 1.1.2.2 nathanw return
3625 1.1.2.2 nathanw roundAndPackFloatx80(
3626 1.1.2.2 nathanw floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
3627 1.1.2.2 nathanw
3628 1.1.2.2 nathanw }
3629 1.1.2.2 nathanw
3630 1.1.2.2 nathanw /*
3631 1.1.2.2 nathanw -------------------------------------------------------------------------------
3632 1.1.2.2 nathanw Returns the result of subtracting the absolute values of the extended
3633 1.1.2.2 nathanw double-precision floating-point values `a' and `b'. If `zSign' is 1, the
3634 1.1.2.2 nathanw difference is negated before being returned. `zSign' is ignored if the
3635 1.1.2.2 nathanw result is a NaN. The subtraction is performed according to the IEC/IEEE
3636 1.1.2.2 nathanw Standard for Binary Floating-Point Arithmetic.
3637 1.1.2.2 nathanw -------------------------------------------------------------------------------
3638 1.1.2.2 nathanw */
3639 1.1.2.2 nathanw static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
3640 1.1.2.2 nathanw {
3641 1.1.2.2 nathanw int32 aExp, bExp, zExp;
3642 1.1.2.2 nathanw bits64 aSig, bSig, zSig0, zSig1;
3643 1.1.2.2 nathanw int32 expDiff;
3644 1.1.2.2 nathanw floatx80 z;
3645 1.1.2.2 nathanw
3646 1.1.2.2 nathanw aSig = extractFloatx80Frac( a );
3647 1.1.2.2 nathanw aExp = extractFloatx80Exp( a );
3648 1.1.2.2 nathanw bSig = extractFloatx80Frac( b );
3649 1.1.2.2 nathanw bExp = extractFloatx80Exp( b );
3650 1.1.2.2 nathanw expDiff = aExp - bExp;
3651 1.1.2.2 nathanw if ( 0 < expDiff ) goto aExpBigger;
3652 1.1.2.2 nathanw if ( expDiff < 0 ) goto bExpBigger;
3653 1.1.2.2 nathanw if ( aExp == 0x7FFF ) {
3654 1.1.2.2 nathanw if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
3655 1.1.2.2 nathanw return propagateFloatx80NaN( a, b );
3656 1.1.2.2 nathanw }
3657 1.1.2.2 nathanw float_raise( float_flag_invalid );
3658 1.1.2.2 nathanw z.low = floatx80_default_nan_low;
3659 1.1.2.2 nathanw z.high = floatx80_default_nan_high;
3660 1.1.2.2 nathanw return z;
3661 1.1.2.2 nathanw }
3662 1.1.2.2 nathanw if ( aExp == 0 ) {
3663 1.1.2.2 nathanw aExp = 1;
3664 1.1.2.2 nathanw bExp = 1;
3665 1.1.2.2 nathanw }
3666 1.1.2.2 nathanw zSig1 = 0;
3667 1.1.2.2 nathanw if ( bSig < aSig ) goto aBigger;
3668 1.1.2.2 nathanw if ( aSig < bSig ) goto bBigger;
3669 1.1.2.2 nathanw return packFloatx80( float_rounding_mode() == float_round_down, 0, 0 );
3670 1.1.2.2 nathanw bExpBigger:
3671 1.1.2.2 nathanw if ( bExp == 0x7FFF ) {
3672 1.1.2.2 nathanw if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3673 1.1.2.2 nathanw return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
3674 1.1.2.2 nathanw }
3675 1.1.2.2 nathanw if ( aExp == 0 ) ++expDiff;
3676 1.1.2.2 nathanw shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
3677 1.1.2.2 nathanw bBigger:
3678 1.1.2.2 nathanw sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
3679 1.1.2.2 nathanw zExp = bExp;
3680 1.1.2.2 nathanw zSign ^= 1;
3681 1.1.2.2 nathanw goto normalizeRoundAndPack;
3682 1.1.2.2 nathanw aExpBigger:
3683 1.1.2.2 nathanw if ( aExp == 0x7FFF ) {
3684 1.1.2.2 nathanw if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
3685 1.1.2.2 nathanw return a;
3686 1.1.2.2 nathanw }
3687 1.1.2.2 nathanw if ( bExp == 0 ) --expDiff;
3688 1.1.2.2 nathanw shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
3689 1.1.2.2 nathanw aBigger:
3690 1.1.2.2 nathanw sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
3691 1.1.2.2 nathanw zExp = aExp;
3692 1.1.2.2 nathanw normalizeRoundAndPack:
3693 1.1.2.2 nathanw return
3694 1.1.2.2 nathanw normalizeRoundAndPackFloatx80(
3695 1.1.2.2 nathanw floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
3696 1.1.2.2 nathanw
3697 1.1.2.2 nathanw }
3698 1.1.2.2 nathanw
3699 1.1.2.2 nathanw /*
3700 1.1.2.2 nathanw -------------------------------------------------------------------------------
3701 1.1.2.2 nathanw Returns the result of adding the extended double-precision floating-point
3702 1.1.2.2 nathanw values `a' and `b'. The operation is performed according to the IEC/IEEE
3703 1.1.2.2 nathanw Standard for Binary Floating-Point Arithmetic.
3704 1.1.2.2 nathanw -------------------------------------------------------------------------------
3705 1.1.2.2 nathanw */
3706 1.1.2.2 nathanw floatx80 floatx80_add( floatx80 a, floatx80 b )
3707 1.1.2.2 nathanw {
3708 1.1.2.2 nathanw flag aSign, bSign;
3709 1.1.2.2 nathanw
3710 1.1.2.2 nathanw aSign = extractFloatx80Sign( a );
3711 1.1.2.2 nathanw bSign = extractFloatx80Sign( b );
3712 1.1.2.2 nathanw if ( aSign == bSign ) {
3713 1.1.2.2 nathanw return addFloatx80Sigs( a, b, aSign );
3714 1.1.2.2 nathanw }
3715 1.1.2.2 nathanw else {
3716 1.1.2.2 nathanw return subFloatx80Sigs( a, b, aSign );
3717 1.1.2.2 nathanw }
3718 1.1.2.2 nathanw
3719 1.1.2.2 nathanw }
3720 1.1.2.2 nathanw
3721 1.1.2.2 nathanw /*
3722 1.1.2.2 nathanw -------------------------------------------------------------------------------
3723 1.1.2.2 nathanw Returns the result of subtracting the extended double-precision floating-
3724 1.1.2.2 nathanw point values `a' and `b'. The operation is performed according to the
3725 1.1.2.2 nathanw IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3726 1.1.2.2 nathanw -------------------------------------------------------------------------------
3727 1.1.2.2 nathanw */
3728 1.1.2.2 nathanw floatx80 floatx80_sub( floatx80 a, floatx80 b )
3729 1.1.2.2 nathanw {
3730 1.1.2.2 nathanw flag aSign, bSign;
3731 1.1.2.2 nathanw
3732 1.1.2.2 nathanw aSign = extractFloatx80Sign( a );
3733 1.1.2.2 nathanw bSign = extractFloatx80Sign( b );
3734 1.1.2.2 nathanw if ( aSign == bSign ) {
3735 1.1.2.2 nathanw return subFloatx80Sigs( a, b, aSign );
3736 1.1.2.2 nathanw }
3737 1.1.2.2 nathanw else {
3738 1.1.2.2 nathanw return addFloatx80Sigs( a, b, aSign );
3739 1.1.2.2 nathanw }
3740 1.1.2.2 nathanw
3741 1.1.2.2 nathanw }
3742 1.1.2.2 nathanw
3743 1.1.2.2 nathanw /*
3744 1.1.2.2 nathanw -------------------------------------------------------------------------------
3745 1.1.2.2 nathanw Returns the result of multiplying the extended double-precision floating-
3746 1.1.2.2 nathanw point values `a' and `b'. The operation is performed according to the
3747 1.1.2.2 nathanw IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3748 1.1.2.2 nathanw -------------------------------------------------------------------------------
3749 1.1.2.2 nathanw */
3750 1.1.2.2 nathanw floatx80 floatx80_mul( floatx80 a, floatx80 b )
3751 1.1.2.2 nathanw {
3752 1.1.2.2 nathanw flag aSign, bSign, zSign;
3753 1.1.2.2 nathanw int32 aExp, bExp, zExp;
3754 1.1.2.2 nathanw bits64 aSig, bSig, zSig0, zSig1;
3755 1.1.2.2 nathanw floatx80 z;
3756 1.1.2.2 nathanw
3757 1.1.2.2 nathanw aSig = extractFloatx80Frac( a );
3758 1.1.2.2 nathanw aExp = extractFloatx80Exp( a );
3759 1.1.2.2 nathanw aSign = extractFloatx80Sign( a );
3760 1.1.2.2 nathanw bSig = extractFloatx80Frac( b );
3761 1.1.2.2 nathanw bExp = extractFloatx80Exp( b );
3762 1.1.2.2 nathanw bSign = extractFloatx80Sign( b );
3763 1.1.2.2 nathanw zSign = aSign ^ bSign;
3764 1.1.2.2 nathanw if ( aExp == 0x7FFF ) {
3765 1.1.2.2 nathanw if ( (bits64) ( aSig<<1 )
3766 1.1.2.2 nathanw || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
3767 1.1.2.2 nathanw return propagateFloatx80NaN( a, b );
3768 1.1.2.2 nathanw }
3769 1.1.2.2 nathanw if ( ( bExp | bSig ) == 0 ) goto invalid;
3770 1.1.2.2 nathanw return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3771 1.1.2.2 nathanw }
3772 1.1.2.2 nathanw if ( bExp == 0x7FFF ) {
3773 1.1.2.2 nathanw if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3774 1.1.2.2 nathanw if ( ( aExp | aSig ) == 0 ) {
3775 1.1.2.2 nathanw invalid:
3776 1.1.2.2 nathanw float_raise( float_flag_invalid );
3777 1.1.2.2 nathanw z.low = floatx80_default_nan_low;
3778 1.1.2.2 nathanw z.high = floatx80_default_nan_high;
3779 1.1.2.2 nathanw return z;
3780 1.1.2.2 nathanw }
3781 1.1.2.2 nathanw return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3782 1.1.2.2 nathanw }
3783 1.1.2.2 nathanw if ( aExp == 0 ) {
3784 1.1.2.2 nathanw if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
3785 1.1.2.2 nathanw normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
3786 1.1.2.2 nathanw }
3787 1.1.2.2 nathanw if ( bExp == 0 ) {
3788 1.1.2.2 nathanw if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
3789 1.1.2.2 nathanw normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3790 1.1.2.2 nathanw }
3791 1.1.2.2 nathanw zExp = aExp + bExp - 0x3FFE;
3792 1.1.2.2 nathanw mul64To128( aSig, bSig, &zSig0, &zSig1 );
3793 1.1.2.2 nathanw if ( 0 < (sbits64) zSig0 ) {
3794 1.1.2.2 nathanw shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
3795 1.1.2.2 nathanw --zExp;
3796 1.1.2.2 nathanw }
3797 1.1.2.2 nathanw return
3798 1.1.2.2 nathanw roundAndPackFloatx80(
3799 1.1.2.2 nathanw floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
3800 1.1.2.2 nathanw
3801 1.1.2.2 nathanw }
3802 1.1.2.2 nathanw
3803 1.1.2.2 nathanw /*
3804 1.1.2.2 nathanw -------------------------------------------------------------------------------
3805 1.1.2.2 nathanw Returns the result of dividing the extended double-precision floating-point
3806 1.1.2.2 nathanw value `a' by the corresponding value `b'. The operation is performed
3807 1.1.2.2 nathanw according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3808 1.1.2.2 nathanw -------------------------------------------------------------------------------
3809 1.1.2.2 nathanw */
3810 1.1.2.2 nathanw floatx80 floatx80_div( floatx80 a, floatx80 b )
3811 1.1.2.2 nathanw {
/*
 * Outline: unpack both operands, dispose of the special cases (exponent
 * 0x7FFF, zero divisor, zero dividend), then build a two-word quotient
 * zSig0:zSig1 and hand it to roundAndPackFloatx80() together with the
 * current floatx80_rounding_precision.
 */
3812 1.1.2.2 nathanw flag aSign, bSign, zSign;
3813 1.1.2.2 nathanw int32 aExp, bExp, zExp;
3814 1.1.2.2 nathanw bits64 aSig, bSig, zSig0, zSig1;
3815 1.1.2.2 nathanw bits64 rem0, rem1, rem2, term0, term1, term2;
3816 1.1.2.2 nathanw floatx80 z;
3817 1.1.2.2 nathanw
3818 1.1.2.2 nathanw aSig = extractFloatx80Frac( a );
3819 1.1.2.2 nathanw aExp = extractFloatx80Exp( a );
3820 1.1.2.2 nathanw aSign = extractFloatx80Sign( a );
3821 1.1.2.2 nathanw bSig = extractFloatx80Frac( b );
3822 1.1.2.2 nathanw bExp = extractFloatx80Exp( b );
3823 1.1.2.2 nathanw bSign = extractFloatx80Sign( b );
/* The quotient's sign is the XOR of the operand signs. */
3824 1.1.2.2 nathanw zSign = aSign ^ bSign;
/*
 * a has exponent 0x7FFF.  Any significand bit below bit 63 makes it a NaN,
 * which is propagated; otherwise a is treated as infinite.
 */
3825 1.1.2.2 nathanw if ( aExp == 0x7FFF ) {
3826 1.1.2.2 nathanw if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
3827 1.1.2.2 nathanw if ( bExp == 0x7FFF ) {
3828 1.1.2.2 nathanw if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
/* Both operands infinite: invalid operation. */
3829 1.1.2.2 nathanw goto invalid;
3830 1.1.2.2 nathanw }
/* Infinite a, b without exponent 0x7FFF: result has exponent 0x7FFF and only bit 63 set. */
3831 1.1.2.2 nathanw return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3832 1.1.2.2 nathanw }
/* b has exponent 0x7FFF: propagate a NaN, otherwise return a signed zero. */
3833 1.1.2.2 nathanw if ( bExp == 0x7FFF ) {
3834 1.1.2.2 nathanw if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3835 1.1.2.2 nathanw return packFloatx80( zSign, 0, 0 );
3836 1.1.2.2 nathanw }
3837 1.1.2.2 nathanw if ( bExp == 0 ) {
3838 1.1.2.2 nathanw if ( bSig == 0 ) {
/* Divisor is zero.  A zero dividend as well (0/0) is invalid. */
3839 1.1.2.2 nathanw if ( ( aExp | aSig ) == 0 ) {
/* Shared exit, also reached by the goto above: raise invalid, return the default NaN. */
3840 1.1.2.2 nathanw invalid:
3841 1.1.2.2 nathanw float_raise( float_flag_invalid );
3842 1.1.2.2 nathanw z.low = floatx80_default_nan_low;
3843 1.1.2.2 nathanw z.high = floatx80_default_nan_high;
3844 1.1.2.2 nathanw return z;
3845 1.1.2.2 nathanw }
/* Nonzero dividend over zero: raise divide-by-zero and return exponent 0x7FFF with bit 63 set. */
3846 1.1.2.2 nathanw float_raise( float_flag_divbyzero );
3847 1.1.2.2 nathanw return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3848 1.1.2.2 nathanw }
/* Zero exponent with nonzero significand: renormalize b in place. */
3849 1.1.2.2 nathanw normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3850 1.1.2.2 nathanw }
3851 1.1.2.2 nathanw if ( aExp == 0 ) {
/* Zero dividend (divisor known nonzero here): signed zero result. */
3852 1.1.2.2 nathanw if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
3853 1.1.2.2 nathanw normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
3854 1.1.2.2 nathanw }
3855 1.1.2.2 nathanw zExp = aExp - bExp + 0x3FFE;
3856 1.1.2.2 nathanw rem1 = 0;
/*
 * When the dividend significand is not smaller than the divisor's, shift
 * the dividend right one bit (the dropped bit lands in rem1) and bump the
 * exponent to compensate.
 * NOTE(review): presumably required so the estimateDiv128To64() quotient
 * fits in 64 bits -- confirm against that helper's contract.
 */
3857 1.1.2.2 nathanw if ( bSig <= aSig ) {
3858 1.1.2.2 nathanw shift128Right( aSig, 0, 1, &aSig, &rem1 );
3859 1.1.2.2 nathanw ++zExp;
3860 1.1.2.2 nathanw }
/*
 * High quotient word: take an estimate, compute the remainder
 * aSig:rem1 - bSig*zSig0, and while that remainder is negative step the
 * quotient down by one and add bSig back.
 */
3861 1.1.2.2 nathanw zSig0 = estimateDiv128To64( aSig, rem1, bSig );
3862 1.1.2.2 nathanw mul64To128( bSig, zSig0, &term0, &term1 );
3863 1.1.2.2 nathanw sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
3864 1.1.2.2 nathanw while ( (sbits64) rem0 < 0 ) {
3865 1.1.2.2 nathanw --zSig0;
3866 1.1.2.2 nathanw add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
3867 1.1.2.2 nathanw }
/* Low quotient word, estimated from the remaining rem1. */
3868 1.1.2.2 nathanw zSig1 = estimateDiv128To64( rem1, 0, bSig );
/*
 * Only when the estimate (top bit ignored) is very small is the exact
 * remainder computed: zSig1 is corrected the same way as zSig0, and its
 * low bit is set as a sticky flag if any remainder is left.
 */
3869 1.1.2.2 nathanw if ( (bits64) ( zSig1<<1 ) <= 8 ) {
3870 1.1.2.2 nathanw mul64To128( bSig, zSig1, &term1, &term2 );
3871 1.1.2.2 nathanw sub128( rem1, 0, term1, term2, &rem1, &rem2 );
3872 1.1.2.2 nathanw while ( (sbits64) rem1 < 0 ) {
3873 1.1.2.2 nathanw --zSig1;
3874 1.1.2.2 nathanw add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
3875 1.1.2.2 nathanw }
3876 1.1.2.2 nathanw zSig1 |= ( ( rem1 | rem2 ) != 0 );
3877 1.1.2.2 nathanw }
3878 1.1.2.2 nathanw return
3879 1.1.2.2 nathanw roundAndPackFloatx80(
3880 1.1.2.2 nathanw floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
3881 1.1.2.2 nathanw
3882 1.1.2.2 nathanw }
3883 1.1.2.2 nathanw
3884 1.1.2.2 nathanw /*
3885 1.1.2.2 nathanw -------------------------------------------------------------------------------
3886 1.1.2.2 nathanw Returns the remainder of the extended double-precision floating-point value
3887 1.1.2.2 nathanw `a' with respect to the corresponding value `b'. The operation is performed
3888 1.1.2.2 nathanw according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3889 1.1.2.2 nathanw -------------------------------------------------------------------------------
3890 1.1.2.2 nathanw */
3891 1.1.2.2 nathanw floatx80 floatx80_rem( floatx80 a, floatx80 b )
3892 1.1.2.2 nathanw {
/*
 * Outline: handle NaN / infinite / zero operands, then reduce a's
 * significand modulo b's in chunks while tracking the quotient in q, and
 * finally pick between the remainder and (b - remainder), flipping the
 * sign in the latter case.
 */
3893 1.1.2.2 nathanw flag aSign, bSign, zSign;
3894 1.1.2.2 nathanw int32 aExp, bExp, expDiff;
3895 1.1.2.2 nathanw bits64 aSig0, aSig1, bSig;
3896 1.1.2.2 nathanw bits64 q, term0, term1, alternateASig0, alternateASig1;
3897 1.1.2.2 nathanw floatx80 z;
3898 1.1.2.2 nathanw
3899 1.1.2.2 nathanw aSig0 = extractFloatx80Frac( a );
3900 1.1.2.2 nathanw aExp = extractFloatx80Exp( a );
3901 1.1.2.2 nathanw aSign = extractFloatx80Sign( a );
3902 1.1.2.2 nathanw bSig = extractFloatx80Frac( b );
3903 1.1.2.2 nathanw bExp = extractFloatx80Exp( b );
/* bSign is extracted but not used further in this function. */
3904 1.1.2.2 nathanw bSign = extractFloatx80Sign( b );
/*
 * a has exponent 0x7FFF: propagate if either operand is a NaN; otherwise
 * a is infinite and the operation is invalid.
 */
3905 1.1.2.2 nathanw if ( aExp == 0x7FFF ) {
3906 1.1.2.2 nathanw if ( (bits64) ( aSig0<<1 )
3907 1.1.2.2 nathanw || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
3908 1.1.2.2 nathanw return propagateFloatx80NaN( a, b );
3909 1.1.2.2 nathanw }
3910 1.1.2.2 nathanw goto invalid;
3911 1.1.2.2 nathanw }
/* b has exponent 0x7FFF: propagate a NaN; for an infinite b, a is returned unchanged. */
3912 1.1.2.2 nathanw if ( bExp == 0x7FFF ) {
3913 1.1.2.2 nathanw if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3914 1.1.2.2 nathanw return a;
3915 1.1.2.2 nathanw }
3916 1.1.2.2 nathanw if ( bExp == 0 ) {
/* Zero divisor: invalid.  The label is also the target of the goto above. */
3917 1.1.2.2 nathanw if ( bSig == 0 ) {
3918 1.1.2.2 nathanw invalid:
3919 1.1.2.2 nathanw float_raise( float_flag_invalid );
3920 1.1.2.2 nathanw z.low = floatx80_default_nan_low;
3921 1.1.2.2 nathanw z.high = floatx80_default_nan_high;
3922 1.1.2.2 nathanw return z;
3923 1.1.2.2 nathanw }
3924 1.1.2.2 nathanw normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3925 1.1.2.2 nathanw }
3926 1.1.2.2 nathanw if ( aExp == 0 ) {
/*
 * NOTE(review): this test shifts out bit 63, so an input with aExp == 0
 * and only bit 63 of the significand set is returned unchanged just like
 * a true zero -- confirm that is intended.
 */
3927 1.1.2.2 nathanw if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
3928 1.1.2.2 nathanw normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
3929 1.1.2.2 nathanw }
/* Force bit 63 of the divisor significand on. */
3930 1.1.2.2 nathanw bSig |= LIT64( 0x8000000000000000 );
/* The result starts out with a's sign; it may be flipped at the end. */
3931 1.1.2.2 nathanw zSign = aSign;
3932 1.1.2.2 nathanw expDiff = aExp - bExp;
3933 1.1.2.2 nathanw aSig1 = 0;
/*
 * a's exponent is below b's.  More than one below: a is returned as is.
 * Exactly one below: shift a's significand right one bit (into aSig1) and
 * continue as if the exponents were equal.
 */
3934 1.1.2.2 nathanw if ( expDiff < 0 ) {
3935 1.1.2.2 nathanw if ( expDiff < -1 ) return a;
3936 1.1.2.2 nathanw shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
3937 1.1.2.2 nathanw expDiff = 0;
3938 1.1.2.2 nathanw }
/* First quotient bit: subtract bSig once if it fits. */
3939 1.1.2.2 nathanw q = ( bSig <= aSig0 );
3940 1.1.2.2 nathanw if ( q ) aSig0 -= bSig;
/*
 * Main reduction: each pass estimates a quotient chunk, lowers it by 2
 * (clamped at 0), subtracts bSig*q, and shifts the remainder left by 62
 * bits, consuming 62 of the exponent difference.
 * NOTE(review): the "- 2" presumably guards against an over-estimate from
 * estimateDiv128To64() -- confirm against that helper's error bound.
 */
3941 1.1.2.2 nathanw expDiff -= 64;
3942 1.1.2.2 nathanw while ( 0 < expDiff ) {
3943 1.1.2.2 nathanw q = estimateDiv128To64( aSig0, aSig1, bSig );
3944 1.1.2.2 nathanw q = ( 2 < q ) ? q - 2 : 0;
3945 1.1.2.2 nathanw mul64To128( bSig, q, &term0, &term1 );
3946 1.1.2.2 nathanw sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
3947 1.1.2.2 nathanw shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
3948 1.1.2.2 nathanw expDiff -= 62;
3949 1.1.2.2 nathanw }
/* Undo the bias applied before the loop; what is left is the final partial step. */
3950 1.1.2.2 nathanw expDiff += 64;
3951 1.1.2.2 nathanw if ( 0 < expDiff ) {
/*
 * Final chunk: keep only the top expDiff bits of the estimate, subtract
 * the matching multiple of bSig, then finish with exact subtractions of
 * bSig (positioned as term0:term1), counting each one in q.
 */
3952 1.1.2.2 nathanw q = estimateDiv128To64( aSig0, aSig1, bSig );
3953 1.1.2.2 nathanw q = ( 2 < q ) ? q - 2 : 0;
3954 1.1.2.2 nathanw q >>= 64 - expDiff;
3955 1.1.2.2 nathanw mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
3956 1.1.2.2 nathanw sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
3957 1.1.2.2 nathanw shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
3958 1.1.2.2 nathanw while ( le128( term0, term1, aSig0, aSig1 ) ) {
3959 1.1.2.2 nathanw ++q;
3960 1.1.2.2 nathanw sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
3961 1.1.2.2 nathanw }
3962 1.1.2.2 nathanw }
3963 1.1.2.2 nathanw else {
/* No partial step: the divisor occupies the high word as is. */
3964 1.1.2.2 nathanw term1 = 0;
3965 1.1.2.2 nathanw term0 = bSig;
3966 1.1.2.2 nathanw }
/*
 * alternate = divisor - remainder.  Switch to it (and negate the result)
 * when it is smaller than the remainder, or when the two are equal and
 * the quotient q is odd.
 */
3967 1.1.2.2 nathanw sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
3968 1.1.2.2 nathanw if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
3969 1.1.2.2 nathanw || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
3970 1.1.2.2 nathanw && ( q & 1 ) )
3971 1.1.2.2 nathanw ) {
3972 1.1.2.2 nathanw aSig0 = alternateASig0;
3973 1.1.2.2 nathanw aSig1 = alternateASig1;
3974 1.1.2.2 nathanw zSign = ! zSign;
3975 1.1.2.2 nathanw }
/* Note the fixed precision argument 80 here, not floatx80_rounding_precision. */
3976 1.1.2.2 nathanw return
3977 1.1.2.2 nathanw normalizeRoundAndPackFloatx80(
3978 1.1.2.2 nathanw 80, zSign, bExp + expDiff, aSig0, aSig1 );
3979 1.1.2.2 nathanw
3980 1.1.2.2 nathanw }
3981 1.1.2.2 nathanw
3982 1.1.2.2 nathanw /*
3983 1.1.2.2 nathanw -------------------------------------------------------------------------------
3984 1.1.2.2 nathanw Returns the square root of the extended double-precision floating-point
3985 1.1.2.2 nathanw value `a'. The operation is performed according to the IEC/IEEE Standard
3986 1.1.2.2 nathanw for Binary Floating-Point Arithmetic.
3987 1.1.2.2 nathanw -------------------------------------------------------------------------------
3988 1.1.2.2 nathanw */
3989 1.1.2.2 nathanw floatx80 floatx80_sqrt( floatx80 a )
3990 1.1.2.2 nathanw {
/*
 * Outline: handle NaN, infinity, negative and zero inputs, then build the
 * root in two words: zSig0 from a 32-bit estimate refined by a division,
 * zSig1 from the remainder, each corrected downward while its remainder
 * is negative.  The result is always packed with sign 0.
 */
3991 1.1.2.2 nathanw flag aSign;
3992 1.1.2.2 nathanw int32 aExp, zExp;
3993 1.1.2.2 nathanw bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0;
3994 1.1.2.2 nathanw bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
3995 1.1.2.2 nathanw floatx80 z;
3996 1.1.2.2 nathanw
3997 1.1.2.2 nathanw aSig0 = extractFloatx80Frac( a );
3998 1.1.2.2 nathanw aExp = extractFloatx80Exp( a );
3999 1.1.2.2 nathanw aSign = extractFloatx80Sign( a );
/* Exponent 0x7FFF: propagate a NaN, return +infinity as is, reject -infinity. */
4000 1.1.2.2 nathanw if ( aExp == 0x7FFF ) {
4001 1.1.2.2 nathanw if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a );
4002 1.1.2.2 nathanw if ( ! aSign ) return a;
4003 1.1.2.2 nathanw goto invalid;
4004 1.1.2.2 nathanw }
/* Negative input: a negative zero is returned unchanged, anything else is invalid. */
4005 1.1.2.2 nathanw if ( aSign ) {
4006 1.1.2.2 nathanw if ( ( aExp | aSig0 ) == 0 ) return a;
4007 1.1.2.2 nathanw invalid:
4008 1.1.2.2 nathanw float_raise( float_flag_invalid );
4009 1.1.2.2 nathanw z.low = floatx80_default_nan_low;
4010 1.1.2.2 nathanw z.high = floatx80_default_nan_high;
4011 1.1.2.2 nathanw return z;
4012 1.1.2.2 nathanw }
/* Zero exponent: a zero significand gives +0, otherwise renormalize the input. */
4013 1.1.2.2 nathanw if ( aExp == 0 ) {
4014 1.1.2.2 nathanw if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
4015 1.1.2.2 nathanw normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
4016 1.1.2.2 nathanw }
/* Halve the exponent after removing the 0x3FFF bias, then re-apply the bias. */
4017 1.1.2.2 nathanw zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
/* Initial estimate from the upper 32 bits of the significand. */
4018 1.1.2.2 nathanw zSig0 = estimateSqrt32( aExp, aSig0>>32 );
/* Shift the significand right by 2 or 3 bits depending on the exponent's low bit. */
4019 1.1.2.2 nathanw shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
/* Refine the estimate to 64 bits using one division by the 32-bit estimate. */
4020 1.1.2.2 nathanw zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
4021 1.1.2.2 nathanw doubleZSig0 = zSig0<<1;
/*
 * Remainder = aSig0:aSig1 - zSig0^2.  While it is negative, step zSig0
 * down by one; the amount added back is built from the updated
 * doubleZSig0 with its low bit set (and zSig0's top bit as the carry word).
 */
4022 1.1.2.2 nathanw mul64To128( zSig0, zSig0, &term0, &term1 );
4023 1.1.2.2 nathanw sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
4024 1.1.2.2 nathanw while ( (sbits64) rem0 < 0 ) {
4025 1.1.2.2 nathanw --zSig0;
4026 1.1.2.2 nathanw doubleZSig0 -= 2;
4027 1.1.2.2 nathanw add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
4028 1.1.2.2 nathanw }
/* Low word of the root, estimated by dividing the remainder by 2*zSig0. */
4029 1.1.2.2 nathanw zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
/*
 * Only when the low 62 bits of the estimate are at most 5 is the exact
 * remainder computed to correct zSig1 and to set its low bit as a sticky
 * flag if anything is left over.
 */
4030 1.1.2.2 nathanw if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
4031 1.1.2.2 nathanw if ( zSig1 == 0 ) zSig1 = 1;
4032 1.1.2.2 nathanw mul64To128( doubleZSig0, zSig1, &term1, &term2 );
4033 1.1.2.2 nathanw sub128( rem1, 0, term1, term2, &rem1, &rem2 );
4034 1.1.2.2 nathanw mul64To128( zSig1, zSig1, &term2, &term3 );
4035 1.1.2.2 nathanw sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
4036 1.1.2.2 nathanw while ( (sbits64) rem1 < 0 ) {
4037 1.1.2.2 nathanw --zSig1;
4038 1.1.2.2 nathanw shortShift128Left( 0, zSig1, 1, &term2, &term3 );
4039 1.1.2.2 nathanw term3 |= 1;
4040 1.1.2.2 nathanw term2 |= doubleZSig0;
4041 1.1.2.2 nathanw add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
4042 1.1.2.2 nathanw }
4043 1.1.2.2 nathanw zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
4044 1.1.2.2 nathanw }
/* Assemble the final significand: zSig1 shifted left one bit, OR-ed with doubleZSig0 in the high word. */
4045 1.1.2.2 nathanw shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
4046 1.1.2.2 nathanw zSig0 |= doubleZSig0;
4047 1.1.2.2 nathanw return
4048 1.1.2.2 nathanw roundAndPackFloatx80(
4049 1.1.2.2 nathanw floatx80_rounding_precision, 0, zExp, zSig0, zSig1 );
4050 1.1.2.2 nathanw
4051 1.1.2.2 nathanw }
4052 1.1.2.2 nathanw
4053 1.1.2.2 nathanw /*
4054 1.1.2.2 nathanw -------------------------------------------------------------------------------
4055 1.1.2.2 nathanw Returns 1 if the extended double-precision floating-point value `a' is
4056 1.1.2.2 nathanw equal to the corresponding value `b', and 0 otherwise. The comparison is
4057 1.1.2.2 nathanw performed according to the IEC/IEEE Standard for Binary Floating-Point
4058 1.1.2.2 nathanw Arithmetic.
4059 1.1.2.2 nathanw -------------------------------------------------------------------------------
4060 1.1.2.2 nathanw */
4061 1.1.2.2 nathanw flag floatx80_eq( floatx80 a, floatx80 b )
4062 1.1.2.2 nathanw {
4063 1.1.2.2 nathanw
/*
 * Either operand is a NaN (exponent 0x7FFF with a nonzero significand
 * once bit 63 is shifted out): the result is 0, and the invalid flag is
 * raised only if one of them is a signaling NaN.
 */
4064 1.1.2.2 nathanw if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
4065 1.1.2.2 nathanw && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4066 1.1.2.2 nathanw || ( ( extractFloatx80Exp( b ) == 0x7FFF )
4067 1.1.2.2 nathanw && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4068 1.1.2.2 nathanw ) {
4069 1.1.2.2 nathanw if ( floatx80_is_signaling_nan( a )
4070 1.1.2.2 nathanw || floatx80_is_signaling_nan( b ) ) {
4071 1.1.2.2 nathanw float_raise( float_flag_invalid );
4072 1.1.2.2 nathanw }
4073 1.1.2.2 nathanw return 0;
4074 1.1.2.2 nathanw }
/*
 * Equal when the low words match and either the high words match too, or
 * the low word is zero and both high words are zero apart from the bit
 * removed by the shift (so zeros of opposite sign compare equal).
 */
4075 1.1.2.2 nathanw return
4076 1.1.2.2 nathanw ( a.low == b.low )
4077 1.1.2.2 nathanw && ( ( a.high == b.high )
4078 1.1.2.2 nathanw || ( ( a.low == 0 )
4079 1.1.2.2 nathanw && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
4080 1.1.2.2 nathanw );
4081 1.1.2.2 nathanw
4082 1.1.2.2 nathanw }
4083 1.1.2.2 nathanw
4084 1.1.2.2 nathanw /*
4085 1.1.2.2 nathanw -------------------------------------------------------------------------------
4086 1.1.2.2 nathanw Returns 1 if the extended double-precision floating-point value `a' is
4087 1.1.2.2 nathanw less than or equal to the corresponding value `b', and 0 otherwise. The
4088 1.1.2.2 nathanw comparison is performed according to the IEC/IEEE Standard for Binary
4089 1.1.2.2 nathanw Floating-Point Arithmetic.
4090 1.1.2.2 nathanw -------------------------------------------------------------------------------
4091 1.1.2.2 nathanw */
4092 1.1.2.2 nathanw flag floatx80_le( floatx80 a, floatx80 b )
4093 1.1.2.2 nathanw {
4094 1.1.2.2 nathanw flag aSign, bSign;
4095 1.1.2.2 nathanw
/* Any NaN operand (quiet or signaling) raises invalid and yields 0. */
4096 1.1.2.2 nathanw if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
4097 1.1.2.2 nathanw && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4098 1.1.2.2 nathanw || ( ( extractFloatx80Exp( b ) == 0x7FFF )
4099 1.1.2.2 nathanw && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4100 1.1.2.2 nathanw ) {
4101 1.1.2.2 nathanw float_raise( float_flag_invalid );
4102 1.1.2.2 nathanw return 0;
4103 1.1.2.2 nathanw }
4104 1.1.2.2 nathanw aSign = extractFloatx80Sign( a );
4105 1.1.2.2 nathanw bSign = extractFloatx80Sign( b );
/*
 * Signs differ: a <= b when a is the negative one, or when both operands
 * are zero (all bits clear once the top bit of each high word is shifted out).
 */
4106 1.1.2.2 nathanw if ( aSign != bSign ) {
4107 1.1.2.2 nathanw return
4108 1.1.2.2 nathanw aSign
4109 1.1.2.2 nathanw || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4110 1.1.2.2 nathanw == 0 );
4111 1.1.2.2 nathanw }
/*
 * Same sign: compare the (high, low) pairs with le128(), with the
 * operands swapped when both are negative.
 */
4112 1.1.2.2 nathanw return
4113 1.1.2.2 nathanw aSign ? le128( b.high, b.low, a.high, a.low )
4114 1.1.2.2 nathanw : le128( a.high, a.low, b.high, b.low );
4115 1.1.2.2 nathanw
4116 1.1.2.2 nathanw }
4117 1.1.2.2 nathanw
4118 1.1.2.2 nathanw /*
4119 1.1.2.2 nathanw -------------------------------------------------------------------------------
4120 1.1.2.2 nathanw Returns 1 if the extended double-precision floating-point value `a' is
4121 1.1.2.2 nathanw less than the corresponding value `b', and 0 otherwise. The comparison
4122 1.1.2.2 nathanw is performed according to the IEC/IEEE Standard for Binary Floating-Point
4123 1.1.2.2 nathanw Arithmetic.
4124 1.1.2.2 nathanw -------------------------------------------------------------------------------
4125 1.1.2.2 nathanw */
4126 1.1.2.2 nathanw flag floatx80_lt( floatx80 a, floatx80 b )
4127 1.1.2.2 nathanw {
4128 1.1.2.2 nathanw flag aSign, bSign;
4129 1.1.2.2 nathanw
/* Any NaN operand (quiet or signaling) raises invalid and yields 0. */
4130 1.1.2.2 nathanw if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
4131 1.1.2.2 nathanw && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4132 1.1.2.2 nathanw || ( ( extractFloatx80Exp( b ) == 0x7FFF )
4133 1.1.2.2 nathanw && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4134 1.1.2.2 nathanw ) {
4135 1.1.2.2 nathanw float_raise( float_flag_invalid );
4136 1.1.2.2 nathanw return 0;
4137 1.1.2.2 nathanw }
4138 1.1.2.2 nathanw aSign = extractFloatx80Sign( a );
4139 1.1.2.2 nathanw bSign = extractFloatx80Sign( b );
/*
 * Signs differ: a < b only when a is the negative one and the operands
 * are not both zero (some bit set once the top bit of each high word is
 * shifted out).
 */
4140 1.1.2.2 nathanw if ( aSign != bSign ) {
4141 1.1.2.2 nathanw return
4142 1.1.2.2 nathanw aSign
4143 1.1.2.2 nathanw && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4144 1.1.2.2 nathanw != 0 );
4145 1.1.2.2 nathanw }
/*
 * Same sign: compare the (high, low) pairs with lt128(), with the
 * operands swapped when both are negative.
 */
4146 1.1.2.2 nathanw return
4147 1.1.2.2 nathanw aSign ? lt128( b.high, b.low, a.high, a.low )
4148 1.1.2.2 nathanw : lt128( a.high, a.low, b.high, b.low );
4149 1.1.2.2 nathanw
4150 1.1.2.2 nathanw }
4151 1.1.2.2 nathanw
4152 1.1.2.2 nathanw /*
4153 1.1.2.2 nathanw -------------------------------------------------------------------------------
4154 1.1.2.2 nathanw Returns 1 if the extended double-precision floating-point value `a' is equal
4155 1.1.2.2 nathanw to the corresponding value `b', and 0 otherwise. The invalid exception is
4156 1.1.2.2 nathanw raised if either operand is a NaN. Otherwise, the comparison is performed
4157 1.1.2.2 nathanw according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4158 1.1.2.2 nathanw -------------------------------------------------------------------------------
4159 1.1.2.2 nathanw */
4160 1.1.2.2 nathanw flag floatx80_eq_signaling( floatx80 a, floatx80 b )
4161 1.1.2.2 nathanw {
4162 1.1.2.2 nathanw
/*
 * Unlike floatx80_eq(), any NaN operand raises invalid here, without
 * distinguishing quiet from signaling NaNs; the result is 0.
 */
4163 1.1.2.2 nathanw if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
4164 1.1.2.2 nathanw && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4165 1.1.2.2 nathanw || ( ( extractFloatx80Exp( b ) == 0x7FFF )
4166 1.1.2.2 nathanw && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4167 1.1.2.2 nathanw ) {
4168 1.1.2.2 nathanw float_raise( float_flag_invalid );
4169 1.1.2.2 nathanw return 0;
4170 1.1.2.2 nathanw }
/*
 * Equal when the low words match and either the high words match too, or
 * the low word is zero and both high words are zero apart from the bit
 * removed by the shift (so zeros of opposite sign compare equal).
 */
4171 1.1.2.2 nathanw return
4172 1.1.2.2 nathanw ( a.low == b.low )
4173 1.1.2.2 nathanw && ( ( a.high == b.high )
4174 1.1.2.2 nathanw || ( ( a.low == 0 )
4175 1.1.2.2 nathanw && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
4176 1.1.2.2 nathanw );
4177 1.1.2.2 nathanw
4178 1.1.2.2 nathanw }
4179 1.1.2.2 nathanw
4180 1.1.2.2 nathanw /*
4181 1.1.2.2 nathanw -------------------------------------------------------------------------------
4182 1.1.2.2 nathanw Returns 1 if the extended double-precision floating-point value `a' is less
4183 1.1.2.2 nathanw than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
4184 1.1.2.2 nathanw do not cause an exception. Otherwise, the comparison is performed according
4185 1.1.2.2 nathanw to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4186 1.1.2.2 nathanw -------------------------------------------------------------------------------
4187 1.1.2.2 nathanw */
4188 1.1.2.2 nathanw flag floatx80_le_quiet( floatx80 a, floatx80 b )
4189 1.1.2.2 nathanw {
4190 1.1.2.2 nathanw flag aSign, bSign;
4191 1.1.2.2 nathanw
/*
 * A NaN operand yields 0; the invalid flag is raised only when one of the
 * operands is a signaling NaN.
 */
4192 1.1.2.2 nathanw if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
4193 1.1.2.2 nathanw && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4194 1.1.2.2 nathanw || ( ( extractFloatx80Exp( b ) == 0x7FFF )
4195 1.1.2.2 nathanw && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4196 1.1.2.2 nathanw ) {
4197 1.1.2.2 nathanw if ( floatx80_is_signaling_nan( a )
4198 1.1.2.2 nathanw || floatx80_is_signaling_nan( b ) ) {
4199 1.1.2.2 nathanw float_raise( float_flag_invalid );
4200 1.1.2.2 nathanw }
4201 1.1.2.2 nathanw return 0;
4202 1.1.2.2 nathanw }
4203 1.1.2.2 nathanw aSign = extractFloatx80Sign( a );
4204 1.1.2.2 nathanw bSign = extractFloatx80Sign( b );
/*
 * Signs differ: a <= b when a is the negative one, or when both operands
 * are zero (all bits clear once the top bit of each high word is shifted out).
 */
4205 1.1.2.2 nathanw if ( aSign != bSign ) {
4206 1.1.2.2 nathanw return
4207 1.1.2.2 nathanw aSign
4208 1.1.2.2 nathanw || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4209 1.1.2.2 nathanw == 0 );
4210 1.1.2.2 nathanw }
/*
 * Same sign: compare the (high, low) pairs with le128(), with the
 * operands swapped when both are negative.
 */
4211 1.1.2.2 nathanw return
4212 1.1.2.2 nathanw aSign ? le128( b.high, b.low, a.high, a.low )
4213 1.1.2.2 nathanw : le128( a.high, a.low, b.high, b.low );
4214 1.1.2.2 nathanw
4215 1.1.2.2 nathanw }
4216 1.1.2.2 nathanw
4217 1.1.2.2 nathanw /*
4218 1.1.2.2 nathanw -------------------------------------------------------------------------------
4219 1.1.2.2 nathanw Returns 1 if the extended double-precision floating-point value `a' is less
4220 1.1.2.2 nathanw than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
4221 1.1.2.2 nathanw an exception. Otherwise, the comparison is performed according to the
4222 1.1.2.2 nathanw IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4223 1.1.2.2 nathanw -------------------------------------------------------------------------------
4224 1.1.2.2 nathanw */
4225 1.1.2.2 nathanw flag floatx80_lt_quiet( floatx80 a, floatx80 b )
4226 1.1.2.2 nathanw {
4227 1.1.2.2 nathanw flag aSign, bSign;
4228 1.1.2.2 nathanw
/*
 * A NaN operand yields 0; the invalid flag is raised only when one of the
 * operands is a signaling NaN.
 */
4229 1.1.2.2 nathanw if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
4230 1.1.2.2 nathanw && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4231 1.1.2.2 nathanw || ( ( extractFloatx80Exp( b ) == 0x7FFF )
4232 1.1.2.2 nathanw && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4233 1.1.2.2 nathanw ) {
4234 1.1.2.2 nathanw if ( floatx80_is_signaling_nan( a )
4235 1.1.2.2 nathanw || floatx80_is_signaling_nan( b ) ) {
4236 1.1.2.2 nathanw float_raise( float_flag_invalid );
4237 1.1.2.2 nathanw }
4238 1.1.2.2 nathanw return 0;
4239 1.1.2.2 nathanw }
4240 1.1.2.2 nathanw aSign = extractFloatx80Sign( a );
4241 1.1.2.2 nathanw bSign = extractFloatx80Sign( b );
/*
 * Signs differ: a < b only when a is the negative one and the operands
 * are not both zero (some bit set once the top bit of each high word is
 * shifted out).
 */
4242 1.1.2.2 nathanw if ( aSign != bSign ) {
4243 1.1.2.2 nathanw return
4244 1.1.2.2 nathanw aSign
4245 1.1.2.2 nathanw && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4246 1.1.2.2 nathanw != 0 );
4247 1.1.2.2 nathanw }
/*
 * Same sign: compare the (high, low) pairs with lt128(), with the
 * operands swapped when both are negative.
 */
4248 1.1.2.2 nathanw return
4249 1.1.2.2 nathanw aSign ? lt128( b.high, b.low, a.high, a.low )
4250 1.1.2.2 nathanw : lt128( a.high, a.low, b.high, b.low );
4251 1.1.2.2 nathanw
4252 1.1.2.2 nathanw }
4253 1.1.2.2 nathanw
4254 1.1.2.2 nathanw #endif
4255 1.1.2.2 nathanw
4256 1.1.2.2 nathanw #ifdef FLOAT128
4257 1.1.2.2 nathanw
4258 1.1.2.2 nathanw /*
4259 1.1.2.2 nathanw -------------------------------------------------------------------------------
4260 1.1.2.2 nathanw Returns the result of converting the quadruple-precision floating-point
4261 1.1.2.2 nathanw value `a' to the 32-bit two's complement integer format. The conversion
4262 1.1.2.2 nathanw is performed according to the IEC/IEEE Standard for Binary Floating-Point
4263 1.1.2.2 nathanw Arithmetic---which means in particular that the conversion is rounded
4264 1.1.2.2 nathanw according to the current rounding mode. If `a' is a NaN, the largest
4265 1.1.2.2 nathanw positive integer is returned. Otherwise, if the conversion overflows, the
4266 1.1.2.2 nathanw largest integer with the same sign as `a' is returned.
4267 1.1.2.2 nathanw -------------------------------------------------------------------------------
4268 1.1.2.2 nathanw */
4269 1.1.2.2 nathanw int32 float128_to_int32( float128 a )
4270 1.1.2.2 nathanw {
4271 1.1.2.2 nathanw flag aSign;
4272 1.1.2.2 nathanw int32 aExp, shiftCount;
4273 1.1.2.2 nathanw bits64 aSig0, aSig1;
4274 1.1.2.2 nathanw
4275 1.1.2.2 nathanw aSig1 = extractFloat128Frac1( a );
4276 1.1.2.2 nathanw aSig0 = extractFloat128Frac0( a );
4277 1.1.2.2 nathanw aExp = extractFloat128Exp( a );
4278 1.1.2.2 nathanw aSign = extractFloat128Sign( a );
/* NaN (exponent 0x7FFF, nonzero fraction): clear the sign so it is converted as a positive value. */
4279 1.1.2.2 nathanw if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
/* Nonzero exponent: set bit 48, just above the bits kept in the high fraction word. */
4280 1.1.2.2 nathanw if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
/* Fold the whole low fraction word into a single sticky bit. */
4281 1.1.2.2 nathanw aSig0 |= ( aSig1 != 0 );
/* Shift right (keeping a sticky bit) only when the exponent is below 0x4028. */
4282 1.1.2.2 nathanw shiftCount = 0x4028 - aExp;
4283 1.1.2.2 nathanw if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
/* Rounding and packing into the int32 result are delegated to roundAndPackInt32(). */
4284 1.1.2.2 nathanw return roundAndPackInt32( aSign, aSig0 );
4285 1.1.2.2 nathanw
4286 1.1.2.2 nathanw }
4287 1.1.2.2 nathanw
4288 1.1.2.2 nathanw /*
4289 1.1.2.2 nathanw -------------------------------------------------------------------------------
4290 1.1.2.2 nathanw Returns the result of converting the quadruple-precision floating-point
4291 1.1.2.2 nathanw value `a' to the 32-bit two's complement integer format. The conversion
4292 1.1.2.2 nathanw is performed according to the IEC/IEEE Standard for Binary Floating-Point
4293 1.1.2.2 nathanw Arithmetic, except that the conversion is always rounded toward zero. If
4294 1.1.2.2 nathanw `a' is a NaN, the largest positive integer is returned. Otherwise, if the
4295 1.1.2.2 nathanw conversion overflows, the largest integer with the same sign as `a' is
4296 1.1.2.2 nathanw returned.
4297 1.1.2.2 nathanw -------------------------------------------------------------------------------
4298 1.1.2.2 nathanw */
4299 1.1.2.2 nathanw int32 float128_to_int32_round_to_zero( float128 a )
4300 1.1.2.2 nathanw {
4301 1.1.2.2 nathanw flag aSign;
4302 1.1.2.2 nathanw int32 aExp, shiftCount;
4303 1.1.2.2 nathanw bits64 aSig0, aSig1, savedASig;
4304 1.1.2.2 nathanw int32 z;
4305 1.1.2.2 nathanw
4306 1.1.2.2 nathanw aSig1 = extractFloat128Frac1( a );
4307 1.1.2.2 nathanw aSig0 = extractFloat128Frac0( a );
4308 1.1.2.2 nathanw aExp = extractFloat128Exp( a );
4309 1.1.2.2 nathanw aSign = extractFloat128Sign( a );
/* Fold the low fraction word into a sticky bit in aSig0. */
4310 1.1.2.2 nathanw aSig0 |= ( aSig1 != 0 );
/* Exponent above 0x401E: out of range.  A NaN is treated as positive before the invalid exit. */
4311 1.1.2.2 nathanw if ( 0x401E < aExp ) {
4312 1.1.2.2 nathanw if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
4313 1.1.2.2 nathanw goto invalid;
4314 1.1.2.2 nathanw }
/* Exponent below 0x3FFF: the result is 0, flagged inexact unless the input was exactly zero. */
4315 1.1.2.2 nathanw else if ( aExp < 0x3FFF ) {
4316 1.1.2.2 nathanw if ( aExp || aSig0 ) float_set_inexact();
4317 1.1.2.2 nathanw return 0;
4318 1.1.2.2 nathanw }
/* Set bit 48 above the fraction, then shift the integer part down into place. */
4319 1.1.2.2 nathanw aSig0 |= LIT64( 0x0001000000000000 );
4320 1.1.2.2 nathanw shiftCount = 0x402F - aExp;
4321 1.1.2.2 nathanw savedASig = aSig0;
4322 1.1.2.2 nathanw aSig0 >>= shiftCount;
4323 1.1.2.2 nathanw z = aSig0;
4324 1.1.2.2 nathanw if ( aSign ) z = - z;
/* A result whose sign disagrees with the input's sign means the value did not fit. */
4325 1.1.2.2 nathanw if ( ( z < 0 ) ^ aSign ) {
/* Shared invalid exit: raise invalid and return 0x80000000 for negative input, 0x7FFFFFFF otherwise. */
4326 1.1.2.2 nathanw invalid:
4327 1.1.2.2 nathanw float_raise( float_flag_invalid );
4328 1.1.2.2 nathanw return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
4329 1.1.2.2 nathanw }
/* Bits were lost in the right shift (including the sticky bit): flag inexact. */
4330 1.1.2.2 nathanw if ( ( aSig0<<shiftCount ) != savedASig ) {
4331 1.1.2.2 nathanw float_set_inexact();
4332 1.1.2.2 nathanw }
4333 1.1.2.2 nathanw return z;
4334 1.1.2.2 nathanw
4335 1.1.2.2 nathanw }
4336 1.1.2.2 nathanw
4337 1.1.2.2 nathanw /*
4338 1.1.2.2 nathanw -------------------------------------------------------------------------------
4339 1.1.2.2 nathanw Returns the result of converting the quadruple-precision floating-point
4340 1.1.2.2 nathanw value `a' to the 64-bit two's complement integer format. The conversion
4341 1.1.2.2 nathanw is performed according to the IEC/IEEE Standard for Binary Floating-Point
4342 1.1.2.2 nathanw Arithmetic---which means in particular that the conversion is rounded
4343 1.1.2.2 nathanw according to the current rounding mode. If `a' is a NaN, the largest
4344 1.1.2.2 nathanw positive integer is returned. Otherwise, if the conversion overflows, the
4345 1.1.2.2 nathanw largest integer with the same sign as `a' is returned.
4346 1.1.2.2 nathanw -------------------------------------------------------------------------------
4347 1.1.2.2 nathanw */
4348 1.1.2.2 nathanw int64 float128_to_int64( float128 a )
4349 1.1.2.2 nathanw {
4350 1.1.2.2 nathanw flag aSign;
4351 1.1.2.2 nathanw int32 aExp, shiftCount;
4352 1.1.2.2 nathanw bits64 aSig0, aSig1;
4353 1.1.2.2 nathanw
4354 1.1.2.2 nathanw aSig1 = extractFloat128Frac1( a );
4355 1.1.2.2 nathanw aSig0 = extractFloat128Frac0( a );
4356 1.1.2.2 nathanw aExp = extractFloat128Exp( a );
4357 1.1.2.2 nathanw aSign = extractFloat128Sign( a );
/* Nonzero exponent: set bit 48, just above the bits kept in the high fraction word. */
4358 1.1.2.2 nathanw if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
/* shiftCount > 0 means shift right; <= 0 means shift left by -shiftCount. */
4359 1.1.2.2 nathanw shiftCount = 0x402F - aExp;
4360 1.1.2.2 nathanw if ( shiftCount <= 0 ) {
/*
 * Exponent above 0x403E: out of range, raise invalid.  The largest
 * positive value is returned for positive input and for NaNs (exponent
 * 0x7FFF with a nonzero fraction; aSig0 already has bit 48 set here,
 * hence the comparison against 0x0001000000000000).  Other negative
 * input returns the most negative value.
 */
4361 1.1.2.2 nathanw if ( 0x403E < aExp ) {
4362 1.1.2.2 nathanw float_raise( float_flag_invalid );
4363 1.1.2.2 nathanw if ( ! aSign
4364 1.1.2.2 nathanw || ( ( aExp == 0x7FFF )
4365 1.1.2.2 nathanw && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
4366 1.1.2.2 nathanw )
4367 1.1.2.2 nathanw ) {
4368 1.1.2.2 nathanw return LIT64( 0x7FFFFFFFFFFFFFFF );
4369 1.1.2.2 nathanw }
4370 1.1.2.2 nathanw return (sbits64) LIT64( 0x8000000000000000 );
4371 1.1.2.2 nathanw }
4372 1.1.2.2 nathanw shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
4373 1.1.2.2 nathanw }
4374 1.1.2.2 nathanw else {
4375 1.1.2.2 nathanw shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
4376 1.1.2.2 nathanw }
/* Rounding and packing into the int64 result are delegated to roundAndPackInt64(). */
4377 1.1.2.2 nathanw return roundAndPackInt64( aSign, aSig0, aSig1 );
4378 1.1.2.2 nathanw
4379 1.1.2.2 nathanw }
4380 1.1.2.2 nathanw
4381 1.1.2.2 nathanw /*
4382 1.1.2.2 nathanw -------------------------------------------------------------------------------
4383 1.1.2.2 nathanw Returns the result of converting the quadruple-precision floating-point
4384 1.1.2.2 nathanw value `a' to the 64-bit two's complement integer format. The conversion
4385 1.1.2.2 nathanw is performed according to the IEC/IEEE Standard for Binary Floating-Point
4386 1.1.2.2 nathanw Arithmetic, except that the conversion is always rounded toward zero.
4387 1.1.2.2 nathanw If `a' is a NaN, the largest positive integer is returned. Otherwise, if
4388 1.1.2.2 nathanw the conversion overflows, the largest integer with the same sign as `a' is
4389 1.1.2.2 nathanw returned.
4390 1.1.2.2 nathanw -------------------------------------------------------------------------------
4391 1.1.2.2 nathanw */
4392 1.1.2.2 nathanw int64 float128_to_int64_round_to_zero( float128 a )
4393 1.1.2.2 nathanw {
4394 1.1.2.2 nathanw flag aSign;
4395 1.1.2.2 nathanw int32 aExp, shiftCount;
4396 1.1.2.2 nathanw bits64 aSig0, aSig1;
4397 1.1.2.2 nathanw int64 z;
4398 1.1.2.2 nathanw
4399 1.1.2.2 nathanw aSig1 = extractFloat128Frac1( a );
4400 1.1.2.2 nathanw aSig0 = extractFloat128Frac0( a );
4401 1.1.2.2 nathanw aExp = extractFloat128Exp( a );
4402 1.1.2.2 nathanw aSign = extractFloat128Sign( a );
/* Nonzero exponent: set bit 48, just above the bits kept in the high fraction word. */
4403 1.1.2.2 nathanw if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
/* Note the sign convention: positive shiftCount here means a LEFT shift (opposite of float128_to_int64). */
4404 1.1.2.2 nathanw shiftCount = aExp - 0x402F;
4405 1.1.2.2 nathanw if ( 0 < shiftCount ) {
4406 1.1.2.2 nathanw if ( 0x403E <= aExp ) {
/* Drop bit 48 again so aSig0 holds only fraction bits for the tests below. */
4407 1.1.2.2 nathanw aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
/*
 * Special case: high word 0xC03E000000000000 with a low word below
 * 0x0002000000000000 still truncates to the most negative int64.  Only
 * inexact is flagged (if aSig1 is nonzero) and control falls through to
 * the return of 0x8000000000000000 below.
 */
4408 1.1.2.2 nathanw if ( ( a.high == LIT64( 0xC03E000000000000 ) )
4409 1.1.2.2 nathanw && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
4410 1.1.2.2 nathanw if ( aSig1 ) float_set_inexact();
4411 1.1.2.2 nathanw }
4412 1.1.2.2 nathanw else {
/* Everything else is out of range: invalid.  Positive input and NaNs get the largest positive value. */
4413 1.1.2.2 nathanw float_raise( float_flag_invalid );
4414 1.1.2.2 nathanw if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
4415 1.1.2.2 nathanw return LIT64( 0x7FFFFFFFFFFFFFFF );
4416 1.1.2.2 nathanw }
4417 1.1.2.2 nathanw }
4418 1.1.2.2 nathanw return (sbits64) LIT64( 0x8000000000000000 );
4419 1.1.2.2 nathanw }
/* Shift left: the top shiftCount bits of aSig1 move into the bottom of the result. */
4420 1.1.2.2 nathanw z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
/* Any bits of aSig1 left below the integer part make the conversion inexact. */
4421 1.1.2.2 nathanw if ( (bits64) ( aSig1<<shiftCount ) ) {
4422 1.1.2.2 nathanw float_set_inexact();
4423 1.1.2.2 nathanw }
4424 1.1.2.2 nathanw }
4425 1.1.2.2 nathanw else {
/* Exponent below 0x3FFF: the result is 0, flagged inexact unless the input was exactly zero. */
4426 1.1.2.2 nathanw if ( aExp < 0x3FFF ) {
4427 1.1.2.2 nathanw if ( aExp | aSig0 | aSig1 ) {
4428 1.1.2.2 nathanw float_set_inexact();
4429 1.1.2.2 nathanw }
4430 1.1.2.2 nathanw return 0;
4431 1.1.2.2 nathanw }
/* shiftCount is <= 0 here: shift right by its magnitude. */
4432 1.1.2.2 nathanw z = aSig0>>( - shiftCount );
/* Inexact if the low word is nonzero or if the right shift discarded bits of aSig0. */
4433 1.1.2.2 nathanw if ( aSig1
4434 1.1.2.2 nathanw || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
4435 1.1.2.2 nathanw float_set_inexact();
4436 1.1.2.2 nathanw }
4437 1.1.2.2 nathanw }
4438 1.1.2.2 nathanw if ( aSign ) z = - z;
4439 1.1.2.2 nathanw return z;
4440 1.1.2.2 nathanw
4441 1.1.2.2 nathanw }
4442 1.1.2.2 nathanw
4443 1.1.2.2 nathanw /*
4444 1.1.2.2 nathanw -------------------------------------------------------------------------------
4445 1.1.2.2 nathanw Returns the result of converting the quadruple-precision floating-point
4446 1.1.2.2 nathanw value `a' to the single-precision floating-point format. The conversion
4447 1.1.2.2 nathanw is performed according to the IEC/IEEE Standard for Binary Floating-Point
4448 1.1.2.2 nathanw Arithmetic.
4449 1.1.2.2 nathanw -------------------------------------------------------------------------------
4450 1.1.2.2 nathanw */
4451 1.1.2.2 nathanw float32 float128_to_float32( float128 a )
4452 1.1.2.2 nathanw {
4453 1.1.2.2 nathanw flag aSign;
4454 1.1.2.2 nathanw int32 aExp;
4455 1.1.2.2 nathanw bits64 aSig0, aSig1;
4456 1.1.2.2 nathanw bits32 zSig;
4457 1.1.2.2 nathanw
4458 1.1.2.2 nathanw aSig1 = extractFloat128Frac1( a );
4459 1.1.2.2 nathanw aSig0 = extractFloat128Frac0( a );
4460 1.1.2.2 nathanw aExp = extractFloat128Exp( a );
4461 1.1.2.2 nathanw aSign = extractFloat128Sign( a );
4462 1.1.2.2 nathanw if ( aExp == 0x7FFF ) {
4463 1.1.2.2 nathanw if ( aSig0 | aSig1 ) {
4464 1.1.2.2 nathanw return commonNaNToFloat32( float128ToCommonNaN( a ) );
4465 1.1.2.2 nathanw }
4466 1.1.2.2 nathanw return packFloat32( aSign, 0xFF, 0 );
4467 1.1.2.2 nathanw }
4468 1.1.2.2 nathanw aSig0 |= ( aSig1 != 0 );
4469 1.1.2.2 nathanw shift64RightJamming( aSig0, 18, &aSig0 );
4470 1.1.2.2 nathanw zSig = aSig0;
4471 1.1.2.2 nathanw if ( aExp || zSig ) {
4472 1.1.2.2 nathanw zSig |= 0x40000000;
4473 1.1.2.2 nathanw aExp -= 0x3F81;
4474 1.1.2.2 nathanw }
4475 1.1.2.2 nathanw return roundAndPackFloat32( aSign, aExp, zSig );
4476 1.1.2.2 nathanw
4477 1.1.2.2 nathanw }
4478 1.1.2.2 nathanw
4479 1.1.2.2 nathanw /*
4480 1.1.2.2 nathanw -------------------------------------------------------------------------------
4481 1.1.2.2 nathanw Returns the result of converting the quadruple-precision floating-point
4482 1.1.2.2 nathanw value `a' to the double-precision floating-point format. The conversion
4483 1.1.2.2 nathanw is performed according to the IEC/IEEE Standard for Binary Floating-Point
4484 1.1.2.2 nathanw Arithmetic.
4485 1.1.2.2 nathanw -------------------------------------------------------------------------------
4486 1.1.2.2 nathanw */
4487 1.1.2.2 nathanw float64 float128_to_float64( float128 a )
4488 1.1.2.2 nathanw {
4489 1.1.2.2 nathanw flag aSign;
4490 1.1.2.2 nathanw int32 aExp;
4491 1.1.2.2 nathanw bits64 aSig0, aSig1;
4492 1.1.2.2 nathanw
4493 1.1.2.2 nathanw aSig1 = extractFloat128Frac1( a );
4494 1.1.2.2 nathanw aSig0 = extractFloat128Frac0( a );
4495 1.1.2.2 nathanw aExp = extractFloat128Exp( a );
4496 1.1.2.2 nathanw aSign = extractFloat128Sign( a );
4497 1.1.2.2 nathanw if ( aExp == 0x7FFF ) {
4498 1.1.2.2 nathanw if ( aSig0 | aSig1 ) {
4499 1.1.2.2 nathanw return commonNaNToFloat64( float128ToCommonNaN( a ) );
4500 1.1.2.2 nathanw }
4501 1.1.2.2 nathanw return packFloat64( aSign, 0x7FF, 0 );
4502 1.1.2.2 nathanw }
4503 1.1.2.2 nathanw shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
4504 1.1.2.2 nathanw aSig0 |= ( aSig1 != 0 );
4505 1.1.2.2 nathanw if ( aExp || aSig0 ) {
4506 1.1.2.2 nathanw aSig0 |= LIT64( 0x4000000000000000 );
4507 1.1.2.2 nathanw aExp -= 0x3C01;
4508 1.1.2.2 nathanw }
4509 1.1.2.2 nathanw return roundAndPackFloat64( aSign, aExp, aSig0 );
4510 1.1.2.2 nathanw
4511 1.1.2.2 nathanw }
4512 1.1.2.2 nathanw
4513 1.1.2.2 nathanw #ifdef FLOATX80
4514 1.1.2.2 nathanw
4515 1.1.2.2 nathanw /*
4516 1.1.2.2 nathanw -------------------------------------------------------------------------------
4517 1.1.2.2 nathanw Returns the result of converting the quadruple-precision floating-point
4518 1.1.2.2 nathanw value `a' to the extended double-precision floating-point format. The
4519 1.1.2.2 nathanw conversion is performed according to the IEC/IEEE Standard for Binary
4520 1.1.2.2 nathanw Floating-Point Arithmetic.
4521 1.1.2.2 nathanw -------------------------------------------------------------------------------
4522 1.1.2.2 nathanw */
4523 1.1.2.2 nathanw floatx80 float128_to_floatx80( float128 a )
4524 1.1.2.2 nathanw {
4525 1.1.2.2 nathanw flag aSign;
4526 1.1.2.2 nathanw int32 aExp;
4527 1.1.2.2 nathanw bits64 aSig0, aSig1;
4528 1.1.2.2 nathanw
4529 1.1.2.2 nathanw aSig1 = extractFloat128Frac1( a );
4530 1.1.2.2 nathanw aSig0 = extractFloat128Frac0( a );
4531 1.1.2.2 nathanw aExp = extractFloat128Exp( a );
4532 1.1.2.2 nathanw aSign = extractFloat128Sign( a );
4533 1.1.2.2 nathanw if ( aExp == 0x7FFF ) {
4534 1.1.2.2 nathanw if ( aSig0 | aSig1 ) {
4535 1.1.2.2 nathanw return commonNaNToFloatx80( float128ToCommonNaN( a ) );
4536 1.1.2.2 nathanw }
4537 1.1.2.2 nathanw return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
4538 1.1.2.2 nathanw }
4539 1.1.2.2 nathanw if ( aExp == 0 ) {
4540 1.1.2.2 nathanw if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
4541 1.1.2.2 nathanw normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
4542 1.1.2.2 nathanw }
4543 1.1.2.2 nathanw else {
4544 1.1.2.2 nathanw aSig0 |= LIT64( 0x0001000000000000 );
4545 1.1.2.2 nathanw }
4546 1.1.2.2 nathanw shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
4547 1.1.2.2 nathanw return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 );
4548 1.1.2.2 nathanw
4549 1.1.2.2 nathanw }
4550 1.1.2.2 nathanw
4551 1.1.2.2 nathanw #endif
4552 1.1.2.2 nathanw
4553 1.1.2.2 nathanw /*
4554 1.1.2.2 nathanw -------------------------------------------------------------------------------
4555 1.1.2.2 nathanw Rounds the quadruple-precision floating-point value `a' to an integer, and
4556 1.1.2.2 nathanw returns the result as a quadruple-precision floating-point value. The
4557 1.1.2.2 nathanw operation is performed according to the IEC/IEEE Standard for Binary
4558 1.1.2.2 nathanw Floating-Point Arithmetic.
4559 1.1.2.2 nathanw -------------------------------------------------------------------------------
4560 1.1.2.2 nathanw */
4561 1.1.2.2 nathanw float128 float128_round_to_int( float128 a )
4562 1.1.2.2 nathanw {
4563 1.1.2.2 nathanw flag aSign;
4564 1.1.2.2 nathanw int32 aExp;
4565 1.1.2.2 nathanw bits64 lastBitMask, roundBitsMask;
4566 1.1.2.2 nathanw int8 roundingMode;
4567 1.1.2.2 nathanw float128 z;
4568 1.1.2.2 nathanw
4569 1.1.2.2 nathanw aExp = extractFloat128Exp( a );
4570 1.1.2.2 nathanw if ( 0x402F <= aExp ) {
4571 1.1.2.2 nathanw if ( 0x406F <= aExp ) {
4572 1.1.2.2 nathanw if ( ( aExp == 0x7FFF )
4573 1.1.2.2 nathanw && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
4574 1.1.2.2 nathanw ) {
4575 1.1.2.2 nathanw return propagateFloat128NaN( a, a );
4576 1.1.2.2 nathanw }
4577 1.1.2.2 nathanw return a;
4578 1.1.2.2 nathanw }
4579 1.1.2.2 nathanw lastBitMask = 1;
4580 1.1.2.2 nathanw lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
4581 1.1.2.2 nathanw roundBitsMask = lastBitMask - 1;
4582 1.1.2.2 nathanw z = a;
4583 1.1.2.2 nathanw roundingMode = float_rounding_mode();
4584 1.1.2.2 nathanw if ( roundingMode == float_round_nearest_even ) {
4585 1.1.2.2 nathanw if ( lastBitMask ) {
4586 1.1.2.2 nathanw add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
4587 1.1.2.2 nathanw if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
4588 1.1.2.2 nathanw }
4589 1.1.2.2 nathanw else {
4590 1.1.2.2 nathanw if ( (sbits64) z.low < 0 ) {
4591 1.1.2.2 nathanw ++z.high;
4592 1.1.2.2 nathanw if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1;
4593 1.1.2.2 nathanw }
4594 1.1.2.2 nathanw }
4595 1.1.2.2 nathanw }
4596 1.1.2.2 nathanw else if ( roundingMode != float_round_to_zero ) {
4597 1.1.2.2 nathanw if ( extractFloat128Sign( z )
4598 1.1.2.2 nathanw ^ ( roundingMode == float_round_up ) ) {
4599 1.1.2.2 nathanw add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
4600 1.1.2.2 nathanw }
4601 1.1.2.2 nathanw }
4602 1.1.2.2 nathanw z.low &= ~ roundBitsMask;
4603 1.1.2.2 nathanw }
4604 1.1.2.2 nathanw else {
4605 1.1.2.2 nathanw if ( aExp < 0x3FFF ) {
4606 1.1.2.2 nathanw if ( ( ( (bits64) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
4607 1.1.2.2 nathanw float_set_inexact();
4608 1.1.2.2 nathanw aSign = extractFloat128Sign( a );
4609 1.1.2.2 nathanw switch ( float_rounding_mode() ) {
4610 1.1.2.2 nathanw case float_round_nearest_even:
4611 1.1.2.2 nathanw if ( ( aExp == 0x3FFE )
4612 1.1.2.2 nathanw && ( extractFloat128Frac0( a )
4613 1.1.2.2 nathanw | extractFloat128Frac1( a ) )
4614 1.1.2.2 nathanw ) {
4615 1.1.2.2 nathanw return packFloat128( aSign, 0x3FFF, 0, 0 );
4616 1.1.2.2 nathanw }
4617 1.1.2.2 nathanw break;
4618 1.1.2.2 nathanw case float_round_down:
4619 1.1.2.2 nathanw return
4620 1.1.2.2 nathanw aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
4621 1.1.2.2 nathanw : packFloat128( 0, 0, 0, 0 );
4622 1.1.2.2 nathanw case float_round_up:
4623 1.1.2.2 nathanw return
4624 1.1.2.2 nathanw aSign ? packFloat128( 1, 0, 0, 0 )
4625 1.1.2.2 nathanw : packFloat128( 0, 0x3FFF, 0, 0 );
4626 1.1.2.2 nathanw }
4627 1.1.2.2 nathanw return packFloat128( aSign, 0, 0, 0 );
4628 1.1.2.2 nathanw }
4629 1.1.2.2 nathanw lastBitMask = 1;
4630 1.1.2.2 nathanw lastBitMask <<= 0x402F - aExp;
4631 1.1.2.2 nathanw roundBitsMask = lastBitMask - 1;
4632 1.1.2.2 nathanw z.low = 0;
4633 1.1.2.2 nathanw z.high = a.high;
4634 1.1.2.2 nathanw roundingMode = float_rounding_mode();
4635 1.1.2.2 nathanw if ( roundingMode == float_round_nearest_even ) {
4636 1.1.2.2 nathanw z.high += lastBitMask>>1;
4637 1.1.2.2 nathanw if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
4638 1.1.2.2 nathanw z.high &= ~ lastBitMask;
4639 1.1.2.2 nathanw }
4640 1.1.2.2 nathanw }
4641 1.1.2.2 nathanw else if ( roundingMode != float_round_to_zero ) {
4642 1.1.2.2 nathanw if ( extractFloat128Sign( z )
4643 1.1.2.2 nathanw ^ ( roundingMode == float_round_up ) ) {
4644 1.1.2.2 nathanw z.high |= ( a.low != 0 );
4645 1.1.2.2 nathanw z.high += roundBitsMask;
4646 1.1.2.2 nathanw }
4647 1.1.2.2 nathanw }
4648 1.1.2.2 nathanw z.high &= ~ roundBitsMask;
4649 1.1.2.2 nathanw }
4650 1.1.2.2 nathanw if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
4651 1.1.2.2 nathanw float_set_inexact();
4652 1.1.2.2 nathanw }
4653 1.1.2.2 nathanw return z;
4654 1.1.2.2 nathanw
4655 1.1.2.2 nathanw }
4656 1.1.2.2 nathanw
4657 1.1.2.2 nathanw /*
4658 1.1.2.2 nathanw -------------------------------------------------------------------------------
4659 1.1.2.2 nathanw Returns the result of adding the absolute values of the quadruple-precision
4660 1.1.2.2 nathanw floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
4661 1.1.2.2 nathanw before being returned. `zSign' is ignored if the result is a NaN.
4662 1.1.2.2 nathanw The addition is performed according to the IEC/IEEE Standard for Binary
4663 1.1.2.2 nathanw Floating-Point Arithmetic.
4664 1.1.2.2 nathanw -------------------------------------------------------------------------------
4665 1.1.2.2 nathanw */
4666 1.1.2.2 nathanw static float128 addFloat128Sigs( float128 a, float128 b, flag zSign )
4667 1.1.2.2 nathanw {
4668 1.1.2.2 nathanw int32 aExp, bExp, zExp;
4669 1.1.2.2 nathanw bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
4670 1.1.2.2 nathanw int32 expDiff;
4671 1.1.2.2 nathanw
4672 1.1.2.2 nathanw aSig1 = extractFloat128Frac1( a );
4673 1.1.2.2 nathanw aSig0 = extractFloat128Frac0( a );
4674 1.1.2.2 nathanw aExp = extractFloat128Exp( a );
4675 1.1.2.2 nathanw bSig1 = extractFloat128Frac1( b );
4676 1.1.2.2 nathanw bSig0 = extractFloat128Frac0( b );
4677 1.1.2.2 nathanw bExp = extractFloat128Exp( b );
4678 1.1.2.2 nathanw expDiff = aExp - bExp;
4679 1.1.2.2 nathanw if ( 0 < expDiff ) {
4680 1.1.2.2 nathanw if ( aExp == 0x7FFF ) {
4681 1.1.2.2 nathanw if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
4682 1.1.2.2 nathanw return a;
4683 1.1.2.2 nathanw }
4684 1.1.2.2 nathanw if ( bExp == 0 ) {
4685 1.1.2.2 nathanw --expDiff;
4686 1.1.2.2 nathanw }
4687 1.1.2.2 nathanw else {
4688 1.1.2.2 nathanw bSig0 |= LIT64( 0x0001000000000000 );
4689 1.1.2.2 nathanw }
4690 1.1.2.2 nathanw shift128ExtraRightJamming(
4691 1.1.2.2 nathanw bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
4692 1.1.2.2 nathanw zExp = aExp;
4693 1.1.2.2 nathanw }
4694 1.1.2.2 nathanw else if ( expDiff < 0 ) {
4695 1.1.2.2 nathanw if ( bExp == 0x7FFF ) {
4696 1.1.2.2 nathanw if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
4697 1.1.2.2 nathanw return packFloat128( zSign, 0x7FFF, 0, 0 );
4698 1.1.2.2 nathanw }
4699 1.1.2.2 nathanw if ( aExp == 0 ) {
4700 1.1.2.2 nathanw ++expDiff;
4701 1.1.2.2 nathanw }
4702 1.1.2.2 nathanw else {
4703 1.1.2.2 nathanw aSig0 |= LIT64( 0x0001000000000000 );
4704 1.1.2.2 nathanw }
4705 1.1.2.2 nathanw shift128ExtraRightJamming(
4706 1.1.2.2 nathanw aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
4707 1.1.2.2 nathanw zExp = bExp;
4708 1.1.2.2 nathanw }
4709 1.1.2.2 nathanw else {
4710 1.1.2.2 nathanw if ( aExp == 0x7FFF ) {
4711 1.1.2.2 nathanw if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
4712 1.1.2.2 nathanw return propagateFloat128NaN( a, b );
4713 1.1.2.2 nathanw }
4714 1.1.2.2 nathanw return a;
4715 1.1.2.2 nathanw }
4716 1.1.2.2 nathanw add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
4717 1.1.2.2 nathanw if ( aExp == 0 ) return packFloat128( zSign, 0, zSig0, zSig1 );
4718 1.1.2.2 nathanw zSig2 = 0;
4719 1.1.2.2 nathanw zSig0 |= LIT64( 0x0002000000000000 );
4720 1.1.2.2 nathanw zExp = aExp;
4721 1.1.2.2 nathanw goto shiftRight1;
4722 1.1.2.2 nathanw }
4723 1.1.2.2 nathanw aSig0 |= LIT64( 0x0001000000000000 );
4724 1.1.2.2 nathanw add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
4725 1.1.2.2 nathanw --zExp;
4726 1.1.2.2 nathanw if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
4727 1.1.2.2 nathanw ++zExp;
4728 1.1.2.2 nathanw shiftRight1:
4729 1.1.2.2 nathanw shift128ExtraRightJamming(
4730 1.1.2.2 nathanw zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
4731 1.1.2.2 nathanw roundAndPack:
4732 1.1.2.2 nathanw return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
4733 1.1.2.2 nathanw
4734 1.1.2.2 nathanw }
4735 1.1.2.2 nathanw
4736 1.1.2.2 nathanw /*
4737 1.1.2.2 nathanw -------------------------------------------------------------------------------
4738 1.1.2.2 nathanw Returns the result of subtracting the absolute values of the quadruple-
4739 1.1.2.2 nathanw precision floating-point values `a' and `b'. If `zSign' is 1, the
4740 1.1.2.2 nathanw difference is negated before being returned. `zSign' is ignored if the
4741 1.1.2.2 nathanw result is a NaN. The subtraction is performed according to the IEC/IEEE
4742 1.1.2.2 nathanw Standard for Binary Floating-Point Arithmetic.
4743 1.1.2.2 nathanw -------------------------------------------------------------------------------
4744 1.1.2.2 nathanw */
4745 1.1.2.2 nathanw static float128 subFloat128Sigs( float128 a, float128 b, flag zSign )
4746 1.1.2.2 nathanw {
4747 1.1.2.2 nathanw int32 aExp, bExp, zExp;
4748 1.1.2.2 nathanw bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
4749 1.1.2.2 nathanw int32 expDiff;
4750 1.1.2.2 nathanw float128 z;
4751 1.1.2.2 nathanw
4752 1.1.2.2 nathanw aSig1 = extractFloat128Frac1( a );
4753 1.1.2.2 nathanw aSig0 = extractFloat128Frac0( a );
4754 1.1.2.2 nathanw aExp = extractFloat128Exp( a );
4755 1.1.2.2 nathanw bSig1 = extractFloat128Frac1( b );
4756 1.1.2.2 nathanw bSig0 = extractFloat128Frac0( b );
4757 1.1.2.2 nathanw bExp = extractFloat128Exp( b );
4758 1.1.2.2 nathanw expDiff = aExp - bExp;
4759 1.1.2.2 nathanw shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
4760 1.1.2.2 nathanw shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
4761 1.1.2.2 nathanw if ( 0 < expDiff ) goto aExpBigger;
4762 1.1.2.2 nathanw if ( expDiff < 0 ) goto bExpBigger;
4763 1.1.2.2 nathanw if ( aExp == 0x7FFF ) {
4764 1.1.2.2 nathanw if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
4765 1.1.2.2 nathanw return propagateFloat128NaN( a, b );
4766 1.1.2.2 nathanw }
4767 1.1.2.2 nathanw float_raise( float_flag_invalid );
4768 1.1.2.2 nathanw z.low = float128_default_nan_low;
4769 1.1.2.2 nathanw z.high = float128_default_nan_high;
4770 1.1.2.2 nathanw return z;
4771 1.1.2.2 nathanw }
4772 1.1.2.2 nathanw if ( aExp == 0 ) {
4773 1.1.2.2 nathanw aExp = 1;
4774 1.1.2.2 nathanw bExp = 1;
4775 1.1.2.2 nathanw }
4776 1.1.2.2 nathanw if ( bSig0 < aSig0 ) goto aBigger;
4777 1.1.2.2 nathanw if ( aSig0 < bSig0 ) goto bBigger;
4778 1.1.2.2 nathanw if ( bSig1 < aSig1 ) goto aBigger;
4779 1.1.2.2 nathanw if ( aSig1 < bSig1 ) goto bBigger;
4780 1.1.2.2 nathanw return packFloat128( float_rounding_mode() == float_round_down, 0, 0, 0 );
4781 1.1.2.2 nathanw bExpBigger:
4782 1.1.2.2 nathanw if ( bExp == 0x7FFF ) {
4783 1.1.2.2 nathanw if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
4784 1.1.2.2 nathanw return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
4785 1.1.2.2 nathanw }
4786 1.1.2.2 nathanw if ( aExp == 0 ) {
4787 1.1.2.2 nathanw ++expDiff;
4788 1.1.2.2 nathanw }
4789 1.1.2.2 nathanw else {
4790 1.1.2.2 nathanw aSig0 |= LIT64( 0x4000000000000000 );
4791 1.1.2.2 nathanw }
4792 1.1.2.2 nathanw shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
4793 1.1.2.2 nathanw bSig0 |= LIT64( 0x4000000000000000 );
4794 1.1.2.2 nathanw bBigger:
4795 1.1.2.2 nathanw sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
4796 1.1.2.2 nathanw zExp = bExp;
4797 1.1.2.2 nathanw zSign ^= 1;
4798 1.1.2.2 nathanw goto normalizeRoundAndPack;
4799 1.1.2.2 nathanw aExpBigger:
4800 1.1.2.2 nathanw if ( aExp == 0x7FFF ) {
4801 1.1.2.2 nathanw if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
4802 1.1.2.2 nathanw return a;
4803 1.1.2.2 nathanw }
4804 1.1.2.2 nathanw if ( bExp == 0 ) {
4805 1.1.2.2 nathanw --expDiff;
4806 1.1.2.2 nathanw }
4807 1.1.2.2 nathanw else {
4808 1.1.2.2 nathanw bSig0 |= LIT64( 0x4000000000000000 );
4809 1.1.2.2 nathanw }
4810 1.1.2.2 nathanw shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
4811 1.1.2.2 nathanw aSig0 |= LIT64( 0x4000000000000000 );
4812 1.1.2.2 nathanw aBigger:
4813 1.1.2.2 nathanw sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
4814 1.1.2.2 nathanw zExp = aExp;
4815 1.1.2.2 nathanw normalizeRoundAndPack:
4816 1.1.2.2 nathanw --zExp;
4817 1.1.2.2 nathanw return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 );
4818 1.1.2.2 nathanw
4819 1.1.2.2 nathanw }
4820 1.1.2.2 nathanw
4821 1.1.2.2 nathanw /*
4822 1.1.2.2 nathanw -------------------------------------------------------------------------------
4823 1.1.2.2 nathanw Returns the result of adding the quadruple-precision floating-point values
4824 1.1.2.2 nathanw `a' and `b'. The operation is performed according to the IEC/IEEE Standard
4825 1.1.2.2 nathanw for Binary Floating-Point Arithmetic.
4826 1.1.2.2 nathanw -------------------------------------------------------------------------------
4827 1.1.2.2 nathanw */
4828 1.1.2.2 nathanw float128 float128_add( float128 a, float128 b )
4829 1.1.2.2 nathanw {
4830 1.1.2.2 nathanw flag aSign, bSign;
4831 1.1.2.2 nathanw
4832 1.1.2.2 nathanw aSign = extractFloat128Sign( a );
4833 1.1.2.2 nathanw bSign = extractFloat128Sign( b );
4834 1.1.2.2 nathanw if ( aSign == bSign ) {
4835 1.1.2.2 nathanw return addFloat128Sigs( a, b, aSign );
4836 1.1.2.2 nathanw }
4837 1.1.2.2 nathanw else {
4838 1.1.2.2 nathanw return subFloat128Sigs( a, b, aSign );
4839 1.1.2.2 nathanw }
4840 1.1.2.2 nathanw
4841 1.1.2.2 nathanw }
4842 1.1.2.2 nathanw
4843 1.1.2.2 nathanw /*
4844 1.1.2.2 nathanw -------------------------------------------------------------------------------
4845 1.1.2.2 nathanw Returns the result of subtracting the quadruple-precision floating-point
4846 1.1.2.2 nathanw values `a' and `b'. The operation is performed according to the IEC/IEEE
4847 1.1.2.2 nathanw Standard for Binary Floating-Point Arithmetic.
4848 1.1.2.2 nathanw -------------------------------------------------------------------------------
4849 1.1.2.2 nathanw */
4850 1.1.2.2 nathanw float128 float128_sub( float128 a, float128 b )
4851 1.1.2.2 nathanw {
4852 1.1.2.2 nathanw flag aSign, bSign;
4853 1.1.2.2 nathanw
4854 1.1.2.2 nathanw aSign = extractFloat128Sign( a );
4855 1.1.2.2 nathanw bSign = extractFloat128Sign( b );
4856 1.1.2.2 nathanw if ( aSign == bSign ) {
4857 1.1.2.2 nathanw return subFloat128Sigs( a, b, aSign );
4858 1.1.2.2 nathanw }
4859 1.1.2.2 nathanw else {
4860 1.1.2.2 nathanw return addFloat128Sigs( a, b, aSign );
4861 1.1.2.2 nathanw }
4862 1.1.2.2 nathanw
4863 1.1.2.2 nathanw }
4864 1.1.2.2 nathanw
4865 1.1.2.2 nathanw /*
4866 1.1.2.2 nathanw -------------------------------------------------------------------------------
4867 1.1.2.2 nathanw Returns the result of multiplying the quadruple-precision floating-point
4868 1.1.2.2 nathanw values `a' and `b'. The operation is performed according to the IEC/IEEE
4869 1.1.2.2 nathanw Standard for Binary Floating-Point Arithmetic.
4870 1.1.2.2 nathanw -------------------------------------------------------------------------------
4871 1.1.2.2 nathanw */
4872 1.1.2.2 nathanw float128 float128_mul( float128 a, float128 b )
4873 1.1.2.2 nathanw {
4874 1.1.2.2 nathanw flag aSign, bSign, zSign;
4875 1.1.2.2 nathanw int32 aExp, bExp, zExp;
4876 1.1.2.2 nathanw bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
4877 1.1.2.2 nathanw float128 z;
4878 1.1.2.2 nathanw
4879 1.1.2.2 nathanw aSig1 = extractFloat128Frac1( a );
4880 1.1.2.2 nathanw aSig0 = extractFloat128Frac0( a );
4881 1.1.2.2 nathanw aExp = extractFloat128Exp( a );
4882 1.1.2.2 nathanw aSign = extractFloat128Sign( a );
4883 1.1.2.2 nathanw bSig1 = extractFloat128Frac1( b );
4884 1.1.2.2 nathanw bSig0 = extractFloat128Frac0( b );
4885 1.1.2.2 nathanw bExp = extractFloat128Exp( b );
4886 1.1.2.2 nathanw bSign = extractFloat128Sign( b );
4887 1.1.2.2 nathanw zSign = aSign ^ bSign;
4888 1.1.2.2 nathanw if ( aExp == 0x7FFF ) {
4889 1.1.2.2 nathanw if ( ( aSig0 | aSig1 )
4890 1.1.2.2 nathanw || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
4891 1.1.2.2 nathanw return propagateFloat128NaN( a, b );
4892 1.1.2.2 nathanw }
4893 1.1.2.2 nathanw if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
4894 1.1.2.2 nathanw return packFloat128( zSign, 0x7FFF, 0, 0 );
4895 1.1.2.2 nathanw }
4896 1.1.2.2 nathanw if ( bExp == 0x7FFF ) {
4897 1.1.2.2 nathanw if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
4898 1.1.2.2 nathanw if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
4899 1.1.2.2 nathanw invalid:
4900 1.1.2.2 nathanw float_raise( float_flag_invalid );
4901 1.1.2.2 nathanw z.low = float128_default_nan_low;
4902 1.1.2.2 nathanw z.high = float128_default_nan_high;
4903 1.1.2.2 nathanw return z;
4904 1.1.2.2 nathanw }
4905 1.1.2.2 nathanw return packFloat128( zSign, 0x7FFF, 0, 0 );
4906 1.1.2.2 nathanw }
4907 1.1.2.2 nathanw if ( aExp == 0 ) {
4908 1.1.2.2 nathanw if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
4909 1.1.2.2 nathanw normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
4910 1.1.2.2 nathanw }
4911 1.1.2.2 nathanw if ( bExp == 0 ) {
4912 1.1.2.2 nathanw if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
4913 1.1.2.2 nathanw normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
4914 1.1.2.2 nathanw }
4915 1.1.2.2 nathanw zExp = aExp + bExp - 0x4000;
4916 1.1.2.2 nathanw aSig0 |= LIT64( 0x0001000000000000 );
4917 1.1.2.2 nathanw shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
4918 1.1.2.2 nathanw mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
4919 1.1.2.2 nathanw add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
4920 1.1.2.2 nathanw zSig2 |= ( zSig3 != 0 );
4921 1.1.2.2 nathanw if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
4922 1.1.2.2 nathanw shift128ExtraRightJamming(
4923 1.1.2.2 nathanw zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
4924 1.1.2.2 nathanw ++zExp;
4925 1.1.2.2 nathanw }
4926 1.1.2.2 nathanw return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
4927 1.1.2.2 nathanw
4928 1.1.2.2 nathanw }
4929 1.1.2.2 nathanw
4930 1.1.2.2 nathanw /*
4931 1.1.2.2 nathanw -------------------------------------------------------------------------------
4932 1.1.2.2 nathanw Returns the result of dividing the quadruple-precision floating-point value
4933 1.1.2.2 nathanw `a' by the corresponding value `b'. The operation is performed according to
4934 1.1.2.2 nathanw the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4935 1.1.2.2 nathanw -------------------------------------------------------------------------------
4936 1.1.2.2 nathanw */
4937 1.1.2.2 nathanw float128 float128_div( float128 a, float128 b )
4938 1.1.2.2 nathanw {
4939 1.1.2.2 nathanw flag aSign, bSign, zSign;
4940 1.1.2.2 nathanw int32 aExp, bExp, zExp;
4941 1.1.2.2 nathanw bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
4942 1.1.2.2 nathanw bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
4943 1.1.2.2 nathanw float128 z;
4944 1.1.2.2 nathanw
4945 1.1.2.2 nathanw aSig1 = extractFloat128Frac1( a );
4946 1.1.2.2 nathanw aSig0 = extractFloat128Frac0( a );
4947 1.1.2.2 nathanw aExp = extractFloat128Exp( a );
4948 1.1.2.2 nathanw aSign = extractFloat128Sign( a );
4949 1.1.2.2 nathanw bSig1 = extractFloat128Frac1( b );
4950 1.1.2.2 nathanw bSig0 = extractFloat128Frac0( b );
4951 1.1.2.2 nathanw bExp = extractFloat128Exp( b );
4952 1.1.2.2 nathanw bSign = extractFloat128Sign( b );
4953 1.1.2.2 nathanw zSign = aSign ^ bSign;
4954 1.1.2.2 nathanw if ( aExp == 0x7FFF ) {
4955 1.1.2.2 nathanw if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
4956 1.1.2.2 nathanw if ( bExp == 0x7FFF ) {
4957 1.1.2.2 nathanw if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
4958 1.1.2.2 nathanw goto invalid;
4959 1.1.2.2 nathanw }
4960 1.1.2.2 nathanw return packFloat128( zSign, 0x7FFF, 0, 0 );
4961 1.1.2.2 nathanw }
4962 1.1.2.2 nathanw if ( bExp == 0x7FFF ) {
4963 1.1.2.2 nathanw if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
4964 1.1.2.2 nathanw return packFloat128( zSign, 0, 0, 0 );
4965 1.1.2.2 nathanw }
4966 1.1.2.2 nathanw if ( bExp == 0 ) {
4967 1.1.2.2 nathanw if ( ( bSig0 | bSig1 ) == 0 ) {
4968 1.1.2.2 nathanw if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
4969 1.1.2.2 nathanw invalid:
4970 1.1.2.2 nathanw float_raise( float_flag_invalid );
4971 1.1.2.2 nathanw z.low = float128_default_nan_low;
4972 1.1.2.2 nathanw z.high = float128_default_nan_high;
4973 1.1.2.2 nathanw return z;
4974 1.1.2.2 nathanw }
4975 1.1.2.2 nathanw float_raise( float_flag_divbyzero );
4976 1.1.2.2 nathanw return packFloat128( zSign, 0x7FFF, 0, 0 );
4977 1.1.2.2 nathanw }
4978 1.1.2.2 nathanw normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
4979 1.1.2.2 nathanw }
4980 1.1.2.2 nathanw if ( aExp == 0 ) {
4981 1.1.2.2 nathanw if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
4982 1.1.2.2 nathanw normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
4983 1.1.2.2 nathanw }
4984 1.1.2.2 nathanw zExp = aExp - bExp + 0x3FFD;
4985 1.1.2.2 nathanw shortShift128Left(
4986 1.1.2.2 nathanw aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
4987 1.1.2.2 nathanw shortShift128Left(
4988 1.1.2.2 nathanw bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
4989 1.1.2.2 nathanw if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
4990 1.1.2.2 nathanw shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
4991 1.1.2.2 nathanw ++zExp;
4992 1.1.2.2 nathanw }
4993 1.1.2.2 nathanw zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
4994 1.1.2.2 nathanw mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
4995 1.1.2.2 nathanw sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
4996 1.1.2.2 nathanw while ( (sbits64) rem0 < 0 ) {
4997 1.1.2.2 nathanw --zSig0;
4998 1.1.2.2 nathanw add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
4999 1.1.2.2 nathanw }
5000 1.1.2.2 nathanw zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
5001 1.1.2.2 nathanw if ( ( zSig1 & 0x3FFF ) <= 4 ) {
5002 1.1.2.2 nathanw mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
5003 1.1.2.2 nathanw sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
5004 1.1.2.2 nathanw while ( (sbits64) rem1 < 0 ) {
5005 1.1.2.2 nathanw --zSig1;
5006 1.1.2.2 nathanw add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
5007 1.1.2.2 nathanw }
5008 1.1.2.2 nathanw zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5009 1.1.2.2 nathanw }
5010 1.1.2.2 nathanw shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
5011 1.1.2.2 nathanw return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
5012 1.1.2.2 nathanw
5013 1.1.2.2 nathanw }
5014 1.1.2.2 nathanw
5015 1.1.2.2 nathanw /*
5016 1.1.2.2 nathanw -------------------------------------------------------------------------------
5017 1.1.2.2 nathanw Returns the remainder of the quadruple-precision floating-point value `a'
5018 1.1.2.2 nathanw with respect to the corresponding value `b'. The operation is performed
5019 1.1.2.2 nathanw according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5020 1.1.2.2 nathanw -------------------------------------------------------------------------------
5021 1.1.2.2 nathanw */
5022 1.1.2.2 nathanw float128 float128_rem( float128 a, float128 b )
5023 1.1.2.2 nathanw {
5024 1.1.2.2 nathanw flag aSign, bSign, zSign;
5025 1.1.2.2 nathanw int32 aExp, bExp, expDiff;
5026 1.1.2.2 nathanw bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
5027 1.1.2.2 nathanw bits64 allZero, alternateASig0, alternateASig1, sigMean1;
5028 1.1.2.2 nathanw sbits64 sigMean0;
5029 1.1.2.2 nathanw float128 z;
5030 1.1.2.2 nathanw
5031 1.1.2.2 nathanw aSig1 = extractFloat128Frac1( a );
5032 1.1.2.2 nathanw aSig0 = extractFloat128Frac0( a );
5033 1.1.2.2 nathanw aExp = extractFloat128Exp( a );
5034 1.1.2.2 nathanw aSign = extractFloat128Sign( a );
5035 1.1.2.2 nathanw bSig1 = extractFloat128Frac1( b );
5036 1.1.2.2 nathanw bSig0 = extractFloat128Frac0( b );
5037 1.1.2.2 nathanw bExp = extractFloat128Exp( b );
5038 1.1.2.2 nathanw bSign = extractFloat128Sign( b );
5039 1.1.2.2 nathanw if ( aExp == 0x7FFF ) {
5040 1.1.2.2 nathanw if ( ( aSig0 | aSig1 )
5041 1.1.2.2 nathanw || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
5042 1.1.2.2 nathanw return propagateFloat128NaN( a, b );
5043 1.1.2.2 nathanw }
5044 1.1.2.2 nathanw goto invalid;
5045 1.1.2.2 nathanw }
5046 1.1.2.2 nathanw if ( bExp == 0x7FFF ) {
5047 1.1.2.2 nathanw if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
5048 1.1.2.2 nathanw return a;
5049 1.1.2.2 nathanw }
5050 1.1.2.2 nathanw if ( bExp == 0 ) {
5051 1.1.2.2 nathanw if ( ( bSig0 | bSig1 ) == 0 ) {
5052 1.1.2.2 nathanw invalid:
5053 1.1.2.2 nathanw float_raise( float_flag_invalid );
5054 1.1.2.2 nathanw z.low = float128_default_nan_low;
5055 1.1.2.2 nathanw z.high = float128_default_nan_high;
5056 1.1.2.2 nathanw return z;
5057 1.1.2.2 nathanw }
5058 1.1.2.2 nathanw normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
5059 1.1.2.2 nathanw }
5060 1.1.2.2 nathanw if ( aExp == 0 ) {
5061 1.1.2.2 nathanw if ( ( aSig0 | aSig1 ) == 0 ) return a;
5062 1.1.2.2 nathanw normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5063 1.1.2.2 nathanw }
5064 1.1.2.2 nathanw expDiff = aExp - bExp;
5065 1.1.2.2 nathanw if ( expDiff < -1 ) return a;
5066 1.1.2.2 nathanw shortShift128Left(
5067 1.1.2.2 nathanw aSig0 | LIT64( 0x0001000000000000 ),
5068 1.1.2.2 nathanw aSig1,
5069 1.1.2.2 nathanw 15 - ( expDiff < 0 ),
5070 1.1.2.2 nathanw &aSig0,
5071 1.1.2.2 nathanw &aSig1
5072 1.1.2.2 nathanw );
5073 1.1.2.2 nathanw shortShift128Left(
5074 1.1.2.2 nathanw bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
5075 1.1.2.2 nathanw q = le128( bSig0, bSig1, aSig0, aSig1 );
5076 1.1.2.2 nathanw if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
5077 1.1.2.2 nathanw expDiff -= 64;
5078 1.1.2.2 nathanw while ( 0 < expDiff ) {
5079 1.1.2.2 nathanw q = estimateDiv128To64( aSig0, aSig1, bSig0 );
5080 1.1.2.2 nathanw q = ( 4 < q ) ? q - 4 : 0;
5081 1.1.2.2 nathanw mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
5082 1.1.2.2 nathanw shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
5083 1.1.2.2 nathanw shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
5084 1.1.2.2 nathanw sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
5085 1.1.2.2 nathanw expDiff -= 61;
5086 1.1.2.2 nathanw }
5087 1.1.2.2 nathanw if ( -64 < expDiff ) {
5088 1.1.2.2 nathanw q = estimateDiv128To64( aSig0, aSig1, bSig0 );
5089 1.1.2.2 nathanw q = ( 4 < q ) ? q - 4 : 0;
5090 1.1.2.2 nathanw q >>= - expDiff;
5091 1.1.2.2 nathanw shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
5092 1.1.2.2 nathanw expDiff += 52;
5093 1.1.2.2 nathanw if ( expDiff < 0 ) {
5094 1.1.2.2 nathanw shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
5095 1.1.2.2 nathanw }
5096 1.1.2.2 nathanw else {
5097 1.1.2.2 nathanw shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
5098 1.1.2.2 nathanw }
5099 1.1.2.2 nathanw mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
5100 1.1.2.2 nathanw sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
5101 1.1.2.2 nathanw }
5102 1.1.2.2 nathanw else {
5103 1.1.2.2 nathanw shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
5104 1.1.2.2 nathanw shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
5105 1.1.2.2 nathanw }
5106 1.1.2.2 nathanw do {
5107 1.1.2.2 nathanw alternateASig0 = aSig0;
5108 1.1.2.2 nathanw alternateASig1 = aSig1;
5109 1.1.2.2 nathanw ++q;
5110 1.1.2.2 nathanw sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
5111 1.1.2.2 nathanw } while ( 0 <= (sbits64) aSig0 );
5112 1.1.2.2 nathanw add128(
5113 1.1.2.2 nathanw aSig0, aSig1, alternateASig0, alternateASig1, &sigMean0, &sigMean1 );
5114 1.1.2.2 nathanw if ( ( sigMean0 < 0 )
5115 1.1.2.2 nathanw || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
5116 1.1.2.2 nathanw aSig0 = alternateASig0;
5117 1.1.2.2 nathanw aSig1 = alternateASig1;
5118 1.1.2.2 nathanw }
5119 1.1.2.2 nathanw zSign = ( (sbits64) aSig0 < 0 );
5120 1.1.2.2 nathanw if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
5121 1.1.2.2 nathanw return
5122 1.1.2.2 nathanw normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 );
5123 1.1.2.2 nathanw
5124 1.1.2.2 nathanw }
5125 1.1.2.2 nathanw
5126 1.1.2.2 nathanw /*
5127 1.1.2.2 nathanw -------------------------------------------------------------------------------
5128 1.1.2.2 nathanw Returns the square root of the quadruple-precision floating-point value `a'.
5129 1.1.2.2 nathanw The operation is performed according to the IEC/IEEE Standard for Binary
5130 1.1.2.2 nathanw Floating-Point Arithmetic.
5131 1.1.2.2 nathanw -------------------------------------------------------------------------------
5132 1.1.2.2 nathanw */
5133 1.1.2.2 nathanw float128 float128_sqrt( float128 a )
5134 1.1.2.2 nathanw {
5135 1.1.2.2 nathanw flag aSign;
5136 1.1.2.2 nathanw int32 aExp, zExp;
5137 1.1.2.2 nathanw bits64 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
5138 1.1.2.2 nathanw bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
5139 1.1.2.2 nathanw float128 z;
5140 1.1.2.2 nathanw
5141 1.1.2.2 nathanw aSig1 = extractFloat128Frac1( a );
5142 1.1.2.2 nathanw aSig0 = extractFloat128Frac0( a );
5143 1.1.2.2 nathanw aExp = extractFloat128Exp( a );
5144 1.1.2.2 nathanw aSign = extractFloat128Sign( a );
5145 1.1.2.2 nathanw if ( aExp == 0x7FFF ) {
5146 1.1.2.2 nathanw if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a );
5147 1.1.2.2 nathanw if ( ! aSign ) return a;
5148 1.1.2.2 nathanw goto invalid;
5149 1.1.2.2 nathanw }
5150 1.1.2.2 nathanw if ( aSign ) {
5151 1.1.2.2 nathanw if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
5152 1.1.2.2 nathanw invalid:
5153 1.1.2.2 nathanw float_raise( float_flag_invalid );
5154 1.1.2.2 nathanw z.low = float128_default_nan_low;
5155 1.1.2.2 nathanw z.high = float128_default_nan_high;
5156 1.1.2.2 nathanw return z;
5157 1.1.2.2 nathanw }
5158 1.1.2.2 nathanw if ( aExp == 0 ) {
5159 1.1.2.2 nathanw if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
5160 1.1.2.2 nathanw normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5161 1.1.2.2 nathanw }
5162 1.1.2.2 nathanw zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
5163 1.1.2.2 nathanw aSig0 |= LIT64( 0x0001000000000000 );
5164 1.1.2.2 nathanw zSig0 = estimateSqrt32( aExp, aSig0>>17 );
5165 1.1.2.2 nathanw shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
5166 1.1.2.2 nathanw zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
5167 1.1.2.2 nathanw doubleZSig0 = zSig0<<1;
5168 1.1.2.2 nathanw mul64To128( zSig0, zSig0, &term0, &term1 );
5169 1.1.2.2 nathanw sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
5170 1.1.2.2 nathanw while ( (sbits64) rem0 < 0 ) {
5171 1.1.2.2 nathanw --zSig0;
5172 1.1.2.2 nathanw doubleZSig0 -= 2;
5173 1.1.2.2 nathanw add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
5174 1.1.2.2 nathanw }
5175 1.1.2.2 nathanw zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
5176 1.1.2.2 nathanw if ( ( zSig1 & 0x1FFF ) <= 5 ) {
5177 1.1.2.2 nathanw if ( zSig1 == 0 ) zSig1 = 1;
5178 1.1.2.2 nathanw mul64To128( doubleZSig0, zSig1, &term1, &term2 );
5179 1.1.2.2 nathanw sub128( rem1, 0, term1, term2, &rem1, &rem2 );
5180 1.1.2.2 nathanw mul64To128( zSig1, zSig1, &term2, &term3 );
5181 1.1.2.2 nathanw sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
5182 1.1.2.2 nathanw while ( (sbits64) rem1 < 0 ) {
5183 1.1.2.2 nathanw --zSig1;
5184 1.1.2.2 nathanw shortShift128Left( 0, zSig1, 1, &term2, &term3 );
5185 1.1.2.2 nathanw term3 |= 1;
5186 1.1.2.2 nathanw term2 |= doubleZSig0;
5187 1.1.2.2 nathanw add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
5188 1.1.2.2 nathanw }
5189 1.1.2.2 nathanw zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5190 1.1.2.2 nathanw }
5191 1.1.2.2 nathanw shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
5192 1.1.2.2 nathanw return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 );
5193 1.1.2.2 nathanw
5194 1.1.2.2 nathanw }
5195 1.1.2.2 nathanw
5196 1.1.2.2 nathanw /*
5197 1.1.2.2 nathanw -------------------------------------------------------------------------------
5198 1.1.2.2 nathanw Returns 1 if the quadruple-precision floating-point value `a' is equal to
5199 1.1.2.2 nathanw the corresponding value `b', and 0 otherwise. The comparison is performed
5200 1.1.2.2 nathanw according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5201 1.1.2.2 nathanw -------------------------------------------------------------------------------
5202 1.1.2.2 nathanw */
5203 1.1.2.2 nathanw flag float128_eq( float128 a, float128 b )
5204 1.1.2.2 nathanw {
5205 1.1.2.2 nathanw
5206 1.1.2.2 nathanw if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
5207 1.1.2.2 nathanw && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5208 1.1.2.2 nathanw || ( ( extractFloat128Exp( b ) == 0x7FFF )
5209 1.1.2.2 nathanw && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5210 1.1.2.2 nathanw ) {
5211 1.1.2.2 nathanw if ( float128_is_signaling_nan( a )
5212 1.1.2.2 nathanw || float128_is_signaling_nan( b ) ) {
5213 1.1.2.2 nathanw float_raise( float_flag_invalid );
5214 1.1.2.2 nathanw }
5215 1.1.2.2 nathanw return 0;
5216 1.1.2.2 nathanw }
5217 1.1.2.2 nathanw return
5218 1.1.2.2 nathanw ( a.low == b.low )
5219 1.1.2.2 nathanw && ( ( a.high == b.high )
5220 1.1.2.2 nathanw || ( ( a.low == 0 )
5221 1.1.2.2 nathanw && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
5222 1.1.2.2 nathanw );
5223 1.1.2.2 nathanw
5224 1.1.2.2 nathanw }
5225 1.1.2.2 nathanw
5226 1.1.2.2 nathanw /*
5227 1.1.2.2 nathanw -------------------------------------------------------------------------------
5228 1.1.2.2 nathanw Returns 1 if the quadruple-precision floating-point value `a' is less than
5229 1.1.2.2 nathanw or equal to the corresponding value `b', and 0 otherwise. The comparison
5230 1.1.2.2 nathanw is performed according to the IEC/IEEE Standard for Binary Floating-Point
5231 1.1.2.2 nathanw Arithmetic.
5232 1.1.2.2 nathanw -------------------------------------------------------------------------------
5233 1.1.2.2 nathanw */
5234 1.1.2.2 nathanw flag float128_le( float128 a, float128 b )
5235 1.1.2.2 nathanw {
5236 1.1.2.2 nathanw flag aSign, bSign;
5237 1.1.2.2 nathanw
5238 1.1.2.2 nathanw if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
5239 1.1.2.2 nathanw && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5240 1.1.2.2 nathanw || ( ( extractFloat128Exp( b ) == 0x7FFF )
5241 1.1.2.2 nathanw && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5242 1.1.2.2 nathanw ) {
5243 1.1.2.2 nathanw float_raise( float_flag_invalid );
5244 1.1.2.2 nathanw return 0;
5245 1.1.2.2 nathanw }
5246 1.1.2.2 nathanw aSign = extractFloat128Sign( a );
5247 1.1.2.2 nathanw bSign = extractFloat128Sign( b );
5248 1.1.2.2 nathanw if ( aSign != bSign ) {
5249 1.1.2.2 nathanw return
5250 1.1.2.2 nathanw aSign
5251 1.1.2.2 nathanw || ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5252 1.1.2.2 nathanw == 0 );
5253 1.1.2.2 nathanw }
5254 1.1.2.2 nathanw return
5255 1.1.2.2 nathanw aSign ? le128( b.high, b.low, a.high, a.low )
5256 1.1.2.2 nathanw : le128( a.high, a.low, b.high, b.low );
5257 1.1.2.2 nathanw
5258 1.1.2.2 nathanw }
5259 1.1.2.2 nathanw
5260 1.1.2.2 nathanw /*
5261 1.1.2.2 nathanw -------------------------------------------------------------------------------
5262 1.1.2.2 nathanw Returns 1 if the quadruple-precision floating-point value `a' is less than
5263 1.1.2.2 nathanw the corresponding value `b', and 0 otherwise. The comparison is performed
5264 1.1.2.2 nathanw according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5265 1.1.2.2 nathanw -------------------------------------------------------------------------------
5266 1.1.2.2 nathanw */
5267 1.1.2.2 nathanw flag float128_lt( float128 a, float128 b )
5268 1.1.2.2 nathanw {
5269 1.1.2.2 nathanw flag aSign, bSign;
5270 1.1.2.2 nathanw
5271 1.1.2.2 nathanw if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
5272 1.1.2.2 nathanw && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5273 1.1.2.2 nathanw || ( ( extractFloat128Exp( b ) == 0x7FFF )
5274 1.1.2.2 nathanw && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5275 1.1.2.2 nathanw ) {
5276 1.1.2.2 nathanw float_raise( float_flag_invalid );
5277 1.1.2.2 nathanw return 0;
5278 1.1.2.2 nathanw }
5279 1.1.2.2 nathanw aSign = extractFloat128Sign( a );
5280 1.1.2.2 nathanw bSign = extractFloat128Sign( b );
5281 1.1.2.2 nathanw if ( aSign != bSign ) {
5282 1.1.2.2 nathanw return
5283 1.1.2.2 nathanw aSign
5284 1.1.2.2 nathanw && ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5285 1.1.2.2 nathanw != 0 );
5286 1.1.2.2 nathanw }
5287 1.1.2.2 nathanw return
5288 1.1.2.2 nathanw aSign ? lt128( b.high, b.low, a.high, a.low )
5289 1.1.2.2 nathanw : lt128( a.high, a.low, b.high, b.low );
5290 1.1.2.2 nathanw
5291 1.1.2.2 nathanw }
5292 1.1.2.2 nathanw
5293 1.1.2.2 nathanw /*
5294 1.1.2.2 nathanw -------------------------------------------------------------------------------
5295 1.1.2.2 nathanw Returns 1 if the quadruple-precision floating-point value `a' is equal to
5296 1.1.2.2 nathanw the corresponding value `b', and 0 otherwise. The invalid exception is
5297 1.1.2.2 nathanw raised if either operand is a NaN. Otherwise, the comparison is performed
5298 1.1.2.2 nathanw according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5299 1.1.2.2 nathanw -------------------------------------------------------------------------------
5300 1.1.2.2 nathanw */
5301 1.1.2.2 nathanw flag float128_eq_signaling( float128 a, float128 b )
5302 1.1.2.2 nathanw {
5303 1.1.2.2 nathanw
5304 1.1.2.2 nathanw if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
5305 1.1.2.2 nathanw && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5306 1.1.2.2 nathanw || ( ( extractFloat128Exp( b ) == 0x7FFF )
5307 1.1.2.2 nathanw && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5308 1.1.2.2 nathanw ) {
5309 1.1.2.2 nathanw float_raise( float_flag_invalid );
5310 1.1.2.2 nathanw return 0;
5311 1.1.2.2 nathanw }
5312 1.1.2.2 nathanw return
5313 1.1.2.2 nathanw ( a.low == b.low )
5314 1.1.2.2 nathanw && ( ( a.high == b.high )
5315 1.1.2.2 nathanw || ( ( a.low == 0 )
5316 1.1.2.2 nathanw && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
5317 1.1.2.2 nathanw );
5318 1.1.2.2 nathanw
5319 1.1.2.2 nathanw }
5320 1.1.2.2 nathanw
5321 1.1.2.2 nathanw /*
5322 1.1.2.2 nathanw -------------------------------------------------------------------------------
5323 1.1.2.2 nathanw Returns 1 if the quadruple-precision floating-point value `a' is less than
5324 1.1.2.2 nathanw or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
5325 1.1.2.2 nathanw cause an exception. Otherwise, the comparison is performed according to the
5326 1.1.2.2 nathanw IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5327 1.1.2.2 nathanw -------------------------------------------------------------------------------
5328 1.1.2.2 nathanw */
5329 1.1.2.2 nathanw flag float128_le_quiet( float128 a, float128 b )
5330 1.1.2.2 nathanw {
5331 1.1.2.2 nathanw flag aSign, bSign;
5332 1.1.2.2 nathanw
5333 1.1.2.2 nathanw if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
5334 1.1.2.2 nathanw && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5335 1.1.2.2 nathanw || ( ( extractFloat128Exp( b ) == 0x7FFF )
5336 1.1.2.2 nathanw && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5337 1.1.2.2 nathanw ) {
5338 1.1.2.2 nathanw if ( float128_is_signaling_nan( a )
5339 1.1.2.2 nathanw || float128_is_signaling_nan( b ) ) {
5340 1.1.2.2 nathanw float_raise( float_flag_invalid );
5341 1.1.2.2 nathanw }
5342 1.1.2.2 nathanw return 0;
5343 1.1.2.2 nathanw }
5344 1.1.2.2 nathanw aSign = extractFloat128Sign( a );
5345 1.1.2.2 nathanw bSign = extractFloat128Sign( b );
5346 1.1.2.2 nathanw if ( aSign != bSign ) {
5347 1.1.2.2 nathanw return
5348 1.1.2.2 nathanw aSign
5349 1.1.2.2 nathanw || ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5350 1.1.2.2 nathanw == 0 );
5351 1.1.2.2 nathanw }
5352 1.1.2.2 nathanw return
5353 1.1.2.2 nathanw aSign ? le128( b.high, b.low, a.high, a.low )
5354 1.1.2.2 nathanw : le128( a.high, a.low, b.high, b.low );
5355 1.1.2.2 nathanw
5356 1.1.2.2 nathanw }
5357 1.1.2.2 nathanw
5358 1.1.2.2 nathanw /*
5359 1.1.2.2 nathanw -------------------------------------------------------------------------------
5360 1.1.2.2 nathanw Returns 1 if the quadruple-precision floating-point value `a' is less than
5361 1.1.2.2 nathanw the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
5362 1.1.2.2 nathanw exception. Otherwise, the comparison is performed according to the IEC/IEEE
5363 1.1.2.2 nathanw Standard for Binary Floating-Point Arithmetic.
5364 1.1.2.2 nathanw -------------------------------------------------------------------------------
5365 1.1.2.2 nathanw */
5366 1.1.2.2 nathanw flag float128_lt_quiet( float128 a, float128 b )
5367 1.1.2.2 nathanw {
5368 1.1.2.2 nathanw flag aSign, bSign;
5369 1.1.2.2 nathanw
5370 1.1.2.2 nathanw if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
5371 1.1.2.2 nathanw && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5372 1.1.2.2 nathanw || ( ( extractFloat128Exp( b ) == 0x7FFF )
5373 1.1.2.2 nathanw && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5374 1.1.2.2 nathanw ) {
5375 1.1.2.2 nathanw if ( float128_is_signaling_nan( a )
5376 1.1.2.2 nathanw || float128_is_signaling_nan( b ) ) {
5377 1.1.2.2 nathanw float_raise( float_flag_invalid );
5378 1.1.2.2 nathanw }
5379 1.1.2.2 nathanw return 0;
5380 1.1.2.2 nathanw }
5381 1.1.2.2 nathanw aSign = extractFloat128Sign( a );
5382 1.1.2.2 nathanw bSign = extractFloat128Sign( b );
5383 1.1.2.2 nathanw if ( aSign != bSign ) {
5384 1.1.2.2 nathanw return
5385 1.1.2.2 nathanw aSign
5386 1.1.2.2 nathanw && ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5387 1.1.2.2 nathanw != 0 );
5388 1.1.2.2 nathanw }
5389 1.1.2.2 nathanw return
5390 1.1.2.2 nathanw aSign ? lt128( b.high, b.low, a.high, a.low )
5391 1.1.2.2 nathanw : lt128( a.high, a.low, b.high, b.low );
5392 1.1.2.2 nathanw
5393 1.1.2.2 nathanw }
5394 1.1.2.2 nathanw
5395 1.1.2.2 nathanw #endif
5396 1.1.2.2 nathanw
5397 1.1.2.2 nathanw
5398 1.1.2.2 nathanw #if defined(SOFTFLOAT_FOR_GCC) && defined(SOFTFLOAT_NEED_FIXUNS)
5399 1.1.2.2 nathanw
5400 1.1.2.2 nathanw /*
5401 1.1.2.2 nathanw * These two routines are not part of the original softfloat distribution.
5402 1.1.2.2 nathanw *
5403 1.1.2.2 nathanw * They are based on the corresponding conversions to integer but return
5404 1.1.2.2 nathanw * unsigned numbers instead since these functions are required by GCC.
5405 1.1.2.2 nathanw *
5406 1.1.2.2 nathanw * Added by Mark Brinicombe <mark (at) netbsd.org> 27/09/97
5407 1.1.2.2 nathanw *
5408 1.1.2.2 nathanw * float64 version overhauled for SoftFloat 2a [bjh21 2000-07-15]
5409 1.1.2.2 nathanw */
5410 1.1.2.2 nathanw
5411 1.1.2.2 nathanw /*
5412 1.1.2.2 nathanw -------------------------------------------------------------------------------
5413 1.1.2.2 nathanw Returns the result of converting the double-precision floating-point value
5414 1.1.2.2 nathanw `a' to the 32-bit unsigned integer format. The conversion is
5415 1.1.2.2 nathanw performed according to the IEC/IEEE Standard for Binary Floating-point
5416 1.1.2.2 nathanw Arithmetic, except that the conversion is always rounded toward zero. If
5417 1.1.2.2 nathanw `a' is a NaN, the largest positive integer is returned. If the conversion
5418 1.1.2.2 nathanw overflows, the largest integer positive is returned.
5419 1.1.2.2 nathanw -------------------------------------------------------------------------------
5420 1.1.2.2 nathanw */
5421 1.1.2.2 nathanw uint32 float64_to_uint32_round_to_zero( float64 a )
5422 1.1.2.2 nathanw {
5423 1.1.2.2 nathanw flag aSign;
5424 1.1.2.2 nathanw int16 aExp, shiftCount;
5425 1.1.2.2 nathanw bits64 aSig, savedASig;
5426 1.1.2.2 nathanw uint32 z;
5427 1.1.2.2 nathanw
5428 1.1.2.2 nathanw aSig = extractFloat64Frac( a );
5429 1.1.2.2 nathanw aExp = extractFloat64Exp( a );
5430 1.1.2.2 nathanw aSign = extractFloat64Sign( a );
5431 1.1.2.2 nathanw
5432 1.1.2.2 nathanw if (aSign) {
5433 1.1.2.2 nathanw float_raise( float_flag_invalid );
5434 1.1.2.2 nathanw return(0);
5435 1.1.2.2 nathanw }
5436 1.1.2.2 nathanw
5437 1.1.2.2 nathanw if ( 0x41E < aExp ) {
5438 1.1.2.2 nathanw float_raise( float_flag_invalid );
5439 1.1.2.2 nathanw return 0xffffffff;
5440 1.1.2.2 nathanw }
5441 1.1.2.2 nathanw else if ( aExp < 0x3FF ) {
5442 1.1.2.2 nathanw if ( aExp || aSig ) float_set_inexact();
5443 1.1.2.2 nathanw return 0;
5444 1.1.2.2 nathanw }
5445 1.1.2.2 nathanw aSig |= LIT64( 0x0010000000000000 );
5446 1.1.2.2 nathanw shiftCount = 0x433 - aExp;
5447 1.1.2.2 nathanw savedASig = aSig;
5448 1.1.2.2 nathanw aSig >>= shiftCount;
5449 1.1.2.2 nathanw z = aSig;
5450 1.1.2.2 nathanw if ( ( aSig<<shiftCount ) != savedASig ) {
5451 1.1.2.2 nathanw float_set_inexact();
5452 1.1.2.2 nathanw }
5453 1.1.2.2 nathanw return z;
5454 1.1.2.2 nathanw
5455 1.1.2.2 nathanw }
5456 1.1.2.2 nathanw
5457 1.1.2.2 nathanw /*
5458 1.1.2.2 nathanw -------------------------------------------------------------------------------
5459 1.1.2.2 nathanw Returns the result of converting the single-precision floating-point value
5460 1.1.2.2 nathanw `a' to the 32-bit unsigned integer format. The conversion is
5461 1.1.2.2 nathanw performed according to the IEC/IEEE Standard for Binary Floating-point
5462 1.1.2.2 nathanw Arithmetic, except that the conversion is always rounded toward zero. If
5463 1.1.2.2 nathanw `a' is a NaN, the largest positive integer is returned. If the conversion
5464 1.1.2.2 nathanw overflows, the largest positive integer is returned.
5465 1.1.2.2 nathanw -------------------------------------------------------------------------------
5466 1.1.2.2 nathanw */
5467 1.1.2.2 nathanw uint32 float32_to_uint32_round_to_zero( float32 a )
5468 1.1.2.2 nathanw {
5469 1.1.2.2 nathanw flag aSign;
5470 1.1.2.2 nathanw int16 aExp, shiftCount;
5471 1.1.2.2 nathanw bits32 aSig;
5472 1.1.2.2 nathanw uint32 z;
5473 1.1.2.2 nathanw
5474 1.1.2.2 nathanw aSig = extractFloat32Frac( a );
5475 1.1.2.2 nathanw aExp = extractFloat32Exp( a );
5476 1.1.2.2 nathanw aSign = extractFloat32Sign( a );
5477 1.1.2.2 nathanw shiftCount = aExp - 0x9E;
5478 1.1.2.2 nathanw
5479 1.1.2.2 nathanw if (aSign) {
5480 1.1.2.2 nathanw float_raise( float_flag_invalid );
5481 1.1.2.2 nathanw return(0);
5482 1.1.2.2 nathanw }
5483 1.1.2.2 nathanw if ( 0 < shiftCount ) {
5484 1.1.2.2 nathanw float_raise( float_flag_invalid );
5485 1.1.2.2 nathanw return 0xFFFFFFFF;
5486 1.1.2.2 nathanw }
5487 1.1.2.2 nathanw else if ( aExp <= 0x7E ) {
5488 1.1.2.2 nathanw if ( aExp | aSig ) float_set_inexact();
5489 1.1.2.2 nathanw return 0;
5490 1.1.2.2 nathanw }
5491 1.1.2.2 nathanw aSig = ( aSig | 0x800000 )<<8;
5492 1.1.2.2 nathanw z = aSig>>( - shiftCount );
5493 1.1.2.2 nathanw if ( aSig<<( shiftCount & 31 ) ) {
5494 1.1.2.2 nathanw float_set_inexact();
5495 1.1.2.2 nathanw }
5496 1.1.2.2 nathanw return z;
5497 1.1.2.2 nathanw
5498 1.1.2.2 nathanw }
5499 1.1.2.2 nathanw
5500 1.1.2.2 nathanw #endif
5501 1.1.2.3 thorpej
5502 1.1.2.3 thorpej #endif /* _STANDALONE */
5503