systfloat.c revision 1.1 1 1.1 ross
2 1.1 ross /*
3 1.1 ross ===============================================================================
4 1.1 ross
5 1.1 ross This C source file is part of TestFloat, Release 2a, a package of programs
6 1.1 ross for testing the correctness of floating-point arithmetic complying to the
7 1.1 ross IEC/IEEE Standard for Floating-Point.
8 1.1 ross
9 1.1 ross Written by John R. Hauser. More information is available through the Web
10 1.1 ross page `http://HTTP.CS.Berkeley.EDU/~jhauser/arithmetic/TestFloat.html'.
11 1.1 ross
12 1.1 ross THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
13 1.1 ross has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
14 1.1 ross TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
15 1.1 ross PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
16 1.1 ross AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
17 1.1 ross
18 1.1 ross Derivative works are acceptable, even for commercial purposes, so long as
19 1.1 ross (1) they include prominent notice that the work is derivative, and (2) they
20 1.1 ross include prominent notice akin to these four paragraphs for those parts of
21 1.1 ross this code that are retained.
22 1.1 ross
23 1.1 ross ===============================================================================
24 1.1 ross */
25 1.1 ross
26 1.1 ross #include <math.h>
27 1.1 ross #include "milieu.h"
28 1.1 ross #include "softfloat.h"
29 1.1 ross #include "systfloat.h"
30 1.1 ross
31 1.1 ross float32 syst_int32_to_float32( int32 a )
32 1.1 ross {
33 1.1 ross float32 z;
34 1.1 ross
35 1.1 ross *( (float *) &z ) = a;
36 1.1 ross return z;
37 1.1 ross
38 1.1 ross }
39 1.1 ross
40 1.1 ross float64 syst_int32_to_float64( int32 a )
41 1.1 ross {
42 1.1 ross float64 z;
43 1.1 ross
44 1.1 ross *( (double *) &z ) = a;
45 1.1 ross return z;
46 1.1 ross
47 1.1 ross }
48 1.1 ross
49 1.1 ross #if defined( FLOATX80 ) && defined( LONG_DOUBLE_IS_FLOATX80 )
50 1.1 ross
51 1.1 ross floatx80 syst_int32_to_floatx80( int32 a )
52 1.1 ross {
53 1.1 ross floatx80 z;
54 1.1 ross
55 1.1 ross *( (long double *) &z ) = a;
56 1.1 ross return z;
57 1.1 ross
58 1.1 ross }
59 1.1 ross
60 1.1 ross #endif
61 1.1 ross
62 1.1 ross #if defined( FLOAT128 ) && defined( LONG_DOUBLE_IS_FLOAT128 )
63 1.1 ross
64 1.1 ross float128 syst_int32_to_float128( int32 a )
65 1.1 ross {
66 1.1 ross float128 z;
67 1.1 ross
68 1.1 ross *( (long double *) &z ) = a;
69 1.1 ross return z;
70 1.1 ross
71 1.1 ross }
72 1.1 ross
73 1.1 ross #endif
74 1.1 ross
75 1.1 ross #ifdef BITS64
76 1.1 ross
77 1.1 ross float32 syst_int64_to_float32( int64 a )
78 1.1 ross {
79 1.1 ross float32 z;
80 1.1 ross
81 1.1 ross *( (float *) &z ) = a;
82 1.1 ross return z;
83 1.1 ross
84 1.1 ross }
85 1.1 ross
86 1.1 ross float64 syst_int64_to_float64( int64 a )
87 1.1 ross {
88 1.1 ross float64 z;
89 1.1 ross
90 1.1 ross *( (double *) &z ) = a;
91 1.1 ross return z;
92 1.1 ross
93 1.1 ross }
94 1.1 ross
95 1.1 ross #if defined( FLOATX80 ) && defined( LONG_DOUBLE_IS_FLOATX80 )
96 1.1 ross
97 1.1 ross floatx80 syst_int64_to_floatx80( int64 a )
98 1.1 ross {
99 1.1 ross floatx80 z;
100 1.1 ross
101 1.1 ross *( (long double *) &z ) = a;
102 1.1 ross return z;
103 1.1 ross
104 1.1 ross }
105 1.1 ross
106 1.1 ross #endif
107 1.1 ross
108 1.1 ross #if defined( FLOAT128 ) && defined( LONG_DOUBLE_IS_FLOAT128 )
109 1.1 ross
110 1.1 ross float128 syst_int64_to_float128( int64 a )
111 1.1 ross {
112 1.1 ross float128 z;
113 1.1 ross
114 1.1 ross *( (long double *) &z ) = a;
115 1.1 ross return z;
116 1.1 ross
117 1.1 ross }
118 1.1 ross
119 1.1 ross #endif
120 1.1 ross
121 1.1 ross #endif
122 1.1 ross
123 1.1 ross int32 syst_float32_to_int32_round_to_zero( float32 a )
124 1.1 ross {
125 1.1 ross
126 1.1 ross return *( (float *) &a );
127 1.1 ross
128 1.1 ross }
129 1.1 ross
130 1.1 ross #ifdef BITS64
131 1.1 ross
132 1.1 ross int64 syst_float32_to_int64_round_to_zero( float32 a )
133 1.1 ross {
134 1.1 ross
135 1.1 ross return *( (float *) &a );
136 1.1 ross
137 1.1 ross }
138 1.1 ross
139 1.1 ross #endif
140 1.1 ross
141 1.1 ross float64 syst_float32_to_float64( float32 a )
142 1.1 ross {
143 1.1 ross float64 z;
144 1.1 ross
145 1.1 ross *( (double *) &z ) = *( (float *) &a );
146 1.1 ross return z;
147 1.1 ross
148 1.1 ross }
149 1.1 ross
150 1.1 ross #if defined( FLOATX80 ) && defined( LONG_DOUBLE_IS_FLOATX80 )
151 1.1 ross
152 1.1 ross floatx80 syst_float32_to_floatx80( float32 a )
153 1.1 ross {
154 1.1 ross floatx80 z;
155 1.1 ross
156 1.1 ross *( (long double *) &z ) = *( (float *) &a );
157 1.1 ross return z;
158 1.1 ross
159 1.1 ross }
160 1.1 ross
161 1.1 ross #endif
162 1.1 ross
163 1.1 ross #if defined( FLOAT128 ) && defined( LONG_DOUBLE_IS_FLOAT128 )
164 1.1 ross
165 1.1 ross float128 syst_float32_to_float128( float32 a )
166 1.1 ross {
167 1.1 ross float128 z;
168 1.1 ross
169 1.1 ross *( (long double *) &z ) = *( (float *) &a );
170 1.1 ross return z;
171 1.1 ross
172 1.1 ross }
173 1.1 ross
174 1.1 ross #endif
175 1.1 ross
176 1.1 ross float32 syst_float32_add( float32 a, float32 b )
177 1.1 ross {
178 1.1 ross float32 z;
179 1.1 ross
180 1.1 ross *( (float *) &z ) = *( (float *) &a ) + *( (float *) &b );
181 1.1 ross return z;
182 1.1 ross
183 1.1 ross }
184 1.1 ross
185 1.1 ross float32 syst_float32_sub( float32 a, float32 b )
186 1.1 ross {
187 1.1 ross float32 z;
188 1.1 ross
189 1.1 ross *( (float *) &z ) = *( (float *) &a ) - *( (float *) &b );
190 1.1 ross return z;
191 1.1 ross
192 1.1 ross }
193 1.1 ross
194 1.1 ross float32 syst_float32_mul( float32 a, float32 b )
195 1.1 ross {
196 1.1 ross float32 z;
197 1.1 ross
198 1.1 ross *( (float *) &z ) = *( (float *) &a ) * *( (float *) &b );
199 1.1 ross return z;
200 1.1 ross
201 1.1 ross }
202 1.1 ross
203 1.1 ross float32 syst_float32_div( float32 a, float32 b )
204 1.1 ross {
205 1.1 ross float32 z;
206 1.1 ross
207 1.1 ross *( (float *) &z ) = *( (float *) &a ) / *( (float *) &b );
208 1.1 ross return z;
209 1.1 ross
210 1.1 ross }
211 1.1 ross
212 1.1 ross flag syst_float32_eq( float32 a, float32 b )
213 1.1 ross {
214 1.1 ross
215 1.1 ross return ( *( (float *) &a ) == *( (float *) &b ) );
216 1.1 ross
217 1.1 ross }
218 1.1 ross
219 1.1 ross flag syst_float32_le( float32 a, float32 b )
220 1.1 ross {
221 1.1 ross
222 1.1 ross return ( *( (float *) &a ) <= *( (float *) &b ) );
223 1.1 ross
224 1.1 ross }
225 1.1 ross
226 1.1 ross flag syst_float32_lt( float32 a, float32 b )
227 1.1 ross {
228 1.1 ross
229 1.1 ross return ( *( (float *) &a ) < *( (float *) &b ) );
230 1.1 ross
231 1.1 ross }
232 1.1 ross
233 1.1 ross int32 syst_float64_to_int32_round_to_zero( float64 a )
234 1.1 ross {
235 1.1 ross
236 1.1 ross return *( (double *) &a );
237 1.1 ross
238 1.1 ross }
239 1.1 ross
240 1.1 ross #ifdef BITS64
241 1.1 ross
242 1.1 ross int64 syst_float64_to_int64_round_to_zero( float64 a )
243 1.1 ross {
244 1.1 ross
245 1.1 ross return *( (double *) &a );
246 1.1 ross
247 1.1 ross }
248 1.1 ross
249 1.1 ross #endif
250 1.1 ross
251 1.1 ross float32 syst_float64_to_float32( float64 a )
252 1.1 ross {
253 1.1 ross float32 z;
254 1.1 ross
255 1.1 ross *( (float *) &z ) = *( (double *) &a );
256 1.1 ross return z;
257 1.1 ross
258 1.1 ross }
259 1.1 ross
260 1.1 ross #if defined( FLOATX80 ) && defined( LONG_DOUBLE_IS_FLOATX80 )
261 1.1 ross
262 1.1 ross floatx80 syst_float64_to_floatx80( float64 a )
263 1.1 ross {
264 1.1 ross floatx80 z;
265 1.1 ross
266 1.1 ross *( (long double *) &z ) = *( (double *) &a );
267 1.1 ross return z;
268 1.1 ross
269 1.1 ross }
270 1.1 ross
271 1.1 ross #endif
272 1.1 ross
273 1.1 ross #if defined( FLOAT128 ) && defined( LONG_DOUBLE_IS_FLOAT128 )
274 1.1 ross
275 1.1 ross float128 syst_float64_to_float128( float64 a )
276 1.1 ross {
277 1.1 ross float128 z;
278 1.1 ross
279 1.1 ross *( (long double *) &z ) = *( (double *) &a );
280 1.1 ross return z;
281 1.1 ross
282 1.1 ross }
283 1.1 ross
284 1.1 ross #endif
285 1.1 ross
286 1.1 ross float64 syst_float64_add( float64 a, float64 b )
287 1.1 ross {
288 1.1 ross float64 z;
289 1.1 ross
290 1.1 ross *( (double *) &z ) = *( (double *) &a ) + *( (double *) &b );
291 1.1 ross return z;
292 1.1 ross
293 1.1 ross }
294 1.1 ross
295 1.1 ross float64 syst_float64_sub( float64 a, float64 b )
296 1.1 ross {
297 1.1 ross float64 z;
298 1.1 ross
299 1.1 ross *( (double *) &z ) = *( (double *) &a ) - *( (double *) &b );
300 1.1 ross return z;
301 1.1 ross
302 1.1 ross }
303 1.1 ross
304 1.1 ross float64 syst_float64_mul( float64 a, float64 b )
305 1.1 ross {
306 1.1 ross float64 z;
307 1.1 ross
308 1.1 ross *( (double *) &z ) = *( (double *) &a ) * *( (double *) &b );
309 1.1 ross return z;
310 1.1 ross
311 1.1 ross }
312 1.1 ross
313 1.1 ross float64 syst_float64_div( float64 a, float64 b )
314 1.1 ross {
315 1.1 ross float64 z;
316 1.1 ross
317 1.1 ross *( (double *) &z ) = *( (double *) &a ) / *( (double *) &b );
318 1.1 ross return z;
319 1.1 ross
320 1.1 ross }
321 1.1 ross
322 1.1 ross float64 syst_float64_sqrt( float64 a )
323 1.1 ross {
324 1.1 ross float64 z;
325 1.1 ross
326 1.1 ross *( (double *) &z ) = sqrt( *( (double *) &a ) );
327 1.1 ross return z;
328 1.1 ross
329 1.1 ross }
330 1.1 ross
331 1.1 ross flag syst_float64_eq( float64 a, float64 b )
332 1.1 ross {
333 1.1 ross
334 1.1 ross return ( *( (double *) &a ) == *( (double *) &b ) );
335 1.1 ross
336 1.1 ross }
337 1.1 ross
338 1.1 ross flag syst_float64_le( float64 a, float64 b )
339 1.1 ross {
340 1.1 ross
341 1.1 ross return ( *( (double *) &a ) <= *( (double *) &b ) );
342 1.1 ross
343 1.1 ross }
344 1.1 ross
345 1.1 ross flag syst_float64_lt( float64 a, float64 b )
346 1.1 ross {
347 1.1 ross
348 1.1 ross return ( *( (double *) &a ) < *( (double *) &b ) );
349 1.1 ross
350 1.1 ross }
351 1.1 ross
352 1.1 ross #if defined( FLOATX80 ) && defined( LONG_DOUBLE_IS_FLOATX80 )
353 1.1 ross
354 1.1 ross int32 syst_floatx80_to_int32_round_to_zero( floatx80 a )
355 1.1 ross {
356 1.1 ross
357 1.1 ross return *( (long double *) &a );
358 1.1 ross
359 1.1 ross }
360 1.1 ross
361 1.1 ross #ifdef BITS64
362 1.1 ross
363 1.1 ross int64 syst_floatx80_to_int64_round_to_zero( floatx80 a )
364 1.1 ross {
365 1.1 ross
366 1.1 ross return *( (long double *) &a );
367 1.1 ross
368 1.1 ross }
369 1.1 ross
370 1.1 ross #endif
371 1.1 ross
372 1.1 ross float32 syst_floatx80_to_float32( floatx80 a )
373 1.1 ross {
374 1.1 ross float32 z;
375 1.1 ross
376 1.1 ross *( (float *) &z ) = *( (long double *) &a );
377 1.1 ross return z;
378 1.1 ross
379 1.1 ross }
380 1.1 ross
381 1.1 ross float64 syst_floatx80_to_float64( floatx80 a )
382 1.1 ross {
383 1.1 ross float64 z;
384 1.1 ross
385 1.1 ross *( (double *) &z ) = *( (long double *) &a );
386 1.1 ross return z;
387 1.1 ross
388 1.1 ross }
389 1.1 ross
390 1.1 ross floatx80 syst_floatx80_add( floatx80 a, floatx80 b )
391 1.1 ross {
392 1.1 ross floatx80 z;
393 1.1 ross
394 1.1 ross *( (long double *) &z ) =
395 1.1 ross *( (long double *) &a ) + *( (long double *) &b );
396 1.1 ross return z;
397 1.1 ross
398 1.1 ross }
399 1.1 ross
400 1.1 ross floatx80 syst_floatx80_sub( floatx80 a, floatx80 b )
401 1.1 ross {
402 1.1 ross floatx80 z;
403 1.1 ross
404 1.1 ross *( (long double *) &z ) =
405 1.1 ross *( (long double *) &a ) - *( (long double *) &b );
406 1.1 ross return z;
407 1.1 ross
408 1.1 ross }
409 1.1 ross
410 1.1 ross floatx80 syst_floatx80_mul( floatx80 a, floatx80 b )
411 1.1 ross {
412 1.1 ross floatx80 z;
413 1.1 ross
414 1.1 ross *( (long double *) &z ) =
415 1.1 ross *( (long double *) &a ) * *( (long double *) &b );
416 1.1 ross return z;
417 1.1 ross
418 1.1 ross }
419 1.1 ross
420 1.1 ross floatx80 syst_floatx80_div( floatx80 a, floatx80 b )
421 1.1 ross {
422 1.1 ross floatx80 z;
423 1.1 ross
424 1.1 ross *( (long double *) &z ) =
425 1.1 ross *( (long double *) &a ) / *( (long double *) &b );
426 1.1 ross return z;
427 1.1 ross
428 1.1 ross }
429 1.1 ross
430 1.1 ross flag syst_floatx80_eq( floatx80 a, floatx80 b )
431 1.1 ross {
432 1.1 ross
433 1.1 ross return ( *( (long double *) &a ) == *( (long double *) &b ) );
434 1.1 ross
435 1.1 ross }
436 1.1 ross
437 1.1 ross flag syst_floatx80_le( floatx80 a, floatx80 b )
438 1.1 ross {
439 1.1 ross
440 1.1 ross return ( *( (long double *) &a ) <= *( (long double *) &b ) );
441 1.1 ross
442 1.1 ross }
443 1.1 ross
444 1.1 ross flag syst_floatx80_lt( floatx80 a, floatx80 b )
445 1.1 ross {
446 1.1 ross
447 1.1 ross return ( *( (long double *) &a ) < *( (long double *) &b ) );
448 1.1 ross
449 1.1 ross }
450 1.1 ross
451 1.1 ross #endif
452 1.1 ross
453 1.1 ross #if defined( FLOAT128 ) && defined( LONG_DOUBLE_IS_FLOAT128 )
454 1.1 ross
455 1.1 ross int32 syst_float128_to_int32_round_to_zero( float128 a )
456 1.1 ross {
457 1.1 ross
458 1.1 ross return *( (long double *) &a );
459 1.1 ross
460 1.1 ross }
461 1.1 ross
462 1.1 ross #ifdef BITS64
463 1.1 ross
464 1.1 ross int64 syst_float128_to_int64_round_to_zero( float128 a )
465 1.1 ross {
466 1.1 ross
467 1.1 ross return *( (long double *) &a );
468 1.1 ross
469 1.1 ross }
470 1.1 ross
471 1.1 ross #endif
472 1.1 ross
473 1.1 ross float32 syst_float128_to_float32( float128 a )
474 1.1 ross {
475 1.1 ross float32 z;
476 1.1 ross
477 1.1 ross *( (float *) &z ) = *( (long double *) &a );
478 1.1 ross return z;
479 1.1 ross
480 1.1 ross }
481 1.1 ross
482 1.1 ross float64 syst_float128_to_float64( float128 a )
483 1.1 ross {
484 1.1 ross float64 z;
485 1.1 ross
486 1.1 ross *( (double *) &z ) = *( (long double *) &a );
487 1.1 ross return z;
488 1.1 ross
489 1.1 ross }
490 1.1 ross
491 1.1 ross float128 syst_float128_add( float128 a, float128 b )
492 1.1 ross {
493 1.1 ross float128 z;
494 1.1 ross
495 1.1 ross *( (long double *) &z ) =
496 1.1 ross *( (long double *) &a ) + *( (long double *) &b );
497 1.1 ross return z;
498 1.1 ross
499 1.1 ross }
500 1.1 ross
501 1.1 ross float128 syst_float128_sub( float128 a, float128 b )
502 1.1 ross {
503 1.1 ross float128 z;
504 1.1 ross
505 1.1 ross *( (long double *) &z ) =
506 1.1 ross *( (long double *) &a ) - *( (long double *) &b );
507 1.1 ross return z;
508 1.1 ross
509 1.1 ross }
510 1.1 ross
511 1.1 ross float128 syst_float128_mul( float128 a, float128 b )
512 1.1 ross {
513 1.1 ross float128 z;
514 1.1 ross
515 1.1 ross *( (long double *) &z ) =
516 1.1 ross *( (long double *) &a ) * *( (long double *) &b );
517 1.1 ross return z;
518 1.1 ross
519 1.1 ross }
520 1.1 ross
521 1.1 ross float128 syst_float128_div( float128 a, float128 b )
522 1.1 ross {
523 1.1 ross float128 z;
524 1.1 ross
525 1.1 ross *( (long double *) &z ) =
526 1.1 ross *( (long double *) &a ) / *( (long double *) &b );
527 1.1 ross return z;
528 1.1 ross
529 1.1 ross }
530 1.1 ross
531 1.1 ross flag syst_float128_eq( float128 a, float128 b )
532 1.1 ross {
533 1.1 ross
534 1.1 ross return ( *( (long double *) &a ) == *( (long double *) &b ) );
535 1.1 ross
536 1.1 ross }
537 1.1 ross
538 1.1 ross flag syst_float128_le( float128 a, float128 b )
539 1.1 ross {
540 1.1 ross
541 1.1 ross return ( *( (long double *) &a ) <= *( (long double *) &b ) );
542 1.1 ross
543 1.1 ross }
544 1.1 ross
545 1.1 ross flag syst_float128_lt( float128 a, float128 b )
546 1.1 ross {
547 1.1 ross
548 1.1 ross return ( *( (long double *) &a ) < *( (long double *) &b ) );
549 1.1 ross
550 1.1 ross }
551 1.1 ross
552 1.1 ross #endif
553 1.1 ross
554