fenv.c revision 1.8 1 /* $NetBSD: fenv.c,v 1.8 2017/03/22 23:11:08 chs Exp $ */
2
3 /*-
4 * Copyright (c) 2004-2005 David Schultz <das (at) FreeBSD.ORG>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 #include <sys/cdefs.h>
30 __RCSID("$NetBSD: fenv.c,v 1.8 2017/03/22 23:11:08 chs Exp $");
31
32 #include "namespace.h"
33
34 #include <sys/param.h>
35 #include <sys/sysctl.h>
36 #include <assert.h>
37 #include <fenv.h>
38 #include <stddef.h>
39 #include <string.h>
40
#ifdef __weak_alias
/* Exception status flag access */
__weak_alias(feclearexcept,_feclearexcept)
__weak_alias(fegetexceptflag,_fegetexceptflag)
__weak_alias(feraiseexcept,_feraiseexcept)
__weak_alias(fesetexceptflag,_fesetexceptflag)
__weak_alias(fetestexcept,_fetestexcept)
/* Rounding direction */
__weak_alias(fegetround,_fegetround)
__weak_alias(fesetround,_fesetround)
/* Whole floating-point environment */
__weak_alias(fegetenv,_fegetenv)
__weak_alias(feholdexcept,_feholdexcept)
__weak_alias(fesetenv,_fesetenv)
__weak_alias(feupdateenv,_feupdateenv)
/* Extensions to the standard: exception trap masks */
__weak_alias(fedisableexcept,_fedisableexcept)
__weak_alias(feenableexcept,_feenableexcept)
__weak_alias(fegetexcept,_fegetexcept)
#endif
57
/*
 * Thin wrappers around the x87 and SSE control/status instructions.
 *
 * The "load" wrappers take the object itself (it is used as a memory
 * operand); the "store" wrappers take a pointer to the destination object.
 */

/* Load x87 Control Word */
#define	__fldcw(__cw)		__asm__ __volatile__	\
	("fldcw %0" : : "m" (__cw))

/* No-Wait Store Control Word */
#define	__fnstcw(__cw)		__asm__ __volatile__	\
	("fnstcw %0" : "=m" (*(__cw)))

/* No-Wait Store Status Word (into memory or %ax) */
#define	__fnstsw(__sw)		__asm__ __volatile__	\
	("fnstsw %0" : "=am" (*(__sw)))

/* No-Wait Clear Exception Flags */
#define	__fnclex()		__asm__ __volatile__	\
	("fnclex")

/* Load x87 Environment */
#define	__fldenv(__env)		__asm__ __volatile__	\
	("fldenv %0" : : "m" (__env))

/* No-Wait Store x87 environment */
#define	__fnstenv(__env)	__asm__ __volatile__	\
	("fnstenv %0" : "=m" (*(__env)))

/*
 * Check for and handle pending unmasked x87 FPU exceptions.
 * The instruction has no operand, so the macro takes no argument.
 */
#define	__fwait()		__asm__ __volatile__	\
	("fwait")

/* Load the MXCSR register */
#define	__ldmxcsr(__mxcsr)	__asm__ __volatile__	\
	("ldmxcsr %0" : : "m" (__mxcsr))

/* Store the MXCSR register state */
#define	__stmxcsr(__mxcsr)	__asm__ __volatile__	\
	("stmxcsr %0" : "=m" (*(__mxcsr)))
93
94 /*
95 * The following constant represents the default floating-point environment
96 * (that is, the one installed at program startup) and has type pointer to
97 * const-qualified fenv_t.
98 *
99 * It can be used as an argument to the functions within the <fenv.h> header
100 * that manage the floating-point environment, namely fesetenv() and
101 * feupdateenv().
102 *
103 * x87 fpu registers are 16bit wide. The upper bits, 31-16, are marked as
104 * RESERVED. We provide a partial floating-point environment, where we
105 * define only the lower bits. The reserved bits are extracted and set by the
106 * consumers of FE_DFL_ENV, during runtime.
107 */
108 fenv_t __fe_dfl_env = {
109 {
110 __NetBSD_NPXCW__, /* Control word register */
111 0x0, /* Unused */
112 0x0000, /* Status word register */
113 0x0, /* Unused */
114 0x0000ffff, /* Tag word register */
115 0x0, /* Unused */
116 {
117 0x0000, 0x0000,
118 0x0000, 0xffff
119 }
120 },
121 __INITIAL_MXCSR__ /* MXCSR register */
122 };
123
124 /*
125 * Test for SSE support on this processor.
126 *
127 * We need to use ldmxcsr/stmxcsr to get correct results if any part
128 * of the program was compiled to use SSE floating-point, but we can't
129 * use SSE on older processors.
130 *
131 * In order to do so, we need to query the processor capabilities via the CPUID
132 * instruction. We can make it even simpler though, by querying the machdep.sse
133 * sysctl.
134 */
135 static int __HAS_SSE = 0;
136
137 static void __init_libm(void) __attribute__ ((constructor, used));
138
139 static void __init_libm(void)
140 {
141 size_t oldlen = sizeof(__HAS_SSE);
142 int rv;
143 uint16_t control;
144
145 rv = sysctlbyname("machdep.sse", &__HAS_SSE, &oldlen, NULL, 0);
146 if (rv == -1)
147 __HAS_SSE = 0;
148
149 __fnstcw(&control);
150 __fe_dfl_env.x87.control = control;
151 }
152
153 /*
154 * The feclearexcept() function clears the supported floating-point exceptions
155 * represented by `excepts'.
156 */
157 int
158 feclearexcept(int excepts)
159 {
160 fenv_t env;
161 uint32_t mxcsr;
162 int ex;
163
164 _DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
165
166 ex = excepts & FE_ALL_EXCEPT;
167
168 /* It's ~3x faster to call fnclex, than store/load fp env */
169 if (ex == FE_ALL_EXCEPT) {
170 __fnclex();
171 } else {
172 __fnstenv(&env);
173 env.x87.status &= ~ex;
174 __fldenv(env);
175 }
176
177 if (__HAS_SSE) {
178 __stmxcsr(&mxcsr);
179 mxcsr &= ~ex;
180 __ldmxcsr(mxcsr);
181 }
182
183 /* Success */
184 return (0);
185 }
186
187 /*
188 * The fegetexceptflag() function stores an implementation-defined
189 * representation of the states of the floating-point status flags indicated by
190 * the argument excepts in the object pointed to by the argument flagp.
191 */
192 int
193 fegetexceptflag(fexcept_t *flagp, int excepts)
194 {
195 uint32_t mxcsr;
196 uint16_t status;
197 int ex;
198
199 _DIAGASSERT(flagp != NULL);
200 _DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
201
202 ex = excepts & FE_ALL_EXCEPT;
203
204 __fnstsw(&status);
205 if (__HAS_SSE)
206 __stmxcsr(&mxcsr);
207 else
208 mxcsr = 0;
209
210 *flagp = (mxcsr | status) & ex;
211
212 /* Success */
213 return (0);
214 }
215
/*
 * feraiseexcept(): raise the supported floating-point exceptions named by
 * `excepts'.
 *
 * The standard permits executing an instruction that produces the exception
 * as a side effect; here the status flags are written directly through
 * fesetexceptflag() instead (which also masks the argument), and an fwait
 * is then issued so that pending unmasked x87 exceptions are handled.
 * Always returns 0.
 */
int
feraiseexcept(int excepts)
{
	fexcept_t flags = excepts & FE_ALL_EXCEPT;

	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);

	fesetexceptflag(&flags, excepts);
	__fwait();

	/* Success */
	return (0);
}
240
241 /*
242 * This function sets the floating-point status flags indicated by the argument
243 * `excepts' to the states stored in the object pointed to by `flagp'. It does
244 * NOT raise any floating-point exceptions, but only sets the state of the flags.
245 */
246 int
247 fesetexceptflag(const fexcept_t *flagp, int excepts)
248 {
249 fenv_t env;
250 uint32_t mxcsr;
251 int ex;
252
253 _DIAGASSERT(flagp != NULL);
254 _DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
255
256 ex = excepts & FE_ALL_EXCEPT;
257
258 __fnstenv(&env);
259 env.x87.status &= ~ex;
260 env.x87.status |= *flagp & ex;
261 __fldenv(env);
262
263 if (__HAS_SSE) {
264 __stmxcsr(&mxcsr);
265 mxcsr &= ~ex;
266 mxcsr |= *flagp & ex;
267 __ldmxcsr(mxcsr);
268 }
269
270 /* Success */
271 return (0);
272 }
273
274 /*
275 * The fetestexcept() function determines which of a specified subset of the
276 * floating-point exception flags are currently set. The `excepts' argument
277 * specifies the floating-point status flags to be queried.
278 */
279 int
280 fetestexcept(int excepts)
281 {
282 uint32_t mxcsr;
283 uint16_t status;
284 int ex;
285
286 _DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
287
288 ex = excepts & FE_ALL_EXCEPT;
289
290 __fnstsw(&status);
291 if (__HAS_SSE)
292 __stmxcsr(&mxcsr);
293 else
294 mxcsr = 0;
295
296 return ((status | mxcsr) & ex);
297 }
298
299 int
300 fegetround(void)
301 {
302 uint16_t control;
303
304 /*
305 * We assume that the x87 and the SSE unit agree on the
306 * rounding mode. Reading the control word on the x87 turns
307 * out to be about 5 times faster than reading it on the SSE
308 * unit on an Opteron 244.
309 */
310 __fnstcw(&control);
311
312 return (control & __X87_ROUND_MASK);
313 }
314
315 /*
316 * The fesetround() function shall establish the rounding direction represented
317 * by its argument round. If the argument is not equal to the value of a
318 * rounding direction macro, the rounding direction is not changed.
319 */
320 int
321 fesetround(int round)
322 {
323 uint32_t mxcsr;
324 uint16_t control;
325
326 if (round & ~__X87_ROUND_MASK) {
327 /* Failure */
328 return (-1);
329 }
330
331 __fnstcw(&control);
332 control &= ~__X87_ROUND_MASK;
333 control |= round;
334 __fldcw(control);
335
336 if (__HAS_SSE) {
337 __stmxcsr(&mxcsr);
338 mxcsr &= ~(__X87_ROUND_MASK << __SSE_ROUND_SHIFT);
339 mxcsr |= round << __SSE_ROUND_SHIFT;
340 __ldmxcsr(mxcsr);
341 }
342
343 /* Success */
344 return (0);
345 }
346
347 /*
348 * The fegetenv() function attempts to store the current floating-point
349 * environment in the object pointed to by envp.
350 */
351 int
352 fegetenv(fenv_t *envp)
353 {
354 uint32_t mxcsr;
355
356 _DIAGASSERT(flagp != NULL);
357
358 /*
359 * fnstenv masks all exceptions, so we need to restore the old control
360 * word to avoid this side effect.
361 */
362 __fnstenv(envp);
363 __fldcw(envp->x87.control);
364 if (__HAS_SSE) {
365 __stmxcsr(&mxcsr);
366 envp->mxcsr = mxcsr;
367 }
368
369 /* Success */
370 return (0);
371 }
372
373 /*
374 * The feholdexcept() function saves the current floating-point environment in
375 * the object pointed to by envp, clears the floating-point status flags, and
376 * then installs a non-stop (continue on floating-point exceptions) mode, if
377 * available, for all floating-point exceptions.
378 */
379 int
380 feholdexcept(fenv_t *envp)
381 {
382 uint32_t mxcsr;
383
384 _DIAGASSERT(envp != NULL);
385
386 __fnstenv(envp);
387 __fnclex();
388 if (__HAS_SSE) {
389 __stmxcsr(&mxcsr);
390 envp->mxcsr = mxcsr;
391 mxcsr &= ~FE_ALL_EXCEPT;
392 mxcsr |= FE_ALL_EXCEPT << __SSE_EMASK_SHIFT;
393 __ldmxcsr(mxcsr);
394 }
395
396 /* Success */
397 return (0);
398 }
399
400 /*
401 * The fesetenv() function attempts to establish the floating-point environment
402 * represented by the object pointed to by envp. The argument `envp' points
403 * to an object set by a call to fegetenv() or feholdexcept(), or equal a
404 * floating-point environment macro. The fesetenv() function does not raise
405 * floating-point exceptions, but only installs the state of the floating-point
406 * status flags represented through its argument.
407 */
408 int
409 fesetenv(const fenv_t *envp)
410 {
411 fenv_t env;
412
413 _DIAGASSERT(envp != NULL);
414
415 /* Store the x87 floating-point environment */
416 memset(&env, 0, sizeof(env));
417 __fnstenv(&env);
418
419 __fe_dfl_env.x87.unused1 = env.x87.unused1;
420 __fe_dfl_env.x87.unused2 = env.x87.unused2;
421 __fe_dfl_env.x87.unused3 = env.x87.unused3;
422 memcpy(__fe_dfl_env.x87.others, env.x87.others,
423 sizeof(__fe_dfl_env.x87.others));
424
425 __fldenv(envp->x87);
426 if (__HAS_SSE)
427 __ldmxcsr(envp->mxcsr);
428
429 /* Success */
430 return (0);
431 }
432
433 /*
434 * The feupdateenv() function saves the currently raised floating-point
435 * exceptions in its automatic storage, installs the floating-point environment
436 * represented by the object pointed to by `envp', and then raises the saved
437 * floating-point exceptions. The argument `envp' shall point to an object set
438 * by a call to feholdexcept() or fegetenv(), or equal a floating-point
439 * environment macro.
440 */
441 int
442 feupdateenv(const fenv_t *envp)
443 {
444 fenv_t env;
445 uint32_t mxcsr;
446 uint16_t status;
447
448 _DIAGASSERT(envp != NULL);
449
450 /* Store the x87 floating-point environment */
451 memset(&env, 0, sizeof(env));
452 __fnstenv(&env);
453
454 __fe_dfl_env.x87.unused1 = env.x87.unused1;
455 __fe_dfl_env.x87.unused2 = env.x87.unused2;
456 __fe_dfl_env.x87.unused3 = env.x87.unused3;
457 memcpy(__fe_dfl_env.x87.others, env.x87.others,
458 sizeof(__fe_dfl_env.x87.others));
459
460 __fnstsw(&status);
461 if (__HAS_SSE)
462 __stmxcsr(&mxcsr);
463 else
464 mxcsr = 0;
465 fesetenv(envp);
466 feraiseexcept((mxcsr | status) & FE_ALL_EXCEPT);
467
468 /* Success */
469 return (0);
470 }
471
472 /*
473 * The following functions are extentions to the standard
474 */
475 int
476 feenableexcept(int mask)
477 {
478 uint32_t mxcsr, omask;
479 uint16_t control;
480
481 mask &= FE_ALL_EXCEPT;
482 __fnstcw(&control);
483 if (__HAS_SSE)
484 __stmxcsr(&mxcsr);
485 else
486 mxcsr = 0;
487
488 omask = (control | mxcsr >> __SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
489 control &= ~mask;
490 __fldcw(control);
491 if (__HAS_SSE) {
492 mxcsr &= ~(mask << __SSE_EMASK_SHIFT);
493 __ldmxcsr(mxcsr);
494 }
495
496 return (FE_ALL_EXCEPT & ~omask);
497 }
498
499 int
500 fedisableexcept(int mask)
501 {
502 uint32_t mxcsr, omask;
503 uint16_t control;
504
505 mask &= FE_ALL_EXCEPT;
506 __fnstcw(&control);
507 if (__HAS_SSE)
508 __stmxcsr(&mxcsr);
509 else
510 mxcsr = 0;
511
512 omask = (control | mxcsr >> __SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
513 control |= mask;
514 __fldcw(control);
515 if (__HAS_SSE) {
516 mxcsr |= mask << __SSE_EMASK_SHIFT;
517 __ldmxcsr(mxcsr);
518 }
519
520 return (FE_ALL_EXCEPT & ~omask);
521 }
522
/*
 * fegetexcept(): return the set of exceptions that are currently unmasked,
 * derived from the x87 control word.
 */
int
fegetexcept(void)
{
	uint16_t cw;

	/*
	 * The exception masks of the x87 and of the SSE unit are assumed to
	 * be the same, so only the x87 control word is consulted.
	 */
	__fnstcw(&cw);

	return (FE_ALL_EXCEPT & ~cw);
}
536