fenv.c revision 1.5 1 /* $NetBSD: fenv.c,v 1.5 2013/05/29 00:53:19 riastradh Exp $ */
2
3 /*-
4 * Copyright (c) 2004-2005 David Schultz <das (at) FreeBSD.ORG>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 #include <sys/cdefs.h>
30 __RCSID("$NetBSD: fenv.c,v 1.5 2013/05/29 00:53:19 riastradh Exp $");
31
32 #include <sys/param.h>
33 #include <sys/sysctl.h>
34 #include <assert.h>
35 #include <fenv.h>
36 #include <stddef.h>
37 #include <string.h>
38
/*
 * Thin wrappers around the x87 / SSE control and status instructions.
 *
 * Macros that READ machine state take a pointer to the destination object;
 * macros that LOAD machine state take the source object itself (an lvalue),
 * because the instruction needs a memory operand.
 */

/* Load the x87 control word from `__cw' */
#define	__fldcw(__cw)		__asm__ __volatile__	\
	("fldcw %0" : : "m" (__cw))

/* Store the x87 control word into `*__cw' (no-wait form) */
#define	__fnstcw(__cw)		__asm__ __volatile__	\
	("fnstcw %0" : "=m" (*(__cw)))

/* Store the x87 status word into `*__sw' (no-wait form) */
#define	__fnstsw(__sw)		__asm__ __volatile__	\
	("fnstsw %0" : "=am" (*(__sw)))

/* Clear the x87 exception flags (no-wait form) */
#define	__fnclex()		__asm__ __volatile__	\
	("fnclex")

/* Load the x87 environment from `__env' */
#define	__fldenv(__env)		__asm__ __volatile__	\
	("fldenv %0" : : "m" (__env))

/* Store the x87 environment into `*__env' (no-wait form) */
#define	__fnstenv(__env)	__asm__ __volatile__	\
	("fnstenv %0" : "=m" (*(__env)))

/*
 * Check for and handle pending unmasked x87 FPU exceptions.
 * The instruction has no operand, so the macro takes no argument.
 */
#define	__fwait()		__asm__ __volatile__	\
	("fwait")

/* Load the MXCSR register from `__mxcsr' */
#define	__ldmxcsr(__mxcsr)	__asm__ __volatile__	\
	("ldmxcsr %0" : : "m" (__mxcsr))

/* Store the MXCSR register into `*__mxcsr' */
#define	__stmxcsr(__mxcsr)	__asm__ __volatile__	\
	("stmxcsr %0" : "=m" (*(__mxcsr)))
74
75 /*
76 * The following constant represents the default floating-point environment
77 * (that is, the one installed at program startup) and has type pointer to
78 * const-qualified fenv_t.
79 *
80 * It can be used as an argument to the functions within the <fenv.h> header
81 * that manage the floating-point environment, namely fesetenv() and
82 * feupdateenv().
83 *
84 * x87 fpu registers are 16bit wide. The upper bits, 31-16, are marked as
85 * RESERVED. We provide a partial floating-point environment, where we
86 * define only the lower bits. The reserved bits are extracted and set by the
87 * consumers of FE_DFL_ENV, during runtime.
88 */
89 fenv_t __fe_dfl_env = {
90 {
91 __NetBSD_NPXCW__, /* Control word register */
92 0x0, /* Unused */
93 0x0000, /* Status word register */
94 0x0, /* Unused */
95 0x0000ffff, /* Tag word register */
96 0x0, /* Unused */
97 {
98 0x0000, 0x0000,
99 0x0000, 0xffff
100 }
101 },
102 __INITIAL_MXCSR__ /* MXCSR register */
103 };
104
/*
 * SSE availability flag.
 *
 * ldmxcsr/stmxcsr must be used to get correct results when any part of the
 * program was compiled to use SSE floating-point, yet those instructions
 * cannot be executed on older processors.  Rather than issuing CPUID
 * ourselves, the machdep.sse sysctl is consulted once at load time.
 */
static int __HAS_SSE = 0;

static void __test_sse(void) __attribute__ ((constructor));

/*
 * Constructor: read machdep.sse into __HAS_SSE, treating a failed query as
 * "no SSE".
 */
static void
__test_sse(void)
{
	size_t len = sizeof(__HAS_SSE);

	if (sysctlbyname("machdep.sse", &__HAS_SSE, &len, NULL, 0) == -1)
		__HAS_SSE = 0;
}
129
130 /*
131 * The feclearexcept() function clears the supported floating-point exceptions
132 * represented by `excepts'.
133 */
134 int
135 feclearexcept(int excepts)
136 {
137 fenv_t env;
138 uint32_t mxcsr;
139 int ex;
140
141 _DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
142
143 ex = excepts & FE_ALL_EXCEPT;
144
145 /* It's ~3x faster to call fnclex, than store/load fp env */
146 if (ex == FE_ALL_EXCEPT) {
147 __fnclex();
148 } else {
149 __fnstenv(&env);
150 env.x87.status &= ~ex;
151 __fldenv(env);
152 }
153
154 if (__HAS_SSE) {
155 __stmxcsr(&mxcsr);
156 mxcsr &= ~ex;
157 __ldmxcsr(mxcsr);
158 }
159
160 /* Success */
161 return (0);
162 }
163
164 /*
165 * The fegetexceptflag() function stores an implementation-defined
166 * representation of the states of the floating-point status flags indicated by
167 * the argument excepts in the object pointed to by the argument flagp.
168 */
169 int
170 fegetexceptflag(fexcept_t *flagp, int excepts)
171 {
172 uint32_t mxcsr;
173 uint16_t status;
174 int ex;
175
176 _DIAGASSERT(flagp != NULL);
177 _DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
178
179 ex = excepts & FE_ALL_EXCEPT;
180
181 __fnstsw(&status);
182 if (__HAS_SSE)
183 __stmxcsr(&mxcsr);
184 else
185 mxcsr = 0;
186
187 *flagp = (mxcsr | status) & ex;
188
189 /* Success */
190 return (0);
191 }
192
/*
 * feraiseexcept(): raise the supported floating-point exceptions named by
 * `excepts'.
 *
 * The standard permits raising an exception by executing an instruction
 * that has it as a side effect; here the status flags are instead written
 * directly via fesetexceptflag() (which also performs the input masking),
 * followed by an fwait so that pending unmasked x87 exceptions are checked
 * for and handled.  Always returns 0.
 */
int
feraiseexcept(int excepts)
{
	fexcept_t flags;

	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);

	flags = excepts & FE_ALL_EXCEPT;
	(void)fesetexceptflag(&flags, excepts);
	__fwait();

	/* Success */
	return (0);
}
217
218 /*
219 * This function sets the floating-point status flags indicated by the argument
220 * `excepts' to the states stored in the object pointed to by `flagp'. It does
221 * NOT raise any floating-point exceptions, but only sets the state of the flags.
222 */
223 int
224 fesetexceptflag(const fexcept_t *flagp, int excepts)
225 {
226 fenv_t env;
227 uint32_t mxcsr;
228 int ex;
229
230 _DIAGASSERT(flagp != NULL);
231 _DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
232
233 ex = excepts & FE_ALL_EXCEPT;
234
235 __fnstenv(&env);
236 env.x87.status &= ~ex;
237 env.x87.status |= *flagp & ex;
238 __fldenv(env);
239
240 if (__HAS_SSE) {
241 __stmxcsr(&mxcsr);
242 mxcsr &= ~ex;
243 mxcsr |= *flagp & ex;
244 __ldmxcsr(mxcsr);
245 }
246
247 /* Success */
248 return (0);
249 }
250
251 /*
252 * The fetestexcept() function determines which of a specified subset of the
253 * floating-point exception flags are currently set. The `excepts' argument
254 * specifies the floating-point status flags to be queried.
255 */
256 int
257 fetestexcept(int excepts)
258 {
259 uint32_t mxcsr;
260 uint16_t status;
261 int ex;
262
263 _DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
264
265 ex = excepts & FE_ALL_EXCEPT;
266
267 __fnstsw(&status);
268 if (__HAS_SSE)
269 __stmxcsr(&mxcsr);
270 else
271 mxcsr = 0;
272
273 return ((status | mxcsr) & ex);
274 }
275
276 int
277 fegetround(void)
278 {
279 uint16_t control;
280
281 /*
282 * We assume that the x87 and the SSE unit agree on the
283 * rounding mode. Reading the control word on the x87 turns
284 * out to be about 5 times faster than reading it on the SSE
285 * unit on an Opteron 244.
286 */
287 __fnstcw(&control);
288
289 return (control & __X87_ROUND_MASK);
290 }
291
292 /*
293 * The fesetround() function shall establish the rounding direction represented
294 * by its argument round. If the argument is not equal to the value of a
295 * rounding direction macro, the rounding direction is not changed.
296 */
297 int
298 fesetround(int round)
299 {
300 uint32_t mxcsr;
301 uint16_t control;
302
303 if (round & ~__X87_ROUND_MASK) {
304 /* Failure */
305 return (-1);
306 }
307
308 __fnstcw(&control);
309 control &= ~__X87_ROUND_MASK;
310 control |= round;
311 __fldcw(control);
312
313 if (__HAS_SSE) {
314 __stmxcsr(&mxcsr);
315 mxcsr &= ~(__X87_ROUND_MASK << __SSE_ROUND_SHIFT);
316 mxcsr |= round << __SSE_ROUND_SHIFT;
317 __ldmxcsr(mxcsr);
318 }
319
320 /* Success */
321 return (0);
322 }
323
324 /*
325 * The fegetenv() function attempts to store the current floating-point
326 * environment in the object pointed to by envp.
327 */
328 int
329 fegetenv(fenv_t *envp)
330 {
331 uint32_t mxcsr;
332
333 _DIAGASSERT(flagp != NULL);
334
335 /*
336 * fnstenv masks all exceptions, so we need to restore the old control
337 * word to avoid this side effect.
338 */
339 __fnstenv(envp);
340 __fldcw(envp->x87.control);
341 if (__HAS_SSE) {
342 __stmxcsr(&mxcsr);
343 envp->mxcsr = mxcsr;
344 }
345
346 /* Success */
347 return (0);
348 }
349
350 /*
351 * The feholdexcept() function saves the current floating-point environment in
352 * the object pointed to by envp, clears the floating-point status flags, and
353 * then installs a non-stop (continue on floating-point exceptions) mode, if
354 * available, for all floating-point exceptions.
355 */
356 int
357 feholdexcept(fenv_t *envp)
358 {
359 uint32_t mxcsr;
360
361 _DIAGASSERT(envp != NULL);
362
363 __fnstenv(envp);
364 __fnclex();
365 if (__HAS_SSE) {
366 __stmxcsr(&mxcsr);
367 envp->mxcsr = mxcsr;
368 mxcsr &= ~FE_ALL_EXCEPT;
369 mxcsr |= FE_ALL_EXCEPT << __SSE_EMASK_SHIFT;
370 __ldmxcsr(mxcsr);
371 }
372
373 /* Success */
374 return (0);
375 }
376
377 /*
378 * The fesetenv() function attempts to establish the floating-point environment
379 * represented by the object pointed to by envp. The argument `envp' points
380 * to an object set by a call to fegetenv() or feholdexcept(), or equal a
381 * floating-point environment macro. The fesetenv() function does not raise
382 * floating-point exceptions, but only installs the state of the floating-point
383 * status flags represented through its argument.
384 */
385 int
386 fesetenv(const fenv_t *envp)
387 {
388 fenv_t env;
389
390 _DIAGASSERT(envp != NULL);
391
392 /* Store the x87 floating-point environment */
393 memset(&env, 0, sizeof(env));
394 __fnstenv(&env);
395
396 __fe_dfl_env.x87.unused1 = env.x87.unused1;
397 __fe_dfl_env.x87.unused2 = env.x87.unused2;
398 __fe_dfl_env.x87.unused3 = env.x87.unused3;
399 memcpy(__fe_dfl_env.x87.others,
400 env.x87.others,
401 sizeof(__fe_dfl_env.x87.others) / sizeof(uint32_t));
402
403 __fldenv(envp->x87);
404 if (__HAS_SSE)
405 __ldmxcsr(envp->mxcsr);
406
407 /* Success */
408 return (0);
409 }
410
411 /*
412 * The feupdateenv() function saves the currently raised floating-point
413 * exceptions in its automatic storage, installs the floating-point environment
414 * represented by the object pointed to by `envp', and then raises the saved
415 * floating-point exceptions. The argument `envp' shall point to an object set
416 * by a call to feholdexcept() or fegetenv(), or equal a floating-point
417 * environment macro.
418 */
419 int
420 feupdateenv(const fenv_t *envp)
421 {
422 fenv_t env;
423 uint32_t mxcsr;
424 uint16_t status;
425
426 _DIAGASSERT(envp != NULL);
427
428 /* Store the x87 floating-point environment */
429 memset(&env, 0, sizeof(env));
430 __fnstenv(&env);
431
432 __fe_dfl_env.x87.unused1 = env.x87.unused1;
433 __fe_dfl_env.x87.unused2 = env.x87.unused2;
434 __fe_dfl_env.x87.unused3 = env.x87.unused3;
435 memcpy(__fe_dfl_env.x87.others,
436 env.x87.others,
437 sizeof(__fe_dfl_env.x87.others) / sizeof(uint32_t));
438
439 __fnstsw(&status);
440 if (__HAS_SSE)
441 __stmxcsr(&mxcsr);
442 else
443 mxcsr = 0;
444 fesetenv(envp);
445 feraiseexcept((mxcsr | status) & FE_ALL_EXCEPT);
446
447 /* Success */
448 return (0);
449 }
450
/*
 * The following functions are extensions to the standard
 */
454 int
455 feenableexcept(int mask)
456 {
457 uint32_t mxcsr, omask;
458 uint16_t control;
459
460 mask &= FE_ALL_EXCEPT;
461 __fnstcw(&control);
462 if (__HAS_SSE)
463 __stmxcsr(&mxcsr);
464 else
465 mxcsr = 0;
466
467 omask = (control | mxcsr >> __SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
468 control &= ~mask;
469 __fldcw(control);
470 if (__HAS_SSE) {
471 mxcsr &= ~(mask << __SSE_EMASK_SHIFT);
472 __ldmxcsr(mxcsr);
473 }
474
475 return (FE_ALL_EXCEPT & ~omask);
476 }
477
478 int
479 fedisableexcept(int mask)
480 {
481 uint32_t mxcsr, omask;
482 uint16_t control;
483
484 mask &= FE_ALL_EXCEPT;
485 __fnstcw(&control);
486 if (__HAS_SSE)
487 __stmxcsr(&mxcsr);
488 else
489 mxcsr = 0;
490
491 omask = (control | mxcsr >> __SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
492 control |= mask;
493 __fldcw(control);
494 if (__HAS_SSE) {
495 mxcsr |= mask << __SSE_EMASK_SHIFT;
496 __ldmxcsr(mxcsr);
497 }
498
499 return (FE_ALL_EXCEPT & ~omask);
500 }
501
/*
 * fegetexcept(): return the set of currently enabled (unmasked)
 * exceptions, derived from the x87 control word.
 */
int
fegetexcept(void)
{
	uint16_t cw;

	/*
	 * Only the x87 unit is consulted: the exception masks of the x87
	 * and SSE units are assumed to be the same.
	 */
	__fnstcw(&cw);

	return (FE_ALL_EXCEPT & ~cw);
}
515