Home | History | Annotate | Line # | Download | only in i387
fenv.c revision 1.8
      1 /* $NetBSD: fenv.c,v 1.8 2017/03/22 23:11:08 chs Exp $ */
      2 
      3 /*-
      4  * Copyright (c) 2004-2005 David Schultz <das (at) FreeBSD.ORG>
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     26  * SUCH DAMAGE.
     27  */
     28 
     29 #include <sys/cdefs.h>
     30 __RCSID("$NetBSD: fenv.c,v 1.8 2017/03/22 23:11:08 chs Exp $");
     31 
     32 #include "namespace.h"
     33 
     34 #include <sys/param.h>
     35 #include <sys/sysctl.h>
     36 #include <assert.h>
     37 #include <fenv.h>
     38 #include <stddef.h>
     39 #include <string.h>
     40 
     41 #ifdef __weak_alias
     42 __weak_alias(feclearexcept,_feclearexcept)
     43 __weak_alias(fedisableexcept,_fedisableexcept)
     44 __weak_alias(feenableexcept,_feenableexcept)
     45 __weak_alias(fegetenv,_fegetenv)
     46 __weak_alias(fegetexcept,_fegetexcept)
     47 __weak_alias(fegetexceptflag,_fegetexceptflag)
     48 __weak_alias(fegetround,_fegetround)
     49 __weak_alias(feholdexcept,_feholdexcept)
     50 __weak_alias(feraiseexcept,_feraiseexcept)
     51 __weak_alias(fesetenv,_fesetenv)
     52 __weak_alias(fesetexceptflag,_fesetexceptflag)
     53 __weak_alias(fesetround,_fesetround)
     54 __weak_alias(fetestexcept,_fetestexcept)
     55 __weak_alias(feupdateenv,_feupdateenv)
     56 #endif
     57 
/*
 * Thin wrappers around the x87 control/status/environment instructions.
 * The "load" wrappers take the object itself (used as a memory operand);
 * the "store" wrappers take a pointer to the object to be written.
 */

/* fldcw: load the x87 control word from `__word'. */
#define	__fldcw(__word)							\
	__asm__ __volatile__ ("fldcw %0" : : "m" (__word))

/* fnstcw: store the x87 control word through `__wordp', without waiting. */
#define	__fnstcw(__wordp)						\
	__asm__ __volatile__ ("fnstcw %0" : "=m" (*(__wordp)))

/*
 * fnstsw: store the x87 status word through `__wordp', without waiting.
 * The constraint lets the compiler use either the a register or memory.
 */
#define	__fnstsw(__wordp)						\
	__asm__ __volatile__ ("fnstsw %0" : "=am" (*(__wordp)))

/* fnclex: clear the x87 exception flags, without waiting. */
#define	__fnclex()							\
	__asm__ __volatile__ ("fnclex")

/* fldenv: load the x87 environment from `__state'. */
#define	__fldenv(__state)						\
	__asm__ __volatile__ ("fldenv %0" : : "m" (__state))

/* fnstenv: store the x87 environment through `__statep', without waiting. */
#define	__fnstenv(__statep)						\
	__asm__ __volatile__ ("fnstenv %0" : "=m" (*(__statep)))
     81 
/*
 * fwait: check for and handle pending unmasked x87 FPU exceptions.
 *
 * The instruction has no operand, so the macro takes no argument; it is
 * invoked as `__fwait()'.
 */
#define	__fwait()							\
	__asm__ __volatile__ ("fwait")
     85 
/* ldmxcsr: load the SSE MXCSR register from `__csr'. */
#define	__ldmxcsr(__csr)						\
	__asm__ __volatile__ ("ldmxcsr %0" : : "m" (__csr))

/* stmxcsr: store the SSE MXCSR register state through `__csrp'. */
#define	__stmxcsr(__csrp)						\
	__asm__ __volatile__ ("stmxcsr %0" : "=m" (*(__csrp)))
     93 
     94 /*
     95  * The following constant represents the default floating-point environment
     96  * (that is, the one installed at program startup) and has type pointer to
     97  * const-qualified fenv_t.
     98  *
     99  * It can be used as an argument to the functions within the <fenv.h> header
    100  * that manage the floating-point environment, namely fesetenv() and
    101  * feupdateenv().
    102  *
    103  * x87 fpu registers are 16bit wide. The upper bits, 31-16, are marked as
    104  * RESERVED. We provide a partial floating-point environment, where we
    105  * define only the lower bits. The reserved bits are extracted and set by the
    106  * consumers of FE_DFL_ENV, during runtime.
    107  */
    108 fenv_t __fe_dfl_env = {
    109 	{
    110 		__NetBSD_NPXCW__,       /* Control word register */
    111 		0x0,			/* Unused */
    112 		0x0000,                 /* Status word register */
    113 		0x0,			/* Unused */
    114 		0x0000ffff,             /* Tag word register */
    115 		0x0,			/* Unused */
    116 		{
    117 			0x0000, 0x0000,
    118 			0x0000, 0xffff
    119 		}
    120 	},
    121 	__INITIAL_MXCSR__		/* MXCSR register */
    122 };
    123 
    124 /*
    125  * Test for SSE support on this processor.
    126  *
    127  * We need to use ldmxcsr/stmxcsr to get correct results if any part
    128  * of the program was compiled to use SSE floating-point, but we can't
    129  * use SSE on older processors.
    130  *
    131  * In order to do so, we need to query the processor capabilities via the CPUID
    132  * instruction. We can make it even simpler though, by querying the machdep.sse
    133  * sysctl.
    134  */
    135 static int __HAS_SSE = 0;
    136 
    137 static void __init_libm(void) __attribute__ ((constructor, used));
    138 
    139 static void __init_libm(void)
    140 {
    141 	size_t oldlen = sizeof(__HAS_SSE);
    142 	int rv;
    143 	uint16_t control;
    144 
    145 	rv = sysctlbyname("machdep.sse", &__HAS_SSE, &oldlen, NULL, 0);
    146 	if (rv == -1)
    147 		__HAS_SSE = 0;
    148 
    149 	__fnstcw(&control);
    150 	__fe_dfl_env.x87.control = control;
    151 }
    152 
    153 /*
    154  * The feclearexcept() function clears the supported floating-point exceptions
    155  * represented by `excepts'.
    156  */
    157 int
    158 feclearexcept(int excepts)
    159 {
    160 	fenv_t env;
    161 	uint32_t mxcsr;
    162 	int ex;
    163 
    164 	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
    165 
    166 	ex = excepts & FE_ALL_EXCEPT;
    167 
    168 	/* It's ~3x faster to call fnclex, than store/load fp env */
    169 	if (ex == FE_ALL_EXCEPT) {
    170 		__fnclex();
    171 	} else {
    172 		__fnstenv(&env);
    173 		env.x87.status &= ~ex;
    174 		__fldenv(env);
    175 	}
    176 
    177 	if (__HAS_SSE) {
    178 		__stmxcsr(&mxcsr);
    179 		mxcsr &= ~ex;
    180 		__ldmxcsr(mxcsr);
    181 	}
    182 
    183 	/* Success */
    184 	return (0);
    185 }
    186 
    187 /*
    188  * The fegetexceptflag() function stores an implementation-defined
    189  * representation of the states of the floating-point status flags indicated by
    190  * the argument excepts in the object pointed to by the argument flagp.
    191  */
    192 int
    193 fegetexceptflag(fexcept_t *flagp, int excepts)
    194 {
    195 	uint32_t mxcsr;
    196 	uint16_t status;
    197 	int ex;
    198 
    199 	_DIAGASSERT(flagp != NULL);
    200 	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
    201 
    202 	ex = excepts & FE_ALL_EXCEPT;
    203 
    204 	__fnstsw(&status);
    205 	if (__HAS_SSE)
    206 		__stmxcsr(&mxcsr);
    207 	else
    208 		mxcsr = 0;
    209 
    210 	*flagp = (mxcsr | status) & ex;
    211 
    212 	/* Success */
    213 	return (0);
    214 }
    215 
/*
 * feraiseexcept(): raise the supported floating-point exceptions named by
 * `excepts'.
 *
 * The standard explicitly permits executing an instruction that has the
 * exception as a side effect; this implementation instead writes the status
 * flags directly via fesetexceptflag() (which also masks the input) and
 * then issues fwait.  Always returns 0.
 */
int
feraiseexcept(int excepts)
{
	fexcept_t wanted;

	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);

	wanted = excepts & FE_ALL_EXCEPT;

	(void)fesetexceptflag(&wanted, excepts);
	__fwait();

	/* Success */
	return (0);
}
    240 
    241 /*
    242  * This function sets the floating-point status flags indicated by the argument
    243  * `excepts' to the states stored in the object pointed to by `flagp'. It does
    244  * NOT raise any floating-point exceptions, but only sets the state of the flags.
    245  */
    246 int
    247 fesetexceptflag(const fexcept_t *flagp, int excepts)
    248 {
    249 	fenv_t env;
    250 	uint32_t mxcsr;
    251 	int ex;
    252 
    253 	_DIAGASSERT(flagp != NULL);
    254 	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
    255 
    256 	ex = excepts & FE_ALL_EXCEPT;
    257 
    258 	__fnstenv(&env);
    259 	env.x87.status &= ~ex;
    260 	env.x87.status |= *flagp & ex;
    261 	__fldenv(env);
    262 
    263 	if (__HAS_SSE) {
    264 		__stmxcsr(&mxcsr);
    265 		mxcsr &= ~ex;
    266 		mxcsr |= *flagp & ex;
    267 		__ldmxcsr(mxcsr);
    268 	}
    269 
    270 	/* Success */
    271 	return (0);
    272 }
    273 
    274 /*
    275  * The fetestexcept() function determines which of a specified subset of the
    276  * floating-point exception flags are currently set. The `excepts' argument
    277  * specifies the floating-point status flags to be queried.
    278  */
    279 int
    280 fetestexcept(int excepts)
    281 {
    282 	uint32_t mxcsr;
    283 	uint16_t status;
    284 	int ex;
    285 
    286 	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
    287 
    288 	ex = excepts & FE_ALL_EXCEPT;
    289 
    290 	__fnstsw(&status);
    291 	if (__HAS_SSE)
    292 		__stmxcsr(&mxcsr);
    293 	else
    294 		mxcsr = 0;
    295 
    296 	return ((status | mxcsr) & ex);
    297 }
    298 
    299 int
    300 fegetround(void)
    301 {
    302 	uint16_t control;
    303 
    304 	/*
    305 	 * We assume that the x87 and the SSE unit agree on the
    306 	 * rounding mode.  Reading the control word on the x87 turns
    307 	 * out to be about 5 times faster than reading it on the SSE
    308 	 * unit on an Opteron 244.
    309 	 */
    310 	__fnstcw(&control);
    311 
    312 	return (control & __X87_ROUND_MASK);
    313 }
    314 
    315 /*
    316  * The fesetround() function shall establish the rounding direction represented
    317  * by its argument round. If the argument is not equal to the value of a
    318  * rounding direction macro, the rounding direction is not changed.
    319  */
    320 int
    321 fesetround(int round)
    322 {
    323 	uint32_t mxcsr;
    324 	uint16_t control;
    325 
    326 	if (round & ~__X87_ROUND_MASK) {
    327 		/* Failure */
    328 		return (-1);
    329 	}
    330 
    331 	__fnstcw(&control);
    332 	control &= ~__X87_ROUND_MASK;
    333 	control |= round;
    334 	__fldcw(control);
    335 
    336 	if (__HAS_SSE) {
    337 		__stmxcsr(&mxcsr);
    338 		mxcsr &= ~(__X87_ROUND_MASK << __SSE_ROUND_SHIFT);
    339 		mxcsr |= round << __SSE_ROUND_SHIFT;
    340 		__ldmxcsr(mxcsr);
    341 	}
    342 
    343 	/* Success */
    344 	return (0);
    345 }
    346 
    347 /*
    348  * The fegetenv() function attempts to store the current floating-point
    349  * environment in the object pointed to by envp.
    350  */
    351 int
    352 fegetenv(fenv_t *envp)
    353 {
    354 	uint32_t mxcsr;
    355 
    356 	_DIAGASSERT(flagp != NULL);
    357 
    358 	/*
    359 	 * fnstenv masks all exceptions, so we need to restore the old control
    360 	 * word to avoid this side effect.
    361 	 */
    362 	__fnstenv(envp);
    363 	__fldcw(envp->x87.control);
    364 	if (__HAS_SSE) {
    365 		__stmxcsr(&mxcsr);
    366 		envp->mxcsr = mxcsr;
    367 	}
    368 
    369 	/* Success */
    370 	return (0);
    371 }
    372 
    373 /*
    374  * The feholdexcept() function saves the current floating-point environment in
    375  * the object pointed to by envp, clears the floating-point status flags, and
    376  * then installs a non-stop (continue on floating-point exceptions) mode, if
    377  * available, for all floating-point exceptions.
    378  */
    379 int
    380 feholdexcept(fenv_t *envp)
    381 {
    382 	uint32_t mxcsr;
    383 
    384 	_DIAGASSERT(envp != NULL);
    385 
    386 	__fnstenv(envp);
    387 	__fnclex();
    388 	if (__HAS_SSE) {
    389 		__stmxcsr(&mxcsr);
    390 		envp->mxcsr = mxcsr;
    391 		mxcsr &= ~FE_ALL_EXCEPT;
    392 		mxcsr |= FE_ALL_EXCEPT << __SSE_EMASK_SHIFT;
    393 		__ldmxcsr(mxcsr);
    394 	}
    395 
    396 	/* Success */
    397 	return (0);
    398 }
    399 
    400 /*
    401  * The fesetenv() function attempts to establish the floating-point environment
    402  * represented by the object pointed to by envp. The argument `envp' points
    403  * to an object set by a call to fegetenv() or feholdexcept(), or equal a
    404  * floating-point environment macro. The fesetenv() function does not raise
    405  * floating-point exceptions, but only installs the state of the floating-point
    406  * status flags represented through its argument.
    407  */
    408 int
    409 fesetenv(const fenv_t *envp)
    410 {
    411 	fenv_t env;
    412 
    413 	_DIAGASSERT(envp != NULL);
    414 
    415 	/* Store the x87 floating-point environment */
    416 	memset(&env, 0, sizeof(env));
    417 	__fnstenv(&env);
    418 
    419 	__fe_dfl_env.x87.unused1 = env.x87.unused1;
    420 	__fe_dfl_env.x87.unused2 = env.x87.unused2;
    421 	__fe_dfl_env.x87.unused3 = env.x87.unused3;
    422 	memcpy(__fe_dfl_env.x87.others, env.x87.others,
    423 	    sizeof(__fe_dfl_env.x87.others));
    424 
    425 	__fldenv(envp->x87);
    426 	if (__HAS_SSE)
    427 		__ldmxcsr(envp->mxcsr);
    428 
    429 	/* Success */
    430 	return (0);
    431 }
    432 
    433 /*
    434  * The feupdateenv() function saves the currently raised floating-point
    435  * exceptions in its automatic storage, installs the floating-point environment
    436  * represented by the object pointed to by `envp', and then raises the saved
    437  * floating-point exceptions. The argument `envp' shall point to an object set
    438  * by a call to feholdexcept() or fegetenv(), or equal a floating-point
    439  * environment macro.
    440  */
    441 int
    442 feupdateenv(const fenv_t *envp)
    443 {
    444 	fenv_t env;
    445 	uint32_t mxcsr;
    446 	uint16_t status;
    447 
    448 	_DIAGASSERT(envp != NULL);
    449 
    450 	/* Store the x87 floating-point environment */
    451 	memset(&env, 0, sizeof(env));
    452 	__fnstenv(&env);
    453 
    454 	__fe_dfl_env.x87.unused1 = env.x87.unused1;
    455 	__fe_dfl_env.x87.unused2 = env.x87.unused2;
    456 	__fe_dfl_env.x87.unused3 = env.x87.unused3;
    457 	memcpy(__fe_dfl_env.x87.others, env.x87.others,
    458 	    sizeof(__fe_dfl_env.x87.others));
    459 
    460 	__fnstsw(&status);
    461 	if (__HAS_SSE)
    462 		__stmxcsr(&mxcsr);
    463 	else
    464 		mxcsr = 0;
    465 	fesetenv(envp);
    466 	feraiseexcept((mxcsr | status) & FE_ALL_EXCEPT);
    467 
    468 	/* Success */
    469 	return (0);
    470 }
    471 
/*
 * The following functions are extensions to the standard.
 */
    475 int
    476 feenableexcept(int mask)
    477 {
    478 	uint32_t mxcsr, omask;
    479 	uint16_t control;
    480 
    481 	mask &= FE_ALL_EXCEPT;
    482 	__fnstcw(&control);
    483 	if (__HAS_SSE)
    484 		__stmxcsr(&mxcsr);
    485 	else
    486 		mxcsr = 0;
    487 
    488 	omask = (control | mxcsr >> __SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
    489 	control &= ~mask;
    490 	__fldcw(control);
    491 	if (__HAS_SSE) {
    492 		mxcsr &= ~(mask << __SSE_EMASK_SHIFT);
    493 		__ldmxcsr(mxcsr);
    494 	}
    495 
    496 	return (FE_ALL_EXCEPT & ~omask);
    497 }
    498 
    499 int
    500 fedisableexcept(int mask)
    501 {
    502 	uint32_t mxcsr, omask;
    503 	uint16_t control;
    504 
    505 	mask &= FE_ALL_EXCEPT;
    506 	__fnstcw(&control);
    507 	if (__HAS_SSE)
    508 		__stmxcsr(&mxcsr);
    509 	else
    510 		mxcsr = 0;
    511 
    512 	omask = (control | mxcsr >> __SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
    513 	control |= mask;
    514 	__fldcw(control);
    515 	if (__HAS_SSE) {
    516 		mxcsr |= mask << __SSE_EMASK_SHIFT;
    517 		__ldmxcsr(mxcsr);
    518 	}
    519 
    520 	return (FE_ALL_EXCEPT & ~omask);
    521 }
    522 
/*
 * fegetexcept(): return the set of currently enabled (unmasked)
 * exceptions, derived from the x87 control word.
 */
int
fegetexcept(void)
{
	uint16_t cw;

	/*
	 * Only the x87 control word is read: the x87 and SSE units are
	 * assumed to carry the same masks.
	 */
	__fnstcw(&cw);

	/* A clear mask bit means the exception is enabled. */
	return (~cw & FE_ALL_EXCEPT);
}
    536