Home | History | Annotate | Line # | Download | only in i387
fenv.c revision 1.6
      1 /* $NetBSD: fenv.c,v 1.6 2013/11/11 00:31:51 joerg Exp $ */
      2 
      3 /*-
      4  * Copyright (c) 2004-2005 David Schultz <das (at) FreeBSD.ORG>
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     26  * SUCH DAMAGE.
     27  */
     28 
     29 #include <sys/cdefs.h>
     30 __RCSID("$NetBSD: fenv.c,v 1.6 2013/11/11 00:31:51 joerg Exp $");
     31 
     32 #include <sys/param.h>
     33 #include <sys/sysctl.h>
     34 #include <assert.h>
     35 #include <fenv.h>
     36 #include <stddef.h>
     37 #include <string.h>
     38 
/*
 * Thin wrappers around single x87/SSE instructions.
 *
 * Macros that store (fnstcw, fnstsw, fnstenv, stmxcsr) take a POINTER to the
 * destination and dereference it in the output operand; macros that load
 * (fldcw, fldenv, ldmxcsr) take the object itself as a memory input operand.
 */

/* Load x87 Control Word from the memory operand `__cw' */
#define	__fldcw(__cw)		__asm__ __volatile__	\
	("fldcw %0" : : "m" (__cw))

/* No-Wait Store Control Word into the object pointed to by `__cw' */
#define	__fnstcw(__cw)		__asm__ __volatile__	\
	("fnstcw %0" : "=m" (*(__cw)))

/*
 * No-Wait Store Status Word into the object pointed to by `__sw'.
 * The "=am" constraint lets the compiler pick either the accumulator
 * register or memory as the destination.
 */
#define	__fnstsw(__sw)		__asm__ __volatile__	\
	("fnstsw %0" : "=am" (*(__sw)))

/* No-Wait Clear Exception Flags */
#define	__fnclex()		__asm__ __volatile__	\
	("fnclex")

/* Load x87 Environment from the memory operand `__env' */
#define	__fldenv(__env)		__asm__ __volatile__	\
	("fldenv %0" : : "m" (__env))

/* No-Wait Store x87 environment into the object pointed to by `__env' */
#define	__fnstenv(__env)	__asm__ __volatile__	\
	("fnstenv %0" : "=m" (*(__env)))

/*
 * Check for and handle pending unmasked x87 FPU exceptions.
 * The `__env' parameter is not used by the expansion.
 */
#define	__fwait(__env)		__asm__	__volatile__	\
	("fwait")

/* Load the MXCSR register from the memory operand `__mxcsr' */
#define	__ldmxcsr(__mxcsr)	__asm__ __volatile__	\
	("ldmxcsr %0" : : "m" (__mxcsr))

/* Store the MXCSR register state into the object pointed to by `__mxcsr' */
#define	__stmxcsr(__mxcsr)	__asm__ __volatile__	\
	("stmxcsr %0" : "=m" (*(__mxcsr)))
     74 
/*
 * The following object represents the default floating-point environment
 * (that is, the one installed at program startup).
 *
 * It can be used as an argument to the functions within the <fenv.h> header
 * that manage the floating-point environment, namely fesetenv() and
 * feupdateenv().
 *
 * x87 FPU registers are 16 bits wide. The upper bits, 31-16, are marked as
 * RESERVED. We provide a partial floating-point environment, where we
 * define only the lower bits. The reserved bits are extracted and set by the
 * consumers of FE_DFL_ENV at runtime.
 *
 * Because of that, the object itself is not const: __init_libm() overwrites
 * the control word with the value read at startup, and fesetenv() and
 * feupdateenv() refresh the unused/"others" fields from the live FPU state.
 */
fenv_t __fe_dfl_env = {
	{
		__NetBSD_NPXCW__,       /* Control word register */
		0x0,			/* Unused */
		0x0000,                 /* Status word register */
		0x0,			/* Unused */
		0x0000ffff,             /* Tag word register */
		0x0,			/* Unused */
		{
			/* Remaining environment words ("others") */
			0x0000, 0x0000,
			0x0000, 0xffff
		}
	},
	__INITIAL_MXCSR__		/* MXCSR register */
};
    104 
    105 /*
    106  * Test for SSE support on this processor.
    107  *
    108  * We need to use ldmxcsr/stmxcsr to get correct results if any part
    109  * of the program was compiled to use SSE floating-point, but we can't
    110  * use SSE on older processors.
    111  *
    112  * In order to do so, we need to query the processor capabilities via the CPUID
    113  * instruction. We can make it even simpler though, by querying the machdep.sse
    114  * sysctl.
    115  */
    116 static int __HAS_SSE = 0;
    117 
    118 static void __init_libm(void) __attribute__ ((constructor, used));
    119 
    120 static void __init_libm(void)
    121 {
    122 	size_t oldlen = sizeof(__HAS_SSE);
    123 	int rv;
    124 	uint16_t control;
    125 
    126 	rv = sysctlbyname("machdep.sse", &__HAS_SSE, &oldlen, NULL, 0);
    127 	if (rv == -1)
    128 		__HAS_SSE = 0;
    129 
    130 	__fnstcw(&control);
    131 	__fe_dfl_env.x87.control = control;
    132 }
    133 
    134 /*
    135  * The feclearexcept() function clears the supported floating-point exceptions
    136  * represented by `excepts'.
    137  */
    138 int
    139 feclearexcept(int excepts)
    140 {
    141 	fenv_t env;
    142 	uint32_t mxcsr;
    143 	int ex;
    144 
    145 	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
    146 
    147 	ex = excepts & FE_ALL_EXCEPT;
    148 
    149 	/* It's ~3x faster to call fnclex, than store/load fp env */
    150 	if (ex == FE_ALL_EXCEPT) {
    151 		__fnclex();
    152 	} else {
    153 		__fnstenv(&env);
    154 		env.x87.status &= ~ex;
    155 		__fldenv(env);
    156 	}
    157 
    158 	if (__HAS_SSE) {
    159 		__stmxcsr(&mxcsr);
    160 		mxcsr &= ~ex;
    161 		__ldmxcsr(mxcsr);
    162 	}
    163 
    164 	/* Success */
    165 	return (0);
    166 }
    167 
    168 /*
    169  * The fegetexceptflag() function stores an implementation-defined
    170  * representation of the states of the floating-point status flags indicated by
    171  * the argument excepts in the object pointed to by the argument flagp.
    172  */
    173 int
    174 fegetexceptflag(fexcept_t *flagp, int excepts)
    175 {
    176 	uint32_t mxcsr;
    177 	uint16_t status;
    178 	int ex;
    179 
    180 	_DIAGASSERT(flagp != NULL);
    181 	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
    182 
    183 	ex = excepts & FE_ALL_EXCEPT;
    184 
    185 	__fnstsw(&status);
    186 	if (__HAS_SSE)
    187 		__stmxcsr(&mxcsr);
    188 	else
    189 		mxcsr = 0;
    190 
    191 	*flagp = (mxcsr | status) & ex;
    192 
    193 	/* Success */
    194 	return (0);
    195 }
    196 
/*
 * feraiseexcept(): raise the supported floating-point exceptions named by
 * `excepts'.
 *
 * The standard would allow executing an instruction that produces the
 * exception as a side effect; here the flags are written into the status
 * registers via fesetexceptflag() instead, and an fwait then lets any
 * pending unmasked x87 exception be delivered.
 *
 * Always returns 0.
 */
int
feraiseexcept(int excepts)
{
	fexcept_t flags;

	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);

	flags = excepts & FE_ALL_EXCEPT;

	/* fesetexceptflag() masks `excepts' itself */
	(void)fesetexceptflag(&flags, excepts);
	__fwait();

	return (0);
}
    221 
    222 /*
    223  * This function sets the floating-point status flags indicated by the argument
    224  * `excepts' to the states stored in the object pointed to by `flagp'. It does
    225  * NOT raise any floating-point exceptions, but only sets the state of the flags.
    226  */
    227 int
    228 fesetexceptflag(const fexcept_t *flagp, int excepts)
    229 {
    230 	fenv_t env;
    231 	uint32_t mxcsr;
    232 	int ex;
    233 
    234 	_DIAGASSERT(flagp != NULL);
    235 	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
    236 
    237 	ex = excepts & FE_ALL_EXCEPT;
    238 
    239 	__fnstenv(&env);
    240 	env.x87.status &= ~ex;
    241 	env.x87.status |= *flagp & ex;
    242 	__fldenv(env);
    243 
    244 	if (__HAS_SSE) {
    245 		__stmxcsr(&mxcsr);
    246 		mxcsr &= ~ex;
    247 		mxcsr |= *flagp & ex;
    248 		__ldmxcsr(mxcsr);
    249 	}
    250 
    251 	/* Success */
    252 	return (0);
    253 }
    254 
    255 /*
    256  * The fetestexcept() function determines which of a specified subset of the
    257  * floating-point exception flags are currently set. The `excepts' argument
    258  * specifies the floating-point status flags to be queried.
    259  */
    260 int
    261 fetestexcept(int excepts)
    262 {
    263 	uint32_t mxcsr;
    264 	uint16_t status;
    265 	int ex;
    266 
    267 	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
    268 
    269 	ex = excepts & FE_ALL_EXCEPT;
    270 
    271 	__fnstsw(&status);
    272 	if (__HAS_SSE)
    273 		__stmxcsr(&mxcsr);
    274 	else
    275 		mxcsr = 0;
    276 
    277 	return ((status | mxcsr) & ex);
    278 }
    279 
    280 int
    281 fegetround(void)
    282 {
    283 	uint16_t control;
    284 
    285 	/*
    286 	 * We assume that the x87 and the SSE unit agree on the
    287 	 * rounding mode.  Reading the control word on the x87 turns
    288 	 * out to be about 5 times faster than reading it on the SSE
    289 	 * unit on an Opteron 244.
    290 	 */
    291 	__fnstcw(&control);
    292 
    293 	return (control & __X87_ROUND_MASK);
    294 }
    295 
    296 /*
    297  * The fesetround() function shall establish the rounding direction represented
    298  * by its argument round. If the argument is not equal to the value of a
    299  * rounding direction macro, the rounding direction is not changed.
    300  */
    301 int
    302 fesetround(int round)
    303 {
    304 	uint32_t mxcsr;
    305 	uint16_t control;
    306 
    307 	if (round & ~__X87_ROUND_MASK) {
    308 		/* Failure */
    309 		return (-1);
    310 	}
    311 
    312 	__fnstcw(&control);
    313 	control &= ~__X87_ROUND_MASK;
    314 	control |= round;
    315 	__fldcw(control);
    316 
    317 	if (__HAS_SSE) {
    318 		__stmxcsr(&mxcsr);
    319 		mxcsr &= ~(__X87_ROUND_MASK << __SSE_ROUND_SHIFT);
    320 		mxcsr |= round << __SSE_ROUND_SHIFT;
    321 		__ldmxcsr(mxcsr);
    322 	}
    323 
    324 	/* Success */
    325 	return (0);
    326 }
    327 
    328 /*
    329  * The fegetenv() function attempts to store the current floating-point
    330  * environment in the object pointed to by envp.
    331  */
    332 int
    333 fegetenv(fenv_t *envp)
    334 {
    335 	uint32_t mxcsr;
    336 
    337 	_DIAGASSERT(flagp != NULL);
    338 
    339 	/*
    340 	 * fnstenv masks all exceptions, so we need to restore the old control
    341 	 * word to avoid this side effect.
    342 	 */
    343 	__fnstenv(envp);
    344 	__fldcw(envp->x87.control);
    345 	if (__HAS_SSE) {
    346 		__stmxcsr(&mxcsr);
    347 		envp->mxcsr = mxcsr;
    348 	}
    349 
    350 	/* Success */
    351 	return (0);
    352 }
    353 
    354 /*
    355  * The feholdexcept() function saves the current floating-point environment in
    356  * the object pointed to by envp, clears the floating-point status flags, and
    357  * then installs a non-stop (continue on floating-point exceptions) mode, if
    358  * available, for all floating-point exceptions.
    359  */
    360 int
    361 feholdexcept(fenv_t *envp)
    362 {
    363 	uint32_t mxcsr;
    364 
    365 	_DIAGASSERT(envp != NULL);
    366 
    367 	__fnstenv(envp);
    368 	__fnclex();
    369 	if (__HAS_SSE) {
    370 		__stmxcsr(&mxcsr);
    371 		envp->mxcsr = mxcsr;
    372 		mxcsr &= ~FE_ALL_EXCEPT;
    373 		mxcsr |= FE_ALL_EXCEPT << __SSE_EMASK_SHIFT;
    374 		__ldmxcsr(mxcsr);
    375 	}
    376 
    377 	/* Success */
    378 	return (0);
    379 }
    380 
    381 /*
    382  * The fesetenv() function attempts to establish the floating-point environment
    383  * represented by the object pointed to by envp. The argument `envp' points
    384  * to an object set by a call to fegetenv() or feholdexcept(), or equal a
    385  * floating-point environment macro. The fesetenv() function does not raise
    386  * floating-point exceptions, but only installs the state of the floating-point
    387  * status flags represented through its argument.
    388  */
    389 int
    390 fesetenv(const fenv_t *envp)
    391 {
    392 	fenv_t env;
    393 
    394 	_DIAGASSERT(envp != NULL);
    395 
    396 	/* Store the x87 floating-point environment */
    397 	memset(&env, 0, sizeof(env));
    398 	__fnstenv(&env);
    399 
    400 	__fe_dfl_env.x87.unused1 = env.x87.unused1;
    401 	__fe_dfl_env.x87.unused2 = env.x87.unused2;
    402 	__fe_dfl_env.x87.unused3 = env.x87.unused3;
    403 	memcpy(__fe_dfl_env.x87.others,
    404 	       env.x87.others,
    405 	       sizeof(__fe_dfl_env.x87.others) / sizeof(uint32_t));
    406 
    407 	__fldenv(envp->x87);
    408 	if (__HAS_SSE)
    409 		__ldmxcsr(envp->mxcsr);
    410 
    411 	/* Success */
    412 	return (0);
    413 }
    414 
    415 /*
    416  * The feupdateenv() function saves the currently raised floating-point
    417  * exceptions in its automatic storage, installs the floating-point environment
    418  * represented by the object pointed to by `envp', and then raises the saved
    419  * floating-point exceptions. The argument `envp' shall point to an object set
    420  * by a call to feholdexcept() or fegetenv(), or equal a floating-point
    421  * environment macro.
    422  */
    423 int
    424 feupdateenv(const fenv_t *envp)
    425 {
    426 	fenv_t env;
    427 	uint32_t mxcsr;
    428 	uint16_t status;
    429 
    430 	_DIAGASSERT(envp != NULL);
    431 
    432 	/* Store the x87 floating-point environment */
    433 	memset(&env, 0, sizeof(env));
    434 	__fnstenv(&env);
    435 
    436 	__fe_dfl_env.x87.unused1 = env.x87.unused1;
    437 	__fe_dfl_env.x87.unused2 = env.x87.unused2;
    438 	__fe_dfl_env.x87.unused3 = env.x87.unused3;
    439 	memcpy(__fe_dfl_env.x87.others,
    440 	       env.x87.others,
    441 	       sizeof(__fe_dfl_env.x87.others) / sizeof(uint32_t));
    442 
    443 	__fnstsw(&status);
    444 	if (__HAS_SSE)
    445 		__stmxcsr(&mxcsr);
    446 	else
    447 		mxcsr = 0;
    448 	fesetenv(envp);
    449 	feraiseexcept((mxcsr | status) & FE_ALL_EXCEPT);
    450 
    451 	/* Success */
    452 	return (0);
    453 }
    454 
    455 /*
    456  * The following functions are extentions to the standard
    457  */
    458 int
    459 feenableexcept(int mask)
    460 {
    461 	uint32_t mxcsr, omask;
    462 	uint16_t control;
    463 
    464 	mask &= FE_ALL_EXCEPT;
    465 	__fnstcw(&control);
    466 	if (__HAS_SSE)
    467 		__stmxcsr(&mxcsr);
    468 	else
    469 		mxcsr = 0;
    470 
    471 	omask = (control | mxcsr >> __SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
    472 	control &= ~mask;
    473 	__fldcw(control);
    474 	if (__HAS_SSE) {
    475 		mxcsr &= ~(mask << __SSE_EMASK_SHIFT);
    476 		__ldmxcsr(mxcsr);
    477 	}
    478 
    479 	return (FE_ALL_EXCEPT & ~omask);
    480 }
    481 
    482 int
    483 fedisableexcept(int mask)
    484 {
    485 	uint32_t mxcsr, omask;
    486 	uint16_t control;
    487 
    488 	mask &= FE_ALL_EXCEPT;
    489 	__fnstcw(&control);
    490 	if (__HAS_SSE)
    491 		__stmxcsr(&mxcsr);
    492 	else
    493 		mxcsr = 0;
    494 
    495 	omask = (control | mxcsr >> __SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
    496 	control |= mask;
    497 	__fldcw(control);
    498 	if (__HAS_SSE) {
    499 		mxcsr |= mask << __SSE_EMASK_SHIFT;
    500 		__ldmxcsr(mxcsr);
    501 	}
    502 
    503 	return (FE_ALL_EXCEPT & ~omask);
    504 }
    505 
/*
 * fegetexcept(): return the set of currently enabled (unmasked)
 * exceptions.
 *
 * Only the x87 control word is read; the x87 and SSE exception masks are
 * assumed to be the same.
 */
int
fegetexcept(void)
{
	uint16_t cw;

	__fnstcw(&cw);

	/* Mask bits are "disable" bits, so invert them */
	return (~cw & FE_ALL_EXCEPT);
}
    519