Home | History | Annotate | Line # | Download | only in i387
fenv.c revision 1.5
      1 /* $NetBSD: fenv.c,v 1.5 2013/05/29 00:53:19 riastradh Exp $ */
      2 
      3 /*-
      4  * Copyright (c) 2004-2005 David Schultz <das (at) FreeBSD.ORG>
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     26  * SUCH DAMAGE.
     27  */
     28 
     29 #include <sys/cdefs.h>
     30 __RCSID("$NetBSD: fenv.c,v 1.5 2013/05/29 00:53:19 riastradh Exp $");
     31 
     32 #include <sys/param.h>
     33 #include <sys/sysctl.h>
     34 #include <assert.h>
     35 #include <fenv.h>
     36 #include <stddef.h>
     37 #include <string.h>
     38 
/*
 * Thin wrappers around the x87 and SSE control/status instructions.
 *
 * Wrappers that store state take a pointer and write through it; wrappers
 * that load state take an lvalue which the instruction reads from memory.
 */

/* fldcw: load the x87 control word from `cw'. */
#define	__fldcw(cw)						\
	__asm__ __volatile__ ("fldcw %0" : : "m" (cw))

/* fnstcw: store the x87 control word into `*cwp' (no-wait form). */
#define	__fnstcw(cwp)						\
	__asm__ __volatile__ ("fnstcw %0" : "=m" (*(cwp)))

/*
 * fnstsw: store the x87 status word into `*swp' (no-wait form).
 * The operand may be placed either in %ax or in memory.
 */
#define	__fnstsw(swp)						\
	__asm__ __volatile__ ("fnstsw %0" : "=am" (*(swp)))

/* fnclex: clear the x87 exception flags (no-wait form). */
#define	__fnclex()						\
	__asm__ __volatile__ ("fnclex")

/* fldenv: load the x87 environment from `env'. */
#define	__fldenv(env)						\
	__asm__ __volatile__ ("fldenv %0" : : "m" (env))

/* fnstenv: store the x87 environment into `*envp' (no-wait form). */
#define	__fnstenv(envp)						\
	__asm__ __volatile__ ("fnstenv %0" : "=m" (*(envp)))

/*
 * fwait: check for and handle pending unmasked x87 exceptions.
 * The macro argument is accepted but ignored.
 */
#define	__fwait(ignored)					\
	__asm__ __volatile__ ("fwait")

/* ldmxcsr: load the SSE MXCSR register from `csr'. */
#define	__ldmxcsr(csr)						\
	__asm__ __volatile__ ("ldmxcsr %0" : : "m" (csr))

/* stmxcsr: store the SSE MXCSR register into `*csrp'. */
#define	__stmxcsr(csrp)						\
	__asm__ __volatile__ ("stmxcsr %0" : "=m" (*(csrp)))
     74 
     75 /*
     76  * The following constant represents the default floating-point environment
     77  * (that is, the one installed at program startup) and has type pointer to
     78  * const-qualified fenv_t.
     79  *
     80  * It can be used as an argument to the functions within the <fenv.h> header
     81  * that manage the floating-point environment, namely fesetenv() and
     82  * feupdateenv().
     83  *
     84  * x87 fpu registers are 16bit wide. The upper bits, 31-16, are marked as
     85  * RESERVED. We provide a partial floating-point environment, where we
     86  * define only the lower bits. The reserved bits are extracted and set by the
     87  * consumers of FE_DFL_ENV, during runtime.
     88  */
     89 fenv_t __fe_dfl_env = {
     90 	{
     91 		__NetBSD_NPXCW__,       /* Control word register */
     92 		0x0,			/* Unused */
     93 		0x0000,                 /* Status word register */
     94 		0x0,			/* Unused */
     95 		0x0000ffff,             /* Tag word register */
     96 		0x0,			/* Unused */
     97 		{
     98 			0x0000, 0x0000,
     99 			0x0000, 0xffff
    100 		}
    101 	},
    102 	__INITIAL_MXCSR__		/* MXCSR register */
    103 };
    104 
/*
 * Run-time SSE detection.
 *
 * If any part of the program was compiled to use SSE floating point, the
 * MXCSR register must be read and written (stmxcsr/ldmxcsr) to get correct
 * results, yet those instructions cannot be used on processors without
 * SSE.  Instead of decoding CPUID ourselves we ask the kernel through the
 * machdep.sse sysctl.
 *
 * __HAS_SSE is non-zero when SSE may be used.  It is set once by
 * __test_sse(), which is declared as a constructor.
 */
static int __HAS_SSE = 0;

static void __test_sse(void) __attribute__ ((constructor));

static void
__test_sse(void)
{
	size_t len = sizeof(__HAS_SSE);

	/* If the sysctl fails, treat the processor as having no SSE. */
	if (sysctlbyname("machdep.sse", &__HAS_SSE, &len, NULL, 0) == -1)
		__HAS_SSE = 0;
}
    129 
    130 /*
    131  * The feclearexcept() function clears the supported floating-point exceptions
    132  * represented by `excepts'.
    133  */
    134 int
    135 feclearexcept(int excepts)
    136 {
    137 	fenv_t env;
    138 	uint32_t mxcsr;
    139 	int ex;
    140 
    141 	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
    142 
    143 	ex = excepts & FE_ALL_EXCEPT;
    144 
    145 	/* It's ~3x faster to call fnclex, than store/load fp env */
    146 	if (ex == FE_ALL_EXCEPT) {
    147 		__fnclex();
    148 	} else {
    149 		__fnstenv(&env);
    150 		env.x87.status &= ~ex;
    151 		__fldenv(env);
    152 	}
    153 
    154 	if (__HAS_SSE) {
    155 		__stmxcsr(&mxcsr);
    156 		mxcsr &= ~ex;
    157 		__ldmxcsr(mxcsr);
    158 	}
    159 
    160 	/* Success */
    161 	return (0);
    162 }
    163 
    164 /*
    165  * The fegetexceptflag() function stores an implementation-defined
    166  * representation of the states of the floating-point status flags indicated by
    167  * the argument excepts in the object pointed to by the argument flagp.
    168  */
    169 int
    170 fegetexceptflag(fexcept_t *flagp, int excepts)
    171 {
    172 	uint32_t mxcsr;
    173 	uint16_t status;
    174 	int ex;
    175 
    176 	_DIAGASSERT(flagp != NULL);
    177 	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
    178 
    179 	ex = excepts & FE_ALL_EXCEPT;
    180 
    181 	__fnstsw(&status);
    182 	if (__HAS_SSE)
    183 		__stmxcsr(&mxcsr);
    184 	else
    185 		mxcsr = 0;
    186 
    187 	*flagp = (mxcsr | status) & ex;
    188 
    189 	/* Success */
    190 	return (0);
    191 }
    192 
/*
 * feraiseexcept(): raise the supported floating-point exceptions selected
 * by `excepts'.
 *
 * The standard allows executing an instruction that produces the exception
 * as a side effect; here the status flags are set directly through
 * fesetexceptflag() (which also validates the input), and fwait then lets
 * any now-pending unmasked x87 exception be delivered.
 * Always returns 0.
 */
int
feraiseexcept(int excepts)
{
	fexcept_t flags;

	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);

	flags = excepts & FE_ALL_EXCEPT;

	(void)fesetexceptflag(&flags, excepts);
	__fwait();

	return 0;
}
    217 
    218 /*
    219  * This function sets the floating-point status flags indicated by the argument
    220  * `excepts' to the states stored in the object pointed to by `flagp'. It does
    221  * NOT raise any floating-point exceptions, but only sets the state of the flags.
    222  */
    223 int
    224 fesetexceptflag(const fexcept_t *flagp, int excepts)
    225 {
    226 	fenv_t env;
    227 	uint32_t mxcsr;
    228 	int ex;
    229 
    230 	_DIAGASSERT(flagp != NULL);
    231 	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
    232 
    233 	ex = excepts & FE_ALL_EXCEPT;
    234 
    235 	__fnstenv(&env);
    236 	env.x87.status &= ~ex;
    237 	env.x87.status |= *flagp & ex;
    238 	__fldenv(env);
    239 
    240 	if (__HAS_SSE) {
    241 		__stmxcsr(&mxcsr);
    242 		mxcsr &= ~ex;
    243 		mxcsr |= *flagp & ex;
    244 		__ldmxcsr(mxcsr);
    245 	}
    246 
    247 	/* Success */
    248 	return (0);
    249 }
    250 
    251 /*
    252  * The fetestexcept() function determines which of a specified subset of the
    253  * floating-point exception flags are currently set. The `excepts' argument
    254  * specifies the floating-point status flags to be queried.
    255  */
    256 int
    257 fetestexcept(int excepts)
    258 {
    259 	uint32_t mxcsr;
    260 	uint16_t status;
    261 	int ex;
    262 
    263 	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
    264 
    265 	ex = excepts & FE_ALL_EXCEPT;
    266 
    267 	__fnstsw(&status);
    268 	if (__HAS_SSE)
    269 		__stmxcsr(&mxcsr);
    270 	else
    271 		mxcsr = 0;
    272 
    273 	return ((status | mxcsr) & ex);
    274 }
    275 
    276 int
    277 fegetround(void)
    278 {
    279 	uint16_t control;
    280 
    281 	/*
    282 	 * We assume that the x87 and the SSE unit agree on the
    283 	 * rounding mode.  Reading the control word on the x87 turns
    284 	 * out to be about 5 times faster than reading it on the SSE
    285 	 * unit on an Opteron 244.
    286 	 */
    287 	__fnstcw(&control);
    288 
    289 	return (control & __X87_ROUND_MASK);
    290 }
    291 
    292 /*
    293  * The fesetround() function shall establish the rounding direction represented
    294  * by its argument round. If the argument is not equal to the value of a
    295  * rounding direction macro, the rounding direction is not changed.
    296  */
    297 int
    298 fesetround(int round)
    299 {
    300 	uint32_t mxcsr;
    301 	uint16_t control;
    302 
    303 	if (round & ~__X87_ROUND_MASK) {
    304 		/* Failure */
    305 		return (-1);
    306 	}
    307 
    308 	__fnstcw(&control);
    309 	control &= ~__X87_ROUND_MASK;
    310 	control |= round;
    311 	__fldcw(control);
    312 
    313 	if (__HAS_SSE) {
    314 		__stmxcsr(&mxcsr);
    315 		mxcsr &= ~(__X87_ROUND_MASK << __SSE_ROUND_SHIFT);
    316 		mxcsr |= round << __SSE_ROUND_SHIFT;
    317 		__ldmxcsr(mxcsr);
    318 	}
    319 
    320 	/* Success */
    321 	return (0);
    322 }
    323 
    324 /*
    325  * The fegetenv() function attempts to store the current floating-point
    326  * environment in the object pointed to by envp.
    327  */
    328 int
    329 fegetenv(fenv_t *envp)
    330 {
    331 	uint32_t mxcsr;
    332 
    333 	_DIAGASSERT(flagp != NULL);
    334 
    335 	/*
    336 	 * fnstenv masks all exceptions, so we need to restore the old control
    337 	 * word to avoid this side effect.
    338 	 */
    339 	__fnstenv(envp);
    340 	__fldcw(envp->x87.control);
    341 	if (__HAS_SSE) {
    342 		__stmxcsr(&mxcsr);
    343 		envp->mxcsr = mxcsr;
    344 	}
    345 
    346 	/* Success */
    347 	return (0);
    348 }
    349 
    350 /*
    351  * The feholdexcept() function saves the current floating-point environment in
    352  * the object pointed to by envp, clears the floating-point status flags, and
    353  * then installs a non-stop (continue on floating-point exceptions) mode, if
    354  * available, for all floating-point exceptions.
    355  */
    356 int
    357 feholdexcept(fenv_t *envp)
    358 {
    359 	uint32_t mxcsr;
    360 
    361 	_DIAGASSERT(envp != NULL);
    362 
    363 	__fnstenv(envp);
    364 	__fnclex();
    365 	if (__HAS_SSE) {
    366 		__stmxcsr(&mxcsr);
    367 		envp->mxcsr = mxcsr;
    368 		mxcsr &= ~FE_ALL_EXCEPT;
    369 		mxcsr |= FE_ALL_EXCEPT << __SSE_EMASK_SHIFT;
    370 		__ldmxcsr(mxcsr);
    371 	}
    372 
    373 	/* Success */
    374 	return (0);
    375 }
    376 
    377 /*
    378  * The fesetenv() function attempts to establish the floating-point environment
    379  * represented by the object pointed to by envp. The argument `envp' points
    380  * to an object set by a call to fegetenv() or feholdexcept(), or equal a
    381  * floating-point environment macro. The fesetenv() function does not raise
    382  * floating-point exceptions, but only installs the state of the floating-point
    383  * status flags represented through its argument.
    384  */
    385 int
    386 fesetenv(const fenv_t *envp)
    387 {
    388 	fenv_t env;
    389 
    390 	_DIAGASSERT(envp != NULL);
    391 
    392 	/* Store the x87 floating-point environment */
    393 	memset(&env, 0, sizeof(env));
    394 	__fnstenv(&env);
    395 
    396 	__fe_dfl_env.x87.unused1 = env.x87.unused1;
    397 	__fe_dfl_env.x87.unused2 = env.x87.unused2;
    398 	__fe_dfl_env.x87.unused3 = env.x87.unused3;
    399 	memcpy(__fe_dfl_env.x87.others,
    400 	       env.x87.others,
    401 	       sizeof(__fe_dfl_env.x87.others) / sizeof(uint32_t));
    402 
    403 	__fldenv(envp->x87);
    404 	if (__HAS_SSE)
    405 		__ldmxcsr(envp->mxcsr);
    406 
    407 	/* Success */
    408 	return (0);
    409 }
    410 
    411 /*
    412  * The feupdateenv() function saves the currently raised floating-point
    413  * exceptions in its automatic storage, installs the floating-point environment
    414  * represented by the object pointed to by `envp', and then raises the saved
    415  * floating-point exceptions. The argument `envp' shall point to an object set
    416  * by a call to feholdexcept() or fegetenv(), or equal a floating-point
    417  * environment macro.
    418  */
    419 int
    420 feupdateenv(const fenv_t *envp)
    421 {
    422 	fenv_t env;
    423 	uint32_t mxcsr;
    424 	uint16_t status;
    425 
    426 	_DIAGASSERT(envp != NULL);
    427 
    428 	/* Store the x87 floating-point environment */
    429 	memset(&env, 0, sizeof(env));
    430 	__fnstenv(&env);
    431 
    432 	__fe_dfl_env.x87.unused1 = env.x87.unused1;
    433 	__fe_dfl_env.x87.unused2 = env.x87.unused2;
    434 	__fe_dfl_env.x87.unused3 = env.x87.unused3;
    435 	memcpy(__fe_dfl_env.x87.others,
    436 	       env.x87.others,
    437 	       sizeof(__fe_dfl_env.x87.others) / sizeof(uint32_t));
    438 
    439 	__fnstsw(&status);
    440 	if (__HAS_SSE)
    441 		__stmxcsr(&mxcsr);
    442 	else
    443 		mxcsr = 0;
    444 	fesetenv(envp);
    445 	feraiseexcept((mxcsr | status) & FE_ALL_EXCEPT);
    446 
    447 	/* Success */
    448 	return (0);
    449 }
    450 
/*
 * The following functions are extensions to the standard
 */
    454 int
    455 feenableexcept(int mask)
    456 {
    457 	uint32_t mxcsr, omask;
    458 	uint16_t control;
    459 
    460 	mask &= FE_ALL_EXCEPT;
    461 	__fnstcw(&control);
    462 	if (__HAS_SSE)
    463 		__stmxcsr(&mxcsr);
    464 	else
    465 		mxcsr = 0;
    466 
    467 	omask = (control | mxcsr >> __SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
    468 	control &= ~mask;
    469 	__fldcw(control);
    470 	if (__HAS_SSE) {
    471 		mxcsr &= ~(mask << __SSE_EMASK_SHIFT);
    472 		__ldmxcsr(mxcsr);
    473 	}
    474 
    475 	return (FE_ALL_EXCEPT & ~omask);
    476 }
    477 
    478 int
    479 fedisableexcept(int mask)
    480 {
    481 	uint32_t mxcsr, omask;
    482 	uint16_t control;
    483 
    484 	mask &= FE_ALL_EXCEPT;
    485 	__fnstcw(&control);
    486 	if (__HAS_SSE)
    487 		__stmxcsr(&mxcsr);
    488 	else
    489 		mxcsr = 0;
    490 
    491 	omask = (control | mxcsr >> __SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
    492 	control |= mask;
    493 	__fldcw(control);
    494 	if (__HAS_SSE) {
    495 		mxcsr |= mask << __SSE_EMASK_SHIFT;
    496 		__ldmxcsr(mxcsr);
    497 	}
    498 
    499 	return (FE_ALL_EXCEPT & ~omask);
    500 }
    501 
/*
 * fegetexcept(): return the set of currently enabled (unmasked)
 * exceptions, as recorded in the x87 control word.
 */
int
fegetexcept(void)
{
	uint16_t cw;

	/*
	 * The x87 and SSE exception masks are assumed to be identical, so
	 * only the x87 control word is read.
	 */
	__fnstcw(&cw);

	/* A clear mask bit means the exception is enabled. */
	return ~cw & FE_ALL_EXCEPT;
}
    515