Home | History | Annotate | Line # | Download | only in i387
fenv.c revision 1.1
      1 /* $NetBSD: fenv.c,v 1.1 2010/07/31 21:47:53 joerg Exp $ */
      2 
      3 /*-
      4  * Copyright (c) 2004-2005 David Schultz <das (at) FreeBSD.ORG>
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     26  * SUCH DAMAGE.
     27  */
     28 
     29 #include <sys/cdefs.h>
     30 __RCSID("$NetBSD: fenv.c,v 1.1 2010/07/31 21:47:53 joerg Exp $");
     31 
     32 #include <sys/param.h>
     33 #include <sys/sysctl.h>
     34 #include <assert.h>
     35 #include <fenv.h>
     36 #include <stddef.h>
     37 #include <string.h>
     38 
/*
 * Thin wrappers around the x87 and SSE control instructions, written as
 * extended inline assembly.
 *
 * Calling convention used throughout this file: the "load" wrappers take the
 * object itself (it is used directly as a memory operand), while the "store"
 * wrappers take a pointer to the object that receives the value.
 */

/* Load x87 Control Word */
#define	__fldcw(__cw)		__asm__ __volatile__	\
	("fldcw %0" : : "m" (__cw))

/* No-Wait Store Control Word */
#define	__fnstcw(__cw)		__asm__ __volatile__	\
	("fnstcw %0" : "=m" (*(__cw)))

/* No-Wait Store Status Word (may be stored to memory or to %ax) */
#define	__fnstsw(__sw)		__asm__ __volatile__	\
	("fnstsw %0" : "=am" (*(__sw)))

/* No-Wait Clear Exception Flags */
#define	__fnclex()		__asm__ __volatile__	\
	("fnclex")

/* Load x87 Environment */
#define	__fldenv(__env)		__asm__ __volatile__	\
	("fldenv %0" : : "m" (__env))

/* No-Wait Store x87 environment */
#define	__fnstenv(__env)	__asm__ __volatile__	\
	("fnstenv %0" : "=m" (*(__env)))

/*
 * Check for and handle pending unmasked x87 FPU exceptions.
 * Takes no operand, so the macro takes no argument (matching __fnclex()).
 */
#define	__fwait()		__asm__ __volatile__	\
	("fwait")

/* Load the MXCSR register */
#define	__ldmxcsr(__mxcsr)	__asm__ __volatile__	\
	("ldmxcsr %0" : : "m" (__mxcsr))

/* Store the MXCSR register state */
#define	__stmxcsr(__mxcsr)	__asm__ __volatile__	\
	("stmxcsr %0" : "=m" (*(__mxcsr)))
     74 
     75 /*
     76  * The following constant represents the default floating-point environment
     77  * (that is, the one installed at program startup) and has type pointer to
     78  * const-qualified fenv_t.
     79  *
     80  * It can be used as an argument to the functions within the <fenv.h> header
     81  * that manage the floating-point environment, namely fesetenv() and
     82  * feupdateenv().
     83  *
     84  * x87 fpu registers are 16bit wide. The upper bits, 31-16, are marked as
     85  * RESERVED. We provide a partial floating-point environment, where we
     86  * define only the lower bits. The reserved bits are extracted and set by the
     87  * consumers of FE_DFL_ENV, during runtime.
     88  */
/*
 * Storage for the default floating-point environment.
 *
 * The object is deliberately not const: fesetenv() and feupdateenv() in this
 * file overwrite its unused1..unused3 and others members with values read
 * from the live x87 environment before an environment is loaded.
 *
 * The initializer is positional, so the order of the values below must
 * follow the member order of fenv_t.
 */
fenv_t __fe_dfl_env = {
	{
		__NetBSD_NPXCW__,       /* Control word register */
		0x0,			/* Unused */
		0x0000,                 /* Status word register */
		0x0,			/* Unused */
		0x0000ffff,             /* Tag word register */
		0x0,			/* Unused */
		{
			0x0000, 0x0000,
			0x0000, 0xffff
		}
	},
	__INITIAL_MXCSR__		/* MXCSR register */
};
    104 
    105 /*
    106  * Test for SSE support on this processor.
    107  *
    108  * We need to use ldmxcsr/stmxcsr to get correct results if any part
    109  * of the program was compiled to use SSE floating-point, but we can't
    110  * use SSE on older processors.
    111  *
    112  * In order to do so, we need to query the processor capabilities via the CPUID
    113  * instruction. We can make it even simpler though, by querying the machdep.sse
    114  * sysctl.
    115  */
/* Non-zero when the machdep.sse sysctl reports SSE support; 0 otherwise. */
static int __HAS_SSE = 0;

static void __test_sse(void) __attribute__ ((constructor));

/*
 * Constructor: query the machdep.sse sysctl once and cache the answer in
 * __HAS_SSE.  If the query fails, SSE is treated as unavailable.
 */
static void
__test_sse(void)
{
	/*
	 * sysctlbyname() reads *oldlenp as the size of the output buffer, so
	 * it must be initialized before the call.
	 */
	size_t oldlen = sizeof(__HAS_SSE);
	int rv;

	rv = sysctlbyname("machdep.sse", &__HAS_SSE, &oldlen, NULL, 0);
	_DIAGASSERT(rv != -1);
	if (rv == -1)
		__HAS_SSE = 0;
}
    131 
    132 /*
    133  * The feclearexcept() function clears the supported floating-point exceptions
    134  * represented by `excepts'.
    135  */
    136 int
    137 feclearexcept(int excepts)
    138 {
    139 	fenv_t env;
    140 	uint32_t mxcsr;
    141 	int ex;
    142 
    143 	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
    144 
    145 	ex = excepts & FE_ALL_EXCEPT;
    146 
    147 	/* It's ~3x faster to call fnclex, than store/load fp env */
    148 	if (ex == FE_ALL_EXCEPT) {
    149 		__fnclex();
    150 	} else {
    151 		__fnstenv(&env);
    152 		env.x87.status &= ~ex;
    153 		__fldenv(env);
    154 	}
    155 
    156 	if (__HAS_SSE) {
    157 		__stmxcsr(&mxcsr);
    158 		mxcsr &= ~ex;
    159 		__ldmxcsr(mxcsr);
    160 	}
    161 
    162 	/* Success */
    163 	return (0);
    164 }
    165 
    166 /*
    167  * The fegetexceptflag() function stores an implementation-defined
    168  * representation of the states of the floating-point status flags indicated by
    169  * the argument excepts in the object pointed to by the argument flagp.
    170  */
    171 int
    172 fegetexceptflag(fexcept_t *flagp, int excepts)
    173 {
    174 	uint32_t mxcsr;
    175 	uint16_t status;
    176 	int ex;
    177 
    178 	_DIAGASSERT(flagp != NULL);
    179 	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
    180 
    181 	ex = excepts & FE_ALL_EXCEPT;
    182 
    183 	__fnstsw(&status);
    184 	if (__HAS_SSE)
    185 		__stmxcsr(&mxcsr);
    186 	else
    187 		mxcsr = 0;
    188 
    189 	*flagp = (mxcsr | status) & ex;
    190 
    191 	/* Success */
    192 	return (0);
    193 }
    194 
/*
 * feraiseexcept() raises the supported floating-point exceptions named by
 * `excepts'.
 *
 * Rather than executing an instruction that produces the exception as a side
 * effect (which the standard permits), the flags are written directly through
 * fesetexceptflag(); the fwait that follows checks for pending unmasked x87
 * exceptions.  Always returns 0.
 */
int
feraiseexcept(int excepts)
{
	fexcept_t flags;

	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);

	flags = excepts & FE_ALL_EXCEPT;
	fesetexceptflag(&flags, excepts);
	__fwait();

	return (0);
}
    219 
    220 /*
    221  * This function sets the floating-point status flags indicated by the argument
    222  * `excepts' to the states stored in the object pointed to by `flagp'. It does
    223  * NOT raise any floating-point exceptions, but only sets the state of the flags.
    224  */
    225 int
    226 fesetexceptflag(const fexcept_t *flagp, int excepts)
    227 {
    228 	fenv_t env;
    229 	uint32_t mxcsr;
    230 	int ex;
    231 
    232 	_DIAGASSERT(flagp != NULL);
    233 	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
    234 
    235 	ex = excepts & FE_ALL_EXCEPT;
    236 
    237 	__fnstenv(&env);
    238 	env.x87.status &= ~ex;
    239 	env.x87.status |= *flagp & ex;
    240 	__fldenv(env);
    241 
    242 	if (__HAS_SSE) {
    243 		__stmxcsr(&mxcsr);
    244 		mxcsr &= ~ex;
    245 		mxcsr |= *flagp & ex;
    246 		__ldmxcsr(mxcsr);
    247 	}
    248 
    249 	/* Success */
    250 	return (0);
    251 }
    252 
    253 /*
    254  * The fetestexcept() function determines which of a specified subset of the
    255  * floating-point exception flags are currently set. The `excepts' argument
    256  * specifies the floating-point status flags to be queried.
    257  */
    258 int
    259 fetestexcept(int excepts)
    260 {
    261 	uint32_t mxcsr;
    262 	uint16_t status;
    263 	int ex;
    264 
    265 	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
    266 
    267 	ex = excepts & FE_ALL_EXCEPT;
    268 
    269 	__fnstsw(&status);
    270 	if (__HAS_SSE)
    271 		__stmxcsr(&mxcsr);
    272 	else
    273 		mxcsr = 0;
    274 
    275 	return ((status | mxcsr) & ex);
    276 }
    277 
    278 int
    279 fegetround(void)
    280 {
    281 	uint16_t control;
    282 
    283 	/*
    284 	 * We assume that the x87 and the SSE unit agree on the
    285 	 * rounding mode.  Reading the control word on the x87 turns
    286 	 * out to be about 5 times faster than reading it on the SSE
    287 	 * unit on an Opteron 244.
    288 	 */
    289 	__fnstcw(&control);
    290 
    291 	return (control & __X87_ROUND_MASK);
    292 }
    293 
    294 /*
    295  * The fesetround() function shall establish the rounding direction represented
    296  * by its argument round. If the argument is not equal to the value of a
    297  * rounding direction macro, the rounding direction is not changed.
    298  */
    299 int
    300 fesetround(int round)
    301 {
    302 	uint32_t mxcsr;
    303 	uint16_t control;
    304 
    305 	if (round & ~__X87_ROUND_MASK) {
    306 		/* Failure */
    307 		return (-1);
    308 	}
    309 
    310 	__fnstcw(&control);
    311 	control &= ~__X87_ROUND_MASK;
    312 	control |= round;
    313 	__fldcw(control);
    314 
    315 	if (__HAS_SSE) {
    316 		__stmxcsr(&mxcsr);
    317 		mxcsr &= ~(__X87_ROUND_MASK << __SSE_ROUND_SHIFT);
    318 		mxcsr |= round << __SSE_ROUND_SHIFT;
    319 		__ldmxcsr(mxcsr);
    320 	}
    321 
    322 	/* Success */
    323 	return (0);
    324 }
    325 
    326 /*
    327  * The fegetenv() function attempts to store the current floating-point
    328  * environment in the object pointed to by envp.
    329  */
    330 int
    331 fegetenv(fenv_t *envp)
    332 {
    333 	uint32_t mxcsr;
    334 
    335 	_DIAGASSERT(flagp != NULL);
    336 
    337 	/*
    338 	 * fnstenv masks all exceptions, so we need to restore the old control
    339 	 * word to avoid this side effect.
    340 	 */
    341 	__fnstenv(envp);
    342 	__fldcw(envp->x87.control);
    343 	if (__HAS_SSE) {
    344 		__stmxcsr(&mxcsr);
    345 		envp->mxcsr = mxcsr;
    346 	}
    347 
    348 	/* Success */
    349 	return (0);
    350 }
    351 
    352 /*
    353  * The feholdexcept() function saves the current floating-point environment in
    354  * the object pointed to by envp, clears the floating-point status flags, and
    355  * then installs a non-stop (continue on floating-point exceptions) mode, if
    356  * available, for all floating-point exceptions.
    357  */
    358 int
    359 feholdexcept(fenv_t *envp)
    360 {
    361 	uint32_t mxcsr;
    362 
    363 	_DIAGASSERT(envp != NULL);
    364 
    365 	__fnstenv(envp);
    366 	__fnclex();
    367 	if (__HAS_SSE) {
    368 		__stmxcsr(&mxcsr);
    369 		envp->mxcsr = mxcsr;
    370 		mxcsr &= ~FE_ALL_EXCEPT;
    371 		mxcsr |= FE_ALL_EXCEPT << __SSE_EMASK_SHIFT;
    372 		__ldmxcsr(mxcsr);
    373 	}
    374 
    375 	/* Success */
    376 	return (0);
    377 }
    378 
    379 /*
    380  * The fesetenv() function attempts to establish the floating-point environment
    381  * represented by the object pointed to by envp. The argument `envp' points
    382  * to an object set by a call to fegetenv() or feholdexcept(), or equal a
    383  * floating-point environment macro. The fesetenv() function does not raise
    384  * floating-point exceptions, but only installs the state of the floating-point
    385  * status flags represented through its argument.
    386  */
    387 int
    388 fesetenv(const fenv_t *envp)
    389 {
    390 	fenv_t env;
    391 
    392 	_DIAGASSERT(envp != NULL);
    393 
    394 	/* Store the x87 floating-point environment */
    395 	memset(&env, 0, sizeof(env));
    396 	__fnstenv(&env);
    397 
    398 	__fe_dfl_env.x87.unused1 = env.x87.unused1;
    399 	__fe_dfl_env.x87.unused2 = env.x87.unused2;
    400 	__fe_dfl_env.x87.unused3 = env.x87.unused3;
    401 	memcpy(__fe_dfl_env.x87.others,
    402 	       env.x87.others,
    403 	       sizeof(__fe_dfl_env.x87.others) / sizeof(uint32_t));
    404 
    405 	__fldenv(envp->x87);
    406 	if (__HAS_SSE)
    407 		__ldmxcsr(envp->mxcsr);
    408 
    409 	/* Success */
    410 	return (0);
    411 }
    412 
    413 /*
    414  * The feupdateenv() function saves the currently raised floating-point
    415  * exceptions in its automatic storage, installs the floating-point environment
    416  * represented by the object pointed to by `envp', and then raises the saved
    417  * floating-point exceptions. The argument `envp' shall point to an object set
    418  * by a call to feholdexcept() or fegetenv(), or equal a floating-point
    419  * environment macro.
    420  */
    421 int
    422 feupdateenv(const fenv_t *envp)
    423 {
    424 	fenv_t env;
    425 	uint32_t mxcsr;
    426 	uint16_t status;
    427 
    428 	_DIAGASSERT(envp != NULL);
    429 
    430 	/* Store the x87 floating-point environment */
    431 	memset(&env, 0, sizeof(env));
    432 	__fnstenv(&env);
    433 
    434 	__fe_dfl_env.x87.unused1 = env.x87.unused1;
    435 	__fe_dfl_env.x87.unused2 = env.x87.unused2;
    436 	__fe_dfl_env.x87.unused3 = env.x87.unused3;
    437 	memcpy(__fe_dfl_env.x87.others,
    438 	       env.x87.others,
    439 	       sizeof(__fe_dfl_env.x87.others) / sizeof(uint32_t));
    440 
    441 	__fnstsw(&status);
    442 	if (__HAS_SSE)
    443 		__stmxcsr(&mxcsr);
    444 	else
    445 		mxcsr = 0;
    446 	fesetenv(envp);
    447 	feraiseexcept((mxcsr | status) & FE_ALL_EXCEPT);
    448 
    449 	/* Success */
    450 	return (0);
    451 }
    452 
    453 /*
    454  * The following functions are extentions to the standard
    455  */
    456 int
    457 feenableexcept(int mask)
    458 {
    459 	uint32_t mxcsr, omask;
    460 	uint16_t control;
    461 
    462 	mask &= FE_ALL_EXCEPT;
    463 	__fnstcw(&control);
    464 	if (__HAS_SSE)
    465 		__stmxcsr(&mxcsr);
    466 	else
    467 		mxcsr = 0;
    468 
    469 	omask = (control | mxcsr >> __SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
    470 	control &= ~mask;
    471 	__fldcw(control);
    472 	if (__HAS_SSE) {
    473 		mxcsr &= ~(mask << __SSE_EMASK_SHIFT);
    474 		__ldmxcsr(mxcsr);
    475 	}
    476 
    477 	return (~omask);
    478 }
    479 
    480 int
    481 fedisableexcept(int mask)
    482 {
    483 	uint32_t mxcsr, omask;
    484 	uint16_t control;
    485 
    486 	mask &= FE_ALL_EXCEPT;
    487 	__fnstcw(&control);
    488 	if (__HAS_SSE)
    489 		__stmxcsr(&mxcsr);
    490 	else
    491 		mxcsr = 0;
    492 
    493 	omask = (control | mxcsr >> __SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
    494 	control |= mask;
    495 	__fldcw(control);
    496 	if (__HAS_SSE) {
    497 		mxcsr |= mask << __SSE_EMASK_SHIFT;
    498 		__ldmxcsr(mxcsr);
    499 	}
    500 
    501 	return (~omask);
    502 }
    503 
/*
 * fegetexcept() returns the set of currently enabled (unmasked) exceptions
 * as a combination of FE_* values.
 */
int
fegetexcept(void)
{
	uint16_t control;

	/*
	 * We assume that the masks for the x87 and the SSE unit are
	 * the same.
	 */
	__fnstcw(&control);

	/*
	 * A set bit in the control word masks (disables) the exception, as
	 * feenableexcept() clears bits to enable; invert to report the
	 * enabled ones.
	 */
	return (~control & FE_ALL_EXCEPT);
}
    517