Home | History | Annotate | Line # | Download | only in i387
      1 /* $NetBSD: fenv.c,v 1.10 2021/09/03 21:54:59 andvar Exp $ */
      2 
      3 /*-
      4  * Copyright (c) 2004-2005 David Schultz <das (at) FreeBSD.ORG>
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     26  * SUCH DAMAGE.
     27  */
     28 
     29 #include <sys/cdefs.h>
     30 __RCSID("$NetBSD: fenv.c,v 1.10 2021/09/03 21:54:59 andvar Exp $");
     31 
     32 #include "namespace.h"
     33 
     34 #include <sys/param.h>
     35 #include <sys/sysctl.h>
     36 #include <assert.h>
     37 #include <fenv.h>
     38 #include <stddef.h>
     39 #include <string.h>
     40 
     41 #ifdef __weak_alias
     42 __weak_alias(feclearexcept,_feclearexcept)
     43 __weak_alias(fedisableexcept,_fedisableexcept)
     44 __weak_alias(feenableexcept,_feenableexcept)
     45 __weak_alias(fegetenv,_fegetenv)
     46 __weak_alias(fegetexcept,_fegetexcept)
     47 __weak_alias(fegetexceptflag,_fegetexceptflag)
     48 __weak_alias(fegetround,_fegetround)
     49 __weak_alias(feholdexcept,_feholdexcept)
     50 __weak_alias(feraiseexcept,_feraiseexcept)
     51 __weak_alias(fesetenv,_fesetenv)
     52 __weak_alias(fesetexceptflag,_fesetexceptflag)
     53 __weak_alias(fesetround,_fesetround)
     54 __weak_alias(fetestexcept,_fetestexcept)
     55 __weak_alias(feupdateenv,_feupdateenv)
     56 #endif
     57 
/*
 * Inline-assembly wrappers for the x87 and SSE control/status instructions.
 *
 * Wrappers that load state into the FPU take the object itself; wrappers
 * that store state take a pointer to the destination object.
 */

/* fldcw: load the x87 control word from `cw' */
#define	__fldcw(cw)							\
	__asm__ __volatile__ ("fldcw %0" : : "m" (cw))

/* fnstcw: store the x87 control word into `*cwp' without waiting */
#define	__fnstcw(cwp)							\
	__asm__ __volatile__ ("fnstcw %0" : "=m" (*(cwp)))

/*
 * fnstsw: store the x87 status word into `*swp' without waiting; the
 * destination may be memory or the a-register.
 */
#define	__fnstsw(swp)							\
	__asm__ __volatile__ ("fnstsw %0" : "=am" (*(swp)))

/* fnclex: clear the x87 exception flags without waiting */
#define	__fnclex()							\
	__asm__ __volatile__ ("fnclex")

/* fldenv: load the x87 environment from `envobj' */
#define	__fldenv(envobj)						\
	__asm__ __volatile__ ("fldenv %0" : : "m" (envobj))

/* fnstenv: store the x87 environment into `*envptr' without waiting */
#define	__fnstenv(envptr)						\
	__asm__ __volatile__ ("fnstenv %0" : "=m" (*(envptr)))

/*
 * fwait: check for and handle pending unmasked x87 exceptions.
 * The macro parameter is not used by the expansion.
 */
#define	__fwait(unused)							\
	__asm__ __volatile__ ("fwait")

/* ldmxcsr: load the MXCSR register from `csr' */
#define	__ldmxcsr(csr)							\
	__asm__ __volatile__ ("ldmxcsr %0" : : "m" (csr))

/* stmxcsr: store the MXCSR register into `*csrp' */
#define	__stmxcsr(csrp)							\
	__asm__ __volatile__ ("stmxcsr %0" : "=m" (*(csrp)))
     93 
     94 /*
     95  * The following constant represents the default floating-point environment
     96  * (that is, the one installed at program startup) and has type pointer to
     97  * const-qualified fenv_t.
     98  *
     99  * It can be used as an argument to the functions within the <fenv.h> header
    100  * that manage the floating-point environment, namely fesetenv() and
    101  * feupdateenv().
    102  *
    103  * x87 fpu registers are 16bit wide. The upper bits, 31-16, are marked as
    104  * RESERVED. We provide a partial floating-point environment, where we
    105  * define only the lower bits. The reserved bits are extracted and set by the
    106  * consumers of FE_DFL_ENV, during runtime.
    107  */
    108 fenv_t __fe_dfl_env = {
    109 	.x87 = {
    110 		.control = __NetBSD_NPXCW__,    /* Control word register */
    111 		.unused1 = 0,			/* Unused */
    112 		.status = 0,  		     	/* Status word register */
    113 		.unused2 = 0,			/* Unused */
    114 		.tag = 0xffff,          	/* Tag word register */
    115 		.unused3 = 0,			/* Unused */
    116 		.others = {
    117 			0, 0, 0, 0x0000ffff,
    118 		}
    119 	},
    120 	.mxcsr = __INITIAL_MXCSR__		/* MXCSR register */
    121 };
    122 
    123 /*
    124  * Test for SSE support on this processor.
    125  *
    126  * We need to use ldmxcsr/stmxcsr to get correct results if any part
    127  * of the program was compiled to use SSE floating-point, but we can't
    128  * use SSE on older processors.
    129  *
    130  * In order to do so, we need to query the processor capabilities via the CPUID
    131  * instruction. We can make it even simpler though, by querying the machdep.sse
    132  * sysctl.
    133  */
    134 static int __HAS_SSE = 0;
    135 
    136 static void __init_libm(void) __attribute__ ((constructor, used));
    137 
    138 static void __init_libm(void)
    139 {
    140 	size_t oldlen = sizeof(__HAS_SSE);
    141 	int rv;
    142 	uint16_t control;
    143 
    144 	rv = sysctlbyname("machdep.sse", &__HAS_SSE, &oldlen, NULL, 0);
    145 	if (rv == -1)
    146 		__HAS_SSE = 0;
    147 
    148 	__fnstcw(&control);
    149 	__fe_dfl_env.x87.control = control;
    150 }
    151 
    152 /*
    153  * The feclearexcept() function clears the supported floating-point exceptions
    154  * represented by `excepts'.
    155  */
    156 int
    157 feclearexcept(int excepts)
    158 {
    159 	fenv_t env;
    160 	uint32_t mxcsr;
    161 	int ex;
    162 
    163 	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
    164 
    165 	ex = excepts & FE_ALL_EXCEPT;
    166 
    167 	/* It's ~3x faster to call fnclex, than store/load fp env */
    168 	if (ex == FE_ALL_EXCEPT) {
    169 		__fnclex();
    170 	} else {
    171 		__fnstenv(&env);
    172 		env.x87.status &= ~ex;
    173 		__fldenv(env);
    174 	}
    175 
    176 	if (__HAS_SSE) {
    177 		__stmxcsr(&mxcsr);
    178 		mxcsr &= ~ex;
    179 		__ldmxcsr(mxcsr);
    180 	}
    181 
    182 	/* Success */
    183 	return (0);
    184 }
    185 
    186 /*
    187  * The fegetexceptflag() function stores an implementation-defined
    188  * representation of the states of the floating-point status flags indicated by
    189  * the argument excepts in the object pointed to by the argument flagp.
    190  */
    191 int
    192 fegetexceptflag(fexcept_t *flagp, int excepts)
    193 {
    194 	uint32_t mxcsr;
    195 	uint16_t status;
    196 	int ex;
    197 
    198 	_DIAGASSERT(flagp != NULL);
    199 	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
    200 
    201 	ex = excepts & FE_ALL_EXCEPT;
    202 
    203 	__fnstsw(&status);
    204 	if (__HAS_SSE)
    205 		__stmxcsr(&mxcsr);
    206 	else
    207 		mxcsr = 0;
    208 
    209 	*flagp = (mxcsr | status) & ex;
    210 
    211 	/* Success */
    212 	return (0);
    213 }
    214 
/*
 * feraiseexcept(): raise the supported floating-point exceptions named by
 * `excepts'.
 *
 * The standard permits raising an exception by executing an instruction
 * that produces it as a side effect; here the status flags are written
 * directly via fesetexceptflag() (which also masks the argument), and a
 * following fwait lets any pending unmasked x87 exception be handled.
 * Always returns 0.
 */
int
feraiseexcept(int excepts)
{
	fexcept_t flags;

	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);

	flags = excepts & FE_ALL_EXCEPT;
	(void)fesetexceptflag(&flags, excepts);
	__fwait();

	return (0);
}
    239 
    240 /*
    241  * This function sets the floating-point status flags indicated by the argument
    242  * `excepts' to the states stored in the object pointed to by `flagp'. It does
    243  * NOT raise any floating-point exceptions, but only sets the state of the flags.
    244  */
    245 int
    246 fesetexceptflag(const fexcept_t *flagp, int excepts)
    247 {
    248 	fenv_t env;
    249 	uint32_t mxcsr;
    250 	int ex;
    251 
    252 	_DIAGASSERT(flagp != NULL);
    253 	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
    254 
    255 	ex = excepts & FE_ALL_EXCEPT;
    256 
    257 	__fnstenv(&env);
    258 	env.x87.status &= ~ex;
    259 	env.x87.status |= *flagp & ex;
    260 	__fldenv(env);
    261 
    262 	if (__HAS_SSE) {
    263 		__stmxcsr(&mxcsr);
    264 		mxcsr &= ~ex;
    265 		mxcsr |= *flagp & ex;
    266 		__ldmxcsr(mxcsr);
    267 	}
    268 
    269 	/* Success */
    270 	return (0);
    271 }
    272 
    273 /*
    274  * The fetestexcept() function determines which of a specified subset of the
    275  * floating-point exception flags are currently set. The `excepts' argument
    276  * specifies the floating-point status flags to be queried.
    277  */
    278 int
    279 fetestexcept(int excepts)
    280 {
    281 	uint32_t mxcsr;
    282 	uint16_t status;
    283 	int ex;
    284 
    285 	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
    286 
    287 	ex = excepts & FE_ALL_EXCEPT;
    288 
    289 	__fnstsw(&status);
    290 	if (__HAS_SSE)
    291 		__stmxcsr(&mxcsr);
    292 	else
    293 		mxcsr = 0;
    294 
    295 	return ((status | mxcsr) & ex);
    296 }
    297 
    298 int
    299 fegetround(void)
    300 {
    301 	uint16_t control;
    302 
    303 	/*
    304 	 * We assume that the x87 and the SSE unit agree on the
    305 	 * rounding mode.  Reading the control word on the x87 turns
    306 	 * out to be about 5 times faster than reading it on the SSE
    307 	 * unit on an Opteron 244.
    308 	 */
    309 	__fnstcw(&control);
    310 
    311 	return (control & __X87_ROUND_MASK);
    312 }
    313 
    314 /*
    315  * The fesetround() function shall establish the rounding direction represented
    316  * by its argument round. If the argument is not equal to the value of a
    317  * rounding direction macro, the rounding direction is not changed.
    318  */
    319 int
    320 fesetround(int round)
    321 {
    322 	uint32_t mxcsr;
    323 	uint16_t control;
    324 
    325 	if (round & ~__X87_ROUND_MASK) {
    326 		/* Failure */
    327 		return (-1);
    328 	}
    329 
    330 	__fnstcw(&control);
    331 	control &= ~__X87_ROUND_MASK;
    332 	control |= round;
    333 	__fldcw(control);
    334 
    335 	if (__HAS_SSE) {
    336 		__stmxcsr(&mxcsr);
    337 		mxcsr &= ~(__X87_ROUND_MASK << __SSE_ROUND_SHIFT);
    338 		mxcsr |= round << __SSE_ROUND_SHIFT;
    339 		__ldmxcsr(mxcsr);
    340 	}
    341 
    342 	/* Success */
    343 	return (0);
    344 }
    345 
    346 /*
    347  * The fegetenv() function attempts to store the current floating-point
    348  * environment in the object pointed to by envp.
    349  */
    350 int
    351 fegetenv(fenv_t *envp)
    352 {
    353 	uint32_t mxcsr;
    354 
    355 	_DIAGASSERT(flagp != NULL);
    356 
    357 	/*
    358 	 * fnstenv masks all exceptions, so we need to restore the old control
    359 	 * word to avoid this side effect.
    360 	 */
    361 	__fnstenv(envp);
    362 	__fldcw(envp->x87.control);
    363 	if (__HAS_SSE) {
    364 		__stmxcsr(&mxcsr);
    365 		envp->mxcsr = mxcsr;
    366 	}
    367 
    368 	/* Success */
    369 	return (0);
    370 }
    371 
    372 /*
    373  * The feholdexcept() function saves the current floating-point environment in
    374  * the object pointed to by envp, clears the floating-point status flags, and
    375  * then installs a non-stop (continue on floating-point exceptions) mode, if
    376  * available, for all floating-point exceptions.
    377  */
    378 int
    379 feholdexcept(fenv_t *envp)
    380 {
    381 	uint32_t mxcsr;
    382 
    383 	_DIAGASSERT(envp != NULL);
    384 
    385 	__fnstenv(envp);
    386 	__fnclex();
    387 	if (__HAS_SSE) {
    388 		__stmxcsr(&mxcsr);
    389 		envp->mxcsr = mxcsr;
    390 		mxcsr &= ~FE_ALL_EXCEPT;
    391 		mxcsr |= FE_ALL_EXCEPT << __SSE_EMASK_SHIFT;
    392 		__ldmxcsr(mxcsr);
    393 	}
    394 
    395 	/* Success */
    396 	return (0);
    397 }
    398 
    399 /*
    400  * The fesetenv() function attempts to establish the floating-point environment
    401  * represented by the object pointed to by envp. The argument `envp' points
    402  * to an object set by a call to fegetenv() or feholdexcept(), or equal a
    403  * floating-point environment macro. The fesetenv() function does not raise
    404  * floating-point exceptions, but only installs the state of the floating-point
    405  * status flags represented through its argument.
    406  */
    407 int
    408 fesetenv(const fenv_t *envp)
    409 {
    410 	fenv_t env;
    411 
    412 	_DIAGASSERT(envp != NULL);
    413 
    414 	/* Store the x87 floating-point environment */
    415 	memset(&env, 0, sizeof(env));
    416 	__fnstenv(&env);
    417 
    418 	__fe_dfl_env.x87.unused1 = env.x87.unused1;
    419 	__fe_dfl_env.x87.unused2 = env.x87.unused2;
    420 	__fe_dfl_env.x87.unused3 = env.x87.unused3;
    421 	memcpy(__fe_dfl_env.x87.others, env.x87.others,
    422 	    sizeof(__fe_dfl_env.x87.others));
    423 
    424 	__fldenv(envp->x87);
    425 	if (__HAS_SSE)
    426 		__ldmxcsr(envp->mxcsr);
    427 
    428 	/* Success */
    429 	return (0);
    430 }
    431 
    432 /*
    433  * The feupdateenv() function saves the currently raised floating-point
    434  * exceptions in its automatic storage, installs the floating-point environment
    435  * represented by the object pointed to by `envp', and then raises the saved
    436  * floating-point exceptions. The argument `envp' shall point to an object set
    437  * by a call to feholdexcept() or fegetenv(), or equal a floating-point
    438  * environment macro.
    439  */
    440 int
    441 feupdateenv(const fenv_t *envp)
    442 {
    443 	fenv_t env;
    444 	uint32_t mxcsr;
    445 	uint16_t status;
    446 
    447 	_DIAGASSERT(envp != NULL);
    448 
    449 	/* Store the x87 floating-point environment */
    450 	memset(&env, 0, sizeof(env));
    451 	__fnstenv(&env);
    452 
    453 	__fe_dfl_env.x87.unused1 = env.x87.unused1;
    454 	__fe_dfl_env.x87.unused2 = env.x87.unused2;
    455 	__fe_dfl_env.x87.unused3 = env.x87.unused3;
    456 	memcpy(__fe_dfl_env.x87.others, env.x87.others,
    457 	    sizeof(__fe_dfl_env.x87.others));
    458 
    459 	__fnstsw(&status);
    460 	if (__HAS_SSE)
    461 		__stmxcsr(&mxcsr);
    462 	else
    463 		mxcsr = 0;
    464 	fesetenv(envp);
    465 	feraiseexcept((mxcsr | status) & FE_ALL_EXCEPT);
    466 
    467 	/* Success */
    468 	return (0);
    469 }
    470 
    471 /*
    472  * The following functions are extensions to the standard
    473  */
    474 int
    475 feenableexcept(int mask)
    476 {
    477 	uint32_t mxcsr, omask;
    478 	uint16_t control;
    479 
    480 	mask &= FE_ALL_EXCEPT;
    481 	__fnstcw(&control);
    482 	if (__HAS_SSE)
    483 		__stmxcsr(&mxcsr);
    484 	else
    485 		mxcsr = 0;
    486 
    487 	omask = (control | mxcsr >> __SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
    488 	control &= ~mask;
    489 	__fldcw(control);
    490 	if (__HAS_SSE) {
    491 		mxcsr &= ~(mask << __SSE_EMASK_SHIFT);
    492 		__ldmxcsr(mxcsr);
    493 	}
    494 
    495 	return (FE_ALL_EXCEPT & ~omask);
    496 }
    497 
    498 int
    499 fedisableexcept(int mask)
    500 {
    501 	uint32_t mxcsr, omask;
    502 	uint16_t control;
    503 
    504 	mask &= FE_ALL_EXCEPT;
    505 	__fnstcw(&control);
    506 	if (__HAS_SSE)
    507 		__stmxcsr(&mxcsr);
    508 	else
    509 		mxcsr = 0;
    510 
    511 	omask = (control | mxcsr >> __SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
    512 	control |= mask;
    513 	__fldcw(control);
    514 	if (__HAS_SSE) {
    515 		mxcsr |= mask << __SSE_EMASK_SHIFT;
    516 		__ldmxcsr(mxcsr);
    517 	}
    518 
    519 	return (FE_ALL_EXCEPT & ~omask);
    520 }
    521 
/*
 * fegetexcept(): return the set of supported exceptions that are currently
 * enabled, i.e. whose mask bit is clear in the x87 control word.
 */
int
fegetexcept(void)
{
	uint16_t cw;

	/*
	 * Only the x87 control word is consulted; the x87 and the SSE
	 * unit are assumed to carry the same masks.
	 */
	__fnstcw(&cw);

	return (FE_ALL_EXCEPT & ~cw);
}
    535