      1 /*
      2  * ====================================================
      3  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
      4  *
      5  * Developed at SunPro, a Sun Microsystems, Inc. business.
      6  * Permission to use, copy, modify, and distribute this
      7  * software is freely granted, provided that this notice
      8  * is preserved.
      9  * ====================================================
     10  */
     11 
     12 /*
     13  * from: @(#)fdlibm.h 5.1 93/09/24
     14  * $NetBSD: math_private.h,v 1.34 2024/07/17 12:00:13 riastradh Exp $
     15  */
     16 
     17 #ifndef _MATH_PRIVATE_H_
     18 #define _MATH_PRIVATE_H_
     19 
     20 #include <assert.h>
     21 #include <sys/types.h>
     22 
     23 /*
     24  * The original fdlibm code used statements like:
     25  *
     26  *	n0 = ((*(int*)&one)>>29)^1;		// index of high word
     27  *	ix0 = *(n0+(int*)&x);			// high word of x
     28  *	ix1 = *((1-n0)+(int*)&x);		// low word of x
     29  *
      30  * to dig two 32-bit words out of the 64-bit IEEE floating point value.
     31  * That is non-ANSI, and, moreover, the gcc instruction scheduler gets
     32  * it wrong.  We instead use the following macros.  Unlike the original
     33  * code, we determine the endianness at compile time, not at run time;
     34  * I don't see much benefit to selecting endianness at run time.
     35  */
     36 
     37 #ifdef __arm__
     38 #if defined(__VFP_FP__) || defined(__ARM_EABI__)
     39 #define	IEEE_WORD_ORDER	BYTE_ORDER
     40 #else
     41 #define	IEEE_WORD_ORDER	BIG_ENDIAN
     42 #endif
     43 #else /* __arm__ */
     44 #define	IEEE_WORD_ORDER	BYTE_ORDER
     45 #endif
     46 
     47 /*
     48  * A union which permits us to convert between a long double and
      49  * four 32-bit integers or two 64-bit integers.
     50  */
     51 
     52 #if IEEE_WORD_ORDER == BIG_ENDIAN
     53 
     54 typedef union
     55 {
     56   long double value;
     57   struct {
     58     u_int32_t mswhi;
     59     u_int32_t mswlo;
     60     u_int32_t lswhi;
     61     u_int32_t lswlo;
     62   } parts32;
     63   struct {
     64     u_int64_t msw;
     65     u_int64_t lsw;
     66   } parts64;
     67 } ieee_quad_shape_type;
     68 
     69 #endif
     70 
     71 #if IEEE_WORD_ORDER == LITTLE_ENDIAN
     72 
     73 typedef union
     74 {
     75   long double value;
     76   struct {
     77     u_int32_t lswlo;
     78     u_int32_t lswhi;
     79     u_int32_t mswlo;
     80     u_int32_t mswhi;
     81   } parts32;
     82   struct {
     83     u_int64_t lsw;
     84     u_int64_t msw;
     85   } parts64;
     86 } ieee_quad_shape_type;
     87 
     88 #endif
     89 
     90 /*
     91  * A union which permits us to convert between a double and two 32-bit
      92  * integers or one 64-bit integer.
     93  */
     94 
     95 #if IEEE_WORD_ORDER == BIG_ENDIAN
     96 
     97 typedef union
     98 {
     99   double value;
    100   struct
    101   {
    102     u_int32_t msw;
    103     u_int32_t lsw;
    104   } parts;
    105   struct
    106   {
    107     u_int64_t w;
    108   } xparts;
    109 } ieee_double_shape_type;
    110 
    111 #endif
    112 
    113 #if IEEE_WORD_ORDER == LITTLE_ENDIAN
    114 
    115 typedef union
    116 {
    117   double value;
    118   struct
    119   {
    120     u_int32_t lsw;
    121     u_int32_t msw;
    122   } parts;
    123   struct
    124   {
    125     u_int64_t w;
    126   } xparts;
    127 } ieee_double_shape_type;
    128 
    129 #endif
    130 
    131 /* Get two 32-bit integers from a double.  */
    132 
    133 #define EXTRACT_WORDS(ix0,ix1,d)				\
    134 do {								\
    135   ieee_double_shape_type ew_u;					\
    136   ew_u.value = (d);						\
    137   (ix0) = ew_u.parts.msw;					\
    138   (ix1) = ew_u.parts.lsw;					\
    139 } while (0)
    140 
    141 /* Get a 64-bit integer from a double. */
    142 #define EXTRACT_WORD64(ix,d)					\
    143 do {								\
    144   ieee_double_shape_type ew_u;					\
    145   ew_u.value = (d);						\
    146   (ix) = ew_u.xparts.w;						\
    147 } while (0)
    148 
    149 
    150 /* Get the more significant 32-bit integer from a double.  */
    151 
    152 #define GET_HIGH_WORD(i,d)					\
    153 do {								\
    154   ieee_double_shape_type gh_u;					\
    155   gh_u.value = (d);						\
    156   (i) = gh_u.parts.msw;						\
    157 } while (0)
    158 
    159 /* Get the less significant 32-bit integer from a double.  */
    160 
    161 #define GET_LOW_WORD(i,d)					\
    162 do {								\
    163   ieee_double_shape_type gl_u;					\
    164   gl_u.value = (d);						\
    165   (i) = gl_u.parts.lsw;						\
    166 } while (0)
    167 
    168 /* Set a double from two 32-bit integers.  */
    169 
    170 #define INSERT_WORDS(d,ix0,ix1)					\
    171 do {								\
    172   ieee_double_shape_type iw_u;					\
    173   iw_u.parts.msw = (ix0);					\
    174   iw_u.parts.lsw = (ix1);					\
    175   (d) = iw_u.value;						\
    176 } while (0)
    177 
    178 /* Set a double from a 64-bit integer. */
    179 
    180 #define INSERT_WORD64(d,ix)					\
    181 do {								\
    182   ieee_double_shape_type iw_u;					\
    183   iw_u.xparts.w = (ix);						\
    184   (d) = iw_u.value;						\
    185 } while (0)
    186 
    187 /* Set the more significant 32 bits of a double from an integer.  */
    188 
    189 #define SET_HIGH_WORD(d,v)					\
    190 do {								\
    191   ieee_double_shape_type sh_u;					\
    192   sh_u.value = (d);						\
    193   sh_u.parts.msw = (v);						\
    194   (d) = sh_u.value;						\
    195 } while (0)
    196 
    197 /* Set the less significant 32 bits of a double from an integer.  */
    198 
    199 #define SET_LOW_WORD(d,v)					\
    200 do {								\
    201   ieee_double_shape_type sl_u;					\
    202   sl_u.value = (d);						\
    203   sl_u.parts.lsw = (v);						\
    204   (d) = sl_u.value;						\
    205 } while (0)
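
         /*
          * Typical call pattern for the double accessors above (an
          * illustrative sketch for this note, not lifted from any
          * particular fdlibm routine): classify x by its high word, then
          * rebuild a value from explicit words.
          *
          *	double y, x = -3.75;
          *	int32_t hx, lx;
          *
          *	EXTRACT_WORDS(hx, lx, x);
          *	if ((hx & 0x7fffffff) >= 0x7ff00000)
          *		return x + x;			// x is +-Inf or NaN
          *	INSERT_WORDS(y, hx & 0x7fffffff, lx);	// y = fabs(x)
          */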
    206 
    207 /*
    208  * A union which permits us to convert between a float and a 32-bit
    209  * integer.
    210  */
    211 
    212 typedef union
    213 {
    214   float value;
    215   u_int32_t word;
    216 } ieee_float_shape_type;
    217 
    218 /* Get a 32-bit integer from a float.  */
    219 
    220 #define GET_FLOAT_WORD(i,d)					\
    221 do {								\
    222   ieee_float_shape_type gf_u;					\
    223   gf_u.value = (d);						\
    224   (i) = gf_u.word;						\
    225 } while (0)
    226 
    227 /* Set a float from a 32-bit integer.  */
    228 
    229 #define SET_FLOAT_WORD(d,i)					\
    230 do {								\
    231   ieee_float_shape_type sf_u;					\
    232   sf_u.word = (i);						\
    233   (d) = sf_u.value;						\
    234 } while (0)
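
         /*
          * Illustrative sketch for the float accessors (likewise not taken
          * from a specific routine): strip the sign bit to get fabsf(x).
          *
          *	float y, x = -0.5F;
          *	u_int32_t wx;
          *
          *	GET_FLOAT_WORD(wx, x);
          *	SET_FLOAT_WORD(y, wx & 0x7fffffff);	// y = fabsf(x)
          */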
    235 
    236 #define GET_EXPSIGN(u)						\
    237   (((u)->extu_sign << EXT_EXPBITS) | (u)->extu_exp)
    238 #define SET_EXPSIGN(u, x)					\
    239   (u)->extu_exp = (x),						\
    240   (u)->extu_sign = ((x) >> EXT_EXPBITS)
    241 #define GET_LDBL80_MAN(u)					\
    242   (((uint64_t)(u)->extu_frach << EXT_FRACLBITS) | (u)->extu_fracl)
    243 #define SET_LDBL80_MAN(u, v)					\
    244   ((u)->extu_fracl = (v) & ((1ULL << EXT_FRACLBITS) - 1),	\
    245   (u)->extu_frach = (v) >> EXT_FRACLBITS)
    246 
    247 /*
    248  * Get expsign as 16-bit integer ix0 and significand as 64-bit integer
    249  * ix1 from an 80-bit long double d.
    250  */
    251 
    252 #define	EXTRACT_LDBL80_WORDS(ix0,ix1,d)				\
    253 do {								\
    254   union ieee_ext_u ew_u;					\
    255   ew_u.extu_ld = (d);						\
    256   (ix0) = GET_EXPSIGN(&ew_u);					\
    257   (ix1) = GET_LDBL80_MAN(&ew_u);				\
    258 } while (0)
    259 
    260 /*
    261  * Get expsign as 16-bit integer ix0 and significand as two 64-bit
    262  * integers, ix1 high-order and ix2 low-order, from a 128-bit long
    263  * double d.
    264  */
    265 
    266 #define	EXTRACT_LDBL128_WORDS(ix0,ix1,ix2,d)			\
    267 do {								\
    268   union ieee_ext_u ew_u;					\
    269   ew_u.extu_ld = (d);						\
    270   (ix0) = GET_EXPSIGN(&ew_u);					\
    271   (ix1) = ew_u.extu_frach;					\
    272   (ix2) = ew_u.extu_fracl;					\
    273 } while (0)
    274 
    275 /* Get expsign as a 16-bit integer i from a long double d.  */
    276 
    277 #define	GET_LDBL_EXPSIGN(i,d)					\
    278 do {								\
    279   union ieee_ext_u ge_u;					\
    280   ge_u.extu_ld = (d);						\
    281   (i) = GET_EXPSIGN(&ge_u);					\
    282 } while (0)
    283 
    284 /*
    285  * Set an 80-bit long double d from a 16-bit integer expsign ix0 and a
    286  * 64-bit integer significand ix1.
    287  */
    288 
    289 #define	INSERT_LDBL80_WORDS(d,ix0,ix1)				\
    290 do {								\
    291   union ieee_ext_u iw_u;					\
    292   SET_EXPSIGN(&iw_u, ix0);					\
    293   SET_LDBL80_MAN(&iw_u, ix1);					\
    294   (d) = iw_u.extu_ld;						\
    295 } while (0)
    296 
    297 /*
    298  * Set a 128-bit long double d from a 16-bit integer expsign ix0 and
    299  * two 64-bit integers composing the significand, ix1 high-order and
    300  * ix2 low-order.
    301  */
    302 
    303 #define	INSERT_LDBL128_WORDS(d,ix0,ix1,ix2)			\
    304 do {								\
    305   union ieee_ext_u iw_u;					\
    306   SET_EXPSIGN(&iw_u, ix0);					\
    307   iw_u.extu_frach = (ix1);					\
    308   iw_u.extu_fracl = (ix2);					\
    309   (d) = iw_u.extu_ld;						\
    310 } while (0)
    311 
    312 /* Set expsign of a long double from a 16-bit integer.  */
    313 
    314 #define	SET_LDBL_EXPSIGN(d,v)					\
    315 do {								\
    316   union ieee_ext_u se_u;					\
    317   se_u.extu_ld = (d);						\
    318   SET_EXPSIGN(&se_u, v);						\
    319   (d) = se_u.extu_ld;						\
    320 } while (0)
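
         /*
          * Illustrative sketch for the long double accessors (assuming an
          * 80-bit format and the union ieee_ext_u layout from the machine
          * headers): mask the sign bit out of the 16-bit expsign word.
          *
          *	long double y, x = -2.0L;
          *	uint16_t es;
          *	uint64_t m;
          *
          *	EXTRACT_LDBL80_WORDS(es, m, x);
          *	INSERT_LDBL80_WORDS(y, es & 0x7fff, m);	// y = fabsl(x)
          */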
    321 
    322 #ifdef __i386__
    323 /* Long double constants are broken on i386. */
    324 #define	LD80C(m, ex, v) {						\
    325 	.extu_fracl = (uint32_t)(__CONCAT(m, ULL)),			\
    326 	.extu_frach = __CONCAT(m, ULL) >> EXT_FRACLBITS,		\
    327 	.extu_exp = (0x3fff + (ex)),					\
    328 	.extu_sign = ((v) < 0),						\
    329 }
    330 #else
    331 /**XXX: the following comment may no longer be true:  kre 20240122 **/
    332 /* The above works on non-i386 too, but we use this to check v. */
    333 #define	LD80C(m, ex, v)	{ .extu_ld = (v), }
    334 #endif
    335 
    336 /*
    337  * Attempt to get strict C99 semantics for assignment with non-C99 compilers.
    338  */
    339 #if FLT_EVAL_METHOD == 0 || __GNUC__ == 0
    340 #define	STRICT_ASSIGN(type, lval, rval)	((lval) = (rval))
    341 #else
    342 #define	STRICT_ASSIGN(type, lval, rval) do {	\
    343 	volatile type __lval;			\
    344 						\
    345 	if (sizeof(type) >= sizeof(long double))	\
    346 		(lval) = (rval);		\
    347 	else {					\
    348 		__lval = (rval);		\
    349 		(lval) = __lval;		\
    350 	}					\
    351 } while (0)
    352 #endif
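
         /*
          * Illustrative sketch of STRICT_ASSIGN() (the helper name is made
          * up for this example): many compilers keep extra x87 precision
          * across a plain double assignment, contrary to C99, so the macro
          * forces a store through a volatile to get a value that really is
          * rounded to the named type.
          *
          *	static double
          *	add_rounded(double x, double y)
          *	{
          *		double s;
          *
          *		STRICT_ASSIGN(double, s, x + y);
          *		return s;	// rounded to double, not kept wider
          *	}
          */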
    353 
    354 /* Support switching the mode to FP_PE if necessary. */
    355 #if defined(__i386__) && !defined(NO_FPSETPREC)
    356 
    357 #include <ieeefp.h>
    358 
    359 #define	ENTERI() ENTERIT(long double)
    360 #define	ENTERIT(returntype)			\
    361 	returntype __retval;			\
    362 	fp_prec_t __oprec;			\
    363 						\
    364 	if ((__oprec = fpgetprec()) != FP_PE)	\
    365 		fpsetprec(FP_PE)
    366 #define	RETURNI(x) do {				\
    367 	__retval = (x);				\
    368 	if (__oprec != FP_PE)			\
    369 		fpsetprec(__oprec);		\
    370 	RETURNF(__retval);			\
    371 } while (0)
    372 #define	ENTERV()				\
    373 	fp_prec_t __oprec;			\
    374 						\
    375 	if ((__oprec = fpgetprec()) != FP_PE)	\
    376 		fpsetprec(FP_PE)
    377 #define	RETURNV() do {				\
    378 	if (__oprec != FP_PE)			\
    379 		fpsetprec(__oprec);		\
    380 	return;			\
    381 } while (0)
    382 #else
    383 #define	ENTERI()
    384 #define	ENTERIT(x)
    385 #define	RETURNI(x)	RETURNF(x)
    386 #define	ENTERV()
    387 #define	RETURNV()	return
    388 #endif
    389 
    390 /* Default return statement if hack*_t() is not used. */
     391 #define	RETURNF(v)	return (v)
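
         /*
          * Shape of a long double function using the entry/return macros
          * above (an illustrative sketch; the function name is hypothetical).
          * On i386 with fpsetprec() available, ENTERI() raises the x87
          * precision to FP_PE and RETURNI() restores the old precision
          * before returning through RETURNF(); elsewhere they reduce to a
          * plain return.
          *
          *	long double
          *	examplel(long double x)
          *	{
          *		long double r;
          *
          *		ENTERI();
          *		r = x * x;
          *		RETURNI(r);
          *	}
          */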
    392 
    393 /*
    394  * 2sum gives the same result as 2sumF without requiring |a| >= |b| or
    395  * a == 0, but is slower.
    396  */
    397 #define	_2sum(a, b) do {	\
    398 	__typeof(a) __s, __w;	\
    399 				\
    400 	__w = (a) + (b);	\
    401 	__s = __w - (a);	\
    402 	(b) = ((a) - (__w - __s)) + ((b) - __s); \
    403 	(a) = __w;		\
    404 } while (0)
    405 
    406 /*
    407  * 2sumF algorithm.
    408  *
    409  * "Normalize" the terms in the infinite-precision expression a + b for
    410  * the sum of 2 floating point values so that b is as small as possible
    411  * relative to 'a'.  (The resulting 'a' is the value of the expression in
    412  * the same precision as 'a' and the resulting b is the rounding error.)
    413  * |a| must be >= |b| or 0, b's type must be no larger than 'a's type, and
    414  * exponent overflow or underflow must not occur.  This uses a Theorem of
    415  * Dekker (1971).  See Knuth (1981) 4.2.2 Theorem C.  The name "TwoSum"
     416  * is apparently due to Shewchuk (1997).
    417  *
    418  * For this to always work, assignment of a + b to 'a' must not retain any
    419  * extra precision in a + b.  This is required by C standards but broken
    420  * in many compilers.  The brokenness cannot be worked around using
    421  * STRICT_ASSIGN() like we do elsewhere, since the efficiency of this
    422  * algorithm would be destroyed by non-null strict assignments.  (The
    423  * compilers are correct to be broken -- the efficiency of all floating
     424  * point calculations would be destroyed similarly if they forced the
    425  * conversions.)
    426  *
    427  * Fortunately, a case that works well can usually be arranged by building
    428  * any extra precision into the type of 'a' -- 'a' should have type float_t,
    429  * double_t or long double.  b's type should be no larger than 'a's type.
    430  * Callers should use these types with scopes as large as possible, to
     431  * reduce their own extra-precision and efficiency problems.  In
    432  * particular, they shouldn't convert back and forth just to call here.
    433  */
    434 #ifdef DEBUG
    435 #define	_2sumF(a, b) do {				\
    436 	__typeof(a) __w;				\
    437 	volatile __typeof(a) __ia, __ib, __r, __vw;	\
    438 							\
    439 	__ia = (a);					\
    440 	__ib = (b);					\
    441 	assert(__ia == 0 || fabsl(__ia) >= fabsl(__ib));	\
    442 							\
    443 	__w = (a) + (b);				\
    444 	(b) = ((a) - __w) + (b);			\
    445 	(a) = __w;					\
    446 							\
    447 	/* The next 2 assertions are weak if (a) is already long double. */ \
    448 	assert((long double)__ia + __ib == (long double)(a) + (b));	\
    449 	__vw = __ia + __ib;				\
    450 	__r = __ia - __vw;				\
    451 	__r += __ib;					\
    452 	assert(__vw == (a) && __r == (b));		\
    453 } while (0)
    454 #else /* !DEBUG */
    455 #define	_2sumF(a, b) do {	\
    456 	__typeof(a) __w;	\
    457 				\
    458 	__w = (a) + (b);	\
    459 	(b) = ((a) - __w) + (b); \
    460 	(a) = __w;		\
    461 } while (0)
    462 #endif /* DEBUG */
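
         /*
          * Worked example of _2sumF() (illustrative): with a = 1.0 and
          * b = 0x1.8p-53, the rounded sum is __w = 1.0 + 0x1p-52, so the
          * macro leaves a == 1.0 + 0x1p-52 and b == -0x1p-54; a + b is
          * still exactly the original sum, with b holding the rounding
          * error.  With a = 1.0 and b = 0x1p-60, the sum rounds back to
          * 1.0 and b comes out unchanged as the entire error term.
          */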
    463 
    464 /*
    465  * Set x += c, where x is represented in extra precision as a + b.
    466  * x must be sufficiently normalized and sufficiently larger than c,
    467  * and the result is then sufficiently normalized.
    468  *
    469  * The details of ordering are that |a| must be >= |c| (so that (a, c)
    470  * can be normalized without extra work to swap 'a' with c).  The details of
    471  * the normalization are that b must be small relative to the normalized 'a'.
    472  * Normalization of (a, c) makes the normalized c tiny relative to the
    473  * normalized a, so b remains small relative to 'a' in the result.  However,
    474  * b need not ever be tiny relative to 'a'.  For example, b might be about
    475  * 2**20 times smaller than 'a' to give about 20 extra bits of precision.
    476  * That is usually enough, and adding c (which by normalization is about
    477  * 2**53 times smaller than a) cannot change b significantly.  However,
    478  * cancellation of 'a' with c in normalization of (a, c) may reduce 'a'
    479  * significantly relative to b.  The caller must ensure that significant
    480  * cancellation doesn't occur, either by having c of the same sign as 'a',
    481  * or by having |c| a few percent smaller than |a|.  Pre-normalization of
    482  * (a, b) may help.
    483  *
    484  * This is a variant of an algorithm of Kahan (see Knuth (1981) 4.2.2
    485  * exercise 19).  We gain considerable efficiency by requiring the terms to
    486  * be sufficiently normalized and sufficiently increasing.
    487  */
    488 #define	_3sumF(a, b, c) do {	\
    489 	__typeof(a) __tmp;	\
    490 				\
    491 	__tmp = (c);		\
    492 	_2sumF(__tmp, (a));	\
    493 	(b) += (a);		\
    494 	(a) = __tmp;		\
    495 } while (0)
    496 
    497 /*
    498  * Common routine to process the arguments to nan(), nanf(), and nanl().
    499  */
    500 void _scan_nan(uint32_t *__words, int __num_words, const char *__s);
    501 
    502 /*
    503  * Mix 0, 1 or 2 NaNs.  First add 0 to each arg.  This normally just turns
    504  * signaling NaNs into quiet NaNs by setting a quiet bit.  We do this
    505  * because we want to never return a signaling NaN, and also because we
    506  * don't want the quiet bit to affect the result.  Then mix the converted
    507  * args using the specified operation.
    508  *
    509  * When one arg is NaN, the result is typically that arg quieted.  When both
    510  * args are NaNs, the result is typically the quietening of the arg whose
    511  * significand is largest after quietening.  When neither arg is NaN, the
    512  * result may be NaN because it is indeterminate, or finite for subsequent
    513  * construction of a NaN as the indeterminate 0.0L/0.0L.
    514  *
    515  * Technical complications: the result in bits after rounding to the final
    516  * precision might depend on the runtime precision and/or on compiler
    517  * optimizations, especially when different register sets are used for
    518  * different precisions.  Try to make the result not depend on at least the
    519  * runtime precision by always doing the main mixing step in long double
    520  * precision.  Try to reduce dependencies on optimizations by adding the
     521  * 0's in different precisions (unless everything is in long double
    522  * precision).
    523  */
    524 #define	nan_mix(x, y)		(nan_mix_op((x), (y), +))
    525 #define	nan_mix_op(x, y, op)	(((x) + 0.0L) op ((y) + 0))
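
         /*
          * Illustrative use of nan_mix() (a sketch, not a quote from any
          * particular function): when either argument of a two-argument
          * function is a NaN, return a quieted mix of both instead of
          * passing one of them through raw.
          *
          *	if (isnan(x) || isnan(y))
          *		return nan_mix(x, y);
          */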
    526 
    527 #ifdef	_COMPLEX_H
    528 
    529 /*
    530  * Quoting from ISO/IEC 9899:TC2:
    531  *
     532  * 6.2.5 Types, paragraph 13:
    533  * Each complex type has the same representation and alignment requirements as
    534  * an array type containing exactly two elements of the corresponding real type;
    535  * the first element is equal to the real part, and the second element to the
    536  * imaginary part, of the complex number.
    537  */
    538 typedef union {
    539 	float complex z;
    540 	float parts[2];
    541 } float_complex;
    542 
    543 typedef union {
    544 	double complex z;
    545 	double parts[2];
    546 } double_complex;
    547 
    548 typedef union {
    549 	long double complex z;
    550 	long double parts[2];
    551 } long_double_complex;
    552 
    553 #define	REAL_PART(z)	((z).parts[0])
    554 #define	IMAG_PART(z)	((z).parts[1])
    555 
    556 /*
    557  * Inline functions that can be used to construct complex values.
    558  *
    559  * The C99 standard intends x+I*y to be used for this, but x+I*y is
    560  * currently unusable in general since gcc introduces many overflow,
    561  * underflow, sign and efficiency bugs by rewriting I*y as
    562  * (0.0+I)*(y+0.0*I) and laboriously computing the full complex product.
    563  * In particular, I*Inf is corrupted to NaN+I*Inf, and I*-0 is corrupted
    564  * to -0.0+I*0.0.
    565  *
    566  * The C11 standard introduced the macros CMPLX(), CMPLXF() and CMPLXL()
     567  * to construct complex values.  On compilers that implement only C99
     568  * and so lack these macros, the functions below avoid the above issues.
    569  */
    570 
    571 #ifndef CMPLXF
    572 static __inline float complex
    573 CMPLXF(float x, float y)
    574 {
    575 	float_complex z;
    576 
    577 	REAL_PART(z) = x;
    578 	IMAG_PART(z) = y;
    579 	return (z.z);
    580 }
    581 #endif
    582 
    583 #ifndef CMPLX
    584 static __inline double complex
    585 CMPLX(double x, double y)
    586 {
    587 	double_complex z;
    588 
    589 	REAL_PART(z) = x;
    590 	IMAG_PART(z) = y;
    591 	return (z.z);
    592 }
    593 #endif
    594 
    595 #ifndef CMPLXL
    596 static __inline long double complex
    597 CMPLXL(long double x, long double y)
    598 {
    599 	long_double_complex z;
    600 
    601 	REAL_PART(z) = x;
    602 	IMAG_PART(z) = y;
    603 	return (z.z);
    604 }
    605 #endif
    606 
    607 #endif	/* _COMPLEX_H */
    608 
    609 /* ieee style elementary functions */
    610 extern double __ieee754_sqrt __P((double));
    611 extern double __ieee754_acos __P((double));
    612 extern double __ieee754_acosh __P((double));
    613 extern double __ieee754_log __P((double));
    614 extern double __ieee754_atanh __P((double));
    615 extern double __ieee754_asin __P((double));
    616 extern double __ieee754_atan2 __P((double,double));
    617 extern double __ieee754_exp __P((double));
    618 extern double __ieee754_cosh __P((double));
    619 extern double __ieee754_fmod __P((double,double));
    620 extern double __ieee754_pow __P((double,double));
    621 extern double __ieee754_lgamma_r __P((double,int *));
    622 extern double __ieee754_gamma_r __P((double,int *));
    623 extern double __ieee754_lgamma __P((double));
    624 extern double __ieee754_gamma __P((double));
    625 extern double __ieee754_log10 __P((double));
    626 extern double __ieee754_log2 __P((double));
    627 extern double __ieee754_sinh __P((double));
    628 extern double __ieee754_hypot __P((double,double));
    629 extern double __ieee754_j0 __P((double));
    630 extern double __ieee754_j1 __P((double));
    631 extern double __ieee754_y0 __P((double));
    632 extern double __ieee754_y1 __P((double));
    633 extern double __ieee754_jn __P((int,double));
    634 extern double __ieee754_yn __P((int,double));
    635 extern double __ieee754_remainder __P((double,double));
    636 extern int32_t __ieee754_rem_pio2 __P((double,double*));
    637 extern double __ieee754_scalb __P((double,double));
    638 
    639 /* fdlibm kernel function */
    640 extern double __kernel_standard __P((double,double,int));
    641 extern double __kernel_sin __P((double,double,int));
    642 extern double __kernel_cos __P((double,double));
    643 extern double __kernel_tan __P((double,double,int));
    644 extern int    __kernel_rem_pio2 __P((double*,double*,int,int,int));
    645 
    646 
    647 /* ieee style elementary float functions */
    648 extern float __ieee754_sqrtf __P((float));
    649 extern float __ieee754_acosf __P((float));
    650 extern float __ieee754_acoshf __P((float));
    651 extern float __ieee754_logf __P((float));
    652 extern float __ieee754_atanhf __P((float));
    653 extern float __ieee754_asinf __P((float));
    654 extern float __ieee754_atan2f __P((float,float));
    655 extern float __ieee754_expf __P((float));
    656 extern float __ieee754_coshf __P((float));
    657 extern float __ieee754_fmodf __P((float,float));
    658 extern float __ieee754_powf __P((float,float));
    659 extern float __ieee754_lgammaf_r __P((float,int *));
    660 extern float __ieee754_gammaf_r __P((float,int *));
    661 extern float __ieee754_lgammaf __P((float));
    662 extern float __ieee754_gammaf __P((float));
    663 extern float __ieee754_log10f __P((float));
    664 extern float __ieee754_log2f __P((float));
    665 extern float __ieee754_sinhf __P((float));
    666 extern float __ieee754_hypotf __P((float,float));
    667 extern float __ieee754_j0f __P((float));
    668 extern float __ieee754_j1f __P((float));
    669 extern float __ieee754_y0f __P((float));
    670 extern float __ieee754_y1f __P((float));
    671 extern float __ieee754_jnf __P((int,float));
    672 extern float __ieee754_ynf __P((int,float));
    673 extern float __ieee754_remainderf __P((float,float));
    674 extern int32_t __ieee754_rem_pio2f __P((float,float*));
    675 extern float __ieee754_scalbf __P((float,float));
    676 
    677 /* float versions of fdlibm kernel functions */
    678 extern float __kernel_sinf __P((float,float,int));
    679 extern float __kernel_cosf __P((float,float));
    680 extern float __kernel_tanf __P((float,float,int));
    681 extern int   __kernel_rem_pio2f __P((float*,float*,int,int,int,const int32_t*));
    682 
    683 /* ieee style elementary long double functions */
    684 extern long double __ieee754_fmodl(long double, long double);
    685 extern long double __ieee754_sqrtl(long double);
    686 
    687 /*
    688  * TRUNC() is a macro that sets the trailing 27 bits in the significand of an
    689  * IEEE double variable to zero.  It must be expression-like for syntactic
    690  * reasons, and we implement this expression using an inline function
    691  * instead of a pure macro to avoid depending on the gcc feature of
    692  * statement-expressions.
    693  */
    694 #define	TRUNC(d)	(_b_trunc(&(d)))
    695 
    696 static __inline void
    697 _b_trunc(volatile double *_dp)
    698 {
    699 	uint32_t _lw;
    700 
    701 	GET_LOW_WORD(_lw, *_dp);
    702 	SET_LOW_WORD(*_dp, _lw & 0xf8000000);
    703 }
    704 
    705 struct Double {
    706 	double	a;
    707 	double	b;
    708 };
    709 
    710 /*
    711  * Functions internal to the math package, yet not static.
    712  */
    713 double	__exp__D(double, double);
    714 struct Double __log__D(double);
    715 
    716 /*
    717  * The rnint() family rounds to the nearest integer for a restricted range
     718  * of args (up to about 2**MANT_DIG).  We assume that the current
    719  * rounding mode is FE_TONEAREST so that this can be done efficiently.
     720  * Extra precision causes more problems in practice; we only centralize
     721  * this here to reduce those problems, but have not solved the efficiency
    722  * problems.  The exp2() family uses a more delicate version of this that
    723  * requires extracting bits from the intermediate value, so it is not
    724  * centralized here and should copy any solution of the efficiency problems.
    725  */
    726 
    727 static inline double
    728 rnint(double x)
    729 {
    730 	/*
    731 	 * This casts to double to kill any extra precision.  This depends
    732 	 * on the cast being applied to a double_t to avoid compiler bugs
    733 	 * (this is a cleaner version of STRICT_ASSIGN()).  This is
    734 	 * inefficient if there actually is extra precision, but is hard
    735 	 * to improve on.  We use double_t in the API to minimise conversions
    736 	 * for just calling here.  Note that we cannot easily change the
    737 	 * magic number to the one that works directly with double_t, since
    738 	 * the rounding precision is variable at runtime on x86 so the
    739 	 * magic number would need to be variable.  Assuming that the
    740 	 * rounding precision is always the default is too fragile.  This
    741 	 * and many other complications will move when the default is
    742 	 * changed to FP_PE.
    743 	 */
    744 	return ((double)(x + 0x1.8p52) - 0x1.8p52);
    745 }
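
         /*
          * Worked example of the trick above (illustrative): 0x1.8p52 is
          * 3*2**51, which lies in [2**52, 2**53) where doubles are spaced
          * exactly 1 apart, so x + 0x1.8p52 lands on the integer grid.
          * rnint(2.6) adds 0x1.8p52 to get the exact value
          * 6755399441055746.6, which rounds (FE_TONEAREST) to
          * 6755399441055747.0; subtracting 0x1.8p52 gives 3.0.  rnint(2.5)
          * ties to the even neighbour and gives 2.0.  Using 1.5*2**52
          * rather than 2**52 keeps the intermediate in that range for args
          * of small negative value as well.
          */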
    746 
    747 static inline float
    748 rnintf(float x)
    749 {
    750 	/*
    751 	 * As for rnint(), except we could just call that to handle the
    752 	 * extra precision case, usually without losing efficiency.
    753 	 */
    754 	return ((float)(x + 0x1.8p23F) - 0x1.8p23F);
    755 }
    756 
    757 #ifdef LDBL_MANT_DIG
    758 /*
    759  * The complications for extra precision are smaller for rnintl() since it
    760  * can safely assume that the rounding precision has been increased from
    761  * its default to FP_PE on x86.  We don't exploit that here to get small
    762  * optimizations from limiting the range to double.  We just need it for
    763  * the magic number to work with long doubles.  ld128 callers should use
    764  * rnint() instead of this if possible.  ld80 callers should prefer
    765  * rnintl() since for amd64 this avoids swapping the register set, while
    766  * for i386 it makes no difference (assuming FP_PE), and for other arches
    767  * it makes little difference.
    768  */
    769 static inline long double
    770 rnintl(long double x)
    771 {
    772 	return (x + ___CONCAT(0x1.8p,LDBL_MANT_DIG) / 2 -
    773 	    ___CONCAT(0x1.8p,LDBL_MANT_DIG) / 2);
    774 }
    775 #endif /* LDBL_MANT_DIG */
    776 
    777 /*
    778  * irint() and i64rint() give the same result as casting to their integer
    779  * return type provided their arg is a floating point integer.  They can
    780  * sometimes be more efficient because no rounding is required.
    781  */
    782 #if (defined(amd64) || defined(__i386__)) && defined(__GNUCLIKE_ASM)
    783 #define	irint(x)						\
    784     (sizeof(x) == sizeof(float) &&				\
    785     sizeof(__float_t) == sizeof(long double) ? irintf(x) :	\
    786     sizeof(x) == sizeof(double) &&				\
    787     sizeof(__double_t) == sizeof(long double) ? irintd(x) :	\
    788     sizeof(x) == sizeof(long double) ? irintl(x) : (int)(x))
    789 #else
    790 #define	irint(x)	((int)(x))
    791 #endif
    792 
    793 #define	i64rint(x)	((int64_t)(x))	/* only needed for ld128 so not opt. */
    794 
    795 #if defined(__i386__) && defined(__GNUCLIKE_ASM)
    796 static __inline int
    797 irintf(float x)
    798 {
    799 	int n;
    800 
    801 	__asm("fistl %0" : "=m" (n) : "t" (x));
    802 	return (n);
    803 }
    804 
    805 static __inline int
    806 irintd(double x)
    807 {
    808 	int n;
    809 
    810 	__asm("fistl %0" : "=m" (n) : "t" (x));
    811 	return (n);
    812 }
    813 #endif
    814 
    815 #if (defined(__amd64__) || defined(__i386__)) && defined(__GNUCLIKE_ASM)
    816 static __inline int
    817 irintl(long double x)
    818 {
    819 	int n;
    820 
    821 	__asm("fistl %0" : "=m" (n) : "t" (x));
    822 	return (n);
    823 }
    824 #endif
    825 
    826 /*
    827  * The following are fast floor macros for 0 <= |x| < 0x1p(N-1), where
    828  * N is the precision of the type of x. These macros are used in the
     829  * half-cycle trigonometric functions (e.g., sinpi(x)).
    830  */
    831 #define	FFLOORF(x, j0, ix) do {			\
    832 	(j0) = (((ix) >> 23) & 0xff) - 0x7f;	\
    833 	(ix) &= ~(0x007fffff >> (j0));		\
    834 	SET_FLOAT_WORD((x), (ix));		\
    835 } while (0)
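
         /*
          * Illustrative call sequence for FFLOORF() (a sketch): the caller
          * supplies the float word in ix and gets the unbiased exponent
          * back in j0, while x is replaced by floorf(x) (the shift assumes
          * j0 >= 0, i.e. |x| >= 1, for the nonnegative args these are
          * used on).
          *
          *	float x = 5.75F;
          *	int32_t ix, j0;
          *
          *	GET_FLOAT_WORD(ix, x);
          *	FFLOORF(x, j0, ix);	// x == 5.0F, j0 == 2
          */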
    836 
    837 #define	FFLOOR(x, j0, ix, lx) do {				\
    838 	(j0) = (((ix) >> 20) & 0x7ff) - 0x3ff;			\
    839 	if ((j0) < 20) {					\
    840 		(ix) &= ~(0x000fffff >> (j0));			\
    841 		(lx) = 0;					\
    842 	} else {						\
    843 		(lx) &= ~((uint32_t)0xffffffff >> ((j0) - 20));	\
    844 	}							\
    845 	INSERT_WORDS((x), (ix), (lx));				\
    846 } while (0)
    847 
    848 #define	FFLOORL80(x, j0, ix, lx) do {			\
    849 	j0 = ix - 0x3fff + 1;				\
    850 	if ((j0) < 32) {				\
    851 		(lx) = ((lx) >> 32) << 32;		\
    852 		(lx) &= ~((((lx) << 32)-1) >> (j0));	\
    853 	} else {					\
    854 		uint64_t _m;				\
    855 		_m = (uint64_t)-1 >> (j0);		\
    856 		if ((lx) & _m) (lx) &= ~_m;		\
    857 	}						\
    858 	INSERT_LDBL80_WORDS((x), (ix), (lx));		\
    859 } while (0)
    860 
    861 #define FFLOORL128(x, ai, ar) do {			\
    862 	union ieee_ext_u u;				\
    863 	uint64_t m;					\
    864 	int e;						\
    865 	u.extu_ld = (x);					\
    866 	e = u.extu_exp - 16383;				\
    867 	if (e < 48) {					\
    868 		m = ((1llu << 49) - 1) >> (e + 1);	\
    869 		u.extu_frach &= ~m;			\
    870 		u.extu_fracl = 0;			\
    871 	} else {					\
    872 		m = (uint64_t)-1 >> (e - 48);		\
    873 		u.extu_fracl &= ~m;			\
    874 	}						\
    875 	(ai) = u.extu_ld;					\
    876 	(ar) = (x) - (ai);				\
    877 } while (0)
    878 
    879 #ifdef DEBUG
    880 #if defined(__amd64__) || defined(__i386__)
    881 #define breakpoint()    asm("int $3")
    882 #else
    883 #include <signal.h>
    884 
    885 #define breakpoint()    raise(SIGTRAP)
    886 #endif
    887 #endif
    888 
    889 #ifdef STRUCT_RETURN
    890 #define	RETURNSP(rp) do {		\
    891 	if (!(rp)->lo_set)		\
    892 		RETURNF((rp)->hi);	\
    893 	RETURNF((rp)->hi + (rp)->lo);	\
    894 } while (0)
    895 #define	RETURNSPI(rp) do {		\
    896 	if (!(rp)->lo_set)		\
    897 		RETURNI((rp)->hi);	\
    898 	RETURNI((rp)->hi + (rp)->lo);	\
    899 } while (0)
    900 #endif
    901 
    902 #define	SUM2P(x, y) ({			\
    903 	const __typeof (x) __x = (x);	\
    904 	const __typeof (y) __y = (y);	\
    905 	__x + __y;			\
    906 })
    907 
    908 #ifndef INLINE_KERNEL_SINDF
    909 float   __kernel_sindf(double);
    910 #endif
    911 #ifndef INLINE_KERNEL_COSDF
    912 float   __kernel_cosdf(double);
    913 #endif
    914 #ifndef INLINE_KERNEL_TANDF
    915 float   __kernel_tandf(double,int);
    916 #endif
    917 
    918 /* long double precision kernel functions */
    919 long double __kernel_sinl(long double, long double, int);
    920 long double __kernel_cosl(long double, long double);
    921 long double __kernel_tanl(long double, long double, int);
    922 
    923 #endif /* _MATH_PRIVATE_H_ */
    924