/**************************************************************************
 *
 * Copyright 2008 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/


/**
 * Math utilities and approximations for common math functions.
 * Reduced precision is usually acceptable in shaders...
 *
 * "fast" is used in the names of functions which are low-precision,
 * or at least lower-precision than the normal C lib functions.
 */


#ifndef U_MATH_H
#define U_MATH_H


#include "c99_math.h"
#include <assert.h>
#include <float.h>
#include <stdarg.h>
#include <stdbool.h> /* for bool */
#include <stdint.h>  /* for the fixed-width integer types */
#include <string.h>  /* for memcpy() */

#include "bitscan.h"
#include "u_endian.h" /* for UTIL_ARCH_BIG_ENDIAN */

#ifdef __cplusplus
extern "C" {
#endif


#ifndef M_SQRT2
#define M_SQRT2 1.41421356237309504880
#endif


/**
 * Initialize math module. This should be called before using any
 * other functions in this module.
 */
extern void
util_init_math(void);


/* Type-punning unions for reinterpreting the bits of floats and doubles. */
union fi {
   float f;
   int32_t i;
   uint32_t ui;
};


union di {
   double d;
   int64_t i;
   uint64_t ui;
};
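/*
 * Usage sketch (illustrative only):
 *
 *    union fi tmp;
 *    tmp.f = 1.0f;
 *    // tmp.ui is now 0x3f800000: sign 0, biased exponent 127, mantissa 0
 *
 * Reading a union member other than the one last written reinterprets the
 * bytes (type punning). C99/C11 permit this, which is why these unions are
 * used here instead of pointer casts that would violate strict aliasing;
 * the fui()/uif() helpers below wrap the same pattern.
 */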
/**
 * Extract the IEEE float32 exponent.
 */
static inline signed
util_get_float32_exponent(float x)
{
   union fi f;

   f.f = x;

   return ((f.ui >> 23) & 0xff) - 127;
}


#define LOG2_TABLE_SIZE_LOG2 8
#define LOG2_TABLE_SCALE (1 << LOG2_TABLE_SIZE_LOG2)
#define LOG2_TABLE_SIZE (LOG2_TABLE_SCALE + 1)
extern float log2_table[LOG2_TABLE_SIZE];


/**
 * Fast approximation to log2(x).
 */
static inline float
util_fast_log2(float x)
{
   union fi num;
   float epart, mpart;
   num.f = x;
   epart = (float)(((num.i & 0x7f800000) >> 23) - 127);
   /* mpart = log2_table[mantissa*LOG2_TABLE_SCALE + 0.5] */
   mpart = log2_table[((num.i & 0x007fffff) + (1 << (22 - LOG2_TABLE_SIZE_LOG2))) >> (23 - LOG2_TABLE_SIZE_LOG2)];
   return epart + mpart;
}
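/*
 * How util_fast_log2 works (a sketch): an IEEE float is x = m * 2^e with
 * m in [1, 2), so log2(x) = e + log2(m). The exponent e is read straight
 * from the bit pattern ("epart"), and log2(m) is looked up in log2_table,
 * indexed by the top LOG2_TABLE_SIZE_LOG2 mantissa bits with rounding
 * ("mpart"). For example, x = 8.0f has e = 3 and m = 1.0, so the result
 * is ~3.0 to within the table's precision.
 */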
/**
 * Floor(x), returned as int.
 */
static inline int
util_ifloor(float f)
{
#if defined(USE_X86_ASM) && defined(__GNUC__) && defined(__i386__)
   /*
    * IEEE floor for computers that round to nearest or even.
    * 'f' must be between -4194304 and 4194303.
    * This floor operation is done by "(iround(f + .5) + iround(f - .5)) >> 1",
    * but uses some IEEE specific tricks for better speed.
    * Contributed by Josh Vanderhoof
    */
   int ai, bi;
   double af, bf;
   af = (3 << 22) + 0.5 + (double)f;
   bf = (3 << 22) + 0.5 - (double)f;
   /* GCC generates an extra fstp/fld without this. */
   __asm__ ("fstps %0" : "=m" (ai) : "t" (af) : "st");
   __asm__ ("fstps %0" : "=m" (bi) : "t" (bf) : "st");
   return (ai - bi) >> 1;
#else
   /* Same bias trick as above, reading the bits through a union instead
    * of inline assembly; the same input range restriction applies.
    */
   int ai, bi;
   double af, bf;
   union fi u;
   af = (3 << 22) + 0.5 + (double) f;
   bf = (3 << 22) + 0.5 - (double) f;
   u.f = (float) af; ai = u.i;
   u.f = (float) bf; bi = u.i;
   return (ai - bi) >> 1;
#endif
}


/**
 * Round float to nearest int.
 */
static inline int
util_iround(float f)
{
#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86)
   int r;
   __asm__ ("fistpl %0" : "=m" (r) : "t" (f) : "st");
   return r;
#elif defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86)
   int r;
   _asm {
      fld f
      fistp r
   }
   return r;
#else
   if (f >= 0.0f)
      return (int) (f + 0.5f);
   else
      return (int) (f - 0.5f);
#endif
}
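/*
 * A worked example of the (3 << 22) + 0.5 bias trick in util_ifloor above
 * (sketch): 3 << 22 is 12582912.0 = 1.5 * 2^23. For |f| < 2^22 the sums af
 * and bf land in [2^23, 2^24), where single-precision floats are spaced
 * exactly 1.0 apart, so the conversion to float rounds them to integers
 * for free and the low bits of the resulting bit pattern hold the integer
 * offset. For f = 2.7: ai - bi = round(bias + 3.2) - round(bias - 2.2) = 5,
 * and 5 >> 1 == 2 == floor(2.7). In general (ai - bi) is 2*floor(f) or
 * 2*floor(f) + 1, so the arithmetic right shift recovers floor(f).
 */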
/**
 * Approximate floating-point comparison: true if |b - a| <= tol.
 */
static inline bool
util_is_approx(float a, float b, float tol)
{
   return fabsf(b - a) <= tol;
}


/**
 * util_is_X_inf_or_nan = test if x is NaN or +/- Inf
 * util_is_X_nan = test if x is NaN
 * util_X_inf_sign = return +1 for +Inf, -1 for -Inf, or 0 for not Inf
 *
 * NaN could be checked with x != x, but that test breaks under fast-math
 * compilation, so these helpers inspect the bit patterns instead.
 **/


/**
 * Single-float
 */
static inline bool
util_is_inf_or_nan(float x)
{
   union fi tmp;
   tmp.f = x;
   return (tmp.ui & 0x7f800000) == 0x7f800000;
}


static inline bool
util_is_nan(float x)
{
   union fi tmp;
   tmp.f = x;
   return (tmp.ui & 0x7fffffff) > 0x7f800000;
}


static inline int
util_inf_sign(float x)
{
   union fi tmp;
   tmp.f = x;
   if ((tmp.ui & 0x7fffffff) != 0x7f800000) {
      return 0;
   }

   return (x < 0) ? -1 : 1;
}


/**
 * Double-float
 */
static inline bool
util_is_double_inf_or_nan(double x)
{
   union di tmp;
   tmp.d = x;
   return (tmp.ui & 0x7ff0000000000000ULL) == 0x7ff0000000000000ULL;
}


static inline bool
util_is_double_nan(double x)
{
   union di tmp;
   tmp.d = x;
   return (tmp.ui & 0x7fffffffffffffffULL) > 0x7ff0000000000000ULL;
}


static inline int
util_double_inf_sign(double x)
{
   union di tmp;
   tmp.d = x;
   if ((tmp.ui & 0x7fffffffffffffffULL) != 0x7ff0000000000000ULL) {
      return 0;
   }

   return (x < 0) ? -1 : 1;
}


/**
 * Half-float ('x' is the raw bit pattern of a 16-bit float)
 */
static inline bool
util_is_half_inf_or_nan(int16_t x)
{
   return (x & 0x7c00) == 0x7c00;
}


static inline bool
util_is_half_nan(int16_t x)
{
   return (x & 0x7fff) > 0x7c00;
}


static inline int
util_half_inf_sign(int16_t x)
{
   if ((x & 0x7fff) != 0x7c00) {
      return 0;
   }

   return (x < 0) ? -1 : 1;
}


/**
 * Return float bits.
 */
static inline unsigned
fui( float f )
{
   union fi fi;
   fi.f = f;
   return fi.ui;
}

static inline float
uif(uint32_t ui)
{
   union fi fi;
   fi.ui = ui;
   return fi.f;
}


/**
 * Convert uint8_t to float in [0, 1].
 */
static inline float
ubyte_to_float(uint8_t ub)
{
   return (float) ub * (1.0f / 255.0f);
}


/**
 * Convert float in [0,1] to uint8_t in [0,255] with clamping.
 */
static inline uint8_t
float_to_ubyte(float f)
{
   /* return 0 for NaN too */
   if (!(f > 0.0f)) {
      return (uint8_t) 0;
   }
   else if (f >= 1.0f) {
      return (uint8_t) 255;
   }
   else {
      union fi tmp;
      tmp.f = f;
      tmp.f = tmp.f * (255.0f/256.0f) + 32768.0f;
      return (uint8_t) tmp.i;
   }
}
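/*
 * Note on float_to_ubyte's magic constants (a sketch of the idea): near
 * 32768.0f (2^15) one float ULP is exactly 1/256, so adding 32768.0f makes
 * the FPU round f * (255/256) to a multiple of 1/256 and leaves the 8-bit
 * result sitting in the low mantissa bits, where the (uint8_t) cast of the
 * bit pattern picks it up. E.g. f = 0.5f yields 128. float_to_ushort below
 * plays the same game at 128.0f (2^7), where one ULP is 1/65536.
 */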
/**
 * Convert uint16_t to float in [0, 1].
 */
static inline float
ushort_to_float(uint16_t us)
{
   return (float) us * (1.0f / 65535.0f);
}


/**
 * Convert float in [0,1] to uint16_t in [0,65535] with clamping.
 */
static inline uint16_t
float_to_ushort(float f)
{
   /* return 0 for NaN too */
   if (!(f > 0.0f)) {
      return (uint16_t) 0;
   }
   else if (f >= 1.0f) {
      return (uint16_t) 65535;
   }
   else {
      union fi tmp;
      tmp.f = f;
      tmp.f = tmp.f * (65535.0f/65536.0f) + 128.0f;
      return (uint16_t) tmp.i;
   }
}

/**
 * Convert int8_t in [-128,127] to float in [-1,1], as for signed
 * normalized texture formats; -128 clamps to -1.0.
 */
static inline float
byte_to_float_tex(int8_t b)
{
   return (b == -128) ? -1.0F : b * 1.0F / 127.0F;
}

/**
 * Convert float in [-1,1] to int8_t; the inverse of byte_to_float_tex().
 */
static inline int8_t
float_to_byte_tex(float f)
{
   return (int8_t) (127.0F * f);
}

/**
 * Compute floor(log2(n)); returns 0 when n == 0.
 */
static inline unsigned
util_logbase2(unsigned n)
{
#if defined(HAVE___BUILTIN_CLZ)
   return ((sizeof(unsigned) * 8 - 1) - __builtin_clz(n | 1));
#else
   unsigned pos = 0;
   if (n >= 1<<16) { n >>= 16; pos += 16; }
   if (n >= 1<< 8) { n >>= 8; pos += 8; }
   if (n >= 1<< 4) { n >>= 4; pos += 4; }
   if (n >= 1<< 2) { n >>= 2; pos += 2; }
   if (n >= 1<< 1) { pos += 1; }
   return pos;
#endif
}

static inline uint64_t
util_logbase2_64(uint64_t n)
{
#if defined(HAVE___BUILTIN_CLZLL)
   return ((sizeof(uint64_t) * 8 - 1) - __builtin_clzll(n | 1));
#else
   uint64_t pos = 0ull;
   if (n >= 1ull<<32) { n >>= 32; pos += 32; }
   if (n >= 1ull<<16) { n >>= 16; pos += 16; }
   if (n >= 1ull<< 8) { n >>= 8; pos += 8; }
   if (n >= 1ull<< 4) { n >>= 4; pos += 4; }
   if (n >= 1ull<< 2) { n >>= 2; pos += 2; }
   if (n >= 1ull<< 1) { pos += 1; }
   return pos;
#endif
}
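/*
 * Illustrative values: util_logbase2(1) == 0, util_logbase2(8) == 3 and
 * util_logbase2(9) == 3 (floor semantics); compare util_logbase2_ceil(9)
 * == 4 below, the smallest x with 9 <= 2^x.
 */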
/**
 * Returns the ceiling of log2(n), and 0 when n == 0. Equivalently,
 * returns the smallest x such that n <= 2**x.
 */
static inline unsigned
util_logbase2_ceil(unsigned n)
{
   if (n <= 1)
      return 0;

   return 1 + util_logbase2(n - 1);
}

static inline uint64_t
util_logbase2_ceil64(uint64_t n)
{
   if (n <= 1)
      return 0;

   return 1ull + util_logbase2_64(n - 1);
}

/**
 * Returns the smallest power of two >= x
 */
static inline unsigned
util_next_power_of_two(unsigned x)
{
#if defined(HAVE___BUILTIN_CLZ)
   if (x <= 1)
      return 1;

   return (1 << ((sizeof(unsigned) * 8) - __builtin_clz(x - 1)));
#else
   unsigned val = x;

   if (x <= 1)
      return 1;

   if (util_is_power_of_two_or_zero(x))
      return x;

   val--;
   val = (val >> 1) | val;
   val = (val >> 2) | val;
   val = (val >> 4) | val;
   val = (val >> 8) | val;
   val = (val >> 16) | val;
   val++;
   return val;
#endif
}

static inline uint64_t
util_next_power_of_two64(uint64_t x)
{
#if defined(HAVE___BUILTIN_CLZLL)
   if (x <= 1)
      return 1;

   return (1ull << ((sizeof(uint64_t) * 8) - __builtin_clzll(x - 1)));
#else
   uint64_t val = x;

   if (x <= 1)
      return 1;

   if (util_is_power_of_two_or_zero64(x))
      return x;

   val--;
   val = (val >> 1) | val;
   val = (val >> 2) | val;
   val = (val >> 4) | val;
   val = (val >> 8) | val;
   val = (val >> 16) | val;
   val = (val >> 32) | val;
   val++;
   return val;
#endif
}

/**
 * Reverse bits in n
 * Algorithm taken from:
 * http://stackoverflow.com/questions/9144800/c-reverse-bits-in-unsigned-integer
 */
static inline unsigned
util_bitreverse(unsigned n)
{
   n = ((n >> 1) & 0x55555555u) | ((n & 0x55555555u) << 1);
   n = ((n >> 2) & 0x33333333u) | ((n & 0x33333333u) << 2);
   n = ((n >> 4) & 0x0f0f0f0fu) | ((n & 0x0f0f0f0fu) << 4);
   n = ((n >> 8) & 0x00ff00ffu) | ((n & 0x00ff00ffu) << 8);
   n = ((n >> 16) & 0xffffu) | ((n & 0xffffu) << 16);
   return n;
}
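/*
 * The five steps above swap groups of 1, 2, 4, 8 and then 16 bits, so the
 * whole 32-bit word ends up mirrored. Illustrative values:
 * util_bitreverse(0x1) == 0x80000000, util_bitreverse(0xff) == 0xff000000.
 */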
/**
 * Convert between little-endian and CPU byte order.
 */

#if UTIL_ARCH_BIG_ENDIAN
#define util_le64_to_cpu(x) util_bswap64(x)
#define util_le32_to_cpu(x) util_bswap32(x)
#define util_le16_to_cpu(x) util_bswap16(x)
#else
#define util_le64_to_cpu(x) (x)
#define util_le32_to_cpu(x) (x)
#define util_le16_to_cpu(x) (x)
#endif

#define util_cpu_to_le64(x) util_le64_to_cpu(x)
#define util_cpu_to_le32(x) util_le32_to_cpu(x)
#define util_cpu_to_le16(x) util_le16_to_cpu(x)

/**
 * Reverse byte order of a 32 bit word.
 */
static inline uint32_t
util_bswap32(uint32_t n)
{
#if defined(HAVE___BUILTIN_BSWAP32)
   return __builtin_bswap32(n);
#else
   return (n >> 24) |
          ((n >> 8) & 0x0000ff00) |
          ((n << 8) & 0x00ff0000) |
          (n << 24);
#endif
}

/**
 * Reverse byte order of a 64-bit word.
 */
static inline uint64_t
util_bswap64(uint64_t n)
{
#if defined(HAVE___BUILTIN_BSWAP64)
   return __builtin_bswap64(n);
#else
   return ((uint64_t)util_bswap32((uint32_t)n) << 32) |
          util_bswap32((n >> 32));
#endif
}


/**
 * Reverse byte order of a 16 bit word.
 */
static inline uint16_t
util_bswap16(uint16_t n)
{
   return (n >> 8) |
          (n << 8);
}

/**
 * Sign-extend the low 'width' bits of 'val' to a signed 64-bit integer.
 */
static inline int64_t
util_sign_extend(uint64_t val, unsigned width)
{
   assert(width > 0);
   if (val & (UINT64_C(1) << (width - 1))) {
      return -(int64_t)((UINT64_C(1) << width) - val);
   } else {
      return val;
   }
}
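/*
 * Example (illustrative): with width == 3, the value 0x7 (0b111) has its
 * sign bit set, so util_sign_extend(0x7, 3) == -1, while
 * util_sign_extend(0x3, 3) == 3. Handy when unpacking signed bitfields.
 */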
/**
 * Copy 'n' bytes ('n' must be a multiple of 4), byte-swapping each 32-bit
 * word on big-endian CPUs so the destination ends up little-endian.
 */
static inline void*
util_memcpy_cpu_to_le32(void * restrict dest, const void * restrict src, size_t n)
{
#if UTIL_ARCH_BIG_ENDIAN
   size_t i, e;
   assert(n % 4 == 0);

   for (i = 0, e = n / 4; i < e; i++) {
      uint32_t * restrict d = (uint32_t* restrict)dest;
      const uint32_t * restrict s = (const uint32_t* restrict)src;
      d[i] = util_bswap32(s[i]);
   }
   return dest;
#else
   return memcpy(dest, src, n);
#endif
}

/**
 * Clamp X to [MIN, MAX].
 * This is a macro to allow float, int, uint, etc. types.
 * We arbitrarily turn NaN into MIN.
 */
#define CLAMP( X, MIN, MAX )  ( (X)>(MIN) ? ((X)>(MAX) ? (MAX) : (X)) : (MIN) )

/* Syntax sugar occurring frequently in graphics code */
#define SATURATE( X ) CLAMP(X, 0.0f, 1.0f)

#define MIN2( A, B )   ( (A)<(B) ? (A) : (B) )
#define MAX2( A, B )   ( (A)>(B) ? (A) : (B) )

#define MIN3( A, B, C ) ((A) < (B) ? MIN2(A, C) : MIN2(B, C))
#define MAX3( A, B, C ) ((A) > (B) ? MAX2(A, C) : MAX2(B, C))

#define MIN4( A, B, C, D ) ((A) < (B) ? MIN3(A, C, D) : MIN3(B, C, D))
#define MAX4( A, B, C, D ) ((A) > (B) ? MAX3(A, C, D) : MAX3(B, C, D))


/**
 * Align a value up to an alignment value
 *
 * If \c value is not already aligned to the requested alignment value, it
 * will be rounded up.
 *
 * \param value      Value to be rounded
 * \param alignment  Alignment value to be used. This must be a power of two.
 *
 * \sa ROUND_DOWN_TO()
 */

#if defined(ALIGN)
#undef ALIGN
#endif
static inline uintptr_t
ALIGN(uintptr_t value, int32_t alignment)
{
   assert(util_is_power_of_two_nonzero(alignment));
   return (((value) + (alignment) - 1) & ~((alignment) - 1));
}

/**
 * Like ALIGN(), but works with a non-power-of-two alignment.
 */
static inline uintptr_t
ALIGN_NPOT(uintptr_t value, int32_t alignment)
{
   assert(alignment > 0);
   return (value + alignment - 1) / alignment * alignment;
}

/**
 * Align a value down to an alignment value
 *
 * If \c value is not already aligned to the requested alignment value, it
 * will be rounded down.
 *
 * \param value      Value to be rounded
 * \param alignment  Alignment value to be used. This must be a power of two.
 *
 * \sa ALIGN()
 */
static inline uint64_t
ROUND_DOWN_TO(uint64_t value, int32_t alignment)
{
   assert(util_is_power_of_two_nonzero(alignment));
   return ((value) & ~(alignment - 1));
}
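/*
 * Illustrative values: ALIGN(13, 8) == 16, ROUND_DOWN_TO(13, 8) == 8 and
 * ALIGN_NPOT(13, 6) == 18. The power-of-two variants reduce to simple
 * mask arithmetic, which is why they assert a power-of-two alignment.
 */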
/**
 * Align a value up; works only with power-of-two alignments.
 */
static inline int
align(int value, int alignment)
{
   return (value + alignment - 1) & ~(alignment - 1);
}

static inline uint64_t
align64(uint64_t value, unsigned alignment)
{
   return (value + alignment - 1) & ~((uint64_t)alignment - 1);
}

/**
 * Works like align() but also handles non-power-of-two alignments.
 */
static inline size_t
util_align_npot(size_t value, size_t alignment)
{
   if (value % alignment)
      return value + (alignment - (value % alignment));
   return value;
}

static inline unsigned
u_minify(unsigned value, unsigned levels)
{
   return MAX2(1, value >> levels);
}

#ifndef COPY_4V
#define COPY_4V( DST, SRC )      \
do {                             \
   (DST)[0] = (SRC)[0];          \
   (DST)[1] = (SRC)[1];          \
   (DST)[2] = (SRC)[2];          \
   (DST)[3] = (SRC)[3];          \
} while (0)
#endif


#ifndef COPY_4FV
#define COPY_4FV( DST, SRC )  COPY_4V(DST, SRC)
#endif


#ifndef ASSIGN_4V
#define ASSIGN_4V( DST, V0, V1, V2, V3 ) \
do {                                     \
   (DST)[0] = (V0);                      \
   (DST)[1] = (V1);                      \
   (DST)[2] = (V2);                      \
   (DST)[3] = (V3);                      \
} while (0)
#endif


/* Convert a float to unsigned/signed fixed point with 'frac_bits'
 * fractional bits (the cast truncates toward zero).
 */
static inline uint32_t
util_unsigned_fixed(float value, unsigned frac_bits)
{
   return value < 0 ? 0 : (uint32_t)(value * (1<<frac_bits));
}

static inline int32_t
util_signed_fixed(float value, unsigned frac_bits)
{
   return (int32_t)(value * (1<<frac_bits));
}
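/*
 * Fixed-point sketch: with frac_bits == 8, util_signed_fixed(1.5f, 8) ==
 * 384 (i.e. 1.5 * 256), and util_unsigned_fixed(-0.5f, 8) == 0 because
 * negative inputs clamp to zero.
 */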
unsigned
util_fpstate_get(void);
unsigned
util_fpstate_set_denorms_to_zero(unsigned current_fpstate);
void
util_fpstate_set(unsigned fpstate);

/**
 * For indexed draw calls, return true if the vertex count to be drawn is
 * much lower than the vertex count that has to be uploaded, meaning
 * that the driver should flatten indices instead of uploading an overly
 * large vertex range.
 *
 * This is used by vertex upload code in u_vbuf and glthread.
 */
static inline bool
util_is_vbo_upload_ratio_too_large(unsigned draw_vertex_count,
                                   unsigned upload_vertex_count)
{
   if (draw_vertex_count > 1024)
      return upload_vertex_count > draw_vertex_count * 4;
   else if (draw_vertex_count > 32)
      return upload_vertex_count > draw_vertex_count * 8;
   else
      return upload_vertex_count > draw_vertex_count * 16;
}

bool util_invert_mat4x4(float *out, const float *m);

/* Quantize the lod bias value to reduce the number of sampler state
 * variants in gallium, because apps use it for smooth mipmap transitions,
 * thrashing cso_cache and degrading performance.
 *
 * This quantization matches the AMD hw specification, so having more
 * precision would have no effect anyway.
 */
static inline float
util_quantize_lod_bias(float lod)
{
   lod = CLAMP(lod, -16, 16);
   return roundf(lod * 256) / 256;
}

#ifdef __cplusplus
}
#endif

#endif /* U_MATH_H */