/* u_math.c revision 01e04c3f */
101e04c3fSmrg/************************************************************************** 201e04c3fSmrg * 301e04c3fSmrg * Copyright 2008 VMware, Inc. 401e04c3fSmrg * All Rights Reserved. 501e04c3fSmrg * 601e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 701e04c3fSmrg * copy of this software and associated documentation files (the 801e04c3fSmrg * "Software"), to deal in the Software without restriction, including 901e04c3fSmrg * without limitation the rights to use, copy, modify, merge, publish, 1001e04c3fSmrg * distribute, sub license, and/or sell copies of the Software, and to 1101e04c3fSmrg * permit persons to whom the Software is furnished to do so, subject to 1201e04c3fSmrg * the following conditions: 1301e04c3fSmrg * 1401e04c3fSmrg * The above copyright notice and this permission notice (including the 1501e04c3fSmrg * next paragraph) shall be included in all copies or substantial portions 1601e04c3fSmrg * of the Software. 1701e04c3fSmrg * 1801e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 1901e04c3fSmrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 2001e04c3fSmrg * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 2101e04c3fSmrg * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 2201e04c3fSmrg * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 2301e04c3fSmrg * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 2401e04c3fSmrg * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
2501e04c3fSmrg * 2601e04c3fSmrg **************************************************************************/ 2701e04c3fSmrg 2801e04c3fSmrg 2901e04c3fSmrg 3001e04c3fSmrg#include "pipe/p_config.h" 3101e04c3fSmrg#include "util/u_math.h" 3201e04c3fSmrg#include "util/u_cpu_detect.h" 3301e04c3fSmrg 3401e04c3fSmrg#if defined(PIPE_ARCH_SSE) 3501e04c3fSmrg#include <xmmintrin.h> 3601e04c3fSmrg/* This is defined in pmmintrin.h, but it can only be included when -msse3 is 3701e04c3fSmrg * used, so just define it here to avoid further. */ 3801e04c3fSmrg#define _MM_DENORMALS_ZERO_MASK 0x0040 3901e04c3fSmrg#endif 4001e04c3fSmrg 4101e04c3fSmrg 4201e04c3fSmrg/** 2^x, for x in [-1.0, 1.0) */ 4301e04c3fSmrgfloat pow2_table[POW2_TABLE_SIZE]; 4401e04c3fSmrg 4501e04c3fSmrg 4601e04c3fSmrgstatic void 4701e04c3fSmrginit_pow2_table(void) 4801e04c3fSmrg{ 4901e04c3fSmrg int i; 5001e04c3fSmrg for (i = 0; i < POW2_TABLE_SIZE; i++) 5101e04c3fSmrg pow2_table[i] = exp2f((i - POW2_TABLE_OFFSET) / POW2_TABLE_SCALE); 5201e04c3fSmrg} 5301e04c3fSmrg 5401e04c3fSmrg 5501e04c3fSmrg/** log2(x), for x in [1.0, 2.0) */ 5601e04c3fSmrgfloat log2_table[LOG2_TABLE_SIZE]; 5701e04c3fSmrg 5801e04c3fSmrg 5901e04c3fSmrgstatic void 6001e04c3fSmrginit_log2_table(void) 6101e04c3fSmrg{ 6201e04c3fSmrg unsigned i; 6301e04c3fSmrg for (i = 0; i < LOG2_TABLE_SIZE; i++) 6401e04c3fSmrg log2_table[i] = (float) log2(1.0 + i * (1.0 / LOG2_TABLE_SCALE)); 6501e04c3fSmrg} 6601e04c3fSmrg 6701e04c3fSmrg 6801e04c3fSmrg/** 6901e04c3fSmrg * One time init for math utilities. 7001e04c3fSmrg */ 7101e04c3fSmrgvoid 7201e04c3fSmrgutil_init_math(void) 7301e04c3fSmrg{ 7401e04c3fSmrg static boolean initialized = FALSE; 7501e04c3fSmrg if (!initialized) { 7601e04c3fSmrg init_pow2_table(); 7701e04c3fSmrg init_log2_table(); 7801e04c3fSmrg initialized = TRUE; 7901e04c3fSmrg } 8001e04c3fSmrg} 8101e04c3fSmrg 8201e04c3fSmrg/** 8301e04c3fSmrg * Fetches the contents of the fpstate (mxcsr on x86) register. 
8401e04c3fSmrg * 8501e04c3fSmrg * On platforms without support for it just returns 0. 8601e04c3fSmrg */ 8701e04c3fSmrgunsigned 8801e04c3fSmrgutil_fpstate_get(void) 8901e04c3fSmrg{ 9001e04c3fSmrg unsigned mxcsr = 0; 9101e04c3fSmrg 9201e04c3fSmrg#if defined(PIPE_ARCH_SSE) 9301e04c3fSmrg if (util_cpu_caps.has_sse) { 9401e04c3fSmrg mxcsr = _mm_getcsr(); 9501e04c3fSmrg } 9601e04c3fSmrg#endif 9701e04c3fSmrg 9801e04c3fSmrg return mxcsr; 9901e04c3fSmrg} 10001e04c3fSmrg 10101e04c3fSmrg/** 10201e04c3fSmrg * Make sure that the fp treats the denormalized floating 10301e04c3fSmrg * point numbers as zero. 10401e04c3fSmrg * 10501e04c3fSmrg * This is the behavior required by D3D10. OpenGL doesn't care. 10601e04c3fSmrg */ 10701e04c3fSmrgunsigned 10801e04c3fSmrgutil_fpstate_set_denorms_to_zero(unsigned current_mxcsr) 10901e04c3fSmrg{ 11001e04c3fSmrg#if defined(PIPE_ARCH_SSE) 11101e04c3fSmrg if (util_cpu_caps.has_sse) { 11201e04c3fSmrg /* Enable flush to zero mode */ 11301e04c3fSmrg current_mxcsr |= _MM_FLUSH_ZERO_MASK; 11401e04c3fSmrg if (util_cpu_caps.has_daz) { 11501e04c3fSmrg /* Enable denormals are zero mode */ 11601e04c3fSmrg current_mxcsr |= _MM_DENORMALS_ZERO_MASK; 11701e04c3fSmrg } 11801e04c3fSmrg util_fpstate_set(current_mxcsr); 11901e04c3fSmrg } 12001e04c3fSmrg#endif 12101e04c3fSmrg return current_mxcsr; 12201e04c3fSmrg} 12301e04c3fSmrg 12401e04c3fSmrg/** 12501e04c3fSmrg * Set the state of the fpstate (mxcsr on x86) register. 12601e04c3fSmrg * 12701e04c3fSmrg * On platforms without support for it's a noop. 12801e04c3fSmrg */ 12901e04c3fSmrgvoid 13001e04c3fSmrgutil_fpstate_set(unsigned mxcsr) 13101e04c3fSmrg{ 13201e04c3fSmrg#if defined(PIPE_ARCH_SSE) 13301e04c3fSmrg if (util_cpu_caps.has_sse) { 13401e04c3fSmrg _mm_setcsr(mxcsr); 13501e04c3fSmrg } 13601e04c3fSmrg#endif 13701e04c3fSmrg} 138