1848b8605Smrg/* 2848b8605Smrg * Mesa 3-D graphics library 3848b8605Smrg * 4848b8605Smrg * Copyright (C) 1999-2006 Brian Paul All Rights Reserved. 5848b8605Smrg * 6848b8605Smrg * Permission is hereby granted, free of charge, to any person obtaining a 7848b8605Smrg * copy of this software and associated documentation files (the "Software"), 8848b8605Smrg * to deal in the Software without restriction, including without limitation 9848b8605Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10848b8605Smrg * and/or sell copies of the Software, and to permit persons to whom the 11848b8605Smrg * Software is furnished to do so, subject to the following conditions: 12848b8605Smrg * 13848b8605Smrg * The above copyright notice and this permission notice shall be included 14848b8605Smrg * in all copies or substantial portions of the Software. 15848b8605Smrg * 16848b8605Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 17848b8605Smrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18848b8605Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19848b8605Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20848b8605Smrg * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21848b8605Smrg * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22848b8605Smrg * OTHER DEALINGS IN THE SOFTWARE. 23848b8605Smrg */ 24848b8605Smrg 25848b8605Smrg/** 26848b8605Smrg * \file common_x86.c 27848b8605Smrg * 28848b8605Smrg * Check CPU capabilities & initialize optimized funtions for this particular 29848b8605Smrg * processor. 30848b8605Smrg * 31848b8605Smrg * Changed by Andre Werthmann for using the new SSE functions. 32848b8605Smrg * 33848b8605Smrg * \author Holger Waechtler <holger@akaflieg.extern.tu-berlin.de> 34848b8605Smrg * \author Andre Werthmann <wertmann@cs.uni-potsdam.de> 35848b8605Smrg */ 36848b8605Smrg 37848b8605Smrg/* XXX these includes should probably go into imports.h or glheader.h */ 38848b8605Smrg#if defined(USE_SSE_ASM) && defined(__FreeBSD__) 39848b8605Smrg#include <sys/types.h> 40848b8605Smrg#include <sys/sysctl.h> 41848b8605Smrg#endif 42b8e80941Smrg#if defined(USE_SSE_ASM) && (defined(__OpenBSD__) || defined(__NetBSD__)) 43848b8605Smrg#include <sys/param.h> 44848b8605Smrg#include <sys/sysctl.h> 45848b8605Smrg#include <machine/cpu.h> 46848b8605Smrg#endif 47848b8605Smrg#if defined(USE_X86_64_ASM) 48848b8605Smrg#include <cpuid.h> 49848b8605Smrg#if !defined(bit_SSE4_1) && defined(bit_SSE41) 50848b8605Smrg/* XXX: clang defines bit_SSE41 instead of bit_SSE4_1 */ 51848b8605Smrg#define bit_SSE4_1 bit_SSE41 52848b8605Smrg#elif !defined(bit_SSE4_1) && !defined(bit_SSE41) 53848b8605Smrg#define bit_SSE4_1 0x00080000 54848b8605Smrg#endif 55848b8605Smrg#endif 56848b8605Smrg 57b8e80941Smrg#include "main/errors.h" 58848b8605Smrg#include "main/imports.h" 59848b8605Smrg#include "common_x86_asm.h" 60848b8605Smrg 61848b8605Smrg 62848b8605Smrg/** Bitmask of X86_FEATURE_x bits */ 63848b8605Smrgint _mesa_x86_cpu_features = 0x0; 64848b8605Smrg 65848b8605Smrgstatic int detection_debug = GL_FALSE; 66848b8605Smrg 67848b8605Smrg/* No reason for this to be public. 68848b8605Smrg */ 69b8e80941Smrgextern GLuint _mesa_x86_has_cpuid(void); 70b8e80941Smrgextern void _mesa_x86_cpuid(GLuint op, GLuint *reg_eax, GLuint *reg_ebx, GLuint *reg_ecx, GLuint *reg_edx); 71b8e80941Smrgextern GLuint _mesa_x86_cpuid_eax(GLuint op); 72b8e80941Smrgextern GLuint _mesa_x86_cpuid_ebx(GLuint op); 73b8e80941Smrgextern GLuint _mesa_x86_cpuid_ecx(GLuint op); 74b8e80941Smrgextern GLuint _mesa_x86_cpuid_edx(GLuint op); 75848b8605Smrg 76848b8605Smrg 77848b8605Smrg#if defined(USE_SSE_ASM) 78848b8605Smrg/* 79848b8605Smrg * We must verify that the Streaming SIMD Extensions are truly supported 80848b8605Smrg * on this processor before we go ahead and hook out the optimized code. 81848b8605Smrg * 82848b8605Smrg * However, I have been told by Alan Cox that all 2.4 (and later) Linux 83848b8605Smrg * kernels provide full SSE support on all processors that expose SSE via 84848b8605Smrg * the CPUID mechanism. 85848b8605Smrg */ 86848b8605Smrg 87848b8605Smrg/* These are assembly functions: */ 88848b8605Smrgextern void _mesa_test_os_sse_support( void ); 89848b8605Smrgextern void _mesa_test_os_sse_exception_support( void ); 90848b8605Smrg 91848b8605Smrg 92848b8605Smrg#if defined(_WIN32) 93848b8605Smrg#ifndef STATUS_FLOAT_MULTIPLE_TRAPS 94848b8605Smrg# define STATUS_FLOAT_MULTIPLE_TRAPS (0xC00002B5L) 95848b8605Smrg#endif 96848b8605Smrgstatic LONG WINAPI ExceptionFilter(LPEXCEPTION_POINTERS exp) 97848b8605Smrg{ 98848b8605Smrg PEXCEPTION_RECORD rec = exp->ExceptionRecord; 99848b8605Smrg PCONTEXT ctx = exp->ContextRecord; 100848b8605Smrg 101848b8605Smrg if ( rec->ExceptionCode == EXCEPTION_ILLEGAL_INSTRUCTION ) { 102848b8605Smrg _mesa_debug(NULL, "EXCEPTION_ILLEGAL_INSTRUCTION\n" ); 103848b8605Smrg _mesa_x86_cpu_features &= ~(X86_FEATURE_XMM); 104848b8605Smrg } else if ( rec->ExceptionCode == STATUS_FLOAT_MULTIPLE_TRAPS ) { 105848b8605Smrg _mesa_debug(NULL, "STATUS_FLOAT_MULTIPLE_TRAPS\n"); 106848b8605Smrg /* Windows seems to clear the exception flag itself, we just have to increment Eip */ 107848b8605Smrg } else { 108848b8605Smrg _mesa_debug(NULL, "UNEXPECTED EXCEPTION (0x%08x), terminating!\n" ); 109848b8605Smrg return EXCEPTION_EXECUTE_HANDLER; 110848b8605Smrg } 111848b8605Smrg 112848b8605Smrg if ( (ctx->ContextFlags & CONTEXT_CONTROL) != CONTEXT_CONTROL ) { 113848b8605Smrg _mesa_debug(NULL, "Context does not contain control registers, terminating!\n"); 114848b8605Smrg return EXCEPTION_EXECUTE_HANDLER; 115848b8605Smrg } 116848b8605Smrg ctx->Eip += 3; 117848b8605Smrg 118848b8605Smrg return EXCEPTION_CONTINUE_EXECUTION; 119848b8605Smrg} 120848b8605Smrg#endif /* _WIN32 */ 121848b8605Smrg 122848b8605Smrg 123848b8605Smrg/** 124848b8605Smrg * Check if SSE is supported. 125848b8605Smrg * If not, turn off the X86_FEATURE_XMM flag in _mesa_x86_cpu_features. 126848b8605Smrg */ 127848b8605Smrgvoid _mesa_check_os_sse_support( void ) 128848b8605Smrg{ 129848b8605Smrg#if defined(__FreeBSD__) 130848b8605Smrg { 131848b8605Smrg int ret, enabled; 132848b8605Smrg unsigned int len; 133848b8605Smrg len = sizeof(enabled); 134848b8605Smrg ret = sysctlbyname("hw.instruction_sse", &enabled, &len, NULL, 0); 135848b8605Smrg if (ret || !enabled) 136848b8605Smrg _mesa_x86_cpu_features &= ~(X86_FEATURE_XMM); 137848b8605Smrg } 138848b8605Smrg#elif defined (__NetBSD__) 139848b8605Smrg { 140848b8605Smrg int ret, enabled; 141848b8605Smrg size_t len = sizeof(enabled); 142848b8605Smrg ret = sysctlbyname("machdep.sse", &enabled, &len, (void *)NULL, 0); 143848b8605Smrg if (ret || !enabled) 144848b8605Smrg _mesa_x86_cpu_features &= ~(X86_FEATURE_XMM); 145848b8605Smrg } 146848b8605Smrg#elif defined(__OpenBSD__) 147848b8605Smrg { 148848b8605Smrg int mib[2]; 149848b8605Smrg int ret, enabled; 150848b8605Smrg size_t len = sizeof(enabled); 151848b8605Smrg 152848b8605Smrg mib[0] = CTL_MACHDEP; 153848b8605Smrg mib[1] = CPU_SSE; 154848b8605Smrg 155848b8605Smrg ret = sysctl(mib, 2, &enabled, &len, NULL, 0); 156848b8605Smrg if (ret || !enabled) 157848b8605Smrg _mesa_x86_cpu_features &= ~(X86_FEATURE_XMM); 158848b8605Smrg } 159848b8605Smrg#elif defined(_WIN32) 160848b8605Smrg LPTOP_LEVEL_EXCEPTION_FILTER oldFilter; 161848b8605Smrg 162848b8605Smrg /* Install our ExceptionFilter */ 163848b8605Smrg oldFilter = SetUnhandledExceptionFilter( ExceptionFilter ); 164848b8605Smrg 165848b8605Smrg if ( cpu_has_xmm ) { 166848b8605Smrg _mesa_debug(NULL, "Testing OS support for SSE...\n"); 167848b8605Smrg 168848b8605Smrg _mesa_test_os_sse_support(); 169848b8605Smrg 170848b8605Smrg if ( cpu_has_xmm ) { 171848b8605Smrg _mesa_debug(NULL, "Yes.\n"); 172848b8605Smrg } else { 173848b8605Smrg _mesa_debug(NULL, "No!\n"); 174848b8605Smrg } 175848b8605Smrg } 176848b8605Smrg 177848b8605Smrg if ( cpu_has_xmm ) { 178848b8605Smrg _mesa_debug(NULL, "Testing OS support for SSE unmasked exceptions...\n"); 179848b8605Smrg 180848b8605Smrg _mesa_test_os_sse_exception_support(); 181848b8605Smrg 182848b8605Smrg if ( cpu_has_xmm ) { 183848b8605Smrg _mesa_debug(NULL, "Yes.\n"); 184848b8605Smrg } else { 185848b8605Smrg _mesa_debug(NULL, "No!\n"); 186848b8605Smrg } 187848b8605Smrg } 188848b8605Smrg 189848b8605Smrg /* Restore previous exception filter */ 190848b8605Smrg SetUnhandledExceptionFilter( oldFilter ); 191848b8605Smrg 192848b8605Smrg if ( cpu_has_xmm ) { 193848b8605Smrg _mesa_debug(NULL, "Tests of OS support for SSE passed.\n"); 194848b8605Smrg } else { 195848b8605Smrg _mesa_debug(NULL, "Tests of OS support for SSE failed!\n"); 196848b8605Smrg } 197848b8605Smrg#else 198848b8605Smrg /* Do nothing on other platforms for now. 199848b8605Smrg */ 200848b8605Smrg if (detection_debug) 201848b8605Smrg _mesa_debug(NULL, "Not testing OS support for SSE, leaving enabled.\n"); 202848b8605Smrg#endif /* __FreeBSD__ */ 203848b8605Smrg} 204848b8605Smrg 205848b8605Smrg#endif /* USE_SSE_ASM */ 206848b8605Smrg 207848b8605Smrg 208848b8605Smrg/** 209848b8605Smrg * Initialize the _mesa_x86_cpu_features bitfield. 210848b8605Smrg * This is a no-op if called more than once. 211848b8605Smrg */ 212848b8605Smrgvoid 213848b8605Smrg_mesa_get_x86_features(void) 214848b8605Smrg{ 215848b8605Smrg static int called = 0; 216848b8605Smrg 217848b8605Smrg if (called) 218848b8605Smrg return; 219848b8605Smrg 220848b8605Smrg called = 1; 221848b8605Smrg 222848b8605Smrg#ifdef USE_X86_ASM 223848b8605Smrg _mesa_x86_cpu_features = 0x0; 224848b8605Smrg 225b8e80941Smrg if (getenv( "MESA_NO_ASM")) { 226848b8605Smrg return; 227848b8605Smrg } 228848b8605Smrg 229848b8605Smrg if (!_mesa_x86_has_cpuid()) { 230848b8605Smrg _mesa_debug(NULL, "CPUID not detected\n"); 231848b8605Smrg } 232848b8605Smrg else { 233848b8605Smrg GLuint cpu_features, cpu_features_ecx; 234848b8605Smrg GLuint cpu_ext_features; 235848b8605Smrg GLuint cpu_ext_info; 236848b8605Smrg char cpu_vendor[13]; 237848b8605Smrg GLuint result; 238848b8605Smrg 239848b8605Smrg /* get vendor name */ 240848b8605Smrg _mesa_x86_cpuid(0, &result, (GLuint *)(cpu_vendor + 0), (GLuint *)(cpu_vendor + 8), (GLuint *)(cpu_vendor + 4)); 241848b8605Smrg cpu_vendor[12] = '\0'; 242848b8605Smrg 243848b8605Smrg if (detection_debug) 244848b8605Smrg _mesa_debug(NULL, "CPU vendor: %s\n", cpu_vendor); 245848b8605Smrg 246848b8605Smrg /* get cpu features */ 247848b8605Smrg cpu_features = _mesa_x86_cpuid_edx(1); 248848b8605Smrg cpu_features_ecx = _mesa_x86_cpuid_ecx(1); 249848b8605Smrg 250848b8605Smrg if (cpu_features & X86_CPU_FPU) 251848b8605Smrg _mesa_x86_cpu_features |= X86_FEATURE_FPU; 252848b8605Smrg if (cpu_features & X86_CPU_CMOV) 253848b8605Smrg _mesa_x86_cpu_features |= X86_FEATURE_CMOV; 254848b8605Smrg 255848b8605Smrg#ifdef USE_MMX_ASM 256848b8605Smrg if (cpu_features & X86_CPU_MMX) 257848b8605Smrg _mesa_x86_cpu_features |= X86_FEATURE_MMX; 258848b8605Smrg#endif 259848b8605Smrg 260848b8605Smrg#ifdef USE_SSE_ASM 261848b8605Smrg if (cpu_features & X86_CPU_XMM) 262848b8605Smrg _mesa_x86_cpu_features |= X86_FEATURE_XMM; 263848b8605Smrg if (cpu_features & X86_CPU_XMM2) 264848b8605Smrg _mesa_x86_cpu_features |= X86_FEATURE_XMM2; 265848b8605Smrg if (cpu_features_ecx & X86_CPU_SSE4_1) 266848b8605Smrg _mesa_x86_cpu_features |= X86_FEATURE_SSE4_1; 267848b8605Smrg#endif 268848b8605Smrg 269848b8605Smrg /* query extended cpu features */ 270848b8605Smrg if ((cpu_ext_info = _mesa_x86_cpuid_eax(0x80000000)) > 0x80000000) { 271848b8605Smrg if (cpu_ext_info >= 0x80000001) { 272848b8605Smrg 273848b8605Smrg cpu_ext_features = _mesa_x86_cpuid_edx(0x80000001); 274848b8605Smrg 275848b8605Smrg if (cpu_features & X86_CPU_MMX) { 276848b8605Smrg 277848b8605Smrg#ifdef USE_3DNOW_ASM 278848b8605Smrg if (cpu_ext_features & X86_CPUEXT_3DNOW) 279848b8605Smrg _mesa_x86_cpu_features |= X86_FEATURE_3DNOW; 280848b8605Smrg if (cpu_ext_features & X86_CPUEXT_3DNOW_EXT) 281848b8605Smrg _mesa_x86_cpu_features |= X86_FEATURE_3DNOWEXT; 282848b8605Smrg#endif 283848b8605Smrg 284848b8605Smrg#ifdef USE_MMX_ASM 285848b8605Smrg if (cpu_ext_features & X86_CPUEXT_MMX_EXT) 286848b8605Smrg _mesa_x86_cpu_features |= X86_FEATURE_MMXEXT; 287848b8605Smrg#endif 288848b8605Smrg } 289848b8605Smrg } 290848b8605Smrg 291848b8605Smrg /* query cpu name */ 292848b8605Smrg if (cpu_ext_info >= 0x80000002) { 293848b8605Smrg GLuint ofs; 294848b8605Smrg char cpu_name[49]; 295848b8605Smrg for (ofs = 0; ofs < 3; ofs++) 296848b8605Smrg _mesa_x86_cpuid(0x80000002+ofs, (GLuint *)(cpu_name + (16*ofs)+0), (GLuint *)(cpu_name + (16*ofs)+4), (GLuint *)(cpu_name + (16*ofs)+8), (GLuint *)(cpu_name + (16*ofs)+12)); 297848b8605Smrg cpu_name[48] = '\0'; /* the name should be NULL terminated, but just to be sure */ 298848b8605Smrg 299848b8605Smrg if (detection_debug) 300848b8605Smrg _mesa_debug(NULL, "CPU name: %s\n", cpu_name); 301848b8605Smrg } 302848b8605Smrg } 303848b8605Smrg 304848b8605Smrg } 305848b8605Smrg 306848b8605Smrg#ifdef USE_MMX_ASM 307848b8605Smrg if ( cpu_has_mmx ) { 308b8e80941Smrg if ( getenv( "MESA_NO_MMX" ) == 0 ) { 309848b8605Smrg if (detection_debug) 310848b8605Smrg _mesa_debug(NULL, "MMX cpu detected.\n"); 311848b8605Smrg } else { 312848b8605Smrg _mesa_x86_cpu_features &= ~(X86_FEATURE_MMX); 313848b8605Smrg } 314848b8605Smrg } 315848b8605Smrg#endif 316848b8605Smrg 317848b8605Smrg#ifdef USE_3DNOW_ASM 318848b8605Smrg if ( cpu_has_3dnow ) { 319b8e80941Smrg if ( getenv( "MESA_NO_3DNOW" ) == 0 ) { 320848b8605Smrg if (detection_debug) 321848b8605Smrg _mesa_debug(NULL, "3DNow! cpu detected.\n"); 322848b8605Smrg } else { 323848b8605Smrg _mesa_x86_cpu_features &= ~(X86_FEATURE_3DNOW); 324848b8605Smrg } 325848b8605Smrg } 326848b8605Smrg#endif 327848b8605Smrg 328848b8605Smrg#ifdef USE_SSE_ASM 329848b8605Smrg if ( cpu_has_xmm ) { 330b8e80941Smrg if ( getenv( "MESA_NO_SSE" ) == 0 ) { 331848b8605Smrg if (detection_debug) 332848b8605Smrg _mesa_debug(NULL, "SSE cpu detected.\n"); 333b8e80941Smrg if ( getenv( "MESA_FORCE_SSE" ) == 0 ) { 334848b8605Smrg _mesa_check_os_sse_support(); 335848b8605Smrg } 336848b8605Smrg } else { 337848b8605Smrg _mesa_debug(NULL, "SSE cpu detected, but switched off by user.\n"); 338848b8605Smrg _mesa_x86_cpu_features &= ~(X86_FEATURE_XMM); 339848b8605Smrg } 340848b8605Smrg } 341848b8605Smrg#endif 342848b8605Smrg 343848b8605Smrg#elif defined(USE_X86_64_ASM) 344848b8605Smrg { 345b8e80941Smrg unsigned int eax, ebx, ecx, edx; 346848b8605Smrg 347848b8605Smrg /* Always available on x86-64. */ 348848b8605Smrg _mesa_x86_cpu_features |= X86_FEATURE_XMM | X86_FEATURE_XMM2; 349848b8605Smrg 350b8e80941Smrg if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx)) 351b8e80941Smrg return; 352848b8605Smrg 353848b8605Smrg if (ecx & bit_SSE4_1) 354848b8605Smrg _mesa_x86_cpu_features |= X86_FEATURE_SSE4_1; 355848b8605Smrg } 356848b8605Smrg#endif /* USE_X86_64_ASM */ 357848b8605Smrg 358848b8605Smrg (void) detection_debug; 359848b8605Smrg} 360