1848b8605Smrg/*
2848b8605Smrg * Mesa 3-D graphics library
3848b8605Smrg *
4848b8605Smrg * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
5848b8605Smrg *
6848b8605Smrg * Permission is hereby granted, free of charge, to any person obtaining a
7848b8605Smrg * copy of this software and associated documentation files (the "Software"),
8848b8605Smrg * to deal in the Software without restriction, including without limitation
9848b8605Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10848b8605Smrg * and/or sell copies of the Software, and to permit persons to whom the
11848b8605Smrg * Software is furnished to do so, subject to the following conditions:
12848b8605Smrg *
13848b8605Smrg * The above copyright notice and this permission notice shall be included
14848b8605Smrg * in all copies or substantial portions of the Software.
15848b8605Smrg *
16848b8605Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17848b8605Smrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18848b8605Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19848b8605Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20848b8605Smrg * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21848b8605Smrg * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22848b8605Smrg * OTHER DEALINGS IN THE SOFTWARE.
23848b8605Smrg */
24848b8605Smrg
25848b8605Smrg/**
26848b8605Smrg * \file common_x86.c
27848b8605Smrg *
 * Check CPU capabilities & initialize optimized functions for this particular
29848b8605Smrg * processor.
30848b8605Smrg *
31848b8605Smrg * Changed by Andre Werthmann for using the new SSE functions.
32848b8605Smrg *
33848b8605Smrg * \author Holger Waechtler <holger@akaflieg.extern.tu-berlin.de>
34848b8605Smrg * \author Andre Werthmann <wertmann@cs.uni-potsdam.de>
35848b8605Smrg */
36848b8605Smrg
37848b8605Smrg/* XXX these includes should probably go into imports.h or glheader.h */
38848b8605Smrg#if defined(USE_SSE_ASM) && defined(__FreeBSD__)
39848b8605Smrg#include <sys/types.h>
40848b8605Smrg#include <sys/sysctl.h>
41848b8605Smrg#endif
42b8e80941Smrg#if defined(USE_SSE_ASM) && (defined(__OpenBSD__) || defined(__NetBSD__))
43848b8605Smrg#include <sys/param.h>
44848b8605Smrg#include <sys/sysctl.h>
45848b8605Smrg#include <machine/cpu.h>
46848b8605Smrg#endif
47848b8605Smrg#if defined(USE_X86_64_ASM)
48848b8605Smrg#include <cpuid.h>
49848b8605Smrg#if !defined(bit_SSE4_1) && defined(bit_SSE41)
50848b8605Smrg/* XXX: clang defines bit_SSE41 instead of bit_SSE4_1 */
51848b8605Smrg#define bit_SSE4_1 bit_SSE41
52848b8605Smrg#elif !defined(bit_SSE4_1) && !defined(bit_SSE41)
53848b8605Smrg#define bit_SSE4_1 0x00080000
54848b8605Smrg#endif
55848b8605Smrg#endif
56848b8605Smrg
57b8e80941Smrg#include "main/errors.h"
58848b8605Smrg#include "main/imports.h"
59848b8605Smrg#include "common_x86_asm.h"
60848b8605Smrg
61848b8605Smrg
62848b8605Smrg/** Bitmask of X86_FEATURE_x bits */
63848b8605Smrgint _mesa_x86_cpu_features = 0x0;
64848b8605Smrg
65848b8605Smrgstatic int detection_debug = GL_FALSE;
66848b8605Smrg
67848b8605Smrg/* No reason for this to be public.
68848b8605Smrg */
69b8e80941Smrgextern GLuint _mesa_x86_has_cpuid(void);
70b8e80941Smrgextern void _mesa_x86_cpuid(GLuint op, GLuint *reg_eax, GLuint *reg_ebx, GLuint *reg_ecx, GLuint *reg_edx);
71b8e80941Smrgextern GLuint _mesa_x86_cpuid_eax(GLuint op);
72b8e80941Smrgextern GLuint _mesa_x86_cpuid_ebx(GLuint op);
73b8e80941Smrgextern GLuint _mesa_x86_cpuid_ecx(GLuint op);
74b8e80941Smrgextern GLuint _mesa_x86_cpuid_edx(GLuint op);
75848b8605Smrg
76848b8605Smrg
77848b8605Smrg#if defined(USE_SSE_ASM)
78848b8605Smrg/*
79848b8605Smrg * We must verify that the Streaming SIMD Extensions are truly supported
80848b8605Smrg * on this processor before we go ahead and hook out the optimized code.
81848b8605Smrg *
82848b8605Smrg * However, I have been told by Alan Cox that all 2.4 (and later) Linux
83848b8605Smrg * kernels provide full SSE support on all processors that expose SSE via
84848b8605Smrg * the CPUID mechanism.
85848b8605Smrg */
86848b8605Smrg
/*
 * SSE probe routines, implemented in assembly.  Only the _WIN32 path of
 * _mesa_check_os_sse_support() calls them.
 */
extern void _mesa_test_os_sse_support(void);
extern void _mesa_test_os_sse_exception_support(void);
90848b8605Smrg
91848b8605Smrg
92848b8605Smrg#if defined(_WIN32)
93848b8605Smrg#ifndef STATUS_FLOAT_MULTIPLE_TRAPS
94848b8605Smrg# define STATUS_FLOAT_MULTIPLE_TRAPS (0xC00002B5L)
95848b8605Smrg#endif
96848b8605Smrgstatic LONG WINAPI ExceptionFilter(LPEXCEPTION_POINTERS exp)
97848b8605Smrg{
98848b8605Smrg   PEXCEPTION_RECORD rec = exp->ExceptionRecord;
99848b8605Smrg   PCONTEXT ctx = exp->ContextRecord;
100848b8605Smrg
101848b8605Smrg   if ( rec->ExceptionCode == EXCEPTION_ILLEGAL_INSTRUCTION ) {
102848b8605Smrg      _mesa_debug(NULL, "EXCEPTION_ILLEGAL_INSTRUCTION\n" );
103848b8605Smrg      _mesa_x86_cpu_features &= ~(X86_FEATURE_XMM);
104848b8605Smrg   } else if ( rec->ExceptionCode == STATUS_FLOAT_MULTIPLE_TRAPS ) {
105848b8605Smrg      _mesa_debug(NULL, "STATUS_FLOAT_MULTIPLE_TRAPS\n");
106848b8605Smrg      /* Windows seems to clear the exception flag itself, we just have to increment Eip */
107848b8605Smrg   } else {
108848b8605Smrg      _mesa_debug(NULL, "UNEXPECTED EXCEPTION (0x%08x), terminating!\n" );
109848b8605Smrg      return EXCEPTION_EXECUTE_HANDLER;
110848b8605Smrg   }
111848b8605Smrg
112848b8605Smrg   if ( (ctx->ContextFlags & CONTEXT_CONTROL) != CONTEXT_CONTROL ) {
113848b8605Smrg      _mesa_debug(NULL, "Context does not contain control registers, terminating!\n");
114848b8605Smrg      return EXCEPTION_EXECUTE_HANDLER;
115848b8605Smrg   }
116848b8605Smrg   ctx->Eip += 3;
117848b8605Smrg
118848b8605Smrg   return EXCEPTION_CONTINUE_EXECUTION;
119848b8605Smrg}
120848b8605Smrg#endif /* _WIN32 */
121848b8605Smrg
122848b8605Smrg
123848b8605Smrg/**
124848b8605Smrg * Check if SSE is supported.
125848b8605Smrg * If not, turn off the X86_FEATURE_XMM flag in _mesa_x86_cpu_features.
126848b8605Smrg */
127848b8605Smrgvoid _mesa_check_os_sse_support( void )
128848b8605Smrg{
129848b8605Smrg#if defined(__FreeBSD__)
130848b8605Smrg   {
131848b8605Smrg      int ret, enabled;
132848b8605Smrg      unsigned int len;
133848b8605Smrg      len = sizeof(enabled);
134848b8605Smrg      ret = sysctlbyname("hw.instruction_sse", &enabled, &len, NULL, 0);
135848b8605Smrg      if (ret || !enabled)
136848b8605Smrg         _mesa_x86_cpu_features &= ~(X86_FEATURE_XMM);
137848b8605Smrg   }
138848b8605Smrg#elif defined (__NetBSD__)
139848b8605Smrg   {
140848b8605Smrg      int ret, enabled;
141848b8605Smrg      size_t len = sizeof(enabled);
142848b8605Smrg      ret = sysctlbyname("machdep.sse", &enabled, &len, (void *)NULL, 0);
143848b8605Smrg      if (ret || !enabled)
144848b8605Smrg         _mesa_x86_cpu_features &= ~(X86_FEATURE_XMM);
145848b8605Smrg   }
146848b8605Smrg#elif defined(__OpenBSD__)
147848b8605Smrg   {
148848b8605Smrg      int mib[2];
149848b8605Smrg      int ret, enabled;
150848b8605Smrg      size_t len = sizeof(enabled);
151848b8605Smrg
152848b8605Smrg      mib[0] = CTL_MACHDEP;
153848b8605Smrg      mib[1] = CPU_SSE;
154848b8605Smrg
155848b8605Smrg      ret = sysctl(mib, 2, &enabled, &len, NULL, 0);
156848b8605Smrg      if (ret || !enabled)
157848b8605Smrg         _mesa_x86_cpu_features &= ~(X86_FEATURE_XMM);
158848b8605Smrg   }
159848b8605Smrg#elif defined(_WIN32)
160848b8605Smrg   LPTOP_LEVEL_EXCEPTION_FILTER oldFilter;
161848b8605Smrg
162848b8605Smrg   /* Install our ExceptionFilter */
163848b8605Smrg   oldFilter = SetUnhandledExceptionFilter( ExceptionFilter );
164848b8605Smrg
165848b8605Smrg   if ( cpu_has_xmm ) {
166848b8605Smrg      _mesa_debug(NULL, "Testing OS support for SSE...\n");
167848b8605Smrg
168848b8605Smrg      _mesa_test_os_sse_support();
169848b8605Smrg
170848b8605Smrg      if ( cpu_has_xmm ) {
171848b8605Smrg	 _mesa_debug(NULL, "Yes.\n");
172848b8605Smrg      } else {
173848b8605Smrg	 _mesa_debug(NULL, "No!\n");
174848b8605Smrg      }
175848b8605Smrg   }
176848b8605Smrg
177848b8605Smrg   if ( cpu_has_xmm ) {
178848b8605Smrg      _mesa_debug(NULL, "Testing OS support for SSE unmasked exceptions...\n");
179848b8605Smrg
180848b8605Smrg      _mesa_test_os_sse_exception_support();
181848b8605Smrg
182848b8605Smrg      if ( cpu_has_xmm ) {
183848b8605Smrg	 _mesa_debug(NULL, "Yes.\n");
184848b8605Smrg      } else {
185848b8605Smrg	 _mesa_debug(NULL, "No!\n");
186848b8605Smrg      }
187848b8605Smrg   }
188848b8605Smrg
189848b8605Smrg   /* Restore previous exception filter */
190848b8605Smrg   SetUnhandledExceptionFilter( oldFilter );
191848b8605Smrg
192848b8605Smrg   if ( cpu_has_xmm ) {
193848b8605Smrg      _mesa_debug(NULL, "Tests of OS support for SSE passed.\n");
194848b8605Smrg   } else {
195848b8605Smrg      _mesa_debug(NULL, "Tests of OS support for SSE failed!\n");
196848b8605Smrg   }
197848b8605Smrg#else
198848b8605Smrg   /* Do nothing on other platforms for now.
199848b8605Smrg    */
200848b8605Smrg   if (detection_debug)
201848b8605Smrg      _mesa_debug(NULL, "Not testing OS support for SSE, leaving enabled.\n");
202848b8605Smrg#endif /* __FreeBSD__ */
203848b8605Smrg}
204848b8605Smrg
205848b8605Smrg#endif /* USE_SSE_ASM */
206848b8605Smrg
207848b8605Smrg
208848b8605Smrg/**
209848b8605Smrg * Initialize the _mesa_x86_cpu_features bitfield.
210848b8605Smrg * This is a no-op if called more than once.
211848b8605Smrg */
212848b8605Smrgvoid
213848b8605Smrg_mesa_get_x86_features(void)
214848b8605Smrg{
215848b8605Smrg   static int called = 0;
216848b8605Smrg
217848b8605Smrg   if (called)
218848b8605Smrg      return;
219848b8605Smrg
220848b8605Smrg   called = 1;
221848b8605Smrg
222848b8605Smrg#ifdef USE_X86_ASM
223848b8605Smrg   _mesa_x86_cpu_features = 0x0;
224848b8605Smrg
225b8e80941Smrg   if (getenv( "MESA_NO_ASM")) {
226848b8605Smrg      return;
227848b8605Smrg   }
228848b8605Smrg
229848b8605Smrg   if (!_mesa_x86_has_cpuid()) {
230848b8605Smrg       _mesa_debug(NULL, "CPUID not detected\n");
231848b8605Smrg   }
232848b8605Smrg   else {
233848b8605Smrg       GLuint cpu_features, cpu_features_ecx;
234848b8605Smrg       GLuint cpu_ext_features;
235848b8605Smrg       GLuint cpu_ext_info;
236848b8605Smrg       char cpu_vendor[13];
237848b8605Smrg       GLuint result;
238848b8605Smrg
239848b8605Smrg       /* get vendor name */
240848b8605Smrg       _mesa_x86_cpuid(0, &result, (GLuint *)(cpu_vendor + 0), (GLuint *)(cpu_vendor + 8), (GLuint *)(cpu_vendor + 4));
241848b8605Smrg       cpu_vendor[12] = '\0';
242848b8605Smrg
243848b8605Smrg       if (detection_debug)
244848b8605Smrg	  _mesa_debug(NULL, "CPU vendor: %s\n", cpu_vendor);
245848b8605Smrg
246848b8605Smrg       /* get cpu features */
247848b8605Smrg       cpu_features = _mesa_x86_cpuid_edx(1);
248848b8605Smrg       cpu_features_ecx = _mesa_x86_cpuid_ecx(1);
249848b8605Smrg
250848b8605Smrg       if (cpu_features & X86_CPU_FPU)
251848b8605Smrg	   _mesa_x86_cpu_features |= X86_FEATURE_FPU;
252848b8605Smrg       if (cpu_features & X86_CPU_CMOV)
253848b8605Smrg	   _mesa_x86_cpu_features |= X86_FEATURE_CMOV;
254848b8605Smrg
255848b8605Smrg#ifdef USE_MMX_ASM
256848b8605Smrg       if (cpu_features & X86_CPU_MMX)
257848b8605Smrg	   _mesa_x86_cpu_features |= X86_FEATURE_MMX;
258848b8605Smrg#endif
259848b8605Smrg
260848b8605Smrg#ifdef USE_SSE_ASM
261848b8605Smrg       if (cpu_features & X86_CPU_XMM)
262848b8605Smrg	   _mesa_x86_cpu_features |= X86_FEATURE_XMM;
263848b8605Smrg       if (cpu_features & X86_CPU_XMM2)
264848b8605Smrg	   _mesa_x86_cpu_features |= X86_FEATURE_XMM2;
265848b8605Smrg       if (cpu_features_ecx & X86_CPU_SSE4_1)
266848b8605Smrg	   _mesa_x86_cpu_features |= X86_FEATURE_SSE4_1;
267848b8605Smrg#endif
268848b8605Smrg
269848b8605Smrg       /* query extended cpu features */
270848b8605Smrg       if ((cpu_ext_info = _mesa_x86_cpuid_eax(0x80000000)) > 0x80000000) {
271848b8605Smrg	   if (cpu_ext_info >= 0x80000001) {
272848b8605Smrg
273848b8605Smrg	       cpu_ext_features = _mesa_x86_cpuid_edx(0x80000001);
274848b8605Smrg
275848b8605Smrg	       if (cpu_features & X86_CPU_MMX) {
276848b8605Smrg
277848b8605Smrg#ifdef USE_3DNOW_ASM
278848b8605Smrg		   if (cpu_ext_features & X86_CPUEXT_3DNOW)
279848b8605Smrg		       _mesa_x86_cpu_features |= X86_FEATURE_3DNOW;
280848b8605Smrg		   if (cpu_ext_features & X86_CPUEXT_3DNOW_EXT)
281848b8605Smrg		       _mesa_x86_cpu_features |= X86_FEATURE_3DNOWEXT;
282848b8605Smrg#endif
283848b8605Smrg
284848b8605Smrg#ifdef USE_MMX_ASM
285848b8605Smrg		   if (cpu_ext_features & X86_CPUEXT_MMX_EXT)
286848b8605Smrg		       _mesa_x86_cpu_features |= X86_FEATURE_MMXEXT;
287848b8605Smrg#endif
288848b8605Smrg	       }
289848b8605Smrg	   }
290848b8605Smrg
291848b8605Smrg	   /* query cpu name */
292848b8605Smrg	   if (cpu_ext_info >= 0x80000002) {
293848b8605Smrg	       GLuint ofs;
294848b8605Smrg	       char cpu_name[49];
295848b8605Smrg	       for (ofs = 0; ofs < 3; ofs++)
296848b8605Smrg		   _mesa_x86_cpuid(0x80000002+ofs, (GLuint *)(cpu_name + (16*ofs)+0), (GLuint *)(cpu_name + (16*ofs)+4), (GLuint *)(cpu_name + (16*ofs)+8), (GLuint *)(cpu_name + (16*ofs)+12));
297848b8605Smrg	       cpu_name[48] = '\0'; /* the name should be NULL terminated, but just to be sure */
298848b8605Smrg
299848b8605Smrg	       if (detection_debug)
300848b8605Smrg		  _mesa_debug(NULL, "CPU name: %s\n", cpu_name);
301848b8605Smrg	   }
302848b8605Smrg       }
303848b8605Smrg
304848b8605Smrg   }
305848b8605Smrg
306848b8605Smrg#ifdef USE_MMX_ASM
307848b8605Smrg   if ( cpu_has_mmx ) {
308b8e80941Smrg      if ( getenv( "MESA_NO_MMX" ) == 0 ) {
309848b8605Smrg	 if (detection_debug)
310848b8605Smrg	    _mesa_debug(NULL, "MMX cpu detected.\n");
311848b8605Smrg      } else {
312848b8605Smrg         _mesa_x86_cpu_features &= ~(X86_FEATURE_MMX);
313848b8605Smrg      }
314848b8605Smrg   }
315848b8605Smrg#endif
316848b8605Smrg
317848b8605Smrg#ifdef USE_3DNOW_ASM
318848b8605Smrg   if ( cpu_has_3dnow ) {
319b8e80941Smrg      if ( getenv( "MESA_NO_3DNOW" ) == 0 ) {
320848b8605Smrg	 if (detection_debug)
321848b8605Smrg	    _mesa_debug(NULL, "3DNow! cpu detected.\n");
322848b8605Smrg      } else {
323848b8605Smrg         _mesa_x86_cpu_features &= ~(X86_FEATURE_3DNOW);
324848b8605Smrg      }
325848b8605Smrg   }
326848b8605Smrg#endif
327848b8605Smrg
328848b8605Smrg#ifdef USE_SSE_ASM
329848b8605Smrg   if ( cpu_has_xmm ) {
330b8e80941Smrg      if ( getenv( "MESA_NO_SSE" ) == 0 ) {
331848b8605Smrg	 if (detection_debug)
332848b8605Smrg	    _mesa_debug(NULL, "SSE cpu detected.\n");
333b8e80941Smrg         if ( getenv( "MESA_FORCE_SSE" ) == 0 ) {
334848b8605Smrg            _mesa_check_os_sse_support();
335848b8605Smrg         }
336848b8605Smrg      } else {
337848b8605Smrg         _mesa_debug(NULL, "SSE cpu detected, but switched off by user.\n");
338848b8605Smrg         _mesa_x86_cpu_features &= ~(X86_FEATURE_XMM);
339848b8605Smrg      }
340848b8605Smrg   }
341848b8605Smrg#endif
342848b8605Smrg
343848b8605Smrg#elif defined(USE_X86_64_ASM)
344848b8605Smrg   {
345b8e80941Smrg      unsigned int eax, ebx, ecx, edx;
346848b8605Smrg
347848b8605Smrg      /* Always available on x86-64. */
348848b8605Smrg      _mesa_x86_cpu_features |= X86_FEATURE_XMM | X86_FEATURE_XMM2;
349848b8605Smrg
350b8e80941Smrg      if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
351b8e80941Smrg         return;
352848b8605Smrg
353848b8605Smrg      if (ecx & bit_SSE4_1)
354848b8605Smrg         _mesa_x86_cpu_features |= X86_FEATURE_SSE4_1;
355848b8605Smrg   }
356848b8605Smrg#endif /* USE_X86_64_ASM */
357848b8605Smrg
358848b8605Smrg   (void) detection_debug;
359848b8605Smrg}
360