1b8e80941Smrg/************************************************************************** 2b8e80941Smrg * 3b8e80941Smrg * Copyright 2008 Dennis Smit 4b8e80941Smrg * All Rights Reserved. 5b8e80941Smrg * 6b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 7b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 8b8e80941Smrg * to deal in the Software without restriction, including without limitation 9b8e80941Smrg * on the rights to use, copy, modify, merge, publish, distribute, sub 10b8e80941Smrg * license, and/or sell copies of the Software, and to permit persons to whom 11b8e80941Smrg * the Software is furnished to do so, subject to the following conditions: 12b8e80941Smrg * 13b8e80941Smrg * The above copyright notice and this permission notice (including the next 14b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 15b8e80941Smrg * Software. 16b8e80941Smrg * 17b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 20b8e80941Smrg * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 21b8e80941Smrg * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 22b8e80941Smrg * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 23b8e80941Smrg * USE OR OTHER DEALINGS IN THE SOFTWARE. 24b8e80941Smrg * 25b8e80941Smrg **************************************************************************/ 26b8e80941Smrg 27b8e80941Smrg/** 28b8e80941Smrg * @file 29b8e80941Smrg * CPU feature detection. 30b8e80941Smrg * 31b8e80941Smrg * @author Dennis Smit 32b8e80941Smrg * @author Based on the work of Eric Anholt <anholt@FreeBSD.org> 33b8e80941Smrg */ 34b8e80941Smrg 35b8e80941Smrg#include "pipe/p_config.h" 36b8e80941Smrg 37b8e80941Smrg#include "util/u_debug.h" 38b8e80941Smrg#include "u_cpu_detect.h" 39b8e80941Smrg#include "c11/threads.h" 40b8e80941Smrg 41b8e80941Smrg#if defined(PIPE_ARCH_PPC) 42b8e80941Smrg#if defined(PIPE_OS_APPLE) 43b8e80941Smrg#include <sys/sysctl.h> 44b8e80941Smrg#else 45b8e80941Smrg#include <signal.h> 46b8e80941Smrg#include <setjmp.h> 47b8e80941Smrg#endif 48b8e80941Smrg#endif 49b8e80941Smrg 50b8e80941Smrg#if defined(PIPE_OS_NETBSD) || defined(PIPE_OS_OPENBSD) 51b8e80941Smrg#include <sys/param.h> 52b8e80941Smrg#include <sys/sysctl.h> 53b8e80941Smrg#include <machine/cpu.h> 54b8e80941Smrg#endif 55b8e80941Smrg 56b8e80941Smrg#if defined(PIPE_OS_FREEBSD) || defined(PIPE_OS_DRAGONFLY) 57b8e80941Smrg#include <sys/types.h> 58b8e80941Smrg#include <sys/sysctl.h> 59b8e80941Smrg#endif 60b8e80941Smrg 61b8e80941Smrg#if defined(PIPE_OS_LINUX) 62b8e80941Smrg#include <signal.h> 63b8e80941Smrg#include <fcntl.h> 64b8e80941Smrg#include <elf.h> 65b8e80941Smrg#endif 66b8e80941Smrg 67b8e80941Smrg#ifdef PIPE_OS_UNIX 68b8e80941Smrg#include <unistd.h> 69b8e80941Smrg#endif 70b8e80941Smrg 71b8e80941Smrg#if defined(HAS_ANDROID_CPUFEATURES) 72b8e80941Smrg#include <cpu-features.h> 73b8e80941Smrg#endif 74b8e80941Smrg 75b8e80941Smrg#if defined(PIPE_OS_WINDOWS) 76b8e80941Smrg#include <windows.h> 77b8e80941Smrg#if defined(PIPE_CC_MSVC) 78b8e80941Smrg#include <intrin.h> 79b8e80941Smrg#endif 80b8e80941Smrg#endif 81b8e80941Smrg 82b8e80941Smrg 83b8e80941Smrg#ifdef DEBUG 84b8e80941SmrgDEBUG_GET_ONCE_BOOL_OPTION(dump_cpu, "GALLIUM_DUMP_CPU", FALSE) 85b8e80941Smrg#endif 86b8e80941Smrg 87b8e80941Smrg 88b8e80941Smrgstruct util_cpu_caps util_cpu_caps; 89b8e80941Smrg 90b8e80941Smrg#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) 91b8e80941Smrgstatic int has_cpuid(void); 92b8e80941Smrg#endif 93b8e80941Smrg 94b8e80941Smrg 95b8e80941Smrg#if defined(PIPE_ARCH_PPC) && !defined(PIPE_OS_APPLE) 96b8e80941Smrgstatic jmp_buf __lv_powerpc_jmpbuf; 97b8e80941Smrgstatic volatile sig_atomic_t __lv_powerpc_canjump = 0; 98b8e80941Smrg 99b8e80941Smrgstatic void 100b8e80941Smrgsigill_handler(int sig) 101b8e80941Smrg{ 102b8e80941Smrg if (!__lv_powerpc_canjump) { 103b8e80941Smrg signal (sig, SIG_DFL); 104b8e80941Smrg raise (sig); 105b8e80941Smrg } 106b8e80941Smrg 107b8e80941Smrg __lv_powerpc_canjump = 0; 108b8e80941Smrg longjmp(__lv_powerpc_jmpbuf, 1); 109b8e80941Smrg} 110b8e80941Smrg#endif 111b8e80941Smrg 112b8e80941Smrg#if defined(PIPE_ARCH_PPC) 113b8e80941Smrgstatic void 114b8e80941Smrgcheck_os_altivec_support(void) 115b8e80941Smrg{ 116b8e80941Smrg#if defined(PIPE_OS_APPLE) 117b8e80941Smrg int sels[2] = {CTL_HW, HW_VECTORUNIT}; 118b8e80941Smrg int has_vu = 0; 119b8e80941Smrg int len = sizeof (has_vu); 120b8e80941Smrg int err; 121b8e80941Smrg 122b8e80941Smrg err = sysctl(sels, 2, &has_vu, &len, NULL, 0); 123b8e80941Smrg 124b8e80941Smrg if (err == 0) { 125b8e80941Smrg if (has_vu != 0) { 126b8e80941Smrg util_cpu_caps.has_altivec = 1; 127b8e80941Smrg } 128b8e80941Smrg } 129b8e80941Smrg#else /* !PIPE_OS_APPLE */ 130b8e80941Smrg /* not on Apple/Darwin, do it the brute-force way */ 131b8e80941Smrg /* this is borrowed from the libmpeg2 library */ 132b8e80941Smrg signal(SIGILL, sigill_handler); 133b8e80941Smrg if (setjmp(__lv_powerpc_jmpbuf)) { 134b8e80941Smrg signal(SIGILL, SIG_DFL); 135b8e80941Smrg } else { 136b8e80941Smrg boolean enable_altivec = TRUE; /* Default: enable if available, and if not overridden */ 137b8e80941Smrg boolean enable_vsx = TRUE; 138b8e80941Smrg#ifdef DEBUG 139b8e80941Smrg /* Disabling Altivec code generation is not the same as disabling VSX code generation, 140b8e80941Smrg * which can be done simply by passing -mattr=-vsx to the LLVM compiler; cf. 141b8e80941Smrg * lp_build_create_jit_compiler_for_module(). 142b8e80941Smrg * If you want to disable Altivec code generation, the best place to do it is here. 143b8e80941Smrg */ 144b8e80941Smrg char *env_control = getenv("GALLIVM_ALTIVEC"); /* 1=enable (default); 0=disable */ 145b8e80941Smrg if (env_control && env_control[0] == '0') { 146b8e80941Smrg enable_altivec = FALSE; 147b8e80941Smrg } 148b8e80941Smrg#endif 149b8e80941Smrg /* VSX instructions can be explicitly enabled/disabled via GALLIVM_VSX=1 or 0 */ 150b8e80941Smrg char *env_vsx = getenv("GALLIVM_VSX"); 151b8e80941Smrg if (env_vsx && env_vsx[0] == '0') { 152b8e80941Smrg enable_vsx = FALSE; 153b8e80941Smrg } 154b8e80941Smrg if (enable_altivec) { 155b8e80941Smrg __lv_powerpc_canjump = 1; 156b8e80941Smrg 157b8e80941Smrg __asm __volatile 158b8e80941Smrg ("mtspr 256, %0\n\t" 159b8e80941Smrg "vand %%v0, %%v0, %%v0" 160b8e80941Smrg : 161b8e80941Smrg : "r" (-1)); 162b8e80941Smrg 163b8e80941Smrg util_cpu_caps.has_altivec = 1; 164b8e80941Smrg 165b8e80941Smrg if (enable_vsx) { 166b8e80941Smrg __asm __volatile("xxland %vs0, %vs0, %vs0"); 167b8e80941Smrg util_cpu_caps.has_vsx = 1; 168b8e80941Smrg } 169b8e80941Smrg signal(SIGILL, SIG_DFL); 170b8e80941Smrg } else { 171b8e80941Smrg util_cpu_caps.has_altivec = 0; 172b8e80941Smrg } 173b8e80941Smrg } 174b8e80941Smrg#endif /* !PIPE_OS_APPLE */ 175b8e80941Smrg} 176b8e80941Smrg#endif /* PIPE_ARCH_PPC */ 177b8e80941Smrg 178b8e80941Smrg 179b8e80941Smrg#if defined(PIPE_ARCH_X86) || defined (PIPE_ARCH_X86_64) 180b8e80941Smrgstatic int has_cpuid(void) 181b8e80941Smrg{ 182b8e80941Smrg#if defined(PIPE_ARCH_X86) 183b8e80941Smrg#if defined(PIPE_OS_GCC) 184b8e80941Smrg int a, c; 185b8e80941Smrg 186b8e80941Smrg __asm __volatile 187b8e80941Smrg ("pushf\n" 188b8e80941Smrg "popl %0\n" 189b8e80941Smrg "movl %0, %1\n" 190b8e80941Smrg "xorl $0x200000, %0\n" 191b8e80941Smrg "push %0\n" 192b8e80941Smrg "popf\n" 193b8e80941Smrg "pushf\n" 194b8e80941Smrg "popl %0\n" 195b8e80941Smrg : "=a" (a), "=c" (c) 196b8e80941Smrg : 197b8e80941Smrg : "cc"); 198b8e80941Smrg 199b8e80941Smrg return a != c; 200b8e80941Smrg#else 201b8e80941Smrg /* FIXME */ 202b8e80941Smrg return 1; 203b8e80941Smrg#endif 204b8e80941Smrg#elif defined(PIPE_ARCH_X86_64) 205b8e80941Smrg return 1; 206b8e80941Smrg#else 207b8e80941Smrg return 0; 208b8e80941Smrg#endif 209b8e80941Smrg} 210b8e80941Smrg 211b8e80941Smrg 212b8e80941Smrg/** 213b8e80941Smrg * @sa cpuid.h included in gcc-4.3 onwards. 214b8e80941Smrg * @sa http://msdn.microsoft.com/en-us/library/hskdteyh.aspx 215b8e80941Smrg */ 216b8e80941Smrgstatic inline void 217b8e80941Smrgcpuid(uint32_t ax, uint32_t *p) 218b8e80941Smrg{ 219b8e80941Smrg#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86) 220b8e80941Smrg __asm __volatile ( 221b8e80941Smrg "xchgl %%ebx, %1\n\t" 222b8e80941Smrg "cpuid\n\t" 223b8e80941Smrg "xchgl %%ebx, %1" 224b8e80941Smrg : "=a" (p[0]), 225b8e80941Smrg "=S" (p[1]), 226b8e80941Smrg "=c" (p[2]), 227b8e80941Smrg "=d" (p[3]) 228b8e80941Smrg : "0" (ax) 229b8e80941Smrg ); 230b8e80941Smrg#elif defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86_64) 231b8e80941Smrg __asm __volatile ( 232b8e80941Smrg "cpuid\n\t" 233b8e80941Smrg : "=a" (p[0]), 234b8e80941Smrg "=b" (p[1]), 235b8e80941Smrg "=c" (p[2]), 236b8e80941Smrg "=d" (p[3]) 237b8e80941Smrg : "0" (ax) 238b8e80941Smrg ); 239b8e80941Smrg#elif defined(PIPE_CC_MSVC) 240b8e80941Smrg __cpuid(p, ax); 241b8e80941Smrg#else 242b8e80941Smrg p[0] = 0; 243b8e80941Smrg p[1] = 0; 244b8e80941Smrg p[2] = 0; 245b8e80941Smrg p[3] = 0; 246b8e80941Smrg#endif 247b8e80941Smrg} 248b8e80941Smrg 249b8e80941Smrg/** 250b8e80941Smrg * @sa cpuid.h included in gcc-4.4 onwards. 251b8e80941Smrg * @sa http://msdn.microsoft.com/en-us/library/hskdteyh%28v=vs.90%29.aspx 252b8e80941Smrg */ 253b8e80941Smrgstatic inline void 254b8e80941Smrgcpuid_count(uint32_t ax, uint32_t cx, uint32_t *p) 255b8e80941Smrg{ 256b8e80941Smrg#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86) 257b8e80941Smrg __asm __volatile ( 258b8e80941Smrg "xchgl %%ebx, %1\n\t" 259b8e80941Smrg "cpuid\n\t" 260b8e80941Smrg "xchgl %%ebx, %1" 261b8e80941Smrg : "=a" (p[0]), 262b8e80941Smrg "=S" (p[1]), 263b8e80941Smrg "=c" (p[2]), 264b8e80941Smrg "=d" (p[3]) 265b8e80941Smrg : "0" (ax), "2" (cx) 266b8e80941Smrg ); 267b8e80941Smrg#elif defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86_64) 268b8e80941Smrg __asm __volatile ( 269b8e80941Smrg "cpuid\n\t" 270b8e80941Smrg : "=a" (p[0]), 271b8e80941Smrg "=b" (p[1]), 272b8e80941Smrg "=c" (p[2]), 273b8e80941Smrg "=d" (p[3]) 274b8e80941Smrg : "0" (ax), "2" (cx) 275b8e80941Smrg ); 276b8e80941Smrg#elif defined(PIPE_CC_MSVC) 277b8e80941Smrg __cpuidex(p, ax, cx); 278b8e80941Smrg#else 279b8e80941Smrg p[0] = 0; 280b8e80941Smrg p[1] = 0; 281b8e80941Smrg p[2] = 0; 282b8e80941Smrg p[3] = 0; 283b8e80941Smrg#endif 284b8e80941Smrg} 285b8e80941Smrg 286b8e80941Smrg 287b8e80941Smrgstatic inline uint64_t xgetbv(void) 288b8e80941Smrg{ 289b8e80941Smrg#if defined(PIPE_CC_GCC) 290b8e80941Smrg uint32_t eax, edx; 291b8e80941Smrg 292b8e80941Smrg __asm __volatile ( 293b8e80941Smrg ".byte 0x0f, 0x01, 0xd0" // xgetbv isn't supported on gcc < 4.4 294b8e80941Smrg : "=a"(eax), 295b8e80941Smrg "=d"(edx) 296b8e80941Smrg : "c"(0) 297b8e80941Smrg ); 298b8e80941Smrg 299b8e80941Smrg return ((uint64_t)edx << 32) | eax; 300b8e80941Smrg#elif defined(PIPE_CC_MSVC) && defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK) 301b8e80941Smrg return _xgetbv(_XCR_XFEATURE_ENABLED_MASK); 302b8e80941Smrg#else 303b8e80941Smrg return 0; 304b8e80941Smrg#endif 305b8e80941Smrg} 306b8e80941Smrg 307b8e80941Smrg 308b8e80941Smrg#if defined(PIPE_ARCH_X86) 309b8e80941SmrgPIPE_ALIGN_STACK static inline boolean sse2_has_daz(void) 310b8e80941Smrg{ 311b8e80941Smrg struct { 312b8e80941Smrg uint32_t pad1[7]; 313b8e80941Smrg uint32_t mxcsr_mask; 314b8e80941Smrg uint32_t pad2[128-8]; 315b8e80941Smrg } PIPE_ALIGN_VAR(16) fxarea; 316b8e80941Smrg 317b8e80941Smrg fxarea.mxcsr_mask = 0; 318b8e80941Smrg#if defined(PIPE_CC_GCC) 319b8e80941Smrg __asm __volatile ("fxsave %0" : "+m" (fxarea)); 320b8e80941Smrg#elif defined(PIPE_CC_MSVC) || defined(PIPE_CC_ICL) 321b8e80941Smrg _fxsave(&fxarea); 322b8e80941Smrg#else 323b8e80941Smrg fxarea.mxcsr_mask = 0; 324b8e80941Smrg#endif 325b8e80941Smrg return !!(fxarea.mxcsr_mask & (1 << 6)); 326b8e80941Smrg} 327b8e80941Smrg#endif 328b8e80941Smrg 329b8e80941Smrg#endif /* X86 or X86_64 */ 330b8e80941Smrg 331b8e80941Smrg#if defined(PIPE_ARCH_ARM) 332b8e80941Smrgstatic void 333b8e80941Smrgcheck_os_arm_support(void) 334b8e80941Smrg{ 335b8e80941Smrg /* 336b8e80941Smrg * On Android, the cpufeatures library is preferred way of checking 337b8e80941Smrg * CPU capabilities. However, it is not available for standalone Mesa 338b8e80941Smrg * builds, i.e. when Android build system (Android.mk-based) is not 339b8e80941Smrg * used. Because of this we cannot use PIPE_OS_ANDROID here, but rather 340b8e80941Smrg * have a separate macro that only gets enabled from respective Android.mk. 341b8e80941Smrg */ 342b8e80941Smrg#if defined(HAS_ANDROID_CPUFEATURES) 343b8e80941Smrg AndroidCpuFamily cpu_family = android_getCpuFamily(); 344b8e80941Smrg uint64_t cpu_features = android_getCpuFeatures(); 345b8e80941Smrg 346b8e80941Smrg if (cpu_family == ANDROID_CPU_FAMILY_ARM) { 347b8e80941Smrg if (cpu_features & ANDROID_CPU_ARM_FEATURE_NEON) 348b8e80941Smrg util_cpu_caps.has_neon = 1; 349b8e80941Smrg } 350b8e80941Smrg#elif defined(PIPE_OS_LINUX) 351b8e80941Smrg Elf32_auxv_t aux; 352b8e80941Smrg int fd; 353b8e80941Smrg 354b8e80941Smrg fd = open("/proc/self/auxv", O_RDONLY | O_CLOEXEC); 355b8e80941Smrg if (fd >= 0) { 356b8e80941Smrg while (read(fd, &aux, sizeof(Elf32_auxv_t)) == sizeof(Elf32_auxv_t)) { 357b8e80941Smrg if (aux.a_type == AT_HWCAP) { 358b8e80941Smrg uint32_t hwcap = aux.a_un.a_val; 359b8e80941Smrg 360b8e80941Smrg util_cpu_caps.has_neon = (hwcap >> 12) & 1; 361b8e80941Smrg break; 362b8e80941Smrg } 363b8e80941Smrg } 364b8e80941Smrg close (fd); 365b8e80941Smrg } 366b8e80941Smrg#endif /* PIPE_OS_LINUX */ 367b8e80941Smrg} 368b8e80941Smrg 369b8e80941Smrg#elif defined(PIPE_ARCH_AARCH64) 370b8e80941Smrgstatic void 371b8e80941Smrgcheck_os_arm_support(void) 372b8e80941Smrg{ 373b8e80941Smrg util_cpu_caps.has_neon = true; 374b8e80941Smrg} 375b8e80941Smrg#endif /* PIPE_ARCH_ARM || PIPE_ARCH_AARCH64 */ 376b8e80941Smrg 377b8e80941Smrgstatic void 378b8e80941Smrgget_cpu_topology(void) 379b8e80941Smrg{ 380b8e80941Smrg /* Default. This is correct if L3 is not present or there is only one. */ 381b8e80941Smrg util_cpu_caps.cores_per_L3 = util_cpu_caps.nr_cpus; 382b8e80941Smrg 383b8e80941Smrg#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) 384b8e80941Smrg /* AMD Zen */ 385b8e80941Smrg if (util_cpu_caps.x86_cpu_type == 0x17) { 386b8e80941Smrg uint32_t regs[4]; 387b8e80941Smrg 388b8e80941Smrg /* Query the L3 cache topology information. */ 389b8e80941Smrg cpuid_count(0x8000001D, 3, regs); 390b8e80941Smrg unsigned cache_level = (regs[0] >> 5) & 0x7; 391b8e80941Smrg unsigned cores_per_cache = ((regs[0] >> 14) & 0xfff) + 1; 392b8e80941Smrg 393b8e80941Smrg if (cache_level == 3) 394b8e80941Smrg util_cpu_caps.cores_per_L3 = cores_per_cache; 395b8e80941Smrg } 396b8e80941Smrg#endif 397b8e80941Smrg} 398b8e80941Smrg 399b8e80941Smrgstatic void 400b8e80941Smrgutil_cpu_detect_once(void) 401b8e80941Smrg{ 402b8e80941Smrg memset(&util_cpu_caps, 0, sizeof util_cpu_caps); 403b8e80941Smrg 404b8e80941Smrg /* Count the number of CPUs in system */ 405b8e80941Smrg#if defined(PIPE_OS_WINDOWS) 406b8e80941Smrg { 407b8e80941Smrg SYSTEM_INFO system_info; 408b8e80941Smrg GetSystemInfo(&system_info); 409b8e80941Smrg util_cpu_caps.nr_cpus = system_info.dwNumberOfProcessors; 410b8e80941Smrg } 411b8e80941Smrg#elif defined(PIPE_OS_UNIX) && defined(_SC_NPROCESSORS_ONLN) 412b8e80941Smrg util_cpu_caps.nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); 413b8e80941Smrg if (util_cpu_caps.nr_cpus == ~0) 414b8e80941Smrg util_cpu_caps.nr_cpus = 1; 415b8e80941Smrg#elif defined(PIPE_OS_BSD) 416b8e80941Smrg { 417b8e80941Smrg int mib[2], ncpu; 418b8e80941Smrg int len; 419b8e80941Smrg 420b8e80941Smrg mib[0] = CTL_HW; 421b8e80941Smrg mib[1] = HW_NCPU; 422b8e80941Smrg 423b8e80941Smrg len = sizeof (ncpu); 424b8e80941Smrg sysctl(mib, 2, &ncpu, &len, NULL, 0); 425b8e80941Smrg util_cpu_caps.nr_cpus = ncpu; 426b8e80941Smrg } 427b8e80941Smrg#else 428b8e80941Smrg util_cpu_caps.nr_cpus = 1; 429b8e80941Smrg#endif 430b8e80941Smrg 431b8e80941Smrg /* Make the fallback cacheline size nonzero so that it can be 432b8e80941Smrg * safely passed to align(). 433b8e80941Smrg */ 434b8e80941Smrg util_cpu_caps.cacheline = sizeof(void *); 435b8e80941Smrg 436b8e80941Smrg#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) 437b8e80941Smrg if (has_cpuid()) { 438b8e80941Smrg uint32_t regs[4]; 439b8e80941Smrg uint32_t regs2[4]; 440b8e80941Smrg 441b8e80941Smrg util_cpu_caps.cacheline = 32; 442b8e80941Smrg 443b8e80941Smrg /* Get max cpuid level */ 444b8e80941Smrg cpuid(0x00000000, regs); 445b8e80941Smrg 446b8e80941Smrg if (regs[0] >= 0x00000001) { 447b8e80941Smrg unsigned int cacheline; 448b8e80941Smrg 449b8e80941Smrg cpuid (0x00000001, regs2); 450b8e80941Smrg 451b8e80941Smrg util_cpu_caps.x86_cpu_type = (regs2[0] >> 8) & 0xf; 452b8e80941Smrg /* Add "extended family". */ 453b8e80941Smrg if (util_cpu_caps.x86_cpu_type == 0xf) 454b8e80941Smrg util_cpu_caps.x86_cpu_type += ((regs2[0] >> 20) & 0xff); 455b8e80941Smrg 456b8e80941Smrg /* general feature flags */ 457b8e80941Smrg util_cpu_caps.has_tsc = (regs2[3] >> 4) & 1; /* 0x0000010 */ 458b8e80941Smrg util_cpu_caps.has_mmx = (regs2[3] >> 23) & 1; /* 0x0800000 */ 459b8e80941Smrg util_cpu_caps.has_sse = (regs2[3] >> 25) & 1; /* 0x2000000 */ 460b8e80941Smrg util_cpu_caps.has_sse2 = (regs2[3] >> 26) & 1; /* 0x4000000 */ 461b8e80941Smrg util_cpu_caps.has_sse3 = (regs2[2] >> 0) & 1; /* 0x0000001 */ 462b8e80941Smrg util_cpu_caps.has_ssse3 = (regs2[2] >> 9) & 1; /* 0x0000020 */ 463b8e80941Smrg util_cpu_caps.has_sse4_1 = (regs2[2] >> 19) & 1; 464b8e80941Smrg util_cpu_caps.has_sse4_2 = (regs2[2] >> 20) & 1; 465b8e80941Smrg util_cpu_caps.has_popcnt = (regs2[2] >> 23) & 1; 466b8e80941Smrg util_cpu_caps.has_avx = ((regs2[2] >> 28) & 1) && // AVX 467b8e80941Smrg ((regs2[2] >> 27) & 1) && // OSXSAVE 468b8e80941Smrg ((xgetbv() & 6) == 6); // XMM & YMM 469b8e80941Smrg util_cpu_caps.has_f16c = ((regs2[2] >> 29) & 1) && util_cpu_caps.has_avx; 470b8e80941Smrg util_cpu_caps.has_fma = ((regs2[2] >> 12) & 1) && util_cpu_caps.has_avx; 471b8e80941Smrg util_cpu_caps.has_mmx2 = util_cpu_caps.has_sse; /* SSE cpus supports mmxext too */ 472b8e80941Smrg#if defined(PIPE_ARCH_X86_64) 473b8e80941Smrg util_cpu_caps.has_daz = 1; 474b8e80941Smrg#else 475b8e80941Smrg util_cpu_caps.has_daz = util_cpu_caps.has_sse3 || 476b8e80941Smrg (util_cpu_caps.has_sse2 && sse2_has_daz()); 477b8e80941Smrg#endif 478b8e80941Smrg 479b8e80941Smrg cacheline = ((regs2[1] >> 8) & 0xFF) * 8; 480b8e80941Smrg if (cacheline > 0) 481b8e80941Smrg util_cpu_caps.cacheline = cacheline; 482b8e80941Smrg } 483b8e80941Smrg if (util_cpu_caps.has_avx && regs[0] >= 0x00000007) { 484b8e80941Smrg uint32_t regs7[4]; 485b8e80941Smrg cpuid_count(0x00000007, 0x00000000, regs7); 486b8e80941Smrg util_cpu_caps.has_avx2 = (regs7[1] >> 5) & 1; 487b8e80941Smrg } 488b8e80941Smrg 489b8e80941Smrg // check for avx512 490b8e80941Smrg if (((regs2[2] >> 27) & 1) && // OSXSAVE 491b8e80941Smrg (xgetbv() & (0x7 << 5)) && // OPMASK: upper-256 enabled by OS 492b8e80941Smrg ((xgetbv() & 6) == 6)) { // XMM/YMM enabled by OS 493b8e80941Smrg uint32_t regs3[4]; 494b8e80941Smrg cpuid_count(0x00000007, 0x00000000, regs3); 495b8e80941Smrg util_cpu_caps.has_avx512f = (regs3[1] >> 16) & 1; 496b8e80941Smrg util_cpu_caps.has_avx512dq = (regs3[1] >> 17) & 1; 497b8e80941Smrg util_cpu_caps.has_avx512ifma = (regs3[1] >> 21) & 1; 498b8e80941Smrg util_cpu_caps.has_avx512pf = (regs3[1] >> 26) & 1; 499b8e80941Smrg util_cpu_caps.has_avx512er = (regs3[1] >> 27) & 1; 500b8e80941Smrg util_cpu_caps.has_avx512cd = (regs3[1] >> 28) & 1; 501b8e80941Smrg util_cpu_caps.has_avx512bw = (regs3[1] >> 30) & 1; 502b8e80941Smrg util_cpu_caps.has_avx512vl = (regs3[1] >> 31) & 1; 503b8e80941Smrg util_cpu_caps.has_avx512vbmi = (regs3[2] >> 1) & 1; 504b8e80941Smrg } 505b8e80941Smrg 506b8e80941Smrg if (regs[1] == 0x756e6547 && regs[2] == 0x6c65746e && regs[3] == 0x49656e69) { 507b8e80941Smrg /* GenuineIntel */ 508b8e80941Smrg util_cpu_caps.has_intel = 1; 509b8e80941Smrg } 510b8e80941Smrg 511b8e80941Smrg cpuid(0x80000000, regs); 512b8e80941Smrg 513b8e80941Smrg if (regs[0] >= 0x80000001) { 514b8e80941Smrg 515b8e80941Smrg cpuid(0x80000001, regs2); 516b8e80941Smrg 517b8e80941Smrg util_cpu_caps.has_mmx |= (regs2[3] >> 23) & 1; 518b8e80941Smrg util_cpu_caps.has_mmx2 |= (regs2[3] >> 22) & 1; 519b8e80941Smrg util_cpu_caps.has_3dnow = (regs2[3] >> 31) & 1; 520b8e80941Smrg util_cpu_caps.has_3dnow_ext = (regs2[3] >> 30) & 1; 521b8e80941Smrg 522b8e80941Smrg util_cpu_caps.has_xop = util_cpu_caps.has_avx && 523b8e80941Smrg ((regs2[2] >> 11) & 1); 524b8e80941Smrg } 525b8e80941Smrg 526b8e80941Smrg if (regs[0] >= 0x80000006) { 527b8e80941Smrg /* should we really do this if the clflush size above worked? */ 528b8e80941Smrg unsigned int cacheline; 529b8e80941Smrg cpuid(0x80000006, regs2); 530b8e80941Smrg cacheline = regs2[2] & 0xFF; 531b8e80941Smrg if (cacheline > 0) 532b8e80941Smrg util_cpu_caps.cacheline = cacheline; 533b8e80941Smrg } 534b8e80941Smrg 535b8e80941Smrg if (!util_cpu_caps.has_sse) { 536b8e80941Smrg util_cpu_caps.has_sse2 = 0; 537b8e80941Smrg util_cpu_caps.has_sse3 = 0; 538b8e80941Smrg util_cpu_caps.has_ssse3 = 0; 539b8e80941Smrg util_cpu_caps.has_sse4_1 = 0; 540b8e80941Smrg } 541b8e80941Smrg } 542b8e80941Smrg#endif /* PIPE_ARCH_X86 || PIPE_ARCH_X86_64 */ 543b8e80941Smrg 544b8e80941Smrg#if defined(PIPE_ARCH_ARM) || defined(PIPE_ARCH_AARCH64) 545b8e80941Smrg check_os_arm_support(); 546b8e80941Smrg#endif 547b8e80941Smrg 548b8e80941Smrg#if defined(PIPE_ARCH_PPC) 549b8e80941Smrg check_os_altivec_support(); 550b8e80941Smrg#endif /* PIPE_ARCH_PPC */ 551b8e80941Smrg 552b8e80941Smrg get_cpu_topology(); 553b8e80941Smrg 554b8e80941Smrg#ifdef DEBUG 555b8e80941Smrg if (debug_get_option_dump_cpu()) { 556b8e80941Smrg debug_printf("util_cpu_caps.nr_cpus = %u\n", util_cpu_caps.nr_cpus); 557b8e80941Smrg 558b8e80941Smrg debug_printf("util_cpu_caps.x86_cpu_type = %u\n", util_cpu_caps.x86_cpu_type); 559b8e80941Smrg debug_printf("util_cpu_caps.cacheline = %u\n", util_cpu_caps.cacheline); 560b8e80941Smrg 561b8e80941Smrg debug_printf("util_cpu_caps.has_tsc = %u\n", util_cpu_caps.has_tsc); 562b8e80941Smrg debug_printf("util_cpu_caps.has_mmx = %u\n", util_cpu_caps.has_mmx); 563b8e80941Smrg debug_printf("util_cpu_caps.has_mmx2 = %u\n", util_cpu_caps.has_mmx2); 564b8e80941Smrg debug_printf("util_cpu_caps.has_sse = %u\n", util_cpu_caps.has_sse); 565b8e80941Smrg debug_printf("util_cpu_caps.has_sse2 = %u\n", util_cpu_caps.has_sse2); 566b8e80941Smrg debug_printf("util_cpu_caps.has_sse3 = %u\n", util_cpu_caps.has_sse3); 567b8e80941Smrg debug_printf("util_cpu_caps.has_ssse3 = %u\n", util_cpu_caps.has_ssse3); 568b8e80941Smrg debug_printf("util_cpu_caps.has_sse4_1 = %u\n", util_cpu_caps.has_sse4_1); 569b8e80941Smrg debug_printf("util_cpu_caps.has_sse4_2 = %u\n", util_cpu_caps.has_sse4_2); 570b8e80941Smrg debug_printf("util_cpu_caps.has_avx = %u\n", util_cpu_caps.has_avx); 571b8e80941Smrg debug_printf("util_cpu_caps.has_avx2 = %u\n", util_cpu_caps.has_avx2); 572b8e80941Smrg debug_printf("util_cpu_caps.has_f16c = %u\n", util_cpu_caps.has_f16c); 573b8e80941Smrg debug_printf("util_cpu_caps.has_popcnt = %u\n", util_cpu_caps.has_popcnt); 574b8e80941Smrg debug_printf("util_cpu_caps.has_3dnow = %u\n", util_cpu_caps.has_3dnow); 575b8e80941Smrg debug_printf("util_cpu_caps.has_3dnow_ext = %u\n", util_cpu_caps.has_3dnow_ext); 576b8e80941Smrg debug_printf("util_cpu_caps.has_xop = %u\n", util_cpu_caps.has_xop); 577b8e80941Smrg debug_printf("util_cpu_caps.has_altivec = %u\n", util_cpu_caps.has_altivec); 578b8e80941Smrg debug_printf("util_cpu_caps.has_vsx = %u\n", util_cpu_caps.has_vsx); 579b8e80941Smrg debug_printf("util_cpu_caps.has_neon = %u\n", util_cpu_caps.has_neon); 580b8e80941Smrg debug_printf("util_cpu_caps.has_daz = %u\n", util_cpu_caps.has_daz); 581b8e80941Smrg debug_printf("util_cpu_caps.has_avx512f = %u\n", util_cpu_caps.has_avx512f); 582b8e80941Smrg debug_printf("util_cpu_caps.has_avx512dq = %u\n", util_cpu_caps.has_avx512dq); 583b8e80941Smrg debug_printf("util_cpu_caps.has_avx512ifma = %u\n", util_cpu_caps.has_avx512ifma); 584b8e80941Smrg debug_printf("util_cpu_caps.has_avx512pf = %u\n", util_cpu_caps.has_avx512pf); 585b8e80941Smrg debug_printf("util_cpu_caps.has_avx512er = %u\n", util_cpu_caps.has_avx512er); 586b8e80941Smrg debug_printf("util_cpu_caps.has_avx512cd = %u\n", util_cpu_caps.has_avx512cd); 587b8e80941Smrg debug_printf("util_cpu_caps.has_avx512bw = %u\n", util_cpu_caps.has_avx512bw); 588b8e80941Smrg debug_printf("util_cpu_caps.has_avx512vl = %u\n", util_cpu_caps.has_avx512vl); 589b8e80941Smrg debug_printf("util_cpu_caps.has_avx512vbmi = %u\n", util_cpu_caps.has_avx512vbmi); 590b8e80941Smrg } 591b8e80941Smrg#endif 592b8e80941Smrg} 593b8e80941Smrg 594b8e80941Smrgstatic once_flag cpu_once_flag = ONCE_FLAG_INIT; 595b8e80941Smrg 596b8e80941Smrgvoid 597b8e80941Smrgutil_cpu_detect(void) 598b8e80941Smrg{ 599b8e80941Smrg call_once(&cpu_once_flag, util_cpu_detect_once); 600b8e80941Smrg} 601