/**************************************************************************
 *
 * Copyright 2008 Dennis Smit
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * @file
 * CPU feature detection.
 *
 * @author Dennis Smit
 * @author Based on the work of Eric Anholt <anholt@FreeBSD.org>
 */

#include "pipe/p_config.h"
#include "pipe/p_compiler.h"

#include "util/u_debug.h"
#include "u_cpu_detect.h"
#include "u_math.h"
#include "c11/threads.h"

#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#if defined(PIPE_ARCH_PPC)
#if defined(PIPE_OS_APPLE)
#include <sys/sysctl.h>
#else
#include <signal.h>
#include <setjmp.h>
#endif
#endif

#if defined(PIPE_OS_BSD)
#include <sys/param.h>
#include <sys/sysctl.h>
#include <machine/cpu.h>
#endif

#if defined(PIPE_OS_FREEBSD)
#if __has_include(<sys/auxv.h>)
#include <sys/auxv.h>
#define HAVE_ELF_AUX_INFO
#endif
#endif

#if defined(PIPE_OS_LINUX)
#include <signal.h>
#include <fcntl.h>
#include <elf.h>
#endif

#ifdef PIPE_OS_UNIX
#include <unistd.h>
#endif

#if defined(HAS_ANDROID_CPUFEATURES)
#include <cpu-features.h>
#endif

#if defined(PIPE_OS_WINDOWS)
#include <windows.h>
#if defined(PIPE_CC_MSVC)
#include <intrin.h>
#endif
#endif

#if defined(HAS_SCHED_H)
#include <sched.h>
#endif

DEBUG_GET_ONCE_BOOL_OPTION(dump_cpu, "GALLIUM_DUMP_CPU", false)
9401e04c3fSmrg 957ec681f3Smrg 967ec681f3Smrgstruct util_cpu_caps_t util_cpu_caps; 9701e04c3fSmrg 9801e04c3fSmrg#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) 9901e04c3fSmrgstatic int has_cpuid(void); 10001e04c3fSmrg#endif 10101e04c3fSmrg 10201e04c3fSmrg 1037ec681f3Smrg#if defined(PIPE_ARCH_PPC) && !defined(PIPE_OS_APPLE) && !defined(PIPE_OS_BSD) && !defined(PIPE_OS_LINUX) 10401e04c3fSmrgstatic jmp_buf __lv_powerpc_jmpbuf; 10501e04c3fSmrgstatic volatile sig_atomic_t __lv_powerpc_canjump = 0; 10601e04c3fSmrg 10701e04c3fSmrgstatic void 10801e04c3fSmrgsigill_handler(int sig) 10901e04c3fSmrg{ 11001e04c3fSmrg if (!__lv_powerpc_canjump) { 11101e04c3fSmrg signal (sig, SIG_DFL); 11201e04c3fSmrg raise (sig); 11301e04c3fSmrg } 11401e04c3fSmrg 11501e04c3fSmrg __lv_powerpc_canjump = 0; 11601e04c3fSmrg longjmp(__lv_powerpc_jmpbuf, 1); 11701e04c3fSmrg} 11801e04c3fSmrg#endif 11901e04c3fSmrg 12001e04c3fSmrg#if defined(PIPE_ARCH_PPC) 12101e04c3fSmrgstatic void 12201e04c3fSmrgcheck_os_altivec_support(void) 12301e04c3fSmrg{ 1247ec681f3Smrg#if defined(__ALTIVEC__) 1257ec681f3Smrg util_cpu_caps.has_altivec = 1; 1267ec681f3Smrg#endif 1277ec681f3Smrg#if defined(__VSX__) 1287ec681f3Smrg util_cpu_caps.has_vsx = 1; 1297ec681f3Smrg#endif 1307ec681f3Smrg#if defined(__ALTIVEC__) && defined(__VSX__) 1317ec681f3Smrg/* Do nothing */ 1327ec681f3Smrg#elif defined(PIPE_OS_APPLE) || defined(PIPE_OS_NETBSD) || defined(PIPE_OS_OPENBSD) 1337ec681f3Smrg#ifdef HW_VECTORUNIT 13401e04c3fSmrg int sels[2] = {CTL_HW, HW_VECTORUNIT}; 1357ec681f3Smrg#else 1367ec681f3Smrg int sels[2] = {CTL_MACHDEP, CPU_ALTIVEC}; 1377ec681f3Smrg#endif 13801e04c3fSmrg int has_vu = 0; 139c87a3a8aSchristos size_t len = sizeof (has_vu); 14001e04c3fSmrg int err; 14101e04c3fSmrg 14201e04c3fSmrg err = sysctl(sels, 2, &has_vu, &len, NULL, 0); 14301e04c3fSmrg 14401e04c3fSmrg if (err == 0) { 14501e04c3fSmrg if (has_vu != 0) { 14601e04c3fSmrg util_cpu_caps.has_altivec = 1; 14701e04c3fSmrg } 14801e04c3fSmrg } 1497ec681f3Smrg#elif 
defined(PIPE_OS_FREEBSD) /* !PIPE_OS_APPLE && !PIPE_OS_NETBSD && !PIPE_OS_OPENBSD */ 1507ec681f3Smrg unsigned long hwcap = 0; 1517ec681f3Smrg#ifdef HAVE_ELF_AUX_INFO 1527ec681f3Smrg elf_aux_info(AT_HWCAP, &hwcap, sizeof(hwcap)); 1537ec681f3Smrg#else 1547ec681f3Smrg size_t len = sizeof(hwcap); 1557ec681f3Smrg sysctlbyname("hw.cpu_features", &hwcap, &len, NULL, 0); 1567ec681f3Smrg#endif 1577ec681f3Smrg if (hwcap & PPC_FEATURE_HAS_ALTIVEC) 1587ec681f3Smrg util_cpu_caps.has_altivec = 1; 1597ec681f3Smrg if (hwcap & PPC_FEATURE_HAS_VSX) 1607ec681f3Smrg util_cpu_caps.has_vsx = 1; 1617ec681f3Smrg#elif defined(PIPE_OS_LINUX) /* !PIPE_OS_FREEBSD */ 1627ec681f3Smrg#if defined(PIPE_ARCH_PPC_64) 1637ec681f3Smrg Elf64_auxv_t aux; 1647ec681f3Smrg#else 1657ec681f3Smrg Elf32_auxv_t aux; 1667ec681f3Smrg#endif 1677ec681f3Smrg int fd = open("/proc/self/auxv", O_RDONLY | O_CLOEXEC); 1687ec681f3Smrg if (fd >= 0) { 1697ec681f3Smrg while (read(fd, &aux, sizeof(aux)) == sizeof(aux)) { 1707ec681f3Smrg if (aux.a_type == AT_HWCAP) { 1717ec681f3Smrg char *env_vsx = getenv("GALLIVM_VSX"); 1727ec681f3Smrg uint64_t hwcap = aux.a_un.a_val; 1737ec681f3Smrg util_cpu_caps.has_altivec = (hwcap >> 28) & 1; 1747ec681f3Smrg if (!env_vsx || env_vsx[0] != '0') { 1757ec681f3Smrg util_cpu_caps.has_vsx = (hwcap >> 7) & 1; 1767ec681f3Smrg } 1777ec681f3Smrg break; 1787ec681f3Smrg } 1797ec681f3Smrg } 1807ec681f3Smrg close(fd); 1817ec681f3Smrg } 1827ec681f3Smrg#else /* !PIPE_OS_APPLE && !PIPE_OS_BSD && !PIPE_OS_LINUX */ 1837ec681f3Smrg /* not on Apple/Darwin or Linux, do it the brute-force way */ 18401e04c3fSmrg /* this is borrowed from the libmpeg2 library */ 18501e04c3fSmrg signal(SIGILL, sigill_handler); 18601e04c3fSmrg if (setjmp(__lv_powerpc_jmpbuf)) { 18701e04c3fSmrg signal(SIGILL, SIG_DFL); 18801e04c3fSmrg } else { 18901e04c3fSmrg boolean enable_altivec = TRUE; /* Default: enable if available, and if not overridden */ 19001e04c3fSmrg boolean enable_vsx = TRUE; 19101e04c3fSmrg#ifdef DEBUG 19201e04c3fSmrg /* 
Disabling Altivec code generation is not the same as disabling VSX code generation, 19301e04c3fSmrg * which can be done simply by passing -mattr=-vsx to the LLVM compiler; cf. 19401e04c3fSmrg * lp_build_create_jit_compiler_for_module(). 19501e04c3fSmrg * If you want to disable Altivec code generation, the best place to do it is here. 19601e04c3fSmrg */ 19701e04c3fSmrg char *env_control = getenv("GALLIVM_ALTIVEC"); /* 1=enable (default); 0=disable */ 19801e04c3fSmrg if (env_control && env_control[0] == '0') { 19901e04c3fSmrg enable_altivec = FALSE; 20001e04c3fSmrg } 20101e04c3fSmrg#endif 20201e04c3fSmrg /* VSX instructions can be explicitly enabled/disabled via GALLIVM_VSX=1 or 0 */ 20301e04c3fSmrg char *env_vsx = getenv("GALLIVM_VSX"); 20401e04c3fSmrg if (env_vsx && env_vsx[0] == '0') { 20501e04c3fSmrg enable_vsx = FALSE; 20601e04c3fSmrg } 20701e04c3fSmrg if (enable_altivec) { 20801e04c3fSmrg __lv_powerpc_canjump = 1; 20901e04c3fSmrg 21001e04c3fSmrg __asm __volatile 21101e04c3fSmrg ("mtspr 256, %0\n\t" 21201e04c3fSmrg "vand %%v0, %%v0, %%v0" 21301e04c3fSmrg : 21401e04c3fSmrg : "r" (-1)); 21501e04c3fSmrg 21601e04c3fSmrg util_cpu_caps.has_altivec = 1; 21701e04c3fSmrg 21801e04c3fSmrg if (enable_vsx) { 21901e04c3fSmrg __asm __volatile("xxland %vs0, %vs0, %vs0"); 22001e04c3fSmrg util_cpu_caps.has_vsx = 1; 22101e04c3fSmrg } 22201e04c3fSmrg signal(SIGILL, SIG_DFL); 22301e04c3fSmrg } else { 22401e04c3fSmrg util_cpu_caps.has_altivec = 0; 22501e04c3fSmrg } 22601e04c3fSmrg } 2277ec681f3Smrg#endif /* !PIPE_OS_APPLE && !PIPE_OS_LINUX */ 22801e04c3fSmrg} 22901e04c3fSmrg#endif /* PIPE_ARCH_PPC */ 23001e04c3fSmrg 23101e04c3fSmrg 23201e04c3fSmrg#if defined(PIPE_ARCH_X86) || defined (PIPE_ARCH_X86_64) 23301e04c3fSmrgstatic int has_cpuid(void) 23401e04c3fSmrg{ 23501e04c3fSmrg#if defined(PIPE_ARCH_X86) 23601e04c3fSmrg#if defined(PIPE_OS_GCC) 23701e04c3fSmrg int a, c; 23801e04c3fSmrg 23901e04c3fSmrg __asm __volatile 24001e04c3fSmrg ("pushf\n" 24101e04c3fSmrg "popl %0\n" 24201e04c3fSmrg 
"movl %0, %1\n" 24301e04c3fSmrg "xorl $0x200000, %0\n" 24401e04c3fSmrg "push %0\n" 24501e04c3fSmrg "popf\n" 24601e04c3fSmrg "pushf\n" 24701e04c3fSmrg "popl %0\n" 24801e04c3fSmrg : "=a" (a), "=c" (c) 24901e04c3fSmrg : 25001e04c3fSmrg : "cc"); 25101e04c3fSmrg 25201e04c3fSmrg return a != c; 25301e04c3fSmrg#else 25401e04c3fSmrg /* FIXME */ 25501e04c3fSmrg return 1; 25601e04c3fSmrg#endif 25701e04c3fSmrg#elif defined(PIPE_ARCH_X86_64) 25801e04c3fSmrg return 1; 25901e04c3fSmrg#else 26001e04c3fSmrg return 0; 26101e04c3fSmrg#endif 26201e04c3fSmrg} 26301e04c3fSmrg 26401e04c3fSmrg 26501e04c3fSmrg/** 26601e04c3fSmrg * @sa cpuid.h included in gcc-4.3 onwards. 26701e04c3fSmrg * @sa http://msdn.microsoft.com/en-us/library/hskdteyh.aspx 26801e04c3fSmrg */ 26901e04c3fSmrgstatic inline void 27001e04c3fSmrgcpuid(uint32_t ax, uint32_t *p) 27101e04c3fSmrg{ 27201e04c3fSmrg#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86) 27301e04c3fSmrg __asm __volatile ( 27401e04c3fSmrg "xchgl %%ebx, %1\n\t" 27501e04c3fSmrg "cpuid\n\t" 27601e04c3fSmrg "xchgl %%ebx, %1" 27701e04c3fSmrg : "=a" (p[0]), 27801e04c3fSmrg "=S" (p[1]), 27901e04c3fSmrg "=c" (p[2]), 28001e04c3fSmrg "=d" (p[3]) 28101e04c3fSmrg : "0" (ax) 28201e04c3fSmrg ); 28301e04c3fSmrg#elif defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86_64) 28401e04c3fSmrg __asm __volatile ( 28501e04c3fSmrg "cpuid\n\t" 28601e04c3fSmrg : "=a" (p[0]), 28701e04c3fSmrg "=b" (p[1]), 28801e04c3fSmrg "=c" (p[2]), 28901e04c3fSmrg "=d" (p[3]) 29001e04c3fSmrg : "0" (ax) 29101e04c3fSmrg ); 29201e04c3fSmrg#elif defined(PIPE_CC_MSVC) 29301e04c3fSmrg __cpuid(p, ax); 29401e04c3fSmrg#else 29501e04c3fSmrg p[0] = 0; 29601e04c3fSmrg p[1] = 0; 29701e04c3fSmrg p[2] = 0; 29801e04c3fSmrg p[3] = 0; 29901e04c3fSmrg#endif 30001e04c3fSmrg} 30101e04c3fSmrg 30201e04c3fSmrg/** 30301e04c3fSmrg * @sa cpuid.h included in gcc-4.4 onwards. 
30401e04c3fSmrg * @sa http://msdn.microsoft.com/en-us/library/hskdteyh%28v=vs.90%29.aspx 30501e04c3fSmrg */ 30601e04c3fSmrgstatic inline void 30701e04c3fSmrgcpuid_count(uint32_t ax, uint32_t cx, uint32_t *p) 30801e04c3fSmrg{ 30901e04c3fSmrg#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86) 31001e04c3fSmrg __asm __volatile ( 31101e04c3fSmrg "xchgl %%ebx, %1\n\t" 31201e04c3fSmrg "cpuid\n\t" 31301e04c3fSmrg "xchgl %%ebx, %1" 31401e04c3fSmrg : "=a" (p[0]), 31501e04c3fSmrg "=S" (p[1]), 31601e04c3fSmrg "=c" (p[2]), 31701e04c3fSmrg "=d" (p[3]) 31801e04c3fSmrg : "0" (ax), "2" (cx) 31901e04c3fSmrg ); 32001e04c3fSmrg#elif defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86_64) 32101e04c3fSmrg __asm __volatile ( 32201e04c3fSmrg "cpuid\n\t" 32301e04c3fSmrg : "=a" (p[0]), 32401e04c3fSmrg "=b" (p[1]), 32501e04c3fSmrg "=c" (p[2]), 32601e04c3fSmrg "=d" (p[3]) 32701e04c3fSmrg : "0" (ax), "2" (cx) 32801e04c3fSmrg ); 32901e04c3fSmrg#elif defined(PIPE_CC_MSVC) 33001e04c3fSmrg __cpuidex(p, ax, cx); 33101e04c3fSmrg#else 33201e04c3fSmrg p[0] = 0; 33301e04c3fSmrg p[1] = 0; 33401e04c3fSmrg p[2] = 0; 33501e04c3fSmrg p[3] = 0; 33601e04c3fSmrg#endif 33701e04c3fSmrg} 33801e04c3fSmrg 33901e04c3fSmrg 34001e04c3fSmrgstatic inline uint64_t xgetbv(void) 34101e04c3fSmrg{ 34201e04c3fSmrg#if defined(PIPE_CC_GCC) 34301e04c3fSmrg uint32_t eax, edx; 34401e04c3fSmrg 34501e04c3fSmrg __asm __volatile ( 34601e04c3fSmrg ".byte 0x0f, 0x01, 0xd0" // xgetbv isn't supported on gcc < 4.4 34701e04c3fSmrg : "=a"(eax), 34801e04c3fSmrg "=d"(edx) 34901e04c3fSmrg : "c"(0) 35001e04c3fSmrg ); 35101e04c3fSmrg 35201e04c3fSmrg return ((uint64_t)edx << 32) | eax; 35301e04c3fSmrg#elif defined(PIPE_CC_MSVC) && defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK) 35401e04c3fSmrg return _xgetbv(_XCR_XFEATURE_ENABLED_MASK); 35501e04c3fSmrg#else 35601e04c3fSmrg return 0; 35701e04c3fSmrg#endif 35801e04c3fSmrg} 35901e04c3fSmrg 36001e04c3fSmrg 36101e04c3fSmrg#if defined(PIPE_ARCH_X86) 36201e04c3fSmrgPIPE_ALIGN_STACK static inline 
boolean sse2_has_daz(void) 36301e04c3fSmrg{ 36401e04c3fSmrg struct { 36501e04c3fSmrg uint32_t pad1[7]; 36601e04c3fSmrg uint32_t mxcsr_mask; 36701e04c3fSmrg uint32_t pad2[128-8]; 36801e04c3fSmrg } PIPE_ALIGN_VAR(16) fxarea; 36901e04c3fSmrg 37001e04c3fSmrg fxarea.mxcsr_mask = 0; 37101e04c3fSmrg#if defined(PIPE_CC_GCC) 37201e04c3fSmrg __asm __volatile ("fxsave %0" : "+m" (fxarea)); 37301e04c3fSmrg#elif defined(PIPE_CC_MSVC) || defined(PIPE_CC_ICL) 37401e04c3fSmrg _fxsave(&fxarea); 37501e04c3fSmrg#else 37601e04c3fSmrg fxarea.mxcsr_mask = 0; 37701e04c3fSmrg#endif 37801e04c3fSmrg return !!(fxarea.mxcsr_mask & (1 << 6)); 37901e04c3fSmrg} 38001e04c3fSmrg#endif 38101e04c3fSmrg 38201e04c3fSmrg#endif /* X86 or X86_64 */ 38301e04c3fSmrg 38401e04c3fSmrg#if defined(PIPE_ARCH_ARM) 38501e04c3fSmrgstatic void 38601e04c3fSmrgcheck_os_arm_support(void) 38701e04c3fSmrg{ 38801e04c3fSmrg /* 38901e04c3fSmrg * On Android, the cpufeatures library is preferred way of checking 39001e04c3fSmrg * CPU capabilities. However, it is not available for standalone Mesa 39101e04c3fSmrg * builds, i.e. when Android build system (Android.mk-based) is not 39201e04c3fSmrg * used. Because of this we cannot use PIPE_OS_ANDROID here, but rather 39301e04c3fSmrg * have a separate macro that only gets enabled from respective Android.mk. 
39401e04c3fSmrg */ 3957ec681f3Smrg#if defined(__ARM_NEON) || defined(__ARM_NEON__) 3967ec681f3Smrg util_cpu_caps.has_neon = 1; 3977ec681f3Smrg#elif defined(PIPE_OS_FREEBSD) && defined(HAVE_ELF_AUX_INFO) 3987ec681f3Smrg unsigned long hwcap = 0; 3997ec681f3Smrg elf_aux_info(AT_HWCAP, &hwcap, sizeof(hwcap)); 4007ec681f3Smrg if (hwcap & HWCAP_NEON) 4017ec681f3Smrg util_cpu_caps.has_neon = 1; 4027ec681f3Smrg#elif defined(HAS_ANDROID_CPUFEATURES) 40301e04c3fSmrg AndroidCpuFamily cpu_family = android_getCpuFamily(); 40401e04c3fSmrg uint64_t cpu_features = android_getCpuFeatures(); 40501e04c3fSmrg 40601e04c3fSmrg if (cpu_family == ANDROID_CPU_FAMILY_ARM) { 40701e04c3fSmrg if (cpu_features & ANDROID_CPU_ARM_FEATURE_NEON) 40801e04c3fSmrg util_cpu_caps.has_neon = 1; 40901e04c3fSmrg } 41001e04c3fSmrg#elif defined(PIPE_OS_LINUX) 41101e04c3fSmrg Elf32_auxv_t aux; 41201e04c3fSmrg int fd; 41301e04c3fSmrg 41401e04c3fSmrg fd = open("/proc/self/auxv", O_RDONLY | O_CLOEXEC); 41501e04c3fSmrg if (fd >= 0) { 41601e04c3fSmrg while (read(fd, &aux, sizeof(Elf32_auxv_t)) == sizeof(Elf32_auxv_t)) { 41701e04c3fSmrg if (aux.a_type == AT_HWCAP) { 41801e04c3fSmrg uint32_t hwcap = aux.a_un.a_val; 41901e04c3fSmrg 42001e04c3fSmrg util_cpu_caps.has_neon = (hwcap >> 12) & 1; 42101e04c3fSmrg break; 42201e04c3fSmrg } 42301e04c3fSmrg } 42401e04c3fSmrg close (fd); 42501e04c3fSmrg } 42601e04c3fSmrg#endif /* PIPE_OS_LINUX */ 42701e04c3fSmrg} 42801e04c3fSmrg 4298a1362adSmaya#elif defined(PIPE_ARCH_AARCH64) 43001e04c3fSmrgstatic void 4318a1362adSmayacheck_os_arm_support(void) 43201e04c3fSmrg{ 4338a1362adSmaya util_cpu_caps.has_neon = true; 4348a1362adSmaya} 4358a1362adSmaya#endif /* PIPE_ARCH_ARM || PIPE_ARCH_AARCH64 */ 43601e04c3fSmrg 4377ec681f3Smrg#if defined(PIPE_ARCH_MIPS64) 43854c3abb5Smartin#ifdef __NetBSD__ 43954c3abb5Smartinstatic void 440cbafdbbfSmartincheck_os_mips64_support(void) 44154c3abb5Smartin{ 44254c3abb5Smartin util_cpu_caps.has_msa = false; /* XXX seems there is no way to detect MSA 
support from userland */ 44354c3abb5Smartin} 44454c3abb5Smartin#else 4457ec681f3Smrgstatic void 4467ec681f3Smrgcheck_os_mips64_support(void) 4477ec681f3Smrg{ 4487ec681f3Smrg Elf64_auxv_t aux; 4497ec681f3Smrg int fd; 4507ec681f3Smrg 4517ec681f3Smrg fd = open("/proc/self/auxv", O_RDONLY | O_CLOEXEC); 4527ec681f3Smrg if (fd >= 0) { 4537ec681f3Smrg while (read(fd, &aux, sizeof(Elf64_auxv_t)) == sizeof(Elf64_auxv_t)) { 4547ec681f3Smrg if (aux.a_type == AT_HWCAP) { 4557ec681f3Smrg uint64_t hwcap = aux.a_un.a_val; 4567ec681f3Smrg 4577ec681f3Smrg util_cpu_caps.has_msa = (hwcap >> 1) & 1; 4587ec681f3Smrg break; 4597ec681f3Smrg } 4607ec681f3Smrg } 4617ec681f3Smrg close (fd); 4627ec681f3Smrg } 4637ec681f3Smrg} 46454c3abb5Smartin#endif 4657ec681f3Smrg#endif /* PIPE_ARCH_MIPS64 */ 4667ec681f3Smrg 4677ec681f3Smrg 4688a1362adSmayastatic void 4698a1362adSmayaget_cpu_topology(void) 4708a1362adSmaya{ 4717ec681f3Smrg /* Default. This is OK if L3 is not present or there is only one. */ 4727ec681f3Smrg util_cpu_caps.num_L3_caches = 1; 4737ec681f3Smrg 4747ec681f3Smrg memset(util_cpu_caps.cpu_to_L3, 0xff, sizeof(util_cpu_caps.cpu_to_L3)); 47501e04c3fSmrg 47601e04c3fSmrg#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) 47701e04c3fSmrg /* AMD Zen */ 4787ec681f3Smrg if (util_cpu_caps.family >= CPU_AMD_ZEN1_ZEN2 && 4797ec681f3Smrg util_cpu_caps.family < CPU_AMD_LAST) { 4808a1362adSmaya uint32_t regs[4]; 4818a1362adSmaya 4827ec681f3Smrg uint32_t saved_mask[UTIL_MAX_CPUS / 32] = {0}; 4837ec681f3Smrg uint32_t mask[UTIL_MAX_CPUS / 32] = {0}; 4847ec681f3Smrg bool saved = false; 4857ec681f3Smrg 4867ec681f3Smrg uint32_t L3_found[UTIL_MAX_CPUS] = {0}; 4877ec681f3Smrg uint32_t num_L3_caches = 0; 4887ec681f3Smrg util_affinity_mask *L3_affinity_masks = NULL; 4897ec681f3Smrg 4907ec681f3Smrg /* Query APIC IDs from each CPU core. 
4917ec681f3Smrg * 4927ec681f3Smrg * An APIC ID is a logical ID of the CPU with respect to the cache 4937ec681f3Smrg * hierarchy, meaning that consecutive APIC IDs are neighbours in 4947ec681f3Smrg * the hierarchy, e.g. sharing the same cache. 4957ec681f3Smrg * 4967ec681f3Smrg * For example, CPU 0 can have APIC ID 0 and CPU 12 can have APIC ID 1, 4977ec681f3Smrg * which means that both CPU 0 and 12 are next to each other. 4987ec681f3Smrg * (e.g. they are 2 threads belonging to 1 SMT2 core) 4997ec681f3Smrg * 5007ec681f3Smrg * We need to find out which CPUs share the same L3 cache and they can 5017ec681f3Smrg * be all over the place. 5027ec681f3Smrg * 5037ec681f3Smrg * Querying the APIC ID can only be done by pinning the current thread 5047ec681f3Smrg * to each core. The original affinity mask is saved. 5057ec681f3Smrg * 5067ec681f3Smrg * Loop over all possible CPUs even though some may be offline. 5077ec681f3Smrg */ 5087ec681f3Smrg for (int16_t i = 0; i < util_cpu_caps.max_cpus && i < UTIL_MAX_CPUS; i++) { 5097ec681f3Smrg uint32_t cpu_bit = 1u << (i % 32); 5107ec681f3Smrg 5117ec681f3Smrg mask[i / 32] = cpu_bit; 5127ec681f3Smrg 5137ec681f3Smrg /* The assumption is that trying to bind the thread to a CPU that is 5147ec681f3Smrg * offline will fail. 5157ec681f3Smrg */ 5167ec681f3Smrg if (util_set_current_thread_affinity(mask, 5177ec681f3Smrg !saved ? saved_mask : NULL, 5187ec681f3Smrg util_cpu_caps.num_cpu_mask_bits)) { 5197ec681f3Smrg saved = true; 5207ec681f3Smrg 5217ec681f3Smrg /* Query the APIC ID of the current core. */ 5227ec681f3Smrg cpuid(0x00000001, regs); 5237ec681f3Smrg unsigned apic_id = regs[1] >> 24; 5247ec681f3Smrg 5257ec681f3Smrg /* Query the total core count for the CPU */ 5267ec681f3Smrg uint32_t core_count = 1; 5277ec681f3Smrg if (regs[3] & (1 << 28)) 5287ec681f3Smrg core_count = (regs[1] >> 16) & 0xff; 5297ec681f3Smrg 5307ec681f3Smrg core_count = util_next_power_of_two(core_count); 5317ec681f3Smrg 5327ec681f3Smrg /* Query the L3 cache count. 
*/ 5337ec681f3Smrg cpuid_count(0x8000001D, 3, regs); 5347ec681f3Smrg unsigned cache_level = (regs[0] >> 5) & 0x7; 5357ec681f3Smrg unsigned cores_per_L3 = ((regs[0] >> 14) & 0xfff) + 1; 5367ec681f3Smrg 5377ec681f3Smrg if (cache_level != 3) 5387ec681f3Smrg continue; 5397ec681f3Smrg 5407ec681f3Smrg unsigned local_core_id = apic_id & (core_count - 1); 5417ec681f3Smrg unsigned phys_id = (apic_id & ~(core_count - 1)) >> util_logbase2(core_count); 5427ec681f3Smrg unsigned local_l3_cache_index = local_core_id / util_next_power_of_two(cores_per_L3); 5437ec681f3Smrg#define L3_ID(p, i) (p << 16 | i << 1 | 1); 5447ec681f3Smrg 5457ec681f3Smrg unsigned l3_id = L3_ID(phys_id, local_l3_cache_index); 5467ec681f3Smrg int idx = -1; 5477ec681f3Smrg for (unsigned c = 0; c < num_L3_caches; c++) { 5487ec681f3Smrg if (L3_found[c] == l3_id) { 5497ec681f3Smrg idx = c; 5507ec681f3Smrg break; 5517ec681f3Smrg } 5527ec681f3Smrg } 5537ec681f3Smrg if (idx == -1) { 5547ec681f3Smrg idx = num_L3_caches; 5557ec681f3Smrg L3_found[num_L3_caches++] = l3_id; 5567ec681f3Smrg L3_affinity_masks = realloc(L3_affinity_masks, sizeof(util_affinity_mask) * num_L3_caches); 5577ec681f3Smrg if (!L3_affinity_masks) 5587ec681f3Smrg return; 5597ec681f3Smrg memset(&L3_affinity_masks[num_L3_caches - 1], 0, sizeof(util_affinity_mask)); 5607ec681f3Smrg } 5617ec681f3Smrg util_cpu_caps.cpu_to_L3[i] = idx; 5627ec681f3Smrg L3_affinity_masks[idx][i / 32] |= cpu_bit; 5637ec681f3Smrg 5647ec681f3Smrg } 5657ec681f3Smrg mask[i / 32] = 0; 5667ec681f3Smrg } 5677ec681f3Smrg 5687ec681f3Smrg util_cpu_caps.num_L3_caches = num_L3_caches; 5697ec681f3Smrg util_cpu_caps.L3_affinity_mask = L3_affinity_masks; 5707ec681f3Smrg 5717ec681f3Smrg if (saved) { 5727ec681f3Smrg if (debug_get_option_dump_cpu()) { 5737ec681f3Smrg fprintf(stderr, "CPU <-> L3 cache mapping:\n"); 5747ec681f3Smrg for (unsigned i = 0; i < util_cpu_caps.num_L3_caches; i++) { 5757ec681f3Smrg fprintf(stderr, " - L3 %u mask = ", i); 5767ec681f3Smrg for (int j = 
util_cpu_caps.max_cpus - 1; j >= 0; j -= 32) 5777ec681f3Smrg fprintf(stderr, "%08x ", util_cpu_caps.L3_affinity_mask[i][j / 32]); 5787ec681f3Smrg fprintf(stderr, "\n"); 5797ec681f3Smrg } 5807ec681f3Smrg } 58101e04c3fSmrg 5827ec681f3Smrg /* Restore the original affinity mask. */ 5837ec681f3Smrg util_set_current_thread_affinity(saved_mask, NULL, 5847ec681f3Smrg util_cpu_caps.num_cpu_mask_bits); 5857ec681f3Smrg } else { 5867ec681f3Smrg if (debug_get_option_dump_cpu()) 5877ec681f3Smrg fprintf(stderr, "Cannot set thread affinity for any thread.\n"); 5887ec681f3Smrg } 58901e04c3fSmrg } 59001e04c3fSmrg#endif 59101e04c3fSmrg} 59201e04c3fSmrg 59301e04c3fSmrgstatic void 59401e04c3fSmrgutil_cpu_detect_once(void) 59501e04c3fSmrg{ 5967ec681f3Smrg int available_cpus = 0; 5977ec681f3Smrg int total_cpus = 0; 5987ec681f3Smrg 59901e04c3fSmrg memset(&util_cpu_caps, 0, sizeof util_cpu_caps); 60001e04c3fSmrg 60101e04c3fSmrg /* Count the number of CPUs in system */ 60201e04c3fSmrg#if defined(PIPE_OS_WINDOWS) 60301e04c3fSmrg { 60401e04c3fSmrg SYSTEM_INFO system_info; 60501e04c3fSmrg GetSystemInfo(&system_info); 6067ec681f3Smrg available_cpus = MAX2(1, system_info.dwNumberOfProcessors); 60701e04c3fSmrg } 6087ec681f3Smrg#elif defined(PIPE_OS_UNIX) 6097ec681f3Smrg# if defined(HAS_SCHED_GETAFFINITY) 61001e04c3fSmrg { 6117ec681f3Smrg /* sched_setaffinity() can be used to further restrict the number of 6127ec681f3Smrg * CPUs on which the process can run. Use sched_getaffinity() to 6137ec681f3Smrg * determine the true number of available CPUs. 6147ec681f3Smrg * 6157ec681f3Smrg * FIXME: The Linux manual page for sched_getaffinity describes how this 6167ec681f3Smrg * simple implementation will fail with > 1024 CPUs, and we'll fall back 6177ec681f3Smrg * to the _SC_NPROCESSORS_ONLN path. Support for > 1024 CPUs can be 6187ec681f3Smrg * added to this path once someone has such a system for testing. 
6197ec681f3Smrg */ 6207ec681f3Smrg cpu_set_t affin; 6217ec681f3Smrg if (sched_getaffinity(getpid(), sizeof(affin), &affin) == 0) 6227ec681f3Smrg available_cpus = CPU_COUNT(&affin); 6237ec681f3Smrg } 6247ec681f3Smrg# endif 62501e04c3fSmrg 6267ec681f3Smrg /* Linux, FreeBSD, DragonFly, and Mac OS X should have 6277ec681f3Smrg * _SC_NOPROCESSORS_ONLN. NetBSD and OpenBSD should have HW_NCPUONLINE. 6287ec681f3Smrg * This is what FFmpeg uses on those platforms. 6297ec681f3Smrg */ 6307ec681f3Smrg# if defined(PIPE_OS_BSD) && defined(HW_NCPUONLINE) 6317ec681f3Smrg if (available_cpus == 0) { 6327ec681f3Smrg const int mib[] = { CTL_HW, HW_NCPUONLINE }; 6337ec681f3Smrg int ncpu; 63422fc2aa6Srjs size_t len = sizeof(ncpu); 63501e04c3fSmrg 63601e04c3fSmrg sysctl(mib, 2, &ncpu, &len, NULL, 0); 6377ec681f3Smrg available_cpus = ncpu; 63801e04c3fSmrg } 6397ec681f3Smrg# elif defined(_SC_NPROCESSORS_ONLN) 6407ec681f3Smrg if (available_cpus == 0) { 6417ec681f3Smrg available_cpus = sysconf(_SC_NPROCESSORS_ONLN); 6427ec681f3Smrg if (available_cpus == ~0) 6437ec681f3Smrg available_cpus = 1; 6447ec681f3Smrg } 6457ec681f3Smrg# elif defined(PIPE_OS_BSD) 6467ec681f3Smrg if (available_cpus == 0) { 6477ec681f3Smrg const int mib[] = { CTL_HW, HW_NCPU }; 6487ec681f3Smrg int ncpu; 6497ec681f3Smrg int len = sizeof(ncpu); 6507ec681f3Smrg 6517ec681f3Smrg sysctl(mib, 2, &ncpu, &len, NULL, 0); 6527ec681f3Smrg available_cpus = ncpu; 6537ec681f3Smrg } 6547ec681f3Smrg# endif /* defined(PIPE_OS_BSD) */ 6557ec681f3Smrg 6567ec681f3Smrg /* Determine the maximum number of CPUs configured in the system. This is 6577ec681f3Smrg * used to properly set num_cpu_mask_bits below. On BSDs that don't have 6587ec681f3Smrg * HW_NCPUONLINE, it was not clear whether HW_NCPU is the number of 6597ec681f3Smrg * configured or the number of online CPUs. For that reason, prefer the 6607ec681f3Smrg * _SC_NPROCESSORS_CONF path on all BSDs. 
6617ec681f3Smrg */ 6627ec681f3Smrg# if defined(_SC_NPROCESSORS_CONF) 6637ec681f3Smrg total_cpus = sysconf(_SC_NPROCESSORS_CONF); 6647ec681f3Smrg if (total_cpus == ~0) 6657ec681f3Smrg total_cpus = 1; 6667ec681f3Smrg# elif defined(PIPE_OS_BSD) 6677ec681f3Smrg { 6687ec681f3Smrg const int mib[] = { CTL_HW, HW_NCPU }; 6697ec681f3Smrg int ncpu; 6707ec681f3Smrg int len = sizeof(ncpu); 6717ec681f3Smrg 6727ec681f3Smrg sysctl(mib, 2, &ncpu, &len, NULL, 0); 6737ec681f3Smrg total_cpus = ncpu; 6747ec681f3Smrg } 6757ec681f3Smrg# endif /* defined(PIPE_OS_BSD) */ 6767ec681f3Smrg#endif /* defined(PIPE_OS_UNIX) */ 6777ec681f3Smrg 6787ec681f3Smrg util_cpu_caps.nr_cpus = MAX2(1, available_cpus); 6797ec681f3Smrg total_cpus = MAX2(total_cpus, util_cpu_caps.nr_cpus); 6807ec681f3Smrg 6817ec681f3Smrg util_cpu_caps.max_cpus = total_cpus; 6827ec681f3Smrg util_cpu_caps.num_cpu_mask_bits = align(total_cpus, 32); 68301e04c3fSmrg 68401e04c3fSmrg /* Make the fallback cacheline size nonzero so that it can be 68501e04c3fSmrg * safely passed to align(). 68601e04c3fSmrg */ 68701e04c3fSmrg util_cpu_caps.cacheline = sizeof(void *); 68801e04c3fSmrg 68901e04c3fSmrg#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) 69001e04c3fSmrg if (has_cpuid()) { 69101e04c3fSmrg uint32_t regs[4]; 69201e04c3fSmrg uint32_t regs2[4]; 69301e04c3fSmrg 69401e04c3fSmrg util_cpu_caps.cacheline = 32; 69501e04c3fSmrg 69601e04c3fSmrg /* Get max cpuid level */ 69701e04c3fSmrg cpuid(0x00000000, regs); 69801e04c3fSmrg 69901e04c3fSmrg if (regs[0] >= 0x00000001) { 70001e04c3fSmrg unsigned int cacheline; 70101e04c3fSmrg 70201e04c3fSmrg cpuid (0x00000001, regs2); 70301e04c3fSmrg 70401e04c3fSmrg util_cpu_caps.x86_cpu_type = (regs2[0] >> 8) & 0xf; 70501e04c3fSmrg /* Add "extended family". 
*/ 70601e04c3fSmrg if (util_cpu_caps.x86_cpu_type == 0xf) 70701e04c3fSmrg util_cpu_caps.x86_cpu_type += ((regs2[0] >> 20) & 0xff); 70801e04c3fSmrg 7097ec681f3Smrg switch (util_cpu_caps.x86_cpu_type) { 7107ec681f3Smrg case 0x17: 7117ec681f3Smrg util_cpu_caps.family = CPU_AMD_ZEN1_ZEN2; 7127ec681f3Smrg break; 7137ec681f3Smrg case 0x18: 7147ec681f3Smrg util_cpu_caps.family = CPU_AMD_ZEN_HYGON; 7157ec681f3Smrg break; 7167ec681f3Smrg case 0x19: 7177ec681f3Smrg util_cpu_caps.family = CPU_AMD_ZEN3; 7187ec681f3Smrg break; 7197ec681f3Smrg default: 7207ec681f3Smrg if (util_cpu_caps.x86_cpu_type > 0x19) 7217ec681f3Smrg util_cpu_caps.family = CPU_AMD_ZEN_NEXT; 7227ec681f3Smrg } 7237ec681f3Smrg 72401e04c3fSmrg /* general feature flags */ 72501e04c3fSmrg util_cpu_caps.has_tsc = (regs2[3] >> 4) & 1; /* 0x0000010 */ 72601e04c3fSmrg util_cpu_caps.has_mmx = (regs2[3] >> 23) & 1; /* 0x0800000 */ 72701e04c3fSmrg util_cpu_caps.has_sse = (regs2[3] >> 25) & 1; /* 0x2000000 */ 72801e04c3fSmrg util_cpu_caps.has_sse2 = (regs2[3] >> 26) & 1; /* 0x4000000 */ 72901e04c3fSmrg util_cpu_caps.has_sse3 = (regs2[2] >> 0) & 1; /* 0x0000001 */ 73001e04c3fSmrg util_cpu_caps.has_ssse3 = (regs2[2] >> 9) & 1; /* 0x0000020 */ 73101e04c3fSmrg util_cpu_caps.has_sse4_1 = (regs2[2] >> 19) & 1; 73201e04c3fSmrg util_cpu_caps.has_sse4_2 = (regs2[2] >> 20) & 1; 73301e04c3fSmrg util_cpu_caps.has_popcnt = (regs2[2] >> 23) & 1; 73401e04c3fSmrg util_cpu_caps.has_avx = ((regs2[2] >> 28) & 1) && // AVX 73501e04c3fSmrg ((regs2[2] >> 27) & 1) && // OSXSAVE 73601e04c3fSmrg ((xgetbv() & 6) == 6); // XMM & YMM 73701e04c3fSmrg util_cpu_caps.has_f16c = ((regs2[2] >> 29) & 1) && util_cpu_caps.has_avx; 73801e04c3fSmrg util_cpu_caps.has_fma = ((regs2[2] >> 12) & 1) && util_cpu_caps.has_avx; 73901e04c3fSmrg util_cpu_caps.has_mmx2 = util_cpu_caps.has_sse; /* SSE cpus supports mmxext too */ 74001e04c3fSmrg#if defined(PIPE_ARCH_X86_64) 74101e04c3fSmrg util_cpu_caps.has_daz = 1; 74201e04c3fSmrg#else 74301e04c3fSmrg 
util_cpu_caps.has_daz = util_cpu_caps.has_sse3 || 74401e04c3fSmrg (util_cpu_caps.has_sse2 && sse2_has_daz()); 74501e04c3fSmrg#endif 74601e04c3fSmrg 74701e04c3fSmrg cacheline = ((regs2[1] >> 8) & 0xFF) * 8; 74801e04c3fSmrg if (cacheline > 0) 74901e04c3fSmrg util_cpu_caps.cacheline = cacheline; 75001e04c3fSmrg } 75101e04c3fSmrg if (util_cpu_caps.has_avx && regs[0] >= 0x00000007) { 75201e04c3fSmrg uint32_t regs7[4]; 75301e04c3fSmrg cpuid_count(0x00000007, 0x00000000, regs7); 75401e04c3fSmrg util_cpu_caps.has_avx2 = (regs7[1] >> 5) & 1; 75501e04c3fSmrg } 75601e04c3fSmrg 75701e04c3fSmrg // check for avx512 75801e04c3fSmrg if (((regs2[2] >> 27) & 1) && // OSXSAVE 75901e04c3fSmrg (xgetbv() & (0x7 << 5)) && // OPMASK: upper-256 enabled by OS 76001e04c3fSmrg ((xgetbv() & 6) == 6)) { // XMM/YMM enabled by OS 76101e04c3fSmrg uint32_t regs3[4]; 76201e04c3fSmrg cpuid_count(0x00000007, 0x00000000, regs3); 76301e04c3fSmrg util_cpu_caps.has_avx512f = (regs3[1] >> 16) & 1; 76401e04c3fSmrg util_cpu_caps.has_avx512dq = (regs3[1] >> 17) & 1; 76501e04c3fSmrg util_cpu_caps.has_avx512ifma = (regs3[1] >> 21) & 1; 76601e04c3fSmrg util_cpu_caps.has_avx512pf = (regs3[1] >> 26) & 1; 76701e04c3fSmrg util_cpu_caps.has_avx512er = (regs3[1] >> 27) & 1; 76801e04c3fSmrg util_cpu_caps.has_avx512cd = (regs3[1] >> 28) & 1; 76901e04c3fSmrg util_cpu_caps.has_avx512bw = (regs3[1] >> 30) & 1; 77001e04c3fSmrg util_cpu_caps.has_avx512vl = (regs3[1] >> 31) & 1; 77101e04c3fSmrg util_cpu_caps.has_avx512vbmi = (regs3[2] >> 1) & 1; 77201e04c3fSmrg } 77301e04c3fSmrg 77401e04c3fSmrg if (regs[1] == 0x756e6547 && regs[2] == 0x6c65746e && regs[3] == 0x49656e69) { 77501e04c3fSmrg /* GenuineIntel */ 77601e04c3fSmrg util_cpu_caps.has_intel = 1; 77701e04c3fSmrg } 77801e04c3fSmrg 77901e04c3fSmrg cpuid(0x80000000, regs); 78001e04c3fSmrg 78101e04c3fSmrg if (regs[0] >= 0x80000001) { 78201e04c3fSmrg 78301e04c3fSmrg cpuid(0x80000001, regs2); 78401e04c3fSmrg 78501e04c3fSmrg util_cpu_caps.has_mmx |= (regs2[3] >> 23) & 1; 
78601e04c3fSmrg util_cpu_caps.has_mmx2 |= (regs2[3] >> 22) & 1; 78701e04c3fSmrg util_cpu_caps.has_3dnow = (regs2[3] >> 31) & 1; 78801e04c3fSmrg util_cpu_caps.has_3dnow_ext = (regs2[3] >> 30) & 1; 78901e04c3fSmrg 79001e04c3fSmrg util_cpu_caps.has_xop = util_cpu_caps.has_avx && 79101e04c3fSmrg ((regs2[2] >> 11) & 1); 79201e04c3fSmrg } 79301e04c3fSmrg 79401e04c3fSmrg if (regs[0] >= 0x80000006) { 79501e04c3fSmrg /* should we really do this if the clflush size above worked? */ 79601e04c3fSmrg unsigned int cacheline; 79701e04c3fSmrg cpuid(0x80000006, regs2); 79801e04c3fSmrg cacheline = regs2[2] & 0xFF; 79901e04c3fSmrg if (cacheline > 0) 80001e04c3fSmrg util_cpu_caps.cacheline = cacheline; 80101e04c3fSmrg } 80201e04c3fSmrg 80301e04c3fSmrg if (!util_cpu_caps.has_sse) { 80401e04c3fSmrg util_cpu_caps.has_sse2 = 0; 80501e04c3fSmrg util_cpu_caps.has_sse3 = 0; 80601e04c3fSmrg util_cpu_caps.has_ssse3 = 0; 80701e04c3fSmrg util_cpu_caps.has_sse4_1 = 0; 80801e04c3fSmrg } 80901e04c3fSmrg } 81001e04c3fSmrg#endif /* PIPE_ARCH_X86 || PIPE_ARCH_X86_64 */ 81101e04c3fSmrg 8128a1362adSmaya#if defined(PIPE_ARCH_ARM) || defined(PIPE_ARCH_AARCH64) 81301e04c3fSmrg check_os_arm_support(); 81401e04c3fSmrg#endif 81501e04c3fSmrg 81601e04c3fSmrg#if defined(PIPE_ARCH_PPC) 81701e04c3fSmrg check_os_altivec_support(); 81801e04c3fSmrg#endif /* PIPE_ARCH_PPC */ 81901e04c3fSmrg 8207ec681f3Smrg#if defined(PIPE_ARCH_MIPS64) 8217ec681f3Smrg check_os_mips64_support(); 8227ec681f3Smrg#endif /* PIPE_ARCH_MIPS64 */ 8237ec681f3Smrg 82401e04c3fSmrg get_cpu_topology(); 82501e04c3fSmrg 82601e04c3fSmrg if (debug_get_option_dump_cpu()) { 8277ec681f3Smrg printf("util_cpu_caps.nr_cpus = %u\n", util_cpu_caps.nr_cpus); 8287ec681f3Smrg 8297ec681f3Smrg printf("util_cpu_caps.x86_cpu_type = %u\n", util_cpu_caps.x86_cpu_type); 8307ec681f3Smrg printf("util_cpu_caps.cacheline = %u\n", util_cpu_caps.cacheline); 8317ec681f3Smrg 8327ec681f3Smrg printf("util_cpu_caps.has_tsc = %u\n", util_cpu_caps.has_tsc); 8337ec681f3Smrg 
printf("util_cpu_caps.has_mmx = %u\n", util_cpu_caps.has_mmx); 8347ec681f3Smrg printf("util_cpu_caps.has_mmx2 = %u\n", util_cpu_caps.has_mmx2); 8357ec681f3Smrg printf("util_cpu_caps.has_sse = %u\n", util_cpu_caps.has_sse); 8367ec681f3Smrg printf("util_cpu_caps.has_sse2 = %u\n", util_cpu_caps.has_sse2); 8377ec681f3Smrg printf("util_cpu_caps.has_sse3 = %u\n", util_cpu_caps.has_sse3); 8387ec681f3Smrg printf("util_cpu_caps.has_ssse3 = %u\n", util_cpu_caps.has_ssse3); 8397ec681f3Smrg printf("util_cpu_caps.has_sse4_1 = %u\n", util_cpu_caps.has_sse4_1); 8407ec681f3Smrg printf("util_cpu_caps.has_sse4_2 = %u\n", util_cpu_caps.has_sse4_2); 8417ec681f3Smrg printf("util_cpu_caps.has_avx = %u\n", util_cpu_caps.has_avx); 8427ec681f3Smrg printf("util_cpu_caps.has_avx2 = %u\n", util_cpu_caps.has_avx2); 8437ec681f3Smrg printf("util_cpu_caps.has_f16c = %u\n", util_cpu_caps.has_f16c); 8447ec681f3Smrg printf("util_cpu_caps.has_popcnt = %u\n", util_cpu_caps.has_popcnt); 8457ec681f3Smrg printf("util_cpu_caps.has_3dnow = %u\n", util_cpu_caps.has_3dnow); 8467ec681f3Smrg printf("util_cpu_caps.has_3dnow_ext = %u\n", util_cpu_caps.has_3dnow_ext); 8477ec681f3Smrg printf("util_cpu_caps.has_xop = %u\n", util_cpu_caps.has_xop); 8487ec681f3Smrg printf("util_cpu_caps.has_altivec = %u\n", util_cpu_caps.has_altivec); 8497ec681f3Smrg printf("util_cpu_caps.has_vsx = %u\n", util_cpu_caps.has_vsx); 8507ec681f3Smrg printf("util_cpu_caps.has_neon = %u\n", util_cpu_caps.has_neon); 8517ec681f3Smrg printf("util_cpu_caps.has_msa = %u\n", util_cpu_caps.has_msa); 8527ec681f3Smrg printf("util_cpu_caps.has_daz = %u\n", util_cpu_caps.has_daz); 8537ec681f3Smrg printf("util_cpu_caps.has_avx512f = %u\n", util_cpu_caps.has_avx512f); 8547ec681f3Smrg printf("util_cpu_caps.has_avx512dq = %u\n", util_cpu_caps.has_avx512dq); 8557ec681f3Smrg printf("util_cpu_caps.has_avx512ifma = %u\n", util_cpu_caps.has_avx512ifma); 8567ec681f3Smrg printf("util_cpu_caps.has_avx512pf = %u\n", util_cpu_caps.has_avx512pf); 8577ec681f3Smrg 
printf("util_cpu_caps.has_avx512er = %u\n", util_cpu_caps.has_avx512er); 8587ec681f3Smrg printf("util_cpu_caps.has_avx512cd = %u\n", util_cpu_caps.has_avx512cd); 8597ec681f3Smrg printf("util_cpu_caps.has_avx512bw = %u\n", util_cpu_caps.has_avx512bw); 8607ec681f3Smrg printf("util_cpu_caps.has_avx512vl = %u\n", util_cpu_caps.has_avx512vl); 8617ec681f3Smrg printf("util_cpu_caps.has_avx512vbmi = %u\n", util_cpu_caps.has_avx512vbmi); 8627ec681f3Smrg printf("util_cpu_caps.num_L3_caches = %u\n", util_cpu_caps.num_L3_caches); 8637ec681f3Smrg printf("util_cpu_caps.num_cpu_mask_bits = %u\n", util_cpu_caps.num_cpu_mask_bits); 86401e04c3fSmrg } 86501e04c3fSmrg} 86601e04c3fSmrg /* Once-flag passed to call_once() by util_cpu_detect() below; statically initialised with ONCE_FLAG_INIT. */86701e04c3fSmrgstatic once_flag cpu_once_flag = ONCE_FLAG_INIT; 86801e04c3fSmrg /** Public entry point for CPU detection: takes no arguments, returns nothing, and hands util_cpu_detect_once to call_once() guarded by cpu_once_flag, so the detection routine is reached only through that once-guard. */86901e04c3fSmrgvoid 87001e04c3fSmrgutil_cpu_detect(void) 87101e04c3fSmrg{ 87201e04c3fSmrg call_once(&cpu_once_flag, util_cpu_detect_once); 87301e04c3fSmrg} 874