/*
 * isadetection.h -- detect supported instruction set(s)
 *
 * Slightly modified version of isadetection.h in simdjson.
 *
 * Copyright (c) 2024      NLnet Labs (Jeroen Koekkoek)
 * Copyright (c) 2020-     simdjson (Daniel Lemire,
 *                                   Geoff Langdale,
 *                                   John Keiser)
 * Copyright (c) 2016-     Facebook, Inc (Adam Paszke)
 * Copyright (c) 2014-     Facebook, Inc (Soumith Chintala)
 * Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert)
 * Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu)
 * Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu)
 * Copyright (c) 2011-2013 NYU (Clement Farabet)
 * Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert,
 *                                                   Leon Bottou,
 *                                                   Iain Melvin,
 *                                                   Jason Weston)
 * Copyright (c) 2006      Idiap Research Institute (Samy Bengio)
 * Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert,
 *                                                   Samy Bengio,
 *                                                   Johnny Mariethoz)
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * 3. Neither the names of simdjson, Facebook, Deepmind Technologies, NYU,
 *    NEC Laboratories America and IDIAP Research Institute nor the names of
 *    its contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef ISADETECTION_H
#define ISADETECTION_H

#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#if defined(_MSC_VER)
#include <intrin.h>
#include <immintrin.h>
#elif defined(HAVE_CPUID)
#include <cpuid.h>
#endif

/**
 * Bit flags describing the instruction set extensions available on the host.
 *
 * detect_supported_architectures() returns a bitwise OR of these values;
 * DEFAULT (no bits set) means no extension was detected.
 */
enum instruction_set {
  DEFAULT = 0x0,
  NEON = 0x1,
  AVX2 = 0x4,
  SSE42 = 0x8,
  PCLMULQDQ = 0x10,
  BMI1 = 0x20,
  BMI2 = 0x40,
  ALTIVEC = 0x80,
  AVX512F = 0x100,
  AVX512DQ = 0x200,
  AVX512IFMA = 0x400,
  AVX512PF = 0x800,
  AVX512ER = 0x1000,
  AVX512CD = 0x2000,
  AVX512BW = 0x4000,
  AVX512VL = 0x8000,
  AVX512VBMI2 = 0x10000
};

#if defined(__PPC64__)

/** 64-bit PowerPC: ALTIVEC is reported unconditionally (no runtime probe). */
static inline uint32_t detect_supported_architectures(void) {
  return ALTIVEC;
}

#elif defined(__arm__) || defined(__aarch64__) // incl. armel, armhf, arm64

#if defined(__ARM_NEON)

/** ARM compiled with NEON enabled: NEON is reported (compile-time decision). */
static inline uint32_t detect_supported_architectures(void) {
  return NEON;
}

#else // ARM without NEON

/** ARM compiled without NEON: no extensions are reported. */
static inline uint32_t detect_supported_architectures(void) {
  return DEFAULT;
}

#endif

#elif defined(__x86_64__) || defined(_M_AMD64) // x64

// Can be found on Intel ISA Reference for CPUID
static const uint32_t cpuid_avx2_bit = 1 << 5;         ///< @private Bit 5 of EBX for EAX=0x7
static const uint32_t cpuid_bmi1_bit = 1 << 3;         ///< @private bit 3 of EBX for EAX=0x7
static const uint32_t cpuid_bmi2_bit = 1 << 8;         ///< @private bit 8 of EBX for EAX=0x7
static const uint32_t cpuid_avx512f_bit = 1 << 16;     ///< @private bit 16 of EBX for EAX=0x7
static const uint32_t cpuid_avx512dq_bit = 1 << 17;    ///< @private bit 17 of EBX for EAX=0x7
static const uint32_t cpuid_avx512ifma_bit = 1 << 21;  ///< @private bit 21 of EBX for EAX=0x7
static const uint32_t cpuid_avx512pf_bit = 1 << 26;    ///< @private bit 26 of EBX for EAX=0x7
static const uint32_t cpuid_avx512er_bit = 1 << 27;    ///< @private bit 27 of EBX for EAX=0x7
static const uint32_t cpuid_avx512cd_bit = 1 << 28;    ///< @private bit 28 of EBX for EAX=0x7
static const uint32_t cpuid_avx512bw_bit = 1 << 30;    ///< @private bit 30 of EBX for EAX=0x7
static const uint32_t cpuid_avx512vl_bit = 1U << 31;   ///< @private bit 31 of EBX for EAX=0x7
static const uint32_t cpuid_avx512vbmi2_bit = 1 << 6;  ///< @private bit 6 of ECX for EAX=0x7
static const uint32_t cpuid_sse42_bit = 1 << 20;       ///< @private bit 20 of ECX for EAX=0x1
static const uint32_t cpuid_pclmulqdq_bit = 1 << 1;    ///< @private bit 1 of ECX for EAX=0x1
static const uint32_t cpuid_have_xgetbv_bit = 1 << 27; ///< @private bit 27 of ECX for EAX=0x1
static const uint32_t cpuid_have_avx_bit = 1 << 28;    ///< @private bit 28 of ECX for EAX=0x1

// Can be found on Intel ISA Reference for XGETBV (XCR0 layout)
static const uint64_t xcr0_avx256_saved = 0x6;  ///< @private bits 1 and 2 of XCR0 (XMM and YMM state)
static const uint64_t xcr0_avx512_saved = 0xe0; ///< @private bits 5, 6 and 7 of XCR0 (opmask, ZMM_Hi256, Hi16_ZMM state)

/**
 * Execute the CPUID instruction.
 *
 * On entry *eax holds the leaf and *ecx the sub-leaf to query (both must be
 * initialized by the caller). On return the four pointed-to objects hold
 * the EAX, EBX, ECX and EDX register values produced by the instruction.
 * No range check on the leaf is performed here.
 */
static inline void cpuid(
  uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
{
  const uint32_t leaf = *eax, subleaf = *ecx;
#if defined(_MSC_VER)
  int cpu_info[4];
  // __cpuidex (unlike __cpuid) lets the caller choose the sub-leaf
  __cpuidex(cpu_info, (int)leaf, (int)subleaf);
  *eax = (uint32_t)cpu_info[0];
  *ebx = (uint32_t)cpu_info[1];
  *ecx = (uint32_t)cpu_info[2];
  *edx = (uint32_t)cpu_info[3];
#elif defined(HAVE_CPUID)
  uint32_t a, b, c, d;
  // __cpuid_count loads the sub-leaf into ECX and always writes all four
  // outputs, so no register is left uninitialized
  __cpuid_count(leaf, subleaf, a, b, c, d);
  *eax = a;
  *ebx = b;
  *ecx = c;
  *edx = d;
#else
  uint32_t a = leaf, b, c = subleaf, d;
  // __asm__/__volatile__ spelling stays available in strict ISO C modes
  __asm__ __volatile__("cpuid\n\t" : "+a"(a), "=b"(b), "+c"(c), "=d"(d));
  *eax = a;
  *ebx = b;
  *ecx = c;
  *edx = d;
#endif
}

/**
 * Read the extended control register selected by @p ecx using XGETBV.
 *
 * Must only be called after CPUID.1:ECX.OSXSAVE has been verified.
 *
 * @return the 64-bit register value (EDX:EAX).
 */
static inline uint64_t xgetbv(uint32_t ecx)
{
#if defined(_MSC_VER)
  return _xgetbv(ecx);
#else
  uint32_t a, c = ecx, d;
  __asm__ __volatile__("xgetbv\n\t" : "=d"(d), "=a"(a) : "c"(c));
  uint64_t xcr0 = ((uint64_t)d << 32) | (uint64_t)a;
  return xcr0;
#endif
}

/**
 * Detect the instruction set extensions usable on the host (x86-64).
 *
 * SSE4.2, PCLMULQDQ, BMI1 and BMI2 are reported from their CPUID bits.
 * AVX2 and the AVX-512 subsets are reported only if the operating system
 * has additionally enabled the required register state in XCR0.
 *
 * @return bitwise OR of enum instruction_set values.
 */
static inline uint32_t detect_supported_architectures(void)
{
  uint32_t eax, ebx, ecx, edx;
  uint32_t host_isa = 0x0, host_avx2_isa = 0x0, host_avx512_isa = 0x0;

  // EAX for EAX=0x0 is the highest supported basic leaf. Querying a leaf
  // above it does not reliably yield zeros, so it must be checked first.
  eax = 0x0;
  ecx = 0x0;
  cpuid(&eax, &ebx, &ecx, &edx);
  const uint32_t max_leaf = eax;
  if (max_leaf < 0x1) {
    return DEFAULT;
  }

  // EBX and ECX for EAX=0x7, ECX=0x0
  if (max_leaf >= 0x7) {
    eax = 0x7;
    ecx = 0x0;
    cpuid(&eax, &ebx, &ecx, &edx);
    if (ebx & cpuid_bmi1_bit) {
      host_isa |= BMI1;
    }

    if (ebx & cpuid_bmi2_bit) {
      host_isa |= BMI2;
    }

    if (ebx & cpuid_avx2_bit) {
      host_avx2_isa |= AVX2;
    }

    if (ebx & cpuid_avx512f_bit) {
      host_avx512_isa |= AVX512F;
    }

    if (ebx & cpuid_avx512dq_bit) {
      host_avx512_isa |= AVX512DQ;
    }

    if (ebx & cpuid_avx512ifma_bit) {
      host_avx512_isa |= AVX512IFMA;
    }

    if (ebx & cpuid_avx512pf_bit) {
      host_avx512_isa |= AVX512PF;
    }

    if (ebx & cpuid_avx512er_bit) {
      host_avx512_isa |= AVX512ER;
    }

    if (ebx & cpuid_avx512cd_bit) {
      host_avx512_isa |= AVX512CD;
    }

    if (ebx & cpuid_avx512bw_bit) {
      host_avx512_isa |= AVX512BW;
    }

    if (ebx & cpuid_avx512vl_bit) {
      host_avx512_isa |= AVX512VL;
    }

    if (ecx & cpuid_avx512vbmi2_bit) {
      host_avx512_isa |= AVX512VBMI2;
    }
  }

  // ECX for EAX=0x1
  eax = 0x1;
  ecx = 0x0;
  cpuid(&eax, &ebx, &ecx, &edx);
  if (ecx & cpuid_sse42_bit) {
    host_isa |= SSE42;
  }

  if (ecx & cpuid_pclmulqdq_bit) {
    host_isa |= PCLMULQDQ;
  }

  // Correct detection of AVX2 support requires more than checking the CPUID
  // bit. Peter Cordes provides an excellent answer on Stack Overflow
  // (https://stackoverflow.com/a/34071400) quoting the article Introduction
  // to Intel Advanced Vector Extensions (search Wayback Machine).
  //
  // 1. Verify that the operating system supports XGETBV using
  //    CPUID.1:ECX.OSXSAVE bit 27 = 1.
  // 2. Verify the processor supports the AVX instruction extensions using:
  //    CPUID.1:ECX bit 28 = 1.
  // 3. Issue XGETBV, and verify that the feature-enabled mask at bits 1 and 2
  //    are 11b (XMM state and YMM state enabled by the operating system).
  //
  // AVX-512 additionally requires bits 5, 6 and 7 of the mask to be 111b
  // (opmask, ZMM_Hi256 and Hi16_ZMM state enabled by the operating system).

  // Determine if the CPU supports AVX
  const bool have_avx = (ecx & cpuid_have_avx_bit) != 0;
  // Determine if the Operating System supports XGETBV
  const bool have_xgetbv = (ecx & cpuid_have_xgetbv_bit) != 0;

  if (have_avx && have_xgetbv) {
    const uint64_t xcr0 = xgetbv(0x0);
    if ((xcr0 & xcr0_avx256_saved) == xcr0_avx256_saved) {
      host_isa |= host_avx2_isa;
      if ((xcr0 & xcr0_avx512_saved) == xcr0_avx512_saved) {
        host_isa |= host_avx512_isa;
      }
    }
  }

  return host_isa;
}
#else // fallback

/** Unknown architecture: no extensions are reported. */
static inline uint32_t detect_supported_architectures(void) {
  return DEFAULT;
}

#endif // end SIMD extension detection code

#endif // ISADETECTION_H