Home | History | Annotate | Line # | Download | only in src
      1 /*
      2  * isadetection.h -- detect supported instruction set(s)
      3  *
      4  * Slightly modified version of isadetection.h in simdjson.
      5  *
      6  * Copyright (c) 2024      NLnet Labs               (Jeroen Koekkoek)
      7  * Copyright (c) 2020-     simdjson                 (Daniel Lemire,
      8  *                                                   Geoff Langdale,
      9  *                                                   John Keiser)
     10  * Copyright (c) 2016-     Facebook, Inc            (Adam Paszke)
     11  * Copyright (c) 2014-     Facebook, Inc            (Soumith Chintala)
     12  * Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert)
     13  * Copyright (c) 2012-2014 Deepmind Technologies    (Koray Kavukcuoglu)
     14  * Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu)
     15  * Copyright (c) 2011-2013 NYU                      (Clement Farabet)
     16  * Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert,
     17  *                                                   Leon Bottou,
     18  *                                                   Iain Melvin,
     19  *                                                   Jason Weston)
     20  * Copyright (c) 2006      Idiap Research Institute (Samy Bengio)
     21  * Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert,
     22  *                                                   Samy Bengio,
     23  *                                                   Johnny Mariethoz)
     24  *
     25  * All rights reserved.
     26  *
     27  * Redistribution and use in source and binary forms, with or without
     28  * modification, are permitted provided that the following conditions are met:
     29  *
     30  * 1. Redistributions of source code must retain the above copyright
     31  *    notice, this list of conditions and the following disclaimer.
     32  *
     33  * 2. Redistributions in binary form must reproduce the above copyright
     34  *    notice, this list of conditions and the following disclaimer in the
     35  *    documentation and/or other materials provided with the distribution.
     36  *
     37  * 3. Neither the names of simdjson, Facebook, Deepmind Technologies, NYU,
     38  *    NEC Laboratories America and IDIAP Research Institute nor the names of
     39  *    its contributors may be used to endorse or promote products derived from
     40  *    this software without specific prior written permission.
     41  *
     42  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     43  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     44  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     45  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     46  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     47  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     48  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     49  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     50  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     51  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     52  * POSSIBILITY OF SUCH DAMAGE.
     53  */
     54 
     55 #ifndef ISADETECTION_H
     56 #define ISADETECTION_H
     57 
     58 #include <stdbool.h>
     59 #include <stdint.h>
     60 #include <stdlib.h>
     61 #if defined(_MSC_VER)
     62 #include <intrin.h>
     63 #include <immintrin.h>
     64 #elif defined(HAVE_CPUID)
     65 #include <cpuid.h>
     66 #endif
     67 
/**
 * Bit flags naming the instruction set extensions this header can report.
 *
 * detect_supported_architectures() returns a bitwise OR of these values.
 * Each enumerator other than DEFAULT occupies exactly one bit; the value 0x2
 * is not assigned to any enumerator.
 */
enum instruction_set {
  DEFAULT     = 0,        // no extension detected
  NEON        = 1 << 0,   // reported by the ARM build when __ARM_NEON is set
  AVX2        = 1 << 2,
  SSE42       = 1 << 3,
  PCLMULQDQ   = 1 << 4,
  BMI1        = 1 << 5,
  BMI2        = 1 << 6,
  ALTIVEC     = 1 << 7,   // reported by the __PPC64__ build
  AVX512F     = 1 << 8,
  AVX512DQ    = 1 << 9,
  AVX512IFMA  = 1 << 10,
  AVX512PF    = 1 << 11,
  AVX512ER    = 1 << 12,
  AVX512CD    = 1 << 13,
  AVX512BW    = 1 << 14,
  AVX512VL    = 1 << 15,
  AVX512VBMI2 = 1 << 16
};
     87 
     88 #if defined(__PPC64__)
     89 
     90 static inline uint32_t detect_supported_architectures(void) {
     91   return ALTIVEC;
     92 }
     93 
     94 #elif defined(__arm__) || defined(__aarch64__) // incl. armel, armhf, arm64
     95 
     96 #if defined(__ARM_NEON)
     97 
     98 static inline uint32_t detect_supported_architectures(void) {
     99   return NEON;
    100 }
    101 
    102 #else // ARM without NEON
    103 
    104 static inline uint32_t detect_supported_architectures(void) {
    105   return DEFAULT;
    106 }
    107 
    108 #endif
    109 
    110 #elif defined(__x86_64__) || defined(_M_AMD64) // x64
    111 
// Feature-flag masks applied to the registers returned by CPUID. The bit
// positions are documented in the CPUID entry of the Intel ISA reference.

// CPUID leaf EAX=0x7, register EBX
static const uint32_t cpuid_bmi1_bit = UINT32_C(1) << 3;         ///< @private EBX bit  3, EAX=0x7
static const uint32_t cpuid_avx2_bit = UINT32_C(1) << 5;         ///< @private EBX bit  5, EAX=0x7
static const uint32_t cpuid_bmi2_bit = UINT32_C(1) << 8;         ///< @private EBX bit  8, EAX=0x7
static const uint32_t cpuid_avx512f_bit = UINT32_C(1) << 16;     ///< @private EBX bit 16, EAX=0x7
static const uint32_t cpuid_avx512dq_bit = UINT32_C(1) << 17;    ///< @private EBX bit 17, EAX=0x7
static const uint32_t cpuid_avx512ifma_bit = UINT32_C(1) << 21;  ///< @private EBX bit 21, EAX=0x7
static const uint32_t cpuid_avx512pf_bit = UINT32_C(1) << 26;    ///< @private EBX bit 26, EAX=0x7
static const uint32_t cpuid_avx512er_bit = UINT32_C(1) << 27;    ///< @private EBX bit 27, EAX=0x7
static const uint32_t cpuid_avx512cd_bit = UINT32_C(1) << 28;    ///< @private EBX bit 28, EAX=0x7
static const uint32_t cpuid_avx512bw_bit = UINT32_C(1) << 30;    ///< @private EBX bit 30, EAX=0x7
static const uint32_t cpuid_avx512vl_bit = UINT32_C(1) << 31;    ///< @private EBX bit 31, EAX=0x7

// CPUID leaf EAX=0x7, register ECX
static const uint32_t cpuid_avx512vbmi2_bit = UINT32_C(1) << 6;  ///< @private ECX bit  6, EAX=0x7

// CPUID leaf EAX=0x1, register ECX
static const uint32_t cpuid_pclmulqdq_bit = UINT32_C(1) << 1;    ///< @private ECX bit  1, EAX=0x1
static const uint32_t cpuid_sse42_bit = UINT32_C(1) << 20;       ///< @private ECX bit 20, EAX=0x1
static const uint32_t cpuid_have_xgetbv_bit = UINT32_C(1) << 27; ///< @private ECX bit 27, EAX=0x1
static const uint32_t cpuid_have_avx_bit = UINT32_C(1) << 28;    ///< @private ECX bit 28, EAX=0x1
    129 
/**
 * @private
 * Execute the CPUID instruction for one leaf/sub-leaf pair.
 *
 * No range check is performed on @p leaf; use cpuid() for that.
 *
 * @param[in]  leaf     Value loaded into EAX before the instruction.
 * @param[in]  subleaf  Value loaded into ECX before the instruction.
 * @param[out] regs     Receives EAX, EBX, ECX and EDX, in that order.
 */
static inline void cpuid_raw(uint32_t leaf, uint32_t subleaf, uint32_t regs[4])
{
#if defined(_MSC_VER)
  int cpu_info[4];
  // __cpuidex takes the sub-leaf explicitly, __cpuid has no such argument.
  __cpuidex(cpu_info, (int)leaf, (int)subleaf);
  regs[0] = (uint32_t)cpu_info[0];
  regs[1] = (uint32_t)cpu_info[1];
  regs[2] = (uint32_t)cpu_info[2];
  regs[3] = (uint32_t)cpu_info[3];
#elif defined(HAVE_CPUID)
  // __cpuid_count loads ECX with the sub-leaf; __get_cpuid leaves ECX
  // unspecified, which makes leaf 0x7 results unreliable.
  __cpuid_count(leaf, subleaf, regs[0], regs[1], regs[2], regs[3]);
#else
  uint32_t a = leaf, b, c = subleaf, d;
  // Spelled __asm__ because the plain asm keyword is not available when
  // compiling in strict ISO mode (-std=c99, -std=c11).
  __asm__ __volatile__("cpuid\n\t" : "+a"(a), "=b"(b), "+c"(c), "=d"(d));
  regs[0] = a;
  regs[1] = b;
  regs[2] = c;
  regs[3] = d;
#endif
}

/**
 * @private
 * Query CPUID with range checking.
 *
 * On entry @p eax holds the leaf and @p ecx the sub-leaf; both must be
 * initialized by the caller. On return the four arguments hold the values
 * the instruction left in EAX, EBX, ECX and EDX.
 *
 * If the processor does not implement the requested leaf, all four outputs
 * are set to zero, so every feature bit of an unsupported leaf reads as
 * "absent" regardless of which backend was selected at compile time.
 *
 * @param[in,out] eax  Leaf on entry, EAX on return.
 * @param[out]    ebx  EBX on return.
 * @param[in,out] ecx  Sub-leaf on entry, ECX on return.
 * @param[out]    edx  EDX on return.
 */
static inline void cpuid(
  uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
{
  const uint32_t leaf = *eax, subleaf = *ecx;
  uint32_t regs[4] = { 0, 0, 0, 0 };

  // EAX of leaf 0 (basic range) or leaf 0x80000000 (extended range) is the
  // highest leaf implemented in that range.
  cpuid_raw(leaf & UINT32_C(0x80000000), 0, regs);
  if (regs[0] < leaf) {
    regs[0] = regs[1] = regs[2] = regs[3] = 0;
  } else {
    cpuid_raw(leaf, subleaf, regs);
  }

  *eax = regs[0];
  *ebx = regs[1];
  *ecx = regs[2];
  *edx = regs[3];
}
    152 
/**
 * @private
 * Read an extended control register with the XGETBV instruction.
 *
 * The caller must first confirm that the operating system has enabled
 * XGETBV (CPUID.1:ECX.OSXSAVE, bit 27); the instruction is not usable
 * otherwise.
 *
 * @param ecx  Index of the extended control register (0x0 selects XCR0).
 * @return     The 64-bit register value, EDX in the upper and EAX in the
 *             lower 32 bits.
 */
static inline uint64_t xgetbv(uint32_t ecx)
{
#if defined(_MSC_VER)
  return _xgetbv(ecx);
#else
  uint32_t lo, hi;
  // Spelled __asm__ because the plain asm keyword is not available when
  // compiling in strict ISO mode (-std=c99, -std=c11).
  __asm__ __volatile__("xgetbv\n\t" : "=a"(lo), "=d"(hi) : "c"(ecx));
  return ((uint64_t)hi << 32) | (uint64_t)lo;
#endif
}
    164 
    165 static inline uint32_t detect_supported_architectures(void)
    166 {
    167   uint32_t eax, ebx, ecx, edx;
    168   uint32_t host_isa = 0x0, host_avx_isa = 0x0;
    169 
    170   // ECX for EAX=0x7
    171   eax = 0x7;
    172   ecx = 0x0;
    173   cpuid(&eax, &ebx, &ecx, &edx);
    174   if (ebx & cpuid_bmi1_bit) {
    175     host_isa |= BMI1;
    176   }
    177 
    178   if (ebx & cpuid_bmi2_bit) {
    179     host_isa |= BMI2;
    180   }
    181 
    182   if (ebx & cpuid_avx2_bit) {
    183     host_avx_isa |= AVX2;
    184   }
    185 
    186   if (ebx & cpuid_avx512f_bit) {
    187     host_avx_isa |= AVX512F;
    188   }
    189 
    190   if (ebx & cpuid_avx512dq_bit) {
    191     host_avx_isa |= AVX512DQ;
    192   }
    193 
    194   if (ebx & cpuid_avx512ifma_bit) {
    195     host_avx_isa |= AVX512IFMA;
    196   }
    197 
    198   if (ebx & cpuid_avx512pf_bit) {
    199     host_avx_isa |= AVX512PF;
    200   }
    201 
    202   if (ebx & cpuid_avx512er_bit) {
    203     host_avx_isa |= AVX512ER;
    204   }
    205 
    206   if (ebx & cpuid_avx512cd_bit) {
    207     host_avx_isa |= AVX512CD;
    208   }
    209 
    210   if (ebx & cpuid_avx512bw_bit) {
    211     host_avx_isa |= AVX512BW;
    212   }
    213 
    214   if (ebx & cpuid_avx512vl_bit) {
    215     host_avx_isa |= AVX512VL;
    216   }
    217 
    218   if (ecx & cpuid_avx512vbmi2_bit) {
    219     host_avx_isa |= AVX512VBMI2;
    220   }
    221 
    222   bool have_avx = false, have_xgetbv = false;
    223 
    224   // EBX for EAX=0x1
    225   eax = 0x1;
    226   cpuid(&eax, &ebx, &ecx, &edx);
    227   if (ecx & cpuid_sse42_bit) {
    228     host_isa |= SSE42;
    229   }
    230 
    231   if (ecx & cpuid_pclmulqdq_bit) {
    232     host_isa |= PCLMULQDQ;
    233   }
    234 
    235   // Correct detection of AVX2 support requires more than checking the CPUID
    236   // bit. Peter Cordes provides an excellent answer on Stack Overflow
    237   // (https://stackoverflow.com/a/34071400) quoting the article Introduction
    238   // to Intel Advanced Vector Extensions (search Wayback Machine).
    239   //
    240   // 1. Verify that the operating system supports XGETBV using
    241   //    CPUID.1:ECX.OSXSAVE bit 27 = 1.
    242   // 2. Verify the processor supports the AVX instruction extensions using:
    243   //    CPUID.1:ECX bit 28 = 1.
    244   // 3. Issue XGETBV, and verify that the feature-enabled mask at bits 1 and 2
    245   //    are 11b (XMM state and YMM state enabled by the operating system).
    246 
    247 
    248   // Determine if the CPU supports AVX
    249   have_avx = (ecx & cpuid_have_avx_bit) != 0;
    250   // Determine if the Operating System supports XGETBV
    251   have_xgetbv = (ecx & cpuid_have_xgetbv_bit) != 0;
    252 
    253   if (have_avx && have_xgetbv) {
    254     uint64_t xcr0 = xgetbv(0x0);
    255     if ((xcr0 & 0x6) == 0x6)
    256       host_isa |= host_avx_isa;
    257   }
    258 
    259   return host_isa;
    260 }
    261 #else // fallback
    262 
    263 static inline uint32_t detect_supported_architectures(void) {
    264   return DEFAULT;
    265 }
    266 
    267 #endif // end SIMD extension detection code
    268 
    269 #endif // ISADETECTION_H
    270