Home | History | Annotate | Line # | Download | only in Support
      1 //===-- Host.cpp - Implement OS Host Concept --------------------*- C++ -*-===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 //  This file implements the operating system Host concept.
     10 //
     11 //===----------------------------------------------------------------------===//
     12 
     13 #include "llvm/Support/Host.h"
     14 #include "llvm/ADT/SmallSet.h"
     15 #include "llvm/ADT/SmallVector.h"
     16 #include "llvm/ADT/StringMap.h"
     17 #include "llvm/ADT/StringRef.h"
     18 #include "llvm/ADT/StringSwitch.h"
     19 #include "llvm/ADT/Triple.h"
     20 #include "llvm/Config/llvm-config.h"
     21 #include "llvm/Support/Debug.h"
     22 #include "llvm/Support/FileSystem.h"
     23 #include "llvm/Support/MemoryBuffer.h"
     24 #include "llvm/Support/X86TargetParser.h"
     25 #include "llvm/Support/raw_ostream.h"
     26 #include <assert.h>
     27 #include <string.h>
     28 
     29 // Include the platform-specific parts of this class.
     30 #ifdef LLVM_ON_UNIX
     31 #include "Unix/Host.inc"
     32 #include <sched.h>
     33 #endif
     34 #ifdef _WIN32
     35 #include "Windows/Host.inc"
     36 #endif
     37 #ifdef _MSC_VER
     38 #include <intrin.h>
     39 #endif
     40 #if defined(__APPLE__) && (!defined(__x86_64__))
     41 #include <mach/host_info.h>
     42 #include <mach/mach.h>
     43 #include <mach/mach_host.h>
     44 #include <mach/machine.h>
     45 #endif
     46 #ifdef _AIX
     47 #include <sys/systemcfg.h>
     48 #endif
     49 
     50 #define DEBUG_TYPE "host-detection"
     51 
     52 //===----------------------------------------------------------------------===//
     53 //
     54 //  Implementations of the CPU detection routines
     55 //
     56 //===----------------------------------------------------------------------===//
     57 
     58 using namespace llvm;
     59 
     60 static std::unique_ptr<llvm::MemoryBuffer>
     61     LLVM_ATTRIBUTE_UNUSED getProcCpuinfoContent() {
     62   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
     63       llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
     64   if (std::error_code EC = Text.getError()) {
     65     llvm::errs() << "Can't read "
     66                  << "/proc/cpuinfo: " << EC.message() << "\n";
     67     return nullptr;
     68   }
     69   return std::move(*Text);
     70 }
     71 
     72 StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) {
     73   // Access to the Processor Version Register (PVR) on PowerPC is privileged,
     74   // and so we must use an operating-system interface to determine the current
     75   // processor type. On Linux, this is exposed through the /proc/cpuinfo file.
     76   const char *generic = "generic";
     77 
     78   // The cpu line is second (after the 'processor: 0' line), so if this
     79   // buffer is too small then something has changed (or is wrong).
     80   StringRef::const_iterator CPUInfoStart = ProcCpuinfoContent.begin();
     81   StringRef::const_iterator CPUInfoEnd = ProcCpuinfoContent.end();
     82 
     83   StringRef::const_iterator CIP = CPUInfoStart;
     84 
     85   StringRef::const_iterator CPUStart = 0;
     86   size_t CPULen = 0;
     87 
     88   // We need to find the first line which starts with cpu, spaces, and a colon.
     89   // After the colon, there may be some additional spaces and then the cpu type.
     90   while (CIP < CPUInfoEnd && CPUStart == 0) {
     91     if (CIP < CPUInfoEnd && *CIP == '\n')
     92       ++CIP;
     93 
     94     if (CIP < CPUInfoEnd && *CIP == 'c') {
     95       ++CIP;
     96       if (CIP < CPUInfoEnd && *CIP == 'p') {
     97         ++CIP;
     98         if (CIP < CPUInfoEnd && *CIP == 'u') {
     99           ++CIP;
    100           while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
    101             ++CIP;
    102 
    103           if (CIP < CPUInfoEnd && *CIP == ':') {
    104             ++CIP;
    105             while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
    106               ++CIP;
    107 
    108             if (CIP < CPUInfoEnd) {
    109               CPUStart = CIP;
    110               while (CIP < CPUInfoEnd && (*CIP != ' ' && *CIP != '\t' &&
    111                                           *CIP != ',' && *CIP != '\n'))
    112                 ++CIP;
    113               CPULen = CIP - CPUStart;
    114             }
    115           }
    116         }
    117       }
    118     }
    119 
    120     if (CPUStart == 0)
    121       while (CIP < CPUInfoEnd && *CIP != '\n')
    122         ++CIP;
    123   }
    124 
    125   if (CPUStart == 0)
    126     return generic;
    127 
    128   return StringSwitch<const char *>(StringRef(CPUStart, CPULen))
    129       .Case("604e", "604e")
    130       .Case("604", "604")
    131       .Case("7400", "7400")
    132       .Case("7410", "7400")
    133       .Case("7447", "7400")
    134       .Case("7455", "7450")
    135       .Case("G4", "g4")
    136       .Case("POWER4", "970")
    137       .Case("PPC970FX", "970")
    138       .Case("PPC970MP", "970")
    139       .Case("G5", "g5")
    140       .Case("POWER5", "g5")
    141       .Case("A2", "a2")
    142       .Case("POWER6", "pwr6")
    143       .Case("POWER7", "pwr7")
    144       .Case("POWER8", "pwr8")
    145       .Case("POWER8E", "pwr8")
    146       .Case("POWER8NVL", "pwr8")
    147       .Case("POWER9", "pwr9")
    148       .Case("POWER10", "pwr10")
    149       // FIXME: If we get a simulator or machine with the capabilities of
    150       // mcpu=future, we should revisit this and add the name reported by the
    151       // simulator/machine.
    152       .Default(generic);
    153 }
    154 
    155 StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
    156   // The cpuid register on arm is not accessible from user space. On Linux,
    157   // it is exposed through the /proc/cpuinfo file.
    158 
    159   // Read 32 lines from /proc/cpuinfo, which should contain the CPU part line
    160   // in all cases.
    161   SmallVector<StringRef, 32> Lines;
    162   ProcCpuinfoContent.split(Lines, "\n");
    163 
    164   // Look for the CPU implementer line.
    165   StringRef Implementer;
    166   StringRef Hardware;
    167   StringRef Part;
    168   for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
    169     if (Lines[I].startswith("CPU implementer"))
    170       Implementer = Lines[I].substr(15).ltrim("\t :");
    171     if (Lines[I].startswith("Hardware"))
    172       Hardware = Lines[I].substr(8).ltrim("\t :");
    173     if (Lines[I].startswith("CPU part"))
    174       Part = Lines[I].substr(8).ltrim("\t :");
    175   }
    176 
    177   if (Implementer == "0x41") { // ARM Ltd.
    178     // MSM8992/8994 may give cpu part for the core that the kernel is running on,
    179     // which is undeterministic and wrong. Always return cortex-a53 for these SoC.
    180     if (Hardware.endswith("MSM8994") || Hardware.endswith("MSM8996"))
    181       return "cortex-a53";
    182 
    183 
    184     // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
    185     // values correspond to the "Part number" in the CP15/c0 register. The
    186     // contents are specified in the various processor manuals.
    187     // This corresponds to the Main ID Register in Technical Reference Manuals.
    188     // and is used in programs like sys-utils
    189     return StringSwitch<const char *>(Part)
    190         .Case("0x926", "arm926ej-s")
    191         .Case("0xb02", "mpcore")
    192         .Case("0xb36", "arm1136j-s")
    193         .Case("0xb56", "arm1156t2-s")
    194         .Case("0xb76", "arm1176jz-s")
    195         .Case("0xc08", "cortex-a8")
    196         .Case("0xc09", "cortex-a9")
    197         .Case("0xc0f", "cortex-a15")
    198         .Case("0xc20", "cortex-m0")
    199         .Case("0xc23", "cortex-m3")
    200         .Case("0xc24", "cortex-m4")
    201         .Case("0xd22", "cortex-m55")
    202         .Case("0xd02", "cortex-a34")
    203         .Case("0xd04", "cortex-a35")
    204         .Case("0xd03", "cortex-a53")
    205         .Case("0xd07", "cortex-a57")
    206         .Case("0xd08", "cortex-a72")
    207         .Case("0xd09", "cortex-a73")
    208         .Case("0xd0a", "cortex-a75")
    209         .Case("0xd0b", "cortex-a76")
    210         .Case("0xd0d", "cortex-a77")
    211         .Case("0xd41", "cortex-a78")
    212         .Case("0xd44", "cortex-x1")
    213         .Case("0xd0c", "neoverse-n1")
    214         .Case("0xd49", "neoverse-n2")
    215         .Default("generic");
    216   }
    217 
    218   if (Implementer == "0x42" || Implementer == "0x43") { // Broadcom | Cavium.
    219     return StringSwitch<const char *>(Part)
    220       .Case("0x516", "thunderx2t99")
    221       .Case("0x0516", "thunderx2t99")
    222       .Case("0xaf", "thunderx2t99")
    223       .Case("0x0af", "thunderx2t99")
    224       .Case("0xa1", "thunderxt88")
    225       .Case("0x0a1", "thunderxt88")
    226       .Default("generic");
    227   }
    228 
    229   if (Implementer == "0x46") { // Fujitsu Ltd.
    230     return StringSwitch<const char *>(Part)
    231       .Case("0x001", "a64fx")
    232       .Default("generic");
    233   }
    234 
    235   if (Implementer == "0x4e") { // NVIDIA Corporation
    236     return StringSwitch<const char *>(Part)
    237         .Case("0x004", "carmel")
    238         .Default("generic");
    239   }
    240 
    241   if (Implementer == "0x48") // HiSilicon Technologies, Inc.
    242     // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
    243     // values correspond to the "Part number" in the CP15/c0 register. The
    244     // contents are specified in the various processor manuals.
    245     return StringSwitch<const char *>(Part)
    246       .Case("0xd01", "tsv110")
    247       .Default("generic");
    248 
    249   if (Implementer == "0x51") // Qualcomm Technologies, Inc.
    250     // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
    251     // values correspond to the "Part number" in the CP15/c0 register. The
    252     // contents are specified in the various processor manuals.
    253     return StringSwitch<const char *>(Part)
    254         .Case("0x06f", "krait") // APQ8064
    255         .Case("0x201", "kryo")
    256         .Case("0x205", "kryo")
    257         .Case("0x211", "kryo")
    258         .Case("0x800", "cortex-a73") // Kryo 2xx Gold
    259         .Case("0x801", "cortex-a73") // Kryo 2xx Silver
    260         .Case("0x802", "cortex-a75") // Kryo 3xx Gold
    261         .Case("0x803", "cortex-a75") // Kryo 3xx Silver
    262         .Case("0x804", "cortex-a76") // Kryo 4xx Gold
    263         .Case("0x805", "cortex-a76") // Kryo 4xx/5xx Silver
    264         .Case("0xc00", "falkor")
    265         .Case("0xc01", "saphira")
    266         .Default("generic");
    267   if (Implementer == "0x53") { // Samsung Electronics Co., Ltd.
    268     // The Exynos chips have a convoluted ID scheme that doesn't seem to follow
    269     // any predictive pattern across variants and parts.
    270     unsigned Variant = 0, Part = 0;
    271 
    272     // Look for the CPU variant line, whose value is a 1 digit hexadecimal
    273     // number, corresponding to the Variant bits in the CP15/C0 register.
    274     for (auto I : Lines)
    275       if (I.consume_front("CPU variant"))
    276         I.ltrim("\t :").getAsInteger(0, Variant);
    277 
    278     // Look for the CPU part line, whose value is a 3 digit hexadecimal
    279     // number, corresponding to the PartNum bits in the CP15/C0 register.
    280     for (auto I : Lines)
    281       if (I.consume_front("CPU part"))
    282         I.ltrim("\t :").getAsInteger(0, Part);
    283 
    284     unsigned Exynos = (Variant << 12) | Part;
    285     switch (Exynos) {
    286     default:
    287       // Default by falling through to Exynos M3.
    288       LLVM_FALLTHROUGH;
    289     case 0x1002:
    290       return "exynos-m3";
    291     case 0x1003:
    292       return "exynos-m4";
    293     }
    294   }
    295 
    296   return "generic";
    297 }
    298 
    299 StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) {
    300   // STIDP is a privileged operation, so use /proc/cpuinfo instead.
    301 
    302   // The "processor 0:" line comes after a fair amount of other information,
    303   // including a cache breakdown, but this should be plenty.
    304   SmallVector<StringRef, 32> Lines;
    305   ProcCpuinfoContent.split(Lines, "\n");
    306 
    307   // Look for the CPU features.
    308   SmallVector<StringRef, 32> CPUFeatures;
    309   for (unsigned I = 0, E = Lines.size(); I != E; ++I)
    310     if (Lines[I].startswith("features")) {
    311       size_t Pos = Lines[I].find(':');
    312       if (Pos != StringRef::npos) {
    313         Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' ');
    314         break;
    315       }
    316     }
    317 
    318   // We need to check for the presence of vector support independently of
    319   // the machine type, since we may only use the vector register set when
    320   // supported by the kernel (and hypervisor).
    321   bool HaveVectorSupport = false;
    322   for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
    323     if (CPUFeatures[I] == "vx")
    324       HaveVectorSupport = true;
    325   }
    326 
    327   // Now check the processor machine type.
    328   for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
    329     if (Lines[I].startswith("processor ")) {
    330       size_t Pos = Lines[I].find("machine = ");
    331       if (Pos != StringRef::npos) {
    332         Pos += sizeof("machine = ") - 1;
    333         unsigned int Id;
    334         if (!Lines[I].drop_front(Pos).getAsInteger(10, Id)) {
    335           if (Id >= 8561 && HaveVectorSupport)
    336             return "z15";
    337           if (Id >= 3906 && HaveVectorSupport)
    338             return "z14";
    339           if (Id >= 2964 && HaveVectorSupport)
    340             return "z13";
    341           if (Id >= 2827)
    342             return "zEC12";
    343           if (Id >= 2817)
    344             return "z196";
    345         }
    346       }
    347       break;
    348     }
    349   }
    350 
    351   return "generic";
    352 }
    353 
    354 StringRef sys::detail::getHostCPUNameForBPF() {
    355 #if !defined(__linux__) || !defined(__x86_64__)
    356   return "generic";
    357 #else
    358   uint8_t v3_insns[40] __attribute__ ((aligned (8))) =
    359       /* BPF_MOV64_IMM(BPF_REG_0, 0) */
    360     { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
    361       /* BPF_MOV64_IMM(BPF_REG_2, 1) */
    362       0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
    363       /* BPF_JMP32_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
    364       0xae, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
    365       /* BPF_MOV64_IMM(BPF_REG_0, 1) */
    366       0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
    367       /* BPF_EXIT_INSN() */
    368       0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
    369 
    370   uint8_t v2_insns[40] __attribute__ ((aligned (8))) =
    371       /* BPF_MOV64_IMM(BPF_REG_0, 0) */
    372     { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
    373       /* BPF_MOV64_IMM(BPF_REG_2, 1) */
    374       0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
    375       /* BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
    376       0xad, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
    377       /* BPF_MOV64_IMM(BPF_REG_0, 1) */
    378       0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
    379       /* BPF_EXIT_INSN() */
    380       0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
    381 
    382   struct bpf_prog_load_attr {
    383     uint32_t prog_type;
    384     uint32_t insn_cnt;
    385     uint64_t insns;
    386     uint64_t license;
    387     uint32_t log_level;
    388     uint32_t log_size;
    389     uint64_t log_buf;
    390     uint32_t kern_version;
    391     uint32_t prog_flags;
    392   } attr = {};
    393   attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
    394   attr.insn_cnt = 5;
    395   attr.insns = (uint64_t)v3_insns;
    396   attr.license = (uint64_t)"DUMMY";
    397 
    398   int fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr,
    399                    sizeof(attr));
    400   if (fd >= 0) {
    401     close(fd);
    402     return "v3";
    403   }
    404 
    405   /* Clear the whole attr in case its content changed by syscall. */
    406   memset(&attr, 0, sizeof(attr));
    407   attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
    408   attr.insn_cnt = 5;
    409   attr.insns = (uint64_t)v2_insns;
    410   attr.license = (uint64_t)"DUMMY";
    411   fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, sizeof(attr));
    412   if (fd >= 0) {
    413     close(fd);
    414     return "v2";
    415   }
    416   return "v1";
    417 #endif
    418 }
    419 
    420 #if defined(__i386__) || defined(_M_IX86) || \
    421     defined(__x86_64__) || defined(_M_X64)
    422 
// The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
// Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID
// support. Consequently, for i386, the presence of CPUID is checked first
// via the corresponding eflags bit.
// Removal of cpuid.h header motivated by PR30384
// Header cpuid.h and method __get_cpuid_max are not used in llvm, clang, openmp
// or test-suite, but are used in external projects e.g. libstdcxx
//
// Returns false only on 32-bit x86 GNU/Clang builds where the probe below
// shows that the EFLAGS bit cannot be changed; every other configuration
// returns true without probing.
static bool isCpuIdSupported() {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__i386__)
  int __cpuid_supported;
  // Read EFLAGS, flip bit 0x00200000, write it back and read it again. If
  // the value read back equals the original, the bit could not be toggled
  // and __cpuid_supported is left at 0; otherwise it is set to 1.
  __asm__("  pushfl\n"
          "  popl   %%eax\n"
          "  movl   %%eax,%%ecx\n"
          "  xorl   $0x00200000,%%eax\n"
          "  pushl  %%eax\n"
          "  popfl\n"
          "  pushfl\n"
          "  popl   %%eax\n"
          "  movl   $0,%0\n"
          "  cmpl   %%eax,%%ecx\n"
          "  je     1f\n"
          "  movl   $1,%0\n"
          "1:"
          : "=r"(__cpuid_supported)
          :
          : "eax", "ecx");
  if (!__cpuid_supported)
    return false;
#endif
  return true;
#endif
  // Reached only when neither __GNUC__ nor __clang__ is defined.
  return true;
}
    457 
/// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in
/// the specified arguments.  If we can't run cpuid on the host, return true.
///
/// \param value the cpuid leaf, passed in EAX.
/// \param rEAX, rEBX, rECX, rEDX receive the four result registers; they are
/// written only when the function returns false.
/// \returns false when cpuid was executed, true when this build has no way
/// of executing it.
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  // rbx is parked in rsi around the cpuid; the final xchg restores rbx and
  // leaves cpuid's EBX result in rsi, which is the "=S" output.
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif defined(__i386__)
  // Same save/restore dance as above, using the 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#else
  // GNU-compatible compiler, but not targeting x86: nothing to execute.
  return true;
#endif
#elif defined(_MSC_VER)
  // The MSVC intrinsic is portable across x86 and x64.
  int registers[4];
  __cpuid(registers, value);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
  return false;
#else
  // Unknown compiler: no way to issue cpuid.
  return true;
#endif
}
    495 
    496 namespace llvm {
    497 namespace sys {
    498 namespace detail {
    499 namespace x86 {
    500 
    501 VendorSignatures getVendorSignature(unsigned *MaxLeaf) {
    502   unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
    503   if (MaxLeaf == nullptr)
    504     MaxLeaf = &EAX;
    505   else
    506     *MaxLeaf = 0;
    507 
    508   if (!isCpuIdSupported())
    509     return VendorSignatures::UNKNOWN;
    510 
    511   if (getX86CpuIDAndInfo(0, MaxLeaf, &EBX, &ECX, &EDX) || *MaxLeaf < 1)
    512     return VendorSignatures::UNKNOWN;
    513 
    514   // "Genu ineI ntel"
    515   if (EBX == 0x756e6547 && EDX == 0x49656e69 && ECX == 0x6c65746e)
    516     return VendorSignatures::GENUINE_INTEL;
    517 
    518   // "Auth enti cAMD"
    519   if (EBX == 0x68747541 && EDX == 0x69746e65 && ECX == 0x444d4163)
    520     return VendorSignatures::AUTHENTIC_AMD;
    521 
    522   return VendorSignatures::UNKNOWN;
    523 }
    524 
    525 } // namespace x86
    526 } // namespace detail
    527 } // namespace sys
    528 } // namespace llvm
    529 
    530 using namespace llvm::sys::detail::x86;
    531 
/// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
/// the 4 values in the specified arguments.  If we can't run cpuid on the host,
/// return true.
///
/// \param value the cpuid leaf, passed in EAX.
/// \param subleaf the cpuid subleaf, passed in ECX.
/// \param rEAX, rEBX, rECX, rEDX receive the four result registers; they are
/// written only when the function returns false.
/// \returns false when cpuid was executed, true when this build has no way
/// of executing it.
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  // rbx is parked in rsi around the cpuid; the final xchg restores rbx and
  // leaves cpuid's EBX result in rsi, which is the "=S" output.
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif defined(__i386__)
  // Same save/restore dance as above, using the 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#else
  // GNU-compatible compiler, but not targeting x86: nothing to execute.
  return true;
#endif
#elif defined(_MSC_VER)
  // MSVC: use the intrinsic that takes an explicit subleaf.
  int registers[4];
  __cpuidex(registers, value, subleaf);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
  return false;
#else
  // Unknown compiler: no way to issue cpuid.
  return true;
#endif
}
    570 
// Read control register 0 (XCR0). Used to detect features such as AVX.
//
// On success the low 32 bits are stored in *rEAX and the high 32 bits in
// *rEDX, and false is returned. Returns true (leaving the outputs untouched)
// when this build has no way of reading the register.
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
  // Check xgetbv; this uses a .byte sequence instead of the instruction
  // directly because older assemblers do not include support for xgetbv and
  // there is no easy way to conditionally compile based on the assembler used.
  // ECX is loaded with 0 to select register 0.
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  // MSVC: the intrinsic returns all 64 bits at once; split them here.
  unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = Result;
  *rEDX = Result >> 32;
  return false;
#else
  return true;
#endif
}
    588 
/// Decode the family and model numbers from the EAX value of cpuid leaf 1.
///
/// \param EAX the raw register value.
/// \param Family receives the family: bits 8-11, plus the extended family
/// (bits 20-27) when the base family is 0xF.
/// \param Model receives the model: bits 4-7, with the extended model
/// (bits 16-19) as the high nibble when the base family is 6 or 0xF.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  const unsigned BaseModel = (EAX >> 4) & 0xf;    // Bits 4 - 7
  const unsigned BaseFamily = (EAX >> 8) & 0xf;   // Bits 8 - 11
  const unsigned ExtModel = (EAX >> 16) & 0xf;    // Bits 16 - 19
  const unsigned ExtFamily = (EAX >> 20) & 0xff;  // Bits 20 - 27

  unsigned FamilyValue = BaseFamily;
  unsigned ModelValue = BaseModel;

  // The extended family ID only applies when the family ID is F.
  if (BaseFamily == 0xf)
    FamilyValue += ExtFamily;

  // The extended model ID applies when the family ID is 6 or F.
  if (BaseFamily == 6 || BaseFamily == 0xf)
    ModelValue += ExtModel << 4;

  *Family = FamilyValue;
  *Model = ModelValue;
}
    601 
    602 static StringRef
    603 getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
    604                                 const unsigned *Features,
    605                                 unsigned *Type, unsigned *Subtype) {
    606   auto testFeature = [&](unsigned F) {
    607     return (Features[F / 32] & (1U << (F % 32))) != 0;
    608   };
    609 
    610   StringRef CPU;
    611 
    612   switch (Family) {
    613   case 3:
    614     CPU = "i386";
    615     break;
    616   case 4:
    617     CPU = "i486";
    618     break;
    619   case 5:
    620     if (testFeature(X86::FEATURE_MMX)) {
    621       CPU = "pentium-mmx";
    622       break;
    623     }
    624     CPU = "pentium";
    625     break;
    626   case 6:
    627     switch (Model) {
    628     case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
    629                // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
    630                // mobile processor, Intel Core 2 Extreme processor, Intel
    631                // Pentium Dual-Core processor, Intel Xeon processor, model
    632                // 0Fh. All processors are manufactured using the 65 nm process.
    633     case 0x16: // Intel Celeron processor model 16h. All processors are
    634                // manufactured using the 65 nm process
    635       CPU = "core2";
    636       *Type = X86::INTEL_CORE2;
    637       break;
    638     case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
    639                // 17h. All processors are manufactured using the 45 nm process.
    640                //
    641                // 45nm: Penryn , Wolfdale, Yorkfield (XE)
    642     case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
    643                // the 45 nm process.
    644       CPU = "penryn";
    645       *Type = X86::INTEL_CORE2;
    646       break;
    647     case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
    648                // processors are manufactured using the 45 nm process.
    649     case 0x1e: // Intel(R) Core(TM) i7 CPU         870  @ 2.93GHz.
    650                // As found in a Summer 2010 model iMac.
    651     case 0x1f:
    652     case 0x2e:              // Nehalem EX
    653       CPU = "nehalem";
    654       *Type = X86::INTEL_COREI7;
    655       *Subtype = X86::INTEL_COREI7_NEHALEM;
    656       break;
    657     case 0x25: // Intel Core i7, laptop version.
    658     case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
    659                // processors are manufactured using the 32 nm process.
    660     case 0x2f: // Westmere EX
    661       CPU = "westmere";
    662       *Type = X86::INTEL_COREI7;
    663       *Subtype = X86::INTEL_COREI7_WESTMERE;
    664       break;
    665     case 0x2a: // Intel Core i7 processor. All processors are manufactured
    666                // using the 32 nm process.
    667     case 0x2d:
    668       CPU = "sandybridge";
    669       *Type = X86::INTEL_COREI7;
    670       *Subtype = X86::INTEL_COREI7_SANDYBRIDGE;
    671       break;
    672     case 0x3a:
    673     case 0x3e:              // Ivy Bridge EP
    674       CPU = "ivybridge";
    675       *Type = X86::INTEL_COREI7;
    676       *Subtype = X86::INTEL_COREI7_IVYBRIDGE;
    677       break;
    678 
    679     // Haswell:
    680     case 0x3c:
    681     case 0x3f:
    682     case 0x45:
    683     case 0x46:
    684       CPU = "haswell";
    685       *Type = X86::INTEL_COREI7;
    686       *Subtype = X86::INTEL_COREI7_HASWELL;
    687       break;
    688 
    689     // Broadwell:
    690     case 0x3d:
    691     case 0x47:
    692     case 0x4f:
    693     case 0x56:
    694       CPU = "broadwell";
    695       *Type = X86::INTEL_COREI7;
    696       *Subtype = X86::INTEL_COREI7_BROADWELL;
    697       break;
    698 
    699     // Skylake:
    700     case 0x4e:              // Skylake mobile
    701     case 0x5e:              // Skylake desktop
    702     case 0x8e:              // Kaby Lake mobile
    703     case 0x9e:              // Kaby Lake desktop
    704     case 0xa5:              // Comet Lake-H/S
    705     case 0xa6:              // Comet Lake-U
    706       CPU = "skylake";
    707       *Type = X86::INTEL_COREI7;
    708       *Subtype = X86::INTEL_COREI7_SKYLAKE;
    709       break;
    710 
    711     // Rocketlake:
    712     case 0xa7:
    713       CPU = "rocketlake";
    714       *Type = X86::INTEL_COREI7;
    715       *Subtype = X86::INTEL_COREI7_ROCKETLAKE;
    716       break;
    717 
    718     // Skylake Xeon:
    719     case 0x55:
    720       *Type = X86::INTEL_COREI7;
    721       if (testFeature(X86::FEATURE_AVX512BF16)) {
    722         CPU = "cooperlake";
    723         *Subtype = X86::INTEL_COREI7_COOPERLAKE;
    724       } else if (testFeature(X86::FEATURE_AVX512VNNI)) {
    725         CPU = "cascadelake";
    726         *Subtype = X86::INTEL_COREI7_CASCADELAKE;
    727       } else {
    728         CPU = "skylake-avx512";
    729         *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512;
    730       }
    731       break;
    732 
    733     // Cannonlake:
    734     case 0x66:
    735       CPU = "cannonlake";
    736       *Type = X86::INTEL_COREI7;
    737       *Subtype = X86::INTEL_COREI7_CANNONLAKE;
    738       break;
    739 
    740     // Icelake:
    741     case 0x7d:
    742     case 0x7e:
    743       CPU = "icelake-client";
    744       *Type = X86::INTEL_COREI7;
    745       *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT;
    746       break;
    747 
    748     // Icelake Xeon:
    749     case 0x6a:
    750     case 0x6c:
    751       CPU = "icelake-server";
    752       *Type = X86::INTEL_COREI7;
    753       *Subtype = X86::INTEL_COREI7_ICELAKE_SERVER;
    754       break;
    755 
    756     // Sapphire Rapids:
    757     case 0x8f:
    758       CPU = "sapphirerapids";
    759       *Type = X86::INTEL_COREI7;
    760       *Subtype = X86::INTEL_COREI7_SAPPHIRERAPIDS;
    761       break;
    762 
    763     case 0x1c: // Most 45 nm Intel Atom processors
    764     case 0x26: // 45 nm Atom Lincroft
    765     case 0x27: // 32 nm Atom Medfield
    766     case 0x35: // 32 nm Atom Midview
    767     case 0x36: // 32 nm Atom Midview
    768       CPU = "bonnell";
    769       *Type = X86::INTEL_BONNELL;
    770       break;
    771 
    772     // Atom Silvermont codes from the Intel software optimization guide.
    773     case 0x37:
    774     case 0x4a:
    775     case 0x4d:
    776     case 0x5a:
    777     case 0x5d:
    778     case 0x4c: // really airmont
    779       CPU = "silvermont";
    780       *Type = X86::INTEL_SILVERMONT;
    781       break;
    782     // Goldmont:
    783     case 0x5c: // Apollo Lake
    784     case 0x5f: // Denverton
    785       CPU = "goldmont";
    786       *Type = X86::INTEL_GOLDMONT;
    787       break;
    788     case 0x7a:
    789       CPU = "goldmont-plus";
    790       *Type = X86::INTEL_GOLDMONT_PLUS;
    791       break;
    792     case 0x86:
    793       CPU = "tremont";
    794       *Type = X86::INTEL_TREMONT;
    795       break;
    796 
    797     // Xeon Phi (Knights Landing + Knights Mill):
    798     case 0x57:
    799       CPU = "knl";
    800       *Type = X86::INTEL_KNL;
    801       break;
    802     case 0x85:
    803       CPU = "knm";
    804       *Type = X86::INTEL_KNM;
    805       break;
    806 
    807     default: // Unknown family 6 CPU, try to guess.
    808       // Don't bother with Type/Subtype here, they aren't used by the caller.
    809       // They're used above to keep the code in sync with compiler-rt.
    810       // TODO detect tigerlake host from model
    811       if (testFeature(X86::FEATURE_AVX512VP2INTERSECT)) {
    812         CPU = "tigerlake";
    813       } else if (testFeature(X86::FEATURE_AVX512VBMI2)) {
    814         CPU = "icelake-client";
    815       } else if (testFeature(X86::FEATURE_AVX512VBMI)) {
    816         CPU = "cannonlake";
    817       } else if (testFeature(X86::FEATURE_AVX512BF16)) {
    818         CPU = "cooperlake";
    819       } else if (testFeature(X86::FEATURE_AVX512VNNI)) {
    820         CPU = "cascadelake";
    821       } else if (testFeature(X86::FEATURE_AVX512VL)) {
    822         CPU = "skylake-avx512";
    823       } else if (testFeature(X86::FEATURE_AVX512ER)) {
    824         CPU = "knl";
    825       } else if (testFeature(X86::FEATURE_CLFLUSHOPT)) {
    826         if (testFeature(X86::FEATURE_SHA))
    827           CPU = "goldmont";
    828         else
    829           CPU = "skylake";
    830       } else if (testFeature(X86::FEATURE_ADX)) {
    831         CPU = "broadwell";
    832       } else if (testFeature(X86::FEATURE_AVX2)) {
    833         CPU = "haswell";
    834       } else if (testFeature(X86::FEATURE_AVX)) {
    835         CPU = "sandybridge";
    836       } else if (testFeature(X86::FEATURE_SSE4_2)) {
    837         if (testFeature(X86::FEATURE_MOVBE))
    838           CPU = "silvermont";
    839         else
    840           CPU = "nehalem";
    841       } else if (testFeature(X86::FEATURE_SSE4_1)) {
    842         CPU = "penryn";
    843       } else if (testFeature(X86::FEATURE_SSSE3)) {
    844         if (testFeature(X86::FEATURE_MOVBE))
    845           CPU = "bonnell";
    846         else
    847           CPU = "core2";
    848       } else if (testFeature(X86::FEATURE_64BIT)) {
    849         CPU = "core2";
    850       } else if (testFeature(X86::FEATURE_SSE3)) {
    851         CPU = "yonah";
    852       } else if (testFeature(X86::FEATURE_SSE2)) {
    853         CPU = "pentium-m";
    854       } else if (testFeature(X86::FEATURE_SSE)) {
    855         CPU = "pentium3";
    856       } else if (testFeature(X86::FEATURE_MMX)) {
    857         CPU = "pentium2";
    858       } else {
    859         CPU = "pentiumpro";
    860       }
    861       break;
    862     }
    863     break;
    864   case 15: {
    865     if (testFeature(X86::FEATURE_64BIT)) {
    866       CPU = "nocona";
    867       break;
    868     }
    869     if (testFeature(X86::FEATURE_SSE3)) {
    870       CPU = "prescott";
    871       break;
    872     }
    873     CPU = "pentium4";
    874     break;
    875   }
    876   default:
    877     break; // Unknown.
    878   }
    879 
    880   return CPU;
    881 }
    882 
    883 static StringRef
    884 getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
    885                               const unsigned *Features,
    886                               unsigned *Type, unsigned *Subtype) {
    887   auto testFeature = [&](unsigned F) {
    888     return (Features[F / 32] & (1U << (F % 32))) != 0;
    889   };
    890 
    891   StringRef CPU;
    892 
    893   switch (Family) {
    894   case 4:
    895     CPU = "i486";
    896     break;
    897   case 5:
    898     CPU = "pentium";
    899     switch (Model) {
    900     case 6:
    901     case 7:
    902       CPU = "k6";
    903       break;
    904     case 8:
    905       CPU = "k6-2";
    906       break;
    907     case 9:
    908     case 13:
    909       CPU = "k6-3";
    910       break;
    911     case 10:
    912       CPU = "geode";
    913       break;
    914     }
    915     break;
    916   case 6:
    917     if (testFeature(X86::FEATURE_SSE)) {
    918       CPU = "athlon-xp";
    919       break;
    920     }
    921     CPU = "athlon";
    922     break;
    923   case 15:
    924     if (testFeature(X86::FEATURE_SSE3)) {
    925       CPU = "k8-sse3";
    926       break;
    927     }
    928     CPU = "k8";
    929     break;
    930   case 16:
    931     CPU = "amdfam10";
    932     *Type = X86::AMDFAM10H; // "amdfam10"
    933     switch (Model) {
    934     case 2:
    935       *Subtype = X86::AMDFAM10H_BARCELONA;
    936       break;
    937     case 4:
    938       *Subtype = X86::AMDFAM10H_SHANGHAI;
    939       break;
    940     case 8:
    941       *Subtype = X86::AMDFAM10H_ISTANBUL;
    942       break;
    943     }
    944     break;
    945   case 20:
    946     CPU = "btver1";
    947     *Type = X86::AMD_BTVER1;
    948     break;
    949   case 21:
    950     CPU = "bdver1";
    951     *Type = X86::AMDFAM15H;
    952     if (Model >= 0x60 && Model <= 0x7f) {
    953       CPU = "bdver4";
    954       *Subtype = X86::AMDFAM15H_BDVER4;
    955       break; // 60h-7Fh: Excavator
    956     }
    957     if (Model >= 0x30 && Model <= 0x3f) {
    958       CPU = "bdver3";
    959       *Subtype = X86::AMDFAM15H_BDVER3;
    960       break; // 30h-3Fh: Steamroller
    961     }
    962     if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
    963       CPU = "bdver2";
    964       *Subtype = X86::AMDFAM15H_BDVER2;
    965       break; // 02h, 10h-1Fh: Piledriver
    966     }
    967     if (Model <= 0x0f) {
    968       *Subtype = X86::AMDFAM15H_BDVER1;
    969       break; // 00h-0Fh: Bulldozer
    970     }
    971     break;
    972   case 22:
    973     CPU = "btver2";
    974     *Type = X86::AMD_BTVER2;
    975     break;
    976   case 23:
    977     CPU = "znver1";
    978     *Type = X86::AMDFAM17H;
    979     if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) {
    980       CPU = "znver2";
    981       *Subtype = X86::AMDFAM17H_ZNVER2;
    982       break; // 30h-3fh, 71h: Zen2
    983     }
    984     if (Model <= 0x0f) {
    985       *Subtype = X86::AMDFAM17H_ZNVER1;
    986       break; // 00h-0Fh: Zen1
    987     }
    988     break;
    989   case 25:
    990     CPU = "znver3";
    991     *Type = X86::AMDFAM19H;
    992     if (Model <= 0x0f) {
    993       *Subtype = X86::AMDFAM19H_ZNVER3;
    994       break; // 00h-0Fh: Zen3
    995     }
    996     break;
    997   default:
    998     break; // Unknown AMD CPU.
    999   }
   1000 
   1001   return CPU;
   1002 }
   1003 
   1004 static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
   1005                                  unsigned *Features) {
   1006   unsigned EAX, EBX;
   1007 
   1008   auto setFeature = [&](unsigned F) {
   1009     Features[F / 32] |= 1U << (F % 32);
   1010   };
   1011 
   1012   if ((EDX >> 15) & 1)
   1013     setFeature(X86::FEATURE_CMOV);
   1014   if ((EDX >> 23) & 1)
   1015     setFeature(X86::FEATURE_MMX);
   1016   if ((EDX >> 25) & 1)
   1017     setFeature(X86::FEATURE_SSE);
   1018   if ((EDX >> 26) & 1)
   1019     setFeature(X86::FEATURE_SSE2);
   1020 
   1021   if ((ECX >> 0) & 1)
   1022     setFeature(X86::FEATURE_SSE3);
   1023   if ((ECX >> 1) & 1)
   1024     setFeature(X86::FEATURE_PCLMUL);
   1025   if ((ECX >> 9) & 1)
   1026     setFeature(X86::FEATURE_SSSE3);
   1027   if ((ECX >> 12) & 1)
   1028     setFeature(X86::FEATURE_FMA);
   1029   if ((ECX >> 19) & 1)
   1030     setFeature(X86::FEATURE_SSE4_1);
   1031   if ((ECX >> 20) & 1)
   1032     setFeature(X86::FEATURE_SSE4_2);
   1033   if ((ECX >> 23) & 1)
   1034     setFeature(X86::FEATURE_POPCNT);
   1035   if ((ECX >> 25) & 1)
   1036     setFeature(X86::FEATURE_AES);
   1037 
   1038   if ((ECX >> 22) & 1)
   1039     setFeature(X86::FEATURE_MOVBE);
   1040 
   1041   // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
   1042   // indicates that the AVX registers will be saved and restored on context
   1043   // switch, then we have full AVX support.
   1044   const unsigned AVXBits = (1 << 27) | (1 << 28);
   1045   bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
   1046                 ((EAX & 0x6) == 0x6);
   1047 #if defined(__APPLE__)
   1048   // Darwin lazily saves the AVX512 context on first use: trust that the OS will
   1049   // save the AVX512 context if we use AVX512 instructions, even the bit is not
   1050   // set right now.
   1051   bool HasAVX512Save = true;
   1052 #else
   1053   // AVX512 requires additional context to be saved by the OS.
   1054   bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
   1055 #endif
   1056 
   1057   if (HasAVX)
   1058     setFeature(X86::FEATURE_AVX);
   1059 
   1060   bool HasLeaf7 =
   1061       MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
   1062 
   1063   if (HasLeaf7 && ((EBX >> 3) & 1))
   1064     setFeature(X86::FEATURE_BMI);
   1065   if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
   1066     setFeature(X86::FEATURE_AVX2);
   1067   if (HasLeaf7 && ((EBX >> 8) & 1))
   1068     setFeature(X86::FEATURE_BMI2);
   1069   if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
   1070     setFeature(X86::FEATURE_AVX512F);
   1071   if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
   1072     setFeature(X86::FEATURE_AVX512DQ);
   1073   if (HasLeaf7 && ((EBX >> 19) & 1))
   1074     setFeature(X86::FEATURE_ADX);
   1075   if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
   1076     setFeature(X86::FEATURE_AVX512IFMA);
   1077   if (HasLeaf7 && ((EBX >> 23) & 1))
   1078     setFeature(X86::FEATURE_CLFLUSHOPT);
   1079   if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
   1080     setFeature(X86::FEATURE_AVX512PF);
   1081   if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
   1082     setFeature(X86::FEATURE_AVX512ER);
   1083   if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
   1084     setFeature(X86::FEATURE_AVX512CD);
   1085   if (HasLeaf7 && ((EBX >> 29) & 1))
   1086     setFeature(X86::FEATURE_SHA);
   1087   if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
   1088     setFeature(X86::FEATURE_AVX512BW);
   1089   if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
   1090     setFeature(X86::FEATURE_AVX512VL);
   1091 
   1092   if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
   1093     setFeature(X86::FEATURE_AVX512VBMI);
   1094   if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
   1095     setFeature(X86::FEATURE_AVX512VBMI2);
   1096   if (HasLeaf7 && ((ECX >> 8) & 1))
   1097     setFeature(X86::FEATURE_GFNI);
   1098   if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX)
   1099     setFeature(X86::FEATURE_VPCLMULQDQ);
   1100   if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
   1101     setFeature(X86::FEATURE_AVX512VNNI);
   1102   if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save)
   1103     setFeature(X86::FEATURE_AVX512BITALG);
   1104   if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
   1105     setFeature(X86::FEATURE_AVX512VPOPCNTDQ);
   1106 
   1107   if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
   1108     setFeature(X86::FEATURE_AVX5124VNNIW);
   1109   if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
   1110     setFeature(X86::FEATURE_AVX5124FMAPS);
   1111   if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
   1112     setFeature(X86::FEATURE_AVX512VP2INTERSECT);
   1113 
   1114   bool HasLeaf7Subleaf1 =
   1115       MaxLeaf >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
   1116   if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
   1117     setFeature(X86::FEATURE_AVX512BF16);
   1118 
   1119   unsigned MaxExtLevel;
   1120   getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
   1121 
   1122   bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
   1123                      !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
   1124   if (HasExtLeaf1 && ((ECX >> 6) & 1))
   1125     setFeature(X86::FEATURE_SSE4_A);
   1126   if (HasExtLeaf1 && ((ECX >> 11) & 1))
   1127     setFeature(X86::FEATURE_XOP);
   1128   if (HasExtLeaf1 && ((ECX >> 16) & 1))
   1129     setFeature(X86::FEATURE_FMA4);
   1130 
   1131   if (HasExtLeaf1 && ((EDX >> 29) & 1))
   1132     setFeature(X86::FEATURE_64BIT);
   1133 }
   1134 
   1135 StringRef sys::getHostCPUName() {
   1136   unsigned MaxLeaf = 0;
   1137   const VendorSignatures Vendor = getVendorSignature(&MaxLeaf);
   1138   if (Vendor == VendorSignatures::UNKNOWN)
   1139     return "generic";
   1140 
   1141   unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
   1142   getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
   1143 
   1144   unsigned Family = 0, Model = 0;
   1145   unsigned Features[(X86::CPU_FEATURE_MAX + 31) / 32] = {0};
   1146   detectX86FamilyModel(EAX, &Family, &Model);
   1147   getAvailableFeatures(ECX, EDX, MaxLeaf, Features);
   1148 
   1149   // These aren't consumed in this file, but we try to keep some source code the
   1150   // same or similar to compiler-rt.
   1151   unsigned Type = 0;
   1152   unsigned Subtype = 0;
   1153 
   1154   StringRef CPU;
   1155 
   1156   if (Vendor == VendorSignatures::GENUINE_INTEL) {
   1157     CPU = getIntelProcessorTypeAndSubtype(Family, Model, Features, &Type,
   1158                                           &Subtype);
   1159   } else if (Vendor == VendorSignatures::AUTHENTIC_AMD) {
   1160     CPU = getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type,
   1161                                         &Subtype);
   1162   }
   1163 
   1164   if (!CPU.empty())
   1165     return CPU;
   1166 
   1167   return "generic";
   1168 }
   1169 
   1170 #elif defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__))
   1171 StringRef sys::getHostCPUName() {
   1172   host_basic_info_data_t hostInfo;
   1173   mach_msg_type_number_t infoCount;
   1174 
   1175   infoCount = HOST_BASIC_INFO_COUNT;
   1176   mach_port_t hostPort = mach_host_self();
   1177   host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo,
   1178             &infoCount);
   1179   mach_port_deallocate(mach_task_self(), hostPort);
   1180 
   1181   if (hostInfo.cpu_type != CPU_TYPE_POWERPC)
   1182     return "generic";
   1183 
   1184   switch (hostInfo.cpu_subtype) {
   1185   case CPU_SUBTYPE_POWERPC_601:
   1186     return "601";
   1187   case CPU_SUBTYPE_POWERPC_602:
   1188     return "602";
   1189   case CPU_SUBTYPE_POWERPC_603:
   1190     return "603";
   1191   case CPU_SUBTYPE_POWERPC_603e:
   1192     return "603e";
   1193   case CPU_SUBTYPE_POWERPC_603ev:
   1194     return "603ev";
   1195   case CPU_SUBTYPE_POWERPC_604:
   1196     return "604";
   1197   case CPU_SUBTYPE_POWERPC_604e:
   1198     return "604e";
   1199   case CPU_SUBTYPE_POWERPC_620:
   1200     return "620";
   1201   case CPU_SUBTYPE_POWERPC_750:
   1202     return "750";
   1203   case CPU_SUBTYPE_POWERPC_7400:
   1204     return "7400";
   1205   case CPU_SUBTYPE_POWERPC_7450:
   1206     return "7450";
   1207   case CPU_SUBTYPE_POWERPC_970:
   1208     return "970";
   1209   default:;
   1210   }
   1211 
   1212   return "generic";
   1213 }
   1214 #elif defined(__linux__) && (defined(__ppc__) || defined(__powerpc__))
   1215 StringRef sys::getHostCPUName() {
   1216   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
   1217   StringRef Content = P ? P->getBuffer() : "";
   1218   return detail::getHostCPUNameForPowerPC(Content);
   1219 }
   1220 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
   1221 StringRef sys::getHostCPUName() {
   1222   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
   1223   StringRef Content = P ? P->getBuffer() : "";
   1224   return detail::getHostCPUNameForARM(Content);
   1225 }
   1226 #elif defined(__linux__) && defined(__s390x__)
   1227 StringRef sys::getHostCPUName() {
   1228   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
   1229   StringRef Content = P ? P->getBuffer() : "";
   1230   return detail::getHostCPUNameForS390x(Content);
   1231 }
   1232 #elif defined(__APPLE__) && defined(__aarch64__)
   1233 StringRef sys::getHostCPUName() {
   1234   return "cyclone";
   1235 }
   1236 #elif defined(__APPLE__) && defined(__arm__)
   1237 StringRef sys::getHostCPUName() {
   1238   host_basic_info_data_t hostInfo;
   1239   mach_msg_type_number_t infoCount;
   1240 
   1241   infoCount = HOST_BASIC_INFO_COUNT;
   1242   mach_port_t hostPort = mach_host_self();
   1243   host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo,
   1244             &infoCount);
   1245   mach_port_deallocate(mach_task_self(), hostPort);
   1246 
   1247   if (hostInfo.cpu_type != CPU_TYPE_ARM) {
   1248     assert(false && "CPUType not equal to ARM should not be possible on ARM");
   1249     return "generic";
   1250   }
   1251   switch (hostInfo.cpu_subtype) {
   1252     case CPU_SUBTYPE_ARM_V7S:
   1253       return "swift";
   1254     default:;
   1255     }
   1256 
   1257   return "generic";
   1258 }
   1259 #elif defined(_AIX)
   1260 StringRef sys::getHostCPUName() {
   1261   switch (_system_configuration.implementation) {
   1262   case POWER_4:
   1263     if (_system_configuration.version == PV_4_3)
   1264       return "970";
   1265     return "pwr4";
   1266   case POWER_5:
   1267     if (_system_configuration.version == PV_5)
   1268       return "pwr5";
   1269     return "pwr5x";
   1270   case POWER_6:
   1271     if (_system_configuration.version == PV_6_Compat)
   1272       return "pwr6";
   1273     return "pwr6x";
   1274   case POWER_7:
   1275     return "pwr7";
   1276   case POWER_8:
   1277     return "pwr8";
   1278   case POWER_9:
   1279     return "pwr9";
   1280 // TODO: simplify this once the macro is available in all OS levels.
   1281 #ifdef POWER_10
   1282   case POWER_10:
   1283 #else
   1284   case 0x40000:
   1285 #endif
   1286     return "pwr10";
   1287   default:
   1288     return "generic";
   1289   }
   1290 }
   1291 #else
   1292 StringRef sys::getHostCPUName() { return "generic"; }
   1293 namespace llvm {
   1294 namespace sys {
   1295 namespace detail {
   1296 namespace x86 {
   1297 
   1298 VendorSignatures getVendorSignature(unsigned *MaxLeaf) {
   1299   return VendorSignatures::UNKNOWN;
   1300 }
   1301 
   1302 } // namespace x86
   1303 } // namespace detail
   1304 } // namespace sys
   1305 } // namespace llvm
   1306 #endif
   1307 
   1308 #if defined(__linux__) && (defined(__i386__) || defined(__x86_64__))
   1309 // On Linux, the number of physical cores can be computed from /proc/cpuinfo,
   1310 // using the number of unique physical/core id pairs. The following
   1311 // implementation reads the /proc/cpuinfo format on an x86_64 system.
   1312 int computeHostNumPhysicalCores() {
   1313   // Enabled represents the number of physical id/core id pairs with at least
   1314   // one processor id enabled by the CPU affinity mask.
   1315   cpu_set_t Affinity, Enabled;
   1316   if (sched_getaffinity(0, sizeof(Affinity), &Affinity) != 0)
   1317     return -1;
   1318   CPU_ZERO(&Enabled);
   1319 
   1320   // Read /proc/cpuinfo as a stream (until EOF reached). It cannot be
   1321   // mmapped because it appears to have 0 size.
   1322   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
   1323       llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
   1324   if (std::error_code EC = Text.getError()) {
   1325     llvm::errs() << "Can't read "
   1326                  << "/proc/cpuinfo: " << EC.message() << "\n";
   1327     return -1;
   1328   }
   1329   SmallVector<StringRef, 8> strs;
   1330   (*Text)->getBuffer().split(strs, "\n", /*MaxSplit=*/-1,
   1331                              /*KeepEmpty=*/false);
   1332   int CurProcessor = -1;
   1333   int CurPhysicalId = -1;
   1334   int CurSiblings = -1;
   1335   int CurCoreId = -1;
   1336   for (StringRef Line : strs) {
   1337     std::pair<StringRef, StringRef> Data = Line.split(':');
   1338     auto Name = Data.first.trim();
   1339     auto Val = Data.second.trim();
   1340     // These fields are available if the kernel is configured with CONFIG_SMP.
   1341     if (Name == "processor")
   1342       Val.getAsInteger(10, CurProcessor);
   1343     else if (Name == "physical id")
   1344       Val.getAsInteger(10, CurPhysicalId);
   1345     else if (Name == "siblings")
   1346       Val.getAsInteger(10, CurSiblings);
   1347     else if (Name == "core id") {
   1348       Val.getAsInteger(10, CurCoreId);
   1349       // The processor id corresponds to an index into cpu_set_t.
   1350       if (CPU_ISSET(CurProcessor, &Affinity))
   1351         CPU_SET(CurPhysicalId * CurSiblings + CurCoreId, &Enabled);
   1352     }
   1353   }
   1354   return CPU_COUNT(&Enabled);
   1355 }
   1356 #elif defined(__linux__) && defined(__powerpc__)
// Linux/PowerPC: report the number of CPUs in this process's affinity mask.
// Returns -1 if the mask cannot be obtained.
int computeHostNumPhysicalCores() {
  cpu_set_t Affinity;
  if (sched_getaffinity(0, sizeof(Affinity), &Affinity) == 0)
    return CPU_COUNT(&Affinity);

  // The call to sched_getaffinity() may have failed because the Affinity
  // mask is too small for the number of CPU's on the system (i.e. the
  // system has more than 1024 CPUs). Allocate a mask large enough for
  // twice as many CPUs.
  const size_t MaxCPUs = 2048;
  cpu_set_t *DynAffinity = CPU_ALLOC(MaxCPUs);
  if (!DynAffinity)
    return -1;

  const size_t DynAffinitySize = CPU_ALLOC_SIZE(MaxCPUs);
  int NumCPUs = -1;
  // A dynamically sized set must be counted with CPU_COUNT_S; plain CPU_COUNT
  // only inspects sizeof(cpu_set_t) bytes.
  if (sched_getaffinity(0, DynAffinitySize, DynAffinity) == 0)
    NumCPUs = CPU_COUNT_S(DynAffinitySize, DynAffinity);

  // Release the mask on both the success and the failure path.
  CPU_FREE(DynAffinity);
  return NumCPUs;
}
   1375 #elif defined(__linux__) && defined(__s390x__)
// Linux/s390x: report the value of sysconf(_SC_NPROCESSORS_ONLN), converted
// to int.
int computeHostNumPhysicalCores() {
  const long OnlineCPUs = sysconf(_SC_NPROCESSORS_ONLN);
  return static_cast<int>(OnlineCPUs);
}
   1377 #elif defined(__APPLE__) && defined(__x86_64__)
   1378 #include <sys/param.h>
   1379 #include <sys/sysctl.h>
   1380 
// Gets the number of *physical cores* on the machine.
//
// Queries the "hw.physicalcpu" sysctl first and falls back to
// CTL_HW/HW_AVAILCPU when that query fails or reports fewer than one CPU.
// Returns -1 if neither query yields a usable count.
int computeHostNumPhysicalCores() {
  // Zero-initialized so that a failed query can never leave an indeterminate
  // value to be tested below.
  uint32_t count = 0;
  size_t len = sizeof(count);
  if (sysctlbyname("hw.physicalcpu", &count, &len, nullptr, 0) != 0 ||
      count < 1) {
    int nm[2] = {CTL_HW, HW_AVAILCPU};
    // Reset the in/out arguments: the first call may have modified them.
    count = 0;
    len = sizeof(count);
    if (sysctl(nm, 2, &count, &len, nullptr, 0) != 0 || count < 1)
      return -1;
  }
  return count;
}
   1396 #elif defined(__MVS__)
// z/OS: read the number of online standard CPs by following two 31-bit
// pointers through fixed-offset system control blocks, starting from the
// Prefixed Save Area (PSA) at address 0.
int computeHostNumPhysicalCores() {
  enum {
    // Byte offset of the pointer to the Communications Vector Table (CVT) in
    // the Prefixed Save Area (PSA). The table entry is a 31-bit pointer and
    // will be zero-extended to uintptr_t.
    FLCCVT = 16,
    // Byte offset of the pointer to the Common System Data Area (CSD) in the
    // CVT. The table entry is a 31-bit pointer and will be zero-extended to
    // uintptr_t.
    CVTCSD = 660,
    // Byte offset to the number of live CPs in the LPAR, stored as a signed
    // 32-bit value in the table.
    CSD_NUMBER_ONLINE_STANDARD_CPS = 264,
  };

  // Loads the unsigned int stored at Base[Offset] and widens it to a pointer.
  const auto loadTablePointer = [](char *Base, unsigned Offset) {
    const unsigned int Raw = reinterpret_cast<unsigned int &>(Base[Offset]);
    return reinterpret_cast<char *>(static_cast<uintptr_t>(Raw));
  };

  char *PSA = 0;
  char *CVT = loadTablePointer(PSA, FLCCVT);
  char *CSD = loadTablePointer(CVT, CVTCSD);
  return reinterpret_cast<int &>(CSD[CSD_NUMBER_ONLINE_STANDARD_CPS]);
}
   1418 #elif defined(_WIN32) && LLVM_ENABLE_THREADS != 0
   1419 // Defined in llvm/lib/Support/Windows/Threading.inc
   1420 int computeHostNumPhysicalCores();
   1421 #else
// Fallback for systems without a dedicated implementation: the physical core
// count is unknown, which is reported as -1.
static int computeHostNumPhysicalCores() {
  const int Unknown = -1;
  return Unknown;
}
   1424 #endif
   1425 
   1426 int sys::getHostNumPhysicalCores() {
   1427   static int NumCores = computeHostNumPhysicalCores();
   1428   return NumCores;
   1429 }
   1430 
   1431 #if defined(__i386__) || defined(_M_IX86) || \
   1432     defined(__x86_64__) || defined(_M_X64)
   1433 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
   1434   unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
   1435   unsigned MaxLevel;
   1436 
   1437   if (getX86CpuIDAndInfo(0, &MaxLevel, &EBX, &ECX, &EDX) || MaxLevel < 1)
   1438     return false;
   1439 
   1440   getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
   1441 
   1442   Features["cx8"]    = (EDX >>  8) & 1;
   1443   Features["cmov"]   = (EDX >> 15) & 1;
   1444   Features["mmx"]    = (EDX >> 23) & 1;
   1445   Features["fxsr"]   = (EDX >> 24) & 1;
   1446   Features["sse"]    = (EDX >> 25) & 1;
   1447   Features["sse2"]   = (EDX >> 26) & 1;
   1448 
   1449   Features["sse3"]   = (ECX >>  0) & 1;
   1450   Features["pclmul"] = (ECX >>  1) & 1;
   1451   Features["ssse3"]  = (ECX >>  9) & 1;
   1452   Features["cx16"]   = (ECX >> 13) & 1;
   1453   Features["sse4.1"] = (ECX >> 19) & 1;
   1454   Features["sse4.2"] = (ECX >> 20) & 1;
   1455   Features["movbe"]  = (ECX >> 22) & 1;
   1456   Features["popcnt"] = (ECX >> 23) & 1;
   1457   Features["aes"]    = (ECX >> 25) & 1;
   1458   Features["rdrnd"]  = (ECX >> 30) & 1;
   1459 
   1460   // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
   1461   // indicates that the AVX registers will be saved and restored on context
   1462   // switch, then we have full AVX support.
   1463   bool HasXSave = ((ECX >> 27) & 1) && !getX86XCR0(&EAX, &EDX);
   1464   bool HasAVXSave = HasXSave && ((ECX >> 28) & 1) && ((EAX & 0x6) == 0x6);
   1465 #if defined(__APPLE__)
   1466   // Darwin lazily saves the AVX512 context on first use: trust that the OS will
   1467   // save the AVX512 context if we use AVX512 instructions, even if the bit is
   1468   // not set right now.
   1469   bool HasAVX512Save = true;
   1470 #else
   1471   // AVX512 requires additional context to be saved by the OS.
   1472   bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0);
   1473 #endif
   1474   // AMX requires additional context to be saved by the OS.
   1475   const unsigned AMXBits = (1 << 17) | (1 << 18);
   1476   bool HasAMXSave = HasXSave && ((EAX & AMXBits) == AMXBits);
   1477 
   1478   Features["avx"]   = HasAVXSave;
   1479   Features["fma"]   = ((ECX >> 12) & 1) && HasAVXSave;
   1480   // Only enable XSAVE if OS has enabled support for saving YMM state.
   1481   Features["xsave"] = ((ECX >> 26) & 1) && HasAVXSave;
   1482   Features["f16c"]  = ((ECX >> 29) & 1) && HasAVXSave;
   1483 
   1484   unsigned MaxExtLevel;
   1485   getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
   1486 
   1487   bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
   1488                      !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
   1489   Features["sahf"]   = HasExtLeaf1 && ((ECX >>  0) & 1);
   1490   Features["lzcnt"]  = HasExtLeaf1 && ((ECX >>  5) & 1);
   1491   Features["sse4a"]  = HasExtLeaf1 && ((ECX >>  6) & 1);
   1492   Features["prfchw"] = HasExtLeaf1 && ((ECX >>  8) & 1);
   1493   Features["xop"]    = HasExtLeaf1 && ((ECX >> 11) & 1) && HasAVXSave;
   1494   Features["lwp"]    = HasExtLeaf1 && ((ECX >> 15) & 1);
   1495   Features["fma4"]   = HasExtLeaf1 && ((ECX >> 16) & 1) && HasAVXSave;
   1496   Features["tbm"]    = HasExtLeaf1 && ((ECX >> 21) & 1);
   1497   Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1);
   1498 
   1499   Features["64bit"]  = HasExtLeaf1 && ((EDX >> 29) & 1);
   1500 
   1501   // Miscellaneous memory related features, detected by
   1502   // using the 0x80000008 leaf of the CPUID instruction
   1503   bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 &&
   1504                      !getX86CpuIDAndInfo(0x80000008, &EAX, &EBX, &ECX, &EDX);
   1505   Features["clzero"]   = HasExtLeaf8 && ((EBX >> 0) & 1);
   1506   Features["wbnoinvd"] = HasExtLeaf8 && ((EBX >> 9) & 1);
   1507 
   1508   bool HasLeaf7 =
   1509       MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
   1510 
   1511   Features["fsgsbase"]   = HasLeaf7 && ((EBX >>  0) & 1);
   1512   Features["sgx"]        = HasLeaf7 && ((EBX >>  2) & 1);
   1513   Features["bmi"]        = HasLeaf7 && ((EBX >>  3) & 1);
   1514   // AVX2 is only supported if we have the OS save support from AVX.
   1515   Features["avx2"]       = HasLeaf7 && ((EBX >>  5) & 1) && HasAVXSave;
   1516   Features["bmi2"]       = HasLeaf7 && ((EBX >>  8) & 1);
   1517   Features["invpcid"]    = HasLeaf7 && ((EBX >> 10) & 1);
   1518   Features["rtm"]        = HasLeaf7 && ((EBX >> 11) & 1);
   1519   // AVX512 is only supported if the OS supports the context save for it.
   1520   Features["avx512f"]    = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save;
   1521   Features["avx512dq"]   = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save;
   1522   Features["rdseed"]     = HasLeaf7 && ((EBX >> 18) & 1);
   1523   Features["adx"]        = HasLeaf7 && ((EBX >> 19) & 1);
   1524   Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save;
   1525   Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1);
   1526   Features["clwb"]       = HasLeaf7 && ((EBX >> 24) & 1);
   1527   Features["avx512pf"]   = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save;
   1528   Features["avx512er"]   = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save;
   1529   Features["avx512cd"]   = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save;
   1530   Features["sha"]        = HasLeaf7 && ((EBX >> 29) & 1);
   1531   Features["avx512bw"]   = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save;
   1532   Features["avx512vl"]   = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save;
   1533 
   1534   Features["prefetchwt1"]     = HasLeaf7 && ((ECX >>  0) & 1);
   1535   Features["avx512vbmi"]      = HasLeaf7 && ((ECX >>  1) & 1) && HasAVX512Save;
   1536   Features["pku"]             = HasLeaf7 && ((ECX >>  4) & 1);
   1537   Features["waitpkg"]         = HasLeaf7 && ((ECX >>  5) & 1);
   1538   Features["avx512vbmi2"]     = HasLeaf7 && ((ECX >>  6) & 1) && HasAVX512Save;
   1539   Features["shstk"]           = HasLeaf7 && ((ECX >>  7) & 1);
   1540   Features["gfni"]            = HasLeaf7 && ((ECX >>  8) & 1);
   1541   Features["vaes"]            = HasLeaf7 && ((ECX >>  9) & 1) && HasAVXSave;
   1542   Features["vpclmulqdq"]      = HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave;
   1543   Features["avx512vnni"]      = HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save;
   1544   Features["avx512bitalg"]    = HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save;
   1545   Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save;
   1546   Features["rdpid"]           = HasLeaf7 && ((ECX >> 22) & 1);
   1547   Features["kl"]              = HasLeaf7 && ((ECX >> 23) & 1); // key locker
   1548   Features["cldemote"]        = HasLeaf7 && ((ECX >> 25) & 1);
   1549   Features["movdiri"]         = HasLeaf7 && ((ECX >> 27) & 1);
   1550   Features["movdir64b"]       = HasLeaf7 && ((ECX >> 28) & 1);
   1551   Features["enqcmd"]          = HasLeaf7 && ((ECX >> 29) & 1);
   1552 
   1553   Features["uintr"]           = HasLeaf7 && ((EDX >> 5) & 1);
   1554   Features["avx512vp2intersect"] =
   1555       HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save;
   1556   Features["serialize"]       = HasLeaf7 && ((EDX >> 14) & 1);
   1557   Features["tsxldtrk"]        = HasLeaf7 && ((EDX >> 16) & 1);
   1558   // There are two CPUID leafs which provide information associated with the
   1559   // pconfig instruction:
   1560   // EAX=0x7, ECX=0x0 indicates the availability of the instruction (via the 18th
   1561   // bit of EDX), while the EAX=0x1b leaf returns information on the
   1562   // availability of specific pconfig leafs.
   1563   // The target feature here only refers to the the first of these two.
   1564   // Users might need to check for the availability of specific pconfig
   1565   // leaves using cpuid, since that information is ignored while
   1566   // detecting features using the "-march=native" flag.
   1567   // For more info, see X86 ISA docs.
   1568   Features["pconfig"] = HasLeaf7 && ((EDX >> 18) & 1);
   1569   Features["amx-bf16"]   = HasLeaf7 && ((EDX >> 22) & 1) && HasAMXSave;
   1570   Features["amx-tile"]   = HasLeaf7 && ((EDX >> 24) & 1) && HasAMXSave;
   1571   Features["amx-int8"]   = HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave;
   1572   bool HasLeaf7Subleaf1 =
   1573       MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
   1574   Features["avxvnni"]    = HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave;
   1575   Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save;
   1576   Features["hreset"]     = HasLeaf7Subleaf1 && ((EAX >> 22) & 1);
   1577 
   1578   bool HasLeafD = MaxLevel >= 0xd &&
   1579                   !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
   1580 
   1581   // Only enable XSAVE if OS has enabled support for saving YMM state.
   1582   Features["xsaveopt"] = HasLeafD && ((EAX >> 0) & 1) && HasAVXSave;
   1583   Features["xsavec"]   = HasLeafD && ((EAX >> 1) & 1) && HasAVXSave;
   1584   Features["xsaves"]   = HasLeafD && ((EAX >> 3) & 1) && HasAVXSave;
   1585 
   1586   bool HasLeaf14 = MaxLevel >= 0x14 &&
   1587                   !getX86CpuIDAndInfoEx(0x14, 0x0, &EAX, &EBX, &ECX, &EDX);
   1588 
   1589   Features["ptwrite"] = HasLeaf14 && ((EBX >> 4) & 1);
   1590 
   1591   bool HasLeaf19 =
   1592       MaxLevel >= 0x19 && !getX86CpuIDAndInfo(0x19, &EAX, &EBX, &ECX, &EDX);
   1593   Features["widekl"] = HasLeaf7 && HasLeaf19 && ((EBX >> 2) & 1);
   1594 
   1595   return true;
   1596 }
   1597 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
   1598 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
   1599   std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
   1600   if (!P)
   1601     return false;
   1602 
   1603   SmallVector<StringRef, 32> Lines;
   1604   P->getBuffer().split(Lines, "\n");
   1605 
   1606   SmallVector<StringRef, 32> CPUFeatures;
   1607 
   1608   // Look for the CPU features.
   1609   for (unsigned I = 0, E = Lines.size(); I != E; ++I)
   1610     if (Lines[I].startswith("Features")) {
   1611       Lines[I].split(CPUFeatures, ' ');
   1612       break;
   1613     }
   1614 
   1615 #if defined(__aarch64__)
   1616   // Keep track of which crypto features we have seen
   1617   enum { CAP_AES = 0x1, CAP_PMULL = 0x2, CAP_SHA1 = 0x4, CAP_SHA2 = 0x8 };
   1618   uint32_t crypto = 0;
   1619 #endif
   1620 
   1621   for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
   1622     StringRef LLVMFeatureStr = StringSwitch<StringRef>(CPUFeatures[I])
   1623 #if defined(__aarch64__)
   1624                                    .Case("asimd", "neon")
   1625                                    .Case("fp", "fp-armv8")
   1626                                    .Case("crc32", "crc")
   1627 #else
   1628                                    .Case("half", "fp16")
   1629                                    .Case("neon", "neon")
   1630                                    .Case("vfpv3", "vfp3")
   1631                                    .Case("vfpv3d16", "d16")
   1632                                    .Case("vfpv4", "vfp4")
   1633                                    .Case("idiva", "hwdiv-arm")
   1634                                    .Case("idivt", "hwdiv")
   1635 #endif
   1636                                    .Default("");
   1637 
   1638 #if defined(__aarch64__)
   1639     // We need to check crypto separately since we need all of the crypto
   1640     // extensions to enable the subtarget feature
   1641     if (CPUFeatures[I] == "aes")
   1642       crypto |= CAP_AES;
   1643     else if (CPUFeatures[I] == "pmull")
   1644       crypto |= CAP_PMULL;
   1645     else if (CPUFeatures[I] == "sha1")
   1646       crypto |= CAP_SHA1;
   1647     else if (CPUFeatures[I] == "sha2")
   1648       crypto |= CAP_SHA2;
   1649 #endif
   1650 
   1651     if (LLVMFeatureStr != "")
   1652       Features[LLVMFeatureStr] = true;
   1653   }
   1654 
   1655 #if defined(__aarch64__)
   1656   // If we have all crypto bits we can add the feature
   1657   if (crypto == (CAP_AES | CAP_PMULL | CAP_SHA1 | CAP_SHA2))
   1658     Features["crypto"] = true;
   1659 #endif
   1660 
   1661   return true;
   1662 }
   1663 #elif defined(_WIN32) && (defined(__aarch64__) || defined(_M_ARM64))
   1664 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
   1665   if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
   1666     Features["neon"] = true;
   1667   if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE))
   1668     Features["crc"] = true;
   1669   if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE))
   1670     Features["crypto"] = true;
   1671 
   1672   return true;
   1673 }
   1674 #else
   1675 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { return false; }
   1676 #endif
   1677 
   1678 std::string sys::getProcessTriple() {
   1679   std::string TargetTripleString = updateTripleOSVersion(LLVM_HOST_TRIPLE);
   1680   Triple PT(Triple::normalize(TargetTripleString));
   1681 
   1682   if (sizeof(void *) == 8 && PT.isArch32Bit())
   1683     PT = PT.get64BitArchVariant();
   1684   if (sizeof(void *) == 4 && PT.isArch64Bit())
   1685     PT = PT.get32BitArchVariant();
   1686 
   1687   return PT.str();
   1688 }
   1689