1/****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 ****************************************************************************/
23
24#pragma once
25
26#include <iostream>
27#include <vector>
28#include <bitset>
29#include <array>
30#include <string>
31#include <algorithm>
32
33// Clang for Windows does supply an intrin.h with __cpuid intrinsics, however...
34// It seems to not realize that a write to "b" (ebx) will kill the value in rbx.
35// This attempts to use the "native" clang / gcc intrinsics instead of the windows
36// compatible ones.
37#if defined(_MSC_VER) && !defined(__clang__)
38#include <intrin.h>
39#else
40#include <string.h>
41#if !defined(__cpuid)
42#include <cpuid.h>
43#endif
44#endif
45
/// Snapshot of the processor's CPUID-reported identity and feature flags.
///
/// All CPUID queries are issued once, while the object is being constructed;
/// every accessor afterwards only reads the cached result. Accessors are
/// const-qualified so the object can be queried through a const reference.
class InstructionSet
{
public:
    InstructionSet() : CPU_Rep() {}

    // Identification strings (copies of the cached values).
    std::string Vendor() const { return CPU_Rep.vendor_; }
    std::string Brand() const { return CPU_Rep.brand_; }

    // Flags cached from CPUID leaf 0x00000001, register ECX.
    bool SSE3() const { return CPU_Rep.f_1_ECX_[0]; }
    bool PCLMULQDQ() const { return CPU_Rep.f_1_ECX_[1]; }
    bool MONITOR() const { return CPU_Rep.f_1_ECX_[3]; }
    bool SSSE3() const { return CPU_Rep.f_1_ECX_[9]; }
    bool FMA() const { return CPU_Rep.f_1_ECX_[12]; }
    bool CMPXCHG16B() const { return CPU_Rep.f_1_ECX_[13]; }
    bool SSE41() const { return CPU_Rep.f_1_ECX_[19]; }
    bool SSE42() const { return CPU_Rep.f_1_ECX_[20]; }
    bool MOVBE() const { return CPU_Rep.f_1_ECX_[22]; }
    bool POPCNT() const { return CPU_Rep.f_1_ECX_[23]; }
    bool AES() const { return CPU_Rep.f_1_ECX_[25]; }
    bool XSAVE() const { return CPU_Rep.f_1_ECX_[26]; }
    bool OSXSAVE() const { return CPU_Rep.f_1_ECX_[27]; }
    bool RDRAND() const { return CPU_Rep.f_1_ECX_[30]; }

    // Flags cached from CPUID leaf 0x00000001, register EDX.
    bool MSR() const { return CPU_Rep.f_1_EDX_[5]; }
    bool CX8() const { return CPU_Rep.f_1_EDX_[8]; }
    bool SEP() const { return CPU_Rep.f_1_EDX_[11]; }
    bool CMOV() const { return CPU_Rep.f_1_EDX_[15]; }
    bool CLFSH() const { return CPU_Rep.f_1_EDX_[19]; }
    bool MMX() const { return CPU_Rep.f_1_EDX_[23]; }
    bool FXSR() const { return CPU_Rep.f_1_EDX_[24]; }
    bool SSE() const { return CPU_Rep.f_1_EDX_[25]; }
    bool SSE2() const { return CPU_Rep.f_1_EDX_[26]; }

    // Flags cached from CPUID leaf 0x00000007 (sub-leaf 0), register EBX.
    // HLE and RTM are additionally reported only for "GenuineIntel" parts.
    bool FSGSBASE() const { return CPU_Rep.f_7_EBX_[0]; }
    bool BMI1() const { return CPU_Rep.f_7_EBX_[3]; }
    bool HLE() const { return CPU_Rep.isIntel_ && CPU_Rep.f_7_EBX_[4]; }
    bool BMI2() const { return CPU_Rep.f_7_EBX_[8]; }
    bool ERMS() const { return CPU_Rep.f_7_EBX_[9]; }
    bool INVPCID() const { return CPU_Rep.f_7_EBX_[10]; }
    bool RTM() const { return CPU_Rep.isIntel_ && CPU_Rep.f_7_EBX_[11]; }
    bool RDSEED() const { return CPU_Rep.f_7_EBX_[18]; }
    bool ADX() const { return CPU_Rep.f_7_EBX_[19]; }
    bool SHA() const { return CPU_Rep.f_7_EBX_[29]; }

    // Flag cached from CPUID leaf 0x00000007 (sub-leaf 0), register ECX.
    bool PREFETCHWT1() const { return CPU_Rep.f_7_ECX_[0]; }

    // Flags cached from CPUID leaf 0x80000001, register ECX.
    // Several of these are reported only for one vendor string.
    // NOTE(review): LZCNT() and ABM() read the same bit and differ only in the
    // vendor gate, so LZCNT() is always false on "AuthenticAMD" parts --
    // confirm that this is intended.
    bool LAHF() const { return CPU_Rep.f_81_ECX_[0]; }
    bool LZCNT() const { return CPU_Rep.isIntel_ && CPU_Rep.f_81_ECX_[5]; }
    bool ABM() const { return CPU_Rep.isAMD_ && CPU_Rep.f_81_ECX_[5]; }
    bool SSE4a() const { return CPU_Rep.isAMD_ && CPU_Rep.f_81_ECX_[6]; }
    bool XOP() const { return CPU_Rep.isAMD_ && CPU_Rep.f_81_ECX_[11]; }
    bool TBM() const { return CPU_Rep.isAMD_ && CPU_Rep.f_81_ECX_[21]; }

    // Flags cached from CPUID leaf 0x80000001, register EDX.
    // NOTE(review): SYSCALL() and RDTSCP() are gated on the Intel vendor
    // string and therefore always false elsewhere -- confirm this is intended.
    bool SYSCALL() const { return CPU_Rep.isIntel_ && CPU_Rep.f_81_EDX_[11]; }
    bool MMXEXT() const { return CPU_Rep.isAMD_ && CPU_Rep.f_81_EDX_[22]; }
    bool RDTSCP() const { return CPU_Rep.isIntel_ && CPU_Rep.f_81_EDX_[27]; }
    bool _3DNOWEXT() const { return CPU_Rep.isAMD_ && CPU_Rep.f_81_EDX_[30]; }
    bool _3DNOW() const { return CPU_Rep.isAMD_ && CPU_Rep.f_81_EDX_[31]; }

    // Vector-extension flags (leaf 1 ECX and leaf 7 EBX).
    bool AVX() const { return CPU_Rep.f_1_ECX_[28]; }
    bool F16C() const { return CPU_Rep.f_1_ECX_[29]; }
    bool AVX2() const { return CPU_Rep.f_7_EBX_[5]; }
    bool AVX512F() const { return CPU_Rep.f_7_EBX_[16]; }
    bool AVX512PF() const { return CPU_Rep.f_7_EBX_[26]; }
    bool AVX512ER() const { return CPU_Rep.f_7_EBX_[27]; }
    bool AVX512CD() const { return CPU_Rep.f_7_EBX_[28]; }

private:
    /// Performs the CPUID queries and stores the raw and decoded results.
    class InstructionSet_Internal
    {
    public:
        /// Register values of one CPUID leaf, in the order EAX, EBX, ECX, EDX.
        using Regs = std::array<int, 4>;

        InstructionSet_Internal()
        {
            // Leaf 0 reports the highest valid basic leaf; cache every basic leaf.
            nIds_ = static_cast<int>(MaxLeaf(0));
            for (int leaf = 0; leaf <= nIds_; ++leaf)
            {
                data_.push_back(QueryLeaf(static_cast<unsigned>(leaf)));
            }

            // The vendor string is spread over EBX, EDX, ECX of leaf 0 (in that
            // order). memcpy is used instead of casting the char buffer to int*,
            // which would break strict-aliasing and alignment rules.
            char vendor[0x20];
            memset(vendor, 0, sizeof(vendor));
            memcpy(vendor + 0, &data_[0][1], sizeof(int));
            memcpy(vendor + 4, &data_[0][3], sizeof(int));
            memcpy(vendor + 8, &data_[0][2], sizeof(int));
            vendor_ = vendor;

            if (vendor_ == "GenuineIntel")
            {
                isIntel_ = true;
            }
            else if (vendor_ == "AuthenticAMD")
            {
                isAMD_ = true;
            }

            // Feature bits of leaf 0x00000001.
            if (nIds_ >= 1)
            {
                f_1_ECX_ = data_[1][2];
                f_1_EDX_ = data_[1][3];
            }

            // Feature bits of leaf 0x00000007.
            if (nIds_ >= 7)
            {
                f_7_EBX_ = data_[7][1];
                f_7_ECX_ = data_[7][2];
            }

            // Leaf 0x80000000 reports the highest valid extended leaf; cache
            // every extended leaf. extdata_[k] holds leaf 0x80000000 + k.
            nExIds_ = MaxLeaf(0x80000000);
            for (unsigned leaf = 0x80000000; leaf <= nExIds_; ++leaf)
            {
                extdata_.push_back(QueryLeaf(leaf));
            }

            // Feature bits of leaf 0x80000001.
            if (nExIds_ >= 0x80000001)
            {
                f_81_ECX_ = extdata_[1][2];
                f_81_EDX_ = extdata_[1][3];
            }

            // The brand string, when reported, is the concatenation of the
            // registers of leaves 0x80000002..0x80000004. The buffer is larger
            // than the copied bytes and zero-filled, so it stays NUL-terminated.
            if (nExIds_ >= 0x80000004)
            {
                char brand[0x40];
                memset(brand, 0, sizeof(brand));
                memcpy(brand, extdata_[2].data(), sizeof(Regs));
                memcpy(brand + 16, extdata_[3].data(), sizeof(Regs));
                memcpy(brand + 32, extdata_[4].data(), sizeof(Regs));
                brand_ = brand;
            }
        }

        int               nIds_{0};    // highest basic leaf reported by leaf 0
        unsigned          nExIds_{0};  // highest extended leaf reported by 0x80000000
        std::string       vendor_;
        std::string       brand_;
        bool              isIntel_{false};  // vendor_ == "GenuineIntel"
        bool              isAMD_{false};    // vendor_ == "AuthenticAMD"
        std::bitset<32>   f_1_ECX_{};
        std::bitset<32>   f_1_EDX_{};
        std::bitset<32>   f_7_EBX_{};
        std::bitset<32>   f_7_ECX_{};
        std::bitset<32>   f_81_ECX_{};
        std::bitset<32>   f_81_EDX_{};
        std::vector<Regs> data_;     // raw basic leaves, indexed by leaf number
        std::vector<Regs> extdata_;  // raw extended leaves, indexed by leaf - 0x80000000

    private:
        /// Returns the highest leaf reported for the range starting at `base`
        /// (0 for the basic range, 0x80000000 for the extended range).
        static unsigned MaxLeaf(unsigned base)
        {
#if defined(_MSC_VER) && !defined(__clang__)
            int regs[4];
            __cpuid(regs, static_cast<int>(base));
            return static_cast<unsigned>(regs[0]);
#else
            return __get_cpuid_max(base, nullptr);
#endif
        }

        /// Executes CPUID for `leaf` with sub-leaf 0 and returns the registers.
        static Regs QueryLeaf(unsigned leaf)
        {
            Regs regs{};
#if defined(_MSC_VER) && !defined(__clang__)
            __cpuidex(regs.data(), static_cast<int>(leaf), 0);
#else
            int* out = regs.data();
            __cpuid_count(leaf, 0, out[0], out[1], out[2], out[3]);
#endif
            return regs;
        }
    };
    const InstructionSet_Internal CPU_Rep;
};
232