/* u_cpu_detect.c revision b8e80941 */
/**************************************************************************
 *
 * Copyright 2008 Dennis Smit
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
 * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * @file
 * CPU feature detection.
 *
 * @author Dennis Smit
 * @author Based on the work of Eric Anholt <anholt@FreeBSD.org>
 */

35#include "pipe/p_config.h"
36
37#include "util/u_debug.h"
38#include "u_cpu_detect.h"
39#include "c11/threads.h"
40
41#if defined(PIPE_ARCH_PPC)
42#if defined(PIPE_OS_APPLE)
43#include <sys/sysctl.h>
44#else
45#include <signal.h>
46#include <setjmp.h>
47#endif
48#endif
49
50#if defined(PIPE_OS_NETBSD) || defined(PIPE_OS_OPENBSD)
51#include <sys/param.h>
52#include <sys/sysctl.h>
53#include <machine/cpu.h>
54#endif
55
56#if defined(PIPE_OS_FREEBSD) || defined(PIPE_OS_DRAGONFLY)
57#include <sys/types.h>
58#include <sys/sysctl.h>
59#endif
60
61#if defined(PIPE_OS_LINUX)
62#include <signal.h>
63#include <fcntl.h>
64#include <elf.h>
65#endif
66
67#ifdef PIPE_OS_UNIX
68#include <unistd.h>
69#endif
70
71#if defined(HAS_ANDROID_CPUFEATURES)
72#include <cpu-features.h>
73#endif
74
75#if defined(PIPE_OS_WINDOWS)
76#include <windows.h>
77#if defined(PIPE_CC_MSVC)
78#include <intrin.h>
79#endif
80#endif
81
82
83#ifdef DEBUG
84DEBUG_GET_ONCE_BOOL_OPTION(dump_cpu, "GALLIUM_DUMP_CPU", FALSE)
85#endif
86
87
88struct util_cpu_caps util_cpu_caps;
89
90#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
91static int has_cpuid(void);
92#endif
93
94
95#if defined(PIPE_ARCH_PPC) && !defined(PIPE_OS_APPLE)
/* Jump target used to recover from a faulting probe instruction. */
static jmp_buf  __lv_powerpc_jmpbuf;
/* Non-zero only while a probe is in flight and the jump target is valid. */
static volatile sig_atomic_t __lv_powerpc_canjump = 0;

/**
 * SIGILL handler used while probing for optional PowerPC instructions.
 *
 * If a probe is armed (__lv_powerpc_canjump != 0) the handler disarms it and
 * jumps back to the setjmp() point, which then sees a non-zero return value.
 *
 * If no probe is armed the signal is not ours: the default disposition is
 * restored and the signal re-raised.  The handler then returns instead of
 * falling through to longjmp(), because the jump buffer is either stale or
 * was never initialised and jumping through it is undefined behaviour.
 *
 * @param sig  signal number being delivered.
 */
static void
sigill_handler(int sig)
{
   if (!__lv_powerpc_canjump) {
      signal (sig, SIG_DFL);
      raise (sig);
      return;
   }

   __lv_powerpc_canjump = 0;
   longjmp(__lv_powerpc_jmpbuf, 1);
}
110#endif
111
112#if defined(PIPE_ARCH_PPC)
113static void
114check_os_altivec_support(void)
115{
116#if defined(PIPE_OS_APPLE)
117   int sels[2] = {CTL_HW, HW_VECTORUNIT};
118   int has_vu = 0;
119   int len = sizeof (has_vu);
120   int err;
121
122   err = sysctl(sels, 2, &has_vu, &len, NULL, 0);
123
124   if (err == 0) {
125      if (has_vu != 0) {
126         util_cpu_caps.has_altivec = 1;
127      }
128   }
129#else /* !PIPE_OS_APPLE */
130   /* not on Apple/Darwin, do it the brute-force way */
131   /* this is borrowed from the libmpeg2 library */
132   signal(SIGILL, sigill_handler);
133   if (setjmp(__lv_powerpc_jmpbuf)) {
134      signal(SIGILL, SIG_DFL);
135   } else {
136      boolean enable_altivec = TRUE;    /* Default: enable  if available, and if not overridden */
137      boolean enable_vsx = TRUE;
138#ifdef DEBUG
139      /* Disabling Altivec code generation is not the same as disabling VSX code generation,
140       * which can be done simply by passing -mattr=-vsx to the LLVM compiler; cf.
141       * lp_build_create_jit_compiler_for_module().
142       * If you want to disable Altivec code generation, the best place to do it is here.
143       */
144      char *env_control = getenv("GALLIVM_ALTIVEC");    /* 1=enable (default); 0=disable */
145      if (env_control && env_control[0] == '0') {
146         enable_altivec = FALSE;
147      }
148#endif
149      /* VSX instructions can be explicitly enabled/disabled via GALLIVM_VSX=1 or 0 */
150      char *env_vsx = getenv("GALLIVM_VSX");
151      if (env_vsx && env_vsx[0] == '0') {
152         enable_vsx = FALSE;
153      }
154      if (enable_altivec) {
155         __lv_powerpc_canjump = 1;
156
157         __asm __volatile
158            ("mtspr 256, %0\n\t"
159             "vand %%v0, %%v0, %%v0"
160             :
161             : "r" (-1));
162
163         util_cpu_caps.has_altivec = 1;
164
165         if (enable_vsx) {
166            __asm __volatile("xxland %vs0, %vs0, %vs0");
167            util_cpu_caps.has_vsx = 1;
168         }
169         signal(SIGILL, SIG_DFL);
170      } else {
171         util_cpu_caps.has_altivec = 0;
172      }
173   }
174#endif /* !PIPE_OS_APPLE */
175}
176#endif /* PIPE_ARCH_PPC */
177
178
179#if defined(PIPE_ARCH_X86) || defined (PIPE_ARCH_X86_64)
/**
 * Report whether the CPUID instruction may be executed.
 *
 * On 32-bit x86 built with GCC this tries to toggle the ID flag (bit 21,
 * 0x200000) in EFLAGS; CPUID exists iff the change sticks.  The compiler
 * check uses PIPE_CC_GCC, the macro every other compiler test in this file
 * uses, so that the probe is actually compiled in.  Other 32-bit x86
 * compilers assume CPUID is present, and x86-64 always has it.
 *
 * @return non-zero if CPUID is usable, 0 otherwise (including non-x86).
 */
static int has_cpuid(void)
{
#if defined(PIPE_ARCH_X86)
#if defined(PIPE_CC_GCC)
   int a, c;

   __asm __volatile
      ("pushf\n"
       "popl %0\n"
       "movl %0, %1\n"
       "xorl $0x200000, %0\n"
       "push %0\n"
       "popf\n"
       "pushf\n"
       "popl %0\n"
       : "=a" (a), "=c" (c)
       :
       : "cc");

   /* a = EFLAGS after the toggle attempt, c = EFLAGS before it. */
   return a != c;
#else
   /* FIXME */
   return 1;
#endif
#elif defined(PIPE_ARCH_X86_64)
   return 1;
#else
   return 0;
#endif
}
210
211
/**
 * Execute CPUID for leaf @p ax with the sub-leaf register (ECX) cleared.
 *
 * ECX is zeroed on the GCC paths as well, so leaves that are indexed by a
 * sub-leaf give the same, deterministic answer as the MSVC __cpuid intrinsic.
 *
 * @param ax  CPUID leaf (placed in EAX).
 * @param p   receives EAX, EBX, ECX, EDX in p[0]..p[3]; all four are set to 0
 *            when no implementation is available for the compiler/arch.
 *
 * @sa cpuid.h included in gcc-4.3 onwards.
 * @sa http://msdn.microsoft.com/en-us/library/hskdteyh.aspx
 */
static inline void
cpuid(uint32_t ax, uint32_t *p)
{
#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86)
   /* EBX is swapped through ESI around the instruction rather than being
    * named as an output directly.
    */
   __asm __volatile (
     "xchgl %%ebx, %1\n\t"
     "cpuid\n\t"
     "xchgl %%ebx, %1"
     : "=a" (p[0]),
       "=S" (p[1]),
       "=c" (p[2]),
       "=d" (p[3])
     : "0" (ax), "2" (0)
   );
#elif defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86_64)
   __asm __volatile (
     "cpuid\n\t"
     : "=a" (p[0]),
       "=b" (p[1]),
       "=c" (p[2]),
       "=d" (p[3])
     : "0" (ax), "2" (0)
   );
#elif defined(PIPE_CC_MSVC)
   /* The intrinsic is declared with int parameters. */
   __cpuid((int *)p, (int)ax);
#else
   p[0] = 0;
   p[1] = 0;
   p[2] = 0;
   p[3] = 0;
#endif
}
248
/**
 * Execute CPUID for leaf @p ax, sub-leaf @p cx.
 *
 * @param ax  CPUID leaf (placed in EAX).
 * @param cx  CPUID sub-leaf (placed in ECX).
 * @param p   receives EAX, EBX, ECX, EDX in p[0]..p[3]; all four are set to 0
 *            when no implementation is available for the compiler/arch.
 *
 * @sa cpuid.h included in gcc-4.4 onwards.
 * @sa http://msdn.microsoft.com/en-us/library/hskdteyh%28v=vs.90%29.aspx
 */
static inline void
cpuid_count(uint32_t ax, uint32_t cx, uint32_t *p)
{
#if defined(PIPE_CC_GCC) && (defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64))
   uint32_t out_a, out_b, out_c, out_d;

#if defined(PIPE_ARCH_X86)
   /* EBX is swapped through ESI around the instruction rather than being
    * named as an output directly.
    */
   __asm __volatile (
     "xchgl %%ebx, %1\n\t"
     "cpuid\n\t"
     "xchgl %%ebx, %1"
     : "=a" (out_a), "=S" (out_b), "=c" (out_c), "=d" (out_d)
     : "0" (ax), "2" (cx)
   );
#else
   __asm __volatile (
     "cpuid\n\t"
     : "=a" (out_a), "=b" (out_b), "=c" (out_c), "=d" (out_d)
     : "0" (ax), "2" (cx)
   );
#endif

   p[0] = out_a;
   p[1] = out_b;
   p[2] = out_c;
   p[3] = out_d;
#elif defined(PIPE_CC_MSVC)
   __cpuidex(p, ax, cx);
#else
   /* No way to run CPUID here: report "nothing supported". */
   unsigned i;

   for (i = 0; i < 4; i++)
      p[i] = 0;
#endif
}
285
286
/**
 * Read extended control register 0 (XGETBV with ECX = 0).
 *
 * @return the 64-bit register value (EDX:EAX), or 0 when the compiler offers
 *         no way to issue the instruction.
 */
static inline uint64_t xgetbv(void)
{
#if defined(PIPE_CC_GCC)
   uint32_t lo, hi;

   /* Emitted as raw bytes: xgetbv isn't supported on gcc < 4.4 */
   __asm __volatile (
     ".byte 0x0f, 0x01, 0xd0"
     : "=a"(lo), "=d"(hi)
     : "c"(0)
   );

   return (uint64_t)lo | ((uint64_t)hi << 32);
#elif defined(PIPE_CC_MSVC) && defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
   return _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
#else
   return 0;
#endif
}
306
307
308#if defined(PIPE_ARCH_X86)
309PIPE_ALIGN_STACK static inline boolean sse2_has_daz(void)
310{
311   struct {
312      uint32_t pad1[7];
313      uint32_t mxcsr_mask;
314      uint32_t pad2[128-8];
315   } PIPE_ALIGN_VAR(16) fxarea;
316
317   fxarea.mxcsr_mask = 0;
318#if defined(PIPE_CC_GCC)
319   __asm __volatile ("fxsave %0" : "+m" (fxarea));
320#elif defined(PIPE_CC_MSVC) || defined(PIPE_CC_ICL)
321   _fxsave(&fxarea);
322#else
323   fxarea.mxcsr_mask = 0;
324#endif
325   return !!(fxarea.mxcsr_mask & (1 << 6));
326}
327#endif
328
329#endif /* X86 or X86_64 */
330
331#if defined(PIPE_ARCH_ARM)
/**
 * Detect NEON on 32-bit ARM and record it in util_cpu_caps.has_neon.
 *
 * Uses the Android cpufeatures library when HAS_ANDROID_CPUFEATURES is
 * defined, otherwise (on Linux) scans /proc/self/auxv for AT_HWCAP and takes
 * bit 12 of its value.  On other systems nothing is detected.
 */
static void
check_os_arm_support(void)
{
   /*
    * The cpufeatures library is the preferred source on Android, but it is
    * only available when Mesa is built through the Android build system
    * (Android.mk).  Standalone builds lack it, so this is keyed on a
    * dedicated macro set by the respective Android.mk rather than on
    * PIPE_OS_ANDROID.
    */
#if defined(HAS_ANDROID_CPUFEATURES)
   AndroidCpuFamily family = android_getCpuFamily();
   uint64_t features = android_getCpuFeatures();

   if (family == ANDROID_CPU_FAMILY_ARM &&
       (features & ANDROID_CPU_ARM_FEATURE_NEON)) {
      util_cpu_caps.has_neon = 1;
   }
#elif defined(PIPE_OS_LINUX)
   Elf32_auxv_t entry;
   int auxv_fd = open("/proc/self/auxv", O_RDONLY | O_CLOEXEC);

   if (auxv_fd < 0)
      return;

   /* Walk the auxiliary vector one fixed-size record at a time. */
   while (read(auxv_fd, &entry, sizeof(Elf32_auxv_t)) == sizeof(Elf32_auxv_t)) {
      uint32_t hwcap;

      if (entry.a_type != AT_HWCAP)
         continue;

      hwcap = entry.a_un.a_val;
      util_cpu_caps.has_neon = (hwcap >> 12) & 1;
      break;
   }
   close (auxv_fd);
#endif /* PIPE_OS_LINUX */
}
368
369#elif defined(PIPE_ARCH_AARCH64)
370static void
371check_os_arm_support(void)
372{
373    util_cpu_caps.has_neon = true;
374}
375#endif /* PIPE_ARCH_ARM || PIPE_ARCH_AARCH64 */
376
377static void
378get_cpu_topology(void)
379{
380   /* Default. This is correct if L3 is not present or there is only one. */
381   util_cpu_caps.cores_per_L3 = util_cpu_caps.nr_cpus;
382
383#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
384   /* AMD Zen */
385   if (util_cpu_caps.x86_cpu_type == 0x17) {
386      uint32_t regs[4];
387
388      /* Query the L3 cache topology information. */
389      cpuid_count(0x8000001D, 3, regs);
390      unsigned cache_level = (regs[0] >> 5) & 0x7;
391      unsigned cores_per_cache = ((regs[0] >> 14) & 0xfff) + 1;
392
393      if (cache_level == 3)
394         util_cpu_caps.cores_per_L3 = cores_per_cache;
395   }
396#endif
397}
398
399static void
400util_cpu_detect_once(void)
401{
402   memset(&util_cpu_caps, 0, sizeof util_cpu_caps);
403
404   /* Count the number of CPUs in system */
405#if defined(PIPE_OS_WINDOWS)
406   {
407      SYSTEM_INFO system_info;
408      GetSystemInfo(&system_info);
409      util_cpu_caps.nr_cpus = system_info.dwNumberOfProcessors;
410   }
411#elif defined(PIPE_OS_UNIX) && defined(_SC_NPROCESSORS_ONLN)
412   util_cpu_caps.nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
413   if (util_cpu_caps.nr_cpus == ~0)
414      util_cpu_caps.nr_cpus = 1;
415#elif defined(PIPE_OS_BSD)
416   {
417      int mib[2], ncpu;
418      int len;
419
420      mib[0] = CTL_HW;
421      mib[1] = HW_NCPU;
422
423      len = sizeof (ncpu);
424      sysctl(mib, 2, &ncpu, &len, NULL, 0);
425      util_cpu_caps.nr_cpus = ncpu;
426   }
427#else
428   util_cpu_caps.nr_cpus = 1;
429#endif
430
431   /* Make the fallback cacheline size nonzero so that it can be
432    * safely passed to align().
433    */
434   util_cpu_caps.cacheline = sizeof(void *);
435
436#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
437   if (has_cpuid()) {
438      uint32_t regs[4];
439      uint32_t regs2[4];
440
441      util_cpu_caps.cacheline = 32;
442
443      /* Get max cpuid level */
444      cpuid(0x00000000, regs);
445
446      if (regs[0] >= 0x00000001) {
447         unsigned int cacheline;
448
449         cpuid (0x00000001, regs2);
450
451         util_cpu_caps.x86_cpu_type = (regs2[0] >> 8) & 0xf;
452         /* Add "extended family". */
453         if (util_cpu_caps.x86_cpu_type == 0xf)
454             util_cpu_caps.x86_cpu_type += ((regs2[0] >> 20) & 0xff);
455
456         /* general feature flags */
457         util_cpu_caps.has_tsc    = (regs2[3] >>  4) & 1; /* 0x0000010 */
458         util_cpu_caps.has_mmx    = (regs2[3] >> 23) & 1; /* 0x0800000 */
459         util_cpu_caps.has_sse    = (regs2[3] >> 25) & 1; /* 0x2000000 */
460         util_cpu_caps.has_sse2   = (regs2[3] >> 26) & 1; /* 0x4000000 */
461         util_cpu_caps.has_sse3   = (regs2[2] >>  0) & 1; /* 0x0000001 */
462         util_cpu_caps.has_ssse3  = (regs2[2] >>  9) & 1; /* 0x0000020 */
463         util_cpu_caps.has_sse4_1 = (regs2[2] >> 19) & 1;
464         util_cpu_caps.has_sse4_2 = (regs2[2] >> 20) & 1;
465         util_cpu_caps.has_popcnt = (regs2[2] >> 23) & 1;
466         util_cpu_caps.has_avx    = ((regs2[2] >> 28) & 1) && // AVX
467                                    ((regs2[2] >> 27) & 1) && // OSXSAVE
468                                    ((xgetbv() & 6) == 6);    // XMM & YMM
469         util_cpu_caps.has_f16c   = ((regs2[2] >> 29) & 1) && util_cpu_caps.has_avx;
470         util_cpu_caps.has_fma    = ((regs2[2] >> 12) & 1) && util_cpu_caps.has_avx;
471         util_cpu_caps.has_mmx2   = util_cpu_caps.has_sse; /* SSE cpus supports mmxext too */
472#if defined(PIPE_ARCH_X86_64)
473         util_cpu_caps.has_daz = 1;
474#else
475         util_cpu_caps.has_daz = util_cpu_caps.has_sse3 ||
476            (util_cpu_caps.has_sse2 && sse2_has_daz());
477#endif
478
479         cacheline = ((regs2[1] >> 8) & 0xFF) * 8;
480         if (cacheline > 0)
481            util_cpu_caps.cacheline = cacheline;
482      }
483      if (util_cpu_caps.has_avx && regs[0] >= 0x00000007) {
484         uint32_t regs7[4];
485         cpuid_count(0x00000007, 0x00000000, regs7);
486         util_cpu_caps.has_avx2 = (regs7[1] >> 5) & 1;
487      }
488
489      // check for avx512
490      if (((regs2[2] >> 27) & 1) && // OSXSAVE
491          (xgetbv() & (0x7 << 5)) && // OPMASK: upper-256 enabled by OS
492          ((xgetbv() & 6) == 6)) { // XMM/YMM enabled by OS
493         uint32_t regs3[4];
494         cpuid_count(0x00000007, 0x00000000, regs3);
495         util_cpu_caps.has_avx512f    = (regs3[1] >> 16) & 1;
496         util_cpu_caps.has_avx512dq   = (regs3[1] >> 17) & 1;
497         util_cpu_caps.has_avx512ifma = (regs3[1] >> 21) & 1;
498         util_cpu_caps.has_avx512pf   = (regs3[1] >> 26) & 1;
499         util_cpu_caps.has_avx512er   = (regs3[1] >> 27) & 1;
500         util_cpu_caps.has_avx512cd   = (regs3[1] >> 28) & 1;
501         util_cpu_caps.has_avx512bw   = (regs3[1] >> 30) & 1;
502         util_cpu_caps.has_avx512vl   = (regs3[1] >> 31) & 1;
503         util_cpu_caps.has_avx512vbmi = (regs3[2] >>  1) & 1;
504      }
505
506      if (regs[1] == 0x756e6547 && regs[2] == 0x6c65746e && regs[3] == 0x49656e69) {
507         /* GenuineIntel */
508         util_cpu_caps.has_intel = 1;
509      }
510
511      cpuid(0x80000000, regs);
512
513      if (regs[0] >= 0x80000001) {
514
515         cpuid(0x80000001, regs2);
516
517         util_cpu_caps.has_mmx  |= (regs2[3] >> 23) & 1;
518         util_cpu_caps.has_mmx2 |= (regs2[3] >> 22) & 1;
519         util_cpu_caps.has_3dnow = (regs2[3] >> 31) & 1;
520         util_cpu_caps.has_3dnow_ext = (regs2[3] >> 30) & 1;
521
522         util_cpu_caps.has_xop = util_cpu_caps.has_avx &&
523                                 ((regs2[2] >> 11) & 1);
524      }
525
526      if (regs[0] >= 0x80000006) {
527         /* should we really do this if the clflush size above worked? */
528         unsigned int cacheline;
529         cpuid(0x80000006, regs2);
530         cacheline = regs2[2] & 0xFF;
531         if (cacheline > 0)
532            util_cpu_caps.cacheline = cacheline;
533      }
534
535      if (!util_cpu_caps.has_sse) {
536         util_cpu_caps.has_sse2 = 0;
537         util_cpu_caps.has_sse3 = 0;
538         util_cpu_caps.has_ssse3 = 0;
539         util_cpu_caps.has_sse4_1 = 0;
540      }
541   }
542#endif /* PIPE_ARCH_X86 || PIPE_ARCH_X86_64 */
543
544#if defined(PIPE_ARCH_ARM) || defined(PIPE_ARCH_AARCH64)
545   check_os_arm_support();
546#endif
547
548#if defined(PIPE_ARCH_PPC)
549   check_os_altivec_support();
550#endif /* PIPE_ARCH_PPC */
551
552   get_cpu_topology();
553
554#ifdef DEBUG
555   if (debug_get_option_dump_cpu()) {
556      debug_printf("util_cpu_caps.nr_cpus = %u\n", util_cpu_caps.nr_cpus);
557
558      debug_printf("util_cpu_caps.x86_cpu_type = %u\n", util_cpu_caps.x86_cpu_type);
559      debug_printf("util_cpu_caps.cacheline = %u\n", util_cpu_caps.cacheline);
560
561      debug_printf("util_cpu_caps.has_tsc = %u\n", util_cpu_caps.has_tsc);
562      debug_printf("util_cpu_caps.has_mmx = %u\n", util_cpu_caps.has_mmx);
563      debug_printf("util_cpu_caps.has_mmx2 = %u\n", util_cpu_caps.has_mmx2);
564      debug_printf("util_cpu_caps.has_sse = %u\n", util_cpu_caps.has_sse);
565      debug_printf("util_cpu_caps.has_sse2 = %u\n", util_cpu_caps.has_sse2);
566      debug_printf("util_cpu_caps.has_sse3 = %u\n", util_cpu_caps.has_sse3);
567      debug_printf("util_cpu_caps.has_ssse3 = %u\n", util_cpu_caps.has_ssse3);
568      debug_printf("util_cpu_caps.has_sse4_1 = %u\n", util_cpu_caps.has_sse4_1);
569      debug_printf("util_cpu_caps.has_sse4_2 = %u\n", util_cpu_caps.has_sse4_2);
570      debug_printf("util_cpu_caps.has_avx = %u\n", util_cpu_caps.has_avx);
571      debug_printf("util_cpu_caps.has_avx2 = %u\n", util_cpu_caps.has_avx2);
572      debug_printf("util_cpu_caps.has_f16c = %u\n", util_cpu_caps.has_f16c);
573      debug_printf("util_cpu_caps.has_popcnt = %u\n", util_cpu_caps.has_popcnt);
574      debug_printf("util_cpu_caps.has_3dnow = %u\n", util_cpu_caps.has_3dnow);
575      debug_printf("util_cpu_caps.has_3dnow_ext = %u\n", util_cpu_caps.has_3dnow_ext);
576      debug_printf("util_cpu_caps.has_xop = %u\n", util_cpu_caps.has_xop);
577      debug_printf("util_cpu_caps.has_altivec = %u\n", util_cpu_caps.has_altivec);
578      debug_printf("util_cpu_caps.has_vsx = %u\n", util_cpu_caps.has_vsx);
579      debug_printf("util_cpu_caps.has_neon = %u\n", util_cpu_caps.has_neon);
580      debug_printf("util_cpu_caps.has_daz = %u\n", util_cpu_caps.has_daz);
581      debug_printf("util_cpu_caps.has_avx512f = %u\n", util_cpu_caps.has_avx512f);
582      debug_printf("util_cpu_caps.has_avx512dq = %u\n", util_cpu_caps.has_avx512dq);
583      debug_printf("util_cpu_caps.has_avx512ifma = %u\n", util_cpu_caps.has_avx512ifma);
584      debug_printf("util_cpu_caps.has_avx512pf = %u\n", util_cpu_caps.has_avx512pf);
585      debug_printf("util_cpu_caps.has_avx512er = %u\n", util_cpu_caps.has_avx512er);
586      debug_printf("util_cpu_caps.has_avx512cd = %u\n", util_cpu_caps.has_avx512cd);
587      debug_printf("util_cpu_caps.has_avx512bw = %u\n", util_cpu_caps.has_avx512bw);
588      debug_printf("util_cpu_caps.has_avx512vl = %u\n", util_cpu_caps.has_avx512vl);
589      debug_printf("util_cpu_caps.has_avx512vbmi = %u\n", util_cpu_caps.has_avx512vbmi);
590   }
591#endif
592}
593
594static once_flag cpu_once_flag = ONCE_FLAG_INIT;
595
596void
597util_cpu_detect(void)
598{
599   call_once(&cpu_once_flag, util_cpu_detect_once);
600}
601