xref: /freebsd/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/x86.c (revision c80e69b00d976a5a3b3e84527f270fa7e72a8205)
1 //===-- cpu_model/x86.c - Support for __cpu_model builtin  --------*- C -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file is based on LLVM's lib/Support/Host.cpp.
10 //  It implements the operating system Host concept and builtin
11 //  __cpu_model for the compiler_rt library for x86.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "cpu_model.h"
16 
17 #if !(defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) ||          \
18       defined(_M_X64))
19 #error This file is intended only for x86-based targets
20 #endif
21 
22 #if defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)
23 
24 #include <assert.h>
25 
26 #ifdef _MSC_VER
27 #include <intrin.h>
28 #endif
29 
// CPU vendor signature words. As the trailing comments show, each value is
// the little-endian packing of the four ASCII characters "Genu" / "Auth".
// NOTE(review): presumably matched against the EBX result of CPUID leaf 0;
// the comparison is not in this part of the file, so confirm at the use site.
enum VendorSignatures {
  SIG_INTEL = 0x756e6547, // Genu
  SIG_AMD = 0x68747541,   // Auth
};
34 
// Vendor identifiers. Numbering starts at 1, so 0 is never a valid vendor;
// VENDOR_MAX is a one-past-the-end sentinel, not a real vendor.
enum ProcessorVendors {
  VENDOR_INTEL = 1,
  VENDOR_AMD,
  VENDOR_OTHER,
  VENDOR_MAX
};
41 
// Processor type (microarchitecture family) identifiers written to *Type by
// the get*ProcessorTypeAndSubtype helpers. Values are implicit and sequential
// starting at 1, so the numeric value of every entry depends on its position.
// NOTE(review): the values presumably form part of the __cpu_model ABI shared
// with other toolchains -- confirm before inserting or reordering; appending
// just before CPU_TYPE_MAX keeps existing values stable.
enum ProcessorTypes {
  INTEL_BONNELL = 1,
  INTEL_CORE2,
  INTEL_COREI7,
  AMDFAM10H,
  AMDFAM15H,
  INTEL_SILVERMONT,
  INTEL_KNL,
  AMD_BTVER1,
  AMD_BTVER2,
  AMDFAM17H,
  INTEL_KNM,
  INTEL_GOLDMONT,
  INTEL_GOLDMONT_PLUS,
  INTEL_TREMONT,
  AMDFAM19H,
  ZHAOXIN_FAM7H,
  INTEL_SIERRAFOREST,
  INTEL_GRANDRIDGE,
  INTEL_CLEARWATERFOREST,
  AMDFAM1AH,
  CPU_TYPE_MAX // Sentinel: one past the last valid type.
};
65 
// Processor subtype identifiers written to *Subtype by the
// get*ProcessorTypeAndSubtype helpers. Intel and AMD entries are interleaved
// in a single sequence; values are implicit and sequential starting at 1, so
// the numeric value of every entry depends on its position.
// NOTE(review): presumably ABI-relevant in the same way as ProcessorTypes --
// confirm before inserting or reordering; appending just before
// CPU_SUBTYPE_MAX keeps existing values stable.
enum ProcessorSubtypes {
  INTEL_COREI7_NEHALEM = 1,
  INTEL_COREI7_WESTMERE,
  INTEL_COREI7_SANDYBRIDGE,
  AMDFAM10H_BARCELONA,
  AMDFAM10H_SHANGHAI,
  AMDFAM10H_ISTANBUL,
  AMDFAM15H_BDVER1,
  AMDFAM15H_BDVER2,
  AMDFAM15H_BDVER3,
  AMDFAM15H_BDVER4,
  AMDFAM17H_ZNVER1,
  INTEL_COREI7_IVYBRIDGE,
  INTEL_COREI7_HASWELL,
  INTEL_COREI7_BROADWELL,
  INTEL_COREI7_SKYLAKE,
  INTEL_COREI7_SKYLAKE_AVX512,
  INTEL_COREI7_CANNONLAKE,
  INTEL_COREI7_ICELAKE_CLIENT,
  INTEL_COREI7_ICELAKE_SERVER,
  AMDFAM17H_ZNVER2,
  INTEL_COREI7_CASCADELAKE,
  INTEL_COREI7_TIGERLAKE,
  INTEL_COREI7_COOPERLAKE,
  INTEL_COREI7_SAPPHIRERAPIDS,
  INTEL_COREI7_ALDERLAKE,
  AMDFAM19H_ZNVER3,
  INTEL_COREI7_ROCKETLAKE,
  ZHAOXIN_FAM7H_LUJIAZUI,
  AMDFAM19H_ZNVER4,
  INTEL_COREI7_GRANITERAPIDS,
  INTEL_COREI7_GRANITERAPIDS_D,
  INTEL_COREI7_ARROWLAKE,
  INTEL_COREI7_ARROWLAKE_S,
  INTEL_COREI7_PANTHERLAKE,
  AMDFAM1AH_ZNVER5,
  CPU_SUBTYPE_MAX // Sentinel: one past the last valid subtype.
};
104 
// Feature bit indices. Each enumerator F names one bit of the Features word
// array used in this file: word F / 32, bit F % 32 (see the testFeature,
// hasFeature and setFeature macros).
//
// Numbering is mostly implicit. Wherever a feature name is commented out, the
// next live enumerator carries an explicit value that skips the index the
// commented-out name would have taken, so the remaining indices stay fixed.
enum ProcessorFeatures {
  FEATURE_CMOV = 0,
  FEATURE_MMX,
  FEATURE_POPCNT,
  FEATURE_SSE,
  FEATURE_SSE2,
  FEATURE_SSE3,
  FEATURE_SSSE3,
  FEATURE_SSE4_1,
  FEATURE_SSE4_2,
  FEATURE_AVX,
  FEATURE_AVX2,
  FEATURE_SSE4_A,
  FEATURE_FMA4,
  FEATURE_XOP,
  FEATURE_FMA,
  FEATURE_AVX512F,
  FEATURE_BMI,
  FEATURE_BMI2,
  FEATURE_AES,
  FEATURE_PCLMUL,
  FEATURE_AVX512VL,
  FEATURE_AVX512BW,
  FEATURE_AVX512DQ,
  FEATURE_AVX512CD,
  FEATURE_AVX512ER,
  FEATURE_AVX512PF,
  FEATURE_AVX512VBMI,
  FEATURE_AVX512IFMA,
  FEATURE_AVX5124VNNIW,
  FEATURE_AVX5124FMAPS,
  FEATURE_AVX512VPOPCNTDQ,
  FEATURE_AVX512VBMI2,
  FEATURE_GFNI,
  FEATURE_VPCLMULQDQ,
  FEATURE_AVX512VNNI,
  FEATURE_AVX512BITALG,
  FEATURE_AVX512BF16,
  FEATURE_AVX512VP2INTERSECT,
  // FIXME: Some features below are missing compared to gcc, because gcc has
  // some features that do not map one-to-one onto llvm features.
  // FEATURE_3DNOW,
  // FEATURE_3DNOWP,
  FEATURE_ADX = 40,
  // FEATURE_ABM,
  FEATURE_CLDEMOTE = 42,
  FEATURE_CLFLUSHOPT,
  FEATURE_CLWB,
  FEATURE_CLZERO,
  FEATURE_CMPXCHG16B,
  // FIXME: Not adding FEATURE_CMPXCHG8B is a workaround to make 'generic' a
  // cpu string with no X86_FEATURE_COMPAT features, which is required by the
  // current implementation of the cpu_specific/cpu_dispatch FMV feature.
  // FEATURE_CMPXCHG8B,
  FEATURE_ENQCMD = 48,
  FEATURE_F16C,
  FEATURE_FSGSBASE,
  // FEATURE_FXSAVE,
  // FEATURE_HLE,
  // FEATURE_IBT,
  FEATURE_LAHF_LM = 54,
  FEATURE_LM,
  FEATURE_LWP,
  FEATURE_LZCNT,
  FEATURE_MOVBE,
  FEATURE_MOVDIR64B,
  FEATURE_MOVDIRI,
  FEATURE_MWAITX,
  // FEATURE_OSXSAVE,
  FEATURE_PCONFIG = 63,
  FEATURE_PKU,
  FEATURE_PREFETCHWT1,
  FEATURE_PRFCHW,
  FEATURE_PTWRITE,
  FEATURE_RDPID,
  FEATURE_RDRND,
  FEATURE_RDSEED,
  FEATURE_RTM,
  FEATURE_SERIALIZE,
  FEATURE_SGX,
  FEATURE_SHA,
  FEATURE_SHSTK,
  FEATURE_TBM,
  FEATURE_TSXLDTRK,
  FEATURE_VAES,
  FEATURE_WAITPKG,
  FEATURE_WBNOINVD,
  FEATURE_XSAVE,
  FEATURE_XSAVEC,
  FEATURE_XSAVEOPT,
  FEATURE_XSAVES,
  FEATURE_AMX_TILE,
  FEATURE_AMX_INT8,
  FEATURE_AMX_BF16,
  FEATURE_UINTR,
  FEATURE_HRESET,
  FEATURE_KL,
  // FEATURE_AESKLE,
  FEATURE_WIDEKL = 92,
  FEATURE_AVXVNNI,
  FEATURE_AVX512FP16,
  FEATURE_X86_64_BASELINE,
  FEATURE_X86_64_V2,
  FEATURE_X86_64_V3,
  FEATURE_X86_64_V4,
  FEATURE_AVXIFMA,
  FEATURE_AVXVNNIINT8,
  FEATURE_AVXNECONVERT,
  FEATURE_CMPCCXADD,
  FEATURE_AMX_FP16,
  FEATURE_PREFETCHI,
  FEATURE_RAOINT,
  FEATURE_AMX_COMPLEX,
  FEATURE_AVXVNNIINT16,
  FEATURE_SM3,
  FEATURE_SHA512,
  FEATURE_SM4,
  FEATURE_APXF,
  FEATURE_USERMSR,
  FEATURE_AVX10_1_256,
  FEATURE_AVX10_1_512,
  CPU_FEATURE_MAX // Sentinel: one past the last feature index.
};
228 
// Reports whether the CPUID instruction may be executed.
//
// A real probe happens only for 32-bit x86 built with GCC or Clang: the code
// tries to flip the EFLAGS bit selected by mask 0x00200000 and reports support
// when the value read back differs from the value read first. The sequence was
// copied from clang's cpuid.h (__get_cpuid_max) after bug reports of OpenSSL
// crashing on CPUs without CPUID support. Every other configuration simply
// answers true.
static bool isCpuIdSupported(void) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  int FlagToggled;
  __asm__("  pushfl\n"
          "  popl   %%eax\n"
          "  movl   %%eax,%%ecx\n"
          "  xorl   $0x00200000,%%eax\n"
          "  pushl  %%eax\n"
          "  popfl\n"
          "  pushfl\n"
          "  popl   %%eax\n"
          "  movl   $0,%0\n"
          "  cmpl   %%eax,%%ecx\n"
          "  je     1f\n"
          "  movl   $1,%0\n"
          "1:"
          : "=r"(FlagToggled)
          :
          : "eax", "ecx");
  // The asm leaves 1 in FlagToggled when the bit could be changed, 0 when the
  // old and new EFLAGS values compared equal.
  return FlagToggled != 0;
#else
  return true;
#endif
}
260 
261 // This code is copied from lib/Support/Host.cpp.
262 // Changes to either file should be mirrored in the other.
263 
/// getX86CpuIDAndInfo - Run cpuid for leaf \p value and store the resulting
/// EAX, EBX, ECX and EDX values through the four output pointers.
/// Returns false when cpuid was executed; returns true (leaving the outputs
/// untouched) when this build configuration has no way to execute it.
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx, so it is parked in rsi
  // around the instruction; the xchg restores rbx and leaves cpuid's EBX
  // result in rsi, which is what the "=S" output reads.
  // FIXME: should we save this for Clang?
  unsigned RegA, RegB, RegC, RegD;
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(RegA), "=S"(RegB), "=c"(RegC), "=d"(RegD)
          : "a"(value));
  *rEAX = RegA;
  *rEBX = RegB;
  *rECX = RegC;
  *rEDX = RegD;
  return false;
#elif (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  // Same ebx-preserving dance as the 64-bit variant, using 32-bit registers.
  unsigned RegA, RegB, RegC, RegD;
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(RegA), "=S"(RegB), "=c"(RegC), "=d"(RegD)
          : "a"(value));
  *rEAX = RegA;
  *rEBX = RegB;
  *rECX = RegC;
  *rEDX = RegD;
  return false;
#elif !defined(__GNUC__) && !defined(__clang__) && defined(_MSC_VER)
  // The MSVC intrinsic is portable across x86 and x64.
  int Regs[4];
  __cpuid(Regs, value);
  *rEAX = Regs[0];
  *rEBX = Regs[1];
  *rECX = Regs[2];
  *rEDX = Regs[3];
  return false;
#else
  // No known way to issue cpuid with this compiler/target combination.
  return true;
#endif
}
301 
/// getX86CpuIDAndInfoEx - Run cpuid for leaf \p value with \p subleaf loaded
/// into ECX, and store the resulting EAX, EBX, ECX and EDX values through the
/// four output pointers. Returns false when cpuid was executed; returns true
/// (leaving the outputs untouched) when this build configuration has no way
/// to execute it.
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx, so it is parked in rsi
  // around the instruction; the xchg restores rbx and leaves cpuid's EBX
  // result in rsi, which is what the "=S" output reads.
  // FIXME: should we save this for Clang?
  unsigned RegA, RegB, RegC, RegD;
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(RegA), "=S"(RegB), "=c"(RegC), "=d"(RegD)
          : "a"(value), "c"(subleaf));
  *rEAX = RegA;
  *rEBX = RegB;
  *rECX = RegC;
  *rEDX = RegD;
  return false;
#elif (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  // Same ebx-preserving dance as the 64-bit variant, using 32-bit registers.
  unsigned RegA, RegB, RegC, RegD;
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(RegA), "=S"(RegB), "=c"(RegC), "=d"(RegD)
          : "a"(value), "c"(subleaf));
  *rEAX = RegA;
  *rEBX = RegB;
  *rECX = RegC;
  *rEDX = RegD;
  return false;
#elif !defined(__GNUC__) && !defined(__clang__) && defined(_MSC_VER)
  int Regs[4];
  __cpuidex(Regs, value, subleaf);
  *rEAX = Regs[0];
  *rEBX = Regs[1];
  *rECX = Regs[2];
  *rEDX = Regs[3];
  return false;
#else
  // No known way to issue cpuid with this compiler/target combination.
  return true;
#endif
}
340 
// Read XCR0 (selected by passing 0 in ECX to xgetbv). Used to detect features
// such as AVX. The low half of the result is stored through rEAX and the high
// half through rEDX. Returns false when the register was read and true when
// this build configuration has no way to read it (outputs left untouched).
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
  // xgetbv is emitted as a raw .byte sequence rather than by mnemonic because
  // older assemblers do not include support for xgetbv and there is no easy
  // way to conditionally compile based on the assembler used.
  unsigned Lo, Hi;
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(Lo), "=d"(Hi) : "c"(0));
  *rEAX = Lo;
  *rEDX = Hi;
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  const unsigned long long Xcr0 = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = (unsigned)Xcr0;
  *rEDX = (unsigned)(Xcr0 >> 32);
  return false;
#else
  return true;
#endif
}
358 
// Decode the family and model numbers from a cpuid EAX value.
//
// The base family (bits 8-11) and base model (bits 4-7) are always extracted.
// When the base family is 0xF the extended family (bits 20-27) is added to it.
// When the base family is 6 or 0xF the extended model (bits 16-19) supplies
// the upper nibble of the model.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  const unsigned BaseFamily = (EAX >> 8) & 0xf;
  const unsigned BaseModel = (EAX >> 4) & 0xf;
  const unsigned ExtFamily = (EAX >> 20) & 0xff;
  const unsigned ExtModel = (EAX >> 16) & 0xf;

  unsigned Fam = BaseFamily;
  unsigned Mod = BaseModel;

  if (BaseFamily == 0xf) {
    // Family F: both extended fields apply.
    Fam = BaseFamily + ExtFamily;
    Mod = BaseModel + (ExtModel << 4);
  } else if (BaseFamily == 6) {
    // Family 6: only the extended model applies.
    Mod = BaseModel + (ExtModel << 4);
  }

  *Family = Fam;
  *Model = Mod;
}
371 
372 #define testFeature(F) (Features[F / 32] & (1 << (F % 32))) != 0
373 
getIntelProcessorTypeAndSubtype(unsigned Family,unsigned Model,const unsigned * Features,unsigned * Type,unsigned * Subtype)374 static const char *getIntelProcessorTypeAndSubtype(unsigned Family,
375                                                    unsigned Model,
376                                                    const unsigned *Features,
377                                                    unsigned *Type,
378                                                    unsigned *Subtype) {
379   // We select CPU strings to match the code in Host.cpp, but we don't use them
380   // in compiler-rt.
381   const char *CPU = 0;
382 
383   switch (Family) {
384   case 6:
385     switch (Model) {
386     case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
387                // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
388                // mobile processor, Intel Core 2 Extreme processor, Intel
389                // Pentium Dual-Core processor, Intel Xeon processor, model
390                // 0Fh. All processors are manufactured using the 65 nm process.
391     case 0x16: // Intel Celeron processor model 16h. All processors are
392                // manufactured using the 65 nm process
393       CPU = "core2";
394       *Type = INTEL_CORE2;
395       break;
396     case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
397                // 17h. All processors are manufactured using the 45 nm process.
398                //
399                // 45nm: Penryn , Wolfdale, Yorkfield (XE)
400     case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
401                // the 45 nm process.
402       CPU = "penryn";
403       *Type = INTEL_CORE2;
404       break;
405     case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
406                // processors are manufactured using the 45 nm process.
407     case 0x1e: // Intel(R) Core(TM) i7 CPU         870  @ 2.93GHz.
408                // As found in a Summer 2010 model iMac.
409     case 0x1f:
410     case 0x2e: // Nehalem EX
411       CPU = "nehalem";
412       *Type = INTEL_COREI7;
413       *Subtype = INTEL_COREI7_NEHALEM;
414       break;
415     case 0x25: // Intel Core i7, laptop version.
416     case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
417                // processors are manufactured using the 32 nm process.
418     case 0x2f: // Westmere EX
419       CPU = "westmere";
420       *Type = INTEL_COREI7;
421       *Subtype = INTEL_COREI7_WESTMERE;
422       break;
423     case 0x2a: // Intel Core i7 processor. All processors are manufactured
424                // using the 32 nm process.
425     case 0x2d:
426       CPU = "sandybridge";
427       *Type = INTEL_COREI7;
428       *Subtype = INTEL_COREI7_SANDYBRIDGE;
429       break;
430     case 0x3a:
431     case 0x3e: // Ivy Bridge EP
432       CPU = "ivybridge";
433       *Type = INTEL_COREI7;
434       *Subtype = INTEL_COREI7_IVYBRIDGE;
435       break;
436 
437     // Haswell:
438     case 0x3c:
439     case 0x3f:
440     case 0x45:
441     case 0x46:
442       CPU = "haswell";
443       *Type = INTEL_COREI7;
444       *Subtype = INTEL_COREI7_HASWELL;
445       break;
446 
447     // Broadwell:
448     case 0x3d:
449     case 0x47:
450     case 0x4f:
451     case 0x56:
452       CPU = "broadwell";
453       *Type = INTEL_COREI7;
454       *Subtype = INTEL_COREI7_BROADWELL;
455       break;
456 
457     // Skylake:
458     case 0x4e: // Skylake mobile
459     case 0x5e: // Skylake desktop
460     case 0x8e: // Kaby Lake mobile
461     case 0x9e: // Kaby Lake desktop
462     case 0xa5: // Comet Lake-H/S
463     case 0xa6: // Comet Lake-U
464       CPU = "skylake";
465       *Type = INTEL_COREI7;
466       *Subtype = INTEL_COREI7_SKYLAKE;
467       break;
468 
469     // Rocketlake:
470     case 0xa7:
471       CPU = "rocketlake";
472       *Type = INTEL_COREI7;
473       *Subtype = INTEL_COREI7_ROCKETLAKE;
474       break;
475 
476     // Skylake Xeon:
477     case 0x55:
478       *Type = INTEL_COREI7;
479       if (testFeature(FEATURE_AVX512BF16)) {
480         CPU = "cooperlake";
481         *Subtype = INTEL_COREI7_COOPERLAKE;
482       } else if (testFeature(FEATURE_AVX512VNNI)) {
483         CPU = "cascadelake";
484         *Subtype = INTEL_COREI7_CASCADELAKE;
485       } else {
486         CPU = "skylake-avx512";
487         *Subtype = INTEL_COREI7_SKYLAKE_AVX512;
488       }
489       break;
490 
491     // Cannonlake:
492     case 0x66:
493       CPU = "cannonlake";
494       *Type = INTEL_COREI7;
495       *Subtype = INTEL_COREI7_CANNONLAKE;
496       break;
497 
498     // Icelake:
499     case 0x7d:
500     case 0x7e:
501       CPU = "icelake-client";
502       *Type = INTEL_COREI7;
503       *Subtype = INTEL_COREI7_ICELAKE_CLIENT;
504       break;
505 
506     // Tigerlake:
507     case 0x8c:
508     case 0x8d:
509       CPU = "tigerlake";
510       *Type = INTEL_COREI7;
511       *Subtype = INTEL_COREI7_TIGERLAKE;
512       break;
513 
514     // Alderlake:
515     case 0x97:
516     case 0x9a:
517     // Raptorlake:
518     case 0xb7:
519     case 0xba:
520     case 0xbf:
521     // Meteorlake:
522     case 0xaa:
523     case 0xac:
524     // Gracemont:
525     case 0xbe:
526       CPU = "alderlake";
527       *Type = INTEL_COREI7;
528       *Subtype = INTEL_COREI7_ALDERLAKE;
529       break;
530 
531     // Arrowlake:
532     case 0xc5:
533       CPU = "arrowlake";
534       *Type = INTEL_COREI7;
535       *Subtype = INTEL_COREI7_ARROWLAKE;
536       break;
537 
538     // Arrowlake S:
539     case 0xc6:
540     // Lunarlake:
541     case 0xbd:
542       CPU = "arrowlake-s";
543       *Type = INTEL_COREI7;
544       *Subtype = INTEL_COREI7_ARROWLAKE_S;
545       break;
546 
547     // Pantherlake:
548     case 0xcc:
549       CPU = "pantherlake";
550       *Type = INTEL_COREI7;
551       *Subtype = INTEL_COREI7_PANTHERLAKE;
552       break;
553 
554     // Icelake Xeon:
555     case 0x6a:
556     case 0x6c:
557       CPU = "icelake-server";
558       *Type = INTEL_COREI7;
559       *Subtype = INTEL_COREI7_ICELAKE_SERVER;
560       break;
561 
562     // Emerald Rapids:
563     case 0xcf:
564     // Sapphire Rapids:
565     case 0x8f:
566       CPU = "sapphirerapids";
567       *Type = INTEL_COREI7;
568       *Subtype = INTEL_COREI7_SAPPHIRERAPIDS;
569       break;
570 
571     // Granite Rapids:
572     case 0xad:
573       CPU = "graniterapids";
574       *Type = INTEL_COREI7;
575       *Subtype = INTEL_COREI7_GRANITERAPIDS;
576       break;
577 
578     // Granite Rapids D:
579     case 0xae:
580       CPU = "graniterapids-d";
581       *Type = INTEL_COREI7;
582       *Subtype = INTEL_COREI7_GRANITERAPIDS_D;
583       break;
584 
585     case 0x1c: // Most 45 nm Intel Atom processors
586     case 0x26: // 45 nm Atom Lincroft
587     case 0x27: // 32 nm Atom Medfield
588     case 0x35: // 32 nm Atom Midview
589     case 0x36: // 32 nm Atom Midview
590       CPU = "bonnell";
591       *Type = INTEL_BONNELL;
592       break;
593 
594     // Atom Silvermont codes from the Intel software optimization guide.
595     case 0x37:
596     case 0x4a:
597     case 0x4d:
598     case 0x5a:
599     case 0x5d:
600     case 0x4c: // really airmont
601       CPU = "silvermont";
602       *Type = INTEL_SILVERMONT;
603       break;
604     // Goldmont:
605     case 0x5c: // Apollo Lake
606     case 0x5f: // Denverton
607       CPU = "goldmont";
608       *Type = INTEL_GOLDMONT;
609       break; // "goldmont"
610     case 0x7a:
611       CPU = "goldmont-plus";
612       *Type = INTEL_GOLDMONT_PLUS;
613       break;
614     case 0x86:
615     case 0x8a: // Lakefield
616     case 0x96: // Elkhart Lake
617     case 0x9c: // Jasper Lake
618       CPU = "tremont";
619       *Type = INTEL_TREMONT;
620       break;
621 
622     // Sierraforest:
623     case 0xaf:
624       CPU = "sierraforest";
625       *Type = INTEL_SIERRAFOREST;
626       break;
627 
628     // Grandridge:
629     case 0xb6:
630       CPU = "grandridge";
631       *Type = INTEL_GRANDRIDGE;
632       break;
633 
634     // Clearwaterforest:
635     case 0xdd:
636       CPU = "clearwaterforest";
637       *Type = INTEL_COREI7;
638       *Subtype = INTEL_CLEARWATERFOREST;
639       break;
640 
641     case 0x57:
642       CPU = "knl";
643       *Type = INTEL_KNL;
644       break;
645 
646     case 0x85:
647       CPU = "knm";
648       *Type = INTEL_KNM;
649       break;
650 
651     default: // Unknown family 6 CPU.
652       break;
653     }
654     break;
655   default:
656     break; // Unknown.
657   }
658 
659   return CPU;
660 }
661 
getAMDProcessorTypeAndSubtype(unsigned Family,unsigned Model,const unsigned * Features,unsigned * Type,unsigned * Subtype)662 static const char *getAMDProcessorTypeAndSubtype(unsigned Family,
663                                                  unsigned Model,
664                                                  const unsigned *Features,
665                                                  unsigned *Type,
666                                                  unsigned *Subtype) {
667   const char *CPU = 0;
668 
669   switch (Family) {
670   case 4:
671     CPU = "i486";
672     break;
673   case 5:
674     CPU = "pentium";
675     switch (Model) {
676     case 6:
677     case 7:
678       CPU = "k6";
679       break;
680     case 8:
681       CPU = "k6-2";
682       break;
683     case 9:
684     case 13:
685       CPU = "k6-3";
686       break;
687     case 10:
688       CPU = "geode";
689       break;
690     }
691     break;
692   case 6:
693     if (testFeature(FEATURE_SSE)) {
694       CPU = "athlon-xp";
695       break;
696     }
697     CPU = "athlon";
698     break;
699   case 15:
700     if (testFeature(FEATURE_SSE3)) {
701       CPU = "k8-sse3";
702       break;
703     }
704     CPU = "k8";
705     break;
706   case 16:
707     CPU = "amdfam10";
708     *Type = AMDFAM10H; // "amdfam10"
709     switch (Model) {
710     case 2:
711       *Subtype = AMDFAM10H_BARCELONA;
712       break;
713     case 4:
714       *Subtype = AMDFAM10H_SHANGHAI;
715       break;
716     case 8:
717       *Subtype = AMDFAM10H_ISTANBUL;
718       break;
719     }
720     break;
721   case 20:
722     CPU = "btver1";
723     *Type = AMD_BTVER1;
724     break;
725   case 21:
726     CPU = "bdver1";
727     *Type = AMDFAM15H;
728     if (Model >= 0x60 && Model <= 0x7f) {
729       CPU = "bdver4";
730       *Subtype = AMDFAM15H_BDVER4;
731       break; // 60h-7Fh: Excavator
732     }
733     if (Model >= 0x30 && Model <= 0x3f) {
734       CPU = "bdver3";
735       *Subtype = AMDFAM15H_BDVER3;
736       break; // 30h-3Fh: Steamroller
737     }
738     if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
739       CPU = "bdver2";
740       *Subtype = AMDFAM15H_BDVER2;
741       break; // 02h, 10h-1Fh: Piledriver
742     }
743     if (Model <= 0x0f) {
744       *Subtype = AMDFAM15H_BDVER1;
745       break; // 00h-0Fh: Bulldozer
746     }
747     break;
748   case 22:
749     CPU = "btver2";
750     *Type = AMD_BTVER2;
751     break;
752   case 23:
753     CPU = "znver1";
754     *Type = AMDFAM17H;
755     if ((Model >= 0x30 && Model <= 0x3f) || (Model == 0x47) ||
756         (Model >= 0x60 && Model <= 0x67) || (Model >= 0x68 && Model <= 0x6f) ||
757         (Model >= 0x70 && Model <= 0x7f) || (Model >= 0x84 && Model <= 0x87) ||
758         (Model >= 0x90 && Model <= 0x97) || (Model >= 0x98 && Model <= 0x9f) ||
759         (Model >= 0xa0 && Model <= 0xaf)) {
760       // Family 17h Models 30h-3Fh (Starship) Zen 2
761       // Family 17h Models 47h (Cardinal) Zen 2
762       // Family 17h Models 60h-67h (Renoir) Zen 2
763       // Family 17h Models 68h-6Fh (Lucienne) Zen 2
764       // Family 17h Models 70h-7Fh (Matisse) Zen 2
765       // Family 17h Models 84h-87h (ProjectX) Zen 2
766       // Family 17h Models 90h-97h (VanGogh) Zen 2
767       // Family 17h Models 98h-9Fh (Mero) Zen 2
768       // Family 17h Models A0h-AFh (Mendocino) Zen 2
769       CPU = "znver2";
770       *Subtype = AMDFAM17H_ZNVER2;
771       break;
772     }
773     if ((Model >= 0x10 && Model <= 0x1f) || (Model >= 0x20 && Model <= 0x2f)) {
774       // Family 17h Models 10h-1Fh (Raven1) Zen
775       // Family 17h Models 10h-1Fh (Picasso) Zen+
776       // Family 17h Models 20h-2Fh (Raven2 x86) Zen
777       *Subtype = AMDFAM17H_ZNVER1;
778       break;
779     }
780     break;
781   case 25:
782     CPU = "znver3";
783     *Type = AMDFAM19H;
784     if (Model <= 0x0f || (Model >= 0x20 && Model <= 0x2f) ||
785         (Model >= 0x30 && Model <= 0x3f) || (Model >= 0x40 && Model <= 0x4f) ||
786         (Model >= 0x50 && Model <= 0x5f)) {
787       // Family 19h Models 00h-0Fh (Genesis, Chagall) Zen 3
788       // Family 19h Models 20h-2Fh (Vermeer) Zen 3
789       // Family 19h Models 30h-3Fh (Badami) Zen 3
790       // Family 19h Models 40h-4Fh (Rembrandt) Zen 3+
791       // Family 19h Models 50h-5Fh (Cezanne) Zen 3
792       *Subtype = AMDFAM19H_ZNVER3;
793       break;
794     }
795     if ((Model >= 0x10 && Model <= 0x1f) || (Model >= 0x60 && Model <= 0x6f) ||
796         (Model >= 0x70 && Model <= 0x77) || (Model >= 0x78 && Model <= 0x7f) ||
797         (Model >= 0xa0 && Model <= 0xaf)) {
798       // Family 19h Models 10h-1Fh (Stones; Storm Peak) Zen 4
799       // Family 19h Models 60h-6Fh (Raphael) Zen 4
800       // Family 19h Models 70h-77h (Phoenix, Hawkpoint1) Zen 4
801       // Family 19h Models 78h-7Fh (Phoenix 2, Hawkpoint2) Zen 4
802       // Family 19h Models A0h-AFh (Stones-Dense) Zen 4
803       CPU = "znver4";
804       *Subtype = AMDFAM19H_ZNVER4;
805       break; //  "znver4"
806     }
807     break; // family 19h
808   case 26:
809     CPU = "znver5";
810     *Type = AMDFAM1AH;
811     if (Model <= 0x77) {
812       // Models 00h-0Fh (Breithorn).
813       // Models 10h-1Fh (Breithorn-Dense).
814       // Models 20h-2Fh (Strix 1).
815       // Models 30h-37h (Strix 2).
816       // Models 38h-3Fh (Strix 3).
817       // Models 40h-4Fh (Granite Ridge).
818       // Models 50h-5Fh (Weisshorn).
819       // Models 60h-6Fh (Krackan1).
820       // Models 70h-77h (Sarlak).
821       CPU = "znver5";
822       *Subtype = AMDFAM1AH_ZNVER5;
823       break; //  "znver5"
824     }
825     break;
826   default:
827     break; // Unknown AMD CPU.
828   }
829 
830   return CPU;
831 }
832 
833 #undef testFeature
834 
getAvailableFeatures(unsigned ECX,unsigned EDX,unsigned MaxLeaf,unsigned * Features)835 static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
836                                  unsigned *Features) {
837   unsigned EAX = 0, EBX = 0;
838 
839 #define hasFeature(F) ((Features[F / 32] >> (F % 32)) & 1)
840 #define setFeature(F) Features[F / 32] |= 1U << (F % 32)
841 
842   if ((EDX >> 15) & 1)
843     setFeature(FEATURE_CMOV);
844   if ((EDX >> 23) & 1)
845     setFeature(FEATURE_MMX);
846   if ((EDX >> 25) & 1)
847     setFeature(FEATURE_SSE);
848   if ((EDX >> 26) & 1)
849     setFeature(FEATURE_SSE2);
850 
851   if ((ECX >> 0) & 1)
852     setFeature(FEATURE_SSE3);
853   if ((ECX >> 1) & 1)
854     setFeature(FEATURE_PCLMUL);
855   if ((ECX >> 9) & 1)
856     setFeature(FEATURE_SSSE3);
857   if ((ECX >> 12) & 1)
858     setFeature(FEATURE_FMA);
859   if ((ECX >> 13) & 1)
860     setFeature(FEATURE_CMPXCHG16B);
861   if ((ECX >> 19) & 1)
862     setFeature(FEATURE_SSE4_1);
863   if ((ECX >> 20) & 1)
864     setFeature(FEATURE_SSE4_2);
865   if ((ECX >> 22) & 1)
866     setFeature(FEATURE_MOVBE);
867   if ((ECX >> 23) & 1)
868     setFeature(FEATURE_POPCNT);
869   if ((ECX >> 25) & 1)
870     setFeature(FEATURE_AES);
871   if ((ECX >> 29) & 1)
872     setFeature(FEATURE_F16C);
873   if ((ECX >> 30) & 1)
874     setFeature(FEATURE_RDRND);
875 
876   // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
877   // indicates that the AVX registers will be saved and restored on context
878   // switch, then we have full AVX support.
879   const unsigned AVXBits = (1 << 27) | (1 << 28);
880   bool HasAVXSave = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
881                     ((EAX & 0x6) == 0x6);
882 #if defined(__APPLE__)
883   // Darwin lazily saves the AVX512 context on first use: trust that the OS will
884   // save the AVX512 context if we use AVX512 instructions, even the bit is not
885   // set right now.
886   bool HasAVX512Save = true;
887 #else
888   // AVX512 requires additional context to be saved by the OS.
889   bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0);
890 #endif
891   // AMX requires additional context to be saved by the OS.
892   const unsigned AMXBits = (1 << 17) | (1 << 18);
893   bool HasXSave = ((ECX >> 27) & 1) && !getX86XCR0(&EAX, &EDX);
894   bool HasAMXSave = HasXSave && ((EAX & AMXBits) == AMXBits);
895 
896   if (HasAVXSave)
897     setFeature(FEATURE_AVX);
898 
899   if (((ECX >> 26) & 1) && HasAVXSave)
900     setFeature(FEATURE_XSAVE);
901 
902   bool HasLeaf7 =
903       MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
904 
905   if (HasLeaf7 && ((EBX >> 0) & 1))
906     setFeature(FEATURE_FSGSBASE);
907   if (HasLeaf7 && ((EBX >> 2) & 1))
908     setFeature(FEATURE_SGX);
909   if (HasLeaf7 && ((EBX >> 3) & 1))
910     setFeature(FEATURE_BMI);
911   if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVXSave)
912     setFeature(FEATURE_AVX2);
913   if (HasLeaf7 && ((EBX >> 8) & 1))
914     setFeature(FEATURE_BMI2);
915   if (HasLeaf7 && ((EBX >> 11) & 1))
916     setFeature(FEATURE_RTM);
917   if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
918     setFeature(FEATURE_AVX512F);
919   if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
920     setFeature(FEATURE_AVX512DQ);
921   if (HasLeaf7 && ((EBX >> 18) & 1))
922     setFeature(FEATURE_RDSEED);
923   if (HasLeaf7 && ((EBX >> 19) & 1))
924     setFeature(FEATURE_ADX);
925   if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
926     setFeature(FEATURE_AVX512IFMA);
927   if (HasLeaf7 && ((EBX >> 24) & 1))
928     setFeature(FEATURE_CLWB);
929   if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
930     setFeature(FEATURE_AVX512PF);
931   if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
932     setFeature(FEATURE_AVX512ER);
933   if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
934     setFeature(FEATURE_AVX512CD);
935   if (HasLeaf7 && ((EBX >> 29) & 1))
936     setFeature(FEATURE_SHA);
937   if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
938     setFeature(FEATURE_AVX512BW);
939   if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
940     setFeature(FEATURE_AVX512VL);
941 
942   if (HasLeaf7 && ((ECX >> 0) & 1))
943     setFeature(FEATURE_PREFETCHWT1);
944   if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
945     setFeature(FEATURE_AVX512VBMI);
946   if (HasLeaf7 && ((ECX >> 4) & 1))
947     setFeature(FEATURE_PKU);
948   if (HasLeaf7 && ((ECX >> 5) & 1))
949     setFeature(FEATURE_WAITPKG);
950   if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
951     setFeature(FEATURE_AVX512VBMI2);
952   if (HasLeaf7 && ((ECX >> 7) & 1))
953     setFeature(FEATURE_SHSTK);
954   if (HasLeaf7 && ((ECX >> 8) & 1))
955     setFeature(FEATURE_GFNI);
956   if (HasLeaf7 && ((ECX >> 9) & 1) && HasAVXSave)
957     setFeature(FEATURE_VAES);
958   if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave)
959     setFeature(FEATURE_VPCLMULQDQ);
960   if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
961     setFeature(FEATURE_AVX512VNNI);
962   if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save)
963     setFeature(FEATURE_AVX512BITALG);
964   if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
965     setFeature(FEATURE_AVX512VPOPCNTDQ);
966   if (HasLeaf7 && ((ECX >> 22) & 1))
967     setFeature(FEATURE_RDPID);
968   if (HasLeaf7 && ((ECX >> 23) & 1))
969     setFeature(FEATURE_KL);
970   if (HasLeaf7 && ((ECX >> 25) & 1))
971     setFeature(FEATURE_CLDEMOTE);
972   if (HasLeaf7 && ((ECX >> 27) & 1))
973     setFeature(FEATURE_MOVDIRI);
974   if (HasLeaf7 && ((ECX >> 28) & 1))
975     setFeature(FEATURE_MOVDIR64B);
976   if (HasLeaf7 && ((ECX >> 29) & 1))
977     setFeature(FEATURE_ENQCMD);
978 
979   if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
980     setFeature(FEATURE_AVX5124VNNIW);
981   if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
982     setFeature(FEATURE_AVX5124FMAPS);
983   if (HasLeaf7 && ((EDX >> 5) & 1))
984     setFeature(FEATURE_UINTR);
985   if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
986     setFeature(FEATURE_AVX512VP2INTERSECT);
987   if (HasLeaf7 && ((EDX >> 14) & 1))
988     setFeature(FEATURE_SERIALIZE);
989   if (HasLeaf7 && ((EDX >> 16) & 1))
990     setFeature(FEATURE_TSXLDTRK);
991   if (HasLeaf7 && ((EDX >> 18) & 1))
992     setFeature(FEATURE_PCONFIG);
993   if (HasLeaf7 && ((EDX >> 22) & 1) && HasAMXSave)
994     setFeature(FEATURE_AMX_BF16);
995   if (HasLeaf7 && ((EDX >> 23) & 1) && HasAVX512Save)
996     setFeature(FEATURE_AVX512FP16);
997   if (HasLeaf7 && ((EDX >> 24) & 1) && HasAMXSave)
998     setFeature(FEATURE_AMX_TILE);
999   if (HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave)
1000     setFeature(FEATURE_AMX_INT8);
1001 
1002   // EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't
1003   // return all 0s for invalid subleaves so check the limit.
1004   bool HasLeaf7Subleaf1 =
1005       HasLeaf7 && EAX >= 1 &&
1006       !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
1007   if (HasLeaf7Subleaf1 && ((EAX >> 0) & 1))
1008     setFeature(FEATURE_SHA512);
1009   if (HasLeaf7Subleaf1 && ((EAX >> 1) & 1))
1010     setFeature(FEATURE_SM3);
1011   if (HasLeaf7Subleaf1 && ((EAX >> 2) & 1))
1012     setFeature(FEATURE_SM4);
1013   if (HasLeaf7Subleaf1 && ((EAX >> 3) & 1))
1014     setFeature(FEATURE_RAOINT);
1015   if (HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave)
1016     setFeature(FEATURE_AVXVNNI);
1017   if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
1018     setFeature(FEATURE_AVX512BF16);
1019   if (HasLeaf7Subleaf1 && ((EAX >> 7) & 1))
1020     setFeature(FEATURE_CMPCCXADD);
1021   if (HasLeaf7Subleaf1 && ((EAX >> 21) & 1) && HasAMXSave)
1022     setFeature(FEATURE_AMX_FP16);
1023   if (HasLeaf7Subleaf1 && ((EAX >> 22) & 1))
1024     setFeature(FEATURE_HRESET);
1025   if (HasLeaf7Subleaf1 && ((EAX >> 23) & 1) && HasAVXSave)
1026     setFeature(FEATURE_AVXIFMA);
1027 
1028   if (HasLeaf7Subleaf1 && ((EDX >> 4) & 1) && HasAVXSave)
1029     setFeature(FEATURE_AVXVNNIINT8);
1030   if (HasLeaf7Subleaf1 && ((EDX >> 5) & 1) && HasAVXSave)
1031     setFeature(FEATURE_AVXNECONVERT);
1032   if (HasLeaf7Subleaf1 && ((EDX >> 8) & 1) && HasAMXSave)
1033     setFeature(FEATURE_AMX_COMPLEX);
1034   if (HasLeaf7Subleaf1 && ((EDX >> 10) & 1) && HasAVXSave)
1035     setFeature(FEATURE_AVXVNNIINT16);
1036   if (HasLeaf7Subleaf1 && ((EDX >> 14) & 1))
1037     setFeature(FEATURE_PREFETCHI);
1038   if (HasLeaf7Subleaf1 && ((EDX >> 15) & 1))
1039     setFeature(FEATURE_USERMSR);
1040   if (HasLeaf7Subleaf1 && ((EDX >> 19) & 1))
1041     setFeature(FEATURE_AVX10_1_256);
1042   if (HasLeaf7Subleaf1 && ((EDX >> 21) & 1))
1043     setFeature(FEATURE_APXF);
1044 
1045   unsigned MaxLevel;
1046   getX86CpuIDAndInfo(0, &MaxLevel, &EBX, &ECX, &EDX);
1047   bool HasLeafD = MaxLevel >= 0xd &&
1048                   !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
1049   if (HasLeafD && ((EAX >> 0) & 1) && HasAVXSave)
1050     setFeature(FEATURE_XSAVEOPT);
1051   if (HasLeafD && ((EAX >> 1) & 1) && HasAVXSave)
1052     setFeature(FEATURE_XSAVEC);
1053   if (HasLeafD && ((EAX >> 3) & 1) && HasAVXSave)
1054     setFeature(FEATURE_XSAVES);
1055 
1056   bool HasLeaf24 =
1057       MaxLevel >= 0x24 && !getX86CpuIDAndInfo(0x24, &EAX, &EBX, &ECX, &EDX);
1058   if (HasLeaf7Subleaf1 && ((EDX >> 19) & 1) && HasLeaf24 && ((EBX >> 18) & 1))
1059     setFeature(FEATURE_AVX10_1_512);
1060 
1061   unsigned MaxExtLevel;
1062   getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
1063 
1064   bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
1065                      !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
1066   if (HasExtLeaf1) {
1067     if (ECX & 1)
1068       setFeature(FEATURE_LAHF_LM);
1069     if ((ECX >> 5) & 1)
1070       setFeature(FEATURE_LZCNT);
1071     if (((ECX >> 6) & 1))
1072       setFeature(FEATURE_SSE4_A);
1073     if (((ECX >> 8) & 1))
1074       setFeature(FEATURE_PRFCHW);
1075     if (((ECX >> 11) & 1))
1076       setFeature(FEATURE_XOP);
1077     if (((ECX >> 15) & 1))
1078       setFeature(FEATURE_LWP);
1079     if (((ECX >> 16) & 1))
1080       setFeature(FEATURE_FMA4);
1081     if (((ECX >> 21) & 1))
1082       setFeature(FEATURE_TBM);
1083     if (((ECX >> 29) & 1))
1084       setFeature(FEATURE_MWAITX);
1085 
1086     if (((EDX >> 29) & 1))
1087       setFeature(FEATURE_LM);
1088   }
1089 
1090   bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 &&
1091                      !getX86CpuIDAndInfo(0x80000008, &EAX, &EBX, &ECX, &EDX);
1092   if (HasExtLeaf8 && ((EBX >> 0) & 1))
1093     setFeature(FEATURE_CLZERO);
1094   if (HasExtLeaf8 && ((EBX >> 9) & 1))
1095     setFeature(FEATURE_WBNOINVD);
1096 
1097   bool HasLeaf14 = MaxLevel >= 0x14 &&
1098                    !getX86CpuIDAndInfoEx(0x14, 0x0, &EAX, &EBX, &ECX, &EDX);
1099   if (HasLeaf14 && ((EBX >> 4) & 1))
1100     setFeature(FEATURE_PTWRITE);
1101 
1102   bool HasLeaf19 =
1103       MaxLevel >= 0x19 && !getX86CpuIDAndInfo(0x19, &EAX, &EBX, &ECX, &EDX);
1104   if (HasLeaf7 && HasLeaf19 && ((EBX >> 2) & 1))
1105     setFeature(FEATURE_WIDEKL);
1106 
1107   if (hasFeature(FEATURE_LM) && hasFeature(FEATURE_SSE2)) {
1108     setFeature(FEATURE_X86_64_BASELINE);
1109     if (hasFeature(FEATURE_CMPXCHG16B) && hasFeature(FEATURE_POPCNT) &&
1110         hasFeature(FEATURE_LAHF_LM) && hasFeature(FEATURE_SSE4_2)) {
1111       setFeature(FEATURE_X86_64_V2);
1112       if (hasFeature(FEATURE_AVX2) && hasFeature(FEATURE_BMI) &&
1113           hasFeature(FEATURE_BMI2) && hasFeature(FEATURE_F16C) &&
1114           hasFeature(FEATURE_FMA) && hasFeature(FEATURE_LZCNT) &&
1115           hasFeature(FEATURE_MOVBE)) {
1116         setFeature(FEATURE_X86_64_V3);
1117         if (hasFeature(FEATURE_AVX512BW) && hasFeature(FEATURE_AVX512CD) &&
1118             hasFeature(FEATURE_AVX512DQ) && hasFeature(FEATURE_AVX512VL))
1119           setFeature(FEATURE_X86_64_V4);
1120       }
1121     }
1122   }
1123 
1124 #undef hasFeature
1125 #undef setFeature
1126 }
1127 
1128 #ifndef _WIN32
1129 __attribute__((visibility("hidden")))
1130 #endif
1131 int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE;
1132 
// Processor identification record that __cpu_indicator_init fills in.  The
// object starts out all-zero; a zero __cpu_vendor is what the initializer
// checks to decide whether it still has work to do.
#ifndef _WIN32
__attribute__((visibility("hidden")))
#endif
struct __processor_model {
  // Set to VENDOR_INTEL, VENDOR_AMD or VENDOR_OTHER by the initializer.
  unsigned int __cpu_vendor;
  // Written by the vendor-specific type/subtype detection routine; the
  // initializer asserts it stays below CPU_TYPE_MAX.
  unsigned int __cpu_type;
  // Written alongside __cpu_type; asserted to stay below CPU_SUBTYPE_MAX.
  unsigned int __cpu_subtype;
  // First word of the detected feature bitmap.
  unsigned int __cpu_features[1];
} __cpu_model = {
    .__cpu_vendor = 0,
    .__cpu_type = 0,
    .__cpu_subtype = 0,
    .__cpu_features = {0},
};
1142 
1143 #ifndef _WIN32
1144 __attribute__((visibility("hidden")))
1145 #endif
1146 unsigned __cpu_features2[(CPU_FEATURE_MAX - 1) / 32];
1147 
// A constructor function that sets __cpu_model and __cpu_features2 to
// the right values.  This needs to run only once.  This constructor is
// given the highest priority and it should run before constructors without
// the priority set.  However, it still runs after ifunc initializers and
// needs to be called explicitly there.
1153 
__cpu_indicator_init(void)1154 int CONSTRUCTOR_ATTRIBUTE __cpu_indicator_init(void) {
1155   unsigned EAX, EBX, ECX, EDX;
1156   unsigned MaxLeaf = 5;
1157   unsigned Vendor;
1158   unsigned Model, Family;
1159   unsigned Features[(CPU_FEATURE_MAX + 31) / 32] = {0};
1160   static_assert(sizeof(Features) / sizeof(Features[0]) == 4, "");
1161   static_assert(sizeof(__cpu_features2) / sizeof(__cpu_features2[0]) == 3, "");
1162 
1163   // This function needs to run just once.
1164   if (__cpu_model.__cpu_vendor)
1165     return 0;
1166 
1167   if (!isCpuIdSupported() ||
1168       getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) {
1169     __cpu_model.__cpu_vendor = VENDOR_OTHER;
1170     return -1;
1171   }
1172 
1173   getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
1174   detectX86FamilyModel(EAX, &Family, &Model);
1175 
1176   // Find available features.
1177   getAvailableFeatures(ECX, EDX, MaxLeaf, &Features[0]);
1178 
1179   __cpu_model.__cpu_features[0] = Features[0];
1180   __cpu_features2[0] = Features[1];
1181   __cpu_features2[1] = Features[2];
1182   __cpu_features2[2] = Features[3];
1183 
1184   if (Vendor == SIG_INTEL) {
1185     // Get CPU type.
1186     getIntelProcessorTypeAndSubtype(Family, Model, &Features[0],
1187                                     &(__cpu_model.__cpu_type),
1188                                     &(__cpu_model.__cpu_subtype));
1189     __cpu_model.__cpu_vendor = VENDOR_INTEL;
1190   } else if (Vendor == SIG_AMD) {
1191     // Get CPU type.
1192     getAMDProcessorTypeAndSubtype(Family, Model, &Features[0],
1193                                   &(__cpu_model.__cpu_type),
1194                                   &(__cpu_model.__cpu_subtype));
1195     __cpu_model.__cpu_vendor = VENDOR_AMD;
1196   } else
1197     __cpu_model.__cpu_vendor = VENDOR_OTHER;
1198 
1199   assert(__cpu_model.__cpu_vendor < VENDOR_MAX);
1200   assert(__cpu_model.__cpu_type < CPU_TYPE_MAX);
1201   assert(__cpu_model.__cpu_subtype < CPU_SUBTYPE_MAX);
1202 
1203   return 0;
1204 }
1205 #endif // defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)
1206