xref: /freebsd/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/x86.c (revision 95eb4b873b6a8b527c5bd78d7191975dfca38998)
1 //===-- cpu_model/x86.c - Support for __cpu_model builtin  --------*- C -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file is based on LLVM's lib/Support/Host.cpp.
10 //  It implements the operating system Host concept and builtin
11 //  __cpu_model for the compiler_rt library for x86.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "cpu_model.h"
16 
17 #if !(defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) ||          \
18       defined(_M_X64))
19 #error This file is intended only for x86-based targets
20 #endif
21 
22 #if defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)
23 
24 #include <assert.h>
25 
26 #ifdef _MSC_VER
27 #include <intrin.h>
28 #endif
29 
// Vendor identification words. __cpu_indicator_init() compares these against
// the value it receives in the rEBX output of CPUID leaf 0. Each constant is
// the little-endian packing of the four ASCII characters quoted beside it.
enum VendorSignatures {
  SIG_INTEL = 0x756E6547, // "Genu"
  SIG_AMD = 0x68747541,   // "Auth"
};
34 
// Values stored in __cpu_model.__cpu_vendor. Zero is deliberately not used:
// __cpu_indicator_init() treats a zero vendor as "not initialized yet".
enum ProcessorVendors {
  VENDOR_INTEL = 1,
  VENDOR_AMD = 2,
  VENDOR_OTHER = 3,
  VENDOR_MAX = 4 // One past the last valid vendor; used for range checks.
};
41 
// Values stored in __cpu_model.__cpu_type. The numbering is spelled out so
// that inserting a new entry cannot silently renumber the existing ones.
enum ProcessorTypes {
  INTEL_BONNELL = 1,
  INTEL_CORE2 = 2,
  INTEL_COREI7 = 3,
  AMDFAM10H = 4,
  AMDFAM15H = 5,
  INTEL_SILVERMONT = 6,
  INTEL_KNL = 7,
  AMD_BTVER1 = 8,
  AMD_BTVER2 = 9,
  AMDFAM17H = 10,
  INTEL_KNM = 11,
  INTEL_GOLDMONT = 12,
  INTEL_GOLDMONT_PLUS = 13,
  INTEL_TREMONT = 14,
  AMDFAM19H = 15,
  ZHAOXIN_FAM7H = 16,
  INTEL_SIERRAFOREST = 17,
  INTEL_GRANDRIDGE = 18,
  INTEL_CLEARWATERFOREST = 19,
  CPU_TYPE_MAX = 20 // One past the last valid type; used for range checks.
};
64 
// Values stored in __cpu_model.__cpu_subtype. Entries appear in the order they
// were added, not grouped by vendor, so every value is written out explicitly.
enum ProcessorSubtypes {
  INTEL_COREI7_NEHALEM = 1,
  INTEL_COREI7_WESTMERE = 2,
  INTEL_COREI7_SANDYBRIDGE = 3,
  AMDFAM10H_BARCELONA = 4,
  AMDFAM10H_SHANGHAI = 5,
  AMDFAM10H_ISTANBUL = 6,
  AMDFAM15H_BDVER1 = 7,
  AMDFAM15H_BDVER2 = 8,
  AMDFAM15H_BDVER3 = 9,
  AMDFAM15H_BDVER4 = 10,
  AMDFAM17H_ZNVER1 = 11,
  INTEL_COREI7_IVYBRIDGE = 12,
  INTEL_COREI7_HASWELL = 13,
  INTEL_COREI7_BROADWELL = 14,
  INTEL_COREI7_SKYLAKE = 15,
  INTEL_COREI7_SKYLAKE_AVX512 = 16,
  INTEL_COREI7_CANNONLAKE = 17,
  INTEL_COREI7_ICELAKE_CLIENT = 18,
  INTEL_COREI7_ICELAKE_SERVER = 19,
  AMDFAM17H_ZNVER2 = 20,
  INTEL_COREI7_CASCADELAKE = 21,
  INTEL_COREI7_TIGERLAKE = 22,
  INTEL_COREI7_COOPERLAKE = 23,
  INTEL_COREI7_SAPPHIRERAPIDS = 24,
  INTEL_COREI7_ALDERLAKE = 25,
  AMDFAM19H_ZNVER3 = 26,
  INTEL_COREI7_ROCKETLAKE = 27,
  ZHAOXIN_FAM7H_LUJIAZUI = 28,
  AMDFAM19H_ZNVER4 = 29,
  INTEL_COREI7_GRANITERAPIDS = 30,
  INTEL_COREI7_GRANITERAPIDS_D = 31,
  INTEL_COREI7_ARROWLAKE = 32,
  INTEL_COREI7_ARROWLAKE_S = 33,
  INTEL_COREI7_PANTHERLAKE = 34,
  CPU_SUBTYPE_MAX = 35 // One past the last valid subtype; for range checks.
};
102 
// Bit indices into the feature bitmap. Feature F lives in word F / 32 at bit
// F % 32, so each value is a fixed position and is written out explicitly.
// The numbering has gaps (38-45, 47-48, 50-53 and 59-93 are not named here).
enum ProcessorFeatures {
  // Word 0 (bits 0-31).
  FEATURE_CMOV = 0,
  FEATURE_MMX = 1,
  FEATURE_POPCNT = 2,
  FEATURE_SSE = 3,
  FEATURE_SSE2 = 4,
  FEATURE_SSE3 = 5,
  FEATURE_SSSE3 = 6,
  FEATURE_SSE4_1 = 7,
  FEATURE_SSE4_2 = 8,
  FEATURE_AVX = 9,
  FEATURE_AVX2 = 10,
  FEATURE_SSE4_A = 11,
  FEATURE_FMA4 = 12,
  FEATURE_XOP = 13,
  FEATURE_FMA = 14,
  FEATURE_AVX512F = 15,
  FEATURE_BMI = 16,
  FEATURE_BMI2 = 17,
  FEATURE_AES = 18,
  FEATURE_PCLMUL = 19,
  FEATURE_AVX512VL = 20,
  FEATURE_AVX512BW = 21,
  FEATURE_AVX512DQ = 22,
  FEATURE_AVX512CD = 23,
  FEATURE_AVX512ER = 24,
  FEATURE_AVX512PF = 25,
  FEATURE_AVX512VBMI = 26,
  FEATURE_AVX512IFMA = 27,
  FEATURE_AVX5124VNNIW = 28,
  FEATURE_AVX5124FMAPS = 29,
  FEATURE_AVX512VPOPCNTDQ = 30,
  FEATURE_AVX512VBMI2 = 31,

  // Word 1 (bits 32-63).
  FEATURE_GFNI = 32,
  FEATURE_VPCLMULQDQ = 33,
  FEATURE_AVX512VNNI = 34,
  FEATURE_AVX512BITALG = 35,
  FEATURE_AVX512BF16 = 36,
  FEATURE_AVX512VP2INTERSECT = 37,
  FEATURE_CMPXCHG16B = 46,
  FEATURE_F16C = 49,
  FEATURE_LAHF_LM = 54,
  FEATURE_LM = 55,
  FEATURE_WP = 56,
  FEATURE_LZCNT = 57,
  FEATURE_MOVBE = 58,

  // Words 2-3 (bits 64 and up).
  FEATURE_AVX512FP16 = 94,
  FEATURE_X86_64_BASELINE = 95,
  FEATURE_X86_64_V2 = 96,
  FEATURE_X86_64_V3 = 97,
  FEATURE_X86_64_V4 = 98,
  CPU_FEATURE_MAX = 99 // One past the highest feature index.
};
158 
// Reports whether the CPUID instruction may be executed.
//
// Only 32-bit x86 builds with a GNU-compatible compiler perform a real probe:
// they try to toggle bit 0x00200000 of EFLAGS and report support if the bit
// actually changed. The probe was copied from clang's cpuid.h
// (__get_cpuid_max) after bug reports of OpenSSL crashing on CPUs without
// CPUID support. Every other configuration simply answers true.
static bool isCpuIdSupported(void) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  int HasCpuId;
  __asm__("  pushfl\n"
          "  popl   %%eax\n"
          "  movl   %%eax,%%ecx\n"
          "  xorl   $0x00200000,%%eax\n"
          "  pushl  %%eax\n"
          "  popfl\n"
          "  pushfl\n"
          "  popl   %%eax\n"
          "  movl   $0,%0\n"
          "  cmpl   %%eax,%%ecx\n"
          "  je     1f\n"
          "  movl   $1,%0\n"
          "1:"
          : "=r"(HasCpuId)
          :
          : "eax", "ecx");
  return HasCpuId != 0;
#else
  return true;
#endif
}
190 
191 // This code is copied from lib/Support/Host.cpp.
192 // Changes to either file should be mirrored in the other.
193 
/// getX86CpuIDAndInfo - Run CPUID for leaf \p value and store the resulting
/// EAX/EBX/ECX/EDX through the four output pointers.
///
/// \returns false when the instruction was executed and the outputs were
/// written; true when this build has no way to issue CPUID, in which case the
/// outputs are left untouched.
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // The asm does not list rbx as clobbered, so it is parked in rsi across the
  // cpuid and swapped back afterwards; EBX's result therefore comes out in
  // rsi.
  // FIXME: should we save this for Clang?
  unsigned RegA, RegB, RegC, RegD;
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(RegA), "=S"(RegB), "=c"(RegC), "=d"(RegD)
          : "a"(value));
  *rEAX = RegA;
  *rEBX = RegB;
  *rECX = RegC;
  *rEDX = RegD;
  return false;
#elif defined(__i386__)
  // Same ebx-preserving dance as the 64-bit variant, with 32-bit registers.
  unsigned RegA, RegB, RegC, RegD;
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(RegA), "=S"(RegB), "=c"(RegC), "=d"(RegD)
          : "a"(value));
  *rEAX = RegA;
  *rEBX = RegB;
  *rECX = RegC;
  *rEDX = RegD;
  return false;
#else
  return true;
#endif
#elif defined(_MSC_VER)
  // The MSVC intrinsic is portable across x86 and x64.
  int Regs[4];
  __cpuid(Regs, value);
  *rEAX = Regs[0];
  *rEBX = Regs[1];
  *rECX = Regs[2];
  *rEDX = Regs[3];
  return false;
#else
  return true;
#endif
}
231 
/// getX86CpuIDAndInfoEx - Run CPUID for leaf \p value with \p subleaf loaded
/// into ECX, and store the resulting EAX/EBX/ECX/EDX through the four output
/// pointers.
///
/// \returns false when the instruction was executed and the outputs were
/// written; true when this build has no way to issue CPUID, in which case the
/// outputs are left untouched.
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // The asm does not list rbx as clobbered, so it is parked in rsi across the
  // cpuid and swapped back afterwards; EBX's result therefore comes out in
  // rsi.
  // FIXME: should we save this for Clang?
  unsigned RegA, RegB, RegC, RegD;
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(RegA), "=S"(RegB), "=c"(RegC), "=d"(RegD)
          : "a"(value), "c"(subleaf));
  *rEAX = RegA;
  *rEBX = RegB;
  *rECX = RegC;
  *rEDX = RegD;
  return false;
#elif defined(__i386__)
  // Same ebx-preserving dance as the 64-bit variant, with 32-bit registers.
  unsigned RegA, RegB, RegC, RegD;
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(RegA), "=S"(RegB), "=c"(RegC), "=d"(RegD)
          : "a"(value), "c"(subleaf));
  *rEAX = RegA;
  *rEBX = RegB;
  *rECX = RegC;
  *rEDX = RegD;
  return false;
#else
  return true;
#endif
#elif defined(_MSC_VER)
  int Regs[4];
  __cpuidex(Regs, value, subleaf);
  *rEAX = Regs[0];
  *rEBX = Regs[1];
  *rECX = Regs[2];
  *rEDX = Regs[3];
  return false;
#else
  return true;
#endif
}
270 
// Read extended control register 0 (XCR0) with XGETBV, storing its low half
// in *rEAX and its high half in *rEDX. Used to detect features such as AVX.
// Returns false on success and true when this build cannot issue the
// instruction (the outputs are then left untouched).
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
  // xgetbv is emitted as raw bytes rather than by mnemonic: older assemblers
  // do not know the instruction, and there is no easy way to conditionally
  // compile based on the assembler used.
  unsigned Low, High;
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(Low), "=d"(High) : "c"(0));
  *rEAX = Low;
  *rEDX = High;
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  unsigned long long Xcr0 = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = (unsigned)Xcr0;
  *rEDX = (unsigned)(Xcr0 >> 32);
  return false;
#else
  return true;
#endif
}
288 
// Decode the family and model numbers from the EAX value of CPUID leaf 1.
//
// The base family (bits 8-11) and base model (bits 4-7) are always used. The
// extended family (bits 20-27) is added only when the base family is 0xF, and
// the extended model (bits 16-19) supplies the upper model nibble only when
// the base family is 6 or 0xF.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  const unsigned BaseModel = (EAX >> 4) & 0xf;
  const unsigned BaseFamily = (EAX >> 8) & 0xf;
  const unsigned ExtModel = (EAX >> 16) & 0xf;
  const unsigned ExtFamily = (EAX >> 20) & 0xff;

  unsigned FullFamily = BaseFamily;
  unsigned FullModel = BaseModel;

  if (BaseFamily == 0xf)
    FullFamily += ExtFamily;
  if (BaseFamily == 6 || BaseFamily == 0xf)
    FullModel += ExtModel << 4;

  *Family = FullFamily;
  *Model = FullModel;
}
301 
302 static const char *getIntelProcessorTypeAndSubtype(unsigned Family,
303                                                    unsigned Model,
304                                                    const unsigned *Features,
305                                                    unsigned *Type,
306                                                    unsigned *Subtype) {
307 #define testFeature(F) (Features[F / 32] & (1 << (F % 32))) != 0
308 
309   // We select CPU strings to match the code in Host.cpp, but we don't use them
310   // in compiler-rt.
311   const char *CPU = 0;
312 
313   switch (Family) {
314   case 6:
315     switch (Model) {
316     case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
317                // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
318                // mobile processor, Intel Core 2 Extreme processor, Intel
319                // Pentium Dual-Core processor, Intel Xeon processor, model
320                // 0Fh. All processors are manufactured using the 65 nm process.
321     case 0x16: // Intel Celeron processor model 16h. All processors are
322                // manufactured using the 65 nm process
323       CPU = "core2";
324       *Type = INTEL_CORE2;
325       break;
326     case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
327                // 17h. All processors are manufactured using the 45 nm process.
328                //
329                // 45nm: Penryn , Wolfdale, Yorkfield (XE)
330     case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
331                // the 45 nm process.
332       CPU = "penryn";
333       *Type = INTEL_CORE2;
334       break;
335     case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
336                // processors are manufactured using the 45 nm process.
337     case 0x1e: // Intel(R) Core(TM) i7 CPU         870  @ 2.93GHz.
338                // As found in a Summer 2010 model iMac.
339     case 0x1f:
340     case 0x2e: // Nehalem EX
341       CPU = "nehalem";
342       *Type = INTEL_COREI7;
343       *Subtype = INTEL_COREI7_NEHALEM;
344       break;
345     case 0x25: // Intel Core i7, laptop version.
346     case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
347                // processors are manufactured using the 32 nm process.
348     case 0x2f: // Westmere EX
349       CPU = "westmere";
350       *Type = INTEL_COREI7;
351       *Subtype = INTEL_COREI7_WESTMERE;
352       break;
353     case 0x2a: // Intel Core i7 processor. All processors are manufactured
354                // using the 32 nm process.
355     case 0x2d:
356       CPU = "sandybridge";
357       *Type = INTEL_COREI7;
358       *Subtype = INTEL_COREI7_SANDYBRIDGE;
359       break;
360     case 0x3a:
361     case 0x3e: // Ivy Bridge EP
362       CPU = "ivybridge";
363       *Type = INTEL_COREI7;
364       *Subtype = INTEL_COREI7_IVYBRIDGE;
365       break;
366 
367     // Haswell:
368     case 0x3c:
369     case 0x3f:
370     case 0x45:
371     case 0x46:
372       CPU = "haswell";
373       *Type = INTEL_COREI7;
374       *Subtype = INTEL_COREI7_HASWELL;
375       break;
376 
377     // Broadwell:
378     case 0x3d:
379     case 0x47:
380     case 0x4f:
381     case 0x56:
382       CPU = "broadwell";
383       *Type = INTEL_COREI7;
384       *Subtype = INTEL_COREI7_BROADWELL;
385       break;
386 
387     // Skylake:
388     case 0x4e: // Skylake mobile
389     case 0x5e: // Skylake desktop
390     case 0x8e: // Kaby Lake mobile
391     case 0x9e: // Kaby Lake desktop
392     case 0xa5: // Comet Lake-H/S
393     case 0xa6: // Comet Lake-U
394       CPU = "skylake";
395       *Type = INTEL_COREI7;
396       *Subtype = INTEL_COREI7_SKYLAKE;
397       break;
398 
399     // Rocketlake:
400     case 0xa7:
401       CPU = "rocketlake";
402       *Type = INTEL_COREI7;
403       *Subtype = INTEL_COREI7_ROCKETLAKE;
404       break;
405 
406     // Skylake Xeon:
407     case 0x55:
408       *Type = INTEL_COREI7;
409       if (testFeature(FEATURE_AVX512BF16)) {
410         CPU = "cooperlake";
411         *Subtype = INTEL_COREI7_COOPERLAKE;
412       } else if (testFeature(FEATURE_AVX512VNNI)) {
413         CPU = "cascadelake";
414         *Subtype = INTEL_COREI7_CASCADELAKE;
415       } else {
416         CPU = "skylake-avx512";
417         *Subtype = INTEL_COREI7_SKYLAKE_AVX512;
418       }
419       break;
420 
421     // Cannonlake:
422     case 0x66:
423       CPU = "cannonlake";
424       *Type = INTEL_COREI7;
425       *Subtype = INTEL_COREI7_CANNONLAKE;
426       break;
427 
428     // Icelake:
429     case 0x7d:
430     case 0x7e:
431       CPU = "icelake-client";
432       *Type = INTEL_COREI7;
433       *Subtype = INTEL_COREI7_ICELAKE_CLIENT;
434       break;
435 
436     // Tigerlake:
437     case 0x8c:
438     case 0x8d:
439       CPU = "tigerlake";
440       *Type = INTEL_COREI7;
441       *Subtype = INTEL_COREI7_TIGERLAKE;
442       break;
443 
444     // Alderlake:
445     case 0x97:
446     case 0x9a:
447     // Raptorlake:
448     case 0xb7:
449     case 0xba:
450     case 0xbf:
451     // Meteorlake:
452     case 0xaa:
453     case 0xac:
454     // Gracemont:
455     case 0xbe:
456       CPU = "alderlake";
457       *Type = INTEL_COREI7;
458       *Subtype = INTEL_COREI7_ALDERLAKE;
459       break;
460 
461     // Arrowlake:
462     case 0xc5:
463       CPU = "arrowlake";
464       *Type = INTEL_COREI7;
465       *Subtype = INTEL_COREI7_ARROWLAKE;
466       break;
467 
468     // Arrowlake S:
469     case 0xc6:
470     // Lunarlake:
471     case 0xbd:
472       CPU = "arrowlake-s";
473       *Type = INTEL_COREI7;
474       *Subtype = INTEL_COREI7_ARROWLAKE_S;
475       break;
476 
477     // Pantherlake:
478     case 0xcc:
479       CPU = "pantherlake";
480       *Type = INTEL_COREI7;
481       *Subtype = INTEL_COREI7_PANTHERLAKE;
482       break;
483 
484     // Icelake Xeon:
485     case 0x6a:
486     case 0x6c:
487       CPU = "icelake-server";
488       *Type = INTEL_COREI7;
489       *Subtype = INTEL_COREI7_ICELAKE_SERVER;
490       break;
491 
492     // Emerald Rapids:
493     case 0xcf:
494     // Sapphire Rapids:
495     case 0x8f:
496       CPU = "sapphirerapids";
497       *Type = INTEL_COREI7;
498       *Subtype = INTEL_COREI7_SAPPHIRERAPIDS;
499       break;
500 
501     // Granite Rapids:
502     case 0xad:
503       CPU = "graniterapids";
504       *Type = INTEL_COREI7;
505       *Subtype = INTEL_COREI7_GRANITERAPIDS;
506       break;
507 
508     // Granite Rapids D:
509     case 0xae:
510       CPU = "graniterapids-d";
511       *Type = INTEL_COREI7;
512       *Subtype = INTEL_COREI7_GRANITERAPIDS_D;
513       break;
514 
515     case 0x1c: // Most 45 nm Intel Atom processors
516     case 0x26: // 45 nm Atom Lincroft
517     case 0x27: // 32 nm Atom Medfield
518     case 0x35: // 32 nm Atom Midview
519     case 0x36: // 32 nm Atom Midview
520       CPU = "bonnell";
521       *Type = INTEL_BONNELL;
522       break;
523 
524     // Atom Silvermont codes from the Intel software optimization guide.
525     case 0x37:
526     case 0x4a:
527     case 0x4d:
528     case 0x5a:
529     case 0x5d:
530     case 0x4c: // really airmont
531       CPU = "silvermont";
532       *Type = INTEL_SILVERMONT;
533       break;
534     // Goldmont:
535     case 0x5c: // Apollo Lake
536     case 0x5f: // Denverton
537       CPU = "goldmont";
538       *Type = INTEL_GOLDMONT;
539       break; // "goldmont"
540     case 0x7a:
541       CPU = "goldmont-plus";
542       *Type = INTEL_GOLDMONT_PLUS;
543       break;
544     case 0x86:
545     case 0x8a: // Lakefield
546     case 0x96: // Elkhart Lake
547     case 0x9c: // Jasper Lake
548       CPU = "tremont";
549       *Type = INTEL_TREMONT;
550       break;
551 
552     // Sierraforest:
553     case 0xaf:
554       CPU = "sierraforest";
555       *Type = INTEL_SIERRAFOREST;
556       break;
557 
558     // Grandridge:
559     case 0xb6:
560       CPU = "grandridge";
561       *Type = INTEL_GRANDRIDGE;
562       break;
563 
564     // Clearwaterforest:
565     case 0xdd:
566       CPU = "clearwaterforest";
567       *Type = INTEL_COREI7;
568       *Subtype = INTEL_CLEARWATERFOREST;
569       break;
570 
571     case 0x57:
572       CPU = "knl";
573       *Type = INTEL_KNL;
574       break;
575 
576     case 0x85:
577       CPU = "knm";
578       *Type = INTEL_KNM;
579       break;
580 
581     default: // Unknown family 6 CPU.
582       break;
583     }
584     break;
585   default:
586     break; // Unknown.
587   }
588 
589   return CPU;
590 }
591 
592 static const char *getAMDProcessorTypeAndSubtype(unsigned Family,
593                                                  unsigned Model,
594                                                  const unsigned *Features,
595                                                  unsigned *Type,
596                                                  unsigned *Subtype) {
597   // We select CPU strings to match the code in Host.cpp, but we don't use them
598   // in compiler-rt.
599   const char *CPU = 0;
600 
601   switch (Family) {
602   case 16:
603     CPU = "amdfam10";
604     *Type = AMDFAM10H;
605     switch (Model) {
606     case 2:
607       *Subtype = AMDFAM10H_BARCELONA;
608       break;
609     case 4:
610       *Subtype = AMDFAM10H_SHANGHAI;
611       break;
612     case 8:
613       *Subtype = AMDFAM10H_ISTANBUL;
614       break;
615     }
616     break;
617   case 20:
618     CPU = "btver1";
619     *Type = AMD_BTVER1;
620     break;
621   case 21:
622     CPU = "bdver1";
623     *Type = AMDFAM15H;
624     if (Model >= 0x60 && Model <= 0x7f) {
625       CPU = "bdver4";
626       *Subtype = AMDFAM15H_BDVER4;
627       break; // 60h-7Fh: Excavator
628     }
629     if (Model >= 0x30 && Model <= 0x3f) {
630       CPU = "bdver3";
631       *Subtype = AMDFAM15H_BDVER3;
632       break; // 30h-3Fh: Steamroller
633     }
634     if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
635       CPU = "bdver2";
636       *Subtype = AMDFAM15H_BDVER2;
637       break; // 02h, 10h-1Fh: Piledriver
638     }
639     if (Model <= 0x0f) {
640       *Subtype = AMDFAM15H_BDVER1;
641       break; // 00h-0Fh: Bulldozer
642     }
643     break;
644   case 22:
645     CPU = "btver2";
646     *Type = AMD_BTVER2;
647     break;
648   case 23:
649     CPU = "znver1";
650     *Type = AMDFAM17H;
651     if ((Model >= 0x30 && Model <= 0x3f) || (Model == 0x47) ||
652         (Model >= 0x60 && Model <= 0x67) || (Model >= 0x68 && Model <= 0x6f) ||
653         (Model >= 0x70 && Model <= 0x7f) || (Model >= 0x84 && Model <= 0x87) ||
654         (Model >= 0x90 && Model <= 0x97) || (Model >= 0x98 && Model <= 0x9f) ||
655         (Model >= 0xa0 && Model <= 0xaf)) {
656       // Family 17h Models 30h-3Fh (Starship) Zen 2
657       // Family 17h Models 47h (Cardinal) Zen 2
658       // Family 17h Models 60h-67h (Renoir) Zen 2
659       // Family 17h Models 68h-6Fh (Lucienne) Zen 2
660       // Family 17h Models 70h-7Fh (Matisse) Zen 2
661       // Family 17h Models 84h-87h (ProjectX) Zen 2
662       // Family 17h Models 90h-97h (VanGogh) Zen 2
663       // Family 17h Models 98h-9Fh (Mero) Zen 2
664       // Family 17h Models A0h-AFh (Mendocino) Zen 2
665       CPU = "znver2";
666       *Subtype = AMDFAM17H_ZNVER2;
667       break;
668     }
669     if ((Model >= 0x10 && Model <= 0x1f) || (Model >= 0x20 && Model <= 0x2f)) {
670       // Family 17h Models 10h-1Fh (Raven1) Zen
671       // Family 17h Models 10h-1Fh (Picasso) Zen+
672       // Family 17h Models 20h-2Fh (Raven2 x86) Zen
673       *Subtype = AMDFAM17H_ZNVER1;
674       break;
675     }
676     break;
677   case 25:
678     CPU = "znver3";
679     *Type = AMDFAM19H;
680     if ((Model <= 0x0f) || (Model >= 0x20 && Model <= 0x2f) ||
681         (Model >= 0x30 && Model <= 0x3f) || (Model >= 0x40 && Model <= 0x4f) ||
682         (Model >= 0x50 && Model <= 0x5f)) {
683       // Family 19h Models 00h-0Fh (Genesis, Chagall) Zen 3
684       // Family 19h Models 20h-2Fh (Vermeer) Zen 3
685       // Family 19h Models 30h-3Fh (Badami) Zen 3
686       // Family 19h Models 40h-4Fh (Rembrandt) Zen 3+
687       // Family 19h Models 50h-5Fh (Cezanne) Zen 3
688       *Subtype = AMDFAM19H_ZNVER3;
689       break;
690     }
691     if ((Model >= 0x10 && Model <= 0x1f) || (Model >= 0x60 && Model <= 0x6f) ||
692         (Model >= 0x70 && Model <= 0x77) || (Model >= 0x78 && Model <= 0x7f) ||
693         (Model >= 0xa0 && Model <= 0xaf)) {
694       // Family 19h Models 10h-1Fh (Stones; Storm Peak) Zen 4
695       // Family 19h Models 60h-6Fh (Raphael) Zen 4
696       // Family 19h Models 70h-77h (Phoenix, Hawkpoint1) Zen 4
697       // Family 19h Models 78h-7Fh (Phoenix 2, Hawkpoint2) Zen 4
698       // Family 19h Models A0h-AFh (Stones-Dense) Zen 4
699       CPU = "znver4";
700       *Subtype = AMDFAM19H_ZNVER4;
701       break; //  "znver4"
702     }
703     break; // family 19h
704   default:
705     break; // Unknown AMD CPU.
706   }
707 
708   return CPU;
709 }
710 
711 static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
712                                  unsigned *Features) {
713   unsigned EAX = 0, EBX = 0;
714 
715 #define hasFeature(F) ((Features[F / 32] >> (F % 32)) & 1)
716 #define setFeature(F) Features[F / 32] |= 1U << (F % 32)
717 
718   if ((EDX >> 15) & 1)
719     setFeature(FEATURE_CMOV);
720   if ((EDX >> 23) & 1)
721     setFeature(FEATURE_MMX);
722   if ((EDX >> 25) & 1)
723     setFeature(FEATURE_SSE);
724   if ((EDX >> 26) & 1)
725     setFeature(FEATURE_SSE2);
726 
727   if ((ECX >> 0) & 1)
728     setFeature(FEATURE_SSE3);
729   if ((ECX >> 1) & 1)
730     setFeature(FEATURE_PCLMUL);
731   if ((ECX >> 9) & 1)
732     setFeature(FEATURE_SSSE3);
733   if ((ECX >> 12) & 1)
734     setFeature(FEATURE_FMA);
735   if ((ECX >> 13) & 1)
736     setFeature(FEATURE_CMPXCHG16B);
737   if ((ECX >> 19) & 1)
738     setFeature(FEATURE_SSE4_1);
739   if ((ECX >> 20) & 1)
740     setFeature(FEATURE_SSE4_2);
741   if ((ECX >> 22) & 1)
742     setFeature(FEATURE_MOVBE);
743   if ((ECX >> 23) & 1)
744     setFeature(FEATURE_POPCNT);
745   if ((ECX >> 25) & 1)
746     setFeature(FEATURE_AES);
747   if ((ECX >> 29) & 1)
748     setFeature(FEATURE_F16C);
749 
750   // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
751   // indicates that the AVX registers will be saved and restored on context
752   // switch, then we have full AVX support.
753   const unsigned AVXBits = (1 << 27) | (1 << 28);
754   bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
755                 ((EAX & 0x6) == 0x6);
756 #if defined(__APPLE__)
757   // Darwin lazily saves the AVX512 context on first use: trust that the OS will
758   // save the AVX512 context if we use AVX512 instructions, even the bit is not
759   // set right now.
760   bool HasAVX512Save = true;
761 #else
762   // AVX512 requires additional context to be saved by the OS.
763   bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
764 #endif
765 
766   if (HasAVX)
767     setFeature(FEATURE_AVX);
768 
769   bool HasLeaf7 =
770       MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
771 
772   if (HasLeaf7 && ((EBX >> 3) & 1))
773     setFeature(FEATURE_BMI);
774   if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
775     setFeature(FEATURE_AVX2);
776   if (HasLeaf7 && ((EBX >> 8) & 1))
777     setFeature(FEATURE_BMI2);
778   if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
779     setFeature(FEATURE_AVX512F);
780   if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
781     setFeature(FEATURE_AVX512DQ);
782   if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
783     setFeature(FEATURE_AVX512IFMA);
784   if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
785     setFeature(FEATURE_AVX512PF);
786   if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
787     setFeature(FEATURE_AVX512ER);
788   if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
789     setFeature(FEATURE_AVX512CD);
790   if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
791     setFeature(FEATURE_AVX512BW);
792   if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
793     setFeature(FEATURE_AVX512VL);
794 
795   if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
796     setFeature(FEATURE_AVX512VBMI);
797   if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
798     setFeature(FEATURE_AVX512VBMI2);
799   if (HasLeaf7 && ((ECX >> 8) & 1))
800     setFeature(FEATURE_GFNI);
801   if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX)
802     setFeature(FEATURE_VPCLMULQDQ);
803   if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
804     setFeature(FEATURE_AVX512VNNI);
805   if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save)
806     setFeature(FEATURE_AVX512BITALG);
807   if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
808     setFeature(FEATURE_AVX512VPOPCNTDQ);
809 
810   if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
811     setFeature(FEATURE_AVX5124VNNIW);
812   if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
813     setFeature(FEATURE_AVX5124FMAPS);
814   if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
815     setFeature(FEATURE_AVX512VP2INTERSECT);
816   if (HasLeaf7 && ((EDX >> 23) & 1) && HasAVX512Save)
817     setFeature(FEATURE_AVX512FP16);
818 
819   // EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't
820   // return all 0s for invalid subleaves so check the limit.
821   bool HasLeaf7Subleaf1 =
822       HasLeaf7 && EAX >= 1 &&
823       !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
824   if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
825     setFeature(FEATURE_AVX512BF16);
826 
827   unsigned MaxExtLevel;
828   getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
829 
830   bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
831                      !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
832   if (HasExtLeaf1) {
833     if (ECX & 1)
834       setFeature(FEATURE_LAHF_LM);
835     if ((ECX >> 5) & 1)
836       setFeature(FEATURE_LZCNT);
837     if (((ECX >> 6) & 1))
838       setFeature(FEATURE_SSE4_A);
839     if (((ECX >> 11) & 1))
840       setFeature(FEATURE_XOP);
841     if (((ECX >> 16) & 1))
842       setFeature(FEATURE_FMA4);
843     if (((EDX >> 29) & 1))
844       setFeature(FEATURE_LM);
845   }
846 
847   if (hasFeature(FEATURE_LM) && hasFeature(FEATURE_SSE2)) {
848     setFeature(FEATURE_X86_64_BASELINE);
849     if (hasFeature(FEATURE_CMPXCHG16B) && hasFeature(FEATURE_POPCNT) &&
850         hasFeature(FEATURE_LAHF_LM) && hasFeature(FEATURE_SSE4_2)) {
851       setFeature(FEATURE_X86_64_V2);
852       if (hasFeature(FEATURE_AVX2) && hasFeature(FEATURE_BMI) &&
853           hasFeature(FEATURE_BMI2) && hasFeature(FEATURE_F16C) &&
854           hasFeature(FEATURE_FMA) && hasFeature(FEATURE_LZCNT) &&
855           hasFeature(FEATURE_MOVBE)) {
856         setFeature(FEATURE_X86_64_V3);
857         if (hasFeature(FEATURE_AVX512BW) && hasFeature(FEATURE_AVX512CD) &&
858             hasFeature(FEATURE_AVX512DQ) && hasFeature(FEATURE_AVX512VL))
859           setFeature(FEATURE_X86_64_V4);
860       }
861     }
862   }
863 
864 #undef hasFeature
865 #undef setFeature
866 }
867 
// Forward declaration of the initializer defined further down in this file.
// CONSTRUCTOR_ATTRIBUTE is not defined in this file (presumably it comes from
// cpu_model.h -- confirm). On every target except Windows the symbol is given
// hidden visibility.
#ifndef _WIN32
__attribute__((visibility("hidden")))
#endif
int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE;

// Detection results: vendor, type and subtype codes plus the first 32 feature
// bits (word 0 of the feature bitmap). The object starts out all-zero, and
// __cpu_indicator_init() treats a zero __cpu_vendor as "not initialized yet".
#ifndef _WIN32
__attribute__((visibility("hidden")))
#endif
struct __processor_model {
  unsigned int __cpu_vendor;      // A ProcessorVendors value.
  unsigned int __cpu_type;        // A ProcessorTypes value, or 0 if unknown.
  unsigned int __cpu_subtype;     // A ProcessorSubtypes value, or 0 if unknown.
  unsigned int __cpu_features[1]; // Feature bits 0-31.
} __cpu_model = {0, 0, 0, {0}};

// Feature bits 32 and up, 32 per word. __cpu_indicator_init() fills these
// words from words 1-3 of the bitmap it computes.
#ifndef _WIN32
__attribute__((visibility("hidden")))
#endif
unsigned __cpu_features2[(CPU_FEATURE_MAX - 1) / 32];
887 
// A constructor function that sets __cpu_model and __cpu_features2 to the
// right values.  This needs to run only once.  This constructor is given the
// highest priority, so it should run before constructors that have no
// priority set.  However, it still runs after ifunc initializers and needs
// to be called explicitly there.
893 
894 int CONSTRUCTOR_ATTRIBUTE __cpu_indicator_init(void) {
895   unsigned EAX, EBX, ECX, EDX;
896   unsigned MaxLeaf = 5;
897   unsigned Vendor;
898   unsigned Model, Family;
899   unsigned Features[(CPU_FEATURE_MAX + 31) / 32] = {0};
900   static_assert(sizeof(Features) / sizeof(Features[0]) == 4, "");
901   static_assert(sizeof(__cpu_features2) / sizeof(__cpu_features2[0]) == 3, "");
902 
903   // This function needs to run just once.
904   if (__cpu_model.__cpu_vendor)
905     return 0;
906 
907   if (!isCpuIdSupported() ||
908       getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) {
909     __cpu_model.__cpu_vendor = VENDOR_OTHER;
910     return -1;
911   }
912 
913   getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
914   detectX86FamilyModel(EAX, &Family, &Model);
915 
916   // Find available features.
917   getAvailableFeatures(ECX, EDX, MaxLeaf, &Features[0]);
918 
919   __cpu_model.__cpu_features[0] = Features[0];
920   __cpu_features2[0] = Features[1];
921   __cpu_features2[1] = Features[2];
922   __cpu_features2[2] = Features[3];
923 
924   if (Vendor == SIG_INTEL) {
925     // Get CPU type.
926     getIntelProcessorTypeAndSubtype(Family, Model, &Features[0],
927                                     &(__cpu_model.__cpu_type),
928                                     &(__cpu_model.__cpu_subtype));
929     __cpu_model.__cpu_vendor = VENDOR_INTEL;
930   } else if (Vendor == SIG_AMD) {
931     // Get CPU type.
932     getAMDProcessorTypeAndSubtype(Family, Model, &Features[0],
933                                   &(__cpu_model.__cpu_type),
934                                   &(__cpu_model.__cpu_subtype));
935     __cpu_model.__cpu_vendor = VENDOR_AMD;
936   } else
937     __cpu_model.__cpu_vendor = VENDOR_OTHER;
938 
939   assert(__cpu_model.__cpu_vendor < VENDOR_MAX);
940   assert(__cpu_model.__cpu_type < CPU_TYPE_MAX);
941   assert(__cpu_model.__cpu_subtype < CPU_SUBTYPE_MAX);
942 
943   return 0;
944 }
945 #endif // defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)
946