xref: /freebsd/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/x86.c (revision 1db9f3b21e39176dd5b67cf8ac378633b172463e)
1 //===-- cpu_model/x86.c - Support for __cpu_model builtin  --------*- C -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file is based on LLVM's lib/Support/Host.cpp.
10 //  It implements the operating system Host concept and builtin
11 //  __cpu_model for the compiler_rt library for x86.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "cpu_model.h"
16 
17 #if !(defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) ||          \
18       defined(_M_X64))
19 #error This file is intended only for x86-based targets
20 #endif
21 
22 #if defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)
23 
24 #include <assert.h>
25 
26 #ifdef _MSC_VER
27 #include <intrin.h>
28 #endif
29 
// Vendor signatures as compared against the EBX output of CPUID leaf 0:
// four ASCII characters of the vendor string packed least-significant byte
// first.
enum VendorSignatures {
  SIG_INTEL = 'G' | ('e' << 8) | ('n' << 16) | ('u' << 24), // 0x756e6547
  SIG_AMD = 'A' | ('u' << 8) | ('t' << 16) | ('h' << 24),   // 0x68747541
};
34 
// Values stored in __cpu_model.__cpu_vendor. Zero is never stored by the
// initializer, which lets it double as the "not initialized yet" marker.
enum ProcessorVendors {
  VENDOR_INTEL = 1,
  VENDOR_AMD = 2,
  VENDOR_OTHER = 3,
  VENDOR_MAX = 4 // One past the last valid vendor.
};
41 
// Values stored in __cpu_model.__cpu_type. The numbering is spelled out so
// that the value of each entry is visible at a glance; new entries must be
// appended directly before CPU_TYPE_MAX.
enum ProcessorTypes {
  INTEL_BONNELL = 1,
  INTEL_CORE2 = 2,
  INTEL_COREI7 = 3,
  AMDFAM10H = 4,
  AMDFAM15H = 5,
  INTEL_SILVERMONT = 6,
  INTEL_KNL = 7,
  AMD_BTVER1 = 8,
  AMD_BTVER2 = 9,
  AMDFAM17H = 10,
  INTEL_KNM = 11,
  INTEL_GOLDMONT = 12,
  INTEL_GOLDMONT_PLUS = 13,
  INTEL_TREMONT = 14,
  AMDFAM19H = 15,
  ZHAOXIN_FAM7H = 16,
  INTEL_SIERRAFOREST = 17,
  INTEL_GRANDRIDGE = 18,
  INTEL_CLEARWATERFOREST = 19,
  CPU_TYPE_MAX = 20 // One past the last valid type.
};
64 
// Values stored in __cpu_model.__cpu_subtype. Intel, AMD and Zhaoxin entries
// are interleaved; the numbering is spelled out so that the value of each
// entry is visible at a glance. New entries must be appended directly before
// CPU_SUBTYPE_MAX.
enum ProcessorSubtypes {
  INTEL_COREI7_NEHALEM = 1,
  INTEL_COREI7_WESTMERE = 2,
  INTEL_COREI7_SANDYBRIDGE = 3,
  AMDFAM10H_BARCELONA = 4,
  AMDFAM10H_SHANGHAI = 5,
  AMDFAM10H_ISTANBUL = 6,
  AMDFAM15H_BDVER1 = 7,
  AMDFAM15H_BDVER2 = 8,
  AMDFAM15H_BDVER3 = 9,
  AMDFAM15H_BDVER4 = 10,
  AMDFAM17H_ZNVER1 = 11,
  INTEL_COREI7_IVYBRIDGE = 12,
  INTEL_COREI7_HASWELL = 13,
  INTEL_COREI7_BROADWELL = 14,
  INTEL_COREI7_SKYLAKE = 15,
  INTEL_COREI7_SKYLAKE_AVX512 = 16,
  INTEL_COREI7_CANNONLAKE = 17,
  INTEL_COREI7_ICELAKE_CLIENT = 18,
  INTEL_COREI7_ICELAKE_SERVER = 19,
  AMDFAM17H_ZNVER2 = 20,
  INTEL_COREI7_CASCADELAKE = 21,
  INTEL_COREI7_TIGERLAKE = 22,
  INTEL_COREI7_COOPERLAKE = 23,
  INTEL_COREI7_SAPPHIRERAPIDS = 24,
  INTEL_COREI7_ALDERLAKE = 25,
  AMDFAM19H_ZNVER3 = 26,
  INTEL_COREI7_ROCKETLAKE = 27,
  ZHAOXIN_FAM7H_LUJIAZUI = 28,
  AMDFAM19H_ZNVER4 = 29,
  INTEL_COREI7_GRANITERAPIDS = 30,
  INTEL_COREI7_GRANITERAPIDS_D = 31,
  INTEL_COREI7_ARROWLAKE = 32,
  INTEL_COREI7_ARROWLAKE_S = 33,
  INTEL_COREI7_PANTHERLAKE = 34,
  CPU_SUBTYPE_MAX = 35 // One past the last valid subtype.
};
102 
// Bit indices into the feature bitmap. Feature F lives in bit (F % 32) of
// 32-bit word (F / 32); word 0 is published as __cpu_model.__cpu_features[0]
// and words 1-3 as __cpu_features2[0..2]. The numbering is spelled out so the
// gaps between the groups are explicit.
enum ProcessorFeatures {
  // Word 0.
  FEATURE_CMOV = 0,
  FEATURE_MMX = 1,
  FEATURE_POPCNT = 2,
  FEATURE_SSE = 3,
  FEATURE_SSE2 = 4,
  FEATURE_SSE3 = 5,
  FEATURE_SSSE3 = 6,
  FEATURE_SSE4_1 = 7,
  FEATURE_SSE4_2 = 8,
  FEATURE_AVX = 9,
  FEATURE_AVX2 = 10,
  FEATURE_SSE4_A = 11,
  FEATURE_FMA4 = 12,
  FEATURE_XOP = 13,
  FEATURE_FMA = 14,
  FEATURE_AVX512F = 15,
  FEATURE_BMI = 16,
  FEATURE_BMI2 = 17,
  FEATURE_AES = 18,
  FEATURE_PCLMUL = 19,
  FEATURE_AVX512VL = 20,
  FEATURE_AVX512BW = 21,
  FEATURE_AVX512DQ = 22,
  FEATURE_AVX512CD = 23,
  FEATURE_AVX512ER = 24,
  FEATURE_AVX512PF = 25,
  FEATURE_AVX512VBMI = 26,
  FEATURE_AVX512IFMA = 27,
  FEATURE_AVX5124VNNIW = 28,
  FEATURE_AVX5124FMAPS = 29,
  FEATURE_AVX512VPOPCNTDQ = 30,
  FEATURE_AVX512VBMI2 = 31,

  // Word 1.
  FEATURE_GFNI = 32,
  FEATURE_VPCLMULQDQ = 33,
  FEATURE_AVX512VNNI = 34,
  FEATURE_AVX512BITALG = 35,
  FEATURE_AVX512BF16 = 36,
  FEATURE_AVX512VP2INTERSECT = 37,

  FEATURE_CMPXCHG16B = 46,
  FEATURE_F16C = 49,
  FEATURE_LAHF_LM = 54,
  FEATURE_LM = 55,
  FEATURE_WP = 56,
  FEATURE_LZCNT = 57,
  FEATURE_MOVBE = 58,

  // Word 2 (top bit) and word 3: x86-64 micro-architecture levels.
  FEATURE_X86_64_BASELINE = 95,
  FEATURE_X86_64_V2 = 96,
  FEATURE_X86_64_V3 = 97,
  FEATURE_X86_64_V4 = 98,
  CPU_FEATURE_MAX = 99 // One past the last valid feature index.
};
157 
// Report whether the CPUID instruction may be executed.
//
// Only 32-bit x86 built with a GNU-compatible compiler performs a real probe:
// it tries to flip bit 21 (mask 0x00200000) of EFLAGS and checks whether the
// change sticks. The probe was copied from clang's cpuid.h (__get_cpuid_max)
// and was motivated by bug reports for OpenSSL crashing on CPUs without CPUID
// support. Every other configuration simply reports true.
static bool isCpuIdSupported(void) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  int FlagToggled;
  __asm__("  pushfl\n"
          "  popl   %%eax\n"
          "  movl   %%eax,%%ecx\n"
          "  xorl   $0x00200000,%%eax\n"
          "  pushl  %%eax\n"
          "  popfl\n"
          "  pushfl\n"
          "  popl   %%eax\n"
          "  movl   $0,%0\n"
          "  cmpl   %%eax,%%ecx\n"
          "  je     1f\n"
          "  movl   $1,%0\n"
          "1:"
          : "=r"(FlagToggled)
          :
          : "eax", "ecx");
  // EFLAGS read back unchanged means the bit is not writable: no CPUID.
  return FlagToggled != 0;
#else
  return true;
#endif
}
189 
190 // This code is copied from lib/Support/Host.cpp.
191 // Changes to either file should be mirrored in the other.
192 
/// getX86CpuIDAndInfo - Run CPUID for leaf \p value and store the resulting
/// EAX/EBX/ECX/EDX through the four output pointers. Returns false on
/// success and true when this build has no way of executing CPUID, in which
/// case the outputs are left untouched.
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__) || defined(__i386__)
  unsigned OutA, OutB, OutC, OutD;
  // gcc doesn't know cpuid would clobber ebx/rbx, so the register is parked
  // in esi/rsi around the instruction and the EBX result is handed back
  // through esi/rsi instead.
  // FIXME: should we save this for Clang?
#if defined(__x86_64__)
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(OutA), "=S"(OutB), "=c"(OutC), "=d"(OutD)
          : "a"(value));
#else
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(OutA), "=S"(OutB), "=c"(OutC), "=d"(OutD)
          : "a"(value));
#endif
  *rEAX = OutA;
  *rEBX = OutB;
  *rECX = OutC;
  *rEDX = OutD;
  return false;
#else
  return true;
#endif
#elif defined(_MSC_VER)
  // The MSVC intrinsic is portable across x86 and x64.
  int Regs[4];
  __cpuid(Regs, value);
  *rEAX = Regs[0];
  *rEBX = Regs[1];
  *rECX = Regs[2];
  *rEDX = Regs[3];
  return false;
#else
  return true;
#endif
}
230 
/// getX86CpuIDAndInfoEx - Run CPUID for leaf \p value with \p subleaf loaded
/// into ECX and store the resulting EAX/EBX/ECX/EDX through the four output
/// pointers. Returns false on success and true when this build has no way of
/// executing CPUID, in which case the outputs are left untouched.
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__) || defined(__i386__)
  unsigned OutA, OutB, OutC, OutD;
  // gcc doesn't know cpuid would clobber ebx/rbx, so the register is parked
  // in esi/rsi around the instruction and the EBX result is handed back
  // through esi/rsi instead.
  // FIXME: should we save this for Clang?
#if defined(__x86_64__)
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(OutA), "=S"(OutB), "=c"(OutC), "=d"(OutD)
          : "a"(value), "c"(subleaf));
#else
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(OutA), "=S"(OutB), "=c"(OutC), "=d"(OutD)
          : "a"(value), "c"(subleaf));
#endif
  *rEAX = OutA;
  *rEBX = OutB;
  *rECX = OutC;
  *rEDX = OutD;
  return false;
#else
  return true;
#endif
#elif defined(_MSC_VER)
  int Regs[4];
  __cpuidex(Regs, value, subleaf);
  *rEAX = Regs[0];
  *rEBX = Regs[1];
  *rECX = Regs[2];
  *rEDX = Regs[3];
  return false;
#else
  return true;
#endif
}
269 
// Read extended control register 0 (XCR0), which is used to detect whether
// the OS has enabled state such as AVX. The low half is stored through rEAX
// and the high half through rEDX. Returns false on success and true when this
// build has no way of reading the register.
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
  unsigned Low, High;
  // xgetbv is emitted as a raw .byte sequence instead of the mnemonic because
  // older assemblers do not include support for xgetbv and there is no easy
  // way to conditionally compile based on the assembler used. ECX = 0 selects
  // XCR0.
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(Low), "=d"(High) : "c"(0));
  *rEAX = Low;
  *rEDX = High;
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  unsigned long long Xcr0 = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = (unsigned)Xcr0;
  *rEDX = (unsigned)(Xcr0 >> 32);
  return false;
#else
  return true;
#endif
}
287 
/// detectX86FamilyModel - Decode the display family and model from the EAX
/// value returned by CPUID leaf 1.
///
/// The extended family field is only added when the base family is 0xF; the
/// extended model field supplies the upper nibble of the model when the base
/// family is 6 or 0xF.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  const unsigned BaseModel = (EAX >> 4) & 0xf;    // Bits 4 - 7
  const unsigned BaseFamily = (EAX >> 8) & 0xf;   // Bits 8 - 11
  const unsigned ExtModel = (EAX >> 16) & 0xf;    // Bits 16 - 19
  const unsigned ExtFamily = (EAX >> 20) & 0xff;  // Bits 20 - 27

  unsigned DisplayFamily = BaseFamily;
  unsigned DisplayModel = BaseModel;

  if (BaseFamily == 0xf)
    DisplayFamily += ExtFamily;
  if (BaseFamily == 6 || BaseFamily == 0xf)
    DisplayModel += ExtModel << 4;

  *Family = DisplayFamily;
  *Model = DisplayModel;
}
300 
301 static const char *getIntelProcessorTypeAndSubtype(unsigned Family,
302                                                    unsigned Model,
303                                                    const unsigned *Features,
304                                                    unsigned *Type,
305                                                    unsigned *Subtype) {
306 #define testFeature(F) (Features[F / 32] & (1 << (F % 32))) != 0
307 
308   // We select CPU strings to match the code in Host.cpp, but we don't use them
309   // in compiler-rt.
310   const char *CPU = 0;
311 
312   switch (Family) {
313   case 6:
314     switch (Model) {
315     case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
316                // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
317                // mobile processor, Intel Core 2 Extreme processor, Intel
318                // Pentium Dual-Core processor, Intel Xeon processor, model
319                // 0Fh. All processors are manufactured using the 65 nm process.
320     case 0x16: // Intel Celeron processor model 16h. All processors are
321                // manufactured using the 65 nm process
322       CPU = "core2";
323       *Type = INTEL_CORE2;
324       break;
325     case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
326                // 17h. All processors are manufactured using the 45 nm process.
327                //
328                // 45nm: Penryn , Wolfdale, Yorkfield (XE)
329     case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
330                // the 45 nm process.
331       CPU = "penryn";
332       *Type = INTEL_CORE2;
333       break;
334     case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
335                // processors are manufactured using the 45 nm process.
336     case 0x1e: // Intel(R) Core(TM) i7 CPU         870  @ 2.93GHz.
337                // As found in a Summer 2010 model iMac.
338     case 0x1f:
339     case 0x2e: // Nehalem EX
340       CPU = "nehalem";
341       *Type = INTEL_COREI7;
342       *Subtype = INTEL_COREI7_NEHALEM;
343       break;
344     case 0x25: // Intel Core i7, laptop version.
345     case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
346                // processors are manufactured using the 32 nm process.
347     case 0x2f: // Westmere EX
348       CPU = "westmere";
349       *Type = INTEL_COREI7;
350       *Subtype = INTEL_COREI7_WESTMERE;
351       break;
352     case 0x2a: // Intel Core i7 processor. All processors are manufactured
353                // using the 32 nm process.
354     case 0x2d:
355       CPU = "sandybridge";
356       *Type = INTEL_COREI7;
357       *Subtype = INTEL_COREI7_SANDYBRIDGE;
358       break;
359     case 0x3a:
360     case 0x3e: // Ivy Bridge EP
361       CPU = "ivybridge";
362       *Type = INTEL_COREI7;
363       *Subtype = INTEL_COREI7_IVYBRIDGE;
364       break;
365 
366     // Haswell:
367     case 0x3c:
368     case 0x3f:
369     case 0x45:
370     case 0x46:
371       CPU = "haswell";
372       *Type = INTEL_COREI7;
373       *Subtype = INTEL_COREI7_HASWELL;
374       break;
375 
376     // Broadwell:
377     case 0x3d:
378     case 0x47:
379     case 0x4f:
380     case 0x56:
381       CPU = "broadwell";
382       *Type = INTEL_COREI7;
383       *Subtype = INTEL_COREI7_BROADWELL;
384       break;
385 
386     // Skylake:
387     case 0x4e: // Skylake mobile
388     case 0x5e: // Skylake desktop
389     case 0x8e: // Kaby Lake mobile
390     case 0x9e: // Kaby Lake desktop
391     case 0xa5: // Comet Lake-H/S
392     case 0xa6: // Comet Lake-U
393       CPU = "skylake";
394       *Type = INTEL_COREI7;
395       *Subtype = INTEL_COREI7_SKYLAKE;
396       break;
397 
398     // Rocketlake:
399     case 0xa7:
400       CPU = "rocketlake";
401       *Type = INTEL_COREI7;
402       *Subtype = INTEL_COREI7_ROCKETLAKE;
403       break;
404 
405     // Skylake Xeon:
406     case 0x55:
407       *Type = INTEL_COREI7;
408       if (testFeature(FEATURE_AVX512BF16)) {
409         CPU = "cooperlake";
410         *Subtype = INTEL_COREI7_COOPERLAKE;
411       } else if (testFeature(FEATURE_AVX512VNNI)) {
412         CPU = "cascadelake";
413         *Subtype = INTEL_COREI7_CASCADELAKE;
414       } else {
415         CPU = "skylake-avx512";
416         *Subtype = INTEL_COREI7_SKYLAKE_AVX512;
417       }
418       break;
419 
420     // Cannonlake:
421     case 0x66:
422       CPU = "cannonlake";
423       *Type = INTEL_COREI7;
424       *Subtype = INTEL_COREI7_CANNONLAKE;
425       break;
426 
427     // Icelake:
428     case 0x7d:
429     case 0x7e:
430       CPU = "icelake-client";
431       *Type = INTEL_COREI7;
432       *Subtype = INTEL_COREI7_ICELAKE_CLIENT;
433       break;
434 
435     // Tigerlake:
436     case 0x8c:
437     case 0x8d:
438       CPU = "tigerlake";
439       *Type = INTEL_COREI7;
440       *Subtype = INTEL_COREI7_TIGERLAKE;
441       break;
442 
443     // Alderlake:
444     case 0x97:
445     case 0x9a:
446     // Raptorlake:
447     case 0xb7:
448     case 0xba:
449     case 0xbf:
450     // Meteorlake:
451     case 0xaa:
452     case 0xac:
453     // Gracemont:
454     case 0xbe:
455       CPU = "alderlake";
456       *Type = INTEL_COREI7;
457       *Subtype = INTEL_COREI7_ALDERLAKE;
458       break;
459 
460     // Arrowlake:
461     case 0xc5:
462       CPU = "arrowlake";
463       *Type = INTEL_COREI7;
464       *Subtype = INTEL_COREI7_ARROWLAKE;
465       break;
466 
467     // Arrowlake S:
468     case 0xc6:
469     // Lunarlake:
470     case 0xbd:
471       CPU = "arrowlake-s";
472       *Type = INTEL_COREI7;
473       *Subtype = INTEL_COREI7_ARROWLAKE_S;
474       break;
475 
476     // Pantherlake:
477     case 0xcc:
478       CPU = "pantherlake";
479       *Type = INTEL_COREI7;
480       *Subtype = INTEL_COREI7_PANTHERLAKE;
481       break;
482 
483     // Icelake Xeon:
484     case 0x6a:
485     case 0x6c:
486       CPU = "icelake-server";
487       *Type = INTEL_COREI7;
488       *Subtype = INTEL_COREI7_ICELAKE_SERVER;
489       break;
490 
491     // Emerald Rapids:
492     case 0xcf:
493     // Sapphire Rapids:
494     case 0x8f:
495       CPU = "sapphirerapids";
496       *Type = INTEL_COREI7;
497       *Subtype = INTEL_COREI7_SAPPHIRERAPIDS;
498       break;
499 
500     // Granite Rapids:
501     case 0xad:
502       CPU = "graniterapids";
503       *Type = INTEL_COREI7;
504       *Subtype = INTEL_COREI7_GRANITERAPIDS;
505       break;
506 
507     // Granite Rapids D:
508     case 0xae:
509       CPU = "graniterapids-d";
510       *Type = INTEL_COREI7;
511       *Subtype = INTEL_COREI7_GRANITERAPIDS_D;
512       break;
513 
514     case 0x1c: // Most 45 nm Intel Atom processors
515     case 0x26: // 45 nm Atom Lincroft
516     case 0x27: // 32 nm Atom Medfield
517     case 0x35: // 32 nm Atom Midview
518     case 0x36: // 32 nm Atom Midview
519       CPU = "bonnell";
520       *Type = INTEL_BONNELL;
521       break;
522 
523     // Atom Silvermont codes from the Intel software optimization guide.
524     case 0x37:
525     case 0x4a:
526     case 0x4d:
527     case 0x5a:
528     case 0x5d:
529     case 0x4c: // really airmont
530       CPU = "silvermont";
531       *Type = INTEL_SILVERMONT;
532       break;
533     // Goldmont:
534     case 0x5c: // Apollo Lake
535     case 0x5f: // Denverton
536       CPU = "goldmont";
537       *Type = INTEL_GOLDMONT;
538       break; // "goldmont"
539     case 0x7a:
540       CPU = "goldmont-plus";
541       *Type = INTEL_GOLDMONT_PLUS;
542       break;
543     case 0x86:
544     case 0x8a: // Lakefield
545     case 0x96: // Elkhart Lake
546     case 0x9c: // Jasper Lake
547       CPU = "tremont";
548       *Type = INTEL_TREMONT;
549       break;
550 
551     // Sierraforest:
552     case 0xaf:
553       CPU = "sierraforest";
554       *Type = INTEL_SIERRAFOREST;
555       break;
556 
557     // Grandridge:
558     case 0xb6:
559       CPU = "grandridge";
560       *Type = INTEL_GRANDRIDGE;
561       break;
562 
563     // Clearwaterforest:
564     case 0xdd:
565       CPU = "clearwaterforest";
566       *Type = INTEL_COREI7;
567       *Subtype = INTEL_CLEARWATERFOREST;
568       break;
569 
570     case 0x57:
571       CPU = "knl";
572       *Type = INTEL_KNL;
573       break;
574 
575     case 0x85:
576       CPU = "knm";
577       *Type = INTEL_KNM;
578       break;
579 
580     default: // Unknown family 6 CPU.
581       break;
582     }
583     break;
584   default:
585     break; // Unknown.
586   }
587 
588   return CPU;
589 }
590 
591 static const char *getAMDProcessorTypeAndSubtype(unsigned Family,
592                                                  unsigned Model,
593                                                  const unsigned *Features,
594                                                  unsigned *Type,
595                                                  unsigned *Subtype) {
596   // We select CPU strings to match the code in Host.cpp, but we don't use them
597   // in compiler-rt.
598   const char *CPU = 0;
599 
600   switch (Family) {
601   case 16:
602     CPU = "amdfam10";
603     *Type = AMDFAM10H;
604     switch (Model) {
605     case 2:
606       *Subtype = AMDFAM10H_BARCELONA;
607       break;
608     case 4:
609       *Subtype = AMDFAM10H_SHANGHAI;
610       break;
611     case 8:
612       *Subtype = AMDFAM10H_ISTANBUL;
613       break;
614     }
615     break;
616   case 20:
617     CPU = "btver1";
618     *Type = AMD_BTVER1;
619     break;
620   case 21:
621     CPU = "bdver1";
622     *Type = AMDFAM15H;
623     if (Model >= 0x60 && Model <= 0x7f) {
624       CPU = "bdver4";
625       *Subtype = AMDFAM15H_BDVER4;
626       break; // 60h-7Fh: Excavator
627     }
628     if (Model >= 0x30 && Model <= 0x3f) {
629       CPU = "bdver3";
630       *Subtype = AMDFAM15H_BDVER3;
631       break; // 30h-3Fh: Steamroller
632     }
633     if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
634       CPU = "bdver2";
635       *Subtype = AMDFAM15H_BDVER2;
636       break; // 02h, 10h-1Fh: Piledriver
637     }
638     if (Model <= 0x0f) {
639       *Subtype = AMDFAM15H_BDVER1;
640       break; // 00h-0Fh: Bulldozer
641     }
642     break;
643   case 22:
644     CPU = "btver2";
645     *Type = AMD_BTVER2;
646     break;
647   case 23:
648     CPU = "znver1";
649     *Type = AMDFAM17H;
650     if ((Model >= 0x30 && Model <= 0x3f) || (Model == 0x47) ||
651         (Model >= 0x60 && Model <= 0x67) || (Model >= 0x68 && Model <= 0x6f) ||
652         (Model >= 0x70 && Model <= 0x7f) || (Model >= 0x84 && Model <= 0x87) ||
653         (Model >= 0x90 && Model <= 0x97) || (Model >= 0x98 && Model <= 0x9f) ||
654         (Model >= 0xa0 && Model <= 0xaf)) {
655       // Family 17h Models 30h-3Fh (Starship) Zen 2
656       // Family 17h Models 47h (Cardinal) Zen 2
657       // Family 17h Models 60h-67h (Renoir) Zen 2
658       // Family 17h Models 68h-6Fh (Lucienne) Zen 2
659       // Family 17h Models 70h-7Fh (Matisse) Zen 2
660       // Family 17h Models 84h-87h (ProjectX) Zen 2
661       // Family 17h Models 90h-97h (VanGogh) Zen 2
662       // Family 17h Models 98h-9Fh (Mero) Zen 2
663       // Family 17h Models A0h-AFh (Mendocino) Zen 2
664       CPU = "znver2";
665       *Subtype = AMDFAM17H_ZNVER2;
666       break;
667     }
668     if ((Model >= 0x10 && Model <= 0x1f) || (Model >= 0x20 && Model <= 0x2f)) {
669       // Family 17h Models 10h-1Fh (Raven1) Zen
670       // Family 17h Models 10h-1Fh (Picasso) Zen+
671       // Family 17h Models 20h-2Fh (Raven2 x86) Zen
672       *Subtype = AMDFAM17H_ZNVER1;
673       break;
674     }
675     break;
676   case 25:
677     CPU = "znver3";
678     *Type = AMDFAM19H;
679     if ((Model <= 0x0f) || (Model >= 0x20 && Model <= 0x2f) ||
680         (Model >= 0x30 && Model <= 0x3f) || (Model >= 0x40 && Model <= 0x4f) ||
681         (Model >= 0x50 && Model <= 0x5f)) {
682       // Family 19h Models 00h-0Fh (Genesis, Chagall) Zen 3
683       // Family 19h Models 20h-2Fh (Vermeer) Zen 3
684       // Family 19h Models 30h-3Fh (Badami) Zen 3
685       // Family 19h Models 40h-4Fh (Rembrandt) Zen 3+
686       // Family 19h Models 50h-5Fh (Cezanne) Zen 3
687       *Subtype = AMDFAM19H_ZNVER3;
688       break;
689     }
690     if ((Model >= 0x10 && Model <= 0x1f) || (Model >= 0x60 && Model <= 0x6f) ||
691         (Model >= 0x70 && Model <= 0x77) || (Model >= 0x78 && Model <= 0x7f) ||
692         (Model >= 0xa0 && Model <= 0xaf)) {
693       // Family 19h Models 10h-1Fh (Stones; Storm Peak) Zen 4
694       // Family 19h Models 60h-6Fh (Raphael) Zen 4
695       // Family 19h Models 70h-77h (Phoenix, Hawkpoint1) Zen 4
696       // Family 19h Models 78h-7Fh (Phoenix 2, Hawkpoint2) Zen 4
697       // Family 19h Models A0h-AFh (Stones-Dense) Zen 4
698       CPU = "znver4";
699       *Subtype = AMDFAM19H_ZNVER4;
700       break; //  "znver4"
701     }
702     break; // family 19h
703   default:
704     break; // Unknown AMD CPU.
705   }
706 
707   return CPU;
708 }
709 
710 static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
711                                  unsigned *Features) {
712   unsigned EAX = 0, EBX = 0;
713 
714 #define hasFeature(F) ((Features[F / 32] >> (F % 32)) & 1)
715 #define setFeature(F) Features[F / 32] |= 1U << (F % 32)
716 
717   if ((EDX >> 15) & 1)
718     setFeature(FEATURE_CMOV);
719   if ((EDX >> 23) & 1)
720     setFeature(FEATURE_MMX);
721   if ((EDX >> 25) & 1)
722     setFeature(FEATURE_SSE);
723   if ((EDX >> 26) & 1)
724     setFeature(FEATURE_SSE2);
725 
726   if ((ECX >> 0) & 1)
727     setFeature(FEATURE_SSE3);
728   if ((ECX >> 1) & 1)
729     setFeature(FEATURE_PCLMUL);
730   if ((ECX >> 9) & 1)
731     setFeature(FEATURE_SSSE3);
732   if ((ECX >> 12) & 1)
733     setFeature(FEATURE_FMA);
734   if ((ECX >> 13) & 1)
735     setFeature(FEATURE_CMPXCHG16B);
736   if ((ECX >> 19) & 1)
737     setFeature(FEATURE_SSE4_1);
738   if ((ECX >> 20) & 1)
739     setFeature(FEATURE_SSE4_2);
740   if ((ECX >> 22) & 1)
741     setFeature(FEATURE_MOVBE);
742   if ((ECX >> 23) & 1)
743     setFeature(FEATURE_POPCNT);
744   if ((ECX >> 25) & 1)
745     setFeature(FEATURE_AES);
746   if ((ECX >> 29) & 1)
747     setFeature(FEATURE_F16C);
748 
749   // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
750   // indicates that the AVX registers will be saved and restored on context
751   // switch, then we have full AVX support.
752   const unsigned AVXBits = (1 << 27) | (1 << 28);
753   bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
754                 ((EAX & 0x6) == 0x6);
755 #if defined(__APPLE__)
756   // Darwin lazily saves the AVX512 context on first use: trust that the OS will
757   // save the AVX512 context if we use AVX512 instructions, even the bit is not
758   // set right now.
759   bool HasAVX512Save = true;
760 #else
761   // AVX512 requires additional context to be saved by the OS.
762   bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
763 #endif
764 
765   if (HasAVX)
766     setFeature(FEATURE_AVX);
767 
768   bool HasLeaf7 =
769       MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
770 
771   if (HasLeaf7 && ((EBX >> 3) & 1))
772     setFeature(FEATURE_BMI);
773   if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
774     setFeature(FEATURE_AVX2);
775   if (HasLeaf7 && ((EBX >> 8) & 1))
776     setFeature(FEATURE_BMI2);
777   if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
778     setFeature(FEATURE_AVX512F);
779   if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
780     setFeature(FEATURE_AVX512DQ);
781   if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
782     setFeature(FEATURE_AVX512IFMA);
783   if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
784     setFeature(FEATURE_AVX512PF);
785   if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
786     setFeature(FEATURE_AVX512ER);
787   if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
788     setFeature(FEATURE_AVX512CD);
789   if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
790     setFeature(FEATURE_AVX512BW);
791   if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
792     setFeature(FEATURE_AVX512VL);
793 
794   if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
795     setFeature(FEATURE_AVX512VBMI);
796   if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
797     setFeature(FEATURE_AVX512VBMI2);
798   if (HasLeaf7 && ((ECX >> 8) & 1))
799     setFeature(FEATURE_GFNI);
800   if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX)
801     setFeature(FEATURE_VPCLMULQDQ);
802   if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
803     setFeature(FEATURE_AVX512VNNI);
804   if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save)
805     setFeature(FEATURE_AVX512BITALG);
806   if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
807     setFeature(FEATURE_AVX512VPOPCNTDQ);
808 
809   if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
810     setFeature(FEATURE_AVX5124VNNIW);
811   if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
812     setFeature(FEATURE_AVX5124FMAPS);
813   if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
814     setFeature(FEATURE_AVX512VP2INTERSECT);
815 
816   // EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't
817   // return all 0s for invalid subleaves so check the limit.
818   bool HasLeaf7Subleaf1 =
819       HasLeaf7 && EAX >= 1 &&
820       !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
821   if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
822     setFeature(FEATURE_AVX512BF16);
823 
824   unsigned MaxExtLevel;
825   getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
826 
827   bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
828                      !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
829   if (HasExtLeaf1) {
830     if (ECX & 1)
831       setFeature(FEATURE_LAHF_LM);
832     if ((ECX >> 5) & 1)
833       setFeature(FEATURE_LZCNT);
834     if (((ECX >> 6) & 1))
835       setFeature(FEATURE_SSE4_A);
836     if (((ECX >> 11) & 1))
837       setFeature(FEATURE_XOP);
838     if (((ECX >> 16) & 1))
839       setFeature(FEATURE_FMA4);
840     if (((EDX >> 29) & 1))
841       setFeature(FEATURE_LM);
842   }
843 
844   if (hasFeature(FEATURE_LM) && hasFeature(FEATURE_SSE2)) {
845     setFeature(FEATURE_X86_64_BASELINE);
846     if (hasFeature(FEATURE_CMPXCHG16B) && hasFeature(FEATURE_POPCNT) &&
847         hasFeature(FEATURE_LAHF_LM) && hasFeature(FEATURE_SSE4_2)) {
848       setFeature(FEATURE_X86_64_V2);
849       if (hasFeature(FEATURE_AVX2) && hasFeature(FEATURE_BMI) &&
850           hasFeature(FEATURE_BMI2) && hasFeature(FEATURE_F16C) &&
851           hasFeature(FEATURE_FMA) && hasFeature(FEATURE_LZCNT) &&
852           hasFeature(FEATURE_MOVBE)) {
853         setFeature(FEATURE_X86_64_V3);
854         if (hasFeature(FEATURE_AVX512BW) && hasFeature(FEATURE_AVX512CD) &&
855             hasFeature(FEATURE_AVX512DQ) && hasFeature(FEATURE_AVX512VL))
856           setFeature(FEATURE_X86_64_V4);
857       }
858     }
859   }
860 
861 #undef hasFeature
862 #undef setFeature
863 }
864 
865 #ifndef _WIN32
866 __attribute__((visibility("hidden")))
867 #endif
868 int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE;
869 
870 #ifndef _WIN32
871 __attribute__((visibility("hidden")))
872 #endif
873 struct __processor_model {
874   unsigned int __cpu_vendor;
875   unsigned int __cpu_type;
876   unsigned int __cpu_subtype;
877   unsigned int __cpu_features[1];
878 } __cpu_model = {0, 0, 0, {0}};
879 
880 #ifndef _WIN32
881 __attribute__((visibility("hidden")))
882 #endif
883 unsigned __cpu_features2[(CPU_FEATURE_MAX - 1) / 32];
884 
// A constructor function that sets __cpu_model and __cpu_features2 with
886 // the right values.  This needs to run only once.  This constructor is
887 // given the highest priority and it should run before constructors without
888 // the priority set.  However, it still runs after ifunc initializers and
889 // needs to be called explicitly there.
890 
891 int CONSTRUCTOR_ATTRIBUTE __cpu_indicator_init(void) {
892   unsigned EAX, EBX, ECX, EDX;
893   unsigned MaxLeaf = 5;
894   unsigned Vendor;
895   unsigned Model, Family;
896   unsigned Features[(CPU_FEATURE_MAX + 31) / 32] = {0};
897   static_assert(sizeof(Features) / sizeof(Features[0]) == 4, "");
898   static_assert(sizeof(__cpu_features2) / sizeof(__cpu_features2[0]) == 3, "");
899 
900   // This function needs to run just once.
901   if (__cpu_model.__cpu_vendor)
902     return 0;
903 
904   if (!isCpuIdSupported() ||
905       getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) {
906     __cpu_model.__cpu_vendor = VENDOR_OTHER;
907     return -1;
908   }
909 
910   getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
911   detectX86FamilyModel(EAX, &Family, &Model);
912 
913   // Find available features.
914   getAvailableFeatures(ECX, EDX, MaxLeaf, &Features[0]);
915 
916   __cpu_model.__cpu_features[0] = Features[0];
917   __cpu_features2[0] = Features[1];
918   __cpu_features2[1] = Features[2];
919   __cpu_features2[2] = Features[3];
920 
921   if (Vendor == SIG_INTEL) {
922     // Get CPU type.
923     getIntelProcessorTypeAndSubtype(Family, Model, &Features[0],
924                                     &(__cpu_model.__cpu_type),
925                                     &(__cpu_model.__cpu_subtype));
926     __cpu_model.__cpu_vendor = VENDOR_INTEL;
927   } else if (Vendor == SIG_AMD) {
928     // Get CPU type.
929     getAMDProcessorTypeAndSubtype(Family, Model, &Features[0],
930                                   &(__cpu_model.__cpu_type),
931                                   &(__cpu_model.__cpu_subtype));
932     __cpu_model.__cpu_vendor = VENDOR_AMD;
933   } else
934     __cpu_model.__cpu_vendor = VENDOR_OTHER;
935 
936   assert(__cpu_model.__cpu_vendor < VENDOR_MAX);
937   assert(__cpu_model.__cpu_type < CPU_TYPE_MAX);
938   assert(__cpu_model.__cpu_subtype < CPU_SUBTYPE_MAX);
939 
940   return 0;
941 }
942 #endif // defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)
943