1 //===-- cpu_model/x86.c - Support for __cpu_model builtin --------*- C -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file is based on LLVM's lib/Support/Host.cpp.
10 // It implements the operating system Host concept and builtin
11 // __cpu_model for the compiler_rt library for x86.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "cpu_model.h"
16
17 #if !(defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || \
18 defined(_M_X64))
19 #error This file is intended only for x86-based targets
20 #endif
21
22 #if defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)
23
24 #include <assert.h>
25
26 #ifdef _MSC_VER
27 #include <intrin.h>
28 #endif
29
// Vendor identification words. Each constant is the ASCII encoding of the
// four characters named in its comment, least-significant byte first
// (e.g. 0x47 'G', 0x65 'e', 0x6E 'n', 0x75 'u').
enum VendorSignatures {
  // "Genu"
  SIG_INTEL = 0x756E6547,
  // "Auth"
  SIG_AMD = 0x68747541
};
34
// Vendor codes. Numbering starts at 1, so 0 is not a valid vendor;
// VENDOR_MAX is one past the last real vendor code.
enum ProcessorVendors {
  VENDOR_INTEL = 1,
  VENDOR_AMD = 2,
  VENDOR_OTHER = 3,
  VENDOR_MAX = 4
};
41
// Processor type codes written through the `Type` out-parameter of the
// vendor-specific detection routines in this file. Numbering starts at 1 and
// new entries are appended, so the order is historical rather than grouped by
// vendor. Values are spelled out explicitly so that an accidental reorder
// cannot silently renumber them. CPU_TYPE_MAX is one past the last code.
enum ProcessorTypes {
  INTEL_BONNELL = 1,
  INTEL_CORE2 = 2,
  INTEL_COREI7 = 3,
  AMDFAM10H = 4,
  AMDFAM15H = 5,
  INTEL_SILVERMONT = 6,
  INTEL_KNL = 7,
  AMD_BTVER1 = 8,
  AMD_BTVER2 = 9,
  AMDFAM17H = 10,
  INTEL_KNM = 11,
  INTEL_GOLDMONT = 12,
  INTEL_GOLDMONT_PLUS = 13,
  INTEL_TREMONT = 14,
  AMDFAM19H = 15,
  ZHAOXIN_FAM7H = 16,
  INTEL_SIERRAFOREST = 17,
  INTEL_GRANDRIDGE = 18,
  INTEL_CLEARWATERFOREST = 19,
  AMDFAM1AH = 20,
  CPU_TYPE_MAX = 21
};
65
// Processor subtype codes written through the `Subtype` out-parameter of the
// vendor-specific detection routines in this file. Numbering starts at 1 and
// entries are appended in historical order, so Intel, AMD and Zhaoxin codes
// interleave. Values are spelled out explicitly so that an accidental reorder
// cannot silently renumber them. CPU_SUBTYPE_MAX is one past the last code.
enum ProcessorSubtypes {
  INTEL_COREI7_NEHALEM = 1,
  INTEL_COREI7_WESTMERE = 2,
  INTEL_COREI7_SANDYBRIDGE = 3,
  AMDFAM10H_BARCELONA = 4,
  AMDFAM10H_SHANGHAI = 5,
  AMDFAM10H_ISTANBUL = 6,
  AMDFAM15H_BDVER1 = 7,
  AMDFAM15H_BDVER2 = 8,
  AMDFAM15H_BDVER3 = 9,
  AMDFAM15H_BDVER4 = 10,
  AMDFAM17H_ZNVER1 = 11,
  INTEL_COREI7_IVYBRIDGE = 12,
  INTEL_COREI7_HASWELL = 13,
  INTEL_COREI7_BROADWELL = 14,
  INTEL_COREI7_SKYLAKE = 15,
  INTEL_COREI7_SKYLAKE_AVX512 = 16,
  INTEL_COREI7_CANNONLAKE = 17,
  INTEL_COREI7_ICELAKE_CLIENT = 18,
  INTEL_COREI7_ICELAKE_SERVER = 19,
  AMDFAM17H_ZNVER2 = 20,
  INTEL_COREI7_CASCADELAKE = 21,
  INTEL_COREI7_TIGERLAKE = 22,
  INTEL_COREI7_COOPERLAKE = 23,
  INTEL_COREI7_SAPPHIRERAPIDS = 24,
  INTEL_COREI7_ALDERLAKE = 25,
  AMDFAM19H_ZNVER3 = 26,
  INTEL_COREI7_ROCKETLAKE = 27,
  ZHAOXIN_FAM7H_LUJIAZUI = 28,
  AMDFAM19H_ZNVER4 = 29,
  INTEL_COREI7_GRANITERAPIDS = 30,
  INTEL_COREI7_GRANITERAPIDS_D = 31,
  INTEL_COREI7_ARROWLAKE = 32,
  INTEL_COREI7_ARROWLAKE_S = 33,
  INTEL_COREI7_PANTHERLAKE = 34,
  AMDFAM1AH_ZNVER5 = 35,
  CPU_SUBTYPE_MAX = 36
};
104
// Feature bit indices. Feature F lives in word F / 32, bit F % 32 of the
// feature word array (see the hasFeature/setFeature/testFeature macros in
// this file). Every value is written out explicitly; the numbers are
// identical to what the implicit numbering with anchor values produced.
// Indices that are skipped are listed in comments together with the name
// they are reserved for. CPU_FEATURE_MAX is one past the last index.
enum ProcessorFeatures {
  FEATURE_CMOV = 0,
  FEATURE_MMX = 1,
  FEATURE_POPCNT = 2,
  FEATURE_SSE = 3,
  FEATURE_SSE2 = 4,
  FEATURE_SSE3 = 5,
  FEATURE_SSSE3 = 6,
  FEATURE_SSE4_1 = 7,
  FEATURE_SSE4_2 = 8,
  FEATURE_AVX = 9,
  FEATURE_AVX2 = 10,
  FEATURE_SSE4_A = 11,
  FEATURE_FMA4 = 12,
  FEATURE_XOP = 13,
  FEATURE_FMA = 14,
  FEATURE_AVX512F = 15,
  FEATURE_BMI = 16,
  FEATURE_BMI2 = 17,
  FEATURE_AES = 18,
  FEATURE_PCLMUL = 19,
  FEATURE_AVX512VL = 20,
  FEATURE_AVX512BW = 21,
  FEATURE_AVX512DQ = 22,
  FEATURE_AVX512CD = 23,
  FEATURE_AVX512ER = 24,
  FEATURE_AVX512PF = 25,
  FEATURE_AVX512VBMI = 26,
  FEATURE_AVX512IFMA = 27,
  FEATURE_AVX5124VNNIW = 28,
  FEATURE_AVX5124FMAPS = 29,
  FEATURE_AVX512VPOPCNTDQ = 30,
  FEATURE_AVX512VBMI2 = 31,
  FEATURE_GFNI = 32,
  FEATURE_VPCLMULQDQ = 33,
  FEATURE_AVX512VNNI = 34,
  FEATURE_AVX512BITALG = 35,
  FEATURE_AVX512BF16 = 36,
  FEATURE_AVX512VP2INTERSECT = 37,
  // FIXME: Some features in gcc's list are missing from here on, because
  // they do not map one-to-one onto llvm features.
  // 38: FEATURE_3DNOW,
  // 39: FEATURE_3DNOWP,
  FEATURE_ADX = 40,
  // 41: FEATURE_ABM,
  FEATURE_CLDEMOTE = 42,
  FEATURE_CLFLUSHOPT = 43,
  FEATURE_CLWB = 44,
  FEATURE_CLZERO = 45,
  FEATURE_CMPXCHG16B = 46,
  // FIXME: Not adding FEATURE_CMPXCHG8B is a workaround to make 'generic' a
  // cpu string with no X86_FEATURE_COMPAT features, which is required by the
  // current implementation of the cpu_specific/cpu_dispatch FMV feature.
  // 47: FEATURE_CMPXCHG8B,
  FEATURE_ENQCMD = 48,
  FEATURE_F16C = 49,
  FEATURE_FSGSBASE = 50,
  // 51: FEATURE_FXSAVE,
  // 52: FEATURE_HLE,
  // 53: FEATURE_IBT,
  FEATURE_LAHF_LM = 54,
  FEATURE_LM = 55,
  FEATURE_LWP = 56,
  FEATURE_LZCNT = 57,
  FEATURE_MOVBE = 58,
  FEATURE_MOVDIR64B = 59,
  FEATURE_MOVDIRI = 60,
  FEATURE_MWAITX = 61,
  // 62: FEATURE_OSXSAVE,
  FEATURE_PCONFIG = 63,
  FEATURE_PKU = 64,
  FEATURE_PREFETCHWT1 = 65,
  FEATURE_PRFCHW = 66,
  FEATURE_PTWRITE = 67,
  FEATURE_RDPID = 68,
  FEATURE_RDRND = 69,
  FEATURE_RDSEED = 70,
  FEATURE_RTM = 71,
  FEATURE_SERIALIZE = 72,
  FEATURE_SGX = 73,
  FEATURE_SHA = 74,
  FEATURE_SHSTK = 75,
  FEATURE_TBM = 76,
  FEATURE_TSXLDTRK = 77,
  FEATURE_VAES = 78,
  FEATURE_WAITPKG = 79,
  FEATURE_WBNOINVD = 80,
  FEATURE_XSAVE = 81,
  FEATURE_XSAVEC = 82,
  FEATURE_XSAVEOPT = 83,
  FEATURE_XSAVES = 84,
  FEATURE_AMX_TILE = 85,
  FEATURE_AMX_INT8 = 86,
  FEATURE_AMX_BF16 = 87,
  FEATURE_UINTR = 88,
  FEATURE_HRESET = 89,
  FEATURE_KL = 90,
  // 91: FEATURE_AESKLE,
  FEATURE_WIDEKL = 92,
  FEATURE_AVXVNNI = 93,
  FEATURE_AVX512FP16 = 94,
  FEATURE_X86_64_BASELINE = 95,
  FEATURE_X86_64_V2 = 96,
  FEATURE_X86_64_V3 = 97,
  FEATURE_X86_64_V4 = 98,
  FEATURE_AVXIFMA = 99,
  FEATURE_AVXVNNIINT8 = 100,
  FEATURE_AVXNECONVERT = 101,
  FEATURE_CMPCCXADD = 102,
  FEATURE_AMX_FP16 = 103,
  FEATURE_PREFETCHI = 104,
  FEATURE_RAOINT = 105,
  FEATURE_AMX_COMPLEX = 106,
  FEATURE_AVXVNNIINT16 = 107,
  FEATURE_SM3 = 108,
  FEATURE_SHA512 = 109,
  FEATURE_SM4 = 110,
  FEATURE_APXF = 111,
  FEATURE_USERMSR = 112,
  FEATURE_AVX10_1_256 = 113,
  FEATURE_AVX10_1_512 = 114,
  CPU_FEATURE_MAX = 115
};
228
// Returns true when the CPUID instruction may be used.
//
// Only 32-bit x86 builds with a GNU-compatible compiler run a real probe;
// every other configuration simply reports true. The probe was copied from
// clang's cpuid.h (__get_cpuid_max) and was motivated by bug reports of
// OpenSSL crashing on CPUs without CPUID support. It saves EFLAGS, tries to
// flip bit 21 (mask 0x00200000), reads EFLAGS back and reports whether the
// value changed.
static bool isCpuIdSupported(void) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  int FlagToggled;
  __asm__(" pushfl\n"
          " popl %%eax\n"
          " movl %%eax,%%ecx\n"
          " xorl $0x00200000,%%eax\n"
          " pushl %%eax\n"
          " popfl\n"
          " pushfl\n"
          " popl %%eax\n"
          " movl $0,%0\n"
          " cmpl %%eax,%%ecx\n"
          " je 1f\n"
          " movl $1,%0\n"
          "1:"
          : "=r"(FlagToggled)
          :
          : "eax", "ecx");
  // The asm leaves 1 in FlagToggled only when the flipped bit stuck.
  return FlagToggled != 0;
#else
  return true;
#endif
}
260
261 // This code is copied from lib/Support/Host.cpp.
262 // Changes to either file should be mirrored in the other.
263
/// getX86CpuIDAndInfo - Execute cpuid for leaf \p value and store the
/// resulting EAX, EBX, ECX and EDX through \p rEAX, \p rEBX, \p rECX and
/// \p rEDX. Returns false when cpuid was executed. Returns true, leaving the
/// outputs untouched, when this build has no way to run cpuid.
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx, so rbx is parked in rsi
  // around the instruction and the cpuid EBX result is handed back in rsi.
  // FIXME: should we save this for Clang?
  unsigned A, B, C, D;
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(A), "=S"(B), "=c"(C), "=d"(D)
          : "a"(value));
  *rEAX = A;
  *rEBX = B;
  *rECX = C;
  *rEDX = D;
  return false;
#elif (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  // Same ebx-preserving dance as above, using the 32-bit registers.
  unsigned A, B, C, D;
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(A), "=S"(B), "=c"(C), "=d"(D)
          : "a"(value));
  *rEAX = A;
  *rEBX = B;
  *rECX = C;
  *rEDX = D;
  return false;
#elif !(defined(__GNUC__) || defined(__clang__)) && defined(_MSC_VER)
  // The MSVC intrinsic is portable across x86 and x64.
  int registers[4];
  __cpuid(registers, value);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
  return false;
#else
  // No known way to issue cpuid with this compiler/target combination.
  return true;
#endif
}
301
/// getX86CpuIDAndInfoEx - Execute cpuid for leaf \p value with \p subleaf
/// loaded into ECX, and store the resulting EAX, EBX, ECX and EDX through
/// \p rEAX, \p rEBX, \p rECX and \p rEDX. Returns false when cpuid was
/// executed. Returns true, leaving the outputs untouched, when this build has
/// no way to run cpuid.
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx, so rbx is parked in rsi
  // around the instruction and the cpuid EBX result is handed back in rsi.
  // FIXME: should we save this for Clang?
  unsigned A, B, C, D;
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(A), "=S"(B), "=c"(C), "=d"(D)
          : "a"(value), "c"(subleaf));
  *rEAX = A;
  *rEBX = B;
  *rECX = C;
  *rEDX = D;
  return false;
#elif (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  // Same ebx-preserving dance as above, using the 32-bit registers.
  unsigned A, B, C, D;
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(A), "=S"(B), "=c"(C), "=d"(D)
          : "a"(value), "c"(subleaf));
  *rEAX = A;
  *rEBX = B;
  *rECX = C;
  *rEDX = D;
  return false;
#elif !(defined(__GNUC__) || defined(__clang__)) && defined(_MSC_VER)
  int registers[4];
  __cpuidex(registers, value, subleaf);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
  return false;
#else
  // No known way to issue cpuid with this compiler/target combination.
  return true;
#endif
}
340
// Read extended control register 0 (XCR0), which is used to detect whether
// features such as AVX are usable. The low 32 bits are stored through rEAX
// and the high 32 bits through rEDX. Returns false when the register was
// read, and true (outputs untouched) when this build cannot issue xgetbv.
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
  // xgetbv is emitted as raw bytes rather than by mnemonic because older
  // assemblers do not support it and there is no easy way to conditionally
  // compile based on the assembler used. ECX = 0 is the register selector.
  unsigned Low, High;
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(Low), "=d"(High) : "c"(0));
  *rEAX = Low;
  *rEDX = High;
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  const unsigned long long Xcr0 = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = (unsigned)Xcr0;
  *rEDX = (unsigned)(Xcr0 >> 32);
  return false;
#else
  return true;
#endif
}
358
// Decode the family and model numbers from a cpuid signature word.
//
//   EAX     signature word to decode
//   Family  receives bits 8-11; when those bits equal 0xF the extended
//           family (bits 20-27) is added on top
//   Model   receives bits 4-7; when the base family is 6 or 0xF the extended
//           model (bits 16-19) supplies the upper nibble
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  const unsigned BaseFamily = (EAX >> 8) & 0xf;  // Bits 8 - 11
  const unsigned BaseModel = (EAX >> 4) & 0xf;   // Bits 4 - 7
  const unsigned ExtFamily = (EAX >> 20) & 0xff; // Bits 20 - 27
  const unsigned ExtModel = (EAX >> 16) & 0xf;   // Bits 16 - 19

  unsigned FullFamily = BaseFamily;
  unsigned FullModel = BaseModel;

  // The extended family only applies when the base family is F.
  if (BaseFamily == 0xf)
    FullFamily += ExtFamily;
  // The extended model applies when the base family is 6 or F.
  if (BaseFamily == 6 || BaseFamily == 0xf)
    FullModel += ExtModel << 4;

  *Family = FullFamily;
  *Model = FullModel;
}
371
// testFeature(F) - Evaluates to 1 when feature bit F is set in the word
// array named `Features` that is in scope at the expansion site, else 0.
// Feature F is stored in word F / 32 at bit F % 32. The argument and the
// whole expansion are parenthesized so the macro composes safely inside
// larger expressions, and the mask is built from an unsigned 1 so that
// bit 31 does not shift into the sign bit of an int.
#define testFeature(F) ((Features[(F) / 32] & (1U << ((F) % 32))) != 0)
373
getIntelProcessorTypeAndSubtype(unsigned Family,unsigned Model,const unsigned * Features,unsigned * Type,unsigned * Subtype)374 static const char *getIntelProcessorTypeAndSubtype(unsigned Family,
375 unsigned Model,
376 const unsigned *Features,
377 unsigned *Type,
378 unsigned *Subtype) {
379 // We select CPU strings to match the code in Host.cpp, but we don't use them
380 // in compiler-rt.
381 const char *CPU = 0;
382
383 switch (Family) {
384 case 6:
385 switch (Model) {
386 case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
387 // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
388 // mobile processor, Intel Core 2 Extreme processor, Intel
389 // Pentium Dual-Core processor, Intel Xeon processor, model
390 // 0Fh. All processors are manufactured using the 65 nm process.
391 case 0x16: // Intel Celeron processor model 16h. All processors are
392 // manufactured using the 65 nm process
393 CPU = "core2";
394 *Type = INTEL_CORE2;
395 break;
396 case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
397 // 17h. All processors are manufactured using the 45 nm process.
398 //
399 // 45nm: Penryn , Wolfdale, Yorkfield (XE)
400 case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
401 // the 45 nm process.
402 CPU = "penryn";
403 *Type = INTEL_CORE2;
404 break;
405 case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
406 // processors are manufactured using the 45 nm process.
407 case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz.
408 // As found in a Summer 2010 model iMac.
409 case 0x1f:
410 case 0x2e: // Nehalem EX
411 CPU = "nehalem";
412 *Type = INTEL_COREI7;
413 *Subtype = INTEL_COREI7_NEHALEM;
414 break;
415 case 0x25: // Intel Core i7, laptop version.
416 case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
417 // processors are manufactured using the 32 nm process.
418 case 0x2f: // Westmere EX
419 CPU = "westmere";
420 *Type = INTEL_COREI7;
421 *Subtype = INTEL_COREI7_WESTMERE;
422 break;
423 case 0x2a: // Intel Core i7 processor. All processors are manufactured
424 // using the 32 nm process.
425 case 0x2d:
426 CPU = "sandybridge";
427 *Type = INTEL_COREI7;
428 *Subtype = INTEL_COREI7_SANDYBRIDGE;
429 break;
430 case 0x3a:
431 case 0x3e: // Ivy Bridge EP
432 CPU = "ivybridge";
433 *Type = INTEL_COREI7;
434 *Subtype = INTEL_COREI7_IVYBRIDGE;
435 break;
436
437 // Haswell:
438 case 0x3c:
439 case 0x3f:
440 case 0x45:
441 case 0x46:
442 CPU = "haswell";
443 *Type = INTEL_COREI7;
444 *Subtype = INTEL_COREI7_HASWELL;
445 break;
446
447 // Broadwell:
448 case 0x3d:
449 case 0x47:
450 case 0x4f:
451 case 0x56:
452 CPU = "broadwell";
453 *Type = INTEL_COREI7;
454 *Subtype = INTEL_COREI7_BROADWELL;
455 break;
456
457 // Skylake:
458 case 0x4e: // Skylake mobile
459 case 0x5e: // Skylake desktop
460 case 0x8e: // Kaby Lake mobile
461 case 0x9e: // Kaby Lake desktop
462 case 0xa5: // Comet Lake-H/S
463 case 0xa6: // Comet Lake-U
464 CPU = "skylake";
465 *Type = INTEL_COREI7;
466 *Subtype = INTEL_COREI7_SKYLAKE;
467 break;
468
469 // Rocketlake:
470 case 0xa7:
471 CPU = "rocketlake";
472 *Type = INTEL_COREI7;
473 *Subtype = INTEL_COREI7_ROCKETLAKE;
474 break;
475
476 // Skylake Xeon:
477 case 0x55:
478 *Type = INTEL_COREI7;
479 if (testFeature(FEATURE_AVX512BF16)) {
480 CPU = "cooperlake";
481 *Subtype = INTEL_COREI7_COOPERLAKE;
482 } else if (testFeature(FEATURE_AVX512VNNI)) {
483 CPU = "cascadelake";
484 *Subtype = INTEL_COREI7_CASCADELAKE;
485 } else {
486 CPU = "skylake-avx512";
487 *Subtype = INTEL_COREI7_SKYLAKE_AVX512;
488 }
489 break;
490
491 // Cannonlake:
492 case 0x66:
493 CPU = "cannonlake";
494 *Type = INTEL_COREI7;
495 *Subtype = INTEL_COREI7_CANNONLAKE;
496 break;
497
498 // Icelake:
499 case 0x7d:
500 case 0x7e:
501 CPU = "icelake-client";
502 *Type = INTEL_COREI7;
503 *Subtype = INTEL_COREI7_ICELAKE_CLIENT;
504 break;
505
506 // Tigerlake:
507 case 0x8c:
508 case 0x8d:
509 CPU = "tigerlake";
510 *Type = INTEL_COREI7;
511 *Subtype = INTEL_COREI7_TIGERLAKE;
512 break;
513
514 // Alderlake:
515 case 0x97:
516 case 0x9a:
517 // Raptorlake:
518 case 0xb7:
519 case 0xba:
520 case 0xbf:
521 // Meteorlake:
522 case 0xaa:
523 case 0xac:
524 // Gracemont:
525 case 0xbe:
526 CPU = "alderlake";
527 *Type = INTEL_COREI7;
528 *Subtype = INTEL_COREI7_ALDERLAKE;
529 break;
530
531 // Arrowlake:
532 case 0xc5:
533 CPU = "arrowlake";
534 *Type = INTEL_COREI7;
535 *Subtype = INTEL_COREI7_ARROWLAKE;
536 break;
537
538 // Arrowlake S:
539 case 0xc6:
540 // Lunarlake:
541 case 0xbd:
542 CPU = "arrowlake-s";
543 *Type = INTEL_COREI7;
544 *Subtype = INTEL_COREI7_ARROWLAKE_S;
545 break;
546
547 // Pantherlake:
548 case 0xcc:
549 CPU = "pantherlake";
550 *Type = INTEL_COREI7;
551 *Subtype = INTEL_COREI7_PANTHERLAKE;
552 break;
553
554 // Icelake Xeon:
555 case 0x6a:
556 case 0x6c:
557 CPU = "icelake-server";
558 *Type = INTEL_COREI7;
559 *Subtype = INTEL_COREI7_ICELAKE_SERVER;
560 break;
561
562 // Emerald Rapids:
563 case 0xcf:
564 // Sapphire Rapids:
565 case 0x8f:
566 CPU = "sapphirerapids";
567 *Type = INTEL_COREI7;
568 *Subtype = INTEL_COREI7_SAPPHIRERAPIDS;
569 break;
570
571 // Granite Rapids:
572 case 0xad:
573 CPU = "graniterapids";
574 *Type = INTEL_COREI7;
575 *Subtype = INTEL_COREI7_GRANITERAPIDS;
576 break;
577
578 // Granite Rapids D:
579 case 0xae:
580 CPU = "graniterapids-d";
581 *Type = INTEL_COREI7;
582 *Subtype = INTEL_COREI7_GRANITERAPIDS_D;
583 break;
584
585 case 0x1c: // Most 45 nm Intel Atom processors
586 case 0x26: // 45 nm Atom Lincroft
587 case 0x27: // 32 nm Atom Medfield
588 case 0x35: // 32 nm Atom Midview
589 case 0x36: // 32 nm Atom Midview
590 CPU = "bonnell";
591 *Type = INTEL_BONNELL;
592 break;
593
594 // Atom Silvermont codes from the Intel software optimization guide.
595 case 0x37:
596 case 0x4a:
597 case 0x4d:
598 case 0x5a:
599 case 0x5d:
600 case 0x4c: // really airmont
601 CPU = "silvermont";
602 *Type = INTEL_SILVERMONT;
603 break;
604 // Goldmont:
605 case 0x5c: // Apollo Lake
606 case 0x5f: // Denverton
607 CPU = "goldmont";
608 *Type = INTEL_GOLDMONT;
609 break; // "goldmont"
610 case 0x7a:
611 CPU = "goldmont-plus";
612 *Type = INTEL_GOLDMONT_PLUS;
613 break;
614 case 0x86:
615 case 0x8a: // Lakefield
616 case 0x96: // Elkhart Lake
617 case 0x9c: // Jasper Lake
618 CPU = "tremont";
619 *Type = INTEL_TREMONT;
620 break;
621
622 // Sierraforest:
623 case 0xaf:
624 CPU = "sierraforest";
625 *Type = INTEL_SIERRAFOREST;
626 break;
627
628 // Grandridge:
629 case 0xb6:
630 CPU = "grandridge";
631 *Type = INTEL_GRANDRIDGE;
632 break;
633
634 // Clearwaterforest:
635 case 0xdd:
636 CPU = "clearwaterforest";
637 *Type = INTEL_COREI7;
638 *Subtype = INTEL_CLEARWATERFOREST;
639 break;
640
641 case 0x57:
642 CPU = "knl";
643 *Type = INTEL_KNL;
644 break;
645
646 case 0x85:
647 CPU = "knm";
648 *Type = INTEL_KNM;
649 break;
650
651 default: // Unknown family 6 CPU.
652 break;
653 }
654 break;
655 default:
656 break; // Unknown.
657 }
658
659 return CPU;
660 }
661
getAMDProcessorTypeAndSubtype(unsigned Family,unsigned Model,const unsigned * Features,unsigned * Type,unsigned * Subtype)662 static const char *getAMDProcessorTypeAndSubtype(unsigned Family,
663 unsigned Model,
664 const unsigned *Features,
665 unsigned *Type,
666 unsigned *Subtype) {
667 const char *CPU = 0;
668
669 switch (Family) {
670 case 4:
671 CPU = "i486";
672 break;
673 case 5:
674 CPU = "pentium";
675 switch (Model) {
676 case 6:
677 case 7:
678 CPU = "k6";
679 break;
680 case 8:
681 CPU = "k6-2";
682 break;
683 case 9:
684 case 13:
685 CPU = "k6-3";
686 break;
687 case 10:
688 CPU = "geode";
689 break;
690 }
691 break;
692 case 6:
693 if (testFeature(FEATURE_SSE)) {
694 CPU = "athlon-xp";
695 break;
696 }
697 CPU = "athlon";
698 break;
699 case 15:
700 if (testFeature(FEATURE_SSE3)) {
701 CPU = "k8-sse3";
702 break;
703 }
704 CPU = "k8";
705 break;
706 case 16:
707 CPU = "amdfam10";
708 *Type = AMDFAM10H; // "amdfam10"
709 switch (Model) {
710 case 2:
711 *Subtype = AMDFAM10H_BARCELONA;
712 break;
713 case 4:
714 *Subtype = AMDFAM10H_SHANGHAI;
715 break;
716 case 8:
717 *Subtype = AMDFAM10H_ISTANBUL;
718 break;
719 }
720 break;
721 case 20:
722 CPU = "btver1";
723 *Type = AMD_BTVER1;
724 break;
725 case 21:
726 CPU = "bdver1";
727 *Type = AMDFAM15H;
728 if (Model >= 0x60 && Model <= 0x7f) {
729 CPU = "bdver4";
730 *Subtype = AMDFAM15H_BDVER4;
731 break; // 60h-7Fh: Excavator
732 }
733 if (Model >= 0x30 && Model <= 0x3f) {
734 CPU = "bdver3";
735 *Subtype = AMDFAM15H_BDVER3;
736 break; // 30h-3Fh: Steamroller
737 }
738 if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
739 CPU = "bdver2";
740 *Subtype = AMDFAM15H_BDVER2;
741 break; // 02h, 10h-1Fh: Piledriver
742 }
743 if (Model <= 0x0f) {
744 *Subtype = AMDFAM15H_BDVER1;
745 break; // 00h-0Fh: Bulldozer
746 }
747 break;
748 case 22:
749 CPU = "btver2";
750 *Type = AMD_BTVER2;
751 break;
752 case 23:
753 CPU = "znver1";
754 *Type = AMDFAM17H;
755 if ((Model >= 0x30 && Model <= 0x3f) || (Model == 0x47) ||
756 (Model >= 0x60 && Model <= 0x67) || (Model >= 0x68 && Model <= 0x6f) ||
757 (Model >= 0x70 && Model <= 0x7f) || (Model >= 0x84 && Model <= 0x87) ||
758 (Model >= 0x90 && Model <= 0x97) || (Model >= 0x98 && Model <= 0x9f) ||
759 (Model >= 0xa0 && Model <= 0xaf)) {
760 // Family 17h Models 30h-3Fh (Starship) Zen 2
761 // Family 17h Models 47h (Cardinal) Zen 2
762 // Family 17h Models 60h-67h (Renoir) Zen 2
763 // Family 17h Models 68h-6Fh (Lucienne) Zen 2
764 // Family 17h Models 70h-7Fh (Matisse) Zen 2
765 // Family 17h Models 84h-87h (ProjectX) Zen 2
766 // Family 17h Models 90h-97h (VanGogh) Zen 2
767 // Family 17h Models 98h-9Fh (Mero) Zen 2
768 // Family 17h Models A0h-AFh (Mendocino) Zen 2
769 CPU = "znver2";
770 *Subtype = AMDFAM17H_ZNVER2;
771 break;
772 }
773 if ((Model >= 0x10 && Model <= 0x1f) || (Model >= 0x20 && Model <= 0x2f)) {
774 // Family 17h Models 10h-1Fh (Raven1) Zen
775 // Family 17h Models 10h-1Fh (Picasso) Zen+
776 // Family 17h Models 20h-2Fh (Raven2 x86) Zen
777 *Subtype = AMDFAM17H_ZNVER1;
778 break;
779 }
780 break;
781 case 25:
782 CPU = "znver3";
783 *Type = AMDFAM19H;
784 if (Model <= 0x0f || (Model >= 0x20 && Model <= 0x2f) ||
785 (Model >= 0x30 && Model <= 0x3f) || (Model >= 0x40 && Model <= 0x4f) ||
786 (Model >= 0x50 && Model <= 0x5f)) {
787 // Family 19h Models 00h-0Fh (Genesis, Chagall) Zen 3
788 // Family 19h Models 20h-2Fh (Vermeer) Zen 3
789 // Family 19h Models 30h-3Fh (Badami) Zen 3
790 // Family 19h Models 40h-4Fh (Rembrandt) Zen 3+
791 // Family 19h Models 50h-5Fh (Cezanne) Zen 3
792 *Subtype = AMDFAM19H_ZNVER3;
793 break;
794 }
795 if ((Model >= 0x10 && Model <= 0x1f) || (Model >= 0x60 && Model <= 0x6f) ||
796 (Model >= 0x70 && Model <= 0x77) || (Model >= 0x78 && Model <= 0x7f) ||
797 (Model >= 0xa0 && Model <= 0xaf)) {
798 // Family 19h Models 10h-1Fh (Stones; Storm Peak) Zen 4
799 // Family 19h Models 60h-6Fh (Raphael) Zen 4
800 // Family 19h Models 70h-77h (Phoenix, Hawkpoint1) Zen 4
801 // Family 19h Models 78h-7Fh (Phoenix 2, Hawkpoint2) Zen 4
802 // Family 19h Models A0h-AFh (Stones-Dense) Zen 4
803 CPU = "znver4";
804 *Subtype = AMDFAM19H_ZNVER4;
805 break; // "znver4"
806 }
807 break; // family 19h
808 case 26:
809 CPU = "znver5";
810 *Type = AMDFAM1AH;
811 if (Model <= 0x77) {
812 // Models 00h-0Fh (Breithorn).
813 // Models 10h-1Fh (Breithorn-Dense).
814 // Models 20h-2Fh (Strix 1).
815 // Models 30h-37h (Strix 2).
816 // Models 38h-3Fh (Strix 3).
817 // Models 40h-4Fh (Granite Ridge).
818 // Models 50h-5Fh (Weisshorn).
819 // Models 60h-6Fh (Krackan1).
820 // Models 70h-77h (Sarlak).
821 CPU = "znver5";
822 *Subtype = AMDFAM1AH_ZNVER5;
823 break; // "znver5"
824 }
825 break;
826 default:
827 break; // Unknown AMD CPU.
828 }
829
830 return CPU;
831 }
832
833 #undef testFeature
834
getAvailableFeatures(unsigned ECX,unsigned EDX,unsigned MaxLeaf,unsigned * Features)835 static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
836 unsigned *Features) {
837 unsigned EAX = 0, EBX = 0;
838
839 #define hasFeature(F) ((Features[F / 32] >> (F % 32)) & 1)
840 #define setFeature(F) Features[F / 32] |= 1U << (F % 32)
841
842 if ((EDX >> 15) & 1)
843 setFeature(FEATURE_CMOV);
844 if ((EDX >> 23) & 1)
845 setFeature(FEATURE_MMX);
846 if ((EDX >> 25) & 1)
847 setFeature(FEATURE_SSE);
848 if ((EDX >> 26) & 1)
849 setFeature(FEATURE_SSE2);
850
851 if ((ECX >> 0) & 1)
852 setFeature(FEATURE_SSE3);
853 if ((ECX >> 1) & 1)
854 setFeature(FEATURE_PCLMUL);
855 if ((ECX >> 9) & 1)
856 setFeature(FEATURE_SSSE3);
857 if ((ECX >> 12) & 1)
858 setFeature(FEATURE_FMA);
859 if ((ECX >> 13) & 1)
860 setFeature(FEATURE_CMPXCHG16B);
861 if ((ECX >> 19) & 1)
862 setFeature(FEATURE_SSE4_1);
863 if ((ECX >> 20) & 1)
864 setFeature(FEATURE_SSE4_2);
865 if ((ECX >> 22) & 1)
866 setFeature(FEATURE_MOVBE);
867 if ((ECX >> 23) & 1)
868 setFeature(FEATURE_POPCNT);
869 if ((ECX >> 25) & 1)
870 setFeature(FEATURE_AES);
871 if ((ECX >> 29) & 1)
872 setFeature(FEATURE_F16C);
873 if ((ECX >> 30) & 1)
874 setFeature(FEATURE_RDRND);
875
876 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
877 // indicates that the AVX registers will be saved and restored on context
878 // switch, then we have full AVX support.
879 const unsigned AVXBits = (1 << 27) | (1 << 28);
880 bool HasAVXSave = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
881 ((EAX & 0x6) == 0x6);
882 #if defined(__APPLE__)
883 // Darwin lazily saves the AVX512 context on first use: trust that the OS will
884 // save the AVX512 context if we use AVX512 instructions, even the bit is not
885 // set right now.
886 bool HasAVX512Save = true;
887 #else
888 // AVX512 requires additional context to be saved by the OS.
889 bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0);
890 #endif
891 // AMX requires additional context to be saved by the OS.
892 const unsigned AMXBits = (1 << 17) | (1 << 18);
893 bool HasXSave = ((ECX >> 27) & 1) && !getX86XCR0(&EAX, &EDX);
894 bool HasAMXSave = HasXSave && ((EAX & AMXBits) == AMXBits);
895
896 if (HasAVXSave)
897 setFeature(FEATURE_AVX);
898
899 if (((ECX >> 26) & 1) && HasAVXSave)
900 setFeature(FEATURE_XSAVE);
901
902 bool HasLeaf7 =
903 MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
904
905 if (HasLeaf7 && ((EBX >> 0) & 1))
906 setFeature(FEATURE_FSGSBASE);
907 if (HasLeaf7 && ((EBX >> 2) & 1))
908 setFeature(FEATURE_SGX);
909 if (HasLeaf7 && ((EBX >> 3) & 1))
910 setFeature(FEATURE_BMI);
911 if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVXSave)
912 setFeature(FEATURE_AVX2);
913 if (HasLeaf7 && ((EBX >> 8) & 1))
914 setFeature(FEATURE_BMI2);
915 if (HasLeaf7 && ((EBX >> 11) & 1))
916 setFeature(FEATURE_RTM);
917 if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
918 setFeature(FEATURE_AVX512F);
919 if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
920 setFeature(FEATURE_AVX512DQ);
921 if (HasLeaf7 && ((EBX >> 18) & 1))
922 setFeature(FEATURE_RDSEED);
923 if (HasLeaf7 && ((EBX >> 19) & 1))
924 setFeature(FEATURE_ADX);
925 if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
926 setFeature(FEATURE_AVX512IFMA);
927 if (HasLeaf7 && ((EBX >> 24) & 1))
928 setFeature(FEATURE_CLWB);
929 if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
930 setFeature(FEATURE_AVX512PF);
931 if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
932 setFeature(FEATURE_AVX512ER);
933 if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
934 setFeature(FEATURE_AVX512CD);
935 if (HasLeaf7 && ((EBX >> 29) & 1))
936 setFeature(FEATURE_SHA);
937 if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
938 setFeature(FEATURE_AVX512BW);
939 if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
940 setFeature(FEATURE_AVX512VL);
941
942 if (HasLeaf7 && ((ECX >> 0) & 1))
943 setFeature(FEATURE_PREFETCHWT1);
944 if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
945 setFeature(FEATURE_AVX512VBMI);
946 if (HasLeaf7 && ((ECX >> 4) & 1))
947 setFeature(FEATURE_PKU);
948 if (HasLeaf7 && ((ECX >> 5) & 1))
949 setFeature(FEATURE_WAITPKG);
950 if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
951 setFeature(FEATURE_AVX512VBMI2);
952 if (HasLeaf7 && ((ECX >> 7) & 1))
953 setFeature(FEATURE_SHSTK);
954 if (HasLeaf7 && ((ECX >> 8) & 1))
955 setFeature(FEATURE_GFNI);
956 if (HasLeaf7 && ((ECX >> 9) & 1) && HasAVXSave)
957 setFeature(FEATURE_VAES);
958 if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave)
959 setFeature(FEATURE_VPCLMULQDQ);
960 if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
961 setFeature(FEATURE_AVX512VNNI);
962 if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save)
963 setFeature(FEATURE_AVX512BITALG);
964 if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
965 setFeature(FEATURE_AVX512VPOPCNTDQ);
966 if (HasLeaf7 && ((ECX >> 22) & 1))
967 setFeature(FEATURE_RDPID);
968 if (HasLeaf7 && ((ECX >> 23) & 1))
969 setFeature(FEATURE_KL);
970 if (HasLeaf7 && ((ECX >> 25) & 1))
971 setFeature(FEATURE_CLDEMOTE);
972 if (HasLeaf7 && ((ECX >> 27) & 1))
973 setFeature(FEATURE_MOVDIRI);
974 if (HasLeaf7 && ((ECX >> 28) & 1))
975 setFeature(FEATURE_MOVDIR64B);
976 if (HasLeaf7 && ((ECX >> 29) & 1))
977 setFeature(FEATURE_ENQCMD);
978
979 if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
980 setFeature(FEATURE_AVX5124VNNIW);
981 if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
982 setFeature(FEATURE_AVX5124FMAPS);
983 if (HasLeaf7 && ((EDX >> 5) & 1))
984 setFeature(FEATURE_UINTR);
985 if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
986 setFeature(FEATURE_AVX512VP2INTERSECT);
987 if (HasLeaf7 && ((EDX >> 14) & 1))
988 setFeature(FEATURE_SERIALIZE);
989 if (HasLeaf7 && ((EDX >> 16) & 1))
990 setFeature(FEATURE_TSXLDTRK);
991 if (HasLeaf7 && ((EDX >> 18) & 1))
992 setFeature(FEATURE_PCONFIG);
993 if (HasLeaf7 && ((EDX >> 22) & 1) && HasAMXSave)
994 setFeature(FEATURE_AMX_BF16);
995 if (HasLeaf7 && ((EDX >> 23) & 1) && HasAVX512Save)
996 setFeature(FEATURE_AVX512FP16);
997 if (HasLeaf7 && ((EDX >> 24) & 1) && HasAMXSave)
998 setFeature(FEATURE_AMX_TILE);
999 if (HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave)
1000 setFeature(FEATURE_AMX_INT8);
1001
1002 // EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't
1003 // return all 0s for invalid subleaves so check the limit.
1004 bool HasLeaf7Subleaf1 =
1005 HasLeaf7 && EAX >= 1 &&
1006 !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
1007 if (HasLeaf7Subleaf1 && ((EAX >> 0) & 1))
1008 setFeature(FEATURE_SHA512);
1009 if (HasLeaf7Subleaf1 && ((EAX >> 1) & 1))
1010 setFeature(FEATURE_SM3);
1011 if (HasLeaf7Subleaf1 && ((EAX >> 2) & 1))
1012 setFeature(FEATURE_SM4);
1013 if (HasLeaf7Subleaf1 && ((EAX >> 3) & 1))
1014 setFeature(FEATURE_RAOINT);
1015 if (HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave)
1016 setFeature(FEATURE_AVXVNNI);
1017 if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
1018 setFeature(FEATURE_AVX512BF16);
1019 if (HasLeaf7Subleaf1 && ((EAX >> 7) & 1))
1020 setFeature(FEATURE_CMPCCXADD);
1021 if (HasLeaf7Subleaf1 && ((EAX >> 21) & 1) && HasAMXSave)
1022 setFeature(FEATURE_AMX_FP16);
1023 if (HasLeaf7Subleaf1 && ((EAX >> 22) & 1))
1024 setFeature(FEATURE_HRESET);
1025 if (HasLeaf7Subleaf1 && ((EAX >> 23) & 1) && HasAVXSave)
1026 setFeature(FEATURE_AVXIFMA);
1027
1028 if (HasLeaf7Subleaf1 && ((EDX >> 4) & 1) && HasAVXSave)
1029 setFeature(FEATURE_AVXVNNIINT8);
1030 if (HasLeaf7Subleaf1 && ((EDX >> 5) & 1) && HasAVXSave)
1031 setFeature(FEATURE_AVXNECONVERT);
1032 if (HasLeaf7Subleaf1 && ((EDX >> 8) & 1) && HasAMXSave)
1033 setFeature(FEATURE_AMX_COMPLEX);
1034 if (HasLeaf7Subleaf1 && ((EDX >> 10) & 1) && HasAVXSave)
1035 setFeature(FEATURE_AVXVNNIINT16);
1036 if (HasLeaf7Subleaf1 && ((EDX >> 14) & 1))
1037 setFeature(FEATURE_PREFETCHI);
1038 if (HasLeaf7Subleaf1 && ((EDX >> 15) & 1))
1039 setFeature(FEATURE_USERMSR);
1040 if (HasLeaf7Subleaf1 && ((EDX >> 19) & 1))
1041 setFeature(FEATURE_AVX10_1_256);
1042 if (HasLeaf7Subleaf1 && ((EDX >> 21) & 1))
1043 setFeature(FEATURE_APXF);
1044
1045 unsigned MaxLevel;
1046 getX86CpuIDAndInfo(0, &MaxLevel, &EBX, &ECX, &EDX);
1047 bool HasLeafD = MaxLevel >= 0xd &&
1048 !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
1049 if (HasLeafD && ((EAX >> 0) & 1) && HasAVXSave)
1050 setFeature(FEATURE_XSAVEOPT);
1051 if (HasLeafD && ((EAX >> 1) & 1) && HasAVXSave)
1052 setFeature(FEATURE_XSAVEC);
1053 if (HasLeafD && ((EAX >> 3) & 1) && HasAVXSave)
1054 setFeature(FEATURE_XSAVES);
1055
1056 bool HasLeaf24 =
1057 MaxLevel >= 0x24 && !getX86CpuIDAndInfo(0x24, &EAX, &EBX, &ECX, &EDX);
1058 if (HasLeaf7Subleaf1 && ((EDX >> 19) & 1) && HasLeaf24 && ((EBX >> 18) & 1))
1059 setFeature(FEATURE_AVX10_1_512);
1060
1061 unsigned MaxExtLevel;
1062 getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
1063
1064 bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
1065 !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
1066 if (HasExtLeaf1) {
1067 if (ECX & 1)
1068 setFeature(FEATURE_LAHF_LM);
1069 if ((ECX >> 5) & 1)
1070 setFeature(FEATURE_LZCNT);
1071 if (((ECX >> 6) & 1))
1072 setFeature(FEATURE_SSE4_A);
1073 if (((ECX >> 8) & 1))
1074 setFeature(FEATURE_PRFCHW);
1075 if (((ECX >> 11) & 1))
1076 setFeature(FEATURE_XOP);
1077 if (((ECX >> 15) & 1))
1078 setFeature(FEATURE_LWP);
1079 if (((ECX >> 16) & 1))
1080 setFeature(FEATURE_FMA4);
1081 if (((ECX >> 21) & 1))
1082 setFeature(FEATURE_TBM);
1083 if (((ECX >> 29) & 1))
1084 setFeature(FEATURE_MWAITX);
1085
1086 if (((EDX >> 29) & 1))
1087 setFeature(FEATURE_LM);
1088 }
1089
1090 bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 &&
1091 !getX86CpuIDAndInfo(0x80000008, &EAX, &EBX, &ECX, &EDX);
1092 if (HasExtLeaf8 && ((EBX >> 0) & 1))
1093 setFeature(FEATURE_CLZERO);
1094 if (HasExtLeaf8 && ((EBX >> 9) & 1))
1095 setFeature(FEATURE_WBNOINVD);
1096
1097 bool HasLeaf14 = MaxLevel >= 0x14 &&
1098 !getX86CpuIDAndInfoEx(0x14, 0x0, &EAX, &EBX, &ECX, &EDX);
1099 if (HasLeaf14 && ((EBX >> 4) & 1))
1100 setFeature(FEATURE_PTWRITE);
1101
1102 bool HasLeaf19 =
1103 MaxLevel >= 0x19 && !getX86CpuIDAndInfo(0x19, &EAX, &EBX, &ECX, &EDX);
1104 if (HasLeaf7 && HasLeaf19 && ((EBX >> 2) & 1))
1105 setFeature(FEATURE_WIDEKL);
1106
1107 if (hasFeature(FEATURE_LM) && hasFeature(FEATURE_SSE2)) {
1108 setFeature(FEATURE_X86_64_BASELINE);
1109 if (hasFeature(FEATURE_CMPXCHG16B) && hasFeature(FEATURE_POPCNT) &&
1110 hasFeature(FEATURE_LAHF_LM) && hasFeature(FEATURE_SSE4_2)) {
1111 setFeature(FEATURE_X86_64_V2);
1112 if (hasFeature(FEATURE_AVX2) && hasFeature(FEATURE_BMI) &&
1113 hasFeature(FEATURE_BMI2) && hasFeature(FEATURE_F16C) &&
1114 hasFeature(FEATURE_FMA) && hasFeature(FEATURE_LZCNT) &&
1115 hasFeature(FEATURE_MOVBE)) {
1116 setFeature(FEATURE_X86_64_V3);
1117 if (hasFeature(FEATURE_AVX512BW) && hasFeature(FEATURE_AVX512CD) &&
1118 hasFeature(FEATURE_AVX512DQ) && hasFeature(FEATURE_AVX512VL))
1119 setFeature(FEATURE_X86_64_V4);
1120 }
1121 }
1122 }
1123
1124 #undef hasFeature
1125 #undef setFeature
1126 }
1127
1128 #ifndef _WIN32
1129 __attribute__((visibility("hidden")))
1130 #endif
1131 int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE;
1132
// Storage for the detected processor identity: vendor, type and subtype codes
// followed by the first 32-bit word of the feature bitmask. All fields start
// out as zero; __cpu_indicator_init treats a zero __cpu_vendor as "detection
// has not run yet". Hidden visibility is applied everywhere except Windows.
#if !defined(_WIN32)
__attribute__((visibility("hidden")))
#endif
struct __processor_model {
  unsigned __cpu_vendor;
  unsigned __cpu_type;
  unsigned __cpu_subtype;
  unsigned __cpu_features[1];
} __cpu_model = {
    .__cpu_vendor = 0,
    .__cpu_type = 0,
    .__cpu_subtype = 0,
    .__cpu_features = {0},
};
1142
1143 #ifndef _WIN32
1144 __attribute__((visibility("hidden")))
1145 #endif
1146 unsigned __cpu_features2[(CPU_FEATURE_MAX - 1) / 32];
1147
1148 // A constructor function that is sets __cpu_model and __cpu_features2 with
1149 // the right values. This needs to run only once. This constructor is
1150 // given the highest priority and it should run before constructors without
1151 // the priority set. However, it still runs after ifunc initializers and
1152 // needs to be called explicitly there.
1153
__cpu_indicator_init(void)1154 int CONSTRUCTOR_ATTRIBUTE __cpu_indicator_init(void) {
1155 unsigned EAX, EBX, ECX, EDX;
1156 unsigned MaxLeaf = 5;
1157 unsigned Vendor;
1158 unsigned Model, Family;
1159 unsigned Features[(CPU_FEATURE_MAX + 31) / 32] = {0};
1160 static_assert(sizeof(Features) / sizeof(Features[0]) == 4, "");
1161 static_assert(sizeof(__cpu_features2) / sizeof(__cpu_features2[0]) == 3, "");
1162
1163 // This function needs to run just once.
1164 if (__cpu_model.__cpu_vendor)
1165 return 0;
1166
1167 if (!isCpuIdSupported() ||
1168 getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) {
1169 __cpu_model.__cpu_vendor = VENDOR_OTHER;
1170 return -1;
1171 }
1172
1173 getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
1174 detectX86FamilyModel(EAX, &Family, &Model);
1175
1176 // Find available features.
1177 getAvailableFeatures(ECX, EDX, MaxLeaf, &Features[0]);
1178
1179 __cpu_model.__cpu_features[0] = Features[0];
1180 __cpu_features2[0] = Features[1];
1181 __cpu_features2[1] = Features[2];
1182 __cpu_features2[2] = Features[3];
1183
1184 if (Vendor == SIG_INTEL) {
1185 // Get CPU type.
1186 getIntelProcessorTypeAndSubtype(Family, Model, &Features[0],
1187 &(__cpu_model.__cpu_type),
1188 &(__cpu_model.__cpu_subtype));
1189 __cpu_model.__cpu_vendor = VENDOR_INTEL;
1190 } else if (Vendor == SIG_AMD) {
1191 // Get CPU type.
1192 getAMDProcessorTypeAndSubtype(Family, Model, &Features[0],
1193 &(__cpu_model.__cpu_type),
1194 &(__cpu_model.__cpu_subtype));
1195 __cpu_model.__cpu_vendor = VENDOR_AMD;
1196 } else
1197 __cpu_model.__cpu_vendor = VENDOR_OTHER;
1198
1199 assert(__cpu_model.__cpu_vendor < VENDOR_MAX);
1200 assert(__cpu_model.__cpu_type < CPU_TYPE_MAX);
1201 assert(__cpu_model.__cpu_subtype < CPU_SUBTYPE_MAX);
1202
1203 return 0;
1204 }
1205 #endif // defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)
1206