1 //===-- cpu_model/x86.c - Support for __cpu_model builtin --------*- C -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file is based on LLVM's lib/Support/Host.cpp. 10 // It implements the operating system Host concept and builtin 11 // __cpu_model for the compiler_rt library for x86. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "cpu_model.h" 16 17 #if !(defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || \ 18 defined(_M_X64)) 19 #error This file is intended only for x86-based targets 20 #endif 21 22 #if defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER) 23 24 #include <assert.h> 25 26 #ifdef _MSC_VER 27 #include <intrin.h> 28 #endif 29 30 enum VendorSignatures { 31 SIG_INTEL = 0x756e6547, // Genu 32 SIG_AMD = 0x68747541, // Auth 33 }; 34 35 enum ProcessorVendors { 36 VENDOR_INTEL = 1, 37 VENDOR_AMD, 38 VENDOR_OTHER, 39 VENDOR_MAX 40 }; 41 42 enum ProcessorTypes { 43 INTEL_BONNELL = 1, 44 INTEL_CORE2, 45 INTEL_COREI7, 46 AMDFAM10H, 47 AMDFAM15H, 48 INTEL_SILVERMONT, 49 INTEL_KNL, 50 AMD_BTVER1, 51 AMD_BTVER2, 52 AMDFAM17H, 53 INTEL_KNM, 54 INTEL_GOLDMONT, 55 INTEL_GOLDMONT_PLUS, 56 INTEL_TREMONT, 57 AMDFAM19H, 58 ZHAOXIN_FAM7H, 59 INTEL_SIERRAFOREST, 60 INTEL_GRANDRIDGE, 61 INTEL_CLEARWATERFOREST, 62 CPU_TYPE_MAX 63 }; 64 65 enum ProcessorSubtypes { 66 INTEL_COREI7_NEHALEM = 1, 67 INTEL_COREI7_WESTMERE, 68 INTEL_COREI7_SANDYBRIDGE, 69 AMDFAM10H_BARCELONA, 70 AMDFAM10H_SHANGHAI, 71 AMDFAM10H_ISTANBUL, 72 AMDFAM15H_BDVER1, 73 AMDFAM15H_BDVER2, 74 AMDFAM15H_BDVER3, 75 AMDFAM15H_BDVER4, 76 AMDFAM17H_ZNVER1, 77 INTEL_COREI7_IVYBRIDGE, 78 INTEL_COREI7_HASWELL, 79 INTEL_COREI7_BROADWELL, 80 INTEL_COREI7_SKYLAKE, 81 INTEL_COREI7_SKYLAKE_AVX512, 82 INTEL_COREI7_CANNONLAKE, 83 INTEL_COREI7_ICELAKE_CLIENT, 84 INTEL_COREI7_ICELAKE_SERVER, 85 AMDFAM17H_ZNVER2, 86 INTEL_COREI7_CASCADELAKE, 87 INTEL_COREI7_TIGERLAKE, 88 INTEL_COREI7_COOPERLAKE, 89 INTEL_COREI7_SAPPHIRERAPIDS, 90 INTEL_COREI7_ALDERLAKE, 91 AMDFAM19H_ZNVER3, 92 INTEL_COREI7_ROCKETLAKE, 93 ZHAOXIN_FAM7H_LUJIAZUI, 94 AMDFAM19H_ZNVER4, 95 INTEL_COREI7_GRANITERAPIDS, 96 INTEL_COREI7_GRANITERAPIDS_D, 97 INTEL_COREI7_ARROWLAKE, 98 INTEL_COREI7_ARROWLAKE_S, 99 INTEL_COREI7_PANTHERLAKE, 100 CPU_SUBTYPE_MAX 101 }; 102 103 enum ProcessorFeatures { 104 FEATURE_CMOV = 0, 105 FEATURE_MMX, 106 FEATURE_POPCNT, 107 FEATURE_SSE, 108 FEATURE_SSE2, 109 FEATURE_SSE3, 110 FEATURE_SSSE3, 111 FEATURE_SSE4_1, 112 FEATURE_SSE4_2, 113 FEATURE_AVX, 114 FEATURE_AVX2, 115 FEATURE_SSE4_A, 116 FEATURE_FMA4, 117 FEATURE_XOP, 118 FEATURE_FMA, 119 FEATURE_AVX512F, 120 FEATURE_BMI, 121 FEATURE_BMI2, 122 FEATURE_AES, 123 FEATURE_PCLMUL, 124 FEATURE_AVX512VL, 125 FEATURE_AVX512BW, 126 FEATURE_AVX512DQ, 127 FEATURE_AVX512CD, 128 FEATURE_AVX512ER, 129 FEATURE_AVX512PF, 130 FEATURE_AVX512VBMI, 131 FEATURE_AVX512IFMA, 132 FEATURE_AVX5124VNNIW, 133 FEATURE_AVX5124FMAPS, 134 FEATURE_AVX512VPOPCNTDQ, 135 FEATURE_AVX512VBMI2, 136 FEATURE_GFNI, 137 FEATURE_VPCLMULQDQ, 138 FEATURE_AVX512VNNI, 139 FEATURE_AVX512BITALG, 140 FEATURE_AVX512BF16, 141 FEATURE_AVX512VP2INTERSECT, 142 143 FEATURE_CMPXCHG16B = 46, 144 FEATURE_F16C = 49, 145 FEATURE_LAHF_LM = 54, 146 FEATURE_LM, 147 FEATURE_WP, 148 FEATURE_LZCNT, 149 FEATURE_MOVBE, 150 151 FEATURE_X86_64_BASELINE = 95, 152 FEATURE_X86_64_V2, 153 FEATURE_X86_64_V3, 154 FEATURE_X86_64_V4, 155 CPU_FEATURE_MAX 156 }; 157 158 // The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max). 159 // Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID 160 // support. Consequently, for i386, the presence of CPUID is checked first 161 // via the corresponding eflags bit. 162 static bool isCpuIdSupported(void) { 163 #if defined(__GNUC__) || defined(__clang__) 164 #if defined(__i386__) 165 int __cpuid_supported; 166 __asm__(" pushfl\n" 167 " popl %%eax\n" 168 " movl %%eax,%%ecx\n" 169 " xorl $0x00200000,%%eax\n" 170 " pushl %%eax\n" 171 " popfl\n" 172 " pushfl\n" 173 " popl %%eax\n" 174 " movl $0,%0\n" 175 " cmpl %%eax,%%ecx\n" 176 " je 1f\n" 177 " movl $1,%0\n" 178 "1:" 179 : "=r"(__cpuid_supported) 180 : 181 : "eax", "ecx"); 182 if (!__cpuid_supported) 183 return false; 184 #endif 185 return true; 186 #endif 187 return true; 188 } 189 190 // This code is copied from lib/Support/Host.cpp. 191 // Changes to either file should be mirrored in the other. 192 193 /// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in 194 /// the specified arguments. If we can't run cpuid on the host, return true. 195 static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, 196 unsigned *rECX, unsigned *rEDX) { 197 #if defined(__GNUC__) || defined(__clang__) 198 #if defined(__x86_64__) 199 // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. 200 // FIXME: should we save this for Clang? 201 __asm__("movq\t%%rbx, %%rsi\n\t" 202 "cpuid\n\t" 203 "xchgq\t%%rbx, %%rsi\n\t" 204 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 205 : "a"(value)); 206 return false; 207 #elif defined(__i386__) 208 __asm__("movl\t%%ebx, %%esi\n\t" 209 "cpuid\n\t" 210 "xchgl\t%%ebx, %%esi\n\t" 211 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 212 : "a"(value)); 213 return false; 214 #else 215 return true; 216 #endif 217 #elif defined(_MSC_VER) 218 // The MSVC intrinsic is portable across x86 and x64. 219 int registers[4]; 220 __cpuid(registers, value); 221 *rEAX = registers[0]; 222 *rEBX = registers[1]; 223 *rECX = registers[2]; 224 *rEDX = registers[3]; 225 return false; 226 #else 227 return true; 228 #endif 229 } 230 231 /// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return 232 /// the 4 values in the specified arguments. If we can't run cpuid on the host, 233 /// return true. 234 static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, 235 unsigned *rEAX, unsigned *rEBX, unsigned *rECX, 236 unsigned *rEDX) { 237 #if defined(__GNUC__) || defined(__clang__) 238 #if defined(__x86_64__) 239 // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. 240 // FIXME: should we save this for Clang? 241 __asm__("movq\t%%rbx, %%rsi\n\t" 242 "cpuid\n\t" 243 "xchgq\t%%rbx, %%rsi\n\t" 244 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 245 : "a"(value), "c"(subleaf)); 246 return false; 247 #elif defined(__i386__) 248 __asm__("movl\t%%ebx, %%esi\n\t" 249 "cpuid\n\t" 250 "xchgl\t%%ebx, %%esi\n\t" 251 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 252 : "a"(value), "c"(subleaf)); 253 return false; 254 #else 255 return true; 256 #endif 257 #elif defined(_MSC_VER) 258 int registers[4]; 259 __cpuidex(registers, value, subleaf); 260 *rEAX = registers[0]; 261 *rEBX = registers[1]; 262 *rECX = registers[2]; 263 *rEDX = registers[3]; 264 return false; 265 #else 266 return true; 267 #endif 268 } 269 270 // Read control register 0 (XCR0). Used to detect features such as AVX. 271 static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) { 272 #if defined(__GNUC__) || defined(__clang__) 273 // Check xgetbv; this uses a .byte sequence instead of the instruction 274 // directly because older assemblers do not include support for xgetbv and 275 // there is no easy way to conditionally compile based on the assembler used. 276 __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0)); 277 return false; 278 #elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK) 279 unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); 280 *rEAX = Result; 281 *rEDX = Result >> 32; 282 return false; 283 #else 284 return true; 285 #endif 286 } 287 288 static void detectX86FamilyModel(unsigned EAX, unsigned *Family, 289 unsigned *Model) { 290 *Family = (EAX >> 8) & 0xf; // Bits 8 - 11 291 *Model = (EAX >> 4) & 0xf; // Bits 4 - 7 292 if (*Family == 6 || *Family == 0xf) { 293 if (*Family == 0xf) 294 // Examine extended family ID if family ID is F. 295 *Family += (EAX >> 20) & 0xff; // Bits 20 - 27 296 // Examine extended model ID if family ID is 6 or F. 297 *Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19 298 } 299 } 300 301 static const char *getIntelProcessorTypeAndSubtype(unsigned Family, 302 unsigned Model, 303 const unsigned *Features, 304 unsigned *Type, 305 unsigned *Subtype) { 306 #define testFeature(F) (Features[F / 32] & (1 << (F % 32))) != 0 307 308 // We select CPU strings to match the code in Host.cpp, but we don't use them 309 // in compiler-rt. 310 const char *CPU = 0; 311 312 switch (Family) { 313 case 6: 314 switch (Model) { 315 case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile 316 // processor, Intel Core 2 Quad processor, Intel Core 2 Quad 317 // mobile processor, Intel Core 2 Extreme processor, Intel 318 // Pentium Dual-Core processor, Intel Xeon processor, model 319 // 0Fh. All processors are manufactured using the 65 nm process. 320 case 0x16: // Intel Celeron processor model 16h. All processors are 321 // manufactured using the 65 nm process 322 CPU = "core2"; 323 *Type = INTEL_CORE2; 324 break; 325 case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model 326 // 17h. All processors are manufactured using the 45 nm process. 327 // 328 // 45nm: Penryn , Wolfdale, Yorkfield (XE) 329 case 0x1d: // Intel Xeon processor MP. All processors are manufactured using 330 // the 45 nm process. 331 CPU = "penryn"; 332 *Type = INTEL_CORE2; 333 break; 334 case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All 335 // processors are manufactured using the 45 nm process. 336 case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz. 337 // As found in a Summer 2010 model iMac. 338 case 0x1f: 339 case 0x2e: // Nehalem EX 340 CPU = "nehalem"; 341 *Type = INTEL_COREI7; 342 *Subtype = INTEL_COREI7_NEHALEM; 343 break; 344 case 0x25: // Intel Core i7, laptop version. 345 case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All 346 // processors are manufactured using the 32 nm process. 347 case 0x2f: // Westmere EX 348 CPU = "westmere"; 349 *Type = INTEL_COREI7; 350 *Subtype = INTEL_COREI7_WESTMERE; 351 break; 352 case 0x2a: // Intel Core i7 processor. All processors are manufactured 353 // using the 32 nm process. 354 case 0x2d: 355 CPU = "sandybridge"; 356 *Type = INTEL_COREI7; 357 *Subtype = INTEL_COREI7_SANDYBRIDGE; 358 break; 359 case 0x3a: 360 case 0x3e: // Ivy Bridge EP 361 CPU = "ivybridge"; 362 *Type = INTEL_COREI7; 363 *Subtype = INTEL_COREI7_IVYBRIDGE; 364 break; 365 366 // Haswell: 367 case 0x3c: 368 case 0x3f: 369 case 0x45: 370 case 0x46: 371 CPU = "haswell"; 372 *Type = INTEL_COREI7; 373 *Subtype = INTEL_COREI7_HASWELL; 374 break; 375 376 // Broadwell: 377 case 0x3d: 378 case 0x47: 379 case 0x4f: 380 case 0x56: 381 CPU = "broadwell"; 382 *Type = INTEL_COREI7; 383 *Subtype = INTEL_COREI7_BROADWELL; 384 break; 385 386 // Skylake: 387 case 0x4e: // Skylake mobile 388 case 0x5e: // Skylake desktop 389 case 0x8e: // Kaby Lake mobile 390 case 0x9e: // Kaby Lake desktop 391 case 0xa5: // Comet Lake-H/S 392 case 0xa6: // Comet Lake-U 393 CPU = "skylake"; 394 *Type = INTEL_COREI7; 395 *Subtype = INTEL_COREI7_SKYLAKE; 396 break; 397 398 // Rocketlake: 399 case 0xa7: 400 CPU = "rocketlake"; 401 *Type = INTEL_COREI7; 402 *Subtype = INTEL_COREI7_ROCKETLAKE; 403 break; 404 405 // Skylake Xeon: 406 case 0x55: 407 *Type = INTEL_COREI7; 408 if (testFeature(FEATURE_AVX512BF16)) { 409 CPU = "cooperlake"; 410 *Subtype = INTEL_COREI7_COOPERLAKE; 411 } else if (testFeature(FEATURE_AVX512VNNI)) { 412 CPU = "cascadelake"; 413 *Subtype = INTEL_COREI7_CASCADELAKE; 414 } else { 415 CPU = "skylake-avx512"; 416 *Subtype = INTEL_COREI7_SKYLAKE_AVX512; 417 } 418 break; 419 420 // Cannonlake: 421 case 0x66: 422 CPU = "cannonlake"; 423 *Type = INTEL_COREI7; 424 *Subtype = INTEL_COREI7_CANNONLAKE; 425 break; 426 427 // Icelake: 428 case 0x7d: 429 case 0x7e: 430 CPU = "icelake-client"; 431 *Type = INTEL_COREI7; 432 *Subtype = INTEL_COREI7_ICELAKE_CLIENT; 433 break; 434 435 // Tigerlake: 436 case 0x8c: 437 case 0x8d: 438 CPU = "tigerlake"; 439 *Type = INTEL_COREI7; 440 *Subtype = INTEL_COREI7_TIGERLAKE; 441 break; 442 443 // Alderlake: 444 case 0x97: 445 case 0x9a: 446 // Raptorlake: 447 case 0xb7: 448 case 0xba: 449 case 0xbf: 450 // Meteorlake: 451 case 0xaa: 452 case 0xac: 453 // Gracemont: 454 case 0xbe: 455 CPU = "alderlake"; 456 *Type = INTEL_COREI7; 457 *Subtype = INTEL_COREI7_ALDERLAKE; 458 break; 459 460 // Arrowlake: 461 case 0xc5: 462 CPU = "arrowlake"; 463 *Type = INTEL_COREI7; 464 *Subtype = INTEL_COREI7_ARROWLAKE; 465 break; 466 467 // Arrowlake S: 468 case 0xc6: 469 // Lunarlake: 470 case 0xbd: 471 CPU = "arrowlake-s"; 472 *Type = INTEL_COREI7; 473 *Subtype = INTEL_COREI7_ARROWLAKE_S; 474 break; 475 476 // Pantherlake: 477 case 0xcc: 478 CPU = "pantherlake"; 479 *Type = INTEL_COREI7; 480 *Subtype = INTEL_COREI7_PANTHERLAKE; 481 break; 482 483 // Icelake Xeon: 484 case 0x6a: 485 case 0x6c: 486 CPU = "icelake-server"; 487 *Type = INTEL_COREI7; 488 *Subtype = INTEL_COREI7_ICELAKE_SERVER; 489 break; 490 491 // Emerald Rapids: 492 case 0xcf: 493 // Sapphire Rapids: 494 case 0x8f: 495 CPU = "sapphirerapids"; 496 *Type = INTEL_COREI7; 497 *Subtype = INTEL_COREI7_SAPPHIRERAPIDS; 498 break; 499 500 // Granite Rapids: 501 case 0xad: 502 CPU = "graniterapids"; 503 *Type = INTEL_COREI7; 504 *Subtype = INTEL_COREI7_GRANITERAPIDS; 505 break; 506 507 // Granite Rapids D: 508 case 0xae: 509 CPU = "graniterapids-d"; 510 *Type = INTEL_COREI7; 511 *Subtype = INTEL_COREI7_GRANITERAPIDS_D; 512 break; 513 514 case 0x1c: // Most 45 nm Intel Atom processors 515 case 0x26: // 45 nm Atom Lincroft 516 case 0x27: // 32 nm Atom Medfield 517 case 0x35: // 32 nm Atom Midview 518 case 0x36: // 32 nm Atom Midview 519 CPU = "bonnell"; 520 *Type = INTEL_BONNELL; 521 break; 522 523 // Atom Silvermont codes from the Intel software optimization guide. 524 case 0x37: 525 case 0x4a: 526 case 0x4d: 527 case 0x5a: 528 case 0x5d: 529 case 0x4c: // really airmont 530 CPU = "silvermont"; 531 *Type = INTEL_SILVERMONT; 532 break; 533 // Goldmont: 534 case 0x5c: // Apollo Lake 535 case 0x5f: // Denverton 536 CPU = "goldmont"; 537 *Type = INTEL_GOLDMONT; 538 break; // "goldmont" 539 case 0x7a: 540 CPU = "goldmont-plus"; 541 *Type = INTEL_GOLDMONT_PLUS; 542 break; 543 case 0x86: 544 case 0x8a: // Lakefield 545 case 0x96: // Elkhart Lake 546 case 0x9c: // Jasper Lake 547 CPU = "tremont"; 548 *Type = INTEL_TREMONT; 549 break; 550 551 // Sierraforest: 552 case 0xaf: 553 CPU = "sierraforest"; 554 *Type = INTEL_SIERRAFOREST; 555 break; 556 557 // Grandridge: 558 case 0xb6: 559 CPU = "grandridge"; 560 *Type = INTEL_GRANDRIDGE; 561 break; 562 563 // Clearwaterforest: 564 case 0xdd: 565 CPU = "clearwaterforest"; 566 *Type = INTEL_COREI7; 567 *Subtype = INTEL_CLEARWATERFOREST; 568 break; 569 570 case 0x57: 571 CPU = "knl"; 572 *Type = INTEL_KNL; 573 break; 574 575 case 0x85: 576 CPU = "knm"; 577 *Type = INTEL_KNM; 578 break; 579 580 default: // Unknown family 6 CPU. 581 break; 582 } 583 break; 584 default: 585 break; // Unknown. 586 } 587 588 return CPU; 589 } 590 591 static const char *getAMDProcessorTypeAndSubtype(unsigned Family, 592 unsigned Model, 593 const unsigned *Features, 594 unsigned *Type, 595 unsigned *Subtype) { 596 // We select CPU strings to match the code in Host.cpp, but we don't use them 597 // in compiler-rt. 598 const char *CPU = 0; 599 600 switch (Family) { 601 case 16: 602 CPU = "amdfam10"; 603 *Type = AMDFAM10H; 604 switch (Model) { 605 case 2: 606 *Subtype = AMDFAM10H_BARCELONA; 607 break; 608 case 4: 609 *Subtype = AMDFAM10H_SHANGHAI; 610 break; 611 case 8: 612 *Subtype = AMDFAM10H_ISTANBUL; 613 break; 614 } 615 break; 616 case 20: 617 CPU = "btver1"; 618 *Type = AMD_BTVER1; 619 break; 620 case 21: 621 CPU = "bdver1"; 622 *Type = AMDFAM15H; 623 if (Model >= 0x60 && Model <= 0x7f) { 624 CPU = "bdver4"; 625 *Subtype = AMDFAM15H_BDVER4; 626 break; // 60h-7Fh: Excavator 627 } 628 if (Model >= 0x30 && Model <= 0x3f) { 629 CPU = "bdver3"; 630 *Subtype = AMDFAM15H_BDVER3; 631 break; // 30h-3Fh: Steamroller 632 } 633 if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) { 634 CPU = "bdver2"; 635 *Subtype = AMDFAM15H_BDVER2; 636 break; // 02h, 10h-1Fh: Piledriver 637 } 638 if (Model <= 0x0f) { 639 *Subtype = AMDFAM15H_BDVER1; 640 break; // 00h-0Fh: Bulldozer 641 } 642 break; 643 case 22: 644 CPU = "btver2"; 645 *Type = AMD_BTVER2; 646 break; 647 case 23: 648 CPU = "znver1"; 649 *Type = AMDFAM17H; 650 if ((Model >= 0x30 && Model <= 0x3f) || (Model == 0x47) || 651 (Model >= 0x60 && Model <= 0x67) || (Model >= 0x68 && Model <= 0x6f) || 652 (Model >= 0x70 && Model <= 0x7f) || (Model >= 0x84 && Model <= 0x87) || 653 (Model >= 0x90 && Model <= 0x97) || (Model >= 0x98 && Model <= 0x9f) || 654 (Model >= 0xa0 && Model <= 0xaf)) { 655 // Family 17h Models 30h-3Fh (Starship) Zen 2 656 // Family 17h Models 47h (Cardinal) Zen 2 657 // Family 17h Models 60h-67h (Renoir) Zen 2 658 // Family 17h Models 68h-6Fh (Lucienne) Zen 2 659 // Family 17h Models 70h-7Fh (Matisse) Zen 2 660 // Family 17h Models 84h-87h (ProjectX) Zen 2 661 // Family 17h Models 90h-97h (VanGogh) Zen 2 662 // Family 17h Models 98h-9Fh (Mero) Zen 2 663 // Family 17h Models A0h-AFh (Mendocino) Zen 2 664 CPU = "znver2"; 665 *Subtype = AMDFAM17H_ZNVER2; 666 break; 667 } 668 if ((Model >= 0x10 && Model <= 0x1f) || (Model >= 0x20 && Model <= 0x2f)) { 669 // Family 17h Models 10h-1Fh (Raven1) Zen 670 // Family 17h Models 10h-1Fh (Picasso) Zen+ 671 // Family 17h Models 20h-2Fh (Raven2 x86) Zen 672 *Subtype = AMDFAM17H_ZNVER1; 673 break; 674 } 675 break; 676 case 25: 677 CPU = "znver3"; 678 *Type = AMDFAM19H; 679 if ((Model <= 0x0f) || (Model >= 0x20 && Model <= 0x2f) || 680 (Model >= 0x30 && Model <= 0x3f) || (Model >= 0x40 && Model <= 0x4f) || 681 (Model >= 0x50 && Model <= 0x5f)) { 682 // Family 19h Models 00h-0Fh (Genesis, Chagall) Zen 3 683 // Family 19h Models 20h-2Fh (Vermeer) Zen 3 684 // Family 19h Models 30h-3Fh (Badami) Zen 3 685 // Family 19h Models 40h-4Fh (Rembrandt) Zen 3+ 686 // Family 19h Models 50h-5Fh (Cezanne) Zen 3 687 *Subtype = AMDFAM19H_ZNVER3; 688 break; 689 } 690 if ((Model >= 0x10 && Model <= 0x1f) || (Model >= 0x60 && Model <= 0x6f) || 691 (Model >= 0x70 && Model <= 0x77) || (Model >= 0x78 && Model <= 0x7f) || 692 (Model >= 0xa0 && Model <= 0xaf)) { 693 // Family 19h Models 10h-1Fh (Stones; Storm Peak) Zen 4 694 // Family 19h Models 60h-6Fh (Raphael) Zen 4 695 // Family 19h Models 70h-77h (Phoenix, Hawkpoint1) Zen 4 696 // Family 19h Models 78h-7Fh (Phoenix 2, Hawkpoint2) Zen 4 697 // Family 19h Models A0h-AFh (Stones-Dense) Zen 4 698 CPU = "znver4"; 699 *Subtype = AMDFAM19H_ZNVER4; 700 break; // "znver4" 701 } 702 break; // family 19h 703 default: 704 break; // Unknown AMD CPU. 705 } 706 707 return CPU; 708 } 709 710 static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, 711 unsigned *Features) { 712 unsigned EAX = 0, EBX = 0; 713 714 #define hasFeature(F) ((Features[F / 32] >> (F % 32)) & 1) 715 #define setFeature(F) Features[F / 32] |= 1U << (F % 32) 716 717 if ((EDX >> 15) & 1) 718 setFeature(FEATURE_CMOV); 719 if ((EDX >> 23) & 1) 720 setFeature(FEATURE_MMX); 721 if ((EDX >> 25) & 1) 722 setFeature(FEATURE_SSE); 723 if ((EDX >> 26) & 1) 724 setFeature(FEATURE_SSE2); 725 726 if ((ECX >> 0) & 1) 727 setFeature(FEATURE_SSE3); 728 if ((ECX >> 1) & 1) 729 setFeature(FEATURE_PCLMUL); 730 if ((ECX >> 9) & 1) 731 setFeature(FEATURE_SSSE3); 732 if ((ECX >> 12) & 1) 733 setFeature(FEATURE_FMA); 734 if ((ECX >> 13) & 1) 735 setFeature(FEATURE_CMPXCHG16B); 736 if ((ECX >> 19) & 1) 737 setFeature(FEATURE_SSE4_1); 738 if ((ECX >> 20) & 1) 739 setFeature(FEATURE_SSE4_2); 740 if ((ECX >> 22) & 1) 741 setFeature(FEATURE_MOVBE); 742 if ((ECX >> 23) & 1) 743 setFeature(FEATURE_POPCNT); 744 if ((ECX >> 25) & 1) 745 setFeature(FEATURE_AES); 746 if ((ECX >> 29) & 1) 747 setFeature(FEATURE_F16C); 748 749 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV 750 // indicates that the AVX registers will be saved and restored on context 751 // switch, then we have full AVX support. 752 const unsigned AVXBits = (1 << 27) | (1 << 28); 753 bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) && 754 ((EAX & 0x6) == 0x6); 755 #if defined(__APPLE__) 756 // Darwin lazily saves the AVX512 context on first use: trust that the OS will 757 // save the AVX512 context if we use AVX512 instructions, even the bit is not 758 // set right now. 759 bool HasAVX512Save = true; 760 #else 761 // AVX512 requires additional context to be saved by the OS. 762 bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0); 763 #endif 764 765 if (HasAVX) 766 setFeature(FEATURE_AVX); 767 768 bool HasLeaf7 = 769 MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); 770 771 if (HasLeaf7 && ((EBX >> 3) & 1)) 772 setFeature(FEATURE_BMI); 773 if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX) 774 setFeature(FEATURE_AVX2); 775 if (HasLeaf7 && ((EBX >> 8) & 1)) 776 setFeature(FEATURE_BMI2); 777 if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save) 778 setFeature(FEATURE_AVX512F); 779 if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save) 780 setFeature(FEATURE_AVX512DQ); 781 if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save) 782 setFeature(FEATURE_AVX512IFMA); 783 if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save) 784 setFeature(FEATURE_AVX512PF); 785 if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save) 786 setFeature(FEATURE_AVX512ER); 787 if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save) 788 setFeature(FEATURE_AVX512CD); 789 if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save) 790 setFeature(FEATURE_AVX512BW); 791 if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save) 792 setFeature(FEATURE_AVX512VL); 793 794 if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save) 795 setFeature(FEATURE_AVX512VBMI); 796 if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save) 797 setFeature(FEATURE_AVX512VBMI2); 798 if (HasLeaf7 && ((ECX >> 8) & 1)) 799 setFeature(FEATURE_GFNI); 800 if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX) 801 setFeature(FEATURE_VPCLMULQDQ); 802 if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save) 803 setFeature(FEATURE_AVX512VNNI); 804 if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save) 805 setFeature(FEATURE_AVX512BITALG); 806 if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save) 807 setFeature(FEATURE_AVX512VPOPCNTDQ); 808 809 if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save) 810 setFeature(FEATURE_AVX5124VNNIW); 811 if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save) 812 setFeature(FEATURE_AVX5124FMAPS); 813 if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save) 814 setFeature(FEATURE_AVX512VP2INTERSECT); 815 816 // EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't 817 // return all 0s for invalid subleaves so check the limit. 818 bool HasLeaf7Subleaf1 = 819 HasLeaf7 && EAX >= 1 && 820 !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); 821 if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save) 822 setFeature(FEATURE_AVX512BF16); 823 824 unsigned MaxExtLevel; 825 getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); 826 827 bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && 828 !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); 829 if (HasExtLeaf1) { 830 if (ECX & 1) 831 setFeature(FEATURE_LAHF_LM); 832 if ((ECX >> 5) & 1) 833 setFeature(FEATURE_LZCNT); 834 if (((ECX >> 6) & 1)) 835 setFeature(FEATURE_SSE4_A); 836 if (((ECX >> 11) & 1)) 837 setFeature(FEATURE_XOP); 838 if (((ECX >> 16) & 1)) 839 setFeature(FEATURE_FMA4); 840 if (((EDX >> 29) & 1)) 841 setFeature(FEATURE_LM); 842 } 843 844 if (hasFeature(FEATURE_LM) && hasFeature(FEATURE_SSE2)) { 845 setFeature(FEATURE_X86_64_BASELINE); 846 if (hasFeature(FEATURE_CMPXCHG16B) && hasFeature(FEATURE_POPCNT) && 847 hasFeature(FEATURE_LAHF_LM) && hasFeature(FEATURE_SSE4_2)) { 848 setFeature(FEATURE_X86_64_V2); 849 if (hasFeature(FEATURE_AVX2) && hasFeature(FEATURE_BMI) && 850 hasFeature(FEATURE_BMI2) && hasFeature(FEATURE_F16C) && 851 hasFeature(FEATURE_FMA) && hasFeature(FEATURE_LZCNT) && 852 hasFeature(FEATURE_MOVBE)) { 853 setFeature(FEATURE_X86_64_V3); 854 if (hasFeature(FEATURE_AVX512BW) && hasFeature(FEATURE_AVX512CD) && 855 hasFeature(FEATURE_AVX512DQ) && hasFeature(FEATURE_AVX512VL)) 856 setFeature(FEATURE_X86_64_V4); 857 } 858 } 859 } 860 861 #undef hasFeature 862 #undef setFeature 863 } 864 865 #ifndef _WIN32 866 __attribute__((visibility("hidden"))) 867 #endif 868 int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE; 869 870 #ifndef _WIN32 871 __attribute__((visibility("hidden"))) 872 #endif 873 struct __processor_model { 874 unsigned int __cpu_vendor; 875 unsigned int __cpu_type; 876 unsigned int __cpu_subtype; 877 unsigned int __cpu_features[1]; 878 } __cpu_model = {0, 0, 0, {0}}; 879 880 #ifndef _WIN32 881 __attribute__((visibility("hidden"))) 882 #endif 883 unsigned __cpu_features2[(CPU_FEATURE_MAX - 1) / 32]; 884 885 // A constructor function that is sets __cpu_model and __cpu_features2 with 886 // the right values. This needs to run only once. This constructor is 887 // given the highest priority and it should run before constructors without 888 // the priority set. However, it still runs after ifunc initializers and 889 // needs to be called explicitly there. 890 891 int CONSTRUCTOR_ATTRIBUTE __cpu_indicator_init(void) { 892 unsigned EAX, EBX, ECX, EDX; 893 unsigned MaxLeaf = 5; 894 unsigned Vendor; 895 unsigned Model, Family; 896 unsigned Features[(CPU_FEATURE_MAX + 31) / 32] = {0}; 897 static_assert(sizeof(Features) / sizeof(Features[0]) == 4, ""); 898 static_assert(sizeof(__cpu_features2) / sizeof(__cpu_features2[0]) == 3, ""); 899 900 // This function needs to run just once. 901 if (__cpu_model.__cpu_vendor) 902 return 0; 903 904 if (!isCpuIdSupported() || 905 getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) { 906 __cpu_model.__cpu_vendor = VENDOR_OTHER; 907 return -1; 908 } 909 910 getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX); 911 detectX86FamilyModel(EAX, &Family, &Model); 912 913 // Find available features. 914 getAvailableFeatures(ECX, EDX, MaxLeaf, &Features[0]); 915 916 __cpu_model.__cpu_features[0] = Features[0]; 917 __cpu_features2[0] = Features[1]; 918 __cpu_features2[1] = Features[2]; 919 __cpu_features2[2] = Features[3]; 920 921 if (Vendor == SIG_INTEL) { 922 // Get CPU type. 923 getIntelProcessorTypeAndSubtype(Family, Model, &Features[0], 924 &(__cpu_model.__cpu_type), 925 &(__cpu_model.__cpu_subtype)); 926 __cpu_model.__cpu_vendor = VENDOR_INTEL; 927 } else if (Vendor == SIG_AMD) { 928 // Get CPU type. 929 getAMDProcessorTypeAndSubtype(Family, Model, &Features[0], 930 &(__cpu_model.__cpu_type), 931 &(__cpu_model.__cpu_subtype)); 932 __cpu_model.__cpu_vendor = VENDOR_AMD; 933 } else 934 __cpu_model.__cpu_vendor = VENDOR_OTHER; 935 936 assert(__cpu_model.__cpu_vendor < VENDOR_MAX); 937 assert(__cpu_model.__cpu_type < CPU_TYPE_MAX); 938 assert(__cpu_model.__cpu_subtype < CPU_SUBTYPE_MAX); 939 940 return 0; 941 } 942 #endif // defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER) 943