//===-- cpu_model/x86.c - Support for __cpu_model builtin  --------*- C -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//  This file is based on LLVM's lib/Support/Host.cpp.
//  It implements the operating system Host concept and builtin
//  __cpu_model for the compiler_rt library for x86.
//
//===----------------------------------------------------------------------===//

#include "cpu_model.h"

#if !(defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) ||          \
      defined(_M_X64))
#error This file is intended only for x86-based targets
#endif

#if defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)

#include <assert.h>

#ifdef _MSC_VER
#include <intrin.h>
#endif

// Vendor signatures. __cpu_indicator_init compares these against the EBX
// output of CPUID leaf 0; the trailing comments spell the four ASCII bytes
// each constant encodes (least significant byte first).
enum VendorSignatures {
  SIG_INTEL = 0x756e6547, // Genu
  SIG_AMD = 0x68747541,   // Auth
};

// Vendor identifiers stored in __cpu_model.__cpu_vendor. Numbering starts at
// 1, so 0 means "not initialized yet" (__cpu_indicator_init uses a non-zero
// vendor as its "already ran" marker). VENDOR_MAX is only an upper bound used
// in assertions.
enum ProcessorVendors {
  VENDOR_INTEL = 1,
  VENDOR_AMD,
  VENDOR_OTHER,
  VENDOR_MAX
};

// CPU type identifiers stored in __cpu_model.__cpu_type. Values are assigned
// implicitly from 1 in declaration order, so inserting or reordering entries
// renumbers everything after them.
// NOTE(review): presumably these numbers must stay in sync with the values the
// compiler hard-codes for __builtin_cpu_is -- confirm before editing the order.
enum ProcessorTypes {
  INTEL_BONNELL = 1,
  INTEL_CORE2,
  INTEL_COREI7,
  AMDFAM10H,
  AMDFAM15H,
  INTEL_SILVERMONT,
  INTEL_KNL,
  AMD_BTVER1,
  AMD_BTVER2,
  AMDFAM17H,
  INTEL_KNM,
  INTEL_GOLDMONT,
  INTEL_GOLDMONT_PLUS,
  INTEL_TREMONT,
  AMDFAM19H,
  ZHAOXIN_FAM7H,
  INTEL_SIERRAFOREST,
  INTEL_GRANDRIDGE,
  INTEL_CLEARWATERFOREST,
  CPU_TYPE_MAX
};

// CPU subtype identifiers stored in __cpu_model.__cpu_subtype. As with
// ProcessorTypes, values are implicit and start at 1, so the declaration order
// is significant. Intel and AMD entries are interleaved; a subtype is only
// meaningful together with the type it was stored alongside.
// NOTE(review): presumably order-sensitive for the same compatibility reason
// as ProcessorTypes -- confirm before editing.
enum ProcessorSubtypes {
  INTEL_COREI7_NEHALEM = 1,
  INTEL_COREI7_WESTMERE,
  INTEL_COREI7_SANDYBRIDGE,
  AMDFAM10H_BARCELONA,
  AMDFAM10H_SHANGHAI,
  AMDFAM10H_ISTANBUL,
  AMDFAM15H_BDVER1,
  AMDFAM15H_BDVER2,
  AMDFAM15H_BDVER3,
  AMDFAM15H_BDVER4,
  AMDFAM17H_ZNVER1,
  INTEL_COREI7_IVYBRIDGE,
  INTEL_COREI7_HASWELL,
  INTEL_COREI7_BROADWELL,
  INTEL_COREI7_SKYLAKE,
  INTEL_COREI7_SKYLAKE_AVX512,
  INTEL_COREI7_CANNONLAKE,
  INTEL_COREI7_ICELAKE_CLIENT,
  INTEL_COREI7_ICELAKE_SERVER,
  AMDFAM17H_ZNVER2,
  INTEL_COREI7_CASCADELAKE,
  INTEL_COREI7_TIGERLAKE,
  INTEL_COREI7_COOPERLAKE,
  INTEL_COREI7_SAPPHIRERAPIDS,
  INTEL_COREI7_ALDERLAKE,
  AMDFAM19H_ZNVER3,
  INTEL_COREI7_ROCKETLAKE,
  ZHAOXIN_FAM7H_LUJIAZUI,
  AMDFAM19H_ZNVER4,
  INTEL_COREI7_GRANITERAPIDS,
  INTEL_COREI7_GRANITERAPIDS_D,
  INTEL_COREI7_ARROWLAKE,
  INTEL_COREI7_ARROWLAKE_S,
  INTEL_COREI7_PANTHERLAKE,
  CPU_SUBTYPE_MAX
};

// Feature bit indices. Feature F lives in 32-bit word F / 32 at bit F % 32 of
// the feature array filled by getAvailableFeatures; word 0 ends up in
// __cpu_model.__cpu_features[0] and the following words in __cpu_features2.
// The explicit "= N" anchors leave gaps for indices this file does not set.
// CPU_FEATURE_MAX is one past the last index and is used to size the arrays.
enum ProcessorFeatures {
  FEATURE_CMOV = 0,
  FEATURE_MMX,
  FEATURE_POPCNT,
  FEATURE_SSE,
  FEATURE_SSE2,
  FEATURE_SSE3,
  FEATURE_SSSE3,
  FEATURE_SSE4_1,
  FEATURE_SSE4_2,
  FEATURE_AVX,
  FEATURE_AVX2,
  FEATURE_SSE4_A,
  FEATURE_FMA4,
  FEATURE_XOP,
  FEATURE_FMA,
  FEATURE_AVX512F,
  FEATURE_BMI,
  FEATURE_BMI2,
  FEATURE_AES,
  FEATURE_PCLMUL,
  FEATURE_AVX512VL,
  FEATURE_AVX512BW,
  FEATURE_AVX512DQ,
  FEATURE_AVX512CD,
  FEATURE_AVX512ER,
  FEATURE_AVX512PF,
  FEATURE_AVX512VBMI,
  FEATURE_AVX512IFMA,
  FEATURE_AVX5124VNNIW,
  FEATURE_AVX5124FMAPS,
  FEATURE_AVX512VPOPCNTDQ,
  FEATURE_AVX512VBMI2,
  FEATURE_GFNI,
  FEATURE_VPCLMULQDQ,
  FEATURE_AVX512VNNI,
  FEATURE_AVX512BITALG,
  FEATURE_AVX512BF16,
  FEATURE_AVX512VP2INTERSECT,

  FEATURE_CMPXCHG16B = 46,
  FEATURE_F16C = 49,
  FEATURE_LAHF_LM = 54,
  FEATURE_LM,
  FEATURE_WP,
  FEATURE_LZCNT,
  FEATURE_MOVBE,

  FEATURE_AVX512FP16 = 94,
  FEATURE_X86_64_BASELINE,
  FEATURE_X86_64_V2,
  FEATURE_X86_64_V3,
  FEATURE_X86_64_V4,
  CPU_FEATURE_MAX
};

// The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
// Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID
// support. Consequently, for i386, the presence of CPUID is checked first
// via the corresponding eflags bit.
163 static bool isCpuIdSupported(void) { 164 #if defined(__GNUC__) || defined(__clang__) 165 #if defined(__i386__) 166 int __cpuid_supported; 167 __asm__(" pushfl\n" 168 " popl %%eax\n" 169 " movl %%eax,%%ecx\n" 170 " xorl $0x00200000,%%eax\n" 171 " pushl %%eax\n" 172 " popfl\n" 173 " pushfl\n" 174 " popl %%eax\n" 175 " movl $0,%0\n" 176 " cmpl %%eax,%%ecx\n" 177 " je 1f\n" 178 " movl $1,%0\n" 179 "1:" 180 : "=r"(__cpuid_supported) 181 : 182 : "eax", "ecx"); 183 if (!__cpuid_supported) 184 return false; 185 #endif 186 return true; 187 #endif 188 return true; 189 } 190 191 // This code is copied from lib/Support/Host.cpp. 192 // Changes to either file should be mirrored in the other. 193 194 /// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in 195 /// the specified arguments. If we can't run cpuid on the host, return true. 196 static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, 197 unsigned *rECX, unsigned *rEDX) { 198 #if defined(__GNUC__) || defined(__clang__) 199 #if defined(__x86_64__) 200 // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. 201 // FIXME: should we save this for Clang? 202 __asm__("movq\t%%rbx, %%rsi\n\t" 203 "cpuid\n\t" 204 "xchgq\t%%rbx, %%rsi\n\t" 205 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 206 : "a"(value)); 207 return false; 208 #elif defined(__i386__) 209 __asm__("movl\t%%ebx, %%esi\n\t" 210 "cpuid\n\t" 211 "xchgl\t%%ebx, %%esi\n\t" 212 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 213 : "a"(value)); 214 return false; 215 #else 216 return true; 217 #endif 218 #elif defined(_MSC_VER) 219 // The MSVC intrinsic is portable across x86 and x64. 
220 int registers[4]; 221 __cpuid(registers, value); 222 *rEAX = registers[0]; 223 *rEBX = registers[1]; 224 *rECX = registers[2]; 225 *rEDX = registers[3]; 226 return false; 227 #else 228 return true; 229 #endif 230 } 231 232 /// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return 233 /// the 4 values in the specified arguments. If we can't run cpuid on the host, 234 /// return true. 235 static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, 236 unsigned *rEAX, unsigned *rEBX, unsigned *rECX, 237 unsigned *rEDX) { 238 #if defined(__GNUC__) || defined(__clang__) 239 #if defined(__x86_64__) 240 // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. 241 // FIXME: should we save this for Clang? 242 __asm__("movq\t%%rbx, %%rsi\n\t" 243 "cpuid\n\t" 244 "xchgq\t%%rbx, %%rsi\n\t" 245 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 246 : "a"(value), "c"(subleaf)); 247 return false; 248 #elif defined(__i386__) 249 __asm__("movl\t%%ebx, %%esi\n\t" 250 "cpuid\n\t" 251 "xchgl\t%%ebx, %%esi\n\t" 252 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 253 : "a"(value), "c"(subleaf)); 254 return false; 255 #else 256 return true; 257 #endif 258 #elif defined(_MSC_VER) 259 int registers[4]; 260 __cpuidex(registers, value, subleaf); 261 *rEAX = registers[0]; 262 *rEBX = registers[1]; 263 *rECX = registers[2]; 264 *rEDX = registers[3]; 265 return false; 266 #else 267 return true; 268 #endif 269 } 270 271 // Read control register 0 (XCR0). Used to detect features such as AVX. 272 static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) { 273 #if defined(__GNUC__) || defined(__clang__) 274 // Check xgetbv; this uses a .byte sequence instead of the instruction 275 // directly because older assemblers do not include support for xgetbv and 276 // there is no easy way to conditionally compile based on the assembler used. 
277 __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0)); 278 return false; 279 #elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK) 280 unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); 281 *rEAX = Result; 282 *rEDX = Result >> 32; 283 return false; 284 #else 285 return true; 286 #endif 287 } 288 289 static void detectX86FamilyModel(unsigned EAX, unsigned *Family, 290 unsigned *Model) { 291 *Family = (EAX >> 8) & 0xf; // Bits 8 - 11 292 *Model = (EAX >> 4) & 0xf; // Bits 4 - 7 293 if (*Family == 6 || *Family == 0xf) { 294 if (*Family == 0xf) 295 // Examine extended family ID if family ID is F. 296 *Family += (EAX >> 20) & 0xff; // Bits 20 - 27 297 // Examine extended model ID if family ID is 6 or F. 298 *Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19 299 } 300 } 301 302 static const char *getIntelProcessorTypeAndSubtype(unsigned Family, 303 unsigned Model, 304 const unsigned *Features, 305 unsigned *Type, 306 unsigned *Subtype) { 307 #define testFeature(F) (Features[F / 32] & (1 << (F % 32))) != 0 308 309 // We select CPU strings to match the code in Host.cpp, but we don't use them 310 // in compiler-rt. 311 const char *CPU = 0; 312 313 switch (Family) { 314 case 6: 315 switch (Model) { 316 case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile 317 // processor, Intel Core 2 Quad processor, Intel Core 2 Quad 318 // mobile processor, Intel Core 2 Extreme processor, Intel 319 // Pentium Dual-Core processor, Intel Xeon processor, model 320 // 0Fh. All processors are manufactured using the 65 nm process. 321 case 0x16: // Intel Celeron processor model 16h. All processors are 322 // manufactured using the 65 nm process 323 CPU = "core2"; 324 *Type = INTEL_CORE2; 325 break; 326 case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model 327 // 17h. All processors are manufactured using the 45 nm process. 
328 // 329 // 45nm: Penryn , Wolfdale, Yorkfield (XE) 330 case 0x1d: // Intel Xeon processor MP. All processors are manufactured using 331 // the 45 nm process. 332 CPU = "penryn"; 333 *Type = INTEL_CORE2; 334 break; 335 case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All 336 // processors are manufactured using the 45 nm process. 337 case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz. 338 // As found in a Summer 2010 model iMac. 339 case 0x1f: 340 case 0x2e: // Nehalem EX 341 CPU = "nehalem"; 342 *Type = INTEL_COREI7; 343 *Subtype = INTEL_COREI7_NEHALEM; 344 break; 345 case 0x25: // Intel Core i7, laptop version. 346 case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All 347 // processors are manufactured using the 32 nm process. 348 case 0x2f: // Westmere EX 349 CPU = "westmere"; 350 *Type = INTEL_COREI7; 351 *Subtype = INTEL_COREI7_WESTMERE; 352 break; 353 case 0x2a: // Intel Core i7 processor. All processors are manufactured 354 // using the 32 nm process. 
355 case 0x2d: 356 CPU = "sandybridge"; 357 *Type = INTEL_COREI7; 358 *Subtype = INTEL_COREI7_SANDYBRIDGE; 359 break; 360 case 0x3a: 361 case 0x3e: // Ivy Bridge EP 362 CPU = "ivybridge"; 363 *Type = INTEL_COREI7; 364 *Subtype = INTEL_COREI7_IVYBRIDGE; 365 break; 366 367 // Haswell: 368 case 0x3c: 369 case 0x3f: 370 case 0x45: 371 case 0x46: 372 CPU = "haswell"; 373 *Type = INTEL_COREI7; 374 *Subtype = INTEL_COREI7_HASWELL; 375 break; 376 377 // Broadwell: 378 case 0x3d: 379 case 0x47: 380 case 0x4f: 381 case 0x56: 382 CPU = "broadwell"; 383 *Type = INTEL_COREI7; 384 *Subtype = INTEL_COREI7_BROADWELL; 385 break; 386 387 // Skylake: 388 case 0x4e: // Skylake mobile 389 case 0x5e: // Skylake desktop 390 case 0x8e: // Kaby Lake mobile 391 case 0x9e: // Kaby Lake desktop 392 case 0xa5: // Comet Lake-H/S 393 case 0xa6: // Comet Lake-U 394 CPU = "skylake"; 395 *Type = INTEL_COREI7; 396 *Subtype = INTEL_COREI7_SKYLAKE; 397 break; 398 399 // Rocketlake: 400 case 0xa7: 401 CPU = "rocketlake"; 402 *Type = INTEL_COREI7; 403 *Subtype = INTEL_COREI7_ROCKETLAKE; 404 break; 405 406 // Skylake Xeon: 407 case 0x55: 408 *Type = INTEL_COREI7; 409 if (testFeature(FEATURE_AVX512BF16)) { 410 CPU = "cooperlake"; 411 *Subtype = INTEL_COREI7_COOPERLAKE; 412 } else if (testFeature(FEATURE_AVX512VNNI)) { 413 CPU = "cascadelake"; 414 *Subtype = INTEL_COREI7_CASCADELAKE; 415 } else { 416 CPU = "skylake-avx512"; 417 *Subtype = INTEL_COREI7_SKYLAKE_AVX512; 418 } 419 break; 420 421 // Cannonlake: 422 case 0x66: 423 CPU = "cannonlake"; 424 *Type = INTEL_COREI7; 425 *Subtype = INTEL_COREI7_CANNONLAKE; 426 break; 427 428 // Icelake: 429 case 0x7d: 430 case 0x7e: 431 CPU = "icelake-client"; 432 *Type = INTEL_COREI7; 433 *Subtype = INTEL_COREI7_ICELAKE_CLIENT; 434 break; 435 436 // Tigerlake: 437 case 0x8c: 438 case 0x8d: 439 CPU = "tigerlake"; 440 *Type = INTEL_COREI7; 441 *Subtype = INTEL_COREI7_TIGERLAKE; 442 break; 443 444 // Alderlake: 445 case 0x97: 446 case 0x9a: 447 // Raptorlake: 448 case 
0xb7: 449 case 0xba: 450 case 0xbf: 451 // Meteorlake: 452 case 0xaa: 453 case 0xac: 454 // Gracemont: 455 case 0xbe: 456 CPU = "alderlake"; 457 *Type = INTEL_COREI7; 458 *Subtype = INTEL_COREI7_ALDERLAKE; 459 break; 460 461 // Arrowlake: 462 case 0xc5: 463 CPU = "arrowlake"; 464 *Type = INTEL_COREI7; 465 *Subtype = INTEL_COREI7_ARROWLAKE; 466 break; 467 468 // Arrowlake S: 469 case 0xc6: 470 // Lunarlake: 471 case 0xbd: 472 CPU = "arrowlake-s"; 473 *Type = INTEL_COREI7; 474 *Subtype = INTEL_COREI7_ARROWLAKE_S; 475 break; 476 477 // Pantherlake: 478 case 0xcc: 479 CPU = "pantherlake"; 480 *Type = INTEL_COREI7; 481 *Subtype = INTEL_COREI7_PANTHERLAKE; 482 break; 483 484 // Icelake Xeon: 485 case 0x6a: 486 case 0x6c: 487 CPU = "icelake-server"; 488 *Type = INTEL_COREI7; 489 *Subtype = INTEL_COREI7_ICELAKE_SERVER; 490 break; 491 492 // Emerald Rapids: 493 case 0xcf: 494 // Sapphire Rapids: 495 case 0x8f: 496 CPU = "sapphirerapids"; 497 *Type = INTEL_COREI7; 498 *Subtype = INTEL_COREI7_SAPPHIRERAPIDS; 499 break; 500 501 // Granite Rapids: 502 case 0xad: 503 CPU = "graniterapids"; 504 *Type = INTEL_COREI7; 505 *Subtype = INTEL_COREI7_GRANITERAPIDS; 506 break; 507 508 // Granite Rapids D: 509 case 0xae: 510 CPU = "graniterapids-d"; 511 *Type = INTEL_COREI7; 512 *Subtype = INTEL_COREI7_GRANITERAPIDS_D; 513 break; 514 515 case 0x1c: // Most 45 nm Intel Atom processors 516 case 0x26: // 45 nm Atom Lincroft 517 case 0x27: // 32 nm Atom Medfield 518 case 0x35: // 32 nm Atom Midview 519 case 0x36: // 32 nm Atom Midview 520 CPU = "bonnell"; 521 *Type = INTEL_BONNELL; 522 break; 523 524 // Atom Silvermont codes from the Intel software optimization guide. 
525 case 0x37: 526 case 0x4a: 527 case 0x4d: 528 case 0x5a: 529 case 0x5d: 530 case 0x4c: // really airmont 531 CPU = "silvermont"; 532 *Type = INTEL_SILVERMONT; 533 break; 534 // Goldmont: 535 case 0x5c: // Apollo Lake 536 case 0x5f: // Denverton 537 CPU = "goldmont"; 538 *Type = INTEL_GOLDMONT; 539 break; // "goldmont" 540 case 0x7a: 541 CPU = "goldmont-plus"; 542 *Type = INTEL_GOLDMONT_PLUS; 543 break; 544 case 0x86: 545 case 0x8a: // Lakefield 546 case 0x96: // Elkhart Lake 547 case 0x9c: // Jasper Lake 548 CPU = "tremont"; 549 *Type = INTEL_TREMONT; 550 break; 551 552 // Sierraforest: 553 case 0xaf: 554 CPU = "sierraforest"; 555 *Type = INTEL_SIERRAFOREST; 556 break; 557 558 // Grandridge: 559 case 0xb6: 560 CPU = "grandridge"; 561 *Type = INTEL_GRANDRIDGE; 562 break; 563 564 // Clearwaterforest: 565 case 0xdd: 566 CPU = "clearwaterforest"; 567 *Type = INTEL_COREI7; 568 *Subtype = INTEL_CLEARWATERFOREST; 569 break; 570 571 case 0x57: 572 CPU = "knl"; 573 *Type = INTEL_KNL; 574 break; 575 576 case 0x85: 577 CPU = "knm"; 578 *Type = INTEL_KNM; 579 break; 580 581 default: // Unknown family 6 CPU. 582 break; 583 } 584 break; 585 default: 586 break; // Unknown. 587 } 588 589 return CPU; 590 } 591 592 static const char *getAMDProcessorTypeAndSubtype(unsigned Family, 593 unsigned Model, 594 const unsigned *Features, 595 unsigned *Type, 596 unsigned *Subtype) { 597 // We select CPU strings to match the code in Host.cpp, but we don't use them 598 // in compiler-rt. 
599 const char *CPU = 0; 600 601 switch (Family) { 602 case 16: 603 CPU = "amdfam10"; 604 *Type = AMDFAM10H; 605 switch (Model) { 606 case 2: 607 *Subtype = AMDFAM10H_BARCELONA; 608 break; 609 case 4: 610 *Subtype = AMDFAM10H_SHANGHAI; 611 break; 612 case 8: 613 *Subtype = AMDFAM10H_ISTANBUL; 614 break; 615 } 616 break; 617 case 20: 618 CPU = "btver1"; 619 *Type = AMD_BTVER1; 620 break; 621 case 21: 622 CPU = "bdver1"; 623 *Type = AMDFAM15H; 624 if (Model >= 0x60 && Model <= 0x7f) { 625 CPU = "bdver4"; 626 *Subtype = AMDFAM15H_BDVER4; 627 break; // 60h-7Fh: Excavator 628 } 629 if (Model >= 0x30 && Model <= 0x3f) { 630 CPU = "bdver3"; 631 *Subtype = AMDFAM15H_BDVER3; 632 break; // 30h-3Fh: Steamroller 633 } 634 if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) { 635 CPU = "bdver2"; 636 *Subtype = AMDFAM15H_BDVER2; 637 break; // 02h, 10h-1Fh: Piledriver 638 } 639 if (Model <= 0x0f) { 640 *Subtype = AMDFAM15H_BDVER1; 641 break; // 00h-0Fh: Bulldozer 642 } 643 break; 644 case 22: 645 CPU = "btver2"; 646 *Type = AMD_BTVER2; 647 break; 648 case 23: 649 CPU = "znver1"; 650 *Type = AMDFAM17H; 651 if ((Model >= 0x30 && Model <= 0x3f) || (Model == 0x47) || 652 (Model >= 0x60 && Model <= 0x67) || (Model >= 0x68 && Model <= 0x6f) || 653 (Model >= 0x70 && Model <= 0x7f) || (Model >= 0x84 && Model <= 0x87) || 654 (Model >= 0x90 && Model <= 0x97) || (Model >= 0x98 && Model <= 0x9f) || 655 (Model >= 0xa0 && Model <= 0xaf)) { 656 // Family 17h Models 30h-3Fh (Starship) Zen 2 657 // Family 17h Models 47h (Cardinal) Zen 2 658 // Family 17h Models 60h-67h (Renoir) Zen 2 659 // Family 17h Models 68h-6Fh (Lucienne) Zen 2 660 // Family 17h Models 70h-7Fh (Matisse) Zen 2 661 // Family 17h Models 84h-87h (ProjectX) Zen 2 662 // Family 17h Models 90h-97h (VanGogh) Zen 2 663 // Family 17h Models 98h-9Fh (Mero) Zen 2 664 // Family 17h Models A0h-AFh (Mendocino) Zen 2 665 CPU = "znver2"; 666 *Subtype = AMDFAM17H_ZNVER2; 667 break; 668 } 669 if ((Model >= 0x10 && Model <= 0x1f) || (Model 
>= 0x20 && Model <= 0x2f)) { 670 // Family 17h Models 10h-1Fh (Raven1) Zen 671 // Family 17h Models 10h-1Fh (Picasso) Zen+ 672 // Family 17h Models 20h-2Fh (Raven2 x86) Zen 673 *Subtype = AMDFAM17H_ZNVER1; 674 break; 675 } 676 break; 677 case 25: 678 CPU = "znver3"; 679 *Type = AMDFAM19H; 680 if ((Model <= 0x0f) || (Model >= 0x20 && Model <= 0x2f) || 681 (Model >= 0x30 && Model <= 0x3f) || (Model >= 0x40 && Model <= 0x4f) || 682 (Model >= 0x50 && Model <= 0x5f)) { 683 // Family 19h Models 00h-0Fh (Genesis, Chagall) Zen 3 684 // Family 19h Models 20h-2Fh (Vermeer) Zen 3 685 // Family 19h Models 30h-3Fh (Badami) Zen 3 686 // Family 19h Models 40h-4Fh (Rembrandt) Zen 3+ 687 // Family 19h Models 50h-5Fh (Cezanne) Zen 3 688 *Subtype = AMDFAM19H_ZNVER3; 689 break; 690 } 691 if ((Model >= 0x10 && Model <= 0x1f) || (Model >= 0x60 && Model <= 0x6f) || 692 (Model >= 0x70 && Model <= 0x77) || (Model >= 0x78 && Model <= 0x7f) || 693 (Model >= 0xa0 && Model <= 0xaf)) { 694 // Family 19h Models 10h-1Fh (Stones; Storm Peak) Zen 4 695 // Family 19h Models 60h-6Fh (Raphael) Zen 4 696 // Family 19h Models 70h-77h (Phoenix, Hawkpoint1) Zen 4 697 // Family 19h Models 78h-7Fh (Phoenix 2, Hawkpoint2) Zen 4 698 // Family 19h Models A0h-AFh (Stones-Dense) Zen 4 699 CPU = "znver4"; 700 *Subtype = AMDFAM19H_ZNVER4; 701 break; // "znver4" 702 } 703 break; // family 19h 704 default: 705 break; // Unknown AMD CPU. 
706 } 707 708 return CPU; 709 } 710 711 static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, 712 unsigned *Features) { 713 unsigned EAX = 0, EBX = 0; 714 715 #define hasFeature(F) ((Features[F / 32] >> (F % 32)) & 1) 716 #define setFeature(F) Features[F / 32] |= 1U << (F % 32) 717 718 if ((EDX >> 15) & 1) 719 setFeature(FEATURE_CMOV); 720 if ((EDX >> 23) & 1) 721 setFeature(FEATURE_MMX); 722 if ((EDX >> 25) & 1) 723 setFeature(FEATURE_SSE); 724 if ((EDX >> 26) & 1) 725 setFeature(FEATURE_SSE2); 726 727 if ((ECX >> 0) & 1) 728 setFeature(FEATURE_SSE3); 729 if ((ECX >> 1) & 1) 730 setFeature(FEATURE_PCLMUL); 731 if ((ECX >> 9) & 1) 732 setFeature(FEATURE_SSSE3); 733 if ((ECX >> 12) & 1) 734 setFeature(FEATURE_FMA); 735 if ((ECX >> 13) & 1) 736 setFeature(FEATURE_CMPXCHG16B); 737 if ((ECX >> 19) & 1) 738 setFeature(FEATURE_SSE4_1); 739 if ((ECX >> 20) & 1) 740 setFeature(FEATURE_SSE4_2); 741 if ((ECX >> 22) & 1) 742 setFeature(FEATURE_MOVBE); 743 if ((ECX >> 23) & 1) 744 setFeature(FEATURE_POPCNT); 745 if ((ECX >> 25) & 1) 746 setFeature(FEATURE_AES); 747 if ((ECX >> 29) & 1) 748 setFeature(FEATURE_F16C); 749 750 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV 751 // indicates that the AVX registers will be saved and restored on context 752 // switch, then we have full AVX support. 753 const unsigned AVXBits = (1 << 27) | (1 << 28); 754 bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) && 755 ((EAX & 0x6) == 0x6); 756 #if defined(__APPLE__) 757 // Darwin lazily saves the AVX512 context on first use: trust that the OS will 758 // save the AVX512 context if we use AVX512 instructions, even the bit is not 759 // set right now. 760 bool HasAVX512Save = true; 761 #else 762 // AVX512 requires additional context to be saved by the OS. 
763 bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0); 764 #endif 765 766 if (HasAVX) 767 setFeature(FEATURE_AVX); 768 769 bool HasLeaf7 = 770 MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); 771 772 if (HasLeaf7 && ((EBX >> 3) & 1)) 773 setFeature(FEATURE_BMI); 774 if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX) 775 setFeature(FEATURE_AVX2); 776 if (HasLeaf7 && ((EBX >> 8) & 1)) 777 setFeature(FEATURE_BMI2); 778 if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save) 779 setFeature(FEATURE_AVX512F); 780 if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save) 781 setFeature(FEATURE_AVX512DQ); 782 if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save) 783 setFeature(FEATURE_AVX512IFMA); 784 if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save) 785 setFeature(FEATURE_AVX512PF); 786 if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save) 787 setFeature(FEATURE_AVX512ER); 788 if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save) 789 setFeature(FEATURE_AVX512CD); 790 if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save) 791 setFeature(FEATURE_AVX512BW); 792 if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save) 793 setFeature(FEATURE_AVX512VL); 794 795 if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save) 796 setFeature(FEATURE_AVX512VBMI); 797 if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save) 798 setFeature(FEATURE_AVX512VBMI2); 799 if (HasLeaf7 && ((ECX >> 8) & 1)) 800 setFeature(FEATURE_GFNI); 801 if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX) 802 setFeature(FEATURE_VPCLMULQDQ); 803 if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save) 804 setFeature(FEATURE_AVX512VNNI); 805 if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save) 806 setFeature(FEATURE_AVX512BITALG); 807 if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save) 808 setFeature(FEATURE_AVX512VPOPCNTDQ); 809 810 if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save) 811 setFeature(FEATURE_AVX5124VNNIW); 812 if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save) 813 setFeature(FEATURE_AVX5124FMAPS); 814 if (HasLeaf7 && ((EDX 
>> 8) & 1) && HasAVX512Save) 815 setFeature(FEATURE_AVX512VP2INTERSECT); 816 if (HasLeaf7 && ((EDX >> 23) & 1) && HasAVX512Save) 817 setFeature(FEATURE_AVX512FP16); 818 819 // EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't 820 // return all 0s for invalid subleaves so check the limit. 821 bool HasLeaf7Subleaf1 = 822 HasLeaf7 && EAX >= 1 && 823 !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); 824 if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save) 825 setFeature(FEATURE_AVX512BF16); 826 827 unsigned MaxExtLevel; 828 getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); 829 830 bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && 831 !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); 832 if (HasExtLeaf1) { 833 if (ECX & 1) 834 setFeature(FEATURE_LAHF_LM); 835 if ((ECX >> 5) & 1) 836 setFeature(FEATURE_LZCNT); 837 if (((ECX >> 6) & 1)) 838 setFeature(FEATURE_SSE4_A); 839 if (((ECX >> 11) & 1)) 840 setFeature(FEATURE_XOP); 841 if (((ECX >> 16) & 1)) 842 setFeature(FEATURE_FMA4); 843 if (((EDX >> 29) & 1)) 844 setFeature(FEATURE_LM); 845 } 846 847 if (hasFeature(FEATURE_LM) && hasFeature(FEATURE_SSE2)) { 848 setFeature(FEATURE_X86_64_BASELINE); 849 if (hasFeature(FEATURE_CMPXCHG16B) && hasFeature(FEATURE_POPCNT) && 850 hasFeature(FEATURE_LAHF_LM) && hasFeature(FEATURE_SSE4_2)) { 851 setFeature(FEATURE_X86_64_V2); 852 if (hasFeature(FEATURE_AVX2) && hasFeature(FEATURE_BMI) && 853 hasFeature(FEATURE_BMI2) && hasFeature(FEATURE_F16C) && 854 hasFeature(FEATURE_FMA) && hasFeature(FEATURE_LZCNT) && 855 hasFeature(FEATURE_MOVBE)) { 856 setFeature(FEATURE_X86_64_V3); 857 if (hasFeature(FEATURE_AVX512BW) && hasFeature(FEATURE_AVX512CD) && 858 hasFeature(FEATURE_AVX512DQ) && hasFeature(FEATURE_AVX512VL)) 859 setFeature(FEATURE_X86_64_V4); 860 } 861 } 862 } 863 864 #undef hasFeature 865 #undef setFeature 866 } 867 868 #ifndef _WIN32 869 __attribute__((visibility("hidden"))) 870 #endif 871 int __cpu_indicator_init(void) 
CONSTRUCTOR_ATTRIBUTE; 872 873 #ifndef _WIN32 874 __attribute__((visibility("hidden"))) 875 #endif 876 struct __processor_model { 877 unsigned int __cpu_vendor; 878 unsigned int __cpu_type; 879 unsigned int __cpu_subtype; 880 unsigned int __cpu_features[1]; 881 } __cpu_model = {0, 0, 0, {0}}; 882 883 #ifndef _WIN32 884 __attribute__((visibility("hidden"))) 885 #endif 886 unsigned __cpu_features2[(CPU_FEATURE_MAX - 1) / 32]; 887 888 // A constructor function that is sets __cpu_model and __cpu_features2 with 889 // the right values. This needs to run only once. This constructor is 890 // given the highest priority and it should run before constructors without 891 // the priority set. However, it still runs after ifunc initializers and 892 // needs to be called explicitly there. 893 894 int CONSTRUCTOR_ATTRIBUTE __cpu_indicator_init(void) { 895 unsigned EAX, EBX, ECX, EDX; 896 unsigned MaxLeaf = 5; 897 unsigned Vendor; 898 unsigned Model, Family; 899 unsigned Features[(CPU_FEATURE_MAX + 31) / 32] = {0}; 900 static_assert(sizeof(Features) / sizeof(Features[0]) == 4, ""); 901 static_assert(sizeof(__cpu_features2) / sizeof(__cpu_features2[0]) == 3, ""); 902 903 // This function needs to run just once. 904 if (__cpu_model.__cpu_vendor) 905 return 0; 906 907 if (!isCpuIdSupported() || 908 getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) { 909 __cpu_model.__cpu_vendor = VENDOR_OTHER; 910 return -1; 911 } 912 913 getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX); 914 detectX86FamilyModel(EAX, &Family, &Model); 915 916 // Find available features. 917 getAvailableFeatures(ECX, EDX, MaxLeaf, &Features[0]); 918 919 __cpu_model.__cpu_features[0] = Features[0]; 920 __cpu_features2[0] = Features[1]; 921 __cpu_features2[1] = Features[2]; 922 __cpu_features2[2] = Features[3]; 923 924 if (Vendor == SIG_INTEL) { 925 // Get CPU type. 
926 getIntelProcessorTypeAndSubtype(Family, Model, &Features[0], 927 &(__cpu_model.__cpu_type), 928 &(__cpu_model.__cpu_subtype)); 929 __cpu_model.__cpu_vendor = VENDOR_INTEL; 930 } else if (Vendor == SIG_AMD) { 931 // Get CPU type. 932 getAMDProcessorTypeAndSubtype(Family, Model, &Features[0], 933 &(__cpu_model.__cpu_type), 934 &(__cpu_model.__cpu_subtype)); 935 __cpu_model.__cpu_vendor = VENDOR_AMD; 936 } else 937 __cpu_model.__cpu_vendor = VENDOR_OTHER; 938 939 assert(__cpu_model.__cpu_vendor < VENDOR_MAX); 940 assert(__cpu_model.__cpu_type < CPU_TYPE_MAX); 941 assert(__cpu_model.__cpu_subtype < CPU_SUBTYPE_MAX); 942 943 return 0; 944 } 945 #endif // defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER) 946