1 //===-- Host.cpp - Implement OS Host Detection ------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the operating system Host detection. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/TargetParser/Host.h" 14 #include "llvm/ADT/SmallVector.h" 15 #include "llvm/ADT/StringMap.h" 16 #include "llvm/ADT/StringRef.h" 17 #include "llvm/ADT/StringSwitch.h" 18 #include "llvm/Config/llvm-config.h" 19 #include "llvm/Support/MemoryBuffer.h" 20 #include "llvm/Support/raw_ostream.h" 21 #include "llvm/TargetParser/Triple.h" 22 #include "llvm/TargetParser/X86TargetParser.h" 23 #include <string.h> 24 25 // Include the platform-specific parts of this class. 26 #ifdef LLVM_ON_UNIX 27 #include "Unix/Host.inc" 28 #include <sched.h> 29 #endif 30 #ifdef _WIN32 31 #include "Windows/Host.inc" 32 #endif 33 #ifdef _MSC_VER 34 #include <intrin.h> 35 #endif 36 #ifdef __MVS__ 37 #include "llvm/Support/BCD.h" 38 #endif 39 #if defined(__APPLE__) 40 #include <mach/host_info.h> 41 #include <mach/mach.h> 42 #include <mach/mach_host.h> 43 #include <mach/machine.h> 44 #include <sys/param.h> 45 #include <sys/sysctl.h> 46 #endif 47 #ifdef _AIX 48 #include <sys/systemcfg.h> 49 #endif 50 #if defined(__sun__) && defined(__svr4__) 51 #include <kstat.h> 52 #endif 53 54 #define DEBUG_TYPE "host-detection" 55 56 //===----------------------------------------------------------------------===// 57 // 58 // Implementations of the CPU detection routines 59 // 60 //===----------------------------------------------------------------------===// 61 62 using namespace llvm; 63 64 static std::unique_ptr<llvm::MemoryBuffer> 65 LLVM_ATTRIBUTE_UNUSED getProcCpuinfoContent() { 66 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text = 67 llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo"); 68 if (std::error_code EC = Text.getError()) { 69 llvm::errs() << "Can't read " 70 << "/proc/cpuinfo: " << EC.message() << "\n"; 71 return nullptr; 72 } 73 return std::move(*Text); 74 } 75 76 StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) { 77 // Access to the Processor Version Register (PVR) on PowerPC is privileged, 78 // and so we must use an operating-system interface to determine the current 79 // processor type. On Linux, this is exposed through the /proc/cpuinfo file. 80 const char *generic = "generic"; 81 82 // The cpu line is second (after the 'processor: 0' line), so if this 83 // buffer is too small then something has changed (or is wrong). 84 StringRef::const_iterator CPUInfoStart = ProcCpuinfoContent.begin(); 85 StringRef::const_iterator CPUInfoEnd = ProcCpuinfoContent.end(); 86 87 StringRef::const_iterator CIP = CPUInfoStart; 88 89 StringRef::const_iterator CPUStart = nullptr; 90 size_t CPULen = 0; 91 92 // We need to find the first line which starts with cpu, spaces, and a colon. 93 // After the colon, there may be some additional spaces and then the cpu type. 94 while (CIP < CPUInfoEnd && CPUStart == nullptr) { 95 if (CIP < CPUInfoEnd && *CIP == '\n') 96 ++CIP; 97 98 if (CIP < CPUInfoEnd && *CIP == 'c') { 99 ++CIP; 100 if (CIP < CPUInfoEnd && *CIP == 'p') { 101 ++CIP; 102 if (CIP < CPUInfoEnd && *CIP == 'u') { 103 ++CIP; 104 while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t')) 105 ++CIP; 106 107 if (CIP < CPUInfoEnd && *CIP == ':') { 108 ++CIP; 109 while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t')) 110 ++CIP; 111 112 if (CIP < CPUInfoEnd) { 113 CPUStart = CIP; 114 while (CIP < CPUInfoEnd && (*CIP != ' ' && *CIP != '\t' && 115 *CIP != ',' && *CIP != '\n')) 116 ++CIP; 117 CPULen = CIP - CPUStart; 118 } 119 } 120 } 121 } 122 } 123 124 if (CPUStart == nullptr) 125 while (CIP < CPUInfoEnd && *CIP != '\n') 126 ++CIP; 127 } 128 129 if (CPUStart == nullptr) 130 return generic; 131 132 return StringSwitch<const char *>(StringRef(CPUStart, CPULen)) 133 .Case("604e", "604e") 134 .Case("604", "604") 135 .Case("7400", "7400") 136 .Case("7410", "7400") 137 .Case("7447", "7400") 138 .Case("7455", "7450") 139 .Case("G4", "g4") 140 .Case("POWER4", "970") 141 .Case("PPC970FX", "970") 142 .Case("PPC970MP", "970") 143 .Case("G5", "g5") 144 .Case("POWER5", "g5") 145 .Case("A2", "a2") 146 .Case("POWER6", "pwr6") 147 .Case("POWER7", "pwr7") 148 .Case("POWER8", "pwr8") 149 .Case("POWER8E", "pwr8") 150 .Case("POWER8NVL", "pwr8") 151 .Case("POWER9", "pwr9") 152 .Case("POWER10", "pwr10") 153 // FIXME: If we get a simulator or machine with the capabilities of 154 // mcpu=future, we should revisit this and add the name reported by the 155 // simulator/machine. 156 .Default(generic); 157 } 158 159 StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) { 160 // The cpuid register on arm is not accessible from user space. On Linux, 161 // it is exposed through the /proc/cpuinfo file. 162 163 // Read 32 lines from /proc/cpuinfo, which should contain the CPU part line 164 // in all cases. 165 SmallVector<StringRef, 32> Lines; 166 ProcCpuinfoContent.split(Lines, "\n"); 167 168 // Look for the CPU implementer line. 169 StringRef Implementer; 170 StringRef Hardware; 171 StringRef Part; 172 for (unsigned I = 0, E = Lines.size(); I != E; ++I) { 173 if (Lines[I].starts_with("CPU implementer")) 174 Implementer = Lines[I].substr(15).ltrim("\t :"); 175 if (Lines[I].starts_with("Hardware")) 176 Hardware = Lines[I].substr(8).ltrim("\t :"); 177 if (Lines[I].starts_with("CPU part")) 178 Part = Lines[I].substr(8).ltrim("\t :"); 179 } 180 181 if (Implementer == "0x41") { // ARM Ltd. 182 // MSM8992/8994 may give cpu part for the core that the kernel is running on, 183 // which is undeterministic and wrong. Always return cortex-a53 for these SoC. 184 if (Hardware.ends_with("MSM8994") || Hardware.ends_with("MSM8996")) 185 return "cortex-a53"; 186 187 188 // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The 189 // values correspond to the "Part number" in the CP15/c0 register. The 190 // contents are specified in the various processor manuals. 191 // This corresponds to the Main ID Register in Technical Reference Manuals. 192 // and is used in programs like sys-utils 193 return StringSwitch<const char *>(Part) 194 .Case("0x926", "arm926ej-s") 195 .Case("0xb02", "mpcore") 196 .Case("0xb36", "arm1136j-s") 197 .Case("0xb56", "arm1156t2-s") 198 .Case("0xb76", "arm1176jz-s") 199 .Case("0xc08", "cortex-a8") 200 .Case("0xc09", "cortex-a9") 201 .Case("0xc0f", "cortex-a15") 202 .Case("0xc20", "cortex-m0") 203 .Case("0xc23", "cortex-m3") 204 .Case("0xc24", "cortex-m4") 205 .Case("0xd24", "cortex-m52") 206 .Case("0xd22", "cortex-m55") 207 .Case("0xd02", "cortex-a34") 208 .Case("0xd04", "cortex-a35") 209 .Case("0xd03", "cortex-a53") 210 .Case("0xd05", "cortex-a55") 211 .Case("0xd46", "cortex-a510") 212 .Case("0xd80", "cortex-a520") 213 .Case("0xd07", "cortex-a57") 214 .Case("0xd08", "cortex-a72") 215 .Case("0xd09", "cortex-a73") 216 .Case("0xd0a", "cortex-a75") 217 .Case("0xd0b", "cortex-a76") 218 .Case("0xd0d", "cortex-a77") 219 .Case("0xd41", "cortex-a78") 220 .Case("0xd47", "cortex-a710") 221 .Case("0xd4d", "cortex-a715") 222 .Case("0xd81", "cortex-a720") 223 .Case("0xd44", "cortex-x1") 224 .Case("0xd4c", "cortex-x1c") 225 .Case("0xd48", "cortex-x2") 226 .Case("0xd4e", "cortex-x3") 227 .Case("0xd82", "cortex-x4") 228 .Case("0xd0c", "neoverse-n1") 229 .Case("0xd49", "neoverse-n2") 230 .Case("0xd40", "neoverse-v1") 231 .Case("0xd4f", "neoverse-v2") 232 .Default("generic"); 233 } 234 235 if (Implementer == "0x42" || Implementer == "0x43") { // Broadcom | Cavium. 236 return StringSwitch<const char *>(Part) 237 .Case("0x516", "thunderx2t99") 238 .Case("0x0516", "thunderx2t99") 239 .Case("0xaf", "thunderx2t99") 240 .Case("0x0af", "thunderx2t99") 241 .Case("0xa1", "thunderxt88") 242 .Case("0x0a1", "thunderxt88") 243 .Default("generic"); 244 } 245 246 if (Implementer == "0x46") { // Fujitsu Ltd. 247 return StringSwitch<const char *>(Part) 248 .Case("0x001", "a64fx") 249 .Default("generic"); 250 } 251 252 if (Implementer == "0x4e") { // NVIDIA Corporation 253 return StringSwitch<const char *>(Part) 254 .Case("0x004", "carmel") 255 .Default("generic"); 256 } 257 258 if (Implementer == "0x48") // HiSilicon Technologies, Inc. 259 // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The 260 // values correspond to the "Part number" in the CP15/c0 register. The 261 // contents are specified in the various processor manuals. 262 return StringSwitch<const char *>(Part) 263 .Case("0xd01", "tsv110") 264 .Default("generic"); 265 266 if (Implementer == "0x51") // Qualcomm Technologies, Inc. 267 // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The 268 // values correspond to the "Part number" in the CP15/c0 register. The 269 // contents are specified in the various processor manuals. 270 return StringSwitch<const char *>(Part) 271 .Case("0x06f", "krait") // APQ8064 272 .Case("0x201", "kryo") 273 .Case("0x205", "kryo") 274 .Case("0x211", "kryo") 275 .Case("0x800", "cortex-a73") // Kryo 2xx Gold 276 .Case("0x801", "cortex-a73") // Kryo 2xx Silver 277 .Case("0x802", "cortex-a75") // Kryo 3xx Gold 278 .Case("0x803", "cortex-a75") // Kryo 3xx Silver 279 .Case("0x804", "cortex-a76") // Kryo 4xx Gold 280 .Case("0x805", "cortex-a76") // Kryo 4xx/5xx Silver 281 .Case("0xc00", "falkor") 282 .Case("0xc01", "saphira") 283 .Default("generic"); 284 if (Implementer == "0x53") { // Samsung Electronics Co., Ltd. 285 // The Exynos chips have a convoluted ID scheme that doesn't seem to follow 286 // any predictive pattern across variants and parts. 287 unsigned Variant = 0, Part = 0; 288 289 // Look for the CPU variant line, whose value is a 1 digit hexadecimal 290 // number, corresponding to the Variant bits in the CP15/C0 register. 291 for (auto I : Lines) 292 if (I.consume_front("CPU variant")) 293 I.ltrim("\t :").getAsInteger(0, Variant); 294 295 // Look for the CPU part line, whose value is a 3 digit hexadecimal 296 // number, corresponding to the PartNum bits in the CP15/C0 register. 297 for (auto I : Lines) 298 if (I.consume_front("CPU part")) 299 I.ltrim("\t :").getAsInteger(0, Part); 300 301 unsigned Exynos = (Variant << 12) | Part; 302 switch (Exynos) { 303 default: 304 // Default by falling through to Exynos M3. 305 [[fallthrough]]; 306 case 0x1002: 307 return "exynos-m3"; 308 case 0x1003: 309 return "exynos-m4"; 310 } 311 } 312 313 if (Implementer == "0xc0") { // Ampere Computing 314 return StringSwitch<const char *>(Part) 315 .Case("0xac3", "ampere1") 316 .Case("0xac4", "ampere1a") 317 .Default("generic"); 318 } 319 320 return "generic"; 321 } 322 323 namespace { 324 StringRef getCPUNameFromS390Model(unsigned int Id, bool HaveVectorSupport) { 325 switch (Id) { 326 case 2064: // z900 not supported by LLVM 327 case 2066: 328 case 2084: // z990 not supported by LLVM 329 case 2086: 330 case 2094: // z9-109 not supported by LLVM 331 case 2096: 332 return "generic"; 333 case 2097: 334 case 2098: 335 return "z10"; 336 case 2817: 337 case 2818: 338 return "z196"; 339 case 2827: 340 case 2828: 341 return "zEC12"; 342 case 2964: 343 case 2965: 344 return HaveVectorSupport? "z13" : "zEC12"; 345 case 3906: 346 case 3907: 347 return HaveVectorSupport? "z14" : "zEC12"; 348 case 8561: 349 case 8562: 350 return HaveVectorSupport? "z15" : "zEC12"; 351 case 3931: 352 case 3932: 353 default: 354 return HaveVectorSupport? "z16" : "zEC12"; 355 } 356 } 357 } // end anonymous namespace 358 359 StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) { 360 // STIDP is a privileged operation, so use /proc/cpuinfo instead. 361 362 // The "processor 0:" line comes after a fair amount of other information, 363 // including a cache breakdown, but this should be plenty. 364 SmallVector<StringRef, 32> Lines; 365 ProcCpuinfoContent.split(Lines, "\n"); 366 367 // Look for the CPU features. 368 SmallVector<StringRef, 32> CPUFeatures; 369 for (unsigned I = 0, E = Lines.size(); I != E; ++I) 370 if (Lines[I].starts_with("features")) { 371 size_t Pos = Lines[I].find(':'); 372 if (Pos != StringRef::npos) { 373 Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' '); 374 break; 375 } 376 } 377 378 // We need to check for the presence of vector support independently of 379 // the machine type, since we may only use the vector register set when 380 // supported by the kernel (and hypervisor). 381 bool HaveVectorSupport = false; 382 for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) { 383 if (CPUFeatures[I] == "vx") 384 HaveVectorSupport = true; 385 } 386 387 // Now check the processor machine type. 388 for (unsigned I = 0, E = Lines.size(); I != E; ++I) { 389 if (Lines[I].starts_with("processor ")) { 390 size_t Pos = Lines[I].find("machine = "); 391 if (Pos != StringRef::npos) { 392 Pos += sizeof("machine = ") - 1; 393 unsigned int Id; 394 if (!Lines[I].drop_front(Pos).getAsInteger(10, Id)) 395 return getCPUNameFromS390Model(Id, HaveVectorSupport); 396 } 397 break; 398 } 399 } 400 401 return "generic"; 402 } 403 404 StringRef sys::detail::getHostCPUNameForRISCV(StringRef ProcCpuinfoContent) { 405 // There are 24 lines in /proc/cpuinfo 406 SmallVector<StringRef> Lines; 407 ProcCpuinfoContent.split(Lines, "\n"); 408 409 // Look for uarch line to determine cpu name 410 StringRef UArch; 411 for (unsigned I = 0, E = Lines.size(); I != E; ++I) { 412 if (Lines[I].starts_with("uarch")) { 413 UArch = Lines[I].substr(5).ltrim("\t :"); 414 break; 415 } 416 } 417 418 return StringSwitch<const char *>(UArch) 419 .Case("sifive,u74-mc", "sifive-u74") 420 .Case("sifive,bullet0", "sifive-u74") 421 .Default("generic"); 422 } 423 424 StringRef sys::detail::getHostCPUNameForBPF() { 425 #if !defined(__linux__) || !defined(__x86_64__) 426 return "generic"; 427 #else 428 uint8_t v3_insns[40] __attribute__ ((aligned (8))) = 429 /* BPF_MOV64_IMM(BPF_REG_0, 0) */ 430 { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 431 /* BPF_MOV64_IMM(BPF_REG_2, 1) */ 432 0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 433 /* BPF_JMP32_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */ 434 0xae, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 435 /* BPF_MOV64_IMM(BPF_REG_0, 1) */ 436 0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 437 /* BPF_EXIT_INSN() */ 438 0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; 439 440 uint8_t v2_insns[40] __attribute__ ((aligned (8))) = 441 /* BPF_MOV64_IMM(BPF_REG_0, 0) */ 442 { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 443 /* BPF_MOV64_IMM(BPF_REG_2, 1) */ 444 0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 445 /* BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */ 446 0xad, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 447 /* BPF_MOV64_IMM(BPF_REG_0, 1) */ 448 0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 449 /* BPF_EXIT_INSN() */ 450 0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; 451 452 struct bpf_prog_load_attr { 453 uint32_t prog_type; 454 uint32_t insn_cnt; 455 uint64_t insns; 456 uint64_t license; 457 uint32_t log_level; 458 uint32_t log_size; 459 uint64_t log_buf; 460 uint32_t kern_version; 461 uint32_t prog_flags; 462 } attr = {}; 463 attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */ 464 attr.insn_cnt = 5; 465 attr.insns = (uint64_t)v3_insns; 466 attr.license = (uint64_t)"DUMMY"; 467 468 int fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, 469 sizeof(attr)); 470 if (fd >= 0) { 471 close(fd); 472 return "v3"; 473 } 474 475 /* Clear the whole attr in case its content changed by syscall. */ 476 memset(&attr, 0, sizeof(attr)); 477 attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */ 478 attr.insn_cnt = 5; 479 attr.insns = (uint64_t)v2_insns; 480 attr.license = (uint64_t)"DUMMY"; 481 fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, sizeof(attr)); 482 if (fd >= 0) { 483 close(fd); 484 return "v2"; 485 } 486 return "v1"; 487 #endif 488 } 489 490 #if defined(__i386__) || defined(_M_IX86) || \ 491 defined(__x86_64__) || defined(_M_X64) 492 493 // The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max). 494 // Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID 495 // support. Consequently, for i386, the presence of CPUID is checked first 496 // via the corresponding eflags bit. 497 // Removal of cpuid.h header motivated by PR30384 498 // Header cpuid.h and method __get_cpuid_max are not used in llvm, clang, openmp 499 // or test-suite, but are used in external projects e.g. libstdcxx 500 static bool isCpuIdSupported() { 501 #if defined(__GNUC__) || defined(__clang__) 502 #if defined(__i386__) 503 int __cpuid_supported; 504 __asm__(" pushfl\n" 505 " popl %%eax\n" 506 " movl %%eax,%%ecx\n" 507 " xorl $0x00200000,%%eax\n" 508 " pushl %%eax\n" 509 " popfl\n" 510 " pushfl\n" 511 " popl %%eax\n" 512 " movl $0,%0\n" 513 " cmpl %%eax,%%ecx\n" 514 " je 1f\n" 515 " movl $1,%0\n" 516 "1:" 517 : "=r"(__cpuid_supported) 518 : 519 : "eax", "ecx"); 520 if (!__cpuid_supported) 521 return false; 522 #endif 523 return true; 524 #endif 525 return true; 526 } 527 528 /// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in 529 /// the specified arguments. If we can't run cpuid on the host, return true. 530 static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, 531 unsigned *rECX, unsigned *rEDX) { 532 #if defined(__GNUC__) || defined(__clang__) 533 #if defined(__x86_64__) 534 // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. 535 // FIXME: should we save this for Clang? 536 __asm__("movq\t%%rbx, %%rsi\n\t" 537 "cpuid\n\t" 538 "xchgq\t%%rbx, %%rsi\n\t" 539 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 540 : "a"(value)); 541 return false; 542 #elif defined(__i386__) 543 __asm__("movl\t%%ebx, %%esi\n\t" 544 "cpuid\n\t" 545 "xchgl\t%%ebx, %%esi\n\t" 546 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 547 : "a"(value)); 548 return false; 549 #else 550 return true; 551 #endif 552 #elif defined(_MSC_VER) 553 // The MSVC intrinsic is portable across x86 and x64. 554 int registers[4]; 555 __cpuid(registers, value); 556 *rEAX = registers[0]; 557 *rEBX = registers[1]; 558 *rECX = registers[2]; 559 *rEDX = registers[3]; 560 return false; 561 #else 562 return true; 563 #endif 564 } 565 566 namespace llvm { 567 namespace sys { 568 namespace detail { 569 namespace x86 { 570 571 VendorSignatures getVendorSignature(unsigned *MaxLeaf) { 572 unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; 573 if (MaxLeaf == nullptr) 574 MaxLeaf = &EAX; 575 else 576 *MaxLeaf = 0; 577 578 if (!isCpuIdSupported()) 579 return VendorSignatures::UNKNOWN; 580 581 if (getX86CpuIDAndInfo(0, MaxLeaf, &EBX, &ECX, &EDX) || *MaxLeaf < 1) 582 return VendorSignatures::UNKNOWN; 583 584 // "Genu ineI ntel" 585 if (EBX == 0x756e6547 && EDX == 0x49656e69 && ECX == 0x6c65746e) 586 return VendorSignatures::GENUINE_INTEL; 587 588 // "Auth enti cAMD" 589 if (EBX == 0x68747541 && EDX == 0x69746e65 && ECX == 0x444d4163) 590 return VendorSignatures::AUTHENTIC_AMD; 591 592 return VendorSignatures::UNKNOWN; 593 } 594 595 } // namespace x86 596 } // namespace detail 597 } // namespace sys 598 } // namespace llvm 599 600 using namespace llvm::sys::detail::x86; 601 602 /// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return 603 /// the 4 values in the specified arguments. If we can't run cpuid on the host, 604 /// return true. 605 static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, 606 unsigned *rEAX, unsigned *rEBX, unsigned *rECX, 607 unsigned *rEDX) { 608 #if defined(__GNUC__) || defined(__clang__) 609 #if defined(__x86_64__) 610 // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. 611 // FIXME: should we save this for Clang? 612 __asm__("movq\t%%rbx, %%rsi\n\t" 613 "cpuid\n\t" 614 "xchgq\t%%rbx, %%rsi\n\t" 615 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 616 : "a"(value), "c"(subleaf)); 617 return false; 618 #elif defined(__i386__) 619 __asm__("movl\t%%ebx, %%esi\n\t" 620 "cpuid\n\t" 621 "xchgl\t%%ebx, %%esi\n\t" 622 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 623 : "a"(value), "c"(subleaf)); 624 return false; 625 #else 626 return true; 627 #endif 628 #elif defined(_MSC_VER) 629 int registers[4]; 630 __cpuidex(registers, value, subleaf); 631 *rEAX = registers[0]; 632 *rEBX = registers[1]; 633 *rECX = registers[2]; 634 *rEDX = registers[3]; 635 return false; 636 #else 637 return true; 638 #endif 639 } 640 641 // Read control register 0 (XCR0). Used to detect features such as AVX. 642 static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) { 643 #if defined(__GNUC__) || defined(__clang__) 644 // Check xgetbv; this uses a .byte sequence instead of the instruction 645 // directly because older assemblers do not include support for xgetbv and 646 // there is no easy way to conditionally compile based on the assembler used. 647 __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0)); 648 return false; 649 #elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK) 650 unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); 651 *rEAX = Result; 652 *rEDX = Result >> 32; 653 return false; 654 #else 655 return true; 656 #endif 657 } 658 659 static void detectX86FamilyModel(unsigned EAX, unsigned *Family, 660 unsigned *Model) { 661 *Family = (EAX >> 8) & 0xf; // Bits 8 - 11 662 *Model = (EAX >> 4) & 0xf; // Bits 4 - 7 663 if (*Family == 6 || *Family == 0xf) { 664 if (*Family == 0xf) 665 // Examine extended family ID if family ID is F. 666 *Family += (EAX >> 20) & 0xff; // Bits 20 - 27 667 // Examine extended model ID if family ID is 6 or F. 668 *Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19 669 } 670 } 671 672 static StringRef 673 getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, 674 const unsigned *Features, 675 unsigned *Type, unsigned *Subtype) { 676 auto testFeature = [&](unsigned F) { 677 return (Features[F / 32] & (1U << (F % 32))) != 0; 678 }; 679 680 StringRef CPU; 681 682 switch (Family) { 683 case 3: 684 CPU = "i386"; 685 break; 686 case 4: 687 CPU = "i486"; 688 break; 689 case 5: 690 if (testFeature(X86::FEATURE_MMX)) { 691 CPU = "pentium-mmx"; 692 break; 693 } 694 CPU = "pentium"; 695 break; 696 case 6: 697 switch (Model) { 698 case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile 699 // processor, Intel Core 2 Quad processor, Intel Core 2 Quad 700 // mobile processor, Intel Core 2 Extreme processor, Intel 701 // Pentium Dual-Core processor, Intel Xeon processor, model 702 // 0Fh. All processors are manufactured using the 65 nm process. 703 case 0x16: // Intel Celeron processor model 16h. All processors are 704 // manufactured using the 65 nm process 705 CPU = "core2"; 706 *Type = X86::INTEL_CORE2; 707 break; 708 case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model 709 // 17h. All processors are manufactured using the 45 nm process. 710 // 711 // 45nm: Penryn , Wolfdale, Yorkfield (XE) 712 case 0x1d: // Intel Xeon processor MP. All processors are manufactured using 713 // the 45 nm process. 714 CPU = "penryn"; 715 *Type = X86::INTEL_CORE2; 716 break; 717 case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All 718 // processors are manufactured using the 45 nm process. 719 case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz. 720 // As found in a Summer 2010 model iMac. 721 case 0x1f: 722 case 0x2e: // Nehalem EX 723 CPU = "nehalem"; 724 *Type = X86::INTEL_COREI7; 725 *Subtype = X86::INTEL_COREI7_NEHALEM; 726 break; 727 case 0x25: // Intel Core i7, laptop version. 728 case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All 729 // processors are manufactured using the 32 nm process. 730 case 0x2f: // Westmere EX 731 CPU = "westmere"; 732 *Type = X86::INTEL_COREI7; 733 *Subtype = X86::INTEL_COREI7_WESTMERE; 734 break; 735 case 0x2a: // Intel Core i7 processor. All processors are manufactured 736 // using the 32 nm process. 737 case 0x2d: 738 CPU = "sandybridge"; 739 *Type = X86::INTEL_COREI7; 740 *Subtype = X86::INTEL_COREI7_SANDYBRIDGE; 741 break; 742 case 0x3a: 743 case 0x3e: // Ivy Bridge EP 744 CPU = "ivybridge"; 745 *Type = X86::INTEL_COREI7; 746 *Subtype = X86::INTEL_COREI7_IVYBRIDGE; 747 break; 748 749 // Haswell: 750 case 0x3c: 751 case 0x3f: 752 case 0x45: 753 case 0x46: 754 CPU = "haswell"; 755 *Type = X86::INTEL_COREI7; 756 *Subtype = X86::INTEL_COREI7_HASWELL; 757 break; 758 759 // Broadwell: 760 case 0x3d: 761 case 0x47: 762 case 0x4f: 763 case 0x56: 764 CPU = "broadwell"; 765 *Type = X86::INTEL_COREI7; 766 *Subtype = X86::INTEL_COREI7_BROADWELL; 767 break; 768 769 // Skylake: 770 case 0x4e: // Skylake mobile 771 case 0x5e: // Skylake desktop 772 case 0x8e: // Kaby Lake mobile 773 case 0x9e: // Kaby Lake desktop 774 case 0xa5: // Comet Lake-H/S 775 case 0xa6: // Comet Lake-U 776 CPU = "skylake"; 777 *Type = X86::INTEL_COREI7; 778 *Subtype = X86::INTEL_COREI7_SKYLAKE; 779 break; 780 781 // Rocketlake: 782 case 0xa7: 783 CPU = "rocketlake"; 784 *Type = X86::INTEL_COREI7; 785 *Subtype = X86::INTEL_COREI7_ROCKETLAKE; 786 break; 787 788 // Skylake Xeon: 789 case 0x55: 790 *Type = X86::INTEL_COREI7; 791 if (testFeature(X86::FEATURE_AVX512BF16)) { 792 CPU = "cooperlake"; 793 *Subtype = X86::INTEL_COREI7_COOPERLAKE; 794 } else if (testFeature(X86::FEATURE_AVX512VNNI)) { 795 CPU = "cascadelake"; 796 *Subtype = X86::INTEL_COREI7_CASCADELAKE; 797 } else { 798 CPU = "skylake-avx512"; 799 *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512; 800 } 801 break; 802 803 // Cannonlake: 804 case 0x66: 805 CPU = "cannonlake"; 806 *Type = X86::INTEL_COREI7; 807 *Subtype = X86::INTEL_COREI7_CANNONLAKE; 808 break; 809 810 // Icelake: 811 case 0x7d: 812 case 0x7e: 813 CPU = "icelake-client"; 814 *Type = X86::INTEL_COREI7; 815 *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT; 816 break; 817 818 // Tigerlake: 819 case 0x8c: 820 case 0x8d: 821 CPU = "tigerlake"; 822 *Type = X86::INTEL_COREI7; 823 *Subtype = X86::INTEL_COREI7_TIGERLAKE; 824 break; 825 826 // Alderlake: 827 case 0x97: 828 case 0x9a: 829 // Gracemont 830 case 0xbe: 831 // Raptorlake: 832 case 0xb7: 833 case 0xba: 834 case 0xbf: 835 // Meteorlake: 836 case 0xaa: 837 case 0xac: 838 CPU = "alderlake"; 839 *Type = X86::INTEL_COREI7; 840 *Subtype = X86::INTEL_COREI7_ALDERLAKE; 841 break; 842 843 // Arrowlake: 844 case 0xc5: 845 CPU = "arrowlake"; 846 *Type = X86::INTEL_COREI7; 847 *Subtype = X86::INTEL_COREI7_ARROWLAKE; 848 break; 849 850 // Arrowlake S: 851 case 0xc6: 852 // Lunarlake: 853 case 0xbd: 854 CPU = "arrowlake-s"; 855 *Type = X86::INTEL_COREI7; 856 *Subtype = X86::INTEL_COREI7_ARROWLAKE_S; 857 break; 858 859 // Pantherlake: 860 case 0xcc: 861 CPU = "pantherlake"; 862 *Type = X86::INTEL_COREI7; 863 *Subtype = X86::INTEL_COREI7_PANTHERLAKE; 864 break; 865 866 // Graniterapids: 867 case 0xad: 868 CPU = "graniterapids"; 869 *Type = X86::INTEL_COREI7; 870 *Subtype = X86::INTEL_COREI7_GRANITERAPIDS; 871 break; 872 873 // Granite Rapids D: 874 case 0xae: 875 CPU = "graniterapids-d"; 876 *Type = X86::INTEL_COREI7; 877 *Subtype = X86::INTEL_COREI7_GRANITERAPIDS_D; 878 break; 879 880 // Icelake Xeon: 881 case 0x6a: 882 case 0x6c: 883 CPU = "icelake-server"; 884 *Type = X86::INTEL_COREI7; 885 *Subtype = X86::INTEL_COREI7_ICELAKE_SERVER; 886 break; 887 888 // Emerald Rapids: 889 case 0xcf: 890 // Sapphire Rapids: 891 case 0x8f: 892 CPU = "sapphirerapids"; 893 *Type = X86::INTEL_COREI7; 894 *Subtype = X86::INTEL_COREI7_SAPPHIRERAPIDS; 895 break; 896 897 case 0x1c: // Most 45 nm Intel Atom processors 898 case 0x26: // 45 nm Atom Lincroft 899 case 0x27: // 32 nm Atom Medfield 900 case 0x35: // 32 nm Atom Midview 901 case 0x36: // 32 nm Atom Midview 902 CPU = "bonnell"; 903 *Type = X86::INTEL_BONNELL; 904 break; 905 906 // Atom Silvermont codes from the Intel software optimization guide. 907 case 0x37: 908 case 0x4a: 909 case 0x4d: 910 case 0x5a: 911 case 0x5d: 912 case 0x4c: // really airmont 913 CPU = "silvermont"; 914 *Type = X86::INTEL_SILVERMONT; 915 break; 916 // Goldmont: 917 case 0x5c: // Apollo Lake 918 case 0x5f: // Denverton 919 CPU = "goldmont"; 920 *Type = X86::INTEL_GOLDMONT; 921 break; 922 case 0x7a: 923 CPU = "goldmont-plus"; 924 *Type = X86::INTEL_GOLDMONT_PLUS; 925 break; 926 case 0x86: 927 case 0x8a: // Lakefield 928 case 0x96: // Elkhart Lake 929 case 0x9c: // Jasper Lake 930 CPU = "tremont"; 931 *Type = X86::INTEL_TREMONT; 932 break; 933 934 // Sierraforest: 935 case 0xaf: 936 CPU = "sierraforest"; 937 *Type = X86::INTEL_SIERRAFOREST; 938 break; 939 940 // Grandridge: 941 case 0xb6: 942 CPU = "grandridge"; 943 *Type = X86::INTEL_GRANDRIDGE; 944 break; 945 946 // Clearwaterforest: 947 case 0xdd: 948 CPU = "clearwaterforest"; 949 *Type = X86::INTEL_CLEARWATERFOREST; 950 break; 951 952 // Xeon Phi (Knights Landing + Knights Mill): 953 case 0x57: 954 CPU = "knl"; 955 *Type = X86::INTEL_KNL; 956 break; 957 case 0x85: 958 CPU = "knm"; 959 *Type = X86::INTEL_KNM; 960 break; 961 962 default: // Unknown family 6 CPU, try to guess. 963 // Don't both with Type/Subtype here, they aren't used by the caller. 964 // They're used above to keep the code in sync with compiler-rt. 965 // TODO detect tigerlake host from model 966 if (testFeature(X86::FEATURE_AVX512VP2INTERSECT)) { 967 CPU = "tigerlake"; 968 } else if (testFeature(X86::FEATURE_AVX512VBMI2)) { 969 CPU = "icelake-client"; 970 } else if (testFeature(X86::FEATURE_AVX512VBMI)) { 971 CPU = "cannonlake"; 972 } else if (testFeature(X86::FEATURE_AVX512BF16)) { 973 CPU = "cooperlake"; 974 } else if (testFeature(X86::FEATURE_AVX512VNNI)) { 975 CPU = "cascadelake"; 976 } else if (testFeature(X86::FEATURE_AVX512VL)) { 977 CPU = "skylake-avx512"; 978 } else if (testFeature(X86::FEATURE_AVX512ER)) { 979 CPU = "knl"; 980 } else if (testFeature(X86::FEATURE_CLFLUSHOPT)) { 981 if (testFeature(X86::FEATURE_SHA)) 982 CPU = "goldmont"; 983 else 984 CPU = "skylake"; 985 } else if (testFeature(X86::FEATURE_ADX)) { 986 CPU = "broadwell"; 987 } else if (testFeature(X86::FEATURE_AVX2)) { 988 CPU = "haswell"; 989 } else if (testFeature(X86::FEATURE_AVX)) { 990 CPU = "sandybridge"; 991 } else if (testFeature(X86::FEATURE_SSE4_2)) { 992 if (testFeature(X86::FEATURE_MOVBE)) 993 CPU = "silvermont"; 994 else 995 CPU = "nehalem"; 996 } else if (testFeature(X86::FEATURE_SSE4_1)) { 997 CPU = "penryn"; 998 } else if (testFeature(X86::FEATURE_SSSE3)) { 999 if (testFeature(X86::FEATURE_MOVBE)) 1000 CPU = "bonnell"; 1001 else 1002 CPU = "core2"; 1003 } else if (testFeature(X86::FEATURE_64BIT)) { 1004 CPU = "core2"; 1005 } else if (testFeature(X86::FEATURE_SSE3)) { 1006 CPU = "yonah"; 1007 } else if (testFeature(X86::FEATURE_SSE2)) { 1008 CPU = "pentium-m"; 1009 } else if (testFeature(X86::FEATURE_SSE)) { 1010 CPU = "pentium3"; 1011 } else if (testFeature(X86::FEATURE_MMX)) { 1012 CPU = "pentium2"; 1013 } else { 1014 CPU = "pentiumpro"; 1015 } 1016 break; 1017 } 1018 break; 1019 case 15: { 1020 if (testFeature(X86::FEATURE_64BIT)) { 1021 CPU = "nocona"; 1022 break; 1023 } 1024 if (testFeature(X86::FEATURE_SSE3)) { 1025 CPU = "prescott"; 1026 break; 1027 } 1028 CPU = "pentium4"; 1029 break; 1030 } 1031 default: 1032 break; // Unknown. 1033 } 1034 1035 return CPU; 1036 } 1037 1038 static StringRef 1039 getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, 1040 const unsigned *Features, 1041 unsigned *Type, unsigned *Subtype) { 1042 auto testFeature = [&](unsigned F) { 1043 return (Features[F / 32] & (1U << (F % 32))) != 0; 1044 }; 1045 1046 StringRef CPU; 1047 1048 switch (Family) { 1049 case 4: 1050 CPU = "i486"; 1051 break; 1052 case 5: 1053 CPU = "pentium"; 1054 switch (Model) { 1055 case 6: 1056 case 7: 1057 CPU = "k6"; 1058 break; 1059 case 8: 1060 CPU = "k6-2"; 1061 break; 1062 case 9: 1063 case 13: 1064 CPU = "k6-3"; 1065 break; 1066 case 10: 1067 CPU = "geode"; 1068 break; 1069 } 1070 break; 1071 case 6: 1072 if (testFeature(X86::FEATURE_SSE)) { 1073 CPU = "athlon-xp"; 1074 break; 1075 } 1076 CPU = "athlon"; 1077 break; 1078 case 15: 1079 if (testFeature(X86::FEATURE_SSE3)) { 1080 CPU = "k8-sse3"; 1081 break; 1082 } 1083 CPU = "k8"; 1084 break; 1085 case 16: 1086 CPU = "amdfam10"; 1087 *Type = X86::AMDFAM10H; // "amdfam10" 1088 switch (Model) { 1089 case 2: 1090 *Subtype = X86::AMDFAM10H_BARCELONA; 1091 break; 1092 case 4: 1093 *Subtype = X86::AMDFAM10H_SHANGHAI; 1094 break; 1095 case 8: 1096 *Subtype = X86::AMDFAM10H_ISTANBUL; 1097 break; 1098 } 1099 break; 1100 case 20: 1101 CPU = "btver1"; 1102 *Type = X86::AMD_BTVER1; 1103 break; 1104 case 21: 1105 CPU = "bdver1"; 1106 *Type = X86::AMDFAM15H; 1107 if (Model >= 0x60 && Model <= 0x7f) { 1108 CPU = "bdver4"; 1109 *Subtype = X86::AMDFAM15H_BDVER4; 1110 break; // 60h-7Fh: Excavator 1111 } 1112 if (Model >= 0x30 && Model <= 0x3f) { 1113 CPU = "bdver3"; 1114 *Subtype = X86::AMDFAM15H_BDVER3; 1115 break; // 30h-3Fh: Steamroller 1116 } 1117 if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) { 1118 CPU = "bdver2"; 1119 *Subtype = X86::AMDFAM15H_BDVER2; 1120 break; // 02h, 10h-1Fh: Piledriver 1121 } 1122 if (Model <= 0x0f) { 1123 *Subtype = X86::AMDFAM15H_BDVER1; 1124 break; // 00h-0Fh: Bulldozer 1125 } 1126 break; 1127 case 22: 1128 CPU = "btver2"; 1129 *Type = X86::AMD_BTVER2; 1130 break; 1131 case 23: 1132 CPU = "znver1"; 1133 *Type = X86::AMDFAM17H; 1134 if ((Model >= 0x30 && Model <= 0x3f) || (Model == 0x47) || 1135 (Model >= 0x60 && Model <= 0x67) || (Model >= 0x68 && Model <= 0x6f) || 1136 (Model >= 0x70 && Model <= 0x7f) || (Model >= 0x84 && Model <= 0x87) || 1137 (Model >= 0x90 && Model <= 0x97) || (Model >= 0x98 && Model <= 0x9f) || 1138 (Model >= 0xa0 && Model <= 0xaf)) { 1139 // Family 17h Models 30h-3Fh (Starship) Zen 2 1140 // Family 17h Models 47h (Cardinal) Zen 2 1141 // Family 17h Models 60h-67h (Renoir) Zen 2 1142 // Family 17h Models 68h-6Fh (Lucienne) Zen 2 1143 // Family 17h Models 70h-7Fh (Matisse) Zen 2 1144 // Family 17h Models 84h-87h (ProjectX) Zen 2 1145 // Family 17h Models 90h-97h (VanGogh) Zen 2 1146 // Family 17h Models 98h-9Fh (Mero) Zen 2 1147 // Family 17h Models A0h-AFh (Mendocino) Zen 2 1148 CPU = "znver2"; 1149 *Subtype = X86::AMDFAM17H_ZNVER2; 1150 break; 1151 } 1152 if ((Model >= 0x10 && Model <= 0x1f) || (Model >= 0x20 && Model <= 0x2f)) { 1153 // Family 17h Models 10h-1Fh (Raven1) Zen 1154 // Family 17h Models 10h-1Fh (Picasso) Zen+ 1155 // Family 17h Models 20h-2Fh (Raven2 x86) Zen 1156 *Subtype = X86::AMDFAM17H_ZNVER1; 1157 break; 1158 } 1159 break; 1160 case 25: 1161 CPU = "znver3"; 1162 *Type = X86::AMDFAM19H; 1163 if (Model <= 0x0f || (Model >= 0x20 && Model <= 0x2f) || 1164 (Model >= 0x30 && Model <= 0x3f) || (Model >= 0x40 && Model <= 0x4f) || 1165 (Model >= 0x50 && Model <= 0x5f)) { 1166 // Family 19h Models 00h-0Fh (Genesis, Chagall) Zen 3 1167 // Family 19h Models 20h-2Fh (Vermeer) Zen 3 1168 // Family 19h Models 30h-3Fh (Badami) Zen 3 1169 // Family 19h Models 40h-4Fh (Rembrandt) Zen 3+ 1170 // Family 19h Models 50h-5Fh (Cezanne) Zen 3 1171 *Subtype = X86::AMDFAM19H_ZNVER3; 1172 break; 1173 } 1174 if ((Model >= 0x10 && Model <= 0x1f) || (Model >= 0x60 && Model <= 0x6f) || 1175 (Model >= 0x70 && Model <= 0x77) || (Model >= 0x78 && Model <= 0x7f) || 1176 (Model >= 0xa0 && Model <= 0xaf)) { 1177 // Family 19h Models 10h-1Fh (Stones; Storm Peak) Zen 4 1178 // Family 19h Models 60h-6Fh (Raphael) Zen 4 1179 // Family 19h Models 70h-77h (Phoenix, Hawkpoint1) Zen 4 1180 // Family 19h Models 78h-7Fh (Phoenix 2, Hawkpoint2) Zen 4 1181 // Family 19h Models A0h-AFh (Stones-Dense) Zen 4 1182 CPU = "znver4"; 1183 *Subtype = X86::AMDFAM19H_ZNVER4; 1184 break; // "znver4" 1185 } 1186 break; 1187 default: 1188 break; // Unknown AMD CPU. 1189 } 1190 1191 return CPU; 1192 } 1193 1194 static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, 1195 unsigned *Features) { 1196 unsigned EAX, EBX; 1197 1198 auto setFeature = [&](unsigned F) { 1199 Features[F / 32] |= 1U << (F % 32); 1200 }; 1201 1202 if ((EDX >> 15) & 1) 1203 setFeature(X86::FEATURE_CMOV); 1204 if ((EDX >> 23) & 1) 1205 setFeature(X86::FEATURE_MMX); 1206 if ((EDX >> 25) & 1) 1207 setFeature(X86::FEATURE_SSE); 1208 if ((EDX >> 26) & 1) 1209 setFeature(X86::FEATURE_SSE2); 1210 1211 if ((ECX >> 0) & 1) 1212 setFeature(X86::FEATURE_SSE3); 1213 if ((ECX >> 1) & 1) 1214 setFeature(X86::FEATURE_PCLMUL); 1215 if ((ECX >> 9) & 1) 1216 setFeature(X86::FEATURE_SSSE3); 1217 if ((ECX >> 12) & 1) 1218 setFeature(X86::FEATURE_FMA); 1219 if ((ECX >> 19) & 1) 1220 setFeature(X86::FEATURE_SSE4_1); 1221 if ((ECX >> 20) & 1) { 1222 setFeature(X86::FEATURE_SSE4_2); 1223 setFeature(X86::FEATURE_CRC32); 1224 } 1225 if ((ECX >> 23) & 1) 1226 setFeature(X86::FEATURE_POPCNT); 1227 if ((ECX >> 25) & 1) 1228 setFeature(X86::FEATURE_AES); 1229 1230 if ((ECX >> 22) & 1) 1231 setFeature(X86::FEATURE_MOVBE); 1232 1233 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV 1234 // indicates that the AVX registers will be saved and restored on context 1235 // switch, then we have full AVX support. 1236 const unsigned AVXBits = (1 << 27) | (1 << 28); 1237 bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) && 1238 ((EAX & 0x6) == 0x6); 1239 #if defined(__APPLE__) 1240 // Darwin lazily saves the AVX512 context on first use: trust that the OS will 1241 // save the AVX512 context if we use AVX512 instructions, even the bit is not 1242 // set right now. 1243 bool HasAVX512Save = true; 1244 #else 1245 // AVX512 requires additional context to be saved by the OS. 1246 bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0); 1247 #endif 1248 1249 if (HasAVX) 1250 setFeature(X86::FEATURE_AVX); 1251 1252 bool HasLeaf7 = 1253 MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); 1254 1255 if (HasLeaf7 && ((EBX >> 3) & 1)) 1256 setFeature(X86::FEATURE_BMI); 1257 if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX) 1258 setFeature(X86::FEATURE_AVX2); 1259 if (HasLeaf7 && ((EBX >> 8) & 1)) 1260 setFeature(X86::FEATURE_BMI2); 1261 if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save) 1262 setFeature(X86::FEATURE_AVX512F); 1263 if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save) 1264 setFeature(X86::FEATURE_AVX512DQ); 1265 if (HasLeaf7 && ((EBX >> 19) & 1)) 1266 setFeature(X86::FEATURE_ADX); 1267 if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save) 1268 setFeature(X86::FEATURE_AVX512IFMA); 1269 if (HasLeaf7 && ((EBX >> 23) & 1)) 1270 setFeature(X86::FEATURE_CLFLUSHOPT); 1271 if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save) 1272 setFeature(X86::FEATURE_AVX512PF); 1273 if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save) 1274 setFeature(X86::FEATURE_AVX512ER); 1275 if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save) 1276 setFeature(X86::FEATURE_AVX512CD); 1277 if (HasLeaf7 && ((EBX >> 29) & 1)) 1278 setFeature(X86::FEATURE_SHA); 1279 if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save) 1280 setFeature(X86::FEATURE_AVX512BW); 1281 if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save) 1282 setFeature(X86::FEATURE_AVX512VL); 1283 1284 if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save) 1285 setFeature(X86::FEATURE_AVX512VBMI); 1286 if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save) 1287 setFeature(X86::FEATURE_AVX512VBMI2); 1288 if (HasLeaf7 && ((ECX >> 8) & 1)) 1289 setFeature(X86::FEATURE_GFNI); 1290 if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX) 1291 setFeature(X86::FEATURE_VPCLMULQDQ); 1292 if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save) 1293 setFeature(X86::FEATURE_AVX512VNNI); 1294 if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save) 1295 setFeature(X86::FEATURE_AVX512BITALG); 1296 if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save) 1297 setFeature(X86::FEATURE_AVX512VPOPCNTDQ); 1298 1299 if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save) 1300 setFeature(X86::FEATURE_AVX5124VNNIW); 1301 if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save) 1302 setFeature(X86::FEATURE_AVX5124FMAPS); 1303 if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save) 1304 setFeature(X86::FEATURE_AVX512VP2INTERSECT); 1305 1306 // EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't 1307 // return all 0s for invalid subleaves so check the limit. 1308 bool HasLeaf7Subleaf1 = 1309 HasLeaf7 && EAX >= 1 && 1310 !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); 1311 if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save) 1312 setFeature(X86::FEATURE_AVX512BF16); 1313 1314 unsigned MaxExtLevel; 1315 getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); 1316 1317 bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && 1318 !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); 1319 if (HasExtLeaf1 && ((ECX >> 6) & 1)) 1320 setFeature(X86::FEATURE_SSE4_A); 1321 if (HasExtLeaf1 && ((ECX >> 11) & 1)) 1322 setFeature(X86::FEATURE_XOP); 1323 if (HasExtLeaf1 && ((ECX >> 16) & 1)) 1324 setFeature(X86::FEATURE_FMA4); 1325 1326 if (HasExtLeaf1 && ((EDX >> 29) & 1)) 1327 setFeature(X86::FEATURE_64BIT); 1328 } 1329 1330 StringRef sys::getHostCPUName() { 1331 unsigned MaxLeaf = 0; 1332 const VendorSignatures Vendor = getVendorSignature(&MaxLeaf); 1333 if (Vendor == VendorSignatures::UNKNOWN) 1334 return "generic"; 1335 1336 unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; 1337 getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX); 1338 1339 unsigned Family = 0, Model = 0; 1340 unsigned Features[(X86::CPU_FEATURE_MAX + 31) / 32] = {0}; 1341 detectX86FamilyModel(EAX, &Family, &Model); 1342 getAvailableFeatures(ECX, EDX, MaxLeaf, Features); 1343 1344 // These aren't consumed in this file, but we try to keep some source code the 1345 // same or similar to compiler-rt. 1346 unsigned Type = 0; 1347 unsigned Subtype = 0; 1348 1349 StringRef CPU; 1350 1351 if (Vendor == VendorSignatures::GENUINE_INTEL) { 1352 CPU = getIntelProcessorTypeAndSubtype(Family, Model, Features, &Type, 1353 &Subtype); 1354 } else if (Vendor == VendorSignatures::AUTHENTIC_AMD) { 1355 CPU = getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type, 1356 &Subtype); 1357 } 1358 1359 if (!CPU.empty()) 1360 return CPU; 1361 1362 return "generic"; 1363 } 1364 1365 #elif defined(__APPLE__) && defined(__powerpc__) 1366 StringRef sys::getHostCPUName() { 1367 host_basic_info_data_t hostInfo; 1368 mach_msg_type_number_t infoCount; 1369 1370 infoCount = HOST_BASIC_INFO_COUNT; 1371 mach_port_t hostPort = mach_host_self(); 1372 host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo, 1373 &infoCount); 1374 mach_port_deallocate(mach_task_self(), hostPort); 1375 1376 if (hostInfo.cpu_type != CPU_TYPE_POWERPC) 1377 return "generic"; 1378 1379 switch (hostInfo.cpu_subtype) { 1380 case CPU_SUBTYPE_POWERPC_601: 1381 return "601"; 1382 case CPU_SUBTYPE_POWERPC_602: 1383 return "602"; 1384 case CPU_SUBTYPE_POWERPC_603: 1385 return "603"; 1386 case CPU_SUBTYPE_POWERPC_603e: 1387 return "603e"; 1388 case CPU_SUBTYPE_POWERPC_603ev: 1389 return "603ev"; 1390 case CPU_SUBTYPE_POWERPC_604: 1391 return "604"; 1392 case CPU_SUBTYPE_POWERPC_604e: 1393 return "604e"; 1394 case CPU_SUBTYPE_POWERPC_620: 1395 return "620"; 1396 case CPU_SUBTYPE_POWERPC_750: 1397 return "750"; 1398 case CPU_SUBTYPE_POWERPC_7400: 1399 return "7400"; 1400 case CPU_SUBTYPE_POWERPC_7450: 1401 return "7450"; 1402 case CPU_SUBTYPE_POWERPC_970: 1403 return "970"; 1404 default:; 1405 } 1406 1407 return "generic"; 1408 } 1409 #elif defined(__linux__) && defined(__powerpc__) 1410 StringRef sys::getHostCPUName() { 1411 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1412 StringRef Content = P ? P->getBuffer() : ""; 1413 return detail::getHostCPUNameForPowerPC(Content); 1414 } 1415 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__)) 1416 StringRef sys::getHostCPUName() { 1417 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1418 StringRef Content = P ? P->getBuffer() : ""; 1419 return detail::getHostCPUNameForARM(Content); 1420 } 1421 #elif defined(__linux__) && defined(__s390x__) 1422 StringRef sys::getHostCPUName() { 1423 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1424 StringRef Content = P ? P->getBuffer() : ""; 1425 return detail::getHostCPUNameForS390x(Content); 1426 } 1427 #elif defined(__MVS__) 1428 StringRef sys::getHostCPUName() { 1429 // Get pointer to Communications Vector Table (CVT). 1430 // The pointer is located at offset 16 of the Prefixed Save Area (PSA). 1431 // It is stored as 31 bit pointer and will be zero-extended to 64 bit. 1432 int *StartToCVTOffset = reinterpret_cast<int *>(0x10); 1433 // Since its stored as a 31-bit pointer, get the 4 bytes from the start 1434 // of address. 1435 int ReadValue = *StartToCVTOffset; 1436 // Explicitly clear the high order bit. 1437 ReadValue = (ReadValue & 0x7FFFFFFF); 1438 char *CVT = reinterpret_cast<char *>(ReadValue); 1439 // The model number is located in the CVT prefix at offset -6 and stored as 1440 // signless packed decimal. 1441 uint16_t Id = *(uint16_t *)&CVT[-6]; 1442 // Convert number to integer. 1443 Id = decodePackedBCD<uint16_t>(Id, false); 1444 // Check for vector support. It's stored in field CVTFLAG5 (offset 244), 1445 // bit CVTVEF (X'80'). The facilities list is part of the PSA but the vector 1446 // extension can only be used if bit CVTVEF is on. 1447 bool HaveVectorSupport = CVT[244] & 0x80; 1448 return getCPUNameFromS390Model(Id, HaveVectorSupport); 1449 } 1450 #elif defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) 1451 #define CPUFAMILY_ARM_SWIFT 0x1e2d6381 1452 #define CPUFAMILY_ARM_CYCLONE 0x37a09642 1453 #define CPUFAMILY_ARM_TYPHOON 0x2c91a47e 1454 #define CPUFAMILY_ARM_TWISTER 0x92fb37c8 1455 #define CPUFAMILY_ARM_HURRICANE 0x67ceee93 1456 #define CPUFAMILY_ARM_MONSOON_MISTRAL 0xe81e7ef6 1457 #define CPUFAMILY_ARM_VORTEX_TEMPEST 0x07d34b9f 1458 #define CPUFAMILY_ARM_LIGHTNING_THUNDER 0x462504d2 1459 #define CPUFAMILY_ARM_FIRESTORM_ICESTORM 0x1b588bb3 1460 1461 StringRef sys::getHostCPUName() { 1462 uint32_t Family; 1463 size_t Length = sizeof(Family); 1464 sysctlbyname("hw.cpufamily", &Family, &Length, NULL, 0); 1465 1466 switch (Family) { 1467 case CPUFAMILY_ARM_SWIFT: 1468 return "swift"; 1469 case CPUFAMILY_ARM_CYCLONE: 1470 return "apple-a7"; 1471 case CPUFAMILY_ARM_TYPHOON: 1472 return "apple-a8"; 1473 case CPUFAMILY_ARM_TWISTER: 1474 return "apple-a9"; 1475 case CPUFAMILY_ARM_HURRICANE: 1476 return "apple-a10"; 1477 case CPUFAMILY_ARM_MONSOON_MISTRAL: 1478 return "apple-a11"; 1479 case CPUFAMILY_ARM_VORTEX_TEMPEST: 1480 return "apple-a12"; 1481 case CPUFAMILY_ARM_LIGHTNING_THUNDER: 1482 return "apple-a13"; 1483 case CPUFAMILY_ARM_FIRESTORM_ICESTORM: 1484 return "apple-m1"; 1485 default: 1486 // Default to the newest CPU we know about. 1487 return "apple-m1"; 1488 } 1489 } 1490 #elif defined(_AIX) 1491 StringRef sys::getHostCPUName() { 1492 switch (_system_configuration.implementation) { 1493 case POWER_4: 1494 if (_system_configuration.version == PV_4_3) 1495 return "970"; 1496 return "pwr4"; 1497 case POWER_5: 1498 if (_system_configuration.version == PV_5) 1499 return "pwr5"; 1500 return "pwr5x"; 1501 case POWER_6: 1502 if (_system_configuration.version == PV_6_Compat) 1503 return "pwr6"; 1504 return "pwr6x"; 1505 case POWER_7: 1506 return "pwr7"; 1507 case POWER_8: 1508 return "pwr8"; 1509 case POWER_9: 1510 return "pwr9"; 1511 // TODO: simplify this once the macro is available in all OS levels. 1512 #ifdef POWER_10 1513 case POWER_10: 1514 #else 1515 case 0x40000: 1516 #endif 1517 return "pwr10"; 1518 default: 1519 return "generic"; 1520 } 1521 } 1522 #elif defined(__loongarch__) 1523 StringRef sys::getHostCPUName() { 1524 // Use processor id to detect cpu name. 1525 uint32_t processor_id; 1526 __asm__("cpucfg %[prid], $zero\n\t" : [prid] "=r"(processor_id)); 1527 // Refer PRID_SERIES_MASK in linux kernel: arch/loongarch/include/asm/cpu.h. 1528 switch (processor_id & 0xf000) { 1529 case 0xc000: // Loongson 64bit, 4-issue 1530 return "la464"; 1531 // TODO: Others. 1532 default: 1533 break; 1534 } 1535 return "generic"; 1536 } 1537 #elif defined(__riscv) 1538 StringRef sys::getHostCPUName() { 1539 #if defined(__linux__) 1540 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1541 StringRef Content = P ? P->getBuffer() : ""; 1542 return detail::getHostCPUNameForRISCV(Content); 1543 #else 1544 #if __riscv_xlen == 64 1545 return "generic-rv64"; 1546 #elif __riscv_xlen == 32 1547 return "generic-rv32"; 1548 #else 1549 #error "Unhandled value of __riscv_xlen" 1550 #endif 1551 #endif 1552 } 1553 #elif defined(__sparc__) 1554 #if defined(__linux__) 1555 StringRef sys::detail::getHostCPUNameForSPARC(StringRef ProcCpuinfoContent) { 1556 SmallVector<StringRef> Lines; 1557 ProcCpuinfoContent.split(Lines, "\n"); 1558 1559 // Look for cpu line to determine cpu name 1560 StringRef Cpu; 1561 for (unsigned I = 0, E = Lines.size(); I != E; ++I) { 1562 if (Lines[I].starts_with("cpu")) { 1563 Cpu = Lines[I].substr(5).ltrim("\t :"); 1564 break; 1565 } 1566 } 1567 1568 return StringSwitch<const char *>(Cpu) 1569 .StartsWith("SuperSparc", "supersparc") 1570 .StartsWith("HyperSparc", "hypersparc") 1571 .StartsWith("SpitFire", "ultrasparc") 1572 .StartsWith("BlackBird", "ultrasparc") 1573 .StartsWith("Sabre", " ultrasparc") 1574 .StartsWith("Hummingbird", "ultrasparc") 1575 .StartsWith("Cheetah", "ultrasparc3") 1576 .StartsWith("Jalapeno", "ultrasparc3") 1577 .StartsWith("Jaguar", "ultrasparc3") 1578 .StartsWith("Panther", "ultrasparc3") 1579 .StartsWith("Serrano", "ultrasparc3") 1580 .StartsWith("UltraSparc T1", "niagara") 1581 .StartsWith("UltraSparc T2", "niagara2") 1582 .StartsWith("UltraSparc T3", "niagara3") 1583 .StartsWith("UltraSparc T4", "niagara4") 1584 .StartsWith("UltraSparc T5", "niagara4") 1585 .StartsWith("LEON", "leon3") 1586 // niagara7/m8 not supported by LLVM yet. 1587 .StartsWith("SPARC-M7", "niagara4" /* "niagara7" */) 1588 .StartsWith("SPARC-S7", "niagara4" /* "niagara7" */) 1589 .StartsWith("SPARC-M8", "niagara4" /* "m8" */) 1590 .Default("generic"); 1591 } 1592 #endif 1593 1594 StringRef sys::getHostCPUName() { 1595 #if defined(__linux__) 1596 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1597 StringRef Content = P ? P->getBuffer() : ""; 1598 return detail::getHostCPUNameForSPARC(Content); 1599 #elif defined(__sun__) && defined(__svr4__) 1600 char *buf = NULL; 1601 kstat_ctl_t *kc; 1602 kstat_t *ksp; 1603 kstat_named_t *brand = NULL; 1604 1605 kc = kstat_open(); 1606 if (kc != NULL) { 1607 ksp = kstat_lookup(kc, const_cast<char *>("cpu_info"), -1, NULL); 1608 if (ksp != NULL && kstat_read(kc, ksp, NULL) != -1 && 1609 ksp->ks_type == KSTAT_TYPE_NAMED) 1610 brand = 1611 (kstat_named_t *)kstat_data_lookup(ksp, const_cast<char *>("brand")); 1612 if (brand != NULL && brand->data_type == KSTAT_DATA_STRING) 1613 buf = KSTAT_NAMED_STR_PTR(brand); 1614 } 1615 kstat_close(kc); 1616 1617 return StringSwitch<const char *>(buf) 1618 .Case("TMS390S10", "supersparc") // Texas Instruments microSPARC I 1619 .Case("TMS390Z50", "supersparc") // Texas Instruments SuperSPARC I 1620 .Case("TMS390Z55", 1621 "supersparc") // Texas Instruments SuperSPARC I with SuperCache 1622 .Case("MB86904", "supersparc") // Fujitsu microSPARC II 1623 .Case("MB86907", "supersparc") // Fujitsu TurboSPARC 1624 .Case("RT623", "hypersparc") // Ross hyperSPARC 1625 .Case("RT625", "hypersparc") 1626 .Case("RT626", "hypersparc") 1627 .Case("UltraSPARC-I", "ultrasparc") 1628 .Case("UltraSPARC-II", "ultrasparc") 1629 .Case("UltraSPARC-IIe", "ultrasparc") 1630 .Case("UltraSPARC-IIi", "ultrasparc") 1631 .Case("SPARC64-III", "ultrasparc") 1632 .Case("SPARC64-IV", "ultrasparc") 1633 .Case("UltraSPARC-III", "ultrasparc3") 1634 .Case("UltraSPARC-III+", "ultrasparc3") 1635 .Case("UltraSPARC-IIIi", "ultrasparc3") 1636 .Case("UltraSPARC-IIIi+", "ultrasparc3") 1637 .Case("UltraSPARC-IV", "ultrasparc3") 1638 .Case("UltraSPARC-IV+", "ultrasparc3") 1639 .Case("SPARC64-V", "ultrasparc3") 1640 .Case("SPARC64-VI", "ultrasparc3") 1641 .Case("SPARC64-VII", "ultrasparc3") 1642 .Case("UltraSPARC-T1", "niagara") 1643 .Case("UltraSPARC-T2", "niagara2") 1644 .Case("UltraSPARC-T2", "niagara2") 1645 .Case("UltraSPARC-T2+", "niagara2") 1646 .Case("SPARC-T3", "niagara3") 1647 .Case("SPARC-T4", "niagara4") 1648 .Case("SPARC-T5", "niagara4") 1649 // niagara7/m8 not supported by LLVM yet. 1650 .Case("SPARC-M7", "niagara4" /* "niagara7" */) 1651 .Case("SPARC-S7", "niagara4" /* "niagara7" */) 1652 .Case("SPARC-M8", "niagara4" /* "m8" */) 1653 .Default("generic"); 1654 #else 1655 return "generic"; 1656 #endif 1657 } 1658 #else 1659 StringRef sys::getHostCPUName() { return "generic"; } 1660 namespace llvm { 1661 namespace sys { 1662 namespace detail { 1663 namespace x86 { 1664 1665 VendorSignatures getVendorSignature(unsigned *MaxLeaf) { 1666 return VendorSignatures::UNKNOWN; 1667 } 1668 1669 } // namespace x86 1670 } // namespace detail 1671 } // namespace sys 1672 } // namespace llvm 1673 #endif 1674 1675 #if defined(__i386__) || defined(_M_IX86) || \ 1676 defined(__x86_64__) || defined(_M_X64) 1677 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { 1678 unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; 1679 unsigned MaxLevel; 1680 1681 if (getX86CpuIDAndInfo(0, &MaxLevel, &EBX, &ECX, &EDX) || MaxLevel < 1) 1682 return false; 1683 1684 getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX); 1685 1686 Features["cx8"] = (EDX >> 8) & 1; 1687 Features["cmov"] = (EDX >> 15) & 1; 1688 Features["mmx"] = (EDX >> 23) & 1; 1689 Features["fxsr"] = (EDX >> 24) & 1; 1690 Features["sse"] = (EDX >> 25) & 1; 1691 Features["sse2"] = (EDX >> 26) & 1; 1692 1693 Features["sse3"] = (ECX >> 0) & 1; 1694 Features["pclmul"] = (ECX >> 1) & 1; 1695 Features["ssse3"] = (ECX >> 9) & 1; 1696 Features["cx16"] = (ECX >> 13) & 1; 1697 Features["sse4.1"] = (ECX >> 19) & 1; 1698 Features["sse4.2"] = (ECX >> 20) & 1; 1699 Features["crc32"] = Features["sse4.2"]; 1700 Features["movbe"] = (ECX >> 22) & 1; 1701 Features["popcnt"] = (ECX >> 23) & 1; 1702 Features["aes"] = (ECX >> 25) & 1; 1703 Features["rdrnd"] = (ECX >> 30) & 1; 1704 1705 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV 1706 // indicates that the AVX registers will be saved and restored on context 1707 // switch, then we have full AVX support. 1708 bool HasXSave = ((ECX >> 27) & 1) && !getX86XCR0(&EAX, &EDX); 1709 bool HasAVXSave = HasXSave && ((ECX >> 28) & 1) && ((EAX & 0x6) == 0x6); 1710 #if defined(__APPLE__) 1711 // Darwin lazily saves the AVX512 context on first use: trust that the OS will 1712 // save the AVX512 context if we use AVX512 instructions, even the bit is not 1713 // set right now. 1714 bool HasAVX512Save = true; 1715 #else 1716 // AVX512 requires additional context to be saved by the OS. 1717 bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0); 1718 #endif 1719 // AMX requires additional context to be saved by the OS. 1720 const unsigned AMXBits = (1 << 17) | (1 << 18); 1721 bool HasAMXSave = HasXSave && ((EAX & AMXBits) == AMXBits); 1722 1723 Features["avx"] = HasAVXSave; 1724 Features["fma"] = ((ECX >> 12) & 1) && HasAVXSave; 1725 // Only enable XSAVE if OS has enabled support for saving YMM state. 1726 Features["xsave"] = ((ECX >> 26) & 1) && HasAVXSave; 1727 Features["f16c"] = ((ECX >> 29) & 1) && HasAVXSave; 1728 1729 unsigned MaxExtLevel; 1730 getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); 1731 1732 bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && 1733 !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); 1734 Features["sahf"] = HasExtLeaf1 && ((ECX >> 0) & 1); 1735 Features["lzcnt"] = HasExtLeaf1 && ((ECX >> 5) & 1); 1736 Features["sse4a"] = HasExtLeaf1 && ((ECX >> 6) & 1); 1737 Features["prfchw"] = HasExtLeaf1 && ((ECX >> 8) & 1); 1738 Features["xop"] = HasExtLeaf1 && ((ECX >> 11) & 1) && HasAVXSave; 1739 Features["lwp"] = HasExtLeaf1 && ((ECX >> 15) & 1); 1740 Features["fma4"] = HasExtLeaf1 && ((ECX >> 16) & 1) && HasAVXSave; 1741 Features["tbm"] = HasExtLeaf1 && ((ECX >> 21) & 1); 1742 Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1); 1743 1744 Features["64bit"] = HasExtLeaf1 && ((EDX >> 29) & 1); 1745 1746 // Miscellaneous memory related features, detected by 1747 // using the 0x80000008 leaf of the CPUID instruction 1748 bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 && 1749 !getX86CpuIDAndInfo(0x80000008, &EAX, &EBX, &ECX, &EDX); 1750 Features["clzero"] = HasExtLeaf8 && ((EBX >> 0) & 1); 1751 Features["rdpru"] = HasExtLeaf8 && ((EBX >> 4) & 1); 1752 Features["wbnoinvd"] = HasExtLeaf8 && ((EBX >> 9) & 1); 1753 1754 bool HasLeaf7 = 1755 MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); 1756 1757 Features["fsgsbase"] = HasLeaf7 && ((EBX >> 0) & 1); 1758 Features["sgx"] = HasLeaf7 && ((EBX >> 2) & 1); 1759 Features["bmi"] = HasLeaf7 && ((EBX >> 3) & 1); 1760 // AVX2 is only supported if we have the OS save support from AVX. 1761 Features["avx2"] = HasLeaf7 && ((EBX >> 5) & 1) && HasAVXSave; 1762 Features["bmi2"] = HasLeaf7 && ((EBX >> 8) & 1); 1763 Features["invpcid"] = HasLeaf7 && ((EBX >> 10) & 1); 1764 Features["rtm"] = HasLeaf7 && ((EBX >> 11) & 1); 1765 // AVX512 is only supported if the OS supports the context save for it. 1766 Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save; 1767 Features["avx512dq"] = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save; 1768 Features["rdseed"] = HasLeaf7 && ((EBX >> 18) & 1); 1769 Features["adx"] = HasLeaf7 && ((EBX >> 19) & 1); 1770 Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save; 1771 Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1); 1772 Features["clwb"] = HasLeaf7 && ((EBX >> 24) & 1); 1773 Features["avx512pf"] = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save; 1774 Features["avx512er"] = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save; 1775 Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save; 1776 Features["sha"] = HasLeaf7 && ((EBX >> 29) & 1); 1777 Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save; 1778 Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save; 1779 1780 Features["prefetchwt1"] = HasLeaf7 && ((ECX >> 0) & 1); 1781 Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save; 1782 Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1); 1783 Features["waitpkg"] = HasLeaf7 && ((ECX >> 5) & 1); 1784 Features["avx512vbmi2"] = HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save; 1785 Features["shstk"] = HasLeaf7 && ((ECX >> 7) & 1); 1786 Features["gfni"] = HasLeaf7 && ((ECX >> 8) & 1); 1787 Features["vaes"] = HasLeaf7 && ((ECX >> 9) & 1) && HasAVXSave; 1788 Features["vpclmulqdq"] = HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave; 1789 Features["avx512vnni"] = HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save; 1790 Features["avx512bitalg"] = HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save; 1791 Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save; 1792 Features["rdpid"] = HasLeaf7 && ((ECX >> 22) & 1); 1793 Features["kl"] = HasLeaf7 && ((ECX >> 23) & 1); // key locker 1794 Features["cldemote"] = HasLeaf7 && ((ECX >> 25) & 1); 1795 Features["movdiri"] = HasLeaf7 && ((ECX >> 27) & 1); 1796 Features["movdir64b"] = HasLeaf7 && ((ECX >> 28) & 1); 1797 Features["enqcmd"] = HasLeaf7 && ((ECX >> 29) & 1); 1798 1799 Features["uintr"] = HasLeaf7 && ((EDX >> 5) & 1); 1800 Features["avx512vp2intersect"] = 1801 HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save; 1802 Features["serialize"] = HasLeaf7 && ((EDX >> 14) & 1); 1803 Features["tsxldtrk"] = HasLeaf7 && ((EDX >> 16) & 1); 1804 // There are two CPUID leafs which information associated with the pconfig 1805 // instruction: 1806 // EAX=0x7, ECX=0x0 indicates the availability of the instruction (via the 18th 1807 // bit of EDX), while the EAX=0x1b leaf returns information on the 1808 // availability of specific pconfig leafs. 1809 // The target feature here only refers to the the first of these two. 1810 // Users might need to check for the availability of specific pconfig 1811 // leaves using cpuid, since that information is ignored while 1812 // detecting features using the "-march=native" flag. 1813 // For more info, see X86 ISA docs. 1814 Features["pconfig"] = HasLeaf7 && ((EDX >> 18) & 1); 1815 Features["amx-bf16"] = HasLeaf7 && ((EDX >> 22) & 1) && HasAMXSave; 1816 Features["avx512fp16"] = HasLeaf7 && ((EDX >> 23) & 1) && HasAVX512Save; 1817 Features["amx-tile"] = HasLeaf7 && ((EDX >> 24) & 1) && HasAMXSave; 1818 Features["amx-int8"] = HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave; 1819 // EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't 1820 // return all 0s for invalid subleaves so check the limit. 1821 bool HasLeaf7Subleaf1 = 1822 HasLeaf7 && EAX >= 1 && 1823 !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); 1824 Features["sha512"] = HasLeaf7Subleaf1 && ((EAX >> 0) & 1); 1825 Features["sm3"] = HasLeaf7Subleaf1 && ((EAX >> 1) & 1); 1826 Features["sm4"] = HasLeaf7Subleaf1 && ((EAX >> 2) & 1); 1827 Features["raoint"] = HasLeaf7Subleaf1 && ((EAX >> 3) & 1); 1828 Features["avxvnni"] = HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave; 1829 Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save; 1830 Features["amx-fp16"] = HasLeaf7Subleaf1 && ((EAX >> 21) & 1) && HasAMXSave; 1831 Features["cmpccxadd"] = HasLeaf7Subleaf1 && ((EAX >> 7) & 1); 1832 Features["hreset"] = HasLeaf7Subleaf1 && ((EAX >> 22) & 1); 1833 Features["avxifma"] = HasLeaf7Subleaf1 && ((EAX >> 23) & 1) && HasAVXSave; 1834 Features["avxvnniint8"] = HasLeaf7Subleaf1 && ((EDX >> 4) & 1) && HasAVXSave; 1835 Features["avxneconvert"] = HasLeaf7Subleaf1 && ((EDX >> 5) & 1) && HasAVXSave; 1836 Features["amx-complex"] = HasLeaf7Subleaf1 && ((EDX >> 8) & 1) && HasAMXSave; 1837 Features["avxvnniint16"] = HasLeaf7Subleaf1 && ((EDX >> 10) & 1) && HasAVXSave; 1838 Features["prefetchi"] = HasLeaf7Subleaf1 && ((EDX >> 14) & 1); 1839 Features["usermsr"] = HasLeaf7Subleaf1 && ((EDX >> 15) & 1); 1840 Features["avx10.1-256"] = HasLeaf7Subleaf1 && ((EDX >> 19) & 1); 1841 1842 bool HasLeafD = MaxLevel >= 0xd && 1843 !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX); 1844 1845 // Only enable XSAVE if OS has enabled support for saving YMM state. 1846 Features["xsaveopt"] = HasLeafD && ((EAX >> 0) & 1) && HasAVXSave; 1847 Features["xsavec"] = HasLeafD && ((EAX >> 1) & 1) && HasAVXSave; 1848 Features["xsaves"] = HasLeafD && ((EAX >> 3) & 1) && HasAVXSave; 1849 1850 bool HasLeaf14 = MaxLevel >= 0x14 && 1851 !getX86CpuIDAndInfoEx(0x14, 0x0, &EAX, &EBX, &ECX, &EDX); 1852 1853 Features["ptwrite"] = HasLeaf14 && ((EBX >> 4) & 1); 1854 1855 bool HasLeaf19 = 1856 MaxLevel >= 0x19 && !getX86CpuIDAndInfo(0x19, &EAX, &EBX, &ECX, &EDX); 1857 Features["widekl"] = HasLeaf7 && HasLeaf19 && ((EBX >> 2) & 1); 1858 1859 bool HasLeaf24 = 1860 MaxLevel >= 0x24 && !getX86CpuIDAndInfo(0x24, &EAX, &EBX, &ECX, &EDX); 1861 Features["avx10.1-512"] = 1862 Features["avx10.1-256"] && HasLeaf24 && ((EBX >> 18) & 1); 1863 1864 return true; 1865 } 1866 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__)) 1867 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { 1868 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1869 if (!P) 1870 return false; 1871 1872 SmallVector<StringRef, 32> Lines; 1873 P->getBuffer().split(Lines, "\n"); 1874 1875 SmallVector<StringRef, 32> CPUFeatures; 1876 1877 // Look for the CPU features. 1878 for (unsigned I = 0, E = Lines.size(); I != E; ++I) 1879 if (Lines[I].starts_with("Features")) { 1880 Lines[I].split(CPUFeatures, ' '); 1881 break; 1882 } 1883 1884 #if defined(__aarch64__) 1885 // Keep track of which crypto features we have seen 1886 enum { CAP_AES = 0x1, CAP_PMULL = 0x2, CAP_SHA1 = 0x4, CAP_SHA2 = 0x8 }; 1887 uint32_t crypto = 0; 1888 #endif 1889 1890 for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) { 1891 StringRef LLVMFeatureStr = StringSwitch<StringRef>(CPUFeatures[I]) 1892 #if defined(__aarch64__) 1893 .Case("asimd", "neon") 1894 .Case("fp", "fp-armv8") 1895 .Case("crc32", "crc") 1896 .Case("atomics", "lse") 1897 .Case("sve", "sve") 1898 .Case("sve2", "sve2") 1899 #else 1900 .Case("half", "fp16") 1901 .Case("neon", "neon") 1902 .Case("vfpv3", "vfp3") 1903 .Case("vfpv3d16", "vfp3d16") 1904 .Case("vfpv4", "vfp4") 1905 .Case("idiva", "hwdiv-arm") 1906 .Case("idivt", "hwdiv") 1907 #endif 1908 .Default(""); 1909 1910 #if defined(__aarch64__) 1911 // We need to check crypto separately since we need all of the crypto 1912 // extensions to enable the subtarget feature 1913 if (CPUFeatures[I] == "aes") 1914 crypto |= CAP_AES; 1915 else if (CPUFeatures[I] == "pmull") 1916 crypto |= CAP_PMULL; 1917 else if (CPUFeatures[I] == "sha1") 1918 crypto |= CAP_SHA1; 1919 else if (CPUFeatures[I] == "sha2") 1920 crypto |= CAP_SHA2; 1921 #endif 1922 1923 if (LLVMFeatureStr != "") 1924 Features[LLVMFeatureStr] = true; 1925 } 1926 1927 #if defined(__aarch64__) 1928 // If we have all crypto bits we can add the feature 1929 if (crypto == (CAP_AES | CAP_PMULL | CAP_SHA1 | CAP_SHA2)) 1930 Features["crypto"] = true; 1931 #endif 1932 1933 return true; 1934 } 1935 #elif defined(_WIN32) && (defined(__aarch64__) || defined(_M_ARM64)) 1936 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { 1937 if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)) 1938 Features["neon"] = true; 1939 if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE)) 1940 Features["crc"] = true; 1941 if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) 1942 Features["crypto"] = true; 1943 1944 return true; 1945 } 1946 #elif defined(__linux__) && defined(__loongarch__) 1947 #include <sys/auxv.h> 1948 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { 1949 unsigned long hwcap = getauxval(AT_HWCAP); 1950 bool HasFPU = hwcap & (1UL << 3); // HWCAP_LOONGARCH_FPU 1951 uint32_t cpucfg2 = 0x2; 1952 __asm__("cpucfg %[cpucfg2], %[cpucfg2]\n\t" : [cpucfg2] "+r"(cpucfg2)); 1953 1954 Features["f"] = HasFPU && (cpucfg2 & (1U << 1)); // CPUCFG.2.FP_SP 1955 Features["d"] = HasFPU && (cpucfg2 & (1U << 2)); // CPUCFG.2.FP_DP 1956 1957 Features["lsx"] = hwcap & (1UL << 4); // HWCAP_LOONGARCH_LSX 1958 Features["lasx"] = hwcap & (1UL << 5); // HWCAP_LOONGARCH_LASX 1959 Features["lvz"] = hwcap & (1UL << 9); // HWCAP_LOONGARCH_LVZ 1960 1961 return true; 1962 } 1963 #else 1964 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { return false; } 1965 #endif 1966 1967 #if __APPLE__ 1968 /// \returns the \p triple, but with the Host's arch spliced in. 1969 static Triple withHostArch(Triple T) { 1970 #if defined(__arm__) 1971 T.setArch(Triple::arm); 1972 T.setArchName("arm"); 1973 #elif defined(__arm64e__) 1974 T.setArch(Triple::aarch64, Triple::AArch64SubArch_arm64e); 1975 T.setArchName("arm64e"); 1976 #elif defined(__aarch64__) 1977 T.setArch(Triple::aarch64); 1978 T.setArchName("arm64"); 1979 #elif defined(__x86_64h__) 1980 T.setArch(Triple::x86_64); 1981 T.setArchName("x86_64h"); 1982 #elif defined(__x86_64__) 1983 T.setArch(Triple::x86_64); 1984 T.setArchName("x86_64"); 1985 #elif defined(__i386__) 1986 T.setArch(Triple::x86); 1987 T.setArchName("i386"); 1988 #elif defined(__powerpc__) 1989 T.setArch(Triple::ppc); 1990 T.setArchName("powerpc"); 1991 #else 1992 # error "Unimplemented host arch fixup" 1993 #endif 1994 return T; 1995 } 1996 #endif 1997 1998 std::string sys::getProcessTriple() { 1999 std::string TargetTripleString = updateTripleOSVersion(LLVM_HOST_TRIPLE); 2000 Triple PT(Triple::normalize(TargetTripleString)); 2001 2002 #if __APPLE__ 2003 /// In Universal builds, LLVM_HOST_TRIPLE will have the wrong arch in one of 2004 /// the slices. This fixes that up. 2005 PT = withHostArch(PT); 2006 #endif 2007 2008 if (sizeof(void *) == 8 && PT.isArch32Bit()) 2009 PT = PT.get64BitArchVariant(); 2010 if (sizeof(void *) == 4 && PT.isArch64Bit()) 2011 PT = PT.get32BitArchVariant(); 2012 2013 return PT.str(); 2014 } 2015 2016 void sys::printDefaultTargetAndDetectedCPU(raw_ostream &OS) { 2017 #if LLVM_VERSION_PRINTER_SHOW_HOST_TARGET_INFO 2018 std::string CPU = std::string(sys::getHostCPUName()); 2019 if (CPU == "generic") 2020 CPU = "(unknown)"; 2021 OS << " Default target: " << sys::getDefaultTargetTriple() << '\n' 2022 << " Host CPU: " << CPU << '\n'; 2023 #endif 2024 } 2025