1 //===-- Host.cpp - Implement OS Host Detection ------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the operating system Host detection. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/TargetParser/Host.h" 14 #include "llvm/ADT/SmallVector.h" 15 #include "llvm/ADT/StringMap.h" 16 #include "llvm/ADT/StringRef.h" 17 #include "llvm/ADT/StringSwitch.h" 18 #include "llvm/Config/llvm-config.h" 19 #include "llvm/Support/MemoryBuffer.h" 20 #include "llvm/Support/raw_ostream.h" 21 #include "llvm/TargetParser/Triple.h" 22 #include "llvm/TargetParser/X86TargetParser.h" 23 #include <string.h> 24 25 // Include the platform-specific parts of this class. 
#ifdef LLVM_ON_UNIX
#include "Unix/Host.inc"
#include <sched.h>
#endif
#ifdef _WIN32
#include "Windows/Host.inc"
#endif
#ifdef _MSC_VER
#include <intrin.h>
#endif
#ifdef __MVS__
#include "llvm/Support/BCD.h"
#endif
#if defined(__APPLE__)
#include <mach/host_info.h>
#include <mach/mach.h>
#include <mach/mach_host.h>
#include <mach/machine.h>
#include <sys/param.h>
#include <sys/sysctl.h>
#endif
#ifdef _AIX
#include <sys/systemcfg.h>
#endif
#if defined(__sun__) && defined(__svr4__)
#include <kstat.h>
#endif

#define DEBUG_TYPE "host-detection"

//===----------------------------------------------------------------------===//
//
// Implementations of the CPU detection routines
//
//===----------------------------------------------------------------------===//

using namespace llvm;

/// Read the contents of /proc/cpuinfo into a memory buffer.
///
/// \returns the buffer on success. On failure a diagnostic containing the
/// error message is written to llvm::errs() and nullptr is returned.
static std::unique_ptr<llvm::MemoryBuffer>
    LLVM_ATTRIBUTE_UNUSED getProcCpuinfoContent() {
  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
      llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
  if (std::error_code EC = Text.getError()) {
    llvm::errs() << "Can't read "
                 << "/proc/cpuinfo: " << EC.message() << "\n";
    return nullptr;
  }
  return std::move(*Text);
}

/// Map the first "cpu : <name>" line of \p ProcCpuinfoContent to an LLVM CPU
/// name.
///
/// The name token ends at the first space, tab, comma or newline. Returns
/// "generic" when no such line is found or when the token is not one of the
/// names listed in the switch below.
StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) {
  // Access to the Processor Version Register (PVR) on PowerPC is privileged,
  // and so we must use an operating-system interface to determine the current
  // processor type. On Linux, this is exposed through the /proc/cpuinfo file.
  const char *generic = "generic";

  // The cpu line is second (after the 'processor: 0' line), so if this
  // buffer is too small then something has changed (or is wrong).
  StringRef::const_iterator CPUInfoStart = ProcCpuinfoContent.begin();
  StringRef::const_iterator CPUInfoEnd = ProcCpuinfoContent.end();

  // Scan cursor; every dereference below is guarded by CIP < CPUInfoEnd.
  StringRef::const_iterator CIP = CPUInfoStart;

  // Start and length of the cpu name token; CPUStart stays null until a
  // matching line has been found.
  StringRef::const_iterator CPUStart = nullptr;
  size_t CPULen = 0;

  // We need to find the first line which starts with cpu, spaces, and a colon.
  // After the colon, there may be some additional spaces and then the cpu type.
  while (CIP < CPUInfoEnd && CPUStart == nullptr) {
    // Step over the newline left behind by the previous iteration.
    if (CIP < CPUInfoEnd && *CIP == '\n')
      ++CIP;

    if (CIP < CPUInfoEnd && *CIP == 'c') {
      ++CIP;
      if (CIP < CPUInfoEnd && *CIP == 'p') {
        ++CIP;
        if (CIP < CPUInfoEnd && *CIP == 'u') {
          ++CIP;
          while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
            ++CIP;

          if (CIP < CPUInfoEnd && *CIP == ':') {
            ++CIP;
            while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
              ++CIP;

            if (CIP < CPUInfoEnd) {
              CPUStart = CIP;
              while (CIP < CPUInfoEnd && (*CIP != ' ' && *CIP != '\t' &&
                                          *CIP != ',' && *CIP != '\n'))
                ++CIP;
              CPULen = CIP - CPUStart;
            }
          }
        }
      }
    }

    // No match on this line: skip the remainder of it.
    if (CPUStart == nullptr)
      while (CIP < CPUInfoEnd && *CIP != '\n')
        ++CIP;
  }

  if (CPUStart == nullptr)
    return generic;

  return StringSwitch<const char *>(StringRef(CPUStart, CPULen))
      .Case("604e", "604e")
      .Case("604", "604")
      .Case("7400", "7400")
      .Case("7410", "7400")
      .Case("7447", "7400")
      .Case("7455", "7450")
      .Case("G4", "g4")
      .Case("POWER4", "970")
      .Case("PPC970FX", "970")
      .Case("PPC970MP", "970")
      .Case("G5", "g5")
      .Case("POWER5", "g5")
      .Case("A2", "a2")
      .Case("POWER6", "pwr6")
      .Case("POWER7", "pwr7")
      .Case("POWER8", "pwr8")
      .Case("POWER8E", "pwr8")
      .Case("POWER8NVL", "pwr8")
      .Case("POWER9", "pwr9")
      .Case("POWER10", "pwr10")
      // FIXME: If we get a simulator or machine with the capabilities of
      // mcpu=future, we should revisit this and add the name reported by the
      // simulator/machine.
      .Default(generic);
}

/// Derive an LLVM CPU name from the "CPU implementer", "Hardware" and
/// "CPU part" lines of \p ProcCpuinfoContent.
///
/// The scanning loop has no early exit, so when a key appears on several
/// lines the last occurrence is the one that is used. Returns "generic" for
/// an unknown implementer or an unknown part number (except for the Samsung
/// branch, whose default is "exynos-m3").
StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
  // The cpuid register on arm is not accessible from user space. On Linux,
  // it is exposed through the /proc/cpuinfo file.

  // Read 32 lines from /proc/cpuinfo, which should contain the CPU part line
  // in all cases.
  // NOTE(review): the split below is not limited to 32 lines; 32 is only the
  // SmallVector's template argument.
  SmallVector<StringRef, 32> Lines;
  ProcCpuinfoContent.split(Lines, "\n");

  // Look for the CPU implementer line.
  StringRef Implementer;
  StringRef Hardware;
  StringRef Part;
  for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
    if (Lines[I].starts_with("CPU implementer"))
      Implementer = Lines[I].substr(15).ltrim("\t :");
    if (Lines[I].starts_with("Hardware"))
      Hardware = Lines[I].substr(8).ltrim("\t :");
    if (Lines[I].starts_with("CPU part"))
      Part = Lines[I].substr(8).ltrim("\t :");
  }

  if (Implementer == "0x41") { // ARM Ltd.
    // MSM8992/8994 may give cpu part for the core that the kernel is running on,
    // which is nondeterministic and wrong. Always return cortex-a53 for these SoC.
    // NOTE(review): the comment names MSM8992/8994 but the code tests for
    // MSM8994 and MSM8996 -- confirm which pair is intended.
    if (Hardware.ends_with("MSM8994") || Hardware.ends_with("MSM8996"))
      return "cortex-a53";


    // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
    // values correspond to the "Part number" in the CP15/c0 register. The
    // contents are specified in the various processor manuals.
    // This corresponds to the Main ID Register in Technical Reference Manuals.
    // and is used in programs like sys-utils
    return StringSwitch<const char *>(Part)
        .Case("0x926", "arm926ej-s")
        .Case("0xb02", "mpcore")
        .Case("0xb36", "arm1136j-s")
        .Case("0xb56", "arm1156t2-s")
        .Case("0xb76", "arm1176jz-s")
        .Case("0xc08", "cortex-a8")
        .Case("0xc09", "cortex-a9")
        .Case("0xc0f", "cortex-a15")
        .Case("0xc20", "cortex-m0")
        .Case("0xc23", "cortex-m3")
        .Case("0xc24", "cortex-m4")
        .Case("0xd24", "cortex-m52")
        .Case("0xd22", "cortex-m55")
        .Case("0xd02", "cortex-a34")
        .Case("0xd04", "cortex-a35")
        .Case("0xd03", "cortex-a53")
        .Case("0xd05", "cortex-a55")
        .Case("0xd46", "cortex-a510")
        .Case("0xd80", "cortex-a520")
        .Case("0xd07", "cortex-a57")
        .Case("0xd08", "cortex-a72")
        .Case("0xd09", "cortex-a73")
        .Case("0xd0a", "cortex-a75")
        .Case("0xd0b", "cortex-a76")
        .Case("0xd0d", "cortex-a77")
        .Case("0xd41", "cortex-a78")
        .Case("0xd47", "cortex-a710")
        .Case("0xd4d", "cortex-a715")
        .Case("0xd81", "cortex-a720")
        .Case("0xd44", "cortex-x1")
        .Case("0xd4c", "cortex-x1c")
        .Case("0xd48", "cortex-x2")
        .Case("0xd4e", "cortex-x3")
        .Case("0xd82", "cortex-x4")
        .Case("0xd0c", "neoverse-n1")
        .Case("0xd49", "neoverse-n2")
        .Case("0xd40", "neoverse-v1")
        .Case("0xd4f", "neoverse-v2")
        .Default("generic");
  }

  if (Implementer == "0x42" || Implementer == "0x43") { // Broadcom | Cavium.
    // Each part number is listed both with and without a leading zero.
    return StringSwitch<const char *>(Part)
        .Case("0x516", "thunderx2t99")
        .Case("0x0516", "thunderx2t99")
        .Case("0xaf", "thunderx2t99")
        .Case("0x0af", "thunderx2t99")
        .Case("0xa1", "thunderxt88")
        .Case("0x0a1", "thunderxt88")
        .Default("generic");
  }

  if (Implementer == "0x46") { // Fujitsu Ltd.
    return StringSwitch<const char *>(Part)
        .Case("0x001", "a64fx")
        .Default("generic");
  }

  if (Implementer == "0x4e") { // NVIDIA Corporation
    return StringSwitch<const char *>(Part)
        .Case("0x004", "carmel")
        .Default("generic");
  }

  if (Implementer == "0x48") // HiSilicon Technologies, Inc.
    // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
    // values correspond to the "Part number" in the CP15/c0 register. The
    // contents are specified in the various processor manuals.
    return StringSwitch<const char *>(Part)
        .Case("0xd01", "tsv110")
        .Default("generic");

  if (Implementer == "0x51") // Qualcomm Technologies, Inc.
    // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
    // values correspond to the "Part number" in the CP15/c0 register. The
    // contents are specified in the various processor manuals.
    return StringSwitch<const char *>(Part)
        .Case("0x06f", "krait") // APQ8064
        .Case("0x201", "kryo")
        .Case("0x205", "kryo")
        .Case("0x211", "kryo")
        .Case("0x800", "cortex-a73") // Kryo 2xx Gold
        .Case("0x801", "cortex-a73") // Kryo 2xx Silver
        .Case("0x802", "cortex-a75") // Kryo 3xx Gold
        .Case("0x803", "cortex-a75") // Kryo 3xx Silver
        .Case("0x804", "cortex-a76") // Kryo 4xx Gold
        .Case("0x805", "cortex-a76") // Kryo 4xx/5xx Silver
        .Case("0xc00", "falkor")
        .Case("0xc01", "saphira")
        .Default("generic");
  if (Implementer == "0x53") { // Samsung Electronics Co., Ltd.
    // The Exynos chips have a convoluted ID scheme that doesn't seem to follow
    // any predictive pattern across variants and parts.
    // Note: this numeric Part shadows the StringRef Part declared above.
    unsigned Variant = 0, Part = 0;

    // Look for the CPU variant line, whose value is a 1 digit hexadecimal
    // number, corresponding to the Variant bits in the CP15/C0 register.
    // The loop variable is a copy, so consume_front does not modify Lines.
    for (auto I : Lines)
      if (I.consume_front("CPU variant"))
        I.ltrim("\t :").getAsInteger(0, Variant);

    // Look for the CPU part line, whose value is a 3 digit hexadecimal
    // number, corresponding to the PartNum bits in the CP15/C0 register.
    for (auto I : Lines)
      if (I.consume_front("CPU part"))
        I.ltrim("\t :").getAsInteger(0, Part);

    // Combine both fields into a single key: variant above bit 12, part below.
    unsigned Exynos = (Variant << 12) | Part;
    switch (Exynos) {
    default:
      // Default by falling through to Exynos M3.
      [[fallthrough]];
    case 0x1002:
      return "exynos-m3";
    case 0x1003:
      return "exynos-m4";
    }
  }

  if (Implementer == "0x6d") { // Microsoft Corporation.
    // The Microsoft Azure Cobalt 100 CPU is handled as a Neoverse N2.
    return StringSwitch<const char *>(Part)
        .Case("0xd49", "neoverse-n2")
        .Default("generic");
  }

  if (Implementer == "0xc0") { // Ampere Computing
    return StringSwitch<const char *>(Part)
        .Case("0xac3", "ampere1")
        .Case("0xac4", "ampere1a")
        .Default("generic");
  }

  return "generic";
}

namespace {
/// Map an s390x machine type number to an LLVM CPU name.
///
/// For the z13 group and everything after it, the name is downgraded to
/// "zEC12" when \p HaveVectorSupport is false. Ids that are not listed share
/// the last label and are therefore treated like 3931/3932.
StringRef getCPUNameFromS390Model(unsigned int Id, bool HaveVectorSupport) {
  switch (Id) {
    case 2064:  // z900 not supported by LLVM
    case 2066:
    case 2084:  // z990 not supported by LLVM
    case 2086:
    case 2094:  // z9-109 not supported by LLVM
    case 2096:
      return "generic";
    case 2097:
    case 2098:
      return "z10";
    case 2817:
    case 2818:
      return "z196";
    case 2827:
    case 2828:
      return "zEC12";
    case 2964:
    case 2965:
      return HaveVectorSupport? "z13" : "zEC12";
    case 3906:
    case 3907:
      return HaveVectorSupport? "z14" : "zEC12";
    case 8561:
    case 8562:
      return HaveVectorSupport? "z15" : "zEC12";
    case 3931:
    case 3932:
    default:
      return HaveVectorSupport? "z16" : "zEC12";
  }
}
} // end anonymous namespace

/// Derive the s390x CPU name from \p ProcCpuinfoContent.
///
/// Vector support is taken from a "vx" entry on the first "features" line
/// that contains a colon; the machine type is parsed as a decimal number
/// following "machine = " on the first line starting with "processor ".
/// Returns "generic" when that line is missing or the number cannot be parsed.
StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) {
  // STIDP is a privileged operation, so use /proc/cpuinfo instead.

  // The "processor 0:" line comes after a fair amount of other information,
  // including a cache breakdown, but this should be plenty.
  SmallVector<StringRef, 32> Lines;
  ProcCpuinfoContent.split(Lines, "\n");

  // Look for the CPU features.
  SmallVector<StringRef, 32> CPUFeatures;
  for (unsigned I = 0, E = Lines.size(); I != E; ++I)
    if (Lines[I].starts_with("features")) {
      size_t Pos = Lines[I].find(':');
      if (Pos != StringRef::npos) {
        Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' ');
        break;
      }
    }

  // We need to check for the presence of vector support independently of
  // the machine type, since we may only use the vector register set when
  // supported by the kernel (and hypervisor).
  bool HaveVectorSupport = false;
  for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
    if (CPUFeatures[I] == "vx")
      HaveVectorSupport = true;
  }

  // Now check the processor machine type.
  for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
    if (Lines[I].starts_with("processor ")) {
      size_t Pos = Lines[I].find("machine = ");
      if (Pos != StringRef::npos) {
        Pos += sizeof("machine = ") - 1;
        unsigned int Id;
        // getAsInteger returns true on failure, hence the negation.
        if (!Lines[I].drop_front(Pos).getAsInteger(10, Id))
          return getCPUNameFromS390Model(Id, HaveVectorSupport);
      }
      // Only the first "processor " line is examined.
      break;
    }
  }

  return "generic";
}

/// Derive the RISC-V CPU name from the first line of \p ProcCpuinfoContent
/// that starts with "uarch". Returns "generic" when there is no such line or
/// its value is not listed below.
StringRef sys::detail::getHostCPUNameForRISCV(StringRef ProcCpuinfoContent) {
  // There are 24 lines in /proc/cpuinfo
  SmallVector<StringRef> Lines;
  ProcCpuinfoContent.split(Lines, "\n");

  // Look for uarch line to determine cpu name
  StringRef UArch;
  for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
    if (Lines[I].starts_with("uarch")) {
      UArch = Lines[I].substr(5).ltrim("\t :");
      break;
    }
  }

  return StringSwitch<const char *>(UArch)
      .Case("sifive,u74-mc", "sifive-u74")
      .Case("sifive,bullet0", "sifive-u74")
      .Default("generic");
}

/// Determine the BPF CPU version supported by the running kernel.
///
/// Anywhere other than Linux on x86-64 this returns "generic". Otherwise it
/// tries to load two five-instruction socket-filter programs through the bpf
/// syscall: the first contains a BPF_JMP32_REG instruction, the second a
/// BPF_JMP_REG(BPF_JLT) instruction. The result is "v3" if the first load
/// succeeds, "v2" if only the second succeeds, and "v1" if both fail.
StringRef sys::detail::getHostCPUNameForBPF() {
#if !defined(__linux__) || !defined(__x86_64__)
  return "generic";
#else
  uint8_t v3_insns[40] __attribute__ ((aligned (8))) =
      /* BPF_MOV64_IMM(BPF_REG_0, 0) */
    { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
      /* BPF_MOV64_IMM(BPF_REG_2, 1) */
      0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
      /* BPF_JMP32_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
      0xae, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
      /* BPF_MOV64_IMM(BPF_REG_0, 1) */
      0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
      /* BPF_EXIT_INSN() */
      0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };

  uint8_t v2_insns[40] __attribute__ ((aligned (8))) =
      /* BPF_MOV64_IMM(BPF_REG_0, 0) */
    { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
      /* BPF_MOV64_IMM(BPF_REG_2, 1) */
      0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
      /* BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
      0xad, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
      /* BPF_MOV64_IMM(BPF_REG_0, 1) */
      0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
      /* BPF_EXIT_INSN() */
      0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };

  // Local argument record handed to the BPF_PROG_LOAD command.
  // NOTE(review): presumably mirrors the leading fields of the kernel's
  // bpf_attr for program loading -- confirm against the kernel headers.
  struct bpf_prog_load_attr {
    uint32_t prog_type;
    uint32_t insn_cnt;
    uint64_t insns;
    uint64_t license;
    uint32_t log_level;
    uint32_t log_size;
    uint64_t log_buf;
    uint32_t kern_version;
    uint32_t prog_flags;
  } attr = {};
  attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
  attr.insn_cnt = 5;
  attr.insns = (uint64_t)v3_insns;
  attr.license = (uint64_t)"DUMMY";

  int fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr,
                   sizeof(attr));
  if (fd >= 0) {
    // The program is never run; only the success of the load matters.
    close(fd);
    return "v3";
  }

  /* Clear the whole attr in case its content changed by syscall. */
  memset(&attr, 0, sizeof(attr));
  attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
  attr.insn_cnt = 5;
  attr.insns = (uint64_t)v2_insns;
  attr.license = (uint64_t)"DUMMY";
  fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, sizeof(attr));
  if (fd >= 0) {
    close(fd);
    return "v2";
  }
  return "v1";
#endif
}

#if defined(__i386__) || defined(_M_IX86) || \
    defined(__x86_64__) || defined(_M_X64)

// The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
// Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID
// support. Consequently, for i386, the presence of CPUID is checked first
// via the corresponding eflags bit.
// Removal of cpuid.h header motivated by PR30384
// Header cpuid.h and method __get_cpuid_max are not used in llvm, clang, openmp
// or test-suite, but are used in external projects e.g.
// libstdcxx
/// Report whether the CPUID instruction may be executed.
///
/// Only the 32-bit x86 GNU/Clang build performs a real probe: it flips bit
/// 0x00200000 of EFLAGS and checks whether the value read back differs from
/// the original. Every other configuration returns true unconditionally.
static bool isCpuIdSupported() {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__i386__)
  int __cpuid_supported;
  __asm__(" pushfl\n"
          " popl %%eax\n"
          " movl %%eax,%%ecx\n"
          " xorl $0x00200000,%%eax\n"
          " pushl %%eax\n"
          " popfl\n"
          " pushfl\n"
          " popl %%eax\n"
          " movl $0,%0\n"
          " cmpl %%eax,%%ecx\n"
          " je 1f\n"
          " movl $1,%0\n"
          "1:"
          : "=r"(__cpuid_supported)
          :
          : "eax", "ecx");
  if (!__cpuid_supported)
    return false;
#endif
  return true;
#endif
  return true;
}

/// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in
/// the specified arguments. If we can't run cpuid on the host, return true.
/// (So a false return value means the outputs were written.)
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif defined(__i386__)
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#else
  return true;
#endif
#elif defined(_MSC_VER)
  // The MSVC intrinsic is portable across x86 and x64.
  int registers[4];
  __cpuid(registers, value);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
  return false;
#else
  return true;
#endif
}

namespace llvm {
namespace sys {
namespace detail {
namespace x86 {

/// Identify the CPU vendor from CPUID leaf 0.
///
/// \param MaxLeaf optional output. When non-null it is first set to 0 and
/// then receives the EAX value returned by leaf 0; when null a local
/// variable is used instead.
/// \returns GENUINE_INTEL or AUTHENTIC_AMD when the vendor string matches,
/// and UNKNOWN when CPUID is unavailable, the query fails, the reported
/// maximum leaf is below 1, or the vendor string is anything else.
VendorSignatures getVendorSignature(unsigned *MaxLeaf) {
  unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
  if (MaxLeaf == nullptr)
    MaxLeaf = &EAX;
  else
    *MaxLeaf = 0;

  if (!isCpuIdSupported())
    return VendorSignatures::UNKNOWN;

  if (getX86CpuIDAndInfo(0, MaxLeaf, &EBX, &ECX, &EDX) || *MaxLeaf < 1)
    return VendorSignatures::UNKNOWN;

  // "Genu ineI ntel"
  if (EBX == 0x756e6547 && EDX == 0x49656e69 && ECX == 0x6c65746e)
    return VendorSignatures::GENUINE_INTEL;

  // "Auth enti cAMD"
  if (EBX == 0x68747541 && EDX == 0x69746e65 && ECX == 0x444d4163)
    return VendorSignatures::AUTHENTIC_AMD;

  return VendorSignatures::UNKNOWN;
}

} // namespace x86
} // namespace detail
} // namespace sys
} // namespace llvm

using namespace llvm::sys::detail::x86;

/// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
/// the 4 values in the specified arguments. If we can't run cpuid on the host,
/// return true.
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif defined(__i386__)
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#else
  return true;
#endif
#elif defined(_MSC_VER)
  int registers[4];
  __cpuidex(registers, value, subleaf);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
  return false;
#else
  return true;
#endif
}

// Read control register 0 (XCR0). Used to detect features such as AVX.
// Returns false when the two output halves were written and true when no
// implementation is available for the current compiler.
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
  // Check xgetbv; this uses a .byte sequence instead of the instruction
  // directly because older assemblers do not include support for xgetbv and
  // there is no easy way to conditionally compile based on the assembler used.
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = Result;
  *rEDX = Result >> 32;
  return false;
#else
  return true;
#endif
}

/// Decode the family and model numbers from the EAX value passed in.
///
/// The base fields are bits 8-11 (family) and 4-7 (model). The extended
/// model bits 16-19 are folded in for families 6 and 0xf; the extended
/// family bits 20-27 are added only for family 0xf.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  *Family = (EAX >> 8) & 0xf; // Bits 8 - 11
  *Model = (EAX >> 4) & 0xf;  // Bits 4 - 7
  if (*Family == 6 || *Family == 0xf) {
    if (*Family == 0xf)
      // Examine extended family ID if family ID is F.
      *Family += (EAX >> 20) & 0xff; // Bits 20 - 27
    // Examine extended model ID if family ID is 6 or F.
    *Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
  }
}

/// Map an Intel family/model pair to an LLVM CPU name.
///
/// \param Features bitset of X86::FEATURE_* values, 32 bits per array word.
/// \param Type, Subtype written only by the cases that recognise a specific
/// model; they are left untouched for unknown models and for the families
/// other than 6.
/// \returns the CPU name, or an empty StringRef for an unknown family.
static StringRef
getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
                                const unsigned *Features,
                                unsigned *Type, unsigned *Subtype) {
  auto testFeature = [&](unsigned F) {
    return (Features[F / 32] & (1U << (F % 32))) != 0;
  };

  StringRef CPU;

  switch (Family) {
  case 3:
    CPU = "i386";
    break;
  case 4:
    CPU = "i486";
    break;
  case 5:
    if (testFeature(X86::FEATURE_MMX)) {
      CPU = "pentium-mmx";
      break;
    }
    CPU = "pentium";
    break;
  case 6:
    switch (Model) {
    case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
               // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
               // mobile processor, Intel Core 2 Extreme processor, Intel
               // Pentium Dual-Core processor, Intel Xeon processor, model
               // 0Fh. All processors are manufactured using the 65 nm process.
    case 0x16: // Intel Celeron processor model 16h. All processors are
               // manufactured using the 65 nm process
      CPU = "core2";
      *Type = X86::INTEL_CORE2;
      break;
    case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
               // 17h. All processors are manufactured using the 45 nm process.
               //
               // 45nm: Penryn , Wolfdale, Yorkfield (XE)
    case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
               // the 45 nm process.
      CPU = "penryn";
      *Type = X86::INTEL_CORE2;
      break;
    case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
               // processors are manufactured using the 45 nm process.
    case 0x1e: // Intel(R) Core(TM) i7 CPU         870  @ 2.93GHz.
               // As found in a Summer 2010 model iMac.
    case 0x1f:
    case 0x2e: // Nehalem EX
      CPU = "nehalem";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_NEHALEM;
      break;
    case 0x25: // Intel Core i7, laptop version.
    case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
               // processors are manufactured using the 32 nm process.
    case 0x2f: // Westmere EX
      CPU = "westmere";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_WESTMERE;
      break;
    case 0x2a: // Intel Core i7 processor. All processors are manufactured
               // using the 32 nm process.
    case 0x2d:
      CPU = "sandybridge";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_SANDYBRIDGE;
      break;
    case 0x3a:
    case 0x3e: // Ivy Bridge EP
      CPU = "ivybridge";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_IVYBRIDGE;
      break;

    // Haswell:
    case 0x3c:
    case 0x3f:
    case 0x45:
    case 0x46:
      CPU = "haswell";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_HASWELL;
      break;

    // Broadwell:
    case 0x3d:
    case 0x47:
    case 0x4f:
    case 0x56:
      CPU = "broadwell";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_BROADWELL;
      break;

    // Skylake:
    case 0x4e: // Skylake mobile
    case 0x5e: // Skylake desktop
    case 0x8e: // Kaby Lake mobile
    case 0x9e: // Kaby Lake desktop
    case 0xa5: // Comet Lake-H/S
    case 0xa6: // Comet Lake-U
      CPU = "skylake";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_SKYLAKE;
      break;

    // Rocketlake:
    case 0xa7:
      CPU = "rocketlake";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_ROCKETLAKE;
      break;

    // Skylake Xeon:
    // One model number covers three CPUs; they are told apart by features.
    case 0x55:
      *Type = X86::INTEL_COREI7;
      if (testFeature(X86::FEATURE_AVX512BF16)) {
        CPU = "cooperlake";
        *Subtype = X86::INTEL_COREI7_COOPERLAKE;
      } else if (testFeature(X86::FEATURE_AVX512VNNI)) {
        CPU = "cascadelake";
        *Subtype = X86::INTEL_COREI7_CASCADELAKE;
      } else {
        CPU = "skylake-avx512";
        *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512;
      }
      break;

    // Cannonlake:
    case 0x66:
      CPU = "cannonlake";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_CANNONLAKE;
      break;

    // Icelake:
    case 0x7d:
    case 0x7e:
      CPU = "icelake-client";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT;
      break;

    // Tigerlake:
    case 0x8c:
    case 0x8d:
      CPU = "tigerlake";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_TIGERLAKE;
      break;

    // Alderlake:
    case 0x97:
    case 0x9a:
    // Gracemont
    case 0xbe:
    // Raptorlake:
    case 0xb7:
    case 0xba:
    case 0xbf:
    // Meteorlake:
    case 0xaa:
    case 0xac:
      CPU = "alderlake";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_ALDERLAKE;
      break;

    // Arrowlake:
    case 0xc5:
      CPU = "arrowlake";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_ARROWLAKE;
      break;

    // Arrowlake S:
    case 0xc6:
    // Lunarlake:
    case 0xbd:
      CPU = "arrowlake-s";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_ARROWLAKE_S;
      break;

    // Pantherlake:
    case 0xcc:
      CPU = "pantherlake";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_PANTHERLAKE;
      break;

    // Graniterapids:
    case 0xad:
      CPU = "graniterapids";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_GRANITERAPIDS;
      break;

    // Granite Rapids D:
    case 0xae:
      CPU = "graniterapids-d";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_GRANITERAPIDS_D;
      break;

    // Icelake Xeon:
    case 0x6a:
    case 0x6c:
      CPU = "icelake-server";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_ICELAKE_SERVER;
      break;

    // Emerald Rapids:
    case 0xcf:
    // Sapphire Rapids:
    case 0x8f:
      CPU = "sapphirerapids";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_SAPPHIRERAPIDS;
      break;

    case 0x1c: // Most 45 nm Intel Atom processors
    case 0x26: // 45 nm Atom Lincroft
    case 0x27: // 32 nm Atom Medfield
    case 0x35: // 32 nm Atom Midview
    case 0x36: // 32 nm Atom Midview
      CPU = "bonnell";
      *Type = X86::INTEL_BONNELL;
      break;

    // Atom Silvermont codes from the Intel software optimization guide.
    case 0x37:
    case 0x4a:
    case 0x4d:
    case 0x5a:
    case 0x5d:
    case 0x4c: // really airmont
      CPU = "silvermont";
      *Type = X86::INTEL_SILVERMONT;
      break;
    // Goldmont:
    case 0x5c: // Apollo Lake
    case 0x5f: // Denverton
      CPU = "goldmont";
      *Type = X86::INTEL_GOLDMONT;
      break;
    case 0x7a:
      CPU = "goldmont-plus";
      *Type = X86::INTEL_GOLDMONT_PLUS;
      break;
    case 0x86:
    case 0x8a: // Lakefield
    case 0x96: // Elkhart Lake
    case 0x9c: // Jasper Lake
      CPU = "tremont";
      *Type = X86::INTEL_TREMONT;
      break;

    // Sierraforest:
    case 0xaf:
      CPU = "sierraforest";
      *Type = X86::INTEL_SIERRAFOREST;
      break;

    // Grandridge:
    case 0xb6:
      CPU = "grandridge";
      *Type = X86::INTEL_GRANDRIDGE;
      break;

    // Clearwaterforest:
    case 0xdd:
      CPU = "clearwaterforest";
      *Type = X86::INTEL_CLEARWATERFOREST;
      break;

    // Xeon Phi (Knights Landing + Knights Mill):
    case 0x57:
      CPU = "knl";
      *Type = X86::INTEL_KNL;
      break;
    case 0x85:
      CPU = "knm";
      *Type = X86::INTEL_KNM;
      break;

    default: // Unknown family 6 CPU, try to guess.
      // Don't bother with Type/Subtype here, they aren't used by the caller.
      // They're used above to keep the code in sync with compiler-rt.
      // The first feature that tests true decides the name, so the order of
      // this chain matters.
      // TODO detect tigerlake host from model
      if (testFeature(X86::FEATURE_AVX512VP2INTERSECT)) {
        CPU = "tigerlake";
      } else if (testFeature(X86::FEATURE_AVX512VBMI2)) {
        CPU = "icelake-client";
      } else if (testFeature(X86::FEATURE_AVX512VBMI)) {
        CPU = "cannonlake";
      } else if (testFeature(X86::FEATURE_AVX512BF16)) {
        CPU = "cooperlake";
      } else if (testFeature(X86::FEATURE_AVX512VNNI)) {
        CPU = "cascadelake";
      } else if (testFeature(X86::FEATURE_AVX512VL)) {
        CPU = "skylake-avx512";
      } else if (testFeature(X86::FEATURE_AVX512ER)) {
        CPU = "knl";
      } else if (testFeature(X86::FEATURE_CLFLUSHOPT)) {
        if (testFeature(X86::FEATURE_SHA))
          CPU = "goldmont";
        else
          CPU = "skylake";
      } else if (testFeature(X86::FEATURE_ADX)) {
        CPU = "broadwell";
      } else if (testFeature(X86::FEATURE_AVX2)) {
        CPU = "haswell";
      } else if (testFeature(X86::FEATURE_AVX)) {
        CPU = "sandybridge";
      } else if (testFeature(X86::FEATURE_SSE4_2)) {
        if (testFeature(X86::FEATURE_MOVBE))
          CPU = "silvermont";
        else
          CPU = "nehalem";
      } else if (testFeature(X86::FEATURE_SSE4_1)) {
        CPU = "penryn";
      } else if (testFeature(X86::FEATURE_SSSE3)) {
        if (testFeature(X86::FEATURE_MOVBE))
          CPU = "bonnell";
        else
          CPU = "core2";
      } else if (testFeature(X86::FEATURE_64BIT)) {
        CPU = "core2";
      } else if (testFeature(X86::FEATURE_SSE3)) {
        CPU = "yonah";
      } else if (testFeature(X86::FEATURE_SSE2)) {
        CPU = "pentium-m";
      } else if (testFeature(X86::FEATURE_SSE)) {
        CPU = "pentium3";
      } else if (testFeature(X86::FEATURE_MMX)) {
        CPU = "pentium2";
      } else {
        CPU = "pentiumpro";
      }
      break;
    }
    break;
  case 15: {
    if (testFeature(X86::FEATURE_64BIT)) {
      CPU = "nocona";
      break;
    }
    if (testFeature(X86::FEATURE_SSE3)) {
      CPU = "prescott";
      break;
    }
    CPU = "pentium4";
    break;
  }
  default:
    break; // Unknown.
  }

  return CPU;
}

/// Map an AMD family/model pair to an LLVM CPU name.
///
/// \param Features bitset of X86::FEATURE_* values, 32 bits per array word.
/// \param Type written for families 16 and 20-25. \param Subtype written
/// only when the model falls in one of the listed ranges.
/// \returns the CPU name, or an empty StringRef for an unknown family. For
/// families 21, 23 and 25 a model outside every listed range keeps the
/// family's initial name (bdver1, znver1, znver3) with no subtype.
static StringRef
getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
                              const unsigned *Features,
                              unsigned *Type, unsigned *Subtype) {
  auto testFeature = [&](unsigned F) {
    return (Features[F / 32] & (1U << (F % 32))) != 0;
  };

  StringRef CPU;

  switch (Family) {
  case 4:
    CPU = "i486";
    break;
  case 5:
    // Fallback name for models not listed in the inner switch.
    CPU = "pentium";
    switch (Model) {
    case 6:
    case 7:
      CPU = "k6";
      break;
    case 8:
      CPU = "k6-2";
      break;
    case 9:
    case 13:
      CPU = "k6-3";
      break;
    case 10:
      CPU = "geode";
      break;
    }
    break;
  case 6:
    if (testFeature(X86::FEATURE_SSE)) {
      CPU = "athlon-xp";
      break;
    }
    CPU = "athlon";
    break;
  case 15:
    if (testFeature(X86::FEATURE_SSE3)) {
      CPU = "k8-sse3";
      break;
    }
    CPU = "k8";
    break;
  case 16:
    CPU = "amdfam10";
    *Type = X86::AMDFAM10H; // "amdfam10"
    switch (Model) {
    case 2:
      *Subtype = X86::AMDFAM10H_BARCELONA;
      break;
    case 4:
      *Subtype = X86::AMDFAM10H_SHANGHAI;
      break;
    case 8:
      *Subtype = X86::AMDFAM10H_ISTANBUL;
      break;
    }
    break;
  case 20:
    CPU = "btver1";
    *Type = X86::AMD_BTVER1;
    break;
  case 21:
    CPU = "bdver1";
    *Type = X86::AMDFAM15H;
    if (Model >= 0x60 && Model <= 0x7f) {
      CPU = "bdver4";
      *Subtype = X86::AMDFAM15H_BDVER4;
      break; // 60h-7Fh: Excavator
    }
    if (Model >= 0x30 && Model <= 0x3f) {
      CPU = "bdver3";
      *Subtype = X86::AMDFAM15H_BDVER3;
      break; // 30h-3Fh: Steamroller
    }
    if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
      CPU = "bdver2";
      *Subtype = X86::AMDFAM15H_BDVER2;
      break; // 02h, 10h-1Fh: Piledriver
    }
    if (Model <= 0x0f) {
      *Subtype = X86::AMDFAM15H_BDVER1;
      break; // 00h-0Fh: Bulldozer
    }
    break;
  case 22:
    CPU = "btver2";
    *Type = X86::AMD_BTVER2;
    break;
  case 23:
    CPU = "znver1";
    *Type = X86::AMDFAM17H;
    if ((Model >= 0x30 && Model <= 0x3f) || (Model == 0x47) ||
        (Model >= 0x60 && Model <= 0x67) || (Model >= 0x68 && Model <= 0x6f) ||
        (Model >= 0x70 && Model <= 0x7f) || (Model >= 0x84 && Model <= 0x87) ||
        (Model >= 0x90 && Model <= 0x97) || (Model >= 0x98 && Model <= 0x9f) ||
        (Model >= 0xa0 && Model <= 0xaf)) {
      // Family 17h Models 30h-3Fh (Starship) Zen 2
      // Family 17h Models 47h (Cardinal) Zen 2
      // Family 17h Models 60h-67h (Renoir) Zen 2
      // Family 17h Models 68h-6Fh (Lucienne) Zen 2
      // Family 17h Models 70h-7Fh (Matisse) Zen 2
      // Family 17h Models 84h-87h (ProjectX) Zen 2
      // Family 17h Models 90h-97h (VanGogh) Zen 2
      // Family 17h Models 98h-9Fh (Mero) Zen 2
      // Family 17h Models A0h-AFh (Mendocino) Zen 2
      CPU = "znver2";
      *Subtype = X86::AMDFAM17H_ZNVER2;
      break;
    }
    if ((Model >= 0x10 && Model <= 0x1f) || (Model >= 0x20 && Model <= 0x2f)) {
      // Family 17h Models 10h-1Fh (Raven1) Zen
      // Family 17h Models 10h-1Fh (Picasso) Zen+
      // Family 17h Models 20h-2Fh (Raven2 x86) Zen
      *Subtype = X86::AMDFAM17H_ZNVER1;
      break;
    }
    break;
  case 25:
    CPU = "znver3";
    *Type = X86::AMDFAM19H;
    if (Model <= 0x0f || (Model >= 0x20 && Model <= 0x2f) ||
        (Model >= 0x30 && Model <= 0x3f) || (Model >= 0x40 && Model <= 0x4f) ||
        (Model >= 0x50 && Model <= 0x5f)) {
      // Family 19h Models 00h-0Fh (Genesis, Chagall) Zen 3
      // Family 19h Models 20h-2Fh (Vermeer) Zen 3
      // Family 19h Models 30h-3Fh (Badami) Zen 3
      // Family 19h Models 40h-4Fh (Rembrandt) Zen 3+
      // Family 19h Models 50h-5Fh (Cezanne) Zen 3
      *Subtype = X86::AMDFAM19H_ZNVER3;
      break;
    }
    if ((Model >= 0x10 && Model <= 0x1f) || (Model >= 0x60 && Model <= 0x6f) ||
        (Model >= 0x70 && Model <= 0x77) || (Model >= 0x78 && Model <= 0x7f) ||
        (Model >= 0xa0 && Model <= 0xaf)) {
      // Family 19h Models 10h-1Fh (Stones; Storm Peak) Zen 4
      // Family 19h Models 60h-6Fh (Raphael) Zen 4
      // Family 19h Models 70h-77h (Phoenix, Hawkpoint1) Zen 4
      // Family 19h Models 78h-7Fh (Phoenix 2, Hawkpoint2) Zen 4
      // Family 19h Models A0h-AFh (Stones-Dense) Zen 4
      CPU = "znver4";
      *Subtype = X86::AMDFAM19H_ZNVER4;
      break; //  "znver4"
    }
    break;
  default:
    break; // Unknown AMD CPU.
  }

  return CPU;
}

static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
                                 unsigned *Features) {
  unsigned EAX, EBX;

  auto setFeature = [&](unsigned F) {
    Features[F / 32] |= 1U << (F % 32);
  };

  if ((EDX >> 15) & 1)
    setFeature(X86::FEATURE_CMOV);
  if ((EDX >> 23) & 1)
    setFeature(X86::FEATURE_MMX);
  if ((EDX >> 25) & 1)
    setFeature(X86::FEATURE_SSE);
  if ((EDX >> 26) & 1)
    setFeature(X86::FEATURE_SSE2);

  if ((ECX >> 0) & 1)
    setFeature(X86::FEATURE_SSE3);
  if ((ECX >> 1) & 1)
    setFeature(X86::FEATURE_PCLMUL);
  if ((ECX >> 9) & 1)
    setFeature(X86::FEATURE_SSSE3);
  if ((ECX >> 12) & 1)
    setFeature(X86::FEATURE_FMA);
  if ((ECX >> 19) & 1)
    setFeature(X86::FEATURE_SSE4_1);
  if ((ECX >> 20) & 1) {
    setFeature(X86::FEATURE_SSE4_2);
    setFeature(X86::FEATURE_CRC32);
  }
  if ((ECX >> 23) & 1)
    setFeature(X86::FEATURE_POPCNT);
  if ((ECX >> 25) & 1)
    setFeature(X86::FEATURE_AES);

  if ((ECX >> 22) & 1)
    setFeature(X86::FEATURE_MOVBE);

  // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
  // indicates that the AVX registers will be saved and restored on context
  // switch, then we have full AVX support.
const unsigned AVXBits = (1 << 27) | (1 << 28);
  // Both ECX bits must be set, the XCR0 read must succeed, and XCR0 bits 1
  // and 2 (mask 0x6) must both be set. Note that getX86XCR0 overwrites EAX and
  // EDX, so the leaf-1 EDX value must not be consulted after this point.
  bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
                ((EAX & 0x6) == 0x6);
#if defined(__APPLE__)
  // Darwin lazily saves the AVX512 context on first use: trust that the OS will
  // save the AVX512 context if we use AVX512 instructions, even if the bit is
  // not set right now.
  bool HasAVX512Save = true;
#else
  // AVX512 requires additional context to be saved by the OS.
  bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
#endif

  if (HasAVX)
    setFeature(X86::FEATURE_AVX);

  // From here on EAX/EBX/ECX/EDX hold the result of CPUID leaf 7, subleaf 0
  // (when that query succeeded), replacing the leaf-1 values passed in.
  bool HasLeaf7 =
      MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);

  // Leaf 7 subleaf 0, EBX bits.
  if (HasLeaf7 && ((EBX >> 3) & 1))
    setFeature(X86::FEATURE_BMI);
  if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
    setFeature(X86::FEATURE_AVX2);
  if (HasLeaf7 && ((EBX >> 8) & 1))
    setFeature(X86::FEATURE_BMI2);
  if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512F);
  if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512DQ);
  if (HasLeaf7 && ((EBX >> 19) & 1))
    setFeature(X86::FEATURE_ADX);
  if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512IFMA);
  if (HasLeaf7 && ((EBX >> 23) & 1))
    setFeature(X86::FEATURE_CLFLUSHOPT);
  if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512PF);
  if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512ER);
  if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512CD);
  if (HasLeaf7 && ((EBX >> 29) & 1))
    setFeature(X86::FEATURE_SHA);
  if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512BW);
  if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512VL);

  // Leaf 7 subleaf 0, ECX bits.
  if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512VBMI);
  if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512VBMI2);
  if (HasLeaf7 && ((ECX >> 8) & 1))
    setFeature(X86::FEATURE_GFNI);
  if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX)
    setFeature(X86::FEATURE_VPCLMULQDQ);
  if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512VNNI);
  if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512BITALG);
  if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512VPOPCNTDQ);

  // Leaf 7 subleaf 0, EDX bits.
  if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX5124VNNIW);
  if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX5124FMAPS);
  if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512VP2INTERSECT);

  // EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't
  // return all 0s for invalid subleaves so check the limit.
  bool HasLeaf7Subleaf1 =
      HasLeaf7 && EAX >= 1 &&
      !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
  if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
    setFeature(X86::FEATURE_AVX512BF16);

  // Extended leaves: 0x80000000 reports the highest extended leaf in its
  // first output register; the result of that call is not checked here.
  unsigned MaxExtLevel;
  getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);

  bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
                     !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
  if (HasExtLeaf1 && ((ECX >> 6) & 1))
    setFeature(X86::FEATURE_SSE4_A);
  if (HasExtLeaf1 && ((ECX >> 11) & 1))
    setFeature(X86::FEATURE_XOP);
  if (HasExtLeaf1 && ((ECX >> 16) & 1))
    setFeature(X86::FEATURE_FMA4);

  if (HasExtLeaf1 && ((EDX >> 29) & 1))
    setFeature(X86::FEATURE_64BIT);
}

// x86 implementation: identifies the vendor, decodes family/model from CPUID
// leaf 1, collects the feature bitmap and hands off to the vendor-specific
// name lookup. Returns "generic" when the vendor is unknown.
StringRef sys::getHostCPUName() {
  unsigned MaxLeaf = 0;
  const VendorSignatures Vendor = getVendorSignature(&MaxLeaf);
  if (Vendor == VendorSignatures::UNKNOWN)
    return "generic";

  unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
  getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);

  unsigned Family = 0, Model = 0;
  // One bit per feature, rounded up to whole 32-bit words.
  unsigned Features[(X86::CPU_FEATURE_MAX + 31) / 32] = {0};
  detectX86FamilyModel(EAX, &Family, &Model);
  getAvailableFeatures(ECX, EDX, MaxLeaf, Features);

  // These aren't consumed in this file, but we try to keep some source code the
  // same or similar to compiler-rt.
1353 unsigned Type = 0; 1354 unsigned Subtype = 0; 1355 1356 StringRef CPU; 1357 1358 if (Vendor == VendorSignatures::GENUINE_INTEL) { 1359 CPU = getIntelProcessorTypeAndSubtype(Family, Model, Features, &Type, 1360 &Subtype); 1361 } else if (Vendor == VendorSignatures::AUTHENTIC_AMD) { 1362 CPU = getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type, 1363 &Subtype); 1364 } 1365 1366 if (!CPU.empty()) 1367 return CPU; 1368 1369 return "generic"; 1370 } 1371 1372 #elif defined(__APPLE__) && defined(__powerpc__) 1373 StringRef sys::getHostCPUName() { 1374 host_basic_info_data_t hostInfo; 1375 mach_msg_type_number_t infoCount; 1376 1377 infoCount = HOST_BASIC_INFO_COUNT; 1378 mach_port_t hostPort = mach_host_self(); 1379 host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo, 1380 &infoCount); 1381 mach_port_deallocate(mach_task_self(), hostPort); 1382 1383 if (hostInfo.cpu_type != CPU_TYPE_POWERPC) 1384 return "generic"; 1385 1386 switch (hostInfo.cpu_subtype) { 1387 case CPU_SUBTYPE_POWERPC_601: 1388 return "601"; 1389 case CPU_SUBTYPE_POWERPC_602: 1390 return "602"; 1391 case CPU_SUBTYPE_POWERPC_603: 1392 return "603"; 1393 case CPU_SUBTYPE_POWERPC_603e: 1394 return "603e"; 1395 case CPU_SUBTYPE_POWERPC_603ev: 1396 return "603ev"; 1397 case CPU_SUBTYPE_POWERPC_604: 1398 return "604"; 1399 case CPU_SUBTYPE_POWERPC_604e: 1400 return "604e"; 1401 case CPU_SUBTYPE_POWERPC_620: 1402 return "620"; 1403 case CPU_SUBTYPE_POWERPC_750: 1404 return "750"; 1405 case CPU_SUBTYPE_POWERPC_7400: 1406 return "7400"; 1407 case CPU_SUBTYPE_POWERPC_7450: 1408 return "7450"; 1409 case CPU_SUBTYPE_POWERPC_970: 1410 return "970"; 1411 default:; 1412 } 1413 1414 return "generic"; 1415 } 1416 #elif defined(__linux__) && defined(__powerpc__) 1417 StringRef sys::getHostCPUName() { 1418 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1419 StringRef Content = P ? 
P->getBuffer() : ""; 1420 return detail::getHostCPUNameForPowerPC(Content); 1421 } 1422 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__)) 1423 StringRef sys::getHostCPUName() { 1424 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1425 StringRef Content = P ? P->getBuffer() : ""; 1426 return detail::getHostCPUNameForARM(Content); 1427 } 1428 #elif defined(__linux__) && defined(__s390x__) 1429 StringRef sys::getHostCPUName() { 1430 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1431 StringRef Content = P ? P->getBuffer() : ""; 1432 return detail::getHostCPUNameForS390x(Content); 1433 } 1434 #elif defined(__MVS__) 1435 StringRef sys::getHostCPUName() { 1436 // Get pointer to Communications Vector Table (CVT). 1437 // The pointer is located at offset 16 of the Prefixed Save Area (PSA). 1438 // It is stored as 31 bit pointer and will be zero-extended to 64 bit. 1439 int *StartToCVTOffset = reinterpret_cast<int *>(0x10); 1440 // Since its stored as a 31-bit pointer, get the 4 bytes from the start 1441 // of address. 1442 int ReadValue = *StartToCVTOffset; 1443 // Explicitly clear the high order bit. 1444 ReadValue = (ReadValue & 0x7FFFFFFF); 1445 char *CVT = reinterpret_cast<char *>(ReadValue); 1446 // The model number is located in the CVT prefix at offset -6 and stored as 1447 // signless packed decimal. 1448 uint16_t Id = *(uint16_t *)&CVT[-6]; 1449 // Convert number to integer. 1450 Id = decodePackedBCD<uint16_t>(Id, false); 1451 // Check for vector support. It's stored in field CVTFLAG5 (offset 244), 1452 // bit CVTVEF (X'80'). The facilities list is part of the PSA but the vector 1453 // extension can only be used if bit CVTVEF is on. 
1454 bool HaveVectorSupport = CVT[244] & 0x80; 1455 return getCPUNameFromS390Model(Id, HaveVectorSupport); 1456 } 1457 #elif defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) 1458 #define CPUFAMILY_ARM_SWIFT 0x1e2d6381 1459 #define CPUFAMILY_ARM_CYCLONE 0x37a09642 1460 #define CPUFAMILY_ARM_TYPHOON 0x2c91a47e 1461 #define CPUFAMILY_ARM_TWISTER 0x92fb37c8 1462 #define CPUFAMILY_ARM_HURRICANE 0x67ceee93 1463 #define CPUFAMILY_ARM_MONSOON_MISTRAL 0xe81e7ef6 1464 #define CPUFAMILY_ARM_VORTEX_TEMPEST 0x07d34b9f 1465 #define CPUFAMILY_ARM_LIGHTNING_THUNDER 0x462504d2 1466 #define CPUFAMILY_ARM_FIRESTORM_ICESTORM 0x1b588bb3 1467 1468 StringRef sys::getHostCPUName() { 1469 uint32_t Family; 1470 size_t Length = sizeof(Family); 1471 sysctlbyname("hw.cpufamily", &Family, &Length, NULL, 0); 1472 1473 switch (Family) { 1474 case CPUFAMILY_ARM_SWIFT: 1475 return "swift"; 1476 case CPUFAMILY_ARM_CYCLONE: 1477 return "apple-a7"; 1478 case CPUFAMILY_ARM_TYPHOON: 1479 return "apple-a8"; 1480 case CPUFAMILY_ARM_TWISTER: 1481 return "apple-a9"; 1482 case CPUFAMILY_ARM_HURRICANE: 1483 return "apple-a10"; 1484 case CPUFAMILY_ARM_MONSOON_MISTRAL: 1485 return "apple-a11"; 1486 case CPUFAMILY_ARM_VORTEX_TEMPEST: 1487 return "apple-a12"; 1488 case CPUFAMILY_ARM_LIGHTNING_THUNDER: 1489 return "apple-a13"; 1490 case CPUFAMILY_ARM_FIRESTORM_ICESTORM: 1491 return "apple-m1"; 1492 default: 1493 // Default to the newest CPU we know about. 
1494 return "apple-m1"; 1495 } 1496 } 1497 #elif defined(_AIX) 1498 StringRef sys::getHostCPUName() { 1499 switch (_system_configuration.implementation) { 1500 case POWER_4: 1501 if (_system_configuration.version == PV_4_3) 1502 return "970"; 1503 return "pwr4"; 1504 case POWER_5: 1505 if (_system_configuration.version == PV_5) 1506 return "pwr5"; 1507 return "pwr5x"; 1508 case POWER_6: 1509 if (_system_configuration.version == PV_6_Compat) 1510 return "pwr6"; 1511 return "pwr6x"; 1512 case POWER_7: 1513 return "pwr7"; 1514 case POWER_8: 1515 return "pwr8"; 1516 case POWER_9: 1517 return "pwr9"; 1518 // TODO: simplify this once the macro is available in all OS levels. 1519 #ifdef POWER_10 1520 case POWER_10: 1521 #else 1522 case 0x40000: 1523 #endif 1524 return "pwr10"; 1525 default: 1526 return "generic"; 1527 } 1528 } 1529 #elif defined(__loongarch__) 1530 StringRef sys::getHostCPUName() { 1531 // Use processor id to detect cpu name. 1532 uint32_t processor_id; 1533 __asm__("cpucfg %[prid], $zero\n\t" : [prid] "=r"(processor_id)); 1534 // Refer PRID_SERIES_MASK in linux kernel: arch/loongarch/include/asm/cpu.h. 1535 switch (processor_id & 0xf000) { 1536 case 0xc000: // Loongson 64bit, 4-issue 1537 return "la464"; 1538 // TODO: Others. 1539 default: 1540 break; 1541 } 1542 return "generic"; 1543 } 1544 #elif defined(__riscv) 1545 StringRef sys::getHostCPUName() { 1546 #if defined(__linux__) 1547 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1548 StringRef Content = P ? 
P->getBuffer() : ""; 1549 return detail::getHostCPUNameForRISCV(Content); 1550 #else 1551 #if __riscv_xlen == 64 1552 return "generic-rv64"; 1553 #elif __riscv_xlen == 32 1554 return "generic-rv32"; 1555 #else 1556 #error "Unhandled value of __riscv_xlen" 1557 #endif 1558 #endif 1559 } 1560 #elif defined(__sparc__) 1561 #if defined(__linux__) 1562 StringRef sys::detail::getHostCPUNameForSPARC(StringRef ProcCpuinfoContent) { 1563 SmallVector<StringRef> Lines; 1564 ProcCpuinfoContent.split(Lines, "\n"); 1565 1566 // Look for cpu line to determine cpu name 1567 StringRef Cpu; 1568 for (unsigned I = 0, E = Lines.size(); I != E; ++I) { 1569 if (Lines[I].starts_with("cpu")) { 1570 Cpu = Lines[I].substr(5).ltrim("\t :"); 1571 break; 1572 } 1573 } 1574 1575 return StringSwitch<const char *>(Cpu) 1576 .StartsWith("SuperSparc", "supersparc") 1577 .StartsWith("HyperSparc", "hypersparc") 1578 .StartsWith("SpitFire", "ultrasparc") 1579 .StartsWith("BlackBird", "ultrasparc") 1580 .StartsWith("Sabre", " ultrasparc") 1581 .StartsWith("Hummingbird", "ultrasparc") 1582 .StartsWith("Cheetah", "ultrasparc3") 1583 .StartsWith("Jalapeno", "ultrasparc3") 1584 .StartsWith("Jaguar", "ultrasparc3") 1585 .StartsWith("Panther", "ultrasparc3") 1586 .StartsWith("Serrano", "ultrasparc3") 1587 .StartsWith("UltraSparc T1", "niagara") 1588 .StartsWith("UltraSparc T2", "niagara2") 1589 .StartsWith("UltraSparc T3", "niagara3") 1590 .StartsWith("UltraSparc T4", "niagara4") 1591 .StartsWith("UltraSparc T5", "niagara4") 1592 .StartsWith("LEON", "leon3") 1593 // niagara7/m8 not supported by LLVM yet. 1594 .StartsWith("SPARC-M7", "niagara4" /* "niagara7" */) 1595 .StartsWith("SPARC-S7", "niagara4" /* "niagara7" */) 1596 .StartsWith("SPARC-M8", "niagara4" /* "m8" */) 1597 .Default("generic"); 1598 } 1599 #endif 1600 1601 StringRef sys::getHostCPUName() { 1602 #if defined(__linux__) 1603 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1604 StringRef Content = P ? 
P->getBuffer() : ""; 1605 return detail::getHostCPUNameForSPARC(Content); 1606 #elif defined(__sun__) && defined(__svr4__) 1607 char *buf = NULL; 1608 kstat_ctl_t *kc; 1609 kstat_t *ksp; 1610 kstat_named_t *brand = NULL; 1611 1612 kc = kstat_open(); 1613 if (kc != NULL) { 1614 ksp = kstat_lookup(kc, const_cast<char *>("cpu_info"), -1, NULL); 1615 if (ksp != NULL && kstat_read(kc, ksp, NULL) != -1 && 1616 ksp->ks_type == KSTAT_TYPE_NAMED) 1617 brand = 1618 (kstat_named_t *)kstat_data_lookup(ksp, const_cast<char *>("brand")); 1619 if (brand != NULL && brand->data_type == KSTAT_DATA_STRING) 1620 buf = KSTAT_NAMED_STR_PTR(brand); 1621 } 1622 kstat_close(kc); 1623 1624 return StringSwitch<const char *>(buf) 1625 .Case("TMS390S10", "supersparc") // Texas Instruments microSPARC I 1626 .Case("TMS390Z50", "supersparc") // Texas Instruments SuperSPARC I 1627 .Case("TMS390Z55", 1628 "supersparc") // Texas Instruments SuperSPARC I with SuperCache 1629 .Case("MB86904", "supersparc") // Fujitsu microSPARC II 1630 .Case("MB86907", "supersparc") // Fujitsu TurboSPARC 1631 .Case("RT623", "hypersparc") // Ross hyperSPARC 1632 .Case("RT625", "hypersparc") 1633 .Case("RT626", "hypersparc") 1634 .Case("UltraSPARC-I", "ultrasparc") 1635 .Case("UltraSPARC-II", "ultrasparc") 1636 .Case("UltraSPARC-IIe", "ultrasparc") 1637 .Case("UltraSPARC-IIi", "ultrasparc") 1638 .Case("SPARC64-III", "ultrasparc") 1639 .Case("SPARC64-IV", "ultrasparc") 1640 .Case("UltraSPARC-III", "ultrasparc3") 1641 .Case("UltraSPARC-III+", "ultrasparc3") 1642 .Case("UltraSPARC-IIIi", "ultrasparc3") 1643 .Case("UltraSPARC-IIIi+", "ultrasparc3") 1644 .Case("UltraSPARC-IV", "ultrasparc3") 1645 .Case("UltraSPARC-IV+", "ultrasparc3") 1646 .Case("SPARC64-V", "ultrasparc3") 1647 .Case("SPARC64-VI", "ultrasparc3") 1648 .Case("SPARC64-VII", "ultrasparc3") 1649 .Case("UltraSPARC-T1", "niagara") 1650 .Case("UltraSPARC-T2", "niagara2") 1651 .Case("UltraSPARC-T2", "niagara2") 1652 .Case("UltraSPARC-T2+", "niagara2") 1653 
.Case("SPARC-T3", "niagara3") 1654 .Case("SPARC-T4", "niagara4") 1655 .Case("SPARC-T5", "niagara4") 1656 // niagara7/m8 not supported by LLVM yet. 1657 .Case("SPARC-M7", "niagara4" /* "niagara7" */) 1658 .Case("SPARC-S7", "niagara4" /* "niagara7" */) 1659 .Case("SPARC-M8", "niagara4" /* "m8" */) 1660 .Default("generic"); 1661 #else 1662 return "generic"; 1663 #endif 1664 } 1665 #else 1666 StringRef sys::getHostCPUName() { return "generic"; } 1667 namespace llvm { 1668 namespace sys { 1669 namespace detail { 1670 namespace x86 { 1671 1672 VendorSignatures getVendorSignature(unsigned *MaxLeaf) { 1673 return VendorSignatures::UNKNOWN; 1674 } 1675 1676 } // namespace x86 1677 } // namespace detail 1678 } // namespace sys 1679 } // namespace llvm 1680 #endif 1681 1682 #if defined(__i386__) || defined(_M_IX86) || \ 1683 defined(__x86_64__) || defined(_M_X64) 1684 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { 1685 unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; 1686 unsigned MaxLevel; 1687 1688 if (getX86CpuIDAndInfo(0, &MaxLevel, &EBX, &ECX, &EDX) || MaxLevel < 1) 1689 return false; 1690 1691 getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX); 1692 1693 Features["cx8"] = (EDX >> 8) & 1; 1694 Features["cmov"] = (EDX >> 15) & 1; 1695 Features["mmx"] = (EDX >> 23) & 1; 1696 Features["fxsr"] = (EDX >> 24) & 1; 1697 Features["sse"] = (EDX >> 25) & 1; 1698 Features["sse2"] = (EDX >> 26) & 1; 1699 1700 Features["sse3"] = (ECX >> 0) & 1; 1701 Features["pclmul"] = (ECX >> 1) & 1; 1702 Features["ssse3"] = (ECX >> 9) & 1; 1703 Features["cx16"] = (ECX >> 13) & 1; 1704 Features["sse4.1"] = (ECX >> 19) & 1; 1705 Features["sse4.2"] = (ECX >> 20) & 1; 1706 Features["crc32"] = Features["sse4.2"]; 1707 Features["movbe"] = (ECX >> 22) & 1; 1708 Features["popcnt"] = (ECX >> 23) & 1; 1709 Features["aes"] = (ECX >> 25) & 1; 1710 Features["rdrnd"] = (ECX >> 30) & 1; 1711 1712 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV 1713 // indicates that the AVX registers will 
// be saved and restored on context
  // switch, then we have full AVX support.
  // getX86XCR0 overwrites EAX and EDX; ECX still holds the CPUID leaf-1 value.
  bool HasXSave = ((ECX >> 27) & 1) && !getX86XCR0(&EAX, &EDX);
  bool HasAVXSave = HasXSave && ((ECX >> 28) & 1) && ((EAX & 0x6) == 0x6);
#if defined(__APPLE__)
  // Darwin lazily saves the AVX512 context on first use: trust that the OS will
  // save the AVX512 context if we use AVX512 instructions, even if the bit is
  // not set right now.
  bool HasAVX512Save = true;
#else
  // AVX512 requires additional context to be saved by the OS.
  bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0);
#endif
  // AMX requires additional context to be saved by the OS.
  const unsigned AMXBits = (1 << 17) | (1 << 18);
  bool HasAMXSave = HasXSave && ((EAX & AMXBits) == AMXBits);

  Features["avx"]   = HasAVXSave;
  Features["fma"]   = ((ECX >> 12) & 1) && HasAVXSave;
  // Only enable XSAVE if OS has enabled support for saving YMM state.
  Features["xsave"] = ((ECX >> 26) & 1) && HasAVXSave;
  Features["f16c"]  = ((ECX >> 29) & 1) && HasAVXSave;

  // From here on the registers are reloaded by each CPUID query in turn, so
  // every group of assignments below reads the leaf queried just before it.
  unsigned MaxExtLevel;
  getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);

  bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
                     !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
  Features["sahf"]   = HasExtLeaf1 && ((ECX >>  0) & 1);
  Features["lzcnt"]  = HasExtLeaf1 && ((ECX >>  5) & 1);
  Features["sse4a"]  = HasExtLeaf1 && ((ECX >>  6) & 1);
  Features["prfchw"] = HasExtLeaf1 && ((ECX >>  8) & 1);
  Features["xop"]    = HasExtLeaf1 && ((ECX >> 11) & 1) && HasAVXSave;
  Features["lwp"]    = HasExtLeaf1 && ((ECX >> 15) & 1);
  Features["fma4"]   = HasExtLeaf1 && ((ECX >> 16) & 1) && HasAVXSave;
  Features["tbm"]    = HasExtLeaf1 && ((ECX >> 21) & 1);
  Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1);

  Features["64bit"]  = HasExtLeaf1 && ((EDX >> 29) & 1);

  // Miscellaneous memory related features, detected by
  // using the 0x80000008 leaf of the CPUID instruction
  bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 &&
                     !getX86CpuIDAndInfo(0x80000008, &EAX, &EBX, &ECX, &EDX);
  Features["clzero"]   = HasExtLeaf8 && ((EBX >> 0) & 1);
  Features["rdpru"]    = HasExtLeaf8 && ((EBX >> 4) & 1);
  Features["wbnoinvd"] = HasExtLeaf8 && ((EBX >> 9) & 1);

  bool HasLeaf7 =
      MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);

  // Leaf 7 subleaf 0, EBX bits.
  Features["fsgsbase"]   = HasLeaf7 && ((EBX >>  0) & 1);
  Features["sgx"]        = HasLeaf7 && ((EBX >>  2) & 1);
  Features["bmi"]        = HasLeaf7 && ((EBX >>  3) & 1);
  // AVX2 is only supported if we have the OS save support from AVX.
  Features["avx2"]       = HasLeaf7 && ((EBX >>  5) & 1) && HasAVXSave;
  Features["bmi2"]       = HasLeaf7 && ((EBX >>  8) & 1);
  Features["invpcid"]    = HasLeaf7 && ((EBX >> 10) & 1);
  Features["rtm"]        = HasLeaf7 && ((EBX >> 11) & 1);
  // AVX512 is only supported if the OS supports the context save for it.
  Features["avx512f"]    = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save;
  Features["avx512dq"]   = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save;
  Features["rdseed"]     = HasLeaf7 && ((EBX >> 18) & 1);
  Features["adx"]        = HasLeaf7 && ((EBX >> 19) & 1);
  Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save;
  Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1);
  Features["clwb"]       = HasLeaf7 && ((EBX >> 24) & 1);
  Features["avx512pf"]   = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save;
  Features["avx512er"]   = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save;
  Features["avx512cd"]   = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save;
  Features["sha"]        = HasLeaf7 && ((EBX >> 29) & 1);
  Features["avx512bw"]   = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save;
  Features["avx512vl"]   = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save;

  // Leaf 7 subleaf 0, ECX bits.
  Features["prefetchwt1"]     = HasLeaf7 && ((ECX >>  0) & 1);
  Features["avx512vbmi"]      = HasLeaf7 && ((ECX >>  1) & 1) && HasAVX512Save;
  Features["pku"]             = HasLeaf7 && ((ECX >>  4) & 1);
  Features["waitpkg"]         = HasLeaf7 && ((ECX >>  5) & 1);
  Features["avx512vbmi2"]     = HasLeaf7 && ((ECX >>  6) & 1) && HasAVX512Save;
  Features["shstk"]           = HasLeaf7 && ((ECX >>  7) & 1);
  Features["gfni"]            = HasLeaf7 && ((ECX >>  8) & 1);
  Features["vaes"]            = HasLeaf7 && ((ECX >>  9) & 1) && HasAVXSave;
  Features["vpclmulqdq"]      = HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave;
  Features["avx512vnni"]      = HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save;
  Features["avx512bitalg"]    = HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save;
  Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save;
  Features["rdpid"]           = HasLeaf7 && ((ECX >> 22) & 1);
  Features["kl"]              = HasLeaf7 && ((ECX >> 23) & 1); // key locker
  Features["cldemote"]        = HasLeaf7 && ((ECX >> 25) & 1);
  Features["movdiri"]         = HasLeaf7 && ((ECX >> 27) & 1);
  Features["movdir64b"]       = HasLeaf7 && ((ECX >> 28) & 1);
  Features["enqcmd"]          = HasLeaf7 && ((ECX >> 29) & 1);

  // Leaf 7 subleaf 0, EDX bits.
  Features["uintr"]           = HasLeaf7 && ((EDX >> 5) & 1);
  Features["avx512vp2intersect"] =
      HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save;
  Features["serialize"]       = HasLeaf7 && ((EDX >> 14) & 1);
  Features["tsxldtrk"]        = HasLeaf7 && ((EDX >> 16) & 1);
  // There are two CPUID leaves which carry information associated with the
  // pconfig instruction:
  // EAX=0x7, ECX=0x0 indicates the availability of the instruction (via the 18th
  // bit of EDX), while the EAX=0x1b leaf returns information on the
  // availability of specific pconfig leafs.
  // The target feature here only refers to the first of these two.
  // Users might need to check for the availability of specific pconfig
  // leaves using cpuid, since that information is ignored while
  // detecting features using the "-march=native" flag.
  // For more info, see X86 ISA docs.
  Features["pconfig"] = HasLeaf7 && ((EDX >> 18) & 1);
  Features["amx-bf16"]   = HasLeaf7 && ((EDX >> 22) & 1) && HasAMXSave;
  Features["avx512fp16"] = HasLeaf7 && ((EDX >> 23) & 1) && HasAVX512Save;
  Features["amx-tile"]   = HasLeaf7 && ((EDX >> 24) & 1) && HasAMXSave;
  Features["amx-int8"]   = HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave;
  // EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't
  // return all 0s for invalid subleaves so check the limit.
  bool HasLeaf7Subleaf1 =
      HasLeaf7 && EAX >= 1 &&
      !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
  // Leaf 7 subleaf 1: EAX bits first, then EDX bits.
  Features["sha512"]     = HasLeaf7Subleaf1 && ((EAX >> 0) & 1);
  Features["sm3"]        = HasLeaf7Subleaf1 && ((EAX >> 1) & 1);
  Features["sm4"]        = HasLeaf7Subleaf1 && ((EAX >> 2) & 1);
  Features["raoint"]     = HasLeaf7Subleaf1 && ((EAX >> 3) & 1);
  Features["avxvnni"]    = HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave;
  Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save;
  Features["amx-fp16"]   = HasLeaf7Subleaf1 && ((EAX >> 21) & 1) && HasAMXSave;
  Features["cmpccxadd"]  = HasLeaf7Subleaf1 && ((EAX >> 7) & 1);
  Features["hreset"]     = HasLeaf7Subleaf1 && ((EAX >> 22) & 1);
  Features["avxifma"]    = HasLeaf7Subleaf1 && ((EAX >> 23) & 1) && HasAVXSave;
  Features["avxvnniint8"] = HasLeaf7Subleaf1 && ((EDX >> 4) & 1) && HasAVXSave;
  Features["avxneconvert"] = HasLeaf7Subleaf1 && ((EDX >> 5) & 1) && HasAVXSave;
  Features["amx-complex"] = HasLeaf7Subleaf1 && ((EDX >> 8) & 1) && HasAMXSave;
  Features["avxvnniint16"] = HasLeaf7Subleaf1 && ((EDX >> 10) & 1) && HasAVXSave;
  Features["prefetchi"]  = HasLeaf7Subleaf1 && ((EDX >> 14) & 1);
  Features["usermsr"]  = HasLeaf7Subleaf1 && ((EDX >> 15) & 1);
  Features["avx10.1-256"] = HasLeaf7Subleaf1 && ((EDX >> 19) & 1);

  bool HasLeafD = MaxLevel >= 0xd &&
                  !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);

  // Only enable XSAVE if OS has enabled support for saving YMM state.
  Features["xsaveopt"] = HasLeafD && ((EAX >> 0) & 1) && HasAVXSave;
  Features["xsavec"]   = HasLeafD && ((EAX >> 1) & 1) && HasAVXSave;
  Features["xsaves"]   = HasLeafD && ((EAX >> 3) & 1) && HasAVXSave;

  bool HasLeaf14 = MaxLevel >= 0x14 &&
                  !getX86CpuIDAndInfoEx(0x14, 0x0, &EAX, &EBX, &ECX, &EDX);

  Features["ptwrite"] = HasLeaf14 && ((EBX >> 4) & 1);

  bool HasLeaf19 =
      MaxLevel >= 0x19 && !getX86CpuIDAndInfo(0x19, &EAX, &EBX, &ECX, &EDX);
  Features["widekl"] = HasLeaf7 && HasLeaf19 && ((EBX >> 2) & 1);

  // avx10.1-512 additionally requires the avx10.1-256 result computed above.
  bool HasLeaf24 =
      MaxLevel >= 0x24 && !getX86CpuIDAndInfo(0x24, &EAX, &EBX, &ECX, &EDX);
  Features["avx10.1-512"] =
      Features["avx10.1-256"] && HasLeaf24 && ((EBX >> 18) & 1);

  return true;
}
#elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
// Linux/ARM: features are taken from the "Features" line of /proc/cpuinfo.
// Returns false when /proc/cpuinfo cannot be read.
bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
  std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
  if (!P)
    return false;

  SmallVector<StringRef, 32> Lines;
  P->getBuffer().split(Lines, "\n");

  SmallVector<StringRef, 32> CPUFeatures;

  // Look for the CPU features.
1885 for (unsigned I = 0, E = Lines.size(); I != E; ++I) 1886 if (Lines[I].starts_with("Features")) { 1887 Lines[I].split(CPUFeatures, ' '); 1888 break; 1889 } 1890 1891 #if defined(__aarch64__) 1892 // Keep track of which crypto features we have seen 1893 enum { CAP_AES = 0x1, CAP_PMULL = 0x2, CAP_SHA1 = 0x4, CAP_SHA2 = 0x8 }; 1894 uint32_t crypto = 0; 1895 #endif 1896 1897 for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) { 1898 StringRef LLVMFeatureStr = StringSwitch<StringRef>(CPUFeatures[I]) 1899 #if defined(__aarch64__) 1900 .Case("asimd", "neon") 1901 .Case("fp", "fp-armv8") 1902 .Case("crc32", "crc") 1903 .Case("atomics", "lse") 1904 .Case("sve", "sve") 1905 .Case("sve2", "sve2") 1906 #else 1907 .Case("half", "fp16") 1908 .Case("neon", "neon") 1909 .Case("vfpv3", "vfp3") 1910 .Case("vfpv3d16", "vfp3d16") 1911 .Case("vfpv4", "vfp4") 1912 .Case("idiva", "hwdiv-arm") 1913 .Case("idivt", "hwdiv") 1914 #endif 1915 .Default(""); 1916 1917 #if defined(__aarch64__) 1918 // We need to check crypto separately since we need all of the crypto 1919 // extensions to enable the subtarget feature 1920 if (CPUFeatures[I] == "aes") 1921 crypto |= CAP_AES; 1922 else if (CPUFeatures[I] == "pmull") 1923 crypto |= CAP_PMULL; 1924 else if (CPUFeatures[I] == "sha1") 1925 crypto |= CAP_SHA1; 1926 else if (CPUFeatures[I] == "sha2") 1927 crypto |= CAP_SHA2; 1928 #endif 1929 1930 if (LLVMFeatureStr != "") 1931 Features[LLVMFeatureStr] = true; 1932 } 1933 1934 #if defined(__aarch64__) 1935 // If we have all crypto bits we can add the feature 1936 if (crypto == (CAP_AES | CAP_PMULL | CAP_SHA1 | CAP_SHA2)) 1937 Features["crypto"] = true; 1938 #endif 1939 1940 return true; 1941 } 1942 #elif defined(_WIN32) && (defined(__aarch64__) || defined(_M_ARM64)) 1943 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { 1944 if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)) 1945 Features["neon"] = true; 1946 if 
(IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE)) 1947 Features["crc"] = true; 1948 if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) 1949 Features["crypto"] = true; 1950 1951 return true; 1952 } 1953 #elif defined(__linux__) && defined(__loongarch__) 1954 #include <sys/auxv.h> 1955 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { 1956 unsigned long hwcap = getauxval(AT_HWCAP); 1957 bool HasFPU = hwcap & (1UL << 3); // HWCAP_LOONGARCH_FPU 1958 uint32_t cpucfg2 = 0x2; 1959 __asm__("cpucfg %[cpucfg2], %[cpucfg2]\n\t" : [cpucfg2] "+r"(cpucfg2)); 1960 1961 Features["f"] = HasFPU && (cpucfg2 & (1U << 1)); // CPUCFG.2.FP_SP 1962 Features["d"] = HasFPU && (cpucfg2 & (1U << 2)); // CPUCFG.2.FP_DP 1963 1964 Features["lsx"] = hwcap & (1UL << 4); // HWCAP_LOONGARCH_LSX 1965 Features["lasx"] = hwcap & (1UL << 5); // HWCAP_LOONGARCH_LASX 1966 Features["lvz"] = hwcap & (1UL << 9); // HWCAP_LOONGARCH_LVZ 1967 1968 return true; 1969 } 1970 #else 1971 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { return false; } 1972 #endif 1973 1974 #if __APPLE__ 1975 /// \returns the \p triple, but with the Host's arch spliced in. 
1976 static Triple withHostArch(Triple T) { 1977 #if defined(__arm__) 1978 T.setArch(Triple::arm); 1979 T.setArchName("arm"); 1980 #elif defined(__arm64e__) 1981 T.setArch(Triple::aarch64, Triple::AArch64SubArch_arm64e); 1982 T.setArchName("arm64e"); 1983 #elif defined(__aarch64__) 1984 T.setArch(Triple::aarch64); 1985 T.setArchName("arm64"); 1986 #elif defined(__x86_64h__) 1987 T.setArch(Triple::x86_64); 1988 T.setArchName("x86_64h"); 1989 #elif defined(__x86_64__) 1990 T.setArch(Triple::x86_64); 1991 T.setArchName("x86_64"); 1992 #elif defined(__i386__) 1993 T.setArch(Triple::x86); 1994 T.setArchName("i386"); 1995 #elif defined(__powerpc__) 1996 T.setArch(Triple::ppc); 1997 T.setArchName("powerpc"); 1998 #else 1999 # error "Unimplemented host arch fixup" 2000 #endif 2001 return T; 2002 } 2003 #endif 2004 2005 std::string sys::getProcessTriple() { 2006 std::string TargetTripleString = updateTripleOSVersion(LLVM_HOST_TRIPLE); 2007 Triple PT(Triple::normalize(TargetTripleString)); 2008 2009 #if __APPLE__ 2010 /// In Universal builds, LLVM_HOST_TRIPLE will have the wrong arch in one of 2011 /// the slices. This fixes that up. 2012 PT = withHostArch(PT); 2013 #endif 2014 2015 if (sizeof(void *) == 8 && PT.isArch32Bit()) 2016 PT = PT.get64BitArchVariant(); 2017 if (sizeof(void *) == 4 && PT.isArch64Bit()) 2018 PT = PT.get32BitArchVariant(); 2019 2020 return PT.str(); 2021 } 2022 2023 void sys::printDefaultTargetAndDetectedCPU(raw_ostream &OS) { 2024 #if LLVM_VERSION_PRINTER_SHOW_HOST_TARGET_INFO 2025 std::string CPU = std::string(sys::getHostCPUName()); 2026 if (CPU == "generic") 2027 CPU = "(unknown)"; 2028 OS << " Default target: " << sys::getDefaultTargetTriple() << '\n' 2029 << " Host CPU: " << CPU << '\n'; 2030 #endif 2031 } 2032