1 //===-- Host.cpp - Implement OS Host Detection ------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the operating system Host detection. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/TargetParser/Host.h" 14 #include "llvm/ADT/SmallVector.h" 15 #include "llvm/ADT/StringMap.h" 16 #include "llvm/ADT/StringRef.h" 17 #include "llvm/ADT/StringSwitch.h" 18 #include "llvm/Config/llvm-config.h" 19 #include "llvm/Support/MemoryBuffer.h" 20 #include "llvm/Support/raw_ostream.h" 21 #include "llvm/TargetParser/Triple.h" 22 #include "llvm/TargetParser/X86TargetParser.h" 23 #include <string.h> 24 25 // Include the platform-specific parts of this class. 
26 #ifdef LLVM_ON_UNIX 27 #include "Unix/Host.inc" 28 #include <sched.h> 29 #endif 30 #ifdef _WIN32 31 #include "Windows/Host.inc" 32 #endif 33 #ifdef _MSC_VER 34 #include <intrin.h> 35 #endif 36 #ifdef __MVS__ 37 #include "llvm/Support/BCD.h" 38 #endif 39 #if defined(__APPLE__) 40 #include <mach/host_info.h> 41 #include <mach/mach.h> 42 #include <mach/mach_host.h> 43 #include <mach/machine.h> 44 #include <sys/param.h> 45 #include <sys/sysctl.h> 46 #endif 47 #ifdef _AIX 48 #include <sys/systemcfg.h> 49 #endif 50 #if defined(__sun__) && defined(__svr4__) 51 #include <kstat.h> 52 #endif 53 54 #define DEBUG_TYPE "host-detection" 55 56 //===----------------------------------------------------------------------===// 57 // 58 // Implementations of the CPU detection routines 59 // 60 //===----------------------------------------------------------------------===// 61 62 using namespace llvm; 63 64 static std::unique_ptr<llvm::MemoryBuffer> 65 LLVM_ATTRIBUTE_UNUSED getProcCpuinfoContent() { 66 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text = 67 llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo"); 68 if (std::error_code EC = Text.getError()) { 69 llvm::errs() << "Can't read " 70 << "/proc/cpuinfo: " << EC.message() << "\n"; 71 return nullptr; 72 } 73 return std::move(*Text); 74 } 75 76 StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) { 77 // Access to the Processor Version Register (PVR) on PowerPC is privileged, 78 // and so we must use an operating-system interface to determine the current 79 // processor type. On Linux, this is exposed through the /proc/cpuinfo file. 80 const char *generic = "generic"; 81 82 // The cpu line is second (after the 'processor: 0' line), so if this 83 // buffer is too small then something has changed (or is wrong). 
84 StringRef::const_iterator CPUInfoStart = ProcCpuinfoContent.begin(); 85 StringRef::const_iterator CPUInfoEnd = ProcCpuinfoContent.end(); 86 87 StringRef::const_iterator CIP = CPUInfoStart; 88 89 StringRef::const_iterator CPUStart = nullptr; 90 size_t CPULen = 0; 91 92 // We need to find the first line which starts with cpu, spaces, and a colon. 93 // After the colon, there may be some additional spaces and then the cpu type. 94 while (CIP < CPUInfoEnd && CPUStart == nullptr) { 95 if (CIP < CPUInfoEnd && *CIP == '\n') 96 ++CIP; 97 98 if (CIP < CPUInfoEnd && *CIP == 'c') { 99 ++CIP; 100 if (CIP < CPUInfoEnd && *CIP == 'p') { 101 ++CIP; 102 if (CIP < CPUInfoEnd && *CIP == 'u') { 103 ++CIP; 104 while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t')) 105 ++CIP; 106 107 if (CIP < CPUInfoEnd && *CIP == ':') { 108 ++CIP; 109 while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t')) 110 ++CIP; 111 112 if (CIP < CPUInfoEnd) { 113 CPUStart = CIP; 114 while (CIP < CPUInfoEnd && (*CIP != ' ' && *CIP != '\t' && 115 *CIP != ',' && *CIP != '\n')) 116 ++CIP; 117 CPULen = CIP - CPUStart; 118 } 119 } 120 } 121 } 122 } 123 124 if (CPUStart == nullptr) 125 while (CIP < CPUInfoEnd && *CIP != '\n') 126 ++CIP; 127 } 128 129 if (CPUStart == nullptr) 130 return generic; 131 132 return StringSwitch<const char *>(StringRef(CPUStart, CPULen)) 133 .Case("604e", "604e") 134 .Case("604", "604") 135 .Case("7400", "7400") 136 .Case("7410", "7400") 137 .Case("7447", "7400") 138 .Case("7455", "7450") 139 .Case("G4", "g4") 140 .Case("POWER4", "970") 141 .Case("PPC970FX", "970") 142 .Case("PPC970MP", "970") 143 .Case("G5", "g5") 144 .Case("POWER5", "g5") 145 .Case("A2", "a2") 146 .Case("POWER6", "pwr6") 147 .Case("POWER7", "pwr7") 148 .Case("POWER8", "pwr8") 149 .Case("POWER8E", "pwr8") 150 .Case("POWER8NVL", "pwr8") 151 .Case("POWER9", "pwr9") 152 .Case("POWER10", "pwr10") 153 // FIXME: If we get a simulator or machine with the capabilities of 154 // mcpu=future, we should revisit this and add 
the name reported by the 155 // simulator/machine. 156 .Default(generic); 157 } 158 159 StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) { 160 // The cpuid register on arm is not accessible from user space. On Linux, 161 // it is exposed through the /proc/cpuinfo file. 162 163 // Read 32 lines from /proc/cpuinfo, which should contain the CPU part line 164 // in all cases. 165 SmallVector<StringRef, 32> Lines; 166 ProcCpuinfoContent.split(Lines, "\n"); 167 168 // Look for the CPU implementer line. 169 StringRef Implementer; 170 StringRef Hardware; 171 StringRef Part; 172 for (unsigned I = 0, E = Lines.size(); I != E; ++I) { 173 if (Lines[I].startswith("CPU implementer")) 174 Implementer = Lines[I].substr(15).ltrim("\t :"); 175 if (Lines[I].startswith("Hardware")) 176 Hardware = Lines[I].substr(8).ltrim("\t :"); 177 if (Lines[I].startswith("CPU part")) 178 Part = Lines[I].substr(8).ltrim("\t :"); 179 } 180 181 if (Implementer == "0x41") { // ARM Ltd. 182 // MSM8992/8994 may give cpu part for the core that the kernel is running on, 183 // which is undeterministic and wrong. Always return cortex-a53 for these SoC. 184 if (Hardware.endswith("MSM8994") || Hardware.endswith("MSM8996")) 185 return "cortex-a53"; 186 187 188 // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The 189 // values correspond to the "Part number" in the CP15/c0 register. The 190 // contents are specified in the various processor manuals. 191 // This corresponds to the Main ID Register in Technical Reference Manuals. 
192 // and is used in programs like sys-utils 193 return StringSwitch<const char *>(Part) 194 .Case("0x926", "arm926ej-s") 195 .Case("0xb02", "mpcore") 196 .Case("0xb36", "arm1136j-s") 197 .Case("0xb56", "arm1156t2-s") 198 .Case("0xb76", "arm1176jz-s") 199 .Case("0xc08", "cortex-a8") 200 .Case("0xc09", "cortex-a9") 201 .Case("0xc0f", "cortex-a15") 202 .Case("0xc20", "cortex-m0") 203 .Case("0xc23", "cortex-m3") 204 .Case("0xc24", "cortex-m4") 205 .Case("0xd22", "cortex-m55") 206 .Case("0xd02", "cortex-a34") 207 .Case("0xd04", "cortex-a35") 208 .Case("0xd03", "cortex-a53") 209 .Case("0xd05", "cortex-a55") 210 .Case("0xd46", "cortex-a510") 211 .Case("0xd07", "cortex-a57") 212 .Case("0xd08", "cortex-a72") 213 .Case("0xd09", "cortex-a73") 214 .Case("0xd0a", "cortex-a75") 215 .Case("0xd0b", "cortex-a76") 216 .Case("0xd0d", "cortex-a77") 217 .Case("0xd41", "cortex-a78") 218 .Case("0xd47", "cortex-a710") 219 .Case("0xd4d", "cortex-a715") 220 .Case("0xd44", "cortex-x1") 221 .Case("0xd4c", "cortex-x1c") 222 .Case("0xd48", "cortex-x2") 223 .Case("0xd4e", "cortex-x3") 224 .Case("0xd0c", "neoverse-n1") 225 .Case("0xd49", "neoverse-n2") 226 .Case("0xd40", "neoverse-v1") 227 .Case("0xd4f", "neoverse-v2") 228 .Default("generic"); 229 } 230 231 if (Implementer == "0x42" || Implementer == "0x43") { // Broadcom | Cavium. 232 return StringSwitch<const char *>(Part) 233 .Case("0x516", "thunderx2t99") 234 .Case("0x0516", "thunderx2t99") 235 .Case("0xaf", "thunderx2t99") 236 .Case("0x0af", "thunderx2t99") 237 .Case("0xa1", "thunderxt88") 238 .Case("0x0a1", "thunderxt88") 239 .Default("generic"); 240 } 241 242 if (Implementer == "0x46") { // Fujitsu Ltd. 243 return StringSwitch<const char *>(Part) 244 .Case("0x001", "a64fx") 245 .Default("generic"); 246 } 247 248 if (Implementer == "0x4e") { // NVIDIA Corporation 249 return StringSwitch<const char *>(Part) 250 .Case("0x004", "carmel") 251 .Default("generic"); 252 } 253 254 if (Implementer == "0x48") // HiSilicon Technologies, Inc. 
255 // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The 256 // values correspond to the "Part number" in the CP15/c0 register. The 257 // contents are specified in the various processor manuals. 258 return StringSwitch<const char *>(Part) 259 .Case("0xd01", "tsv110") 260 .Default("generic"); 261 262 if (Implementer == "0x51") // Qualcomm Technologies, Inc. 263 // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The 264 // values correspond to the "Part number" in the CP15/c0 register. The 265 // contents are specified in the various processor manuals. 266 return StringSwitch<const char *>(Part) 267 .Case("0x06f", "krait") // APQ8064 268 .Case("0x201", "kryo") 269 .Case("0x205", "kryo") 270 .Case("0x211", "kryo") 271 .Case("0x800", "cortex-a73") // Kryo 2xx Gold 272 .Case("0x801", "cortex-a73") // Kryo 2xx Silver 273 .Case("0x802", "cortex-a75") // Kryo 3xx Gold 274 .Case("0x803", "cortex-a75") // Kryo 3xx Silver 275 .Case("0x804", "cortex-a76") // Kryo 4xx Gold 276 .Case("0x805", "cortex-a76") // Kryo 4xx/5xx Silver 277 .Case("0xc00", "falkor") 278 .Case("0xc01", "saphira") 279 .Default("generic"); 280 if (Implementer == "0x53") { // Samsung Electronics Co., Ltd. 281 // The Exynos chips have a convoluted ID scheme that doesn't seem to follow 282 // any predictive pattern across variants and parts. 283 unsigned Variant = 0, Part = 0; 284 285 // Look for the CPU variant line, whose value is a 1 digit hexadecimal 286 // number, corresponding to the Variant bits in the CP15/C0 register. 287 for (auto I : Lines) 288 if (I.consume_front("CPU variant")) 289 I.ltrim("\t :").getAsInteger(0, Variant); 290 291 // Look for the CPU part line, whose value is a 3 digit hexadecimal 292 // number, corresponding to the PartNum bits in the CP15/C0 register. 
293 for (auto I : Lines) 294 if (I.consume_front("CPU part")) 295 I.ltrim("\t :").getAsInteger(0, Part); 296 297 unsigned Exynos = (Variant << 12) | Part; 298 switch (Exynos) { 299 default: 300 // Default by falling through to Exynos M3. 301 [[fallthrough]]; 302 case 0x1002: 303 return "exynos-m3"; 304 case 0x1003: 305 return "exynos-m4"; 306 } 307 } 308 309 if (Implementer == "0xc0") { // Ampere Computing 310 return StringSwitch<const char *>(Part) 311 .Case("0xac3", "ampere1") 312 .Case("0xac4", "ampere1a") 313 .Default("generic"); 314 } 315 316 return "generic"; 317 } 318 319 namespace { 320 StringRef getCPUNameFromS390Model(unsigned int Id, bool HaveVectorSupport) { 321 switch (Id) { 322 case 2064: // z900 not supported by LLVM 323 case 2066: 324 case 2084: // z990 not supported by LLVM 325 case 2086: 326 case 2094: // z9-109 not supported by LLVM 327 case 2096: 328 return "generic"; 329 case 2097: 330 case 2098: 331 return "z10"; 332 case 2817: 333 case 2818: 334 return "z196"; 335 case 2827: 336 case 2828: 337 return "zEC12"; 338 case 2964: 339 case 2965: 340 return HaveVectorSupport? "z13" : "zEC12"; 341 case 3906: 342 case 3907: 343 return HaveVectorSupport? "z14" : "zEC12"; 344 case 8561: 345 case 8562: 346 return HaveVectorSupport? "z15" : "zEC12"; 347 case 3931: 348 case 3932: 349 default: 350 return HaveVectorSupport? "z16" : "zEC12"; 351 } 352 } 353 } // end anonymous namespace 354 355 StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) { 356 // STIDP is a privileged operation, so use /proc/cpuinfo instead. 357 358 // The "processor 0:" line comes after a fair amount of other information, 359 // including a cache breakdown, but this should be plenty. 360 SmallVector<StringRef, 32> Lines; 361 ProcCpuinfoContent.split(Lines, "\n"); 362 363 // Look for the CPU features. 
364 SmallVector<StringRef, 32> CPUFeatures; 365 for (unsigned I = 0, E = Lines.size(); I != E; ++I) 366 if (Lines[I].startswith("features")) { 367 size_t Pos = Lines[I].find(':'); 368 if (Pos != StringRef::npos) { 369 Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' '); 370 break; 371 } 372 } 373 374 // We need to check for the presence of vector support independently of 375 // the machine type, since we may only use the vector register set when 376 // supported by the kernel (and hypervisor). 377 bool HaveVectorSupport = false; 378 for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) { 379 if (CPUFeatures[I] == "vx") 380 HaveVectorSupport = true; 381 } 382 383 // Now check the processor machine type. 384 for (unsigned I = 0, E = Lines.size(); I != E; ++I) { 385 if (Lines[I].startswith("processor ")) { 386 size_t Pos = Lines[I].find("machine = "); 387 if (Pos != StringRef::npos) { 388 Pos += sizeof("machine = ") - 1; 389 unsigned int Id; 390 if (!Lines[I].drop_front(Pos).getAsInteger(10, Id)) 391 return getCPUNameFromS390Model(Id, HaveVectorSupport); 392 } 393 break; 394 } 395 } 396 397 return "generic"; 398 } 399 400 StringRef sys::detail::getHostCPUNameForRISCV(StringRef ProcCpuinfoContent) { 401 // There are 24 lines in /proc/cpuinfo 402 SmallVector<StringRef> Lines; 403 ProcCpuinfoContent.split(Lines, "\n"); 404 405 // Look for uarch line to determine cpu name 406 StringRef UArch; 407 for (unsigned I = 0, E = Lines.size(); I != E; ++I) { 408 if (Lines[I].startswith("uarch")) { 409 UArch = Lines[I].substr(5).ltrim("\t :"); 410 break; 411 } 412 } 413 414 return StringSwitch<const char *>(UArch) 415 .Case("sifive,u74-mc", "sifive-u74") 416 .Case("sifive,bullet0", "sifive-u74") 417 .Default("generic"); 418 } 419 420 StringRef sys::detail::getHostCPUNameForBPF() { 421 #if !defined(__linux__) || !defined(__x86_64__) 422 return "generic"; 423 #else 424 uint8_t v3_insns[40] __attribute__ ((aligned (8))) = 425 /* BPF_MOV64_IMM(BPF_REG_0, 0) */ 426 { 0xb7, 0x0, 
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 427 /* BPF_MOV64_IMM(BPF_REG_2, 1) */ 428 0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 429 /* BPF_JMP32_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */ 430 0xae, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 431 /* BPF_MOV64_IMM(BPF_REG_0, 1) */ 432 0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 433 /* BPF_EXIT_INSN() */ 434 0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; 435 436 uint8_t v2_insns[40] __attribute__ ((aligned (8))) = 437 /* BPF_MOV64_IMM(BPF_REG_0, 0) */ 438 { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 439 /* BPF_MOV64_IMM(BPF_REG_2, 1) */ 440 0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 441 /* BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */ 442 0xad, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 443 /* BPF_MOV64_IMM(BPF_REG_0, 1) */ 444 0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 445 /* BPF_EXIT_INSN() */ 446 0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; 447 448 struct bpf_prog_load_attr { 449 uint32_t prog_type; 450 uint32_t insn_cnt; 451 uint64_t insns; 452 uint64_t license; 453 uint32_t log_level; 454 uint32_t log_size; 455 uint64_t log_buf; 456 uint32_t kern_version; 457 uint32_t prog_flags; 458 } attr = {}; 459 attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */ 460 attr.insn_cnt = 5; 461 attr.insns = (uint64_t)v3_insns; 462 attr.license = (uint64_t)"DUMMY"; 463 464 int fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, 465 sizeof(attr)); 466 if (fd >= 0) { 467 close(fd); 468 return "v3"; 469 } 470 471 /* Clear the whole attr in case its content changed by syscall. 
*/ 472 memset(&attr, 0, sizeof(attr)); 473 attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */ 474 attr.insn_cnt = 5; 475 attr.insns = (uint64_t)v2_insns; 476 attr.license = (uint64_t)"DUMMY"; 477 fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, sizeof(attr)); 478 if (fd >= 0) { 479 close(fd); 480 return "v2"; 481 } 482 return "v1"; 483 #endif 484 } 485 486 #if defined(__i386__) || defined(_M_IX86) || \ 487 defined(__x86_64__) || defined(_M_X64) 488 489 // The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max). 490 // Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID 491 // support. Consequently, for i386, the presence of CPUID is checked first 492 // via the corresponding eflags bit. 493 // Removal of cpuid.h header motivated by PR30384 494 // Header cpuid.h and method __get_cpuid_max are not used in llvm, clang, openmp 495 // or test-suite, but are used in external projects e.g. libstdcxx 496 static bool isCpuIdSupported() { 497 #if defined(__GNUC__) || defined(__clang__) 498 #if defined(__i386__) 499 int __cpuid_supported; 500 __asm__(" pushfl\n" 501 " popl %%eax\n" 502 " movl %%eax,%%ecx\n" 503 " xorl $0x00200000,%%eax\n" 504 " pushl %%eax\n" 505 " popfl\n" 506 " pushfl\n" 507 " popl %%eax\n" 508 " movl $0,%0\n" 509 " cmpl %%eax,%%ecx\n" 510 " je 1f\n" 511 " movl $1,%0\n" 512 "1:" 513 : "=r"(__cpuid_supported) 514 : 515 : "eax", "ecx"); 516 if (!__cpuid_supported) 517 return false; 518 #endif 519 return true; 520 #endif 521 return true; 522 } 523 524 /// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in 525 /// the specified arguments. If we can't run cpuid on the host, return true. 526 static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, 527 unsigned *rECX, unsigned *rEDX) { 528 #if defined(__GNUC__) || defined(__clang__) 529 #if defined(__x86_64__) 530 // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. 
531 // FIXME: should we save this for Clang? 532 __asm__("movq\t%%rbx, %%rsi\n\t" 533 "cpuid\n\t" 534 "xchgq\t%%rbx, %%rsi\n\t" 535 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 536 : "a"(value)); 537 return false; 538 #elif defined(__i386__) 539 __asm__("movl\t%%ebx, %%esi\n\t" 540 "cpuid\n\t" 541 "xchgl\t%%ebx, %%esi\n\t" 542 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 543 : "a"(value)); 544 return false; 545 #else 546 return true; 547 #endif 548 #elif defined(_MSC_VER) 549 // The MSVC intrinsic is portable across x86 and x64. 550 int registers[4]; 551 __cpuid(registers, value); 552 *rEAX = registers[0]; 553 *rEBX = registers[1]; 554 *rECX = registers[2]; 555 *rEDX = registers[3]; 556 return false; 557 #else 558 return true; 559 #endif 560 } 561 562 namespace llvm { 563 namespace sys { 564 namespace detail { 565 namespace x86 { 566 567 VendorSignatures getVendorSignature(unsigned *MaxLeaf) { 568 unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; 569 if (MaxLeaf == nullptr) 570 MaxLeaf = &EAX; 571 else 572 *MaxLeaf = 0; 573 574 if (!isCpuIdSupported()) 575 return VendorSignatures::UNKNOWN; 576 577 if (getX86CpuIDAndInfo(0, MaxLeaf, &EBX, &ECX, &EDX) || *MaxLeaf < 1) 578 return VendorSignatures::UNKNOWN; 579 580 // "Genu ineI ntel" 581 if (EBX == 0x756e6547 && EDX == 0x49656e69 && ECX == 0x6c65746e) 582 return VendorSignatures::GENUINE_INTEL; 583 584 // "Auth enti cAMD" 585 if (EBX == 0x68747541 && EDX == 0x69746e65 && ECX == 0x444d4163) 586 return VendorSignatures::AUTHENTIC_AMD; 587 588 return VendorSignatures::UNKNOWN; 589 } 590 591 } // namespace x86 592 } // namespace detail 593 } // namespace sys 594 } // namespace llvm 595 596 using namespace llvm::sys::detail::x86; 597 598 /// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return 599 /// the 4 values in the specified arguments. If we can't run cpuid on the host, 600 /// return true. 
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  // The leaf is passed in EAX and the subleaf in ECX; EBX's result is handed
  // back through RSI so that RBX itself is restored before the asm ends.
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif defined(__i386__)
  // Same scheme as the 64-bit path, using the 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#else
  return true;
#endif
#elif defined(_MSC_VER)
  // registers[] receives EAX, EBX, ECX, EDX in that order.
  int registers[4];
  __cpuidex(registers, value, subleaf);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
  return false;
#else
  return true;
#endif
}

// Read control register 0 (XCR0). Used to detect features such as AVX.
// On success the low 32 bits are stored in *rEAX and the high 32 bits in
// *rEDX, and false is returned. Returns true when no supported way of
// reading the register is available in this build.
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
  // Check xgetbv; this uses a .byte sequence instead of the instruction
  // directly because older assemblers do not include support for xgetbv and
  // there is no easy way to conditionally compile based on the assembler used.
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = Result;
  *rEDX = Result >> 32;
  return false;
#else
  return true;
#endif
}

// Decode the x86 family and model numbers from a CPUID signature word.
// NOTE(review): EAX is presumably the EAX result of cpuid leaf 1 - confirm
// against the caller.
//   Family: base family (bits 8-11); if that is 0xf, the extended family
//           (bits 20-27) is added.
//   Model:  base model (bits 4-7); if the base family is 6 or 0xf, the
//           extended model (bits 16-19) supplies the high four bits.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  *Family = (EAX >> 8) & 0xf; // Bits 8 - 11
  *Model = (EAX >> 4) & 0xf;  // Bits 4 - 7
  if (*Family == 6 || *Family == 0xf) {
    if (*Family == 0xf)
      // Examine extended family ID if family ID is F.
      *Family += (EAX >> 20) & 0xff; // Bits 20 - 27
    // Examine extended model ID if family ID is 6 or F.
    *Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
  }
}

// Map an Intel family/model pair (plus detected features) to an LLVM CPU name.
//   Family, Model: values as produced by detectX86FamilyModel.
//   Features:      bit set indexed by X86::FEATURE_* (32 features per word).
//   Type, Subtype: written only for the models that assign them below; left
//                  untouched otherwise.
// Returns the CPU name, or an empty StringRef for an unrecognised family.
// Unrecognised family-6 models are guessed from the feature bits.
static StringRef
getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
                                const unsigned *Features,
                                unsigned *Type, unsigned *Subtype) {
  // Feature F is bit (F % 32) of word (F / 32).
  auto testFeature = [&](unsigned F) {
    return (Features[F / 32] & (1U << (F % 32))) != 0;
  };

  StringRef CPU;

  switch (Family) {
  case 3:
    CPU = "i386";
    break;
  case 4:
    CPU = "i486";
    break;
  case 5:
    if (testFeature(X86::FEATURE_MMX)) {
      CPU = "pentium-mmx";
      break;
    }
    CPU = "pentium";
    break;
  case 6:
    switch (Model) {
    case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
               // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
               // mobile processor, Intel Core 2 Extreme processor, Intel
               // Pentium Dual-Core processor, Intel Xeon processor, model
               // 0Fh. All processors are manufactured using the 65 nm process.
    case 0x16: // Intel Celeron processor model 16h. All processors are
               // manufactured using the 65 nm process
      CPU = "core2";
      *Type = X86::INTEL_CORE2;
      break;
    case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
               // 17h. All processors are manufactured using the 45 nm process.
               //
               // 45nm: Penryn , Wolfdale, Yorkfield (XE)
    case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
               // the 45 nm process.
      CPU = "penryn";
      *Type = X86::INTEL_CORE2;
      break;
    case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
               // processors are manufactured using the 45 nm process.
    case 0x1e: // Intel(R) Core(TM) i7 CPU         870  @ 2.93GHz.
               // As found in a Summer 2010 model iMac.
    case 0x1f:
    case 0x2e: // Nehalem EX
      CPU = "nehalem";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_NEHALEM;
      break;
    case 0x25: // Intel Core i7, laptop version.
    case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
               // processors are manufactured using the 32 nm process.
    case 0x2f: // Westmere EX
      CPU = "westmere";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_WESTMERE;
      break;
    case 0x2a: // Intel Core i7 processor. All processors are manufactured
               // using the 32 nm process.
    case 0x2d:
      CPU = "sandybridge";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_SANDYBRIDGE;
      break;
    case 0x3a:
    case 0x3e: // Ivy Bridge EP
      CPU = "ivybridge";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_IVYBRIDGE;
      break;

    // Haswell:
    case 0x3c:
    case 0x3f:
    case 0x45:
    case 0x46:
      CPU = "haswell";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_HASWELL;
      break;

    // Broadwell:
    case 0x3d:
    case 0x47:
    case 0x4f:
    case 0x56:
      CPU = "broadwell";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_BROADWELL;
      break;

    // Skylake:
    case 0x4e: // Skylake mobile
    case 0x5e: // Skylake desktop
    case 0x8e: // Kaby Lake mobile
    case 0x9e: // Kaby Lake desktop
    case 0xa5: // Comet Lake-H/S
    case 0xa6: // Comet Lake-U
      CPU = "skylake";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_SKYLAKE;
      break;

    // Rocketlake:
    case 0xa7:
      CPU = "rocketlake";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_ROCKETLAKE;
      break;

    // Skylake Xeon:
    // Model 0x55 covers three CPU names; the feature bits tell them apart.
    case 0x55:
      *Type = X86::INTEL_COREI7;
      if (testFeature(X86::FEATURE_AVX512BF16)) {
        CPU = "cooperlake";
        *Subtype = X86::INTEL_COREI7_COOPERLAKE;
      } else if (testFeature(X86::FEATURE_AVX512VNNI)) {
        CPU = "cascadelake";
        *Subtype = X86::INTEL_COREI7_CASCADELAKE;
      } else {
        CPU = "skylake-avx512";
        *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512;
      }
      break;

    // Cannonlake:
    case 0x66:
      CPU = "cannonlake";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_CANNONLAKE;
      break;

    // Icelake:
    case 0x7d:
    case 0x7e:
      CPU = "icelake-client";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT;
      break;

    // Tigerlake:
    case 0x8c:
    case 0x8d:
      CPU = "tigerlake";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_TIGERLAKE;
      break;

    // Alderlake:
    case 0x97:
    case 0x9a:
    // Raptorlake:
    case 0xb7:
    // Meteorlake:
    case 0xaa:
    case 0xac:
      // Raptor Lake and Meteor Lake models are reported as "alderlake" here.
      CPU = "alderlake";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_ALDERLAKE;
      break;

    // Graniterapids:
    case 0xad:
      CPU = "graniterapids";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_GRANITERAPIDS;
      break;

    // Granite Rapids D:
    case 0xae:
      CPU = "graniterapids-d";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_GRANITERAPIDS_D;
      break;

    // Icelake Xeon:
    case 0x6a:
    case 0x6c:
      CPU = "icelake-server";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_ICELAKE_SERVER;
      break;

    // Emerald Rapids:
    case 0xcf:
    // Sapphire Rapids:
    case 0x8f:
      // Emerald Rapids is reported as "sapphirerapids" here.
      CPU = "sapphirerapids";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_SAPPHIRERAPIDS;
      break;

    case 0x1c: // Most 45 nm Intel Atom processors
    case 0x26: // 45 nm Atom Lincroft
    case 0x27: // 32 nm Atom Medfield
    case 0x35: // 32 nm Atom Midview
    case 0x36: // 32 nm Atom Midview
      CPU = "bonnell";
      *Type = X86::INTEL_BONNELL;
      break;

    // Atom Silvermont codes from the Intel software optimization guide.
    case 0x37:
    case 0x4a:
    case 0x4d:
    case 0x5a:
    case 0x5d:
    case 0x4c: // really airmont
      CPU = "silvermont";
      *Type = X86::INTEL_SILVERMONT;
      break;
    // Goldmont:
    case 0x5c: // Apollo Lake
    case 0x5f: // Denverton
      CPU = "goldmont";
      *Type = X86::INTEL_GOLDMONT;
      break;
    case 0x7a:
      CPU = "goldmont-plus";
      *Type = X86::INTEL_GOLDMONT_PLUS;
      break;
    case 0x86:
      CPU = "tremont";
      *Type = X86::INTEL_TREMONT;
      break;

    // Sierraforest:
    case 0xaf:
      CPU = "sierraforest";
      *Type = X86::INTEL_SIERRAFOREST;
      break;

    // Grandridge:
    case 0xb6:
      CPU = "grandridge";
      *Type = X86::INTEL_GRANDRIDGE;
      break;

    // Xeon Phi (Knights Landing + Knights Mill):
    case 0x57:
      CPU = "knl";
      *Type = X86::INTEL_KNL;
      break;
    case 0x85:
      CPU = "knm";
      *Type = X86::INTEL_KNM;
      break;

    default: // Unknown family 6 CPU, try to guess.
      // Don't bother with Type/Subtype here, they aren't used by the caller.
      // They're used above to keep the code in sync with compiler-rt.
      // TODO detect tigerlake host from model
      // The chain below is order-sensitive: the first matching feature test
      // decides the name.
      if (testFeature(X86::FEATURE_AVX512VP2INTERSECT)) {
        CPU = "tigerlake";
      } else if (testFeature(X86::FEATURE_AVX512VBMI2)) {
        CPU = "icelake-client";
      } else if (testFeature(X86::FEATURE_AVX512VBMI)) {
        CPU = "cannonlake";
      } else if (testFeature(X86::FEATURE_AVX512BF16)) {
        CPU = "cooperlake";
      } else if (testFeature(X86::FEATURE_AVX512VNNI)) {
        CPU = "cascadelake";
      } else if (testFeature(X86::FEATURE_AVX512VL)) {
        CPU = "skylake-avx512";
      } else if (testFeature(X86::FEATURE_AVX512ER)) {
        CPU = "knl";
      } else if (testFeature(X86::FEATURE_CLFLUSHOPT)) {
        if (testFeature(X86::FEATURE_SHA))
          CPU = "goldmont";
        else
          CPU = "skylake";
      } else if (testFeature(X86::FEATURE_ADX)) {
        CPU = "broadwell";
      } else if (testFeature(X86::FEATURE_AVX2)) {
        CPU = "haswell";
      } else if (testFeature(X86::FEATURE_AVX)) {
        CPU = "sandybridge";
      } else if (testFeature(X86::FEATURE_SSE4_2)) {
        if (testFeature(X86::FEATURE_MOVBE))
          CPU = "silvermont";
        else
          CPU = "nehalem";
      } else if (testFeature(X86::FEATURE_SSE4_1)) {
        CPU = "penryn";
      } else if (testFeature(X86::FEATURE_SSSE3)) {
        if (testFeature(X86::FEATURE_MOVBE))
          CPU = "bonnell";
        else
          CPU = "core2";
      } else if (testFeature(X86::FEATURE_64BIT)) {
        CPU = "core2";
      } else if (testFeature(X86::FEATURE_SSE3)) {
        CPU = "yonah";
      } else if (testFeature(X86::FEATURE_SSE2)) {
        CPU = "pentium-m";
      } else if (testFeature(X86::FEATURE_SSE)) {
        CPU = "pentium3";
      } else if (testFeature(X86::FEATURE_MMX)) {
        CPU = "pentium2";
      } else {
        CPU = "pentiumpro";
      }
      break;
    }
    break;
  case 15: {
    if (testFeature(X86::FEATURE_64BIT)) {
      CPU = "nocona";
      break;
    }
    if (testFeature(X86::FEATURE_SSE3)) {
      CPU = "prescott";
      break;
    }
    CPU = "pentium4";
    break;
  }
  default:
    break; // Unknown.
  }

  return CPU;
}

// Map an AMD family/model pair (plus detected features) to an LLVM CPU name.
//   Family, Model: values as produced by detectX86FamilyModel.
//   Features:      bit set indexed by X86::FEATURE_* (32 features per word).
//   Type, Subtype: written only by the families/models that assign them
//                  below; left untouched otherwise.
// Returns the CPU name, or an empty StringRef for an unrecognised family.
static StringRef
getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
                              const unsigned *Features,
                              unsigned *Type, unsigned *Subtype) {
  // Feature F is bit (F % 32) of word (F / 32).
  auto testFeature = [&](unsigned F) {
    return (Features[F / 32] & (1U << (F % 32))) != 0;
  };

  StringRef CPU;

  switch (Family) {
  case 4:
    CPU = "i486";
    break;
  case 5:
    // "pentium" unless the model selects one of the names below.
    CPU = "pentium";
    switch (Model) {
    case 6:
    case 7:
      CPU = "k6";
      break;
    case 8:
      CPU = "k6-2";
      break;
    case 9:
    case 13:
      CPU = "k6-3";
      break;
    case 10:
      CPU = "geode";
      break;
    }
    break;
  case 6:
    if (testFeature(X86::FEATURE_SSE)) {
      CPU = "athlon-xp";
      break;
    }
    CPU = "athlon";
    break;
  case 15:
    if (testFeature(X86::FEATURE_SSE3)) {
      CPU = "k8-sse3";
      break;
    }
    CPU = "k8";
    break;
  case 16:
    CPU = "amdfam10";
    *Type = X86::AMDFAM10H; // "amdfam10"
    switch (Model) {
    case 2:
      *Subtype = X86::AMDFAM10H_BARCELONA;
      break;
    case 4:
      *Subtype = X86::AMDFAM10H_SHANGHAI;
      break;
    case 8:
      *Subtype = X86::AMDFAM10H_ISTANBUL;
      break;
    }
    break;
  case 20:
    CPU = "btver1";
    *Type = X86::AMD_BTVER1;
    break;
  case 21:
    // "bdver1" is the name for any model not matched by a range below; in
    // that case Subtype is set only for models 00h-0Fh.
    CPU = "bdver1";
    *Type = X86::AMDFAM15H;
    if (Model >= 0x60 && Model <= 0x7f) {
      CPU = "bdver4";
      *Subtype = X86::AMDFAM15H_BDVER4;
      break; // 60h-7Fh: Excavator
    }
    if (Model >= 0x30 && Model <= 0x3f) {
      CPU = "bdver3";
      *Subtype = X86::AMDFAM15H_BDVER3;
      break; // 30h-3Fh: Steamroller
    }
    if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
      CPU = "bdver2";
      *Subtype = X86::AMDFAM15H_BDVER2;
      break; // 02h, 10h-1Fh: Piledriver
    }
    if (Model <= 0x0f) {
      *Subtype = X86::AMDFAM15H_BDVER1;
      break; // 00h-0Fh: Bulldozer
    }
    break;
  case 22:
    CPU = "btver2";
    *Type = X86::AMD_BTVER2;
    break;
  case 23:
    // "znver1" is the name for any model not matched as Zen2 below.
    CPU = "znver1";
    *Type = X86::AMDFAM17H;
    if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) {
      CPU = "znver2";
      *Subtype = X86::AMDFAM17H_ZNVER2;
      break; // 30h-3fh, 71h: Zen2
    }
    if (Model <= 0x0f) {
      *Subtype = X86::AMDFAM17H_ZNVER1;
      break; // 00h-0Fh: Zen1
    }
    break;
  case 25:
    // "znver3" is the name for any model not matched as Zen4 below.
    CPU = "znver3";
    *Type = X86::AMDFAM19H;
    if (Model <= 0x0f || (Model >= 0x20 && Model <= 0x5f)) {
      // Family 19h Models 00h-0Fh - Zen3
      // Family 19h Models 20h-2Fh - Zen3
      // Family 19h Models 30h-3Fh - Zen3
      // Family 19h Models 40h-4Fh - Zen3+
      // Family 19h Models 50h-5Fh - Zen3+
      *Subtype = X86::AMDFAM19H_ZNVER3;
      break;
    }
    if ((Model >= 0x10 && Model <= 0x1f) ||
        (Model >= 0x60 && Model <= 0x74) ||
        (Model >= 0x78 && Model <= 0x7b) ||
        (Model >= 0xA0 && Model <= 0xAf)) {
      CPU = "znver4";
      *Subtype = X86::AMDFAM19H_ZNVER4;
      break; //  "znver4"
    }
    break; // family 19h
  default:
    break; // Unknown AMD CPU.
  }

  return CPU;
}

static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
                                 unsigned *Features) {
  unsigned EAX, EBX;

  auto setFeature = [&](unsigned F) {
    Features[F / 32] |= 1U << (F % 32);
  };

  if ((EDX >> 15) & 1)
    setFeature(X86::FEATURE_CMOV);
  if ((EDX >> 23) & 1)
    setFeature(X86::FEATURE_MMX);
  if ((EDX >> 25) & 1)
    setFeature(X86::FEATURE_SSE);
  if ((EDX >> 26) & 1)
    setFeature(X86::FEATURE_SSE2);

  if ((ECX >> 0) & 1)
    setFeature(X86::FEATURE_SSE3);
  if ((ECX >> 1) & 1)
    setFeature(X86::FEATURE_PCLMUL);
  if ((ECX >> 9) & 1)
    setFeature(X86::FEATURE_SSSE3);
  if ((ECX >> 12) & 1)
    setFeature(X86::FEATURE_FMA);
  if ((ECX >> 19) & 1)
    setFeature(X86::FEATURE_SSE4_1);
  if ((ECX >> 20) & 1) {
    setFeature(X86::FEATURE_SSE4_2);
    setFeature(X86::FEATURE_CRC32);
  }
  if ((ECX >> 23) & 1)
    setFeature(X86::FEATURE_POPCNT);
  if ((ECX >> 25) & 1)
    setFeature(X86::FEATURE_AES);

  if ((ECX >> 22) & 1)
    setFeature(X86::FEATURE_MOVBE);

  // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
  // indicates that the AVX registers will be saved and restored on context
  // switch, then we have full AVX support.
  const unsigned AVXBits = (1 << 27) | (1 << 28);
  bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
                ((EAX & 0x6) == 0x6);
#if defined(__APPLE__)
  // Darwin lazily saves the AVX512 context on first use: trust that the OS will
  // save the AVX512 context if we use AVX512 instructions, even the bit is not
  // set right now.
  bool HasAVX512Save = true;
#else
  // AVX512 requires additional context to be saved by the OS.
1184 bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0); 1185 #endif 1186 1187 if (HasAVX) 1188 setFeature(X86::FEATURE_AVX); 1189 1190 bool HasLeaf7 = 1191 MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); 1192 1193 if (HasLeaf7 && ((EBX >> 3) & 1)) 1194 setFeature(X86::FEATURE_BMI); 1195 if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX) 1196 setFeature(X86::FEATURE_AVX2); 1197 if (HasLeaf7 && ((EBX >> 8) & 1)) 1198 setFeature(X86::FEATURE_BMI2); 1199 if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save) 1200 setFeature(X86::FEATURE_AVX512F); 1201 if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save) 1202 setFeature(X86::FEATURE_AVX512DQ); 1203 if (HasLeaf7 && ((EBX >> 19) & 1)) 1204 setFeature(X86::FEATURE_ADX); 1205 if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save) 1206 setFeature(X86::FEATURE_AVX512IFMA); 1207 if (HasLeaf7 && ((EBX >> 23) & 1)) 1208 setFeature(X86::FEATURE_CLFLUSHOPT); 1209 if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save) 1210 setFeature(X86::FEATURE_AVX512PF); 1211 if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save) 1212 setFeature(X86::FEATURE_AVX512ER); 1213 if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save) 1214 setFeature(X86::FEATURE_AVX512CD); 1215 if (HasLeaf7 && ((EBX >> 29) & 1)) 1216 setFeature(X86::FEATURE_SHA); 1217 if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save) 1218 setFeature(X86::FEATURE_AVX512BW); 1219 if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save) 1220 setFeature(X86::FEATURE_AVX512VL); 1221 1222 if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save) 1223 setFeature(X86::FEATURE_AVX512VBMI); 1224 if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save) 1225 setFeature(X86::FEATURE_AVX512VBMI2); 1226 if (HasLeaf7 && ((ECX >> 8) & 1)) 1227 setFeature(X86::FEATURE_GFNI); 1228 if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX) 1229 setFeature(X86::FEATURE_VPCLMULQDQ); 1230 if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save) 1231 setFeature(X86::FEATURE_AVX512VNNI); 1232 if (HasLeaf7 && ((ECX >> 12) & 1) && 
HasAVX512Save) 1233 setFeature(X86::FEATURE_AVX512BITALG); 1234 if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save) 1235 setFeature(X86::FEATURE_AVX512VPOPCNTDQ); 1236 1237 if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save) 1238 setFeature(X86::FEATURE_AVX5124VNNIW); 1239 if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save) 1240 setFeature(X86::FEATURE_AVX5124FMAPS); 1241 if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save) 1242 setFeature(X86::FEATURE_AVX512VP2INTERSECT); 1243 1244 // EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't 1245 // return all 0s for invalid subleaves so check the limit. 1246 bool HasLeaf7Subleaf1 = 1247 HasLeaf7 && EAX >= 1 && 1248 !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); 1249 if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save) 1250 setFeature(X86::FEATURE_AVX512BF16); 1251 1252 unsigned MaxExtLevel; 1253 getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); 1254 1255 bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && 1256 !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); 1257 if (HasExtLeaf1 && ((ECX >> 6) & 1)) 1258 setFeature(X86::FEATURE_SSE4_A); 1259 if (HasExtLeaf1 && ((ECX >> 11) & 1)) 1260 setFeature(X86::FEATURE_XOP); 1261 if (HasExtLeaf1 && ((ECX >> 16) & 1)) 1262 setFeature(X86::FEATURE_FMA4); 1263 1264 if (HasExtLeaf1 && ((EDX >> 29) & 1)) 1265 setFeature(X86::FEATURE_64BIT); 1266 } 1267 1268 StringRef sys::getHostCPUName() { 1269 unsigned MaxLeaf = 0; 1270 const VendorSignatures Vendor = getVendorSignature(&MaxLeaf); 1271 if (Vendor == VendorSignatures::UNKNOWN) 1272 return "generic"; 1273 1274 unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; 1275 getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX); 1276 1277 unsigned Family = 0, Model = 0; 1278 unsigned Features[(X86::CPU_FEATURE_MAX + 31) / 32] = {0}; 1279 detectX86FamilyModel(EAX, &Family, &Model); 1280 getAvailableFeatures(ECX, EDX, MaxLeaf, Features); 1281 1282 // These aren't consumed in this file, but we try to keep some 
source code the 1283 // same or similar to compiler-rt. 1284 unsigned Type = 0; 1285 unsigned Subtype = 0; 1286 1287 StringRef CPU; 1288 1289 if (Vendor == VendorSignatures::GENUINE_INTEL) { 1290 CPU = getIntelProcessorTypeAndSubtype(Family, Model, Features, &Type, 1291 &Subtype); 1292 } else if (Vendor == VendorSignatures::AUTHENTIC_AMD) { 1293 CPU = getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type, 1294 &Subtype); 1295 } 1296 1297 if (!CPU.empty()) 1298 return CPU; 1299 1300 return "generic"; 1301 } 1302 1303 #elif defined(__APPLE__) && defined(__powerpc__) 1304 StringRef sys::getHostCPUName() { 1305 host_basic_info_data_t hostInfo; 1306 mach_msg_type_number_t infoCount; 1307 1308 infoCount = HOST_BASIC_INFO_COUNT; 1309 mach_port_t hostPort = mach_host_self(); 1310 host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo, 1311 &infoCount); 1312 mach_port_deallocate(mach_task_self(), hostPort); 1313 1314 if (hostInfo.cpu_type != CPU_TYPE_POWERPC) 1315 return "generic"; 1316 1317 switch (hostInfo.cpu_subtype) { 1318 case CPU_SUBTYPE_POWERPC_601: 1319 return "601"; 1320 case CPU_SUBTYPE_POWERPC_602: 1321 return "602"; 1322 case CPU_SUBTYPE_POWERPC_603: 1323 return "603"; 1324 case CPU_SUBTYPE_POWERPC_603e: 1325 return "603e"; 1326 case CPU_SUBTYPE_POWERPC_603ev: 1327 return "603ev"; 1328 case CPU_SUBTYPE_POWERPC_604: 1329 return "604"; 1330 case CPU_SUBTYPE_POWERPC_604e: 1331 return "604e"; 1332 case CPU_SUBTYPE_POWERPC_620: 1333 return "620"; 1334 case CPU_SUBTYPE_POWERPC_750: 1335 return "750"; 1336 case CPU_SUBTYPE_POWERPC_7400: 1337 return "7400"; 1338 case CPU_SUBTYPE_POWERPC_7450: 1339 return "7450"; 1340 case CPU_SUBTYPE_POWERPC_970: 1341 return "970"; 1342 default:; 1343 } 1344 1345 return "generic"; 1346 } 1347 #elif defined(__linux__) && defined(__powerpc__) 1348 StringRef sys::getHostCPUName() { 1349 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1350 StringRef Content = P ? 
P->getBuffer() : ""; 1351 return detail::getHostCPUNameForPowerPC(Content); 1352 } 1353 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__)) 1354 StringRef sys::getHostCPUName() { 1355 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1356 StringRef Content = P ? P->getBuffer() : ""; 1357 return detail::getHostCPUNameForARM(Content); 1358 } 1359 #elif defined(__linux__) && defined(__s390x__) 1360 StringRef sys::getHostCPUName() { 1361 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1362 StringRef Content = P ? P->getBuffer() : ""; 1363 return detail::getHostCPUNameForS390x(Content); 1364 } 1365 #elif defined(__MVS__) 1366 StringRef sys::getHostCPUName() { 1367 // Get pointer to Communications Vector Table (CVT). 1368 // The pointer is located at offset 16 of the Prefixed Save Area (PSA). 1369 // It is stored as 31 bit pointer and will be zero-extended to 64 bit. 1370 int *StartToCVTOffset = reinterpret_cast<int *>(0x10); 1371 // Since its stored as a 31-bit pointer, get the 4 bytes from the start 1372 // of address. 1373 int ReadValue = *StartToCVTOffset; 1374 // Explicitly clear the high order bit. 1375 ReadValue = (ReadValue & 0x7FFFFFFF); 1376 char *CVT = reinterpret_cast<char *>(ReadValue); 1377 // The model number is located in the CVT prefix at offset -6 and stored as 1378 // signless packed decimal. 1379 uint16_t Id = *(uint16_t *)&CVT[-6]; 1380 // Convert number to integer. 1381 Id = decodePackedBCD<uint16_t>(Id, false); 1382 // Check for vector support. It's stored in field CVTFLAG5 (offset 244), 1383 // bit CVTVEF (X'80'). The facilities list is part of the PSA but the vector 1384 // extension can only be used if bit CVTVEF is on. 
1385 bool HaveVectorSupport = CVT[244] & 0x80; 1386 return getCPUNameFromS390Model(Id, HaveVectorSupport); 1387 } 1388 #elif defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) 1389 #define CPUFAMILY_ARM_SWIFT 0x1e2d6381 1390 #define CPUFAMILY_ARM_CYCLONE 0x37a09642 1391 #define CPUFAMILY_ARM_TYPHOON 0x2c91a47e 1392 #define CPUFAMILY_ARM_TWISTER 0x92fb37c8 1393 #define CPUFAMILY_ARM_HURRICANE 0x67ceee93 1394 #define CPUFAMILY_ARM_MONSOON_MISTRAL 0xe81e7ef6 1395 #define CPUFAMILY_ARM_VORTEX_TEMPEST 0x07d34b9f 1396 #define CPUFAMILY_ARM_LIGHTNING_THUNDER 0x462504d2 1397 #define CPUFAMILY_ARM_FIRESTORM_ICESTORM 0x1b588bb3 1398 1399 StringRef sys::getHostCPUName() { 1400 uint32_t Family; 1401 size_t Length = sizeof(Family); 1402 sysctlbyname("hw.cpufamily", &Family, &Length, NULL, 0); 1403 1404 switch (Family) { 1405 case CPUFAMILY_ARM_SWIFT: 1406 return "swift"; 1407 case CPUFAMILY_ARM_CYCLONE: 1408 return "apple-a7"; 1409 case CPUFAMILY_ARM_TYPHOON: 1410 return "apple-a8"; 1411 case CPUFAMILY_ARM_TWISTER: 1412 return "apple-a9"; 1413 case CPUFAMILY_ARM_HURRICANE: 1414 return "apple-a10"; 1415 case CPUFAMILY_ARM_MONSOON_MISTRAL: 1416 return "apple-a11"; 1417 case CPUFAMILY_ARM_VORTEX_TEMPEST: 1418 return "apple-a12"; 1419 case CPUFAMILY_ARM_LIGHTNING_THUNDER: 1420 return "apple-a13"; 1421 case CPUFAMILY_ARM_FIRESTORM_ICESTORM: 1422 return "apple-m1"; 1423 default: 1424 // Default to the newest CPU we know about. 
1425 return "apple-m1"; 1426 } 1427 } 1428 #elif defined(_AIX) 1429 StringRef sys::getHostCPUName() { 1430 switch (_system_configuration.implementation) { 1431 case POWER_4: 1432 if (_system_configuration.version == PV_4_3) 1433 return "970"; 1434 return "pwr4"; 1435 case POWER_5: 1436 if (_system_configuration.version == PV_5) 1437 return "pwr5"; 1438 return "pwr5x"; 1439 case POWER_6: 1440 if (_system_configuration.version == PV_6_Compat) 1441 return "pwr6"; 1442 return "pwr6x"; 1443 case POWER_7: 1444 return "pwr7"; 1445 case POWER_8: 1446 return "pwr8"; 1447 case POWER_9: 1448 return "pwr9"; 1449 // TODO: simplify this once the macro is available in all OS levels. 1450 #ifdef POWER_10 1451 case POWER_10: 1452 #else 1453 case 0x40000: 1454 #endif 1455 return "pwr10"; 1456 default: 1457 return "generic"; 1458 } 1459 } 1460 #elif defined(__loongarch__) 1461 StringRef sys::getHostCPUName() { 1462 // Use processor id to detect cpu name. 1463 uint32_t processor_id; 1464 __asm__("cpucfg %[prid], $zero\n\t" : [prid] "=r"(processor_id)); 1465 switch (processor_id & 0xff00) { 1466 case 0xc000: // Loongson 64bit, 4-issue 1467 return "la464"; 1468 // TODO: Others. 1469 default: 1470 break; 1471 } 1472 return "generic"; 1473 } 1474 #elif defined(__riscv) 1475 StringRef sys::getHostCPUName() { 1476 #if defined(__linux__) 1477 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1478 StringRef Content = P ? 
P->getBuffer() : ""; 1479 return detail::getHostCPUNameForRISCV(Content); 1480 #else 1481 #if __riscv_xlen == 64 1482 return "generic-rv64"; 1483 #elif __riscv_xlen == 32 1484 return "generic-rv32"; 1485 #else 1486 #error "Unhandled value of __riscv_xlen" 1487 #endif 1488 #endif 1489 } 1490 #elif defined(__sparc__) 1491 #if defined(__linux__) 1492 StringRef sys::detail::getHostCPUNameForSPARC(StringRef ProcCpuinfoContent) { 1493 SmallVector<StringRef> Lines; 1494 ProcCpuinfoContent.split(Lines, "\n"); 1495 1496 // Look for cpu line to determine cpu name 1497 StringRef Cpu; 1498 for (unsigned I = 0, E = Lines.size(); I != E; ++I) { 1499 if (Lines[I].startswith("cpu")) { 1500 Cpu = Lines[I].substr(5).ltrim("\t :"); 1501 break; 1502 } 1503 } 1504 1505 return StringSwitch<const char *>(Cpu) 1506 .StartsWith("SuperSparc", "supersparc") 1507 .StartsWith("HyperSparc", "hypersparc") 1508 .StartsWith("SpitFire", "ultrasparc") 1509 .StartsWith("BlackBird", "ultrasparc") 1510 .StartsWith("Sabre", " ultrasparc") 1511 .StartsWith("Hummingbird", "ultrasparc") 1512 .StartsWith("Cheetah", "ultrasparc3") 1513 .StartsWith("Jalapeno", "ultrasparc3") 1514 .StartsWith("Jaguar", "ultrasparc3") 1515 .StartsWith("Panther", "ultrasparc3") 1516 .StartsWith("Serrano", "ultrasparc3") 1517 .StartsWith("UltraSparc T1", "niagara") 1518 .StartsWith("UltraSparc T2", "niagara2") 1519 .StartsWith("UltraSparc T3", "niagara3") 1520 .StartsWith("UltraSparc T4", "niagara4") 1521 .StartsWith("UltraSparc T5", "niagara4") 1522 .StartsWith("LEON", "leon3") 1523 // niagara7/m8 not supported by LLVM yet. 1524 .StartsWith("SPARC-M7", "niagara4" /* "niagara7" */) 1525 .StartsWith("SPARC-S7", "niagara4" /* "niagara7" */) 1526 .StartsWith("SPARC-M8", "niagara4" /* "m8" */) 1527 .Default("generic"); 1528 } 1529 #endif 1530 1531 StringRef sys::getHostCPUName() { 1532 #if defined(__linux__) 1533 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1534 StringRef Content = P ? 
P->getBuffer() : ""; 1535 return detail::getHostCPUNameForSPARC(Content); 1536 #elif defined(__sun__) && defined(__svr4__) 1537 char *buf = NULL; 1538 kstat_ctl_t *kc; 1539 kstat_t *ksp; 1540 kstat_named_t *brand = NULL; 1541 1542 kc = kstat_open(); 1543 if (kc != NULL) { 1544 ksp = kstat_lookup(kc, const_cast<char *>("cpu_info"), -1, NULL); 1545 if (ksp != NULL && kstat_read(kc, ksp, NULL) != -1 && 1546 ksp->ks_type == KSTAT_TYPE_NAMED) 1547 brand = 1548 (kstat_named_t *)kstat_data_lookup(ksp, const_cast<char *>("brand")); 1549 if (brand != NULL && brand->data_type == KSTAT_DATA_STRING) 1550 buf = KSTAT_NAMED_STR_PTR(brand); 1551 } 1552 kstat_close(kc); 1553 1554 return StringSwitch<const char *>(buf) 1555 .Case("TMS390S10", "supersparc") // Texas Instruments microSPARC I 1556 .Case("TMS390Z50", "supersparc") // Texas Instruments SuperSPARC I 1557 .Case("TMS390Z55", 1558 "supersparc") // Texas Instruments SuperSPARC I with SuperCache 1559 .Case("MB86904", "supersparc") // Fujitsu microSPARC II 1560 .Case("MB86907", "supersparc") // Fujitsu TurboSPARC 1561 .Case("RT623", "hypersparc") // Ross hyperSPARC 1562 .Case("RT625", "hypersparc") 1563 .Case("RT626", "hypersparc") 1564 .Case("UltraSPARC-I", "ultrasparc") 1565 .Case("UltraSPARC-II", "ultrasparc") 1566 .Case("UltraSPARC-IIe", "ultrasparc") 1567 .Case("UltraSPARC-IIi", "ultrasparc") 1568 .Case("SPARC64-III", "ultrasparc") 1569 .Case("SPARC64-IV", "ultrasparc") 1570 .Case("UltraSPARC-III", "ultrasparc3") 1571 .Case("UltraSPARC-III+", "ultrasparc3") 1572 .Case("UltraSPARC-IIIi", "ultrasparc3") 1573 .Case("UltraSPARC-IIIi+", "ultrasparc3") 1574 .Case("UltraSPARC-IV", "ultrasparc3") 1575 .Case("UltraSPARC-IV+", "ultrasparc3") 1576 .Case("SPARC64-V", "ultrasparc3") 1577 .Case("SPARC64-VI", "ultrasparc3") 1578 .Case("SPARC64-VII", "ultrasparc3") 1579 .Case("UltraSPARC-T1", "niagara") 1580 .Case("UltraSPARC-T2", "niagara2") 1581 .Case("UltraSPARC-T2", "niagara2") 1582 .Case("UltraSPARC-T2+", "niagara2") 1583 
.Case("SPARC-T3", "niagara3") 1584 .Case("SPARC-T4", "niagara4") 1585 .Case("SPARC-T5", "niagara4") 1586 // niagara7/m8 not supported by LLVM yet. 1587 .Case("SPARC-M7", "niagara4" /* "niagara7" */) 1588 .Case("SPARC-S7", "niagara4" /* "niagara7" */) 1589 .Case("SPARC-M8", "niagara4" /* "m8" */) 1590 .Default("generic"); 1591 #else 1592 return "generic"; 1593 #endif 1594 } 1595 #else 1596 StringRef sys::getHostCPUName() { return "generic"; } 1597 namespace llvm { 1598 namespace sys { 1599 namespace detail { 1600 namespace x86 { 1601 1602 VendorSignatures getVendorSignature(unsigned *MaxLeaf) { 1603 return VendorSignatures::UNKNOWN; 1604 } 1605 1606 } // namespace x86 1607 } // namespace detail 1608 } // namespace sys 1609 } // namespace llvm 1610 #endif 1611 1612 #if defined(__i386__) || defined(_M_IX86) || \ 1613 defined(__x86_64__) || defined(_M_X64) 1614 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { 1615 unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; 1616 unsigned MaxLevel; 1617 1618 if (getX86CpuIDAndInfo(0, &MaxLevel, &EBX, &ECX, &EDX) || MaxLevel < 1) 1619 return false; 1620 1621 getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX); 1622 1623 Features["cx8"] = (EDX >> 8) & 1; 1624 Features["cmov"] = (EDX >> 15) & 1; 1625 Features["mmx"] = (EDX >> 23) & 1; 1626 Features["fxsr"] = (EDX >> 24) & 1; 1627 Features["sse"] = (EDX >> 25) & 1; 1628 Features["sse2"] = (EDX >> 26) & 1; 1629 1630 Features["sse3"] = (ECX >> 0) & 1; 1631 Features["pclmul"] = (ECX >> 1) & 1; 1632 Features["ssse3"] = (ECX >> 9) & 1; 1633 Features["cx16"] = (ECX >> 13) & 1; 1634 Features["sse4.1"] = (ECX >> 19) & 1; 1635 Features["sse4.2"] = (ECX >> 20) & 1; 1636 Features["crc32"] = Features["sse4.2"]; 1637 Features["movbe"] = (ECX >> 22) & 1; 1638 Features["popcnt"] = (ECX >> 23) & 1; 1639 Features["aes"] = (ECX >> 25) & 1; 1640 Features["rdrnd"] = (ECX >> 30) & 1; 1641 1642 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV 1643 // indicates that the AVX registers will 
be saved and restored on context 1644 // switch, then we have full AVX support. 1645 bool HasXSave = ((ECX >> 27) & 1) && !getX86XCR0(&EAX, &EDX); 1646 bool HasAVXSave = HasXSave && ((ECX >> 28) & 1) && ((EAX & 0x6) == 0x6); 1647 #if defined(__APPLE__) 1648 // Darwin lazily saves the AVX512 context on first use: trust that the OS will 1649 // save the AVX512 context if we use AVX512 instructions, even the bit is not 1650 // set right now. 1651 bool HasAVX512Save = true; 1652 #else 1653 // AVX512 requires additional context to be saved by the OS. 1654 bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0); 1655 #endif 1656 // AMX requires additional context to be saved by the OS. 1657 const unsigned AMXBits = (1 << 17) | (1 << 18); 1658 bool HasAMXSave = HasXSave && ((EAX & AMXBits) == AMXBits); 1659 1660 Features["avx"] = HasAVXSave; 1661 Features["fma"] = ((ECX >> 12) & 1) && HasAVXSave; 1662 // Only enable XSAVE if OS has enabled support for saving YMM state. 1663 Features["xsave"] = ((ECX >> 26) & 1) && HasAVXSave; 1664 Features["f16c"] = ((ECX >> 29) & 1) && HasAVXSave; 1665 1666 unsigned MaxExtLevel; 1667 getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); 1668 1669 bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && 1670 !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); 1671 Features["sahf"] = HasExtLeaf1 && ((ECX >> 0) & 1); 1672 Features["lzcnt"] = HasExtLeaf1 && ((ECX >> 5) & 1); 1673 Features["sse4a"] = HasExtLeaf1 && ((ECX >> 6) & 1); 1674 Features["prfchw"] = HasExtLeaf1 && ((ECX >> 8) & 1); 1675 Features["xop"] = HasExtLeaf1 && ((ECX >> 11) & 1) && HasAVXSave; 1676 Features["lwp"] = HasExtLeaf1 && ((ECX >> 15) & 1); 1677 Features["fma4"] = HasExtLeaf1 && ((ECX >> 16) & 1) && HasAVXSave; 1678 Features["tbm"] = HasExtLeaf1 && ((ECX >> 21) & 1); 1679 Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1); 1680 1681 Features["64bit"] = HasExtLeaf1 && ((EDX >> 29) & 1); 1682 1683 // Miscellaneous memory related features, detected by 1684 // 
using the 0x80000008 leaf of the CPUID instruction 1685 bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 && 1686 !getX86CpuIDAndInfo(0x80000008, &EAX, &EBX, &ECX, &EDX); 1687 Features["clzero"] = HasExtLeaf8 && ((EBX >> 0) & 1); 1688 Features["rdpru"] = HasExtLeaf8 && ((EBX >> 4) & 1); 1689 Features["wbnoinvd"] = HasExtLeaf8 && ((EBX >> 9) & 1); 1690 1691 bool HasLeaf7 = 1692 MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); 1693 1694 Features["fsgsbase"] = HasLeaf7 && ((EBX >> 0) & 1); 1695 Features["sgx"] = HasLeaf7 && ((EBX >> 2) & 1); 1696 Features["bmi"] = HasLeaf7 && ((EBX >> 3) & 1); 1697 // AVX2 is only supported if we have the OS save support from AVX. 1698 Features["avx2"] = HasLeaf7 && ((EBX >> 5) & 1) && HasAVXSave; 1699 Features["bmi2"] = HasLeaf7 && ((EBX >> 8) & 1); 1700 Features["invpcid"] = HasLeaf7 && ((EBX >> 10) & 1); 1701 Features["rtm"] = HasLeaf7 && ((EBX >> 11) & 1); 1702 // AVX512 is only supported if the OS supports the context save for it. 1703 Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save; 1704 Features["avx512dq"] = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save; 1705 Features["rdseed"] = HasLeaf7 && ((EBX >> 18) & 1); 1706 Features["adx"] = HasLeaf7 && ((EBX >> 19) & 1); 1707 Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save; 1708 Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1); 1709 Features["clwb"] = HasLeaf7 && ((EBX >> 24) & 1); 1710 Features["avx512pf"] = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save; 1711 Features["avx512er"] = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save; 1712 Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save; 1713 Features["sha"] = HasLeaf7 && ((EBX >> 29) & 1); 1714 Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save; 1715 Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save; 1716 1717 Features["prefetchwt1"] = HasLeaf7 && ((ECX >> 0) & 1); 1718 Features["avx512vbmi"] = HasLeaf7 && 
((ECX >> 1) & 1) && HasAVX512Save; 1719 Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1); 1720 Features["waitpkg"] = HasLeaf7 && ((ECX >> 5) & 1); 1721 Features["avx512vbmi2"] = HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save; 1722 Features["shstk"] = HasLeaf7 && ((ECX >> 7) & 1); 1723 Features["gfni"] = HasLeaf7 && ((ECX >> 8) & 1); 1724 Features["vaes"] = HasLeaf7 && ((ECX >> 9) & 1) && HasAVXSave; 1725 Features["vpclmulqdq"] = HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave; 1726 Features["avx512vnni"] = HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save; 1727 Features["avx512bitalg"] = HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save; 1728 Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save; 1729 Features["rdpid"] = HasLeaf7 && ((ECX >> 22) & 1); 1730 Features["kl"] = HasLeaf7 && ((ECX >> 23) & 1); // key locker 1731 Features["cldemote"] = HasLeaf7 && ((ECX >> 25) & 1); 1732 Features["movdiri"] = HasLeaf7 && ((ECX >> 27) & 1); 1733 Features["movdir64b"] = HasLeaf7 && ((ECX >> 28) & 1); 1734 Features["enqcmd"] = HasLeaf7 && ((ECX >> 29) & 1); 1735 1736 Features["uintr"] = HasLeaf7 && ((EDX >> 5) & 1); 1737 Features["avx512vp2intersect"] = 1738 HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save; 1739 Features["serialize"] = HasLeaf7 && ((EDX >> 14) & 1); 1740 Features["tsxldtrk"] = HasLeaf7 && ((EDX >> 16) & 1); 1741 // There are two CPUID leafs which information associated with the pconfig 1742 // instruction: 1743 // EAX=0x7, ECX=0x0 indicates the availability of the instruction (via the 18th 1744 // bit of EDX), while the EAX=0x1b leaf returns information on the 1745 // availability of specific pconfig leafs. 1746 // The target feature here only refers to the the first of these two. 1747 // Users might need to check for the availability of specific pconfig 1748 // leaves using cpuid, since that information is ignored while 1749 // detecting features using the "-march=native" flag. 1750 // For more info, see X86 ISA docs. 
1751 Features["pconfig"] = HasLeaf7 && ((EDX >> 18) & 1); 1752 Features["amx-bf16"] = HasLeaf7 && ((EDX >> 22) & 1) && HasAMXSave; 1753 Features["avx512fp16"] = HasLeaf7 && ((EDX >> 23) & 1) && HasAVX512Save; 1754 Features["amx-tile"] = HasLeaf7 && ((EDX >> 24) & 1) && HasAMXSave; 1755 Features["amx-int8"] = HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave; 1756 // EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't 1757 // return all 0s for invalid subleaves so check the limit. 1758 bool HasLeaf7Subleaf1 = 1759 HasLeaf7 && EAX >= 1 && 1760 !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); 1761 Features["sha512"] = HasLeaf7Subleaf1 && ((EAX >> 0) & 1); 1762 Features["sm3"] = HasLeaf7Subleaf1 && ((EAX >> 1) & 1); 1763 Features["sm4"] = HasLeaf7Subleaf1 && ((EAX >> 2) & 1); 1764 Features["raoint"] = HasLeaf7Subleaf1 && ((EAX >> 3) & 1); 1765 Features["avxvnni"] = HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave; 1766 Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save; 1767 Features["amx-fp16"] = HasLeaf7Subleaf1 && ((EAX >> 21) & 1) && HasAMXSave; 1768 Features["cmpccxadd"] = HasLeaf7Subleaf1 && ((EAX >> 7) & 1); 1769 Features["hreset"] = HasLeaf7Subleaf1 && ((EAX >> 22) & 1); 1770 Features["avxifma"] = HasLeaf7Subleaf1 && ((EAX >> 23) & 1) && HasAVXSave; 1771 Features["avxvnniint8"] = HasLeaf7Subleaf1 && ((EDX >> 4) & 1) && HasAVXSave; 1772 Features["avxneconvert"] = HasLeaf7Subleaf1 && ((EDX >> 5) & 1) && HasAVXSave; 1773 Features["amx-complex"] = HasLeaf7Subleaf1 && ((EDX >> 8) & 1) && HasAMXSave; 1774 Features["avxvnniint16"] = HasLeaf7Subleaf1 && ((EDX >> 10) & 1) && HasAVXSave; 1775 Features["prefetchi"] = HasLeaf7Subleaf1 && ((EDX >> 14) & 1); 1776 1777 bool HasLeafD = MaxLevel >= 0xd && 1778 !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX); 1779 1780 // Only enable XSAVE if OS has enabled support for saving YMM state. 
1781 Features["xsaveopt"] = HasLeafD && ((EAX >> 0) & 1) && HasAVXSave; 1782 Features["xsavec"] = HasLeafD && ((EAX >> 1) & 1) && HasAVXSave; 1783 Features["xsaves"] = HasLeafD && ((EAX >> 3) & 1) && HasAVXSave; 1784 1785 bool HasLeaf14 = MaxLevel >= 0x14 && 1786 !getX86CpuIDAndInfoEx(0x14, 0x0, &EAX, &EBX, &ECX, &EDX); 1787 1788 Features["ptwrite"] = HasLeaf14 && ((EBX >> 4) & 1); 1789 1790 bool HasLeaf19 = 1791 MaxLevel >= 0x19 && !getX86CpuIDAndInfo(0x19, &EAX, &EBX, &ECX, &EDX); 1792 Features["widekl"] = HasLeaf7 && HasLeaf19 && ((EBX >> 2) & 1); 1793 1794 return true; 1795 } 1796 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__)) 1797 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { 1798 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1799 if (!P) 1800 return false; 1801 1802 SmallVector<StringRef, 32> Lines; 1803 P->getBuffer().split(Lines, "\n"); 1804 1805 SmallVector<StringRef, 32> CPUFeatures; 1806 1807 // Look for the CPU features. 
1808 for (unsigned I = 0, E = Lines.size(); I != E; ++I) 1809 if (Lines[I].startswith("Features")) { 1810 Lines[I].split(CPUFeatures, ' '); 1811 break; 1812 } 1813 1814 #if defined(__aarch64__) 1815 // Keep track of which crypto features we have seen 1816 enum { CAP_AES = 0x1, CAP_PMULL = 0x2, CAP_SHA1 = 0x4, CAP_SHA2 = 0x8 }; 1817 uint32_t crypto = 0; 1818 #endif 1819 1820 for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) { 1821 StringRef LLVMFeatureStr = StringSwitch<StringRef>(CPUFeatures[I]) 1822 #if defined(__aarch64__) 1823 .Case("asimd", "neon") 1824 .Case("fp", "fp-armv8") 1825 .Case("crc32", "crc") 1826 .Case("atomics", "lse") 1827 .Case("sve", "sve") 1828 .Case("sve2", "sve2") 1829 #else 1830 .Case("half", "fp16") 1831 .Case("neon", "neon") 1832 .Case("vfpv3", "vfp3") 1833 .Case("vfpv3d16", "vfp3d16") 1834 .Case("vfpv4", "vfp4") 1835 .Case("idiva", "hwdiv-arm") 1836 .Case("idivt", "hwdiv") 1837 #endif 1838 .Default(""); 1839 1840 #if defined(__aarch64__) 1841 // We need to check crypto separately since we need all of the crypto 1842 // extensions to enable the subtarget feature 1843 if (CPUFeatures[I] == "aes") 1844 crypto |= CAP_AES; 1845 else if (CPUFeatures[I] == "pmull") 1846 crypto |= CAP_PMULL; 1847 else if (CPUFeatures[I] == "sha1") 1848 crypto |= CAP_SHA1; 1849 else if (CPUFeatures[I] == "sha2") 1850 crypto |= CAP_SHA2; 1851 #endif 1852 1853 if (LLVMFeatureStr != "") 1854 Features[LLVMFeatureStr] = true; 1855 } 1856 1857 #if defined(__aarch64__) 1858 // If we have all crypto bits we can add the feature 1859 if (crypto == (CAP_AES | CAP_PMULL | CAP_SHA1 | CAP_SHA2)) 1860 Features["crypto"] = true; 1861 #endif 1862 1863 return true; 1864 } 1865 #elif defined(_WIN32) && (defined(__aarch64__) || defined(_M_ARM64)) 1866 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { 1867 if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)) 1868 Features["neon"] = true; 1869 if 
(IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE)) 1870 Features["crc"] = true; 1871 if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) 1872 Features["crypto"] = true; 1873 1874 return true; 1875 } 1876 #elif defined(__linux__) && defined(__loongarch__) 1877 #include <sys/auxv.h> 1878 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { 1879 unsigned long hwcap = getauxval(AT_HWCAP); 1880 bool HasFPU = hwcap & (1UL << 3); // HWCAP_LOONGARCH_FPU 1881 uint32_t cpucfg2 = 0x2; 1882 __asm__("cpucfg %[cpucfg2], %[cpucfg2]\n\t" : [cpucfg2] "+r"(cpucfg2)); 1883 1884 Features["f"] = HasFPU && (cpucfg2 & (1U << 1)); // CPUCFG.2.FP_SP 1885 Features["d"] = HasFPU && (cpucfg2 & (1U << 2)); // CPUCFG.2.FP_DP 1886 1887 Features["lsx"] = hwcap & (1UL << 4); // HWCAP_LOONGARCH_LSX 1888 Features["lasx"] = hwcap & (1UL << 5); // HWCAP_LOONGARCH_LASX 1889 Features["lvz"] = hwcap & (1UL << 9); // HWCAP_LOONGARCH_LVZ 1890 1891 return true; 1892 } 1893 #else 1894 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { return false; } 1895 #endif 1896 1897 #if __APPLE__ 1898 /// \returns the \p triple, but with the Host's arch spliced in. 
1899 static Triple withHostArch(Triple T) { 1900 #if defined(__arm__) 1901 T.setArch(Triple::arm); 1902 T.setArchName("arm"); 1903 #elif defined(__arm64e__) 1904 T.setArch(Triple::aarch64, Triple::AArch64SubArch_arm64e); 1905 T.setArchName("arm64e"); 1906 #elif defined(__aarch64__) 1907 T.setArch(Triple::aarch64); 1908 T.setArchName("arm64"); 1909 #elif defined(__x86_64h__) 1910 T.setArch(Triple::x86_64); 1911 T.setArchName("x86_64h"); 1912 #elif defined(__x86_64__) 1913 T.setArch(Triple::x86_64); 1914 T.setArchName("x86_64"); 1915 #elif defined(__powerpc__) 1916 T.setArch(Triple::ppc); 1917 T.setArchName("powerpc"); 1918 #else 1919 # error "Unimplemented host arch fixup" 1920 #endif 1921 return T; 1922 } 1923 #endif 1924 1925 std::string sys::getProcessTriple() { 1926 std::string TargetTripleString = updateTripleOSVersion(LLVM_HOST_TRIPLE); 1927 Triple PT(Triple::normalize(TargetTripleString)); 1928 1929 #if __APPLE__ 1930 /// In Universal builds, LLVM_HOST_TRIPLE will have the wrong arch in one of 1931 /// the slices. This fixes that up. 1932 PT = withHostArch(PT); 1933 #endif 1934 1935 if (sizeof(void *) == 8 && PT.isArch32Bit()) 1936 PT = PT.get64BitArchVariant(); 1937 if (sizeof(void *) == 4 && PT.isArch64Bit()) 1938 PT = PT.get32BitArchVariant(); 1939 1940 return PT.str(); 1941 } 1942 1943 void sys::printDefaultTargetAndDetectedCPU(raw_ostream &OS) { 1944 #if LLVM_VERSION_PRINTER_SHOW_HOST_TARGET_INFO 1945 std::string CPU = std::string(sys::getHostCPUName()); 1946 if (CPU == "generic") 1947 CPU = "(unknown)"; 1948 OS << " Default target: " << sys::getDefaultTargetTriple() << '\n' 1949 << " Host CPU: " << CPU << '\n'; 1950 #endif 1951 } 1952