1 //===-- Host.cpp - Implement OS Host Detection ------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the operating system Host detection. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/TargetParser/Host.h" 14 #include "llvm/ADT/SmallVector.h" 15 #include "llvm/ADT/StringMap.h" 16 #include "llvm/ADT/StringRef.h" 17 #include "llvm/ADT/StringSwitch.h" 18 #include "llvm/Config/llvm-config.h" 19 #include "llvm/Support/MemoryBuffer.h" 20 #include "llvm/Support/raw_ostream.h" 21 #include "llvm/TargetParser/RISCVTargetParser.h" 22 #include "llvm/TargetParser/Triple.h" 23 #include "llvm/TargetParser/X86TargetParser.h" 24 #include <string.h> 25 26 // Include the platform-specific parts of this class. 
#ifdef LLVM_ON_UNIX
#include "Unix/Host.inc"
#include <sched.h>
#endif
#ifdef _WIN32
#include "Windows/Host.inc"
#endif
#ifdef _MSC_VER
#include <intrin.h>
#endif
#ifdef __MVS__
#include "llvm/Support/BCD.h"
#endif
#if defined(__APPLE__)
#include <mach/host_info.h>
#include <mach/mach.h>
#include <mach/mach_host.h>
#include <mach/machine.h>
#include <sys/param.h>
#include <sys/sysctl.h>
#endif
#ifdef _AIX
#include <sys/systemcfg.h>
#endif
#if defined(__sun__) && defined(__svr4__)
#include <kstat.h>
#endif
#if defined(__GNUC__) || defined(__clang__)
#if (defined(__i386__) || defined(__x86_64__)) && !defined(_MSC_VER)
#include <cpuid.h>
#endif
#endif

#define DEBUG_TYPE "host-detection"

//===----------------------------------------------------------------------===//
//
//  Implementations of the CPU detection routines
//
//===----------------------------------------------------------------------===//

using namespace llvm;

/// Read the host's cpuinfo text into a memory buffer.
///
/// The file read is "/proc/cpuinfo" unless the LLVM_CPUINFO environment
/// variable is set, in which case its value is used as the path instead.
/// On a read error a "Can't read ..." message is written to llvm::errs() and
/// nullptr is returned.
static std::unique_ptr<llvm::MemoryBuffer>
    LLVM_ATTRIBUTE_UNUSED getProcCpuinfoContent() {
  const char *CPUInfoFile = "/proc/cpuinfo";
  if (const char *CpuinfoIntercept = std::getenv("LLVM_CPUINFO"))
    CPUInfoFile = CpuinfoIntercept;
  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
      llvm::MemoryBuffer::getFileAsStream(CPUInfoFile);

  if (std::error_code EC = Text.getError()) {
    llvm::errs() << "Can't read " << CPUInfoFile << ": " << EC.message()
                 << "\n";
    return nullptr;
  }
  return std::move(*Text);
}

/// Pick an LLVM CPU name for a PowerPC host from /proc/cpuinfo-style text.
///
/// Scans \p ProcCpuinfoContent for the first line of the form
/// "cpu<blanks>:<blanks><name>" and maps <name> (which ends at the first
/// space, tab, comma or newline) through the table below. Returns "generic"
/// when no such line is found or when the name is not in the table.
StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) {
  // Access to the Processor Version Register (PVR) on PowerPC is privileged,
  // and so we must use an operating-system interface to determine the current
  // processor type. On Linux, this is exposed through the /proc/cpuinfo file.
  const char *generic = "generic";

  // The cpu line is second (after the 'processor: 0' line), so if this
  // buffer is too small then something has changed (or is wrong).
  StringRef::const_iterator CPUInfoStart = ProcCpuinfoContent.begin();
  StringRef::const_iterator CPUInfoEnd = ProcCpuinfoContent.end();

  StringRef::const_iterator CIP = CPUInfoStart;

  StringRef::const_iterator CPUStart = nullptr;
  size_t CPULen = 0;

  // We need to find the first line which starts with cpu, spaces, and a colon.
  // After the colon, there may be some additional spaces and then the cpu type.
  while (CIP < CPUInfoEnd && CPUStart == nullptr) {
    // Step over the newline that ended the previous line, if any.
    if (CIP < CPUInfoEnd && *CIP == '\n')
      ++CIP;

    if (CIP < CPUInfoEnd && *CIP == 'c') {
      ++CIP;
      if (CIP < CPUInfoEnd && *CIP == 'p') {
        ++CIP;
        if (CIP < CPUInfoEnd && *CIP == 'u') {
          ++CIP;
          while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
            ++CIP;

          if (CIP < CPUInfoEnd && *CIP == ':') {
            ++CIP;
            while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
              ++CIP;

            if (CIP < CPUInfoEnd) {
              // The name runs up to the next space, tab, comma or newline.
              CPUStart = CIP;
              while (CIP < CPUInfoEnd && (*CIP != ' ' && *CIP != '\t' &&
                                          *CIP != ',' && *CIP != '\n'))
                ++CIP;
              CPULen = CIP - CPUStart;
            }
          }
        }
      }
    }

    // No match on this line: skip ahead to its terminating newline.
    if (CPUStart == nullptr)
      while (CIP < CPUInfoEnd && *CIP != '\n')
        ++CIP;
  }

  if (CPUStart == nullptr)
    return generic;

  return StringSwitch<const char *>(StringRef(CPUStart, CPULen))
      .Case("604e", "604e")
      .Case("604", "604")
      .Case("7400", "7400")
      .Case("7410", "7400")
      .Case("7447", "7400")
      .Case("7455", "7450")
      .Case("G4", "g4")
      .Case("POWER4", "970")
      .Case("PPC970FX", "970")
      .Case("PPC970MP", "970")
      .Case("G5", "g5")
      .Case("POWER5", "g5")
      .Case("A2", "a2")
      .Case("POWER6", "pwr6")
      .Case("POWER7", "pwr7")
      .Case("POWER8", "pwr8")
      .Case("POWER8E", "pwr8")
      .Case("POWER8NVL", "pwr8")
      .Case("POWER9", "pwr9")
      .Case("POWER10", "pwr10")
      .Case("POWER11", "pwr11")
      // FIXME: If we get a simulator or machine with the capabilities of
      // mcpu=future, we should revisit this and add the name reported by the
      // simulator/machine.
      .Default(generic);
}

/// Pick an LLVM CPU name for an ARM/AArch64 host from /proc/cpuinfo-style
/// text, using its "CPU implementer", "Hardware" and "CPU part" lines.
StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
  // The cpuid register on arm is not accessible from user space. On Linux,
  // it is exposed through the /proc/cpuinfo file.

  // Read 32 lines from /proc/cpuinfo, which should contain the CPU part line
  // in all cases.
  SmallVector<StringRef, 32> Lines;
  ProcCpuinfoContent.split(Lines, '\n');

  // Look for the CPU implementer and hardware lines, and store the CPU part
  // numbers found.
  StringRef Implementer;
  StringRef Hardware;
  SmallVector<StringRef, 32> Parts;
  for (StringRef Line : Lines) {
    if (Line.consume_front("CPU implementer"))
      Implementer = Line.ltrim("\t :");
    else if (Line.consume_front("Hardware"))
      Hardware = Line.ltrim("\t :");
    else if (Line.consume_front("CPU part"))
      Parts.emplace_back(Line.ltrim("\t :"));
  }

  // Last `Part' seen, in case we don't analyse all `Parts' parsed.
  StringRef Part = Parts.empty() ? StringRef() : Parts.back();

  // Remove duplicate `Parts'.
  llvm::sort(Parts);
  Parts.erase(llvm::unique(Parts), Parts.end());

  // True when exactly two distinct parts were seen and they are Big and
  // Little, in either order.
  auto MatchBigLittle = [](auto const &Parts, StringRef Big, StringRef Little) {
    if (Parts.size() == 2)
      return (Parts[0] == Big && Parts[1] == Little) ||
             (Parts[1] == Big && Parts[0] == Little);
    return false;
  };

  if (Implementer == "0x41") { // ARM Ltd.
    // MSM8992/8994 may give cpu part for the core that the kernel is running on,
    // which is nondeterministic and wrong. Always return cortex-a53 for these SoC.
// NOTE(review): the test below matches MSM8994 and MSM8996, not MSM8992 as
// the comment above suggests -- confirm which SoC list is intended.
    if (Hardware.ends_with("MSM8994") || Hardware.ends_with("MSM8996"))
      return "cortex-a53";

    // Detect big.LITTLE systems.
    if (MatchBigLittle(Parts, "0xd85", "0xd87"))
      return "cortex-x925";

    // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
    // values correspond to the "Part number" in the CP15/c0 register. The
    // contents are specified in the various processor manuals.
    // This corresponds to the Main ID Register in Technical Reference Manuals.
    // and is used in programs like sys-utils
    return StringSwitch<const char *>(Part)
        .Case("0x926", "arm926ej-s")
        .Case("0xb02", "mpcore")
        .Case("0xb36", "arm1136j-s")
        .Case("0xb56", "arm1156t2-s")
        .Case("0xb76", "arm1176jz-s")
        .Case("0xc05", "cortex-a5")
        .Case("0xc07", "cortex-a7")
        .Case("0xc08", "cortex-a8")
        .Case("0xc09", "cortex-a9")
        .Case("0xc0f", "cortex-a15")
        .Case("0xc0e", "cortex-a17")
        .Case("0xc20", "cortex-m0")
        .Case("0xc23", "cortex-m3")
        .Case("0xc24", "cortex-m4")
        .Case("0xc27", "cortex-m7")
        .Case("0xd20", "cortex-m23")
        .Case("0xd21", "cortex-m33")
        .Case("0xd24", "cortex-m52")
        .Case("0xd22", "cortex-m55")
        .Case("0xd23", "cortex-m85")
        .Case("0xc18", "cortex-r8")
        .Case("0xd13", "cortex-r52")
        .Case("0xd16", "cortex-r52plus")
        .Case("0xd15", "cortex-r82")
        .Case("0xd14", "cortex-r82ae")
        .Case("0xd02", "cortex-a34")
        .Case("0xd04", "cortex-a35")
        .Case("0xd8f", "cortex-a320")
        .Case("0xd03", "cortex-a53")
        .Case("0xd05", "cortex-a55")
        .Case("0xd46", "cortex-a510")
        .Case("0xd80", "cortex-a520")
        .Case("0xd88", "cortex-a520ae")
        .Case("0xd07", "cortex-a57")
        .Case("0xd06", "cortex-a65")
        .Case("0xd43", "cortex-a65ae")
        .Case("0xd08", "cortex-a72")
        .Case("0xd09", "cortex-a73")
        .Case("0xd0a", "cortex-a75")
        .Case("0xd0b", "cortex-a76")
        .Case("0xd0e", "cortex-a76ae")
        .Case("0xd0d", "cortex-a77")
        .Case("0xd41", "cortex-a78")
        .Case("0xd42", "cortex-a78ae")
        .Case("0xd4b", "cortex-a78c")
        .Case("0xd47", "cortex-a710")
        .Case("0xd4d", "cortex-a715")
        .Case("0xd81", "cortex-a720")
        .Case("0xd89", "cortex-a720ae")
        .Case("0xd87", "cortex-a725")
        .Case("0xd44", "cortex-x1")
        .Case("0xd4c", "cortex-x1c")
        .Case("0xd48", "cortex-x2")
        .Case("0xd4e", "cortex-x3")
        .Case("0xd82", "cortex-x4")
        .Case("0xd85", "cortex-x925")
        .Case("0xd4a", "neoverse-e1")
        .Case("0xd0c", "neoverse-n1")
        .Case("0xd49", "neoverse-n2")
        .Case("0xd8e", "neoverse-n3")
        .Case("0xd40", "neoverse-v1")
        .Case("0xd4f", "neoverse-v2")
        .Case("0xd84", "neoverse-v3")
        .Case("0xd83", "neoverse-v3ae")
        .Default("generic");
  }

  if (Implementer == "0x42" || Implementer == "0x43") { // Broadcom | Cavium.
    return StringSwitch<const char *>(Part)
        .Case("0x516", "thunderx2t99")
        .Case("0x0516", "thunderx2t99")
        .Case("0xaf", "thunderx2t99")
        .Case("0x0af", "thunderx2t99")
        .Case("0xa1", "thunderxt88")
        .Case("0x0a1", "thunderxt88")
        .Default("generic");
  }

  if (Implementer == "0x46") { // Fujitsu Ltd.
    return StringSwitch<const char *>(Part)
        .Case("0x001", "a64fx")
        .Case("0x003", "fujitsu-monaka")
        .Default("generic");
  }

  if (Implementer == "0x4e") { // NVIDIA Corporation
    return StringSwitch<const char *>(Part)
        .Case("0x004", "carmel")
        .Case("0x10", "olympus")
        .Case("0x010", "olympus")
        .Default("generic");
  }

  if (Implementer == "0x48") // HiSilicon Technologies, Inc.
    // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
    // values correspond to the "Part number" in the CP15/c0 register. The
    // contents are specified in the various processor manuals.
    return StringSwitch<const char *>(Part)
        .Case("0xd01", "tsv110")
        .Default("generic");

  if (Implementer == "0x51") // Qualcomm Technologies, Inc.
    // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
    // values correspond to the "Part number" in the CP15/c0 register. The
    // contents are specified in the various processor manuals.
    return StringSwitch<const char *>(Part)
        .Case("0x06f", "krait") // APQ8064
        .Case("0x201", "kryo")
        .Case("0x205", "kryo")
        .Case("0x211", "kryo")
        .Case("0x800", "cortex-a73") // Kryo 2xx Gold
        .Case("0x801", "cortex-a73") // Kryo 2xx Silver
        .Case("0x802", "cortex-a75") // Kryo 3xx Gold
        .Case("0x803", "cortex-a75") // Kryo 3xx Silver
        .Case("0x804", "cortex-a76") // Kryo 4xx Gold
        .Case("0x805", "cortex-a76") // Kryo 4xx/5xx Silver
        .Case("0xc00", "falkor")
        .Case("0xc01", "saphira")
        .Case("0x001", "oryon-1")
        .Default("generic");
  if (Implementer == "0x53") { // Samsung Electronics Co., Ltd.
    // The Exynos chips have a convoluted ID scheme that doesn't seem to follow
    // any predictive pattern across variants and parts.
    // Note: this numeric Part shadows the StringRef Part declared earlier in
    // the function.
    unsigned Variant = 0, Part = 0;

    // Look for the CPU variant line, whose value is a 1 digit hexadecimal
    // number, corresponding to the Variant bits in the CP15/C0 register.
    for (auto I : Lines)
      if (I.consume_front("CPU variant"))
        I.ltrim("\t :").getAsInteger(0, Variant);

    // Look for the CPU part line, whose value is a 3 digit hexadecimal
    // number, corresponding to the PartNum bits in the CP15/C0 register.
    for (auto I : Lines)
      if (I.consume_front("CPU part"))
        I.ltrim("\t :").getAsInteger(0, Part);

    // Combine both fields into one key: variant above bit 12, part below.
    unsigned Exynos = (Variant << 12) | Part;
    switch (Exynos) {
    default:
      // Default by falling through to Exynos M3.
      [[fallthrough]];
    case 0x1002:
      return "exynos-m3";
    case 0x1003:
      return "exynos-m4";
    }
  }

  if (Implementer == "0x61") { // Apple
    return StringSwitch<const char *>(Part)
        .Case("0x020", "apple-m1")
        .Case("0x021", "apple-m1")
        .Case("0x022", "apple-m1")
        .Case("0x023", "apple-m1")
        .Case("0x024", "apple-m1")
        .Case("0x025", "apple-m1")
        .Case("0x028", "apple-m1")
        .Case("0x029", "apple-m1")
        .Case("0x030", "apple-m2")
        .Case("0x031", "apple-m2")
        .Case("0x032", "apple-m2")
        .Case("0x033", "apple-m2")
        .Case("0x034", "apple-m2")
        .Case("0x035", "apple-m2")
        .Case("0x038", "apple-m2")
        .Case("0x039", "apple-m2")
        .Case("0x049", "apple-m3")
        .Case("0x048", "apple-m3")
        .Default("generic");
  }

  if (Implementer == "0x63") { // Arm China.
    return StringSwitch<const char *>(Part)
        .Case("0x132", "star-mc1")
        .Default("generic");
  }

  if (Implementer == "0x6d") { // Microsoft Corporation.
    // The Microsoft Azure Cobalt 100 CPU is handled as a Neoverse N2.
    return StringSwitch<const char *>(Part)
        .Case("0xd49", "neoverse-n2")
        .Default("generic");
  }

  if (Implementer == "0xc0") { // Ampere Computing
    return StringSwitch<const char *>(Part)
        .Case("0xac3", "ampere1")
        .Case("0xac4", "ampere1a")
        .Case("0xac5", "ampere1b")
        .Default("generic");
  }

  return "generic";
}

namespace {
/// Translate the numeric machine type of an s390x host into an LLVM CPU name.
///
/// Machine types from 2964 upwards are reported as "zEC12" unless
/// \p HaveVectorSupport is set. Machine types not listed here are treated
/// like the newest listed one (the `default` label shares the z17 case).
StringRef getCPUNameFromS390Model(unsigned int Id, bool HaveVectorSupport) {
  switch (Id) {
  case 2064: // z900 not supported by LLVM
  case 2066:
  case 2084: // z990 not supported by LLVM
  case 2086:
  case 2094: // z9-109 not supported by LLVM
  case 2096:
    return "generic";
  case 2097:
  case 2098:
    return "z10";
  case 2817:
  case 2818:
    return "z196";
  case 2827:
  case 2828:
    return "zEC12";
  case 2964:
  case 2965:
    return HaveVectorSupport ? "z13" : "zEC12";
  case 3906:
  case 3907:
    return HaveVectorSupport ? "z14" : "zEC12";
  case 8561:
  case 8562:
    return HaveVectorSupport ? "z15" : "zEC12";
  case 3931:
  case 3932:
    return HaveVectorSupport ? "z16" : "zEC12";
  case 9175:
  case 9176:
  default:
    return HaveVectorSupport ? "z17" : "zEC12";
  }
}
} // end anonymous namespace

/// Pick an LLVM CPU name for an s390x host from /proc/cpuinfo-style text.
///
/// Vector support is taken from a "vx" entry on the "features" line; the
/// machine type is then read from a "processor " line.
StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) {
  // STIDP is a privileged operation, so use /proc/cpuinfo instead.

  // The "processor 0:" line comes after a fair amount of other information,
  // including a cache breakdown, but this should be plenty.
  SmallVector<StringRef, 32> Lines;
  ProcCpuinfoContent.split(Lines, '\n');

  // Look for the CPU features.
  SmallVector<StringRef, 32> CPUFeatures;
  for (unsigned I = 0, E = Lines.size(); I != E; ++I)
    if (Lines[I].starts_with("features")) {
      size_t Pos = Lines[I].find(':');
      if (Pos != StringRef::npos) {
        Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' ');
        break;
      }
    }

  // We need to check for the presence of vector support independently of
  // the machine type, since we may only use the vector register set when
  // supported by the kernel (and hypervisor).
  bool HaveVectorSupport = false;
  for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
    if (CPUFeatures[I] == "vx")
      HaveVectorSupport = true;
  }

  // Now check the processor machine type.
// Only the first line starting with "processor " is considered: the loop
// stops there whether or not a machine number could be parsed from it.
  for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
    if (Lines[I].starts_with("processor ")) {
      size_t Pos = Lines[I].find("machine = ");
      if (Pos != StringRef::npos) {
        // Skip past the "machine = " label itself.
        Pos += sizeof("machine = ") - 1;
        unsigned int Id;
        if (!Lines[I].drop_front(Pos).getAsInteger(10, Id))
          return getCPUNameFromS390Model(Id, HaveVectorSupport);
      }
      break;
    }
  }

  return "generic";
}

/// Pick an LLVM CPU name for a RISC-V host from /proc/cpuinfo-style text.
///
/// Uses the value of the first line starting with "uarch". Returns an empty
/// string when there is no such line or its value is not in the table.
StringRef sys::detail::getHostCPUNameForRISCV(StringRef ProcCpuinfoContent) {
  // There are 24 lines in /proc/cpuinfo
  SmallVector<StringRef> Lines;
  ProcCpuinfoContent.split(Lines, '\n');

  // Look for uarch line to determine cpu name
  StringRef UArch;
  for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
    if (Lines[I].starts_with("uarch")) {
      // Drop the 5-character "uarch" key, then the separator characters.
      UArch = Lines[I].substr(5).ltrim("\t :");
      break;
    }
  }

  return StringSwitch<const char *>(UArch)
      .Case("eswin,eic770x", "sifive-p550")
      .Case("sifive,u74-mc", "sifive-u74")
      .Case("sifive,bullet0", "sifive-u74")
      .Default("");
}

/// Pick the BPF CPU version ("v1", "v2" or "v3") for the host.
///
/// On anything other than Linux/x86-64 this returns "generic". Otherwise it
/// tries to load two five-instruction socket-filter programs through the bpf
/// syscall: "v3" is returned if the first one (which uses BPF_JMP32_REG)
/// loads, "v2" if the second one (which uses BPF_JMP_REG with BPF_JLT) loads,
/// and "v1" if neither does.
StringRef sys::detail::getHostCPUNameForBPF() {
#if !defined(__linux__) || !defined(__x86_64__)
  return "generic";
#else
  uint8_t v3_insns[40] __attribute__ ((aligned (8))) =
      /* BPF_MOV64_IMM(BPF_REG_0, 0) */
    { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
      /* BPF_MOV64_IMM(BPF_REG_2, 1) */
      0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
      /* BPF_JMP32_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
      0xae, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
      /* BPF_MOV64_IMM(BPF_REG_0, 1) */
      0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
      /* BPF_EXIT_INSN() */
      0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };

  uint8_t v2_insns[40] __attribute__ ((aligned (8))) =
      /* BPF_MOV64_IMM(BPF_REG_0, 0) */
    { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
      /* BPF_MOV64_IMM(BPF_REG_2, 1) */
      0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
      /* BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
      0xad, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
      /* BPF_MOV64_IMM(BPF_REG_0, 1) */
      0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
      /* BPF_EXIT_INSN() */
      0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };

  // Locally declared argument block passed to the BPF_PROG_LOAD command.
  struct bpf_prog_load_attr {
    uint32_t prog_type;
    uint32_t insn_cnt;
    uint64_t insns;
    uint64_t license;
    uint32_t log_level;
    uint32_t log_size;
    uint64_t log_buf;
    uint32_t kern_version;
    uint32_t prog_flags;
  } attr = {};
  attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
  attr.insn_cnt = 5;
  attr.insns = (uint64_t)v3_insns;
  attr.license = (uint64_t)"DUMMY";

  int fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr,
                   sizeof(attr));
  if (fd >= 0) {
    close(fd);
    return "v3";
  }

  /* Clear the whole attr in case its content changed by syscall. */
  memset(&attr, 0, sizeof(attr));
  attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
  attr.insn_cnt = 5;
  attr.insns = (uint64_t)v2_insns;
  attr.license = (uint64_t)"DUMMY";
  fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, sizeof(attr));
  if (fd >= 0) {
    close(fd);
    return "v2";
  }
  return "v1";
#endif
}

#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) ||            \
    defined(_M_X64)

/// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in
/// the specified arguments.  If we can't run cpuid on the host, return true.
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if (defined(__i386__) || defined(__x86_64__)) && !defined(_MSC_VER)
  // __get_cpuid's result is negated so that "true" means failure here.
  return !__get_cpuid(value, rEAX, rEBX, rECX, rEDX);
#elif defined(_MSC_VER)
  // The MSVC intrinsic is portable across x86 and x64.
// __cpuid fills registers[] with the EAX, EBX, ECX and EDX results, in order.
  int registers[4];
  __cpuid(registers, value);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
  return false;
#else
  return true;
#endif
}

namespace llvm {
namespace sys {
namespace detail {
namespace x86 {

/// Run cpuid leaf 0 and classify the vendor string it reports.
///
/// \param MaxLeaf optional; when non-null it is zeroed and then receives the
///        EAX value returned by leaf 0.
/// \returns GENUINE_INTEL or AUTHENTIC_AMD when the EBX/EDX/ECX registers
///          spell the corresponding vendor string; UNKNOWN otherwise,
///          including when cpuid cannot be run or the EAX value is below 1.
VendorSignatures getVendorSignature(unsigned *MaxLeaf) {
  unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
  // Let the caller omit MaxLeaf by pointing it at a local scratch register.
  if (MaxLeaf == nullptr)
    MaxLeaf = &EAX;
  else
    *MaxLeaf = 0;

  if (getX86CpuIDAndInfo(0, MaxLeaf, &EBX, &ECX, &EDX) || *MaxLeaf < 1)
    return VendorSignatures::UNKNOWN;

  // "Genu ineI ntel"
  if (EBX == 0x756e6547 && EDX == 0x49656e69 && ECX == 0x6c65746e)
    return VendorSignatures::GENUINE_INTEL;

  // "Auth enti cAMD"
  if (EBX == 0x68747541 && EDX == 0x69746e65 && ECX == 0x444d4163)
    return VendorSignatures::AUTHENTIC_AMD;

  return VendorSignatures::UNKNOWN;
}

} // namespace x86
} // namespace detail
} // namespace sys
} // namespace llvm

using namespace llvm::sys::detail::x86;

/// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
/// the 4 values in the specified arguments.  If we can't run cpuid on the host,
/// return true.
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
  // TODO(boomanaiden154): When the minimum toolchain versions for gcc and clang
  // are such that __cpuidex is defined within cpuid.h for both, we can remove
  // the __get_cpuid_count function and share the MSVC implementation between
  // all three.
#if (defined(__i386__) || defined(__x86_64__)) && !defined(_MSC_VER)
  return !__get_cpuid_count(value, subleaf, rEAX, rEBX, rECX, rEDX);
#elif defined(_MSC_VER)
  int registers[4];
  __cpuidex(registers, value, subleaf);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
  return false;
#else
  return true;
#endif
}

// Read control register 0 (XCR0). Used to detect features such as AVX.
// Returns false on success and true when no implementation is available for
// the compiler in use.
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
  // TODO(boomanaiden154): When the minimum toolchain versions for gcc and clang
  // are such that _xgetbv is supported by both, we can unify the implementation
  // with MSVC and remove all inline assembly.
#if defined(__GNUC__) || defined(__clang__)
  // Check xgetbv; this uses a .byte sequence instead of the instruction
  // directly because older assemblers do not include support for xgetbv and
  // there is no easy way to conditionally compile based on the assembler used.
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  // Split the 64-bit result into its low and high 32-bit halves.
  unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = Result;
  *rEDX = Result >> 32;
  return false;
#else
  return true;
#endif
}

/// Decode the family and model numbers from a cpuid EAX value (bit positions
/// as noted inline). The extended family field is added only when the base
/// family is 0xf; the extended model field is used when the base family is 6
/// or 0xf.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  *Family = (EAX >> 8) & 0xf; // Bits 8 - 11
  *Model = (EAX >> 4) & 0xf;  // Bits 4 - 7
  if (*Family == 6 || *Family == 0xf) {
    if (*Family == 0xf)
      // Examine extended family ID if family ID is F.
      *Family += (EAX >> 20) & 0xff; // Bits 20 - 27
    // Examine extended model ID if family ID is 6 or F.
701 *Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19 702 } 703 } 704 705 #define testFeature(F) (Features[F / 32] & (1 << (F % 32))) != 0 706 707 static StringRef getIntelProcessorTypeAndSubtype(unsigned Family, 708 unsigned Model, 709 const unsigned *Features, 710 unsigned *Type, 711 unsigned *Subtype) { 712 StringRef CPU; 713 714 switch (Family) { 715 case 3: 716 CPU = "i386"; 717 break; 718 case 4: 719 CPU = "i486"; 720 break; 721 case 5: 722 if (testFeature(X86::FEATURE_MMX)) { 723 CPU = "pentium-mmx"; 724 break; 725 } 726 CPU = "pentium"; 727 break; 728 case 6: 729 switch (Model) { 730 case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile 731 // processor, Intel Core 2 Quad processor, Intel Core 2 Quad 732 // mobile processor, Intel Core 2 Extreme processor, Intel 733 // Pentium Dual-Core processor, Intel Xeon processor, model 734 // 0Fh. All processors are manufactured using the 65 nm process. 735 case 0x16: // Intel Celeron processor model 16h. All processors are 736 // manufactured using the 65 nm process 737 CPU = "core2"; 738 *Type = X86::INTEL_CORE2; 739 break; 740 case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model 741 // 17h. All processors are manufactured using the 45 nm process. 742 // 743 // 45nm: Penryn , Wolfdale, Yorkfield (XE) 744 case 0x1d: // Intel Xeon processor MP. All processors are manufactured using 745 // the 45 nm process. 746 CPU = "penryn"; 747 *Type = X86::INTEL_CORE2; 748 break; 749 case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All 750 // processors are manufactured using the 45 nm process. 751 case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz. 752 // As found in a Summer 2010 model iMac. 753 case 0x1f: 754 case 0x2e: // Nehalem EX 755 CPU = "nehalem"; 756 *Type = X86::INTEL_COREI7; 757 *Subtype = X86::INTEL_COREI7_NEHALEM; 758 break; 759 case 0x25: // Intel Core i7, laptop version. 760 case 0x2c: // Intel Core i7 processor and Intel Xeon processor. 
All 761 // processors are manufactured using the 32 nm process. 762 case 0x2f: // Westmere EX 763 CPU = "westmere"; 764 *Type = X86::INTEL_COREI7; 765 *Subtype = X86::INTEL_COREI7_WESTMERE; 766 break; 767 case 0x2a: // Intel Core i7 processor. All processors are manufactured 768 // using the 32 nm process. 769 case 0x2d: 770 CPU = "sandybridge"; 771 *Type = X86::INTEL_COREI7; 772 *Subtype = X86::INTEL_COREI7_SANDYBRIDGE; 773 break; 774 case 0x3a: 775 case 0x3e: // Ivy Bridge EP 776 CPU = "ivybridge"; 777 *Type = X86::INTEL_COREI7; 778 *Subtype = X86::INTEL_COREI7_IVYBRIDGE; 779 break; 780 781 // Haswell: 782 case 0x3c: 783 case 0x3f: 784 case 0x45: 785 case 0x46: 786 CPU = "haswell"; 787 *Type = X86::INTEL_COREI7; 788 *Subtype = X86::INTEL_COREI7_HASWELL; 789 break; 790 791 // Broadwell: 792 case 0x3d: 793 case 0x47: 794 case 0x4f: 795 case 0x56: 796 CPU = "broadwell"; 797 *Type = X86::INTEL_COREI7; 798 *Subtype = X86::INTEL_COREI7_BROADWELL; 799 break; 800 801 // Skylake: 802 case 0x4e: // Skylake mobile 803 case 0x5e: // Skylake desktop 804 case 0x8e: // Kaby Lake mobile 805 case 0x9e: // Kaby Lake desktop 806 case 0xa5: // Comet Lake-H/S 807 case 0xa6: // Comet Lake-U 808 CPU = "skylake"; 809 *Type = X86::INTEL_COREI7; 810 *Subtype = X86::INTEL_COREI7_SKYLAKE; 811 break; 812 813 // Rocketlake: 814 case 0xa7: 815 CPU = "rocketlake"; 816 *Type = X86::INTEL_COREI7; 817 *Subtype = X86::INTEL_COREI7_ROCKETLAKE; 818 break; 819 820 // Skylake Xeon: 821 case 0x55: 822 *Type = X86::INTEL_COREI7; 823 if (testFeature(X86::FEATURE_AVX512BF16)) { 824 CPU = "cooperlake"; 825 *Subtype = X86::INTEL_COREI7_COOPERLAKE; 826 } else if (testFeature(X86::FEATURE_AVX512VNNI)) { 827 CPU = "cascadelake"; 828 *Subtype = X86::INTEL_COREI7_CASCADELAKE; 829 } else { 830 CPU = "skylake-avx512"; 831 *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512; 832 } 833 break; 834 835 // Cannonlake: 836 case 0x66: 837 CPU = "cannonlake"; 838 *Type = X86::INTEL_COREI7; 839 *Subtype = 
X86::INTEL_COREI7_CANNONLAKE; 840 break; 841 842 // Icelake: 843 case 0x7d: 844 case 0x7e: 845 CPU = "icelake-client"; 846 *Type = X86::INTEL_COREI7; 847 *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT; 848 break; 849 850 // Tigerlake: 851 case 0x8c: 852 case 0x8d: 853 CPU = "tigerlake"; 854 *Type = X86::INTEL_COREI7; 855 *Subtype = X86::INTEL_COREI7_TIGERLAKE; 856 break; 857 858 // Alderlake: 859 case 0x97: 860 case 0x9a: 861 CPU = "alderlake"; 862 *Type = X86::INTEL_COREI7; 863 *Subtype = X86::INTEL_COREI7_ALDERLAKE; 864 break; 865 866 // Gracemont 867 case 0xbe: 868 CPU = "gracemont"; 869 *Type = X86::INTEL_COREI7; 870 *Subtype = X86::INTEL_COREI7_ALDERLAKE; 871 break; 872 873 // Raptorlake: 874 case 0xb7: 875 case 0xba: 876 case 0xbf: 877 CPU = "raptorlake"; 878 *Type = X86::INTEL_COREI7; 879 *Subtype = X86::INTEL_COREI7_ALDERLAKE; 880 break; 881 882 // Meteorlake: 883 case 0xaa: 884 case 0xac: 885 CPU = "meteorlake"; 886 *Type = X86::INTEL_COREI7; 887 *Subtype = X86::INTEL_COREI7_ALDERLAKE; 888 break; 889 890 // Arrowlake: 891 case 0xc5: 892 // Arrowlake U: 893 case 0xb5: 894 CPU = "arrowlake"; 895 *Type = X86::INTEL_COREI7; 896 *Subtype = X86::INTEL_COREI7_ARROWLAKE; 897 break; 898 899 // Arrowlake S: 900 case 0xc6: 901 CPU = "arrowlake-s"; 902 *Type = X86::INTEL_COREI7; 903 *Subtype = X86::INTEL_COREI7_ARROWLAKE_S; 904 break; 905 906 // Lunarlake: 907 case 0xbd: 908 CPU = "lunarlake"; 909 *Type = X86::INTEL_COREI7; 910 *Subtype = X86::INTEL_COREI7_ARROWLAKE_S; 911 break; 912 913 // Pantherlake: 914 case 0xcc: 915 CPU = "pantherlake"; 916 *Type = X86::INTEL_COREI7; 917 *Subtype = X86::INTEL_COREI7_PANTHERLAKE; 918 break; 919 920 // Graniterapids: 921 case 0xad: 922 CPU = "graniterapids"; 923 *Type = X86::INTEL_COREI7; 924 *Subtype = X86::INTEL_COREI7_GRANITERAPIDS; 925 break; 926 927 // Granite Rapids D: 928 case 0xae: 929 CPU = "graniterapids-d"; 930 *Type = X86::INTEL_COREI7; 931 *Subtype = X86::INTEL_COREI7_GRANITERAPIDS_D; 932 break; 933 934 // Icelake Xeon: 
// Models 0x6a and 0x6c are both reported as "icelake-server".
    case 0x6a:
    case 0x6c:
      CPU = "icelake-server";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_ICELAKE_SERVER;
      break;

    // Emerald Rapids:
    case 0xcf:
      CPU = "emeraldrapids";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_SAPPHIRERAPIDS;
      break;

    // Sapphire Rapids:
    case 0x8f:
      CPU = "sapphirerapids";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_SAPPHIRERAPIDS;
      break;

    case 0x1c: // Most 45 nm Intel Atom processors
    case 0x26: // 45 nm Atom Lincroft
    case 0x27: // 32 nm Atom Medfield
    case 0x35: // 32 nm Atom Midview
    case 0x36: // 32 nm Atom Midview
      CPU = "bonnell";
      *Type = X86::INTEL_BONNELL;
      break;

    // Atom Silvermont codes from the Intel software optimization guide.
    case 0x37:
    case 0x4a:
    case 0x4d:
    case 0x5a:
    case 0x5d:
    case 0x4c: // really airmont
      CPU = "silvermont";
      *Type = X86::INTEL_SILVERMONT;
      break;
    // Goldmont:
    case 0x5c: // Apollo Lake
    case 0x5f: // Denverton
      CPU = "goldmont";
      *Type = X86::INTEL_GOLDMONT;
      break;
    case 0x7a:
      CPU = "goldmont-plus";
      *Type = X86::INTEL_GOLDMONT_PLUS;
      break;
    case 0x86:
    case 0x8a: // Lakefield
    case 0x96: // Elkhart Lake
    case 0x9c: // Jasper Lake
      CPU = "tremont";
      *Type = X86::INTEL_TREMONT;
      break;

    // Sierraforest:
    case 0xaf:
      CPU = "sierraforest";
      *Type = X86::INTEL_SIERRAFOREST;
      break;

    // Grandridge:
    case 0xb6:
      CPU = "grandridge";
      *Type = X86::INTEL_GRANDRIDGE;
      break;

    // Clearwaterforest:
    case 0xdd:
      CPU = "clearwaterforest";
      *Type = X86::INTEL_CLEARWATERFOREST;
      break;

    // Xeon Phi (Knights Landing + Knights Mill):
    case 0x57:
      CPU = "knl";
      *Type = X86::INTEL_KNL;
      break;
    case 0x85:
      CPU = "knm";
      *Type = X86::INTEL_KNM;
      break;

    default: // Unknown family 6 CPU, try to guess.
      // Don't bother with Type/Subtype here, they aren't used by the caller.
      // They're used above to keep the code in sync with compiler-rt.
      // The chain below picks the first feature that is present, so the order
      // of the tests determines the result.
      // TODO detect tigerlake host from model
      if (testFeature(X86::FEATURE_AVX512VP2INTERSECT)) {
        CPU = "tigerlake";
      } else if (testFeature(X86::FEATURE_AVX512VBMI2)) {
        CPU = "icelake-client";
      } else if (testFeature(X86::FEATURE_AVX512VBMI)) {
        CPU = "cannonlake";
      } else if (testFeature(X86::FEATURE_AVX512BF16)) {
        CPU = "cooperlake";
      } else if (testFeature(X86::FEATURE_AVX512VNNI)) {
        CPU = "cascadelake";
      } else if (testFeature(X86::FEATURE_AVX512VL)) {
        CPU = "skylake-avx512";
      } else if (testFeature(X86::FEATURE_CLFLUSHOPT)) {
        if (testFeature(X86::FEATURE_SHA))
          CPU = "goldmont";
        else
          CPU = "skylake";
      } else if (testFeature(X86::FEATURE_ADX)) {
        CPU = "broadwell";
      } else if (testFeature(X86::FEATURE_AVX2)) {
        CPU = "haswell";
      } else if (testFeature(X86::FEATURE_AVX)) {
        CPU = "sandybridge";
      } else if (testFeature(X86::FEATURE_SSE4_2)) {
        if (testFeature(X86::FEATURE_MOVBE))
          CPU = "silvermont";
        else
          CPU = "nehalem";
      } else if (testFeature(X86::FEATURE_SSE4_1)) {
        CPU = "penryn";
      } else if (testFeature(X86::FEATURE_SSSE3)) {
        if (testFeature(X86::FEATURE_MOVBE))
          CPU = "bonnell";
        else
          CPU = "core2";
      } else if (testFeature(X86::FEATURE_64BIT)) {
        CPU = "core2";
      } else if (testFeature(X86::FEATURE_SSE3)) {
        CPU = "yonah";
      } else if (testFeature(X86::FEATURE_SSE2)) {
        CPU = "pentium-m";
      } else if (testFeature(X86::FEATURE_SSE)) {
        CPU = "pentium3";
      } else if (testFeature(X86::FEATURE_MMX)) {
        CPU = "pentium2";
      } else {
        CPU = "pentiumpro";
      }
      break;
    }
    break;
  case 15: {
    if (testFeature(X86::FEATURE_64BIT)) {
      CPU = "nocona";
      break;
    }
    if (testFeature(X86::FEATURE_SSE3)) {
      CPU = "prescott";
      break;
    }
    CPU = "pentium4";
    break;
  }
  case 19:
    switch (Model) {
    // Diamond Rapids:
    case 0x01:
      CPU = "diamondrapids";
      *Type = X86::INTEL_COREI7;
      *Subtype = X86::INTEL_COREI7_DIAMONDRAPIDS;
      break;

    default: // Unknown family 19 CPU.
      break;
    }
    break;
  default:
    break; // Unknown.
  }

  // CPU is still empty here if no case above assigned a name.
  return CPU;
}

static const char *getAMDProcessorTypeAndSubtype(unsigned Family,
                                                 unsigned Model,
                                                 const unsigned *Features,
                                                 unsigned *Type,
                                                 unsigned *Subtype) {
  const char *CPU = 0;

  switch (Family) {
  case 4:
    CPU = "i486";
    break;
  case 5:
    CPU = "pentium";
    switch (Model) {
    case 6:
    case 7:
      CPU = "k6";
      break;
    case 8:
      CPU = "k6-2";
      break;
    case 9:
    case 13:
      CPU = "k6-3";
      break;
    case 10:
      CPU = "geode";
      break;
    }
    break;
  case 6:
    if (testFeature(X86::FEATURE_SSE)) {
      CPU = "athlon-xp";
      break;
    }
    CPU = "athlon";
    break;
  case 15:
    if (testFeature(X86::FEATURE_SSE3)) {
      CPU = "k8-sse3";
      break;
    }
    CPU = "k8";
    break;
  case 16:
  case 18:
    CPU = "amdfam10";
    *Type = X86::AMDFAM10H; // "amdfam10"
    switch (Model) {
    case 2:
      *Subtype = X86::AMDFAM10H_BARCELONA;
      break;
    case 4:
      *Subtype = X86::AMDFAM10H_SHANGHAI;
      break;
    case 8:
      *Subtype = X86::AMDFAM10H_ISTANBUL;
      break;
    }
    break;
  case 20:
    CPU = "btver1";
    *Type = X86::AMD_BTVER1;
    break;
  case 21:
    CPU = "bdver1";
    *Type = X86::AMDFAM15H;
    if (Model >= 0x60 && Model <= 0x7f) {
      CPU = "bdver4";
      *Subtype = X86::AMDFAM15H_BDVER4;
      break; // 60h-7Fh: Excavator
    }
    if (Model >= 0x30 && Model <= 0x3f) {
      CPU = "bdver3";
      *Subtype = X86::AMDFAM15H_BDVER3;
1183 break; // 30h-3Fh: Steamroller 1184 } 1185 if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) { 1186 CPU = "bdver2"; 1187 *Subtype = X86::AMDFAM15H_BDVER2; 1188 break; // 02h, 10h-1Fh: Piledriver 1189 } 1190 if (Model <= 0x0f) { 1191 *Subtype = X86::AMDFAM15H_BDVER1; 1192 break; // 00h-0Fh: Bulldozer 1193 } 1194 break; 1195 case 22: 1196 CPU = "btver2"; 1197 *Type = X86::AMD_BTVER2; 1198 break; 1199 case 23: 1200 CPU = "znver1"; 1201 *Type = X86::AMDFAM17H; 1202 if ((Model >= 0x30 && Model <= 0x3f) || (Model == 0x47) || 1203 (Model >= 0x60 && Model <= 0x67) || (Model >= 0x68 && Model <= 0x6f) || 1204 (Model >= 0x70 && Model <= 0x7f) || (Model >= 0x84 && Model <= 0x87) || 1205 (Model >= 0x90 && Model <= 0x97) || (Model >= 0x98 && Model <= 0x9f) || 1206 (Model >= 0xa0 && Model <= 0xaf)) { 1207 // Family 17h Models 30h-3Fh (Starship) Zen 2 1208 // Family 17h Models 47h (Cardinal) Zen 2 1209 // Family 17h Models 60h-67h (Renoir) Zen 2 1210 // Family 17h Models 68h-6Fh (Lucienne) Zen 2 1211 // Family 17h Models 70h-7Fh (Matisse) Zen 2 1212 // Family 17h Models 84h-87h (ProjectX) Zen 2 1213 // Family 17h Models 90h-97h (VanGogh) Zen 2 1214 // Family 17h Models 98h-9Fh (Mero) Zen 2 1215 // Family 17h Models A0h-AFh (Mendocino) Zen 2 1216 CPU = "znver2"; 1217 *Subtype = X86::AMDFAM17H_ZNVER2; 1218 break; 1219 } 1220 if ((Model >= 0x10 && Model <= 0x1f) || (Model >= 0x20 && Model <= 0x2f)) { 1221 // Family 17h Models 10h-1Fh (Raven1) Zen 1222 // Family 17h Models 10h-1Fh (Picasso) Zen+ 1223 // Family 17h Models 20h-2Fh (Raven2 x86) Zen 1224 *Subtype = X86::AMDFAM17H_ZNVER1; 1225 break; 1226 } 1227 break; 1228 case 25: 1229 CPU = "znver3"; 1230 *Type = X86::AMDFAM19H; 1231 if (Model <= 0x0f || (Model >= 0x20 && Model <= 0x2f) || 1232 (Model >= 0x30 && Model <= 0x3f) || (Model >= 0x40 && Model <= 0x4f) || 1233 (Model >= 0x50 && Model <= 0x5f)) { 1234 // Family 19h Models 00h-0Fh (Genesis, Chagall) Zen 3 1235 // Family 19h Models 20h-2Fh (Vermeer) Zen 3 1236 // Family 
19h Models 30h-3Fh (Badami) Zen 3 1237 // Family 19h Models 40h-4Fh (Rembrandt) Zen 3+ 1238 // Family 19h Models 50h-5Fh (Cezanne) Zen 3 1239 *Subtype = X86::AMDFAM19H_ZNVER3; 1240 break; 1241 } 1242 if ((Model >= 0x10 && Model <= 0x1f) || (Model >= 0x60 && Model <= 0x6f) || 1243 (Model >= 0x70 && Model <= 0x77) || (Model >= 0x78 && Model <= 0x7f) || 1244 (Model >= 0xa0 && Model <= 0xaf)) { 1245 // Family 19h Models 10h-1Fh (Stones; Storm Peak) Zen 4 1246 // Family 19h Models 60h-6Fh (Raphael) Zen 4 1247 // Family 19h Models 70h-77h (Phoenix, Hawkpoint1) Zen 4 1248 // Family 19h Models 78h-7Fh (Phoenix 2, Hawkpoint2) Zen 4 1249 // Family 19h Models A0h-AFh (Stones-Dense) Zen 4 1250 CPU = "znver4"; 1251 *Subtype = X86::AMDFAM19H_ZNVER4; 1252 break; // "znver4" 1253 } 1254 break; // family 19h 1255 case 26: 1256 CPU = "znver5"; 1257 *Type = X86::AMDFAM1AH; 1258 if (Model <= 0x77) { 1259 // Models 00h-0Fh (Breithorn). 1260 // Models 10h-1Fh (Breithorn-Dense). 1261 // Models 20h-2Fh (Strix 1). 1262 // Models 30h-37h (Strix 2). 1263 // Models 38h-3Fh (Strix 3). 1264 // Models 40h-4Fh (Granite Ridge). 1265 // Models 50h-5Fh (Weisshorn). 1266 // Models 60h-6Fh (Krackan1). 1267 // Models 70h-77h (Sarlak). 1268 CPU = "znver5"; 1269 *Subtype = X86::AMDFAM1AH_ZNVER5; 1270 break; // "znver5" 1271 } 1272 break; 1273 1274 default: 1275 break; // Unknown AMD CPU. 
1276 } 1277 1278 return CPU; 1279 } 1280 1281 #undef testFeature 1282 1283 static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, 1284 unsigned *Features) { 1285 unsigned EAX, EBX; 1286 1287 auto setFeature = [&](unsigned F) { 1288 Features[F / 32] |= 1U << (F % 32); 1289 }; 1290 1291 if ((EDX >> 15) & 1) 1292 setFeature(X86::FEATURE_CMOV); 1293 if ((EDX >> 23) & 1) 1294 setFeature(X86::FEATURE_MMX); 1295 if ((EDX >> 25) & 1) 1296 setFeature(X86::FEATURE_SSE); 1297 if ((EDX >> 26) & 1) 1298 setFeature(X86::FEATURE_SSE2); 1299 1300 if ((ECX >> 0) & 1) 1301 setFeature(X86::FEATURE_SSE3); 1302 if ((ECX >> 1) & 1) 1303 setFeature(X86::FEATURE_PCLMUL); 1304 if ((ECX >> 9) & 1) 1305 setFeature(X86::FEATURE_SSSE3); 1306 if ((ECX >> 12) & 1) 1307 setFeature(X86::FEATURE_FMA); 1308 if ((ECX >> 19) & 1) 1309 setFeature(X86::FEATURE_SSE4_1); 1310 if ((ECX >> 20) & 1) { 1311 setFeature(X86::FEATURE_SSE4_2); 1312 setFeature(X86::FEATURE_CRC32); 1313 } 1314 if ((ECX >> 23) & 1) 1315 setFeature(X86::FEATURE_POPCNT); 1316 if ((ECX >> 25) & 1) 1317 setFeature(X86::FEATURE_AES); 1318 1319 if ((ECX >> 22) & 1) 1320 setFeature(X86::FEATURE_MOVBE); 1321 1322 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV 1323 // indicates that the AVX registers will be saved and restored on context 1324 // switch, then we have full AVX support. 1325 const unsigned AVXBits = (1 << 27) | (1 << 28); 1326 bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) && 1327 ((EAX & 0x6) == 0x6); 1328 #if defined(__APPLE__) 1329 // Darwin lazily saves the AVX512 context on first use: trust that the OS will 1330 // save the AVX512 context if we use AVX512 instructions, even the bit is not 1331 // set right now. 1332 bool HasAVX512Save = true; 1333 #else 1334 // AVX512 requires additional context to be saved by the OS. 
1335 bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0); 1336 #endif 1337 1338 if (HasAVX) 1339 setFeature(X86::FEATURE_AVX); 1340 1341 bool HasLeaf7 = 1342 MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); 1343 1344 if (HasLeaf7 && ((EBX >> 3) & 1)) 1345 setFeature(X86::FEATURE_BMI); 1346 if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX) 1347 setFeature(X86::FEATURE_AVX2); 1348 if (HasLeaf7 && ((EBX >> 8) & 1)) 1349 setFeature(X86::FEATURE_BMI2); 1350 if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save) { 1351 setFeature(X86::FEATURE_AVX512F); 1352 setFeature(X86::FEATURE_EVEX512); 1353 } 1354 if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save) 1355 setFeature(X86::FEATURE_AVX512DQ); 1356 if (HasLeaf7 && ((EBX >> 19) & 1)) 1357 setFeature(X86::FEATURE_ADX); 1358 if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save) 1359 setFeature(X86::FEATURE_AVX512IFMA); 1360 if (HasLeaf7 && ((EBX >> 23) & 1)) 1361 setFeature(X86::FEATURE_CLFLUSHOPT); 1362 if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save) 1363 setFeature(X86::FEATURE_AVX512CD); 1364 if (HasLeaf7 && ((EBX >> 29) & 1)) 1365 setFeature(X86::FEATURE_SHA); 1366 if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save) 1367 setFeature(X86::FEATURE_AVX512BW); 1368 if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save) 1369 setFeature(X86::FEATURE_AVX512VL); 1370 1371 if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save) 1372 setFeature(X86::FEATURE_AVX512VBMI); 1373 if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save) 1374 setFeature(X86::FEATURE_AVX512VBMI2); 1375 if (HasLeaf7 && ((ECX >> 8) & 1)) 1376 setFeature(X86::FEATURE_GFNI); 1377 if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX) 1378 setFeature(X86::FEATURE_VPCLMULQDQ); 1379 if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save) 1380 setFeature(X86::FEATURE_AVX512VNNI); 1381 if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save) 1382 setFeature(X86::FEATURE_AVX512BITALG); 1383 if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save) 1384 
setFeature(X86::FEATURE_AVX512VPOPCNTDQ); 1385 1386 if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save) 1387 setFeature(X86::FEATURE_AVX5124VNNIW); 1388 if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save) 1389 setFeature(X86::FEATURE_AVX5124FMAPS); 1390 if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save) 1391 setFeature(X86::FEATURE_AVX512VP2INTERSECT); 1392 1393 // EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't 1394 // return all 0s for invalid subleaves so check the limit. 1395 bool HasLeaf7Subleaf1 = 1396 HasLeaf7 && EAX >= 1 && 1397 !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); 1398 if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save) 1399 setFeature(X86::FEATURE_AVX512BF16); 1400 1401 unsigned MaxExtLevel; 1402 getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); 1403 1404 bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && 1405 !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); 1406 if (HasExtLeaf1 && ((ECX >> 6) & 1)) 1407 setFeature(X86::FEATURE_SSE4_A); 1408 if (HasExtLeaf1 && ((ECX >> 11) & 1)) 1409 setFeature(X86::FEATURE_XOP); 1410 if (HasExtLeaf1 && ((ECX >> 16) & 1)) 1411 setFeature(X86::FEATURE_FMA4); 1412 1413 if (HasExtLeaf1 && ((EDX >> 29) & 1)) 1414 setFeature(X86::FEATURE_64BIT); 1415 } 1416 1417 StringRef sys::getHostCPUName() { 1418 unsigned MaxLeaf = 0; 1419 const VendorSignatures Vendor = getVendorSignature(&MaxLeaf); 1420 if (Vendor == VendorSignatures::UNKNOWN) 1421 return "generic"; 1422 1423 unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; 1424 getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX); 1425 1426 unsigned Family = 0, Model = 0; 1427 unsigned Features[(X86::CPU_FEATURE_MAX + 31) / 32] = {0}; 1428 detectX86FamilyModel(EAX, &Family, &Model); 1429 getAvailableFeatures(ECX, EDX, MaxLeaf, Features); 1430 1431 // These aren't consumed in this file, but we try to keep some source code the 1432 // same or similar to compiler-rt. 
1433 unsigned Type = 0; 1434 unsigned Subtype = 0; 1435 1436 StringRef CPU; 1437 1438 if (Vendor == VendorSignatures::GENUINE_INTEL) { 1439 CPU = getIntelProcessorTypeAndSubtype(Family, Model, Features, &Type, 1440 &Subtype); 1441 } else if (Vendor == VendorSignatures::AUTHENTIC_AMD) { 1442 CPU = getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type, 1443 &Subtype); 1444 } 1445 1446 if (!CPU.empty()) 1447 return CPU; 1448 1449 return "generic"; 1450 } 1451 1452 #elif defined(__APPLE__) && defined(__powerpc__) 1453 StringRef sys::getHostCPUName() { 1454 host_basic_info_data_t hostInfo; 1455 mach_msg_type_number_t infoCount; 1456 1457 infoCount = HOST_BASIC_INFO_COUNT; 1458 mach_port_t hostPort = mach_host_self(); 1459 host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo, 1460 &infoCount); 1461 mach_port_deallocate(mach_task_self(), hostPort); 1462 1463 if (hostInfo.cpu_type != CPU_TYPE_POWERPC) 1464 return "generic"; 1465 1466 switch (hostInfo.cpu_subtype) { 1467 case CPU_SUBTYPE_POWERPC_601: 1468 return "601"; 1469 case CPU_SUBTYPE_POWERPC_602: 1470 return "602"; 1471 case CPU_SUBTYPE_POWERPC_603: 1472 return "603"; 1473 case CPU_SUBTYPE_POWERPC_603e: 1474 return "603e"; 1475 case CPU_SUBTYPE_POWERPC_603ev: 1476 return "603ev"; 1477 case CPU_SUBTYPE_POWERPC_604: 1478 return "604"; 1479 case CPU_SUBTYPE_POWERPC_604e: 1480 return "604e"; 1481 case CPU_SUBTYPE_POWERPC_620: 1482 return "620"; 1483 case CPU_SUBTYPE_POWERPC_750: 1484 return "750"; 1485 case CPU_SUBTYPE_POWERPC_7400: 1486 return "7400"; 1487 case CPU_SUBTYPE_POWERPC_7450: 1488 return "7450"; 1489 case CPU_SUBTYPE_POWERPC_970: 1490 return "970"; 1491 default:; 1492 } 1493 1494 return "generic"; 1495 } 1496 #elif defined(__linux__) && defined(__powerpc__) 1497 StringRef sys::getHostCPUName() { 1498 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1499 StringRef Content = P ? 
P->getBuffer() : ""; 1500 return detail::getHostCPUNameForPowerPC(Content); 1501 } 1502 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__)) 1503 StringRef sys::getHostCPUName() { 1504 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1505 StringRef Content = P ? P->getBuffer() : ""; 1506 return detail::getHostCPUNameForARM(Content); 1507 } 1508 #elif defined(__linux__) && defined(__s390x__) 1509 StringRef sys::getHostCPUName() { 1510 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1511 StringRef Content = P ? P->getBuffer() : ""; 1512 return detail::getHostCPUNameForS390x(Content); 1513 } 1514 #elif defined(__MVS__) 1515 StringRef sys::getHostCPUName() { 1516 // Get pointer to Communications Vector Table (CVT). 1517 // The pointer is located at offset 16 of the Prefixed Save Area (PSA). 1518 // It is stored as 31 bit pointer and will be zero-extended to 64 bit. 1519 int *StartToCVTOffset = reinterpret_cast<int *>(0x10); 1520 // Since its stored as a 31-bit pointer, get the 4 bytes from the start 1521 // of address. 1522 int ReadValue = *StartToCVTOffset; 1523 // Explicitly clear the high order bit. 1524 ReadValue = (ReadValue & 0x7FFFFFFF); 1525 char *CVT = reinterpret_cast<char *>(ReadValue); 1526 // The model number is located in the CVT prefix at offset -6 and stored as 1527 // signless packed decimal. 1528 uint16_t Id = *(uint16_t *)&CVT[-6]; 1529 // Convert number to integer. 1530 Id = decodePackedBCD<uint16_t>(Id, false); 1531 // Check for vector support. It's stored in field CVTFLAG5 (offset 244), 1532 // bit CVTVEF (X'80'). The facilities list is part of the PSA but the vector 1533 // extension can only be used if bit CVTVEF is on. 1534 bool HaveVectorSupport = CVT[244] & 0x80; 1535 return getCPUNameFromS390Model(Id, HaveVectorSupport); 1536 } 1537 #elif defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) 1538 // Copied from <mach/machine.h> in the macOS SDK. 
1539 // 1540 // Also available here, though usually not as up-to-date: 1541 // https://github.com/apple-oss-distributions/xnu/blob/xnu-11215.41.3/osfmk/mach/machine.h#L403-L452. 1542 #define CPUFAMILY_UNKNOWN 0 1543 #define CPUFAMILY_ARM_9 0xe73283ae 1544 #define CPUFAMILY_ARM_11 0x8ff620d8 1545 #define CPUFAMILY_ARM_XSCALE 0x53b005f5 1546 #define CPUFAMILY_ARM_12 0xbd1b0ae9 1547 #define CPUFAMILY_ARM_13 0x0cc90e64 1548 #define CPUFAMILY_ARM_14 0x96077ef1 1549 #define CPUFAMILY_ARM_15 0xa8511bca 1550 #define CPUFAMILY_ARM_SWIFT 0x1e2d6381 1551 #define CPUFAMILY_ARM_CYCLONE 0x37a09642 1552 #define CPUFAMILY_ARM_TYPHOON 0x2c91a47e 1553 #define CPUFAMILY_ARM_TWISTER 0x92fb37c8 1554 #define CPUFAMILY_ARM_HURRICANE 0x67ceee93 1555 #define CPUFAMILY_ARM_MONSOON_MISTRAL 0xe81e7ef6 1556 #define CPUFAMILY_ARM_VORTEX_TEMPEST 0x07d34b9f 1557 #define CPUFAMILY_ARM_LIGHTNING_THUNDER 0x462504d2 1558 #define CPUFAMILY_ARM_FIRESTORM_ICESTORM 0x1b588bb3 1559 #define CPUFAMILY_ARM_BLIZZARD_AVALANCHE 0xda33d83d 1560 #define CPUFAMILY_ARM_EVEREST_SAWTOOTH 0x8765edea 1561 #define CPUFAMILY_ARM_IBIZA 0xfa33415e 1562 #define CPUFAMILY_ARM_PALMA 0x72015832 1563 #define CPUFAMILY_ARM_COLL 0x2876f5b5 1564 #define CPUFAMILY_ARM_LOBOS 0x5f4dea93 1565 #define CPUFAMILY_ARM_DONAN 0x6f5129ac 1566 #define CPUFAMILY_ARM_BRAVA 0x17d5b93a 1567 #define CPUFAMILY_ARM_TAHITI 0x75d4acb9 1568 #define CPUFAMILY_ARM_TUPAI 0x204526d0 1569 1570 StringRef sys::getHostCPUName() { 1571 uint32_t Family; 1572 size_t Length = sizeof(Family); 1573 sysctlbyname("hw.cpufamily", &Family, &Length, NULL, 0); 1574 1575 // This is found by testing on actual hardware, and by looking at: 1576 // https://github.com/apple-oss-distributions/xnu/blob/xnu-11215.41.3/osfmk/arm/cpuid.c#L109-L231. 1577 // 1578 // Another great resource is 1579 // https://github.com/AsahiLinux/docs/wiki/Codenames. 1580 // 1581 // NOTE: We choose to return `apple-mX` instead of `apple-aX`, since the M1, 1582 // M2, M3 etc. 
aliases are more widely known to users than A14, A15, A16 etc. 1583 // (and this code is basically only used on host macOS anyways). 1584 switch (Family) { 1585 case CPUFAMILY_UNKNOWN: 1586 return "generic"; 1587 case CPUFAMILY_ARM_9: 1588 return "arm920t"; // or arm926ej-s 1589 case CPUFAMILY_ARM_11: 1590 return "arm1136jf-s"; 1591 case CPUFAMILY_ARM_XSCALE: 1592 return "xscale"; 1593 case CPUFAMILY_ARM_12: // Seems unused by the kernel 1594 return "generic"; 1595 case CPUFAMILY_ARM_13: 1596 return "cortex-a8"; 1597 case CPUFAMILY_ARM_14: 1598 return "cortex-a9"; 1599 case CPUFAMILY_ARM_15: 1600 return "cortex-a7"; 1601 case CPUFAMILY_ARM_SWIFT: 1602 return "swift"; 1603 case CPUFAMILY_ARM_CYCLONE: 1604 return "apple-a7"; 1605 case CPUFAMILY_ARM_TYPHOON: 1606 return "apple-a8"; 1607 case CPUFAMILY_ARM_TWISTER: 1608 return "apple-a9"; 1609 case CPUFAMILY_ARM_HURRICANE: 1610 return "apple-a10"; 1611 case CPUFAMILY_ARM_MONSOON_MISTRAL: 1612 return "apple-a11"; 1613 case CPUFAMILY_ARM_VORTEX_TEMPEST: 1614 return "apple-a12"; 1615 case CPUFAMILY_ARM_LIGHTNING_THUNDER: 1616 return "apple-a13"; 1617 case CPUFAMILY_ARM_FIRESTORM_ICESTORM: // A14 / M1 1618 return "apple-m1"; 1619 case CPUFAMILY_ARM_BLIZZARD_AVALANCHE: // A15 / M2 1620 return "apple-m2"; 1621 case CPUFAMILY_ARM_EVEREST_SAWTOOTH: // A16 1622 case CPUFAMILY_ARM_IBIZA: // M3 1623 case CPUFAMILY_ARM_PALMA: // M3 Max 1624 case CPUFAMILY_ARM_LOBOS: // M3 Pro 1625 return "apple-m3"; 1626 case CPUFAMILY_ARM_COLL: // A17 Pro 1627 return "apple-a17"; 1628 case CPUFAMILY_ARM_DONAN: // M4 1629 case CPUFAMILY_ARM_BRAVA: // M4 Max 1630 case CPUFAMILY_ARM_TAHITI: // A18 Pro 1631 case CPUFAMILY_ARM_TUPAI: // A18 1632 return "apple-m4"; 1633 default: 1634 // Default to the newest CPU we know about. 
1635 return "apple-m4"; 1636 } 1637 } 1638 #elif defined(_AIX) 1639 StringRef sys::getHostCPUName() { 1640 switch (_system_configuration.implementation) { 1641 case POWER_4: 1642 if (_system_configuration.version == PV_4_3) 1643 return "970"; 1644 return "pwr4"; 1645 case POWER_5: 1646 if (_system_configuration.version == PV_5) 1647 return "pwr5"; 1648 return "pwr5x"; 1649 case POWER_6: 1650 if (_system_configuration.version == PV_6_Compat) 1651 return "pwr6"; 1652 return "pwr6x"; 1653 case POWER_7: 1654 return "pwr7"; 1655 case POWER_8: 1656 return "pwr8"; 1657 case POWER_9: 1658 return "pwr9"; 1659 // TODO: simplify this once the macro is available in all OS levels. 1660 #ifdef POWER_10 1661 case POWER_10: 1662 #else 1663 case 0x40000: 1664 #endif 1665 return "pwr10"; 1666 #ifdef POWER_11 1667 case POWER_11: 1668 #else 1669 case 0x80000: 1670 #endif 1671 return "pwr11"; 1672 default: 1673 return "generic"; 1674 } 1675 } 1676 #elif defined(__loongarch__) 1677 StringRef sys::getHostCPUName() { 1678 // Use processor id to detect cpu name. 1679 uint32_t processor_id; 1680 __asm__("cpucfg %[prid], $zero\n\t" : [prid] "=r"(processor_id)); 1681 // Refer PRID_SERIES_MASK in linux kernel: arch/loongarch/include/asm/cpu.h. 1682 switch (processor_id & 0xf000) { 1683 case 0xc000: // Loongson 64bit, 4-issue 1684 return "la464"; 1685 case 0xd000: // Loongson 64bit, 6-issue 1686 return "la664"; 1687 // TODO: Others. 1688 default: 1689 break; 1690 } 1691 return "generic"; 1692 } 1693 #elif defined(__riscv) 1694 #if defined(__linux__) 1695 // struct riscv_hwprobe 1696 struct RISCVHwProbe { 1697 int64_t Key; 1698 uint64_t Value; 1699 }; 1700 #endif 1701 1702 StringRef sys::getHostCPUName() { 1703 #if defined(__linux__) 1704 // Try the hwprobe way first. 
1705 RISCVHwProbe Query[]{{/*RISCV_HWPROBE_KEY_MVENDORID=*/0, 0}, 1706 {/*RISCV_HWPROBE_KEY_MARCHID=*/1, 0}, 1707 {/*RISCV_HWPROBE_KEY_MIMPID=*/2, 0}}; 1708 int Ret = syscall(/*__NR_riscv_hwprobe=*/258, /*pairs=*/Query, 1709 /*pair_count=*/std::size(Query), /*cpu_count=*/0, 1710 /*cpus=*/0, /*flags=*/0); 1711 if (Ret == 0) { 1712 RISCV::CPUModel Model{static_cast<uint32_t>(Query[0].Value), Query[1].Value, 1713 Query[2].Value}; 1714 StringRef Name = RISCV::getCPUNameFromCPUModel(Model); 1715 if (!Name.empty()) 1716 return Name; 1717 } 1718 1719 // Then try the cpuinfo way. 1720 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1721 StringRef Content = P ? P->getBuffer() : ""; 1722 StringRef Name = detail::getHostCPUNameForRISCV(Content); 1723 if (!Name.empty()) 1724 return Name; 1725 #endif 1726 #if __riscv_xlen == 64 1727 return "generic-rv64"; 1728 #elif __riscv_xlen == 32 1729 return "generic-rv32"; 1730 #else 1731 #error "Unhandled value of __riscv_xlen" 1732 #endif 1733 } 1734 #elif defined(__sparc__) 1735 #if defined(__linux__) 1736 StringRef sys::detail::getHostCPUNameForSPARC(StringRef ProcCpuinfoContent) { 1737 SmallVector<StringRef> Lines; 1738 ProcCpuinfoContent.split(Lines, '\n'); 1739 1740 // Look for cpu line to determine cpu name 1741 StringRef Cpu; 1742 for (unsigned I = 0, E = Lines.size(); I != E; ++I) { 1743 if (Lines[I].starts_with("cpu")) { 1744 Cpu = Lines[I].substr(5).ltrim("\t :"); 1745 break; 1746 } 1747 } 1748 1749 return StringSwitch<const char *>(Cpu) 1750 .StartsWith("SuperSparc", "supersparc") 1751 .StartsWith("HyperSparc", "hypersparc") 1752 .StartsWith("SpitFire", "ultrasparc") 1753 .StartsWith("BlackBird", "ultrasparc") 1754 .StartsWith("Sabre", " ultrasparc") 1755 .StartsWith("Hummingbird", "ultrasparc") 1756 .StartsWith("Cheetah", "ultrasparc3") 1757 .StartsWith("Jalapeno", "ultrasparc3") 1758 .StartsWith("Jaguar", "ultrasparc3") 1759 .StartsWith("Panther", "ultrasparc3") 1760 .StartsWith("Serrano", "ultrasparc3") 
1761 .StartsWith("UltraSparc T1", "niagara") 1762 .StartsWith("UltraSparc T2", "niagara2") 1763 .StartsWith("UltraSparc T3", "niagara3") 1764 .StartsWith("UltraSparc T4", "niagara4") 1765 .StartsWith("UltraSparc T5", "niagara4") 1766 .StartsWith("LEON", "leon3") 1767 // niagara7/m8 not supported by LLVM yet. 1768 .StartsWith("SPARC-M7", "niagara4" /* "niagara7" */) 1769 .StartsWith("SPARC-S7", "niagara4" /* "niagara7" */) 1770 .StartsWith("SPARC-M8", "niagara4" /* "m8" */) 1771 .Default("generic"); 1772 } 1773 #endif 1774 1775 StringRef sys::getHostCPUName() { 1776 #if defined(__linux__) 1777 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 1778 StringRef Content = P ? P->getBuffer() : ""; 1779 return detail::getHostCPUNameForSPARC(Content); 1780 #elif defined(__sun__) && defined(__svr4__) 1781 char *buf = NULL; 1782 kstat_ctl_t *kc; 1783 kstat_t *ksp; 1784 kstat_named_t *brand = NULL; 1785 1786 kc = kstat_open(); 1787 if (kc != NULL) { 1788 ksp = kstat_lookup(kc, const_cast<char *>("cpu_info"), -1, NULL); 1789 if (ksp != NULL && kstat_read(kc, ksp, NULL) != -1 && 1790 ksp->ks_type == KSTAT_TYPE_NAMED) 1791 brand = 1792 (kstat_named_t *)kstat_data_lookup(ksp, const_cast<char *>("brand")); 1793 if (brand != NULL && brand->data_type == KSTAT_DATA_STRING) 1794 buf = KSTAT_NAMED_STR_PTR(brand); 1795 } 1796 kstat_close(kc); 1797 1798 return StringSwitch<const char *>(buf) 1799 .Case("TMS390S10", "supersparc") // Texas Instruments microSPARC I 1800 .Case("TMS390Z50", "supersparc") // Texas Instruments SuperSPARC I 1801 .Case("TMS390Z55", 1802 "supersparc") // Texas Instruments SuperSPARC I with SuperCache 1803 .Case("MB86904", "supersparc") // Fujitsu microSPARC II 1804 .Case("MB86907", "supersparc") // Fujitsu TurboSPARC 1805 .Case("RT623", "hypersparc") // Ross hyperSPARC 1806 .Case("RT625", "hypersparc") 1807 .Case("RT626", "hypersparc") 1808 .Case("UltraSPARC-I", "ultrasparc") 1809 .Case("UltraSPARC-II", "ultrasparc") 1810 .Case("UltraSPARC-IIe", 
"ultrasparc") 1811 .Case("UltraSPARC-IIi", "ultrasparc") 1812 .Case("SPARC64-III", "ultrasparc") 1813 .Case("SPARC64-IV", "ultrasparc") 1814 .Case("UltraSPARC-III", "ultrasparc3") 1815 .Case("UltraSPARC-III+", "ultrasparc3") 1816 .Case("UltraSPARC-IIIi", "ultrasparc3") 1817 .Case("UltraSPARC-IIIi+", "ultrasparc3") 1818 .Case("UltraSPARC-IV", "ultrasparc3") 1819 .Case("UltraSPARC-IV+", "ultrasparc3") 1820 .Case("SPARC64-V", "ultrasparc3") 1821 .Case("SPARC64-VI", "ultrasparc3") 1822 .Case("SPARC64-VII", "ultrasparc3") 1823 .Case("UltraSPARC-T1", "niagara") 1824 .Case("UltraSPARC-T2", "niagara2") 1825 .Case("UltraSPARC-T2", "niagara2") 1826 .Case("UltraSPARC-T2+", "niagara2") 1827 .Case("SPARC-T3", "niagara3") 1828 .Case("SPARC-T4", "niagara4") 1829 .Case("SPARC-T5", "niagara4") 1830 // niagara7/m8 not supported by LLVM yet. 1831 .Case("SPARC-M7", "niagara4" /* "niagara7" */) 1832 .Case("SPARC-S7", "niagara4" /* "niagara7" */) 1833 .Case("SPARC-M8", "niagara4" /* "m8" */) 1834 .Default("generic"); 1835 #else 1836 return "generic"; 1837 #endif 1838 } 1839 #else 1840 StringRef sys::getHostCPUName() { return "generic"; } 1841 namespace llvm { 1842 namespace sys { 1843 namespace detail { 1844 namespace x86 { 1845 1846 VendorSignatures getVendorSignature(unsigned *MaxLeaf) { 1847 return VendorSignatures::UNKNOWN; 1848 } 1849 1850 } // namespace x86 1851 } // namespace detail 1852 } // namespace sys 1853 } // namespace llvm 1854 #endif 1855 1856 #if defined(__i386__) || defined(_M_IX86) || \ 1857 defined(__x86_64__) || defined(_M_X64) 1858 const StringMap<bool> sys::getHostCPUFeatures() { 1859 unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; 1860 unsigned MaxLevel; 1861 StringMap<bool> Features; 1862 1863 if (getX86CpuIDAndInfo(0, &MaxLevel, &EBX, &ECX, &EDX) || MaxLevel < 1) 1864 return Features; 1865 1866 getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX); 1867 1868 Features["cx8"] = (EDX >> 8) & 1; 1869 Features["cmov"] = (EDX >> 15) & 1; 1870 Features["mmx"] = (EDX >> 23) & 1; 
1871 Features["fxsr"] = (EDX >> 24) & 1; 1872 Features["sse"] = (EDX >> 25) & 1; 1873 Features["sse2"] = (EDX >> 26) & 1; 1874 1875 Features["sse3"] = (ECX >> 0) & 1; 1876 Features["pclmul"] = (ECX >> 1) & 1; 1877 Features["ssse3"] = (ECX >> 9) & 1; 1878 Features["cx16"] = (ECX >> 13) & 1; 1879 Features["sse4.1"] = (ECX >> 19) & 1; 1880 Features["sse4.2"] = (ECX >> 20) & 1; 1881 Features["crc32"] = Features["sse4.2"]; 1882 Features["movbe"] = (ECX >> 22) & 1; 1883 Features["popcnt"] = (ECX >> 23) & 1; 1884 Features["aes"] = (ECX >> 25) & 1; 1885 Features["rdrnd"] = (ECX >> 30) & 1; 1886 1887 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV 1888 // indicates that the AVX registers will be saved and restored on context 1889 // switch, then we have full AVX support. 1890 bool HasXSave = ((ECX >> 27) & 1) && !getX86XCR0(&EAX, &EDX); 1891 bool HasAVXSave = HasXSave && ((ECX >> 28) & 1) && ((EAX & 0x6) == 0x6); 1892 #if defined(__APPLE__) 1893 // Darwin lazily saves the AVX512 context on first use: trust that the OS will 1894 // save the AVX512 context if we use AVX512 instructions, even the bit is not 1895 // set right now. 1896 bool HasAVX512Save = true; 1897 #else 1898 // AVX512 requires additional context to be saved by the OS. 1899 bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0); 1900 #endif 1901 // AMX requires additional context to be saved by the OS. 1902 const unsigned AMXBits = (1 << 17) | (1 << 18); 1903 bool HasAMXSave = HasXSave && ((EAX & AMXBits) == AMXBits); 1904 1905 Features["avx"] = HasAVXSave; 1906 Features["fma"] = ((ECX >> 12) & 1) && HasAVXSave; 1907 // Only enable XSAVE if OS has enabled support for saving YMM state. 
1908 Features["xsave"] = ((ECX >> 26) & 1) && HasAVXSave; 1909 Features["f16c"] = ((ECX >> 29) & 1) && HasAVXSave; 1910 1911 unsigned MaxExtLevel; 1912 getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); 1913 1914 bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && 1915 !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); 1916 Features["sahf"] = HasExtLeaf1 && ((ECX >> 0) & 1); 1917 Features["lzcnt"] = HasExtLeaf1 && ((ECX >> 5) & 1); 1918 Features["sse4a"] = HasExtLeaf1 && ((ECX >> 6) & 1); 1919 Features["prfchw"] = HasExtLeaf1 && ((ECX >> 8) & 1); 1920 Features["xop"] = HasExtLeaf1 && ((ECX >> 11) & 1) && HasAVXSave; 1921 Features["lwp"] = HasExtLeaf1 && ((ECX >> 15) & 1); 1922 Features["fma4"] = HasExtLeaf1 && ((ECX >> 16) & 1) && HasAVXSave; 1923 Features["tbm"] = HasExtLeaf1 && ((ECX >> 21) & 1); 1924 Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1); 1925 1926 Features["64bit"] = HasExtLeaf1 && ((EDX >> 29) & 1); 1927 1928 // Miscellaneous memory related features, detected by 1929 // using the 0x80000008 leaf of the CPUID instruction 1930 bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 && 1931 !getX86CpuIDAndInfo(0x80000008, &EAX, &EBX, &ECX, &EDX); 1932 Features["clzero"] = HasExtLeaf8 && ((EBX >> 0) & 1); 1933 Features["rdpru"] = HasExtLeaf8 && ((EBX >> 4) & 1); 1934 Features["wbnoinvd"] = HasExtLeaf8 && ((EBX >> 9) & 1); 1935 1936 bool HasLeaf7 = 1937 MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); 1938 1939 Features["fsgsbase"] = HasLeaf7 && ((EBX >> 0) & 1); 1940 Features["sgx"] = HasLeaf7 && ((EBX >> 2) & 1); 1941 Features["bmi"] = HasLeaf7 && ((EBX >> 3) & 1); 1942 // AVX2 is only supported if we have the OS save support from AVX. 
1943 Features["avx2"] = HasLeaf7 && ((EBX >> 5) & 1) && HasAVXSave; 1944 Features["bmi2"] = HasLeaf7 && ((EBX >> 8) & 1); 1945 Features["invpcid"] = HasLeaf7 && ((EBX >> 10) & 1); 1946 Features["rtm"] = HasLeaf7 && ((EBX >> 11) & 1); 1947 // AVX512 is only supported if the OS supports the context save for it. 1948 Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save; 1949 if (Features["avx512f"]) 1950 Features["evex512"] = true; 1951 Features["avx512dq"] = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save; 1952 Features["rdseed"] = HasLeaf7 && ((EBX >> 18) & 1); 1953 Features["adx"] = HasLeaf7 && ((EBX >> 19) & 1); 1954 Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save; 1955 Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1); 1956 Features["clwb"] = HasLeaf7 && ((EBX >> 24) & 1); 1957 Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save; 1958 Features["sha"] = HasLeaf7 && ((EBX >> 29) & 1); 1959 Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save; 1960 Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save; 1961 1962 Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save; 1963 Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1); 1964 Features["waitpkg"] = HasLeaf7 && ((ECX >> 5) & 1); 1965 Features["avx512vbmi2"] = HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save; 1966 Features["shstk"] = HasLeaf7 && ((ECX >> 7) & 1); 1967 Features["gfni"] = HasLeaf7 && ((ECX >> 8) & 1); 1968 Features["vaes"] = HasLeaf7 && ((ECX >> 9) & 1) && HasAVXSave; 1969 Features["vpclmulqdq"] = HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave; 1970 Features["avx512vnni"] = HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save; 1971 Features["avx512bitalg"] = HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save; 1972 Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save; 1973 Features["rdpid"] = HasLeaf7 && ((ECX >> 22) & 1); 1974 Features["kl"] = HasLeaf7 && ((ECX >> 23) & 1); // key locker 
1975 Features["cldemote"] = HasLeaf7 && ((ECX >> 25) & 1); 1976 Features["movdiri"] = HasLeaf7 && ((ECX >> 27) & 1); 1977 Features["movdir64b"] = HasLeaf7 && ((ECX >> 28) & 1); 1978 Features["enqcmd"] = HasLeaf7 && ((ECX >> 29) & 1); 1979 1980 Features["uintr"] = HasLeaf7 && ((EDX >> 5) & 1); 1981 Features["avx512vp2intersect"] = 1982 HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save; 1983 Features["serialize"] = HasLeaf7 && ((EDX >> 14) & 1); 1984 Features["tsxldtrk"] = HasLeaf7 && ((EDX >> 16) & 1); 1985 // There are two CPUID leafs which information associated with the pconfig 1986 // instruction: 1987 // EAX=0x7, ECX=0x0 indicates the availability of the instruction (via the 18th 1988 // bit of EDX), while the EAX=0x1b leaf returns information on the 1989 // availability of specific pconfig leafs. 1990 // The target feature here only refers to the the first of these two. 1991 // Users might need to check for the availability of specific pconfig 1992 // leaves using cpuid, since that information is ignored while 1993 // detecting features using the "-march=native" flag. 1994 // For more info, see X86 ISA docs. 1995 Features["pconfig"] = HasLeaf7 && ((EDX >> 18) & 1); 1996 Features["amx-bf16"] = HasLeaf7 && ((EDX >> 22) & 1) && HasAMXSave; 1997 Features["avx512fp16"] = HasLeaf7 && ((EDX >> 23) & 1) && HasAVX512Save; 1998 Features["amx-tile"] = HasLeaf7 && ((EDX >> 24) & 1) && HasAMXSave; 1999 Features["amx-int8"] = HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave; 2000 // EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't 2001 // return all 0s for invalid subleaves so check the limit. 
2002 bool HasLeaf7Subleaf1 = 2003 HasLeaf7 && EAX >= 1 && 2004 !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); 2005 Features["sha512"] = HasLeaf7Subleaf1 && ((EAX >> 0) & 1); 2006 Features["sm3"] = HasLeaf7Subleaf1 && ((EAX >> 1) & 1); 2007 Features["sm4"] = HasLeaf7Subleaf1 && ((EAX >> 2) & 1); 2008 Features["raoint"] = HasLeaf7Subleaf1 && ((EAX >> 3) & 1); 2009 Features["avxvnni"] = HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave; 2010 Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save; 2011 Features["amx-fp16"] = HasLeaf7Subleaf1 && ((EAX >> 21) & 1) && HasAMXSave; 2012 Features["cmpccxadd"] = HasLeaf7Subleaf1 && ((EAX >> 7) & 1); 2013 Features["hreset"] = HasLeaf7Subleaf1 && ((EAX >> 22) & 1); 2014 Features["avxifma"] = HasLeaf7Subleaf1 && ((EAX >> 23) & 1) && HasAVXSave; 2015 Features["movrs"] = HasLeaf7Subleaf1 && ((EAX >> 31) & 1); 2016 Features["avxvnniint8"] = HasLeaf7Subleaf1 && ((EDX >> 4) & 1) && HasAVXSave; 2017 Features["avxneconvert"] = HasLeaf7Subleaf1 && ((EDX >> 5) & 1) && HasAVXSave; 2018 Features["amx-complex"] = HasLeaf7Subleaf1 && ((EDX >> 8) & 1) && HasAMXSave; 2019 Features["avxvnniint16"] = HasLeaf7Subleaf1 && ((EDX >> 10) & 1) && HasAVXSave; 2020 Features["prefetchi"] = HasLeaf7Subleaf1 && ((EDX >> 14) & 1); 2021 Features["usermsr"] = HasLeaf7Subleaf1 && ((EDX >> 15) & 1); 2022 bool HasAVX10 = HasLeaf7Subleaf1 && ((EDX >> 19) & 1); 2023 bool HasAPXF = HasLeaf7Subleaf1 && ((EDX >> 21) & 1); 2024 Features["egpr"] = HasAPXF; 2025 Features["push2pop2"] = HasAPXF; 2026 Features["ppx"] = HasAPXF; 2027 Features["ndd"] = HasAPXF; 2028 Features["ccmp"] = HasAPXF; 2029 Features["nf"] = HasAPXF; 2030 Features["cf"] = HasAPXF; 2031 Features["zu"] = HasAPXF; 2032 2033 bool HasLeafD = MaxLevel >= 0xd && 2034 !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX); 2035 2036 // Only enable XSAVE if OS has enabled support for saving YMM state. 
2037 Features["xsaveopt"] = HasLeafD && ((EAX >> 0) & 1) && HasAVXSave; 2038 Features["xsavec"] = HasLeafD && ((EAX >> 1) & 1) && HasAVXSave; 2039 Features["xsaves"] = HasLeafD && ((EAX >> 3) & 1) && HasAVXSave; 2040 2041 bool HasLeaf14 = MaxLevel >= 0x14 && 2042 !getX86CpuIDAndInfoEx(0x14, 0x0, &EAX, &EBX, &ECX, &EDX); 2043 2044 Features["ptwrite"] = HasLeaf14 && ((EBX >> 4) & 1); 2045 2046 bool HasLeaf19 = 2047 MaxLevel >= 0x19 && !getX86CpuIDAndInfo(0x19, &EAX, &EBX, &ECX, &EDX); 2048 Features["widekl"] = HasLeaf7 && HasLeaf19 && ((EBX >> 2) & 1); 2049 2050 bool HasLeaf1E = MaxLevel >= 0x1e && 2051 !getX86CpuIDAndInfoEx(0x1e, 0x1, &EAX, &EBX, &ECX, &EDX); 2052 Features["amx-fp8"] = HasLeaf1E && ((EAX >> 4) & 1) && HasAMXSave; 2053 Features["amx-transpose"] = HasLeaf1E && ((EAX >> 5) & 1) && HasAMXSave; 2054 Features["amx-tf32"] = HasLeaf1E && ((EAX >> 6) & 1) && HasAMXSave; 2055 Features["amx-avx512"] = HasLeaf1E && ((EAX >> 7) & 1) && HasAMXSave; 2056 Features["amx-movrs"] = HasLeaf1E && ((EAX >> 8) & 1) && HasAMXSave; 2057 2058 bool HasLeaf24 = 2059 MaxLevel >= 0x24 && !getX86CpuIDAndInfo(0x24, &EAX, &EBX, &ECX, &EDX); 2060 2061 int AVX10Ver = HasLeaf24 && (EBX & 0xff); 2062 int Has512Len = HasLeaf24 && ((EBX >> 18) & 1); 2063 Features["avx10.1-256"] = HasAVX10 && AVX10Ver >= 1; 2064 Features["avx10.1-512"] = HasAVX10 && AVX10Ver >= 1 && Has512Len; 2065 Features["avx10.2-256"] = HasAVX10 && AVX10Ver >= 2; 2066 Features["avx10.2-512"] = HasAVX10 && AVX10Ver >= 2 && Has512Len; 2067 2068 return Features; 2069 } 2070 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__)) 2071 const StringMap<bool> sys::getHostCPUFeatures() { 2072 StringMap<bool> Features; 2073 std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); 2074 if (!P) 2075 return Features; 2076 2077 SmallVector<StringRef, 32> Lines; 2078 P->getBuffer().split(Lines, '\n'); 2079 2080 SmallVector<StringRef, 32> CPUFeatures; 2081 2082 // Look for the CPU features. 
2083 for (unsigned I = 0, E = Lines.size(); I != E; ++I) 2084 if (Lines[I].starts_with("Features")) { 2085 Lines[I].split(CPUFeatures, ' '); 2086 break; 2087 } 2088 2089 #if defined(__aarch64__) 2090 // All of these are "crypto" features, but we must sift out actual features 2091 // as the former meaning of "crypto" as a single feature is no more. 2092 enum { CAP_AES = 0x1, CAP_PMULL = 0x2, CAP_SHA1 = 0x4, CAP_SHA2 = 0x8 }; 2093 uint32_t crypto = 0; 2094 #endif 2095 2096 for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) { 2097 StringRef LLVMFeatureStr = StringSwitch<StringRef>(CPUFeatures[I]) 2098 #if defined(__aarch64__) 2099 .Case("asimd", "neon") 2100 .Case("fp", "fp-armv8") 2101 .Case("crc32", "crc") 2102 .Case("atomics", "lse") 2103 .Case("sha3", "sha3") 2104 .Case("sm4", "sm4") 2105 .Case("sve", "sve") 2106 .Case("sve2", "sve2") 2107 .Case("sveaes", "sve-aes") 2108 .Case("svesha3", "sve-sha3") 2109 .Case("svesm4", "sve-sm4") 2110 #else 2111 .Case("half", "fp16") 2112 .Case("neon", "neon") 2113 .Case("vfpv3", "vfp3") 2114 .Case("vfpv3d16", "vfp3d16") 2115 .Case("vfpv4", "vfp4") 2116 .Case("idiva", "hwdiv-arm") 2117 .Case("idivt", "hwdiv") 2118 #endif 2119 .Default(""); 2120 2121 #if defined(__aarch64__) 2122 // We need to check crypto separately since we need all of the crypto 2123 // extensions to enable the subtarget feature 2124 if (CPUFeatures[I] == "aes") 2125 crypto |= CAP_AES; 2126 else if (CPUFeatures[I] == "pmull") 2127 crypto |= CAP_PMULL; 2128 else if (CPUFeatures[I] == "sha1") 2129 crypto |= CAP_SHA1; 2130 else if (CPUFeatures[I] == "sha2") 2131 crypto |= CAP_SHA2; 2132 #endif 2133 2134 if (LLVMFeatureStr != "") 2135 Features[LLVMFeatureStr] = true; 2136 } 2137 2138 #if defined(__aarch64__) 2139 // LLVM has decided some AArch64 CPUs have all the instructions they _may_ 2140 // have, as opposed to all the instructions they _must_ have, so allow runtime 2141 // information to correct us on that. 
2142 uint32_t Aes = CAP_AES | CAP_PMULL; 2143 uint32_t Sha2 = CAP_SHA1 | CAP_SHA2; 2144 Features["aes"] = (crypto & Aes) == Aes; 2145 Features["sha2"] = (crypto & Sha2) == Sha2; 2146 #endif 2147 2148 return Features; 2149 } 2150 #elif defined(_WIN32) && (defined(__aarch64__) || defined(_M_ARM64)) 2151 const StringMap<bool> sys::getHostCPUFeatures() { 2152 StringMap<bool> Features; 2153 2154 // If we're asking the OS at runtime, believe what the OS says 2155 Features["neon"] = 2156 IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE); 2157 Features["crc"] = 2158 IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE); 2159 2160 // Avoid inferring "crypto" means more than the traditional AES + SHA2 2161 bool TradCrypto = 2162 IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE); 2163 Features["aes"] = TradCrypto; 2164 Features["sha2"] = TradCrypto; 2165 2166 return Features; 2167 } 2168 #elif defined(__linux__) && defined(__loongarch__) 2169 #include <sys/auxv.h> 2170 const StringMap<bool> sys::getHostCPUFeatures() { 2171 unsigned long hwcap = getauxval(AT_HWCAP); 2172 bool HasFPU = hwcap & (1UL << 3); // HWCAP_LOONGARCH_FPU 2173 uint32_t cpucfg2 = 0x2, cpucfg3 = 0x3; 2174 __asm__("cpucfg %[cpucfg2], %[cpucfg2]\n\t" : [cpucfg2] "+r"(cpucfg2)); 2175 __asm__("cpucfg %[cpucfg3], %[cpucfg3]\n\t" : [cpucfg3] "+r"(cpucfg3)); 2176 2177 StringMap<bool> Features; 2178 2179 Features["f"] = HasFPU && (cpucfg2 & (1U << 1)); // CPUCFG.2.FP_SP 2180 Features["d"] = HasFPU && (cpucfg2 & (1U << 2)); // CPUCFG.2.FP_DP 2181 2182 Features["lsx"] = hwcap & (1UL << 4); // HWCAP_LOONGARCH_LSX 2183 Features["lasx"] = hwcap & (1UL << 5); // HWCAP_LOONGARCH_LASX 2184 Features["lvz"] = hwcap & (1UL << 9); // HWCAP_LOONGARCH_LVZ 2185 2186 Features["frecipe"] = cpucfg2 & (1U << 25); // CPUCFG.2.FRECIPE 2187 Features["div32"] = cpucfg2 & (1U << 26); // CPUCFG.2.DIV32 2188 Features["lam-bh"] = cpucfg2 & (1U << 27); // CPUCFG.2.LAM_BH 2189 Features["lamcas"] = 
cpucfg2 & (1U << 28); // CPUCFG.2.LAMCAS 2190 Features["scq"] = cpucfg2 & (1U << 30); // CPUCFG.2.SCQ 2191 2192 Features["ld-seq-sa"] = cpucfg3 & (1U << 23); // CPUCFG.3.LD_SEQ_SA 2193 2194 // TODO: Need to complete. 2195 // Features["llacq-screl"] = cpucfg2 & (1U << 29); // CPUCFG.2.LLACQ_SCREL 2196 return Features; 2197 } 2198 #elif defined(__linux__) && defined(__riscv) 2199 const StringMap<bool> sys::getHostCPUFeatures() { 2200 RISCVHwProbe Query[]{{/*RISCV_HWPROBE_KEY_BASE_BEHAVIOR=*/3, 0}, 2201 {/*RISCV_HWPROBE_KEY_IMA_EXT_0=*/4, 0}, 2202 {/*RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF=*/9, 0}}; 2203 int Ret = syscall(/*__NR_riscv_hwprobe=*/258, /*pairs=*/Query, 2204 /*pair_count=*/std::size(Query), /*cpu_count=*/0, 2205 /*cpus=*/0, /*flags=*/0); 2206 if (Ret != 0) 2207 return {}; 2208 2209 StringMap<bool> Features; 2210 uint64_t BaseMask = Query[0].Value; 2211 // Check whether RISCV_HWPROBE_BASE_BEHAVIOR_IMA is set. 2212 if (BaseMask & 1) { 2213 Features["i"] = true; 2214 Features["m"] = true; 2215 Features["a"] = true; 2216 } 2217 2218 uint64_t ExtMask = Query[1].Value; 2219 Features["f"] = ExtMask & (1 << 0); // RISCV_HWPROBE_IMA_FD 2220 Features["d"] = ExtMask & (1 << 0); // RISCV_HWPROBE_IMA_FD 2221 Features["c"] = ExtMask & (1 << 1); // RISCV_HWPROBE_IMA_C 2222 Features["v"] = ExtMask & (1 << 2); // RISCV_HWPROBE_IMA_V 2223 Features["zba"] = ExtMask & (1 << 3); // RISCV_HWPROBE_EXT_ZBA 2224 Features["zbb"] = ExtMask & (1 << 4); // RISCV_HWPROBE_EXT_ZBB 2225 Features["zbs"] = ExtMask & (1 << 5); // RISCV_HWPROBE_EXT_ZBS 2226 Features["zicboz"] = ExtMask & (1 << 6); // RISCV_HWPROBE_EXT_ZICBOZ 2227 Features["zbc"] = ExtMask & (1 << 7); // RISCV_HWPROBE_EXT_ZBC 2228 Features["zbkb"] = ExtMask & (1 << 8); // RISCV_HWPROBE_EXT_ZBKB 2229 Features["zbkc"] = ExtMask & (1 << 9); // RISCV_HWPROBE_EXT_ZBKC 2230 Features["zbkx"] = ExtMask & (1 << 10); // RISCV_HWPROBE_EXT_ZBKX 2231 Features["zknd"] = ExtMask & (1 << 11); // RISCV_HWPROBE_EXT_ZKND 2232 Features["zkne"] 
= ExtMask & (1 << 12); // RISCV_HWPROBE_EXT_ZKNE 2233 Features["zknh"] = ExtMask & (1 << 13); // RISCV_HWPROBE_EXT_ZKNH 2234 Features["zksed"] = ExtMask & (1 << 14); // RISCV_HWPROBE_EXT_ZKSED 2235 Features["zksh"] = ExtMask & (1 << 15); // RISCV_HWPROBE_EXT_ZKSH 2236 Features["zkt"] = ExtMask & (1 << 16); // RISCV_HWPROBE_EXT_ZKT 2237 Features["zvbb"] = ExtMask & (1 << 17); // RISCV_HWPROBE_EXT_ZVBB 2238 Features["zvbc"] = ExtMask & (1 << 18); // RISCV_HWPROBE_EXT_ZVBC 2239 Features["zvkb"] = ExtMask & (1 << 19); // RISCV_HWPROBE_EXT_ZVKB 2240 Features["zvkg"] = ExtMask & (1 << 20); // RISCV_HWPROBE_EXT_ZVKG 2241 Features["zvkned"] = ExtMask & (1 << 21); // RISCV_HWPROBE_EXT_ZVKNED 2242 Features["zvknha"] = ExtMask & (1 << 22); // RISCV_HWPROBE_EXT_ZVKNHA 2243 Features["zvknhb"] = ExtMask & (1 << 23); // RISCV_HWPROBE_EXT_ZVKNHB 2244 Features["zvksed"] = ExtMask & (1 << 24); // RISCV_HWPROBE_EXT_ZVKSED 2245 Features["zvksh"] = ExtMask & (1 << 25); // RISCV_HWPROBE_EXT_ZVKSH 2246 Features["zvkt"] = ExtMask & (1 << 26); // RISCV_HWPROBE_EXT_ZVKT 2247 Features["zfh"] = ExtMask & (1 << 27); // RISCV_HWPROBE_EXT_ZFH 2248 Features["zfhmin"] = ExtMask & (1 << 28); // RISCV_HWPROBE_EXT_ZFHMIN 2249 Features["zihintntl"] = ExtMask & (1 << 29); // RISCV_HWPROBE_EXT_ZIHINTNTL 2250 Features["zvfh"] = ExtMask & (1 << 30); // RISCV_HWPROBE_EXT_ZVFH 2251 Features["zvfhmin"] = ExtMask & (1ULL << 31); // RISCV_HWPROBE_EXT_ZVFHMIN 2252 Features["zfa"] = ExtMask & (1ULL << 32); // RISCV_HWPROBE_EXT_ZFA 2253 Features["ztso"] = ExtMask & (1ULL << 33); // RISCV_HWPROBE_EXT_ZTSO 2254 Features["zacas"] = ExtMask & (1ULL << 34); // RISCV_HWPROBE_EXT_ZACAS 2255 Features["zicond"] = ExtMask & (1ULL << 35); // RISCV_HWPROBE_EXT_ZICOND 2256 Features["zihintpause"] = 2257 ExtMask & (1ULL << 36); // RISCV_HWPROBE_EXT_ZIHINTPAUSE 2258 Features["zve32x"] = ExtMask & (1ULL << 37); // RISCV_HWPROBE_EXT_ZVE32X 2259 Features["zve32f"] = ExtMask & (1ULL << 38); // RISCV_HWPROBE_EXT_ZVE32F 2260 
Features["zve64x"] = ExtMask & (1ULL << 39); // RISCV_HWPROBE_EXT_ZVE64X 2261 Features["zve64f"] = ExtMask & (1ULL << 40); // RISCV_HWPROBE_EXT_ZVE64F 2262 Features["zve64d"] = ExtMask & (1ULL << 41); // RISCV_HWPROBE_EXT_ZVE64D 2263 Features["zimop"] = ExtMask & (1ULL << 42); // RISCV_HWPROBE_EXT_ZIMOP 2264 Features["zca"] = ExtMask & (1ULL << 43); // RISCV_HWPROBE_EXT_ZCA 2265 Features["zcb"] = ExtMask & (1ULL << 44); // RISCV_HWPROBE_EXT_ZCB 2266 Features["zcd"] = ExtMask & (1ULL << 45); // RISCV_HWPROBE_EXT_ZCD 2267 Features["zcf"] = ExtMask & (1ULL << 46); // RISCV_HWPROBE_EXT_ZCF 2268 Features["zcmop"] = ExtMask & (1ULL << 47); // RISCV_HWPROBE_EXT_ZCMOP 2269 Features["zawrs"] = ExtMask & (1ULL << 48); // RISCV_HWPROBE_EXT_ZAWRS 2270 2271 // Check whether the processor supports fast misaligned scalar memory access. 2272 // NOTE: RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF is only available on 2273 // Linux 6.11 or later. If it is not recognized, the key field will be cleared 2274 // to -1. 2275 if (Query[2].Key != -1 && 2276 Query[2].Value == /*RISCV_HWPROBE_MISALIGNED_SCALAR_FAST=*/3) 2277 Features["unaligned-scalar-mem"] = true; 2278 2279 return Features; 2280 } 2281 #else 2282 const StringMap<bool> sys::getHostCPUFeatures() { return {}; } 2283 #endif 2284 2285 #if __APPLE__ 2286 /// \returns the \p triple, but with the Host's arch spliced in. 
2287 static Triple withHostArch(Triple T) { 2288 #if defined(__arm__) 2289 T.setArch(Triple::arm); 2290 T.setArchName("arm"); 2291 #elif defined(__arm64e__) 2292 T.setArch(Triple::aarch64, Triple::AArch64SubArch_arm64e); 2293 T.setArchName("arm64e"); 2294 #elif defined(__aarch64__) 2295 T.setArch(Triple::aarch64); 2296 T.setArchName("arm64"); 2297 #elif defined(__x86_64h__) 2298 T.setArch(Triple::x86_64); 2299 T.setArchName("x86_64h"); 2300 #elif defined(__x86_64__) 2301 T.setArch(Triple::x86_64); 2302 T.setArchName("x86_64"); 2303 #elif defined(__i386__) 2304 T.setArch(Triple::x86); 2305 T.setArchName("i386"); 2306 #elif defined(__powerpc__) 2307 T.setArch(Triple::ppc); 2308 T.setArchName("powerpc"); 2309 #else 2310 # error "Unimplemented host arch fixup" 2311 #endif 2312 return T; 2313 } 2314 #endif 2315 2316 std::string sys::getProcessTriple() { 2317 std::string TargetTripleString = updateTripleOSVersion(LLVM_HOST_TRIPLE); 2318 Triple PT(Triple::normalize(TargetTripleString)); 2319 2320 #if __APPLE__ 2321 /// In Universal builds, LLVM_HOST_TRIPLE will have the wrong arch in one of 2322 /// the slices. This fixes that up. 2323 PT = withHostArch(PT); 2324 #endif 2325 2326 if (sizeof(void *) == 8 && PT.isArch32Bit()) 2327 PT = PT.get64BitArchVariant(); 2328 if (sizeof(void *) == 4 && PT.isArch64Bit()) 2329 PT = PT.get32BitArchVariant(); 2330 2331 return PT.str(); 2332 } 2333 2334 void sys::printDefaultTargetAndDetectedCPU(raw_ostream &OS) { 2335 #if LLVM_VERSION_PRINTER_SHOW_HOST_TARGET_INFO 2336 std::string CPU = std::string(sys::getHostCPUName()); 2337 if (CPU == "generic") 2338 CPU = "(unknown)"; 2339 OS << " Default target: " << sys::getDefaultTargetTriple() << '\n' 2340 << " Host CPU: " << CPU << '\n'; 2341 #endif 2342 } 2343