1 //===--- X86.cpp - Implement X86 target feature support -------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements X86 TargetInfo objects. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "X86.h" 14 #include "clang/Basic/Builtins.h" 15 #include "clang/Basic/Diagnostic.h" 16 #include "clang/Basic/TargetBuiltins.h" 17 #include "llvm/ADT/StringExtras.h" 18 #include "llvm/ADT/StringRef.h" 19 #include "llvm/ADT/StringSwitch.h" 20 #include "llvm/Support/X86TargetParser.h" 21 #include <optional> 22 23 namespace clang { 24 namespace targets { 25 26 static constexpr Builtin::Info BuiltinInfoX86[] = { 27 #define BUILTIN(ID, TYPE, ATTRS) \ 28 {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, 29 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 30 {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, 31 #define TARGET_HEADER_BUILTIN(ID, TYPE, ATTRS, HEADER, LANGS, FEATURE) \ 32 {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::HEADER, LANGS}, 33 #include "clang/Basic/BuiltinsX86.def" 34 35 #define BUILTIN(ID, TYPE, ATTRS) \ 36 {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, 37 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 38 {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, 39 #define TARGET_HEADER_BUILTIN(ID, TYPE, ATTRS, HEADER, LANGS, FEATURE) \ 40 {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::HEADER, LANGS}, 41 #include "clang/Basic/BuiltinsX86_64.def" 42 }; 43 44 static const char *const GCCRegNames[] = { 45 "ax", "dx", "cx", "bx", "si", "di", "bp", "sp", 46 "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)", 47 "argp", "flags", "fpcr", "fpsr", 
"dirflag", "frame", "xmm0", "xmm1", 48 "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "mm0", "mm1", 49 "mm2", "mm3", "mm4", "mm5", "mm6", "mm7", "r8", "r9", 50 "r10", "r11", "r12", "r13", "r14", "r15", "xmm8", "xmm9", 51 "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "ymm0", "ymm1", 52 "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", 53 "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15", "xmm16", "xmm17", 54 "xmm18", "xmm19", "xmm20", "xmm21", "xmm22", "xmm23", "xmm24", "xmm25", 55 "xmm26", "xmm27", "xmm28", "xmm29", "xmm30", "xmm31", "ymm16", "ymm17", 56 "ymm18", "ymm19", "ymm20", "ymm21", "ymm22", "ymm23", "ymm24", "ymm25", 57 "ymm26", "ymm27", "ymm28", "ymm29", "ymm30", "ymm31", "zmm0", "zmm1", 58 "zmm2", "zmm3", "zmm4", "zmm5", "zmm6", "zmm7", "zmm8", "zmm9", 59 "zmm10", "zmm11", "zmm12", "zmm13", "zmm14", "zmm15", "zmm16", "zmm17", 60 "zmm18", "zmm19", "zmm20", "zmm21", "zmm22", "zmm23", "zmm24", "zmm25", 61 "zmm26", "zmm27", "zmm28", "zmm29", "zmm30", "zmm31", "k0", "k1", 62 "k2", "k3", "k4", "k5", "k6", "k7", 63 "cr0", "cr2", "cr3", "cr4", "cr8", 64 "dr0", "dr1", "dr2", "dr3", "dr6", "dr7", 65 "bnd0", "bnd1", "bnd2", "bnd3", 66 "tmm0", "tmm1", "tmm2", "tmm3", "tmm4", "tmm5", "tmm6", "tmm7", 67 }; 68 69 const TargetInfo::AddlRegName AddlRegNames[] = { 70 {{"al", "ah", "eax", "rax"}, 0}, 71 {{"bl", "bh", "ebx", "rbx"}, 3}, 72 {{"cl", "ch", "ecx", "rcx"}, 2}, 73 {{"dl", "dh", "edx", "rdx"}, 1}, 74 {{"esi", "rsi"}, 4}, 75 {{"edi", "rdi"}, 5}, 76 {{"esp", "rsp"}, 7}, 77 {{"ebp", "rbp"}, 6}, 78 {{"r8d", "r8w", "r8b"}, 38}, 79 {{"r9d", "r9w", "r9b"}, 39}, 80 {{"r10d", "r10w", "r10b"}, 40}, 81 {{"r11d", "r11w", "r11b"}, 41}, 82 {{"r12d", "r12w", "r12b"}, 42}, 83 {{"r13d", "r13w", "r13b"}, 43}, 84 {{"r14d", "r14w", "r14b"}, 44}, 85 {{"r15d", "r15w", "r15b"}, 45}, 86 }; 87 88 } // namespace targets 89 } // namespace clang 90 91 using namespace clang; 92 using namespace clang::targets; 93 94 bool X86TargetInfo::setFPMath(StringRef Name) { 95 if 
(Name == "387") { 96 FPMath = FP_387; 97 return true; 98 } 99 if (Name == "sse") { 100 FPMath = FP_SSE; 101 return true; 102 } 103 return false; 104 } 105 106 bool X86TargetInfo::initFeatureMap( 107 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, 108 const std::vector<std::string> &FeaturesVec) const { 109 // FIXME: This *really* should not be here. 110 // X86_64 always has SSE2. 111 if (getTriple().getArch() == llvm::Triple::x86_64) 112 setFeatureEnabled(Features, "sse2", true); 113 114 using namespace llvm::X86; 115 116 SmallVector<StringRef, 16> CPUFeatures; 117 getFeaturesForCPU(CPU, CPUFeatures); 118 for (auto &F : CPUFeatures) 119 setFeatureEnabled(Features, F, true); 120 121 std::vector<std::string> UpdatedFeaturesVec; 122 for (const auto &Feature : FeaturesVec) { 123 // Expand general-regs-only to -x86, -mmx and -sse 124 if (Feature == "+general-regs-only") { 125 UpdatedFeaturesVec.push_back("-x87"); 126 UpdatedFeaturesVec.push_back("-mmx"); 127 UpdatedFeaturesVec.push_back("-sse"); 128 continue; 129 } 130 131 UpdatedFeaturesVec.push_back(Feature); 132 } 133 134 if (!TargetInfo::initFeatureMap(Features, Diags, CPU, UpdatedFeaturesVec)) 135 return false; 136 137 // Can't do this earlier because we need to be able to explicitly enable 138 // or disable these features and the things that they depend upon. 139 140 // Enable popcnt if sse4.2 is enabled and popcnt is not explicitly disabled. 141 auto I = Features.find("sse4.2"); 142 if (I != Features.end() && I->getValue() && 143 !llvm::is_contained(UpdatedFeaturesVec, "-popcnt")) 144 Features["popcnt"] = true; 145 146 // Additionally, if SSE is enabled and mmx is not explicitly disabled, 147 // then enable MMX. 148 I = Features.find("sse"); 149 if (I != Features.end() && I->getValue() && 150 !llvm::is_contained(UpdatedFeaturesVec, "-mmx")) 151 Features["mmx"] = true; 152 153 // Enable xsave if avx is enabled and xsave is not explicitly disabled. 
154 I = Features.find("avx"); 155 if (I != Features.end() && I->getValue() && 156 !llvm::is_contained(UpdatedFeaturesVec, "-xsave")) 157 Features["xsave"] = true; 158 159 // Enable CRC32 if SSE4.2 is enabled and CRC32 is not explicitly disabled. 160 I = Features.find("sse4.2"); 161 if (I != Features.end() && I->getValue() && 162 !llvm::is_contained(UpdatedFeaturesVec, "-crc32")) 163 Features["crc32"] = true; 164 165 return true; 166 } 167 168 void X86TargetInfo::setFeatureEnabled(llvm::StringMap<bool> &Features, 169 StringRef Name, bool Enabled) const { 170 if (Name == "sse4") { 171 // We can get here via the __target__ attribute since that's not controlled 172 // via the -msse4/-mno-sse4 command line alias. Handle this the same way 173 // here - turn on the sse4.2 if enabled, turn off the sse4.1 level if 174 // disabled. 175 if (Enabled) 176 Name = "sse4.2"; 177 else 178 Name = "sse4.1"; 179 } 180 181 Features[Name] = Enabled; 182 llvm::X86::updateImpliedFeatures(Name, Enabled, Features); 183 } 184 185 /// handleTargetFeatures - Perform initialization based on the user 186 /// configured set of features. 
187 bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features, 188 DiagnosticsEngine &Diags) { 189 for (const auto &Feature : Features) { 190 if (Feature[0] != '+') 191 continue; 192 193 if (Feature == "+aes") { 194 HasAES = true; 195 } else if (Feature == "+vaes") { 196 HasVAES = true; 197 } else if (Feature == "+pclmul") { 198 HasPCLMUL = true; 199 } else if (Feature == "+vpclmulqdq") { 200 HasVPCLMULQDQ = true; 201 } else if (Feature == "+lzcnt") { 202 HasLZCNT = true; 203 } else if (Feature == "+rdrnd") { 204 HasRDRND = true; 205 } else if (Feature == "+fsgsbase") { 206 HasFSGSBASE = true; 207 } else if (Feature == "+bmi") { 208 HasBMI = true; 209 } else if (Feature == "+bmi2") { 210 HasBMI2 = true; 211 } else if (Feature == "+popcnt") { 212 HasPOPCNT = true; 213 } else if (Feature == "+rtm") { 214 HasRTM = true; 215 } else if (Feature == "+prfchw") { 216 HasPRFCHW = true; 217 } else if (Feature == "+rdseed") { 218 HasRDSEED = true; 219 } else if (Feature == "+adx") { 220 HasADX = true; 221 } else if (Feature == "+tbm") { 222 HasTBM = true; 223 } else if (Feature == "+lwp") { 224 HasLWP = true; 225 } else if (Feature == "+fma") { 226 HasFMA = true; 227 } else if (Feature == "+f16c") { 228 HasF16C = true; 229 } else if (Feature == "+gfni") { 230 HasGFNI = true; 231 } else if (Feature == "+avx512cd") { 232 HasAVX512CD = true; 233 } else if (Feature == "+avx512vpopcntdq") { 234 HasAVX512VPOPCNTDQ = true; 235 } else if (Feature == "+avx512vnni") { 236 HasAVX512VNNI = true; 237 } else if (Feature == "+avx512bf16") { 238 HasAVX512BF16 = true; 239 } else if (Feature == "+avx512er") { 240 HasAVX512ER = true; 241 } else if (Feature == "+avx512fp16") { 242 HasAVX512FP16 = true; 243 HasLegalHalfType = true; 244 } else if (Feature == "+avx512pf") { 245 HasAVX512PF = true; 246 } else if (Feature == "+avx512dq") { 247 HasAVX512DQ = true; 248 } else if (Feature == "+avx512bitalg") { 249 HasAVX512BITALG = true; 250 } else if (Feature == "+avx512bw") { 251 
HasAVX512BW = true; 252 } else if (Feature == "+avx512vl") { 253 HasAVX512VL = true; 254 } else if (Feature == "+avx512vbmi") { 255 HasAVX512VBMI = true; 256 } else if (Feature == "+avx512vbmi2") { 257 HasAVX512VBMI2 = true; 258 } else if (Feature == "+avx512ifma") { 259 HasAVX512IFMA = true; 260 } else if (Feature == "+avx512vp2intersect") { 261 HasAVX512VP2INTERSECT = true; 262 } else if (Feature == "+sha") { 263 HasSHA = true; 264 } else if (Feature == "+shstk") { 265 HasSHSTK = true; 266 } else if (Feature == "+movbe") { 267 HasMOVBE = true; 268 } else if (Feature == "+sgx") { 269 HasSGX = true; 270 } else if (Feature == "+cx8") { 271 HasCX8 = true; 272 } else if (Feature == "+cx16") { 273 HasCX16 = true; 274 } else if (Feature == "+fxsr") { 275 HasFXSR = true; 276 } else if (Feature == "+xsave") { 277 HasXSAVE = true; 278 } else if (Feature == "+xsaveopt") { 279 HasXSAVEOPT = true; 280 } else if (Feature == "+xsavec") { 281 HasXSAVEC = true; 282 } else if (Feature == "+xsaves") { 283 HasXSAVES = true; 284 } else if (Feature == "+mwaitx") { 285 HasMWAITX = true; 286 } else if (Feature == "+pku") { 287 HasPKU = true; 288 } else if (Feature == "+clflushopt") { 289 HasCLFLUSHOPT = true; 290 } else if (Feature == "+clwb") { 291 HasCLWB = true; 292 } else if (Feature == "+wbnoinvd") { 293 HasWBNOINVD = true; 294 } else if (Feature == "+prefetchi") { 295 HasPREFETCHI = true; 296 } else if (Feature == "+prefetchwt1") { 297 HasPREFETCHWT1 = true; 298 } else if (Feature == "+clzero") { 299 HasCLZERO = true; 300 } else if (Feature == "+cldemote") { 301 HasCLDEMOTE = true; 302 } else if (Feature == "+rdpid") { 303 HasRDPID = true; 304 } else if (Feature == "+rdpru") { 305 HasRDPRU = true; 306 } else if (Feature == "+kl") { 307 HasKL = true; 308 } else if (Feature == "+widekl") { 309 HasWIDEKL = true; 310 } else if (Feature == "+retpoline-external-thunk") { 311 HasRetpolineExternalThunk = true; 312 } else if (Feature == "+sahf") { 313 HasLAHFSAHF = true; 314 } else if 
(Feature == "+waitpkg") { 315 HasWAITPKG = true; 316 } else if (Feature == "+movdiri") { 317 HasMOVDIRI = true; 318 } else if (Feature == "+movdir64b") { 319 HasMOVDIR64B = true; 320 } else if (Feature == "+pconfig") { 321 HasPCONFIG = true; 322 } else if (Feature == "+ptwrite") { 323 HasPTWRITE = true; 324 } else if (Feature == "+invpcid") { 325 HasINVPCID = true; 326 } else if (Feature == "+enqcmd") { 327 HasENQCMD = true; 328 } else if (Feature == "+hreset") { 329 HasHRESET = true; 330 } else if (Feature == "+amx-bf16") { 331 HasAMXBF16 = true; 332 } else if (Feature == "+amx-fp16") { 333 HasAMXFP16 = true; 334 } else if (Feature == "+amx-int8") { 335 HasAMXINT8 = true; 336 } else if (Feature == "+amx-tile") { 337 HasAMXTILE = true; 338 } else if (Feature == "+cmpccxadd") { 339 HasCMPCCXADD = true; 340 } else if (Feature == "+raoint") { 341 HasRAOINT = true; 342 } else if (Feature == "+avxifma") { 343 HasAVXIFMA = true; 344 } else if (Feature == "+avxneconvert") { 345 HasAVXNECONVERT= true; 346 } else if (Feature == "+avxvnni") { 347 HasAVXVNNI = true; 348 } else if (Feature == "+avxvnniint8") { 349 HasAVXVNNIINT8 = true; 350 } else if (Feature == "+serialize") { 351 HasSERIALIZE = true; 352 } else if (Feature == "+tsxldtrk") { 353 HasTSXLDTRK = true; 354 } else if (Feature == "+uintr") { 355 HasUINTR = true; 356 } else if (Feature == "+crc32") { 357 HasCRC32 = true; 358 } else if (Feature == "+x87") { 359 HasX87 = true; 360 } 361 362 X86SSEEnum Level = llvm::StringSwitch<X86SSEEnum>(Feature) 363 .Case("+avx512f", AVX512F) 364 .Case("+avx2", AVX2) 365 .Case("+avx", AVX) 366 .Case("+sse4.2", SSE42) 367 .Case("+sse4.1", SSE41) 368 .Case("+ssse3", SSSE3) 369 .Case("+sse3", SSE3) 370 .Case("+sse2", SSE2) 371 .Case("+sse", SSE1) 372 .Default(NoSSE); 373 SSELevel = std::max(SSELevel, Level); 374 375 HasFloat16 = SSELevel >= SSE2; 376 377 HasBFloat16 = SSELevel >= SSE2; 378 379 MMX3DNowEnum ThreeDNowLevel = llvm::StringSwitch<MMX3DNowEnum>(Feature) 380 .Case("+3dnowa", 
AMD3DNowAthlon) 381 .Case("+3dnow", AMD3DNow) 382 .Case("+mmx", MMX) 383 .Default(NoMMX3DNow); 384 MMX3DNowLevel = std::max(MMX3DNowLevel, ThreeDNowLevel); 385 386 XOPEnum XLevel = llvm::StringSwitch<XOPEnum>(Feature) 387 .Case("+xop", XOP) 388 .Case("+fma4", FMA4) 389 .Case("+sse4a", SSE4A) 390 .Default(NoXOP); 391 XOPLevel = std::max(XOPLevel, XLevel); 392 } 393 394 // LLVM doesn't have a separate switch for fpmath, so only accept it if it 395 // matches the selected sse level. 396 if ((FPMath == FP_SSE && SSELevel < SSE1) || 397 (FPMath == FP_387 && SSELevel >= SSE1)) { 398 Diags.Report(diag::err_target_unsupported_fpmath) 399 << (FPMath == FP_SSE ? "sse" : "387"); 400 return false; 401 } 402 403 SimdDefaultAlign = 404 hasFeature("avx512f") ? 512 : hasFeature("avx") ? 256 : 128; 405 406 // FIXME: We should allow long double type on 32-bits to match with GCC. 407 // This requires backend to be able to lower f80 without x87 first. 408 if (!HasX87 && LongDoubleFormat == &llvm::APFloat::x87DoubleExtended()) 409 HasLongDouble = false; 410 411 return true; 412 } 413 414 /// X86TargetInfo::getTargetDefines - Return the set of the X86-specific macro 415 /// definitions for this particular subtarget. 416 void X86TargetInfo::getTargetDefines(const LangOptions &Opts, 417 MacroBuilder &Builder) const { 418 // Inline assembly supports X86 flag outputs. 419 Builder.defineMacro("__GCC_ASM_FLAG_OUTPUTS__"); 420 421 std::string CodeModel = getTargetOpts().CodeModel; 422 if (CodeModel == "default") 423 CodeModel = "small"; 424 Builder.defineMacro("__code_model_" + CodeModel + "__"); 425 426 // Target identification. 
427 if (getTriple().getArch() == llvm::Triple::x86_64) { 428 Builder.defineMacro("__amd64__"); 429 Builder.defineMacro("__amd64"); 430 Builder.defineMacro("__x86_64"); 431 Builder.defineMacro("__x86_64__"); 432 if (getTriple().getArchName() == "x86_64h") { 433 Builder.defineMacro("__x86_64h"); 434 Builder.defineMacro("__x86_64h__"); 435 } 436 } else { 437 DefineStd(Builder, "i386", Opts); 438 } 439 440 Builder.defineMacro("__SEG_GS"); 441 Builder.defineMacro("__SEG_FS"); 442 Builder.defineMacro("__seg_gs", "__attribute__((address_space(256)))"); 443 Builder.defineMacro("__seg_fs", "__attribute__((address_space(257)))"); 444 445 // Subtarget options. 446 // FIXME: We are hard-coding the tune parameters based on the CPU, but they 447 // truly should be based on -mtune options. 448 using namespace llvm::X86; 449 switch (CPU) { 450 case CK_None: 451 break; 452 case CK_i386: 453 // The rest are coming from the i386 define above. 454 Builder.defineMacro("__tune_i386__"); 455 break; 456 case CK_i486: 457 case CK_WinChipC6: 458 case CK_WinChip2: 459 case CK_C3: 460 defineCPUMacros(Builder, "i486"); 461 break; 462 case CK_PentiumMMX: 463 Builder.defineMacro("__pentium_mmx__"); 464 Builder.defineMacro("__tune_pentium_mmx__"); 465 [[fallthrough]]; 466 case CK_i586: 467 case CK_Pentium: 468 defineCPUMacros(Builder, "i586"); 469 defineCPUMacros(Builder, "pentium"); 470 break; 471 case CK_Pentium3: 472 case CK_PentiumM: 473 Builder.defineMacro("__tune_pentium3__"); 474 [[fallthrough]]; 475 case CK_Pentium2: 476 case CK_C3_2: 477 Builder.defineMacro("__tune_pentium2__"); 478 [[fallthrough]]; 479 case CK_PentiumPro: 480 case CK_i686: 481 defineCPUMacros(Builder, "i686"); 482 defineCPUMacros(Builder, "pentiumpro"); 483 break; 484 case CK_Pentium4: 485 defineCPUMacros(Builder, "pentium4"); 486 break; 487 case CK_Yonah: 488 case CK_Prescott: 489 case CK_Nocona: 490 defineCPUMacros(Builder, "nocona"); 491 break; 492 case CK_Core2: 493 case CK_Penryn: 494 defineCPUMacros(Builder, 
"core2"); 495 break; 496 case CK_Bonnell: 497 defineCPUMacros(Builder, "atom"); 498 break; 499 case CK_Silvermont: 500 defineCPUMacros(Builder, "slm"); 501 break; 502 case CK_Goldmont: 503 defineCPUMacros(Builder, "goldmont"); 504 break; 505 case CK_GoldmontPlus: 506 defineCPUMacros(Builder, "goldmont_plus"); 507 break; 508 case CK_Tremont: 509 defineCPUMacros(Builder, "tremont"); 510 break; 511 case CK_Nehalem: 512 case CK_Westmere: 513 case CK_SandyBridge: 514 case CK_IvyBridge: 515 case CK_Haswell: 516 case CK_Broadwell: 517 case CK_SkylakeClient: 518 case CK_SkylakeServer: 519 case CK_Cascadelake: 520 case CK_Cooperlake: 521 case CK_Cannonlake: 522 case CK_IcelakeClient: 523 case CK_Rocketlake: 524 case CK_IcelakeServer: 525 case CK_Tigerlake: 526 case CK_SapphireRapids: 527 case CK_Alderlake: 528 case CK_Raptorlake: 529 case CK_Meteorlake: 530 case CK_Sierraforest: 531 case CK_Grandridge: 532 case CK_Graniterapids: 533 case CK_Emeraldrapids: 534 // FIXME: Historically, we defined this legacy name, it would be nice to 535 // remove it at some point. We've never exposed fine-grained names for 536 // recent primary x86 CPUs, and we should keep it that way. 537 defineCPUMacros(Builder, "corei7"); 538 break; 539 case CK_KNL: 540 defineCPUMacros(Builder, "knl"); 541 break; 542 case CK_KNM: 543 break; 544 case CK_Lakemont: 545 defineCPUMacros(Builder, "i586", /*Tuning*/false); 546 defineCPUMacros(Builder, "pentium", /*Tuning*/false); 547 Builder.defineMacro("__tune_lakemont__"); 548 break; 549 case CK_K6_2: 550 Builder.defineMacro("__k6_2__"); 551 Builder.defineMacro("__tune_k6_2__"); 552 [[fallthrough]]; 553 case CK_K6_3: 554 if (CPU != CK_K6_2) { // In case of fallthrough 555 // FIXME: GCC may be enabling these in cases where some other k6 556 // architecture is specified but -m3dnow is explicitly provided. The 557 // exact semantics need to be determined and emulated here. 
558 Builder.defineMacro("__k6_3__"); 559 Builder.defineMacro("__tune_k6_3__"); 560 } 561 [[fallthrough]]; 562 case CK_K6: 563 defineCPUMacros(Builder, "k6"); 564 break; 565 case CK_Athlon: 566 case CK_AthlonXP: 567 defineCPUMacros(Builder, "athlon"); 568 if (SSELevel != NoSSE) { 569 Builder.defineMacro("__athlon_sse__"); 570 Builder.defineMacro("__tune_athlon_sse__"); 571 } 572 break; 573 case CK_K8: 574 case CK_K8SSE3: 575 case CK_x86_64: 576 defineCPUMacros(Builder, "k8"); 577 break; 578 case CK_x86_64_v2: 579 case CK_x86_64_v3: 580 case CK_x86_64_v4: 581 break; 582 case CK_AMDFAM10: 583 defineCPUMacros(Builder, "amdfam10"); 584 break; 585 case CK_BTVER1: 586 defineCPUMacros(Builder, "btver1"); 587 break; 588 case CK_BTVER2: 589 defineCPUMacros(Builder, "btver2"); 590 break; 591 case CK_BDVER1: 592 defineCPUMacros(Builder, "bdver1"); 593 break; 594 case CK_BDVER2: 595 defineCPUMacros(Builder, "bdver2"); 596 break; 597 case CK_BDVER3: 598 defineCPUMacros(Builder, "bdver3"); 599 break; 600 case CK_BDVER4: 601 defineCPUMacros(Builder, "bdver4"); 602 break; 603 case CK_ZNVER1: 604 defineCPUMacros(Builder, "znver1"); 605 break; 606 case CK_ZNVER2: 607 defineCPUMacros(Builder, "znver2"); 608 break; 609 case CK_ZNVER3: 610 defineCPUMacros(Builder, "znver3"); 611 break; 612 case CK_ZNVER4: 613 defineCPUMacros(Builder, "znver4"); 614 break; 615 case CK_Geode: 616 defineCPUMacros(Builder, "geode"); 617 break; 618 } 619 620 // Target properties. 621 Builder.defineMacro("__REGISTER_PREFIX__", ""); 622 623 // Define __NO_MATH_INLINES on linux/x86 so that we don't get inline 624 // functions in glibc header files that use FP Stack inline asm which the 625 // backend can't deal with (PR879). 
626 Builder.defineMacro("__NO_MATH_INLINES"); 627 628 if (HasAES) 629 Builder.defineMacro("__AES__"); 630 631 if (HasVAES) 632 Builder.defineMacro("__VAES__"); 633 634 if (HasPCLMUL) 635 Builder.defineMacro("__PCLMUL__"); 636 637 if (HasVPCLMULQDQ) 638 Builder.defineMacro("__VPCLMULQDQ__"); 639 640 // Note, in 32-bit mode, GCC does not define the macro if -mno-sahf. In LLVM, 641 // the feature flag only applies to 64-bit mode. 642 if (HasLAHFSAHF || getTriple().getArch() == llvm::Triple::x86) 643 Builder.defineMacro("__LAHF_SAHF__"); 644 645 if (HasLZCNT) 646 Builder.defineMacro("__LZCNT__"); 647 648 if (HasRDRND) 649 Builder.defineMacro("__RDRND__"); 650 651 if (HasFSGSBASE) 652 Builder.defineMacro("__FSGSBASE__"); 653 654 if (HasBMI) 655 Builder.defineMacro("__BMI__"); 656 657 if (HasBMI2) 658 Builder.defineMacro("__BMI2__"); 659 660 if (HasPOPCNT) 661 Builder.defineMacro("__POPCNT__"); 662 663 if (HasRTM) 664 Builder.defineMacro("__RTM__"); 665 666 if (HasPRFCHW) 667 Builder.defineMacro("__PRFCHW__"); 668 669 if (HasRDSEED) 670 Builder.defineMacro("__RDSEED__"); 671 672 if (HasADX) 673 Builder.defineMacro("__ADX__"); 674 675 if (HasTBM) 676 Builder.defineMacro("__TBM__"); 677 678 if (HasLWP) 679 Builder.defineMacro("__LWP__"); 680 681 if (HasMWAITX) 682 Builder.defineMacro("__MWAITX__"); 683 684 if (HasMOVBE) 685 Builder.defineMacro("__MOVBE__"); 686 687 switch (XOPLevel) { 688 case XOP: 689 Builder.defineMacro("__XOP__"); 690 [[fallthrough]]; 691 case FMA4: 692 Builder.defineMacro("__FMA4__"); 693 [[fallthrough]]; 694 case SSE4A: 695 Builder.defineMacro("__SSE4A__"); 696 [[fallthrough]]; 697 case NoXOP: 698 break; 699 } 700 701 if (HasFMA) 702 Builder.defineMacro("__FMA__"); 703 704 if (HasF16C) 705 Builder.defineMacro("__F16C__"); 706 707 if (HasGFNI) 708 Builder.defineMacro("__GFNI__"); 709 710 if (HasAVX512CD) 711 Builder.defineMacro("__AVX512CD__"); 712 if (HasAVX512VPOPCNTDQ) 713 Builder.defineMacro("__AVX512VPOPCNTDQ__"); 714 if (HasAVX512VNNI) 715 
Builder.defineMacro("__AVX512VNNI__"); 716 if (HasAVX512BF16) 717 Builder.defineMacro("__AVX512BF16__"); 718 if (HasAVX512ER) 719 Builder.defineMacro("__AVX512ER__"); 720 if (HasAVX512FP16) 721 Builder.defineMacro("__AVX512FP16__"); 722 if (HasAVX512PF) 723 Builder.defineMacro("__AVX512PF__"); 724 if (HasAVX512DQ) 725 Builder.defineMacro("__AVX512DQ__"); 726 if (HasAVX512BITALG) 727 Builder.defineMacro("__AVX512BITALG__"); 728 if (HasAVX512BW) 729 Builder.defineMacro("__AVX512BW__"); 730 if (HasAVX512VL) 731 Builder.defineMacro("__AVX512VL__"); 732 if (HasAVX512VBMI) 733 Builder.defineMacro("__AVX512VBMI__"); 734 if (HasAVX512VBMI2) 735 Builder.defineMacro("__AVX512VBMI2__"); 736 if (HasAVX512IFMA) 737 Builder.defineMacro("__AVX512IFMA__"); 738 if (HasAVX512VP2INTERSECT) 739 Builder.defineMacro("__AVX512VP2INTERSECT__"); 740 if (HasSHA) 741 Builder.defineMacro("__SHA__"); 742 743 if (HasFXSR) 744 Builder.defineMacro("__FXSR__"); 745 if (HasXSAVE) 746 Builder.defineMacro("__XSAVE__"); 747 if (HasXSAVEOPT) 748 Builder.defineMacro("__XSAVEOPT__"); 749 if (HasXSAVEC) 750 Builder.defineMacro("__XSAVEC__"); 751 if (HasXSAVES) 752 Builder.defineMacro("__XSAVES__"); 753 if (HasPKU) 754 Builder.defineMacro("__PKU__"); 755 if (HasCLFLUSHOPT) 756 Builder.defineMacro("__CLFLUSHOPT__"); 757 if (HasCLWB) 758 Builder.defineMacro("__CLWB__"); 759 if (HasWBNOINVD) 760 Builder.defineMacro("__WBNOINVD__"); 761 if (HasSHSTK) 762 Builder.defineMacro("__SHSTK__"); 763 if (HasSGX) 764 Builder.defineMacro("__SGX__"); 765 if (HasPREFETCHI) 766 Builder.defineMacro("__PREFETCHI__"); 767 if (HasPREFETCHWT1) 768 Builder.defineMacro("__PREFETCHWT1__"); 769 if (HasCLZERO) 770 Builder.defineMacro("__CLZERO__"); 771 if (HasKL) 772 Builder.defineMacro("__KL__"); 773 if (HasWIDEKL) 774 Builder.defineMacro("__WIDEKL__"); 775 if (HasRDPID) 776 Builder.defineMacro("__RDPID__"); 777 if (HasRDPRU) 778 Builder.defineMacro("__RDPRU__"); 779 if (HasCLDEMOTE) 780 Builder.defineMacro("__CLDEMOTE__"); 781 if 
(HasWAITPKG) 782 Builder.defineMacro("__WAITPKG__"); 783 if (HasMOVDIRI) 784 Builder.defineMacro("__MOVDIRI__"); 785 if (HasMOVDIR64B) 786 Builder.defineMacro("__MOVDIR64B__"); 787 if (HasPCONFIG) 788 Builder.defineMacro("__PCONFIG__"); 789 if (HasPTWRITE) 790 Builder.defineMacro("__PTWRITE__"); 791 if (HasINVPCID) 792 Builder.defineMacro("__INVPCID__"); 793 if (HasENQCMD) 794 Builder.defineMacro("__ENQCMD__"); 795 if (HasHRESET) 796 Builder.defineMacro("__HRESET__"); 797 if (HasAMXTILE) 798 Builder.defineMacro("__AMX_TILE__"); 799 if (HasAMXINT8) 800 Builder.defineMacro("__AMX_INT8__"); 801 if (HasAMXBF16) 802 Builder.defineMacro("__AMX_BF16__"); 803 if (HasAMXFP16) 804 Builder.defineMacro("__AMX_FP16__"); 805 if (HasCMPCCXADD) 806 Builder.defineMacro("__CMPCCXADD__"); 807 if (HasRAOINT) 808 Builder.defineMacro("__RAOINT__"); 809 if (HasAVXIFMA) 810 Builder.defineMacro("__AVXIFMA__"); 811 if (HasAVXNECONVERT) 812 Builder.defineMacro("__AVXNECONVERT__"); 813 if (HasAVXVNNI) 814 Builder.defineMacro("__AVXVNNI__"); 815 if (HasAVXVNNIINT8) 816 Builder.defineMacro("__AVXVNNIINT8__"); 817 if (HasSERIALIZE) 818 Builder.defineMacro("__SERIALIZE__"); 819 if (HasTSXLDTRK) 820 Builder.defineMacro("__TSXLDTRK__"); 821 if (HasUINTR) 822 Builder.defineMacro("__UINTR__"); 823 if (HasCRC32) 824 Builder.defineMacro("__CRC32__"); 825 826 // Each case falls through to the previous one here. 
827 switch (SSELevel) { 828 case AVX512F: 829 Builder.defineMacro("__AVX512F__"); 830 [[fallthrough]]; 831 case AVX2: 832 Builder.defineMacro("__AVX2__"); 833 [[fallthrough]]; 834 case AVX: 835 Builder.defineMacro("__AVX__"); 836 [[fallthrough]]; 837 case SSE42: 838 Builder.defineMacro("__SSE4_2__"); 839 [[fallthrough]]; 840 case SSE41: 841 Builder.defineMacro("__SSE4_1__"); 842 [[fallthrough]]; 843 case SSSE3: 844 Builder.defineMacro("__SSSE3__"); 845 [[fallthrough]]; 846 case SSE3: 847 Builder.defineMacro("__SSE3__"); 848 [[fallthrough]]; 849 case SSE2: 850 Builder.defineMacro("__SSE2__"); 851 Builder.defineMacro("__SSE2_MATH__"); // -mfp-math=sse always implied. 852 [[fallthrough]]; 853 case SSE1: 854 Builder.defineMacro("__SSE__"); 855 Builder.defineMacro("__SSE_MATH__"); // -mfp-math=sse always implied. 856 [[fallthrough]]; 857 case NoSSE: 858 break; 859 } 860 861 if (Opts.MicrosoftExt && getTriple().getArch() == llvm::Triple::x86) { 862 switch (SSELevel) { 863 case AVX512F: 864 case AVX2: 865 case AVX: 866 case SSE42: 867 case SSE41: 868 case SSSE3: 869 case SSE3: 870 case SSE2: 871 Builder.defineMacro("_M_IX86_FP", Twine(2)); 872 break; 873 case SSE1: 874 Builder.defineMacro("_M_IX86_FP", Twine(1)); 875 break; 876 default: 877 Builder.defineMacro("_M_IX86_FP", Twine(0)); 878 break; 879 } 880 } 881 882 // Each case falls through to the previous one here. 
883 switch (MMX3DNowLevel) { 884 case AMD3DNowAthlon: 885 Builder.defineMacro("__3dNOW_A__"); 886 [[fallthrough]]; 887 case AMD3DNow: 888 Builder.defineMacro("__3dNOW__"); 889 [[fallthrough]]; 890 case MMX: 891 Builder.defineMacro("__MMX__"); 892 [[fallthrough]]; 893 case NoMMX3DNow: 894 break; 895 } 896 897 if (CPU >= CK_i486 || CPU == CK_None) { 898 Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1"); 899 Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2"); 900 Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4"); 901 } 902 if (HasCX8) 903 Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8"); 904 if (HasCX16 && getTriple().getArch() == llvm::Triple::x86_64) 905 Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_16"); 906 907 if (HasFloat128) 908 Builder.defineMacro("__SIZEOF_FLOAT128__", "16"); 909 } 910 911 bool X86TargetInfo::isValidFeatureName(StringRef Name) const { 912 return llvm::StringSwitch<bool>(Name) 913 .Case("3dnow", true) 914 .Case("3dnowa", true) 915 .Case("adx", true) 916 .Case("aes", true) 917 .Case("amx-bf16", true) 918 .Case("amx-fp16", true) 919 .Case("amx-int8", true) 920 .Case("amx-tile", true) 921 .Case("avx", true) 922 .Case("avx2", true) 923 .Case("avx512f", true) 924 .Case("avx512cd", true) 925 .Case("avx512vpopcntdq", true) 926 .Case("avx512vnni", true) 927 .Case("avx512bf16", true) 928 .Case("avx512er", true) 929 .Case("avx512fp16", true) 930 .Case("avx512pf", true) 931 .Case("avx512dq", true) 932 .Case("avx512bitalg", true) 933 .Case("avx512bw", true) 934 .Case("avx512vl", true) 935 .Case("avx512vbmi", true) 936 .Case("avx512vbmi2", true) 937 .Case("avx512ifma", true) 938 .Case("avx512vp2intersect", true) 939 .Case("avxifma", true) 940 .Case("avxneconvert", true) 941 .Case("avxvnni", true) 942 .Case("avxvnniint8", true) 943 .Case("bmi", true) 944 .Case("bmi2", true) 945 .Case("cldemote", true) 946 .Case("clflushopt", true) 947 .Case("clwb", true) 948 .Case("clzero", true) 949 .Case("cmpccxadd", true) 950 
/// Reports whether the named feature is currently enabled on this target.
///
/// Most names map directly onto the corresponding Has* flag. Features that
/// belong to an ordered family (SSE/AVX, XOP, MMX/3DNow!) are answered by
/// comparing the stored level against the level the name stands for, so a
/// higher level also satisfies every lower one. "x86" is always true, and
/// "x86_32"/"x86_64" are answered from the target triple's architecture.
///
/// \param Feature the feature name to query.
/// \returns the feature's state, or false for any name not listed here.
bool X86TargetInfo::hasFeature(StringRef Feature) const {
  return llvm::StringSwitch<bool>(Feature)
      .Case("adx", HasADX)
      .Case("aes", HasAES)
      .Case("amx-bf16", HasAMXBF16)
      .Case("amx-fp16", HasAMXFP16)
      .Case("amx-int8", HasAMXINT8)
      .Case("amx-tile", HasAMXTILE)
      // Level-based: any SSELevel at or above the named one qualifies.
      .Case("avx", SSELevel >= AVX)
      .Case("avx2", SSELevel >= AVX2)
      .Case("avx512f", SSELevel >= AVX512F)
      .Case("avx512cd", HasAVX512CD)
      .Case("avx512vpopcntdq", HasAVX512VPOPCNTDQ)
      .Case("avx512vnni", HasAVX512VNNI)
      .Case("avx512bf16", HasAVX512BF16)
      .Case("avx512er", HasAVX512ER)
      .Case("avx512fp16", HasAVX512FP16)
      .Case("avx512pf", HasAVX512PF)
      .Case("avx512dq", HasAVX512DQ)
      .Case("avx512bitalg", HasAVX512BITALG)
      .Case("avx512bw", HasAVX512BW)
      .Case("avx512vl", HasAVX512VL)
      .Case("avx512vbmi", HasAVX512VBMI)
      .Case("avx512vbmi2", HasAVX512VBMI2)
      .Case("avx512ifma", HasAVX512IFMA)
      .Case("avx512vp2intersect", HasAVX512VP2INTERSECT)
      .Case("avxifma", HasAVXIFMA)
      .Case("avxneconvert", HasAVXNECONVERT)
      .Case("avxvnni", HasAVXVNNI)
      .Case("avxvnniint8", HasAVXVNNIINT8)
      .Case("bmi", HasBMI)
      .Case("bmi2", HasBMI2)
      .Case("cldemote", HasCLDEMOTE)
      .Case("clflushopt", HasCLFLUSHOPT)
      .Case("clwb", HasCLWB)
      .Case("clzero", HasCLZERO)
      .Case("cmpccxadd", HasCMPCCXADD)
      .Case("crc32", HasCRC32)
      .Case("cx8", HasCX8)
      .Case("cx16", HasCX16)
      .Case("enqcmd", HasENQCMD)
      .Case("f16c", HasF16C)
      .Case("fma", HasFMA)
      // Level-based: FMA4 is a rung of the XOPLevel ladder.
      .Case("fma4", XOPLevel >= FMA4)
      .Case("fsgsbase", HasFSGSBASE)
      .Case("fxsr", HasFXSR)
      .Case("gfni", HasGFNI)
      .Case("hreset", HasHRESET)
      .Case("invpcid", HasINVPCID)
      .Case("kl", HasKL)
      .Case("widekl", HasWIDEKL)
      .Case("lwp", HasLWP)
      .Case("lzcnt", HasLZCNT)
      // Level-based: MMX and both 3DNow! flavours share MMX3DNowLevel.
      .Case("mm3dnow", MMX3DNowLevel >= AMD3DNow)
      .Case("mm3dnowa", MMX3DNowLevel >= AMD3DNowAthlon)
      .Case("mmx", MMX3DNowLevel >= MMX)
      .Case("movbe", HasMOVBE)
      .Case("movdiri", HasMOVDIRI)
      .Case("movdir64b", HasMOVDIR64B)
      .Case("mwaitx", HasMWAITX)
      .Case("pclmul", HasPCLMUL)
      .Case("pconfig", HasPCONFIG)
      .Case("pku", HasPKU)
      .Case("popcnt", HasPOPCNT)
      .Case("prefetchi", HasPREFETCHI)
      .Case("prefetchwt1", HasPREFETCHWT1)
      .Case("prfchw", HasPRFCHW)
      .Case("ptwrite", HasPTWRITE)
      .Case("raoint", HasRAOINT)
      .Case("rdpid", HasRDPID)
      .Case("rdpru", HasRDPRU)
      .Case("rdrnd", HasRDRND)
      .Case("rdseed", HasRDSEED)
      .Case("retpoline-external-thunk", HasRetpolineExternalThunk)
      .Case("rtm", HasRTM)
      .Case("sahf", HasLAHFSAHF)
      .Case("serialize", HasSERIALIZE)
      .Case("sgx", HasSGX)
      .Case("sha", HasSHA)
      .Case("shstk", HasSHSTK)
      // Level-based: the SSE family is ordered through SSELevel.
      .Case("sse", SSELevel >= SSE1)
      .Case("sse2", SSELevel >= SSE2)
      .Case("sse3", SSELevel >= SSE3)
      .Case("ssse3", SSELevel >= SSSE3)
      .Case("sse4.1", SSELevel >= SSE41)
      .Case("sse4.2", SSELevel >= SSE42)
      .Case("sse4a", XOPLevel >= SSE4A)
      .Case("tbm", HasTBM)
      .Case("tsxldtrk", HasTSXLDTRK)
      .Case("uintr", HasUINTR)
      .Case("vaes", HasVAES)
      .Case("vpclmulqdq", HasVPCLMULQDQ)
      .Case("wbnoinvd", HasWBNOINVD)
      .Case("waitpkg", HasWAITPKG)
      // Architecture pseudo-features, answered from the target triple.
      .Case("x86", true)
      .Case("x86_32", getTriple().getArch() == llvm::Triple::x86)
      .Case("x86_64", getTriple().getArch() == llvm::Triple::x86_64)
      .Case("x87", HasX87)
      .Case("xop", XOPLevel >= XOP)
      .Case("xsave", HasXSAVE)
      .Case("xsavec", HasXSAVEC)
      .Case("xsaves", HasXSAVES)
      .Case("xsaveopt", HasXSAVEOPT)
      .Default(false);
}
1125 bool X86TargetInfo::validateCpuSupports(StringRef FeatureStr) const { 1126 return llvm::StringSwitch<bool>(FeatureStr) 1127 #define X86_FEATURE_COMPAT(ENUM, STR, PRIORITY) .Case(STR, true) 1128 #include "llvm/TargetParser/X86TargetParser.def" 1129 .Default(false); 1130 } 1131 1132 static llvm::X86::ProcessorFeatures getFeature(StringRef Name) { 1133 return llvm::StringSwitch<llvm::X86::ProcessorFeatures>(Name) 1134 #define X86_FEATURE_COMPAT(ENUM, STR, PRIORITY) \ 1135 .Case(STR, llvm::X86::FEATURE_##ENUM) 1136 1137 #include "llvm/TargetParser/X86TargetParser.def" 1138 ; 1139 // Note, this function should only be used after ensuring the value is 1140 // correct, so it asserts if the value is out of range. 1141 } 1142 1143 unsigned X86TargetInfo::multiVersionSortPriority(StringRef Name) const { 1144 // Valid CPUs have a 'key feature' that compares just better than its key 1145 // feature. 1146 using namespace llvm::X86; 1147 CPUKind Kind = parseArchX86(Name); 1148 if (Kind != CK_None) { 1149 ProcessorFeatures KeyFeature = getKeyFeature(Kind); 1150 return (getFeaturePriority(KeyFeature) << 1) + 1; 1151 } 1152 1153 // Now we know we have a feature, so get its priority and shift it a few so 1154 // that we have sufficient room for the CPUs (above). 
1155 return getFeaturePriority(getFeature(Name)) << 1; 1156 } 1157 1158 bool X86TargetInfo::validateCPUSpecificCPUDispatch(StringRef Name) const { 1159 return llvm::StringSwitch<bool>(Name) 1160 #define CPU_SPECIFIC(NAME, TUNE_NAME, MANGLING, FEATURES) .Case(NAME, true) 1161 #define CPU_SPECIFIC_ALIAS(NEW_NAME, TUNE_NAME, NAME) .Case(NEW_NAME, true) 1162 #include "llvm/TargetParser/X86TargetParser.def" 1163 .Default(false); 1164 } 1165 1166 static StringRef CPUSpecificCPUDispatchNameDealias(StringRef Name) { 1167 return llvm::StringSwitch<StringRef>(Name) 1168 #define CPU_SPECIFIC_ALIAS(NEW_NAME, TUNE_NAME, NAME) .Case(NEW_NAME, NAME) 1169 #include "llvm/TargetParser/X86TargetParser.def" 1170 .Default(Name); 1171 } 1172 1173 char X86TargetInfo::CPUSpecificManglingCharacter(StringRef Name) const { 1174 return llvm::StringSwitch<char>(CPUSpecificCPUDispatchNameDealias(Name)) 1175 #define CPU_SPECIFIC(NAME, TUNE_NAME, MANGLING, FEATURES) .Case(NAME, MANGLING) 1176 #include "llvm/TargetParser/X86TargetParser.def" 1177 .Default(0); 1178 } 1179 1180 void X86TargetInfo::getCPUSpecificCPUDispatchFeatures( 1181 StringRef Name, llvm::SmallVectorImpl<StringRef> &Features) const { 1182 StringRef WholeList = 1183 llvm::StringSwitch<StringRef>(CPUSpecificCPUDispatchNameDealias(Name)) 1184 #define CPU_SPECIFIC(NAME, TUNE_NAME, MANGLING, FEATURES) .Case(NAME, FEATURES) 1185 #include "llvm/TargetParser/X86TargetParser.def" 1186 .Default(""); 1187 WholeList.split(Features, ',', /*MaxSplit=*/-1, /*KeepEmpty=*/false); 1188 } 1189 1190 StringRef X86TargetInfo::getCPUSpecificTuneName(StringRef Name) const { 1191 return llvm::StringSwitch<StringRef>(Name) 1192 #define CPU_SPECIFIC(NAME, TUNE_NAME, MANGLING, FEATURES) .Case(NAME, TUNE_NAME) 1193 #define CPU_SPECIFIC_ALIAS(NEW_NAME, TUNE_NAME, NAME) .Case(NEW_NAME, TUNE_NAME) 1194 #include "llvm/TargetParser/X86TargetParser.def" 1195 .Default(""); 1196 } 1197 1198 // We can't use a generic validation scheme for the cpus accepted here 1199 
// versus subtarget cpus accepted in the target attribute because the 1200 // variables intitialized by the runtime only support the below currently 1201 // rather than the full range of cpus. 1202 bool X86TargetInfo::validateCpuIs(StringRef FeatureStr) const { 1203 return llvm::StringSwitch<bool>(FeatureStr) 1204 #define X86_VENDOR(ENUM, STRING) .Case(STRING, true) 1205 #define X86_CPU_TYPE_ALIAS(ENUM, ALIAS) .Case(ALIAS, true) 1206 #define X86_CPU_TYPE(ENUM, STR) .Case(STR, true) 1207 #define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS) .Case(ALIAS, true) 1208 #define X86_CPU_SUBTYPE(ENUM, STR) .Case(STR, true) 1209 #include "llvm/TargetParser/X86TargetParser.def" 1210 .Default(false); 1211 } 1212 1213 static unsigned matchAsmCCConstraint(const char *&Name) { 1214 auto RV = llvm::StringSwitch<unsigned>(Name) 1215 .Case("@cca", 4) 1216 .Case("@ccae", 5) 1217 .Case("@ccb", 4) 1218 .Case("@ccbe", 5) 1219 .Case("@ccc", 4) 1220 .Case("@cce", 4) 1221 .Case("@ccz", 4) 1222 .Case("@ccg", 4) 1223 .Case("@ccge", 5) 1224 .Case("@ccl", 4) 1225 .Case("@ccle", 5) 1226 .Case("@ccna", 5) 1227 .Case("@ccnae", 6) 1228 .Case("@ccnb", 5) 1229 .Case("@ccnbe", 6) 1230 .Case("@ccnc", 5) 1231 .Case("@ccne", 5) 1232 .Case("@ccnz", 5) 1233 .Case("@ccng", 5) 1234 .Case("@ccnge", 6) 1235 .Case("@ccnl", 5) 1236 .Case("@ccnle", 6) 1237 .Case("@ccno", 5) 1238 .Case("@ccnp", 5) 1239 .Case("@ccns", 5) 1240 .Case("@cco", 4) 1241 .Case("@ccp", 4) 1242 .Case("@ccs", 4) 1243 .Default(0); 1244 return RV; 1245 } 1246 1247 bool X86TargetInfo::validateAsmConstraint( 1248 const char *&Name, TargetInfo::ConstraintInfo &Info) const { 1249 switch (*Name) { 1250 default: 1251 return false; 1252 // Constant constraints. 1253 case 'e': // 32-bit signed integer constant for use with sign-extending x86_64 1254 // instructions. 1255 case 'Z': // 32-bit unsigned integer constant for use with zero-extending 1256 // x86_64 instructions. 
1257 case 's': 1258 Info.setRequiresImmediate(); 1259 return true; 1260 case 'I': 1261 Info.setRequiresImmediate(0, 31); 1262 return true; 1263 case 'J': 1264 Info.setRequiresImmediate(0, 63); 1265 return true; 1266 case 'K': 1267 Info.setRequiresImmediate(-128, 127); 1268 return true; 1269 case 'L': 1270 Info.setRequiresImmediate({int(0xff), int(0xffff), int(0xffffffff)}); 1271 return true; 1272 case 'M': 1273 Info.setRequiresImmediate(0, 3); 1274 return true; 1275 case 'N': 1276 Info.setRequiresImmediate(0, 255); 1277 return true; 1278 case 'O': 1279 Info.setRequiresImmediate(0, 127); 1280 return true; 1281 // Register constraints. 1282 case 'Y': // 'Y' is the first character for several 2-character constraints. 1283 // Shift the pointer to the second character of the constraint. 1284 Name++; 1285 switch (*Name) { 1286 default: 1287 return false; 1288 case 'z': // First SSE register. 1289 case '2': 1290 case 't': // Any SSE register, when SSE2 is enabled. 1291 case 'i': // Any SSE register, when SSE2 and inter-unit moves enabled. 1292 case 'm': // Any MMX register, when inter-unit moves enabled. 1293 case 'k': // AVX512 arch mask registers: k1-k7. 1294 Info.setAllowsRegister(); 1295 return true; 1296 } 1297 case 'f': // Any x87 floating point stack register. 1298 // Constraint 'f' cannot be used for output operands. 1299 if (Info.ConstraintStr[0] == '=') 1300 return false; 1301 Info.setAllowsRegister(); 1302 return true; 1303 case 'a': // eax. 1304 case 'b': // ebx. 1305 case 'c': // ecx. 1306 case 'd': // edx. 1307 case 'S': // esi. 1308 case 'D': // edi. 1309 case 'A': // edx:eax. 1310 case 't': // Top of floating point stack. 1311 case 'u': // Second from top of floating point stack. 1312 case 'q': // Any register accessible as [r]l: a, b, c, and d. 1313 case 'y': // Any MMX register. 1314 case 'v': // Any {X,Y,Z}MM register (Arch & context dependent) 1315 case 'x': // Any SSE register. 
1316 case 'k': // Any AVX512 mask register (same as Yk, additionally allows k0 1317 // for intermideate k reg operations). 1318 case 'Q': // Any register accessible as [r]h: a, b, c, and d. 1319 case 'R': // "Legacy" registers: ax, bx, cx, dx, di, si, sp, bp. 1320 case 'l': // "Index" registers: any general register that can be used as an 1321 // index in a base+index memory access. 1322 Info.setAllowsRegister(); 1323 return true; 1324 // Floating point constant constraints. 1325 case 'C': // SSE floating point constant. 1326 case 'G': // x87 floating point constant. 1327 return true; 1328 case '@': 1329 // CC condition changes. 1330 if (auto Len = matchAsmCCConstraint(Name)) { 1331 Name += Len - 1; 1332 Info.setAllowsRegister(); 1333 return true; 1334 } 1335 return false; 1336 } 1337 } 1338 1339 // Below is based on the following information: 1340 // +------------------------------------+-------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ 1341 // | Processor Name | Cache Line Size (Bytes) | Source | 1342 // +------------------------------------+-------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ 1343 // | i386 | 64 | https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf | 1344 // | i486 | 16 | "four doublewords" (doubleword = 32 bits, 4 bits * 32 bits = 16 bytes) https://en.wikichip.org/w/images/d/d3/i486_MICROPROCESSOR_HARDWARE_REFERENCE_MANUAL_%281990%29.pdf and http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.126.4216&rep=rep1&type=pdf (page 29) | 1345 // | i586/Pentium MMX | 32 | https://www.7-cpu.com/cpu/P-MMX.html | 1346 // | i686/Pentium | 32 | https://www.7-cpu.com/cpu/P6.html | 1347 // | Netburst/Pentium4 | 64 | 
// Below is based on the following information:
// +------------------------------------+-------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+
// | Processor Name                     | Cache Line Size (Bytes) | Source                                                                                                                                                       |
// +------------------------------------+-------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+
// | i386                               |                      64 | https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf                                          |
// | i486                               |                      16 | "four doublewords" (doubleword = 32 bits, 4 * 32 bits = 16 bytes) https://en.wikichip.org/w/images/d/d3/i486_MICROPROCESSOR_HARDWARE_REFERENCE_MANUAL_%281990%29.pdf and http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.126.4216&rep=rep1&type=pdf (page 29) |
// | i586/Pentium MMX                   |                      32 | https://www.7-cpu.com/cpu/P-MMX.html                                                                                                                         |
// | i686/Pentium                       |                      32 | https://www.7-cpu.com/cpu/P6.html                                                                                                                            |
// | Netburst/Pentium4                  |                      64 | https://www.7-cpu.com/cpu/P4-180.html                                                                                                                        |
// | Atom                               |                      64 | https://www.7-cpu.com/cpu/Atom.html                                                                                                                          |
// | Westmere                           |                      64 | https://en.wikichip.org/wiki/intel/microarchitectures/sandy_bridge_(client) "Cache Architecture"                                                             |
// | Sandy Bridge                       |                      64 | https://en.wikipedia.org/wiki/Sandy_Bridge and https://www.7-cpu.com/cpu/SandyBridge.html                                                                    |
// | Ivy Bridge                         |                      64 | https://blog.stuffedcow.net/2013/01/ivb-cache-replacement/ and https://www.7-cpu.com/cpu/IvyBridge.html                                                      |
// | Haswell                            |                      64 | https://www.7-cpu.com/cpu/Haswell.html                                                                                                                       |
// | Broadwell                          |                      64 | https://www.7-cpu.com/cpu/Broadwell.html                                                                                                                     |
// | Skylake (including skylake-avx512) |                      64 | https://www.nas.nasa.gov/hecc/support/kb/skylake-processors_550.html "Cache Hierarchy"                                                                       |
// | Cascade Lake                       |                      64 | https://www.nas.nasa.gov/hecc/support/kb/cascade-lake-processors_579.html "Cache Hierarchy"                                                                  |
// | Skylake                            |                      64 | https://en.wikichip.org/wiki/intel/microarchitectures/kaby_lake "Memory Hierarchy"                                                                           |
// | Ice Lake                           |                      64 | https://www.7-cpu.com/cpu/Ice_Lake.html                                                                                                                      |
// | Knights Landing                    |                      64 | https://software.intel.com/en-us/articles/intel-xeon-phi-processor-7200-family-memory-management-optimizations "The Intel® Xeon Phi™ Processor Architecture" |
// | Knights Mill                       |                      64 | https://software.intel.com/sites/default/files/managed/9e/bc/64-ia-32-architectures-optimization-manual.pdf?countrylabel=Colombia "2.5.5.2 L1 DCache "       |
// +------------------------------------+-------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+
//
// NOTE(review): the table lists 64 bytes for i386, but the switch below groups
// CK_i386 with the 16-byte CPUs -- confirm which of the two is intended.
//
/// Returns the cache line size, in bytes, for the currently selected CPU, or
/// std::nullopt when no CPU is selected (CK_None).
///
/// The switch has no default label and is followed by llvm_unreachable, so
/// every CPUKind value is expected to be listed explicitly.
std::optional<unsigned> X86TargetInfo::getCPUCacheLineSize() const {
  using namespace llvm::X86;
  switch (CPU) {
    // i386
    case CK_i386:
    // i486
    case CK_i486:
    case CK_WinChipC6:
    case CK_WinChip2:
    case CK_C3:
    // Lakemont
    case CK_Lakemont:
      return 16;

    // i586
    case CK_i586:
    case CK_Pentium:
    case CK_PentiumMMX:
    // i686
    case CK_PentiumPro:
    case CK_i686:
    case CK_Pentium2:
    case CK_Pentium3:
    case CK_PentiumM:
    case CK_C3_2:
    // K6
    case CK_K6:
    case CK_K6_2:
    case CK_K6_3:
    // Geode
    case CK_Geode:
      return 32;

    // Netburst
    case CK_Pentium4:
    case CK_Prescott:
    case CK_Nocona:
    // Atom
    case CK_Bonnell:
    case CK_Silvermont:
    case CK_Goldmont:
    case CK_GoldmontPlus:
    case CK_Tremont:

    case CK_Westmere:
    case CK_SandyBridge:
    case CK_IvyBridge:
    case CK_Haswell:
    case CK_Broadwell:
    case CK_SkylakeClient:
    case CK_SkylakeServer:
    case CK_Cascadelake:
    case CK_Nehalem:
    case CK_Cooperlake:
    case CK_Cannonlake:
    case CK_Tigerlake:
    case CK_SapphireRapids:
    case CK_IcelakeClient:
    case CK_Rocketlake:
    case CK_IcelakeServer:
    case CK_Alderlake:
    case CK_Raptorlake:
    case CK_Meteorlake:
    case CK_Sierraforest:
    case CK_Grandridge:
    case CK_Graniterapids:
    case CK_Emeraldrapids:
    case CK_KNL:
    case CK_KNM:
    // K7
    case CK_Athlon:
    case CK_AthlonXP:
    // K8
    case CK_K8:
    case CK_K8SSE3:
    case CK_AMDFAM10:
    // Bobcat
    case CK_BTVER1:
    case CK_BTVER2:
    // Bulldozer
    case CK_BDVER1:
    case CK_BDVER2:
    case CK_BDVER3:
    case CK_BDVER4:
    // Zen
    case CK_ZNVER1:
    case CK_ZNVER2:
    case CK_ZNVER3:
    case CK_ZNVER4:
    // Deprecated
    case CK_x86_64:
    case CK_x86_64_v2:
    case CK_x86_64_v3:
    case CK_x86_64_v4:
    case CK_Yonah:
    case CK_Penryn:
    case CK_Core2:
      return 64;

    // The following currently have unknown cache line sizes (but they are probably all 64):
    // Core
    case CK_None:
      return std::nullopt;
  }
  // Only reached if CPU holds a value that is not a listed CPUKind.
  llvm_unreachable("Unknown CPU kind");
}
X86TargetInfo::validateOutputSize(const llvm::StringMap<bool> &FeatureMap, 1469 StringRef Constraint, 1470 unsigned Size) const { 1471 // Strip off constraint modifiers. 1472 while (Constraint[0] == '=' || Constraint[0] == '+' || Constraint[0] == '&') 1473 Constraint = Constraint.substr(1); 1474 1475 return validateOperandSize(FeatureMap, Constraint, Size); 1476 } 1477 1478 bool X86TargetInfo::validateInputSize(const llvm::StringMap<bool> &FeatureMap, 1479 StringRef Constraint, 1480 unsigned Size) const { 1481 return validateOperandSize(FeatureMap, Constraint, Size); 1482 } 1483 1484 bool X86TargetInfo::validateOperandSize(const llvm::StringMap<bool> &FeatureMap, 1485 StringRef Constraint, 1486 unsigned Size) const { 1487 switch (Constraint[0]) { 1488 default: 1489 break; 1490 case 'k': 1491 // Registers k0-k7 (AVX512) size limit is 64 bit. 1492 case 'y': 1493 return Size <= 64; 1494 case 'f': 1495 case 't': 1496 case 'u': 1497 return Size <= 128; 1498 case 'Y': 1499 // 'Y' is the first character for several 2-character constraints. 1500 switch (Constraint[1]) { 1501 default: 1502 return false; 1503 case 'm': 1504 // 'Ym' is synonymous with 'y'. 1505 case 'k': 1506 return Size <= 64; 1507 case 'z': 1508 // XMM0/YMM/ZMM0 1509 if (hasFeatureEnabled(FeatureMap, "avx512f")) 1510 // ZMM0 can be used if target supports AVX512F. 1511 return Size <= 512U; 1512 else if (hasFeatureEnabled(FeatureMap, "avx")) 1513 // YMM0 can be used if target supports AVX. 1514 return Size <= 256U; 1515 else if (hasFeatureEnabled(FeatureMap, "sse")) 1516 return Size <= 128U; 1517 return false; 1518 case 'i': 1519 case 't': 1520 case '2': 1521 // 'Yi','Yt','Y2' are synonymous with 'x' when SSE2 is enabled. 1522 if (SSELevel < SSE2) 1523 return false; 1524 break; 1525 } 1526 break; 1527 case 'v': 1528 case 'x': 1529 if (hasFeatureEnabled(FeatureMap, "avx512f")) 1530 // 512-bit zmm registers can be used if target supports AVX512F. 
1531 return Size <= 512U; 1532 else if (hasFeatureEnabled(FeatureMap, "avx")) 1533 // 256-bit ymm registers can be used if target supports AVX. 1534 return Size <= 256U; 1535 return Size <= 128U; 1536 1537 } 1538 1539 return true; 1540 } 1541 1542 std::string X86TargetInfo::convertConstraint(const char *&Constraint) const { 1543 switch (*Constraint) { 1544 case '@': 1545 if (auto Len = matchAsmCCConstraint(Constraint)) { 1546 std::string Converted = "{" + std::string(Constraint, Len) + "}"; 1547 Constraint += Len - 1; 1548 return Converted; 1549 } 1550 return std::string(1, *Constraint); 1551 case 'a': 1552 return std::string("{ax}"); 1553 case 'b': 1554 return std::string("{bx}"); 1555 case 'c': 1556 return std::string("{cx}"); 1557 case 'd': 1558 return std::string("{dx}"); 1559 case 'S': 1560 return std::string("{si}"); 1561 case 'D': 1562 return std::string("{di}"); 1563 case 'p': // Keep 'p' constraint (address). 1564 return std::string("p"); 1565 case 't': // top of floating point stack. 1566 return std::string("{st}"); 1567 case 'u': // second from top of floating point stack. 1568 return std::string("{st(1)}"); // second from top of floating point stack. 1569 case 'Y': 1570 switch (Constraint[1]) { 1571 default: 1572 // Break from inner switch and fall through (copy single char), 1573 // continue parsing after copying the current constraint into 1574 // the return string. 1575 break; 1576 case 'k': 1577 case 'm': 1578 case 'i': 1579 case 't': 1580 case 'z': 1581 case '2': 1582 // "^" hints llvm that this is a 2 letter constraint. 1583 // "Constraint++" is used to promote the string iterator 1584 // to the next constraint. 
1585 return std::string("^") + std::string(Constraint++, 2); 1586 } 1587 [[fallthrough]]; 1588 default: 1589 return std::string(1, *Constraint); 1590 } 1591 } 1592 1593 void X86TargetInfo::fillValidCPUList(SmallVectorImpl<StringRef> &Values) const { 1594 bool Only64Bit = getTriple().getArch() != llvm::Triple::x86; 1595 llvm::X86::fillValidCPUArchList(Values, Only64Bit); 1596 } 1597 1598 void X86TargetInfo::fillValidTuneCPUList(SmallVectorImpl<StringRef> &Values) const { 1599 llvm::X86::fillValidTuneCPUList(Values); 1600 } 1601 1602 ArrayRef<const char *> X86TargetInfo::getGCCRegNames() const { 1603 return llvm::ArrayRef(GCCRegNames); 1604 } 1605 1606 ArrayRef<TargetInfo::AddlRegName> X86TargetInfo::getGCCAddlRegNames() const { 1607 return llvm::ArrayRef(AddlRegNames); 1608 } 1609 1610 ArrayRef<Builtin::Info> X86_32TargetInfo::getTargetBuiltins() const { 1611 return llvm::ArrayRef(BuiltinInfoX86, clang::X86::LastX86CommonBuiltin - 1612 Builtin::FirstTSBuiltin + 1); 1613 } 1614 1615 ArrayRef<Builtin::Info> X86_64TargetInfo::getTargetBuiltins() const { 1616 return llvm::ArrayRef(BuiltinInfoX86, 1617 X86::LastTSBuiltin - Builtin::FirstTSBuiltin); 1618 } 1619