1 //===--- X86.cpp - Implement X86 target feature support -------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements X86 TargetInfo objects. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "X86.h" 14 #include "clang/Basic/Builtins.h" 15 #include "clang/Basic/Diagnostic.h" 16 #include "clang/Basic/TargetBuiltins.h" 17 #include "llvm/ADT/StringExtras.h" 18 #include "llvm/ADT/StringRef.h" 19 #include "llvm/ADT/StringSwitch.h" 20 #include "llvm/TargetParser/X86TargetParser.h" 21 #include <optional> 22 23 namespace clang { 24 namespace targets { 25 26 static constexpr Builtin::Info BuiltinInfoX86[] = { 27 #define BUILTIN(ID, TYPE, ATTRS) \ 28 {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, 29 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 30 {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, 31 #define TARGET_HEADER_BUILTIN(ID, TYPE, ATTRS, HEADER, LANGS, FEATURE) \ 32 {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::HEADER, LANGS}, 33 #include "clang/Basic/BuiltinsX86.def" 34 35 #define BUILTIN(ID, TYPE, ATTRS) \ 36 {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, 37 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 38 {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, 39 #define TARGET_HEADER_BUILTIN(ID, TYPE, ATTRS, HEADER, LANGS, FEATURE) \ 40 {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::HEADER, LANGS}, 41 #include "clang/Basic/BuiltinsX86_64.def" 42 }; 43 44 static const char *const GCCRegNames[] = { 45 "ax", "dx", "cx", "bx", "si", "di", "bp", "sp", 46 "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)", 47 "argp", "flags", "fpcr", "fpsr", 
"dirflag", "frame", "xmm0", "xmm1", 48 "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "mm0", "mm1", 49 "mm2", "mm3", "mm4", "mm5", "mm6", "mm7", "r8", "r9", 50 "r10", "r11", "r12", "r13", "r14", "r15", "xmm8", "xmm9", 51 "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "ymm0", "ymm1", 52 "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", 53 "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15", "xmm16", "xmm17", 54 "xmm18", "xmm19", "xmm20", "xmm21", "xmm22", "xmm23", "xmm24", "xmm25", 55 "xmm26", "xmm27", "xmm28", "xmm29", "xmm30", "xmm31", "ymm16", "ymm17", 56 "ymm18", "ymm19", "ymm20", "ymm21", "ymm22", "ymm23", "ymm24", "ymm25", 57 "ymm26", "ymm27", "ymm28", "ymm29", "ymm30", "ymm31", "zmm0", "zmm1", 58 "zmm2", "zmm3", "zmm4", "zmm5", "zmm6", "zmm7", "zmm8", "zmm9", 59 "zmm10", "zmm11", "zmm12", "zmm13", "zmm14", "zmm15", "zmm16", "zmm17", 60 "zmm18", "zmm19", "zmm20", "zmm21", "zmm22", "zmm23", "zmm24", "zmm25", 61 "zmm26", "zmm27", "zmm28", "zmm29", "zmm30", "zmm31", "k0", "k1", 62 "k2", "k3", "k4", "k5", "k6", "k7", 63 "cr0", "cr2", "cr3", "cr4", "cr8", 64 "dr0", "dr1", "dr2", "dr3", "dr6", "dr7", 65 "bnd0", "bnd1", "bnd2", "bnd3", 66 "tmm0", "tmm1", "tmm2", "tmm3", "tmm4", "tmm5", "tmm6", "tmm7", 67 }; 68 69 const TargetInfo::AddlRegName AddlRegNames[] = { 70 {{"al", "ah", "eax", "rax"}, 0}, 71 {{"bl", "bh", "ebx", "rbx"}, 3}, 72 {{"cl", "ch", "ecx", "rcx"}, 2}, 73 {{"dl", "dh", "edx", "rdx"}, 1}, 74 {{"esi", "rsi"}, 4}, 75 {{"edi", "rdi"}, 5}, 76 {{"esp", "rsp"}, 7}, 77 {{"ebp", "rbp"}, 6}, 78 {{"r8d", "r8w", "r8b"}, 38}, 79 {{"r9d", "r9w", "r9b"}, 39}, 80 {{"r10d", "r10w", "r10b"}, 40}, 81 {{"r11d", "r11w", "r11b"}, 41}, 82 {{"r12d", "r12w", "r12b"}, 42}, 83 {{"r13d", "r13w", "r13b"}, 43}, 84 {{"r14d", "r14w", "r14b"}, 44}, 85 {{"r15d", "r15w", "r15b"}, 45}, 86 }; 87 88 } // namespace targets 89 } // namespace clang 90 91 using namespace clang; 92 using namespace clang::targets; 93 94 bool X86TargetInfo::setFPMath(StringRef Name) { 95 if 
(Name == "387") { 96 FPMath = FP_387; 97 return true; 98 } 99 if (Name == "sse") { 100 FPMath = FP_SSE; 101 return true; 102 } 103 return false; 104 } 105 106 bool X86TargetInfo::initFeatureMap( 107 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, 108 const std::vector<std::string> &FeaturesVec) const { 109 // FIXME: This *really* should not be here. 110 // X86_64 always has SSE2. 111 if (getTriple().getArch() == llvm::Triple::x86_64) 112 setFeatureEnabled(Features, "sse2", true); 113 114 using namespace llvm::X86; 115 116 SmallVector<StringRef, 16> CPUFeatures; 117 getFeaturesForCPU(CPU, CPUFeatures); 118 for (auto &F : CPUFeatures) 119 setFeatureEnabled(Features, F, true); 120 121 std::vector<std::string> UpdatedFeaturesVec; 122 for (const auto &Feature : FeaturesVec) { 123 // Expand general-regs-only to -x86, -mmx and -sse 124 if (Feature == "+general-regs-only") { 125 UpdatedFeaturesVec.push_back("-x87"); 126 UpdatedFeaturesVec.push_back("-mmx"); 127 UpdatedFeaturesVec.push_back("-sse"); 128 continue; 129 } 130 131 UpdatedFeaturesVec.push_back(Feature); 132 } 133 134 if (!TargetInfo::initFeatureMap(Features, Diags, CPU, UpdatedFeaturesVec)) 135 return false; 136 137 // Can't do this earlier because we need to be able to explicitly enable 138 // or disable these features and the things that they depend upon. 139 140 // Enable popcnt if sse4.2 is enabled and popcnt is not explicitly disabled. 141 auto I = Features.find("sse4.2"); 142 if (I != Features.end() && I->getValue() && 143 !llvm::is_contained(UpdatedFeaturesVec, "-popcnt")) 144 Features["popcnt"] = true; 145 146 // Additionally, if SSE is enabled and mmx is not explicitly disabled, 147 // then enable MMX. 148 I = Features.find("sse"); 149 if (I != Features.end() && I->getValue() && 150 !llvm::is_contained(UpdatedFeaturesVec, "-mmx")) 151 Features["mmx"] = true; 152 153 // Enable xsave if avx is enabled and xsave is not explicitly disabled. 
154 I = Features.find("avx"); 155 if (I != Features.end() && I->getValue() && 156 !llvm::is_contained(UpdatedFeaturesVec, "-xsave")) 157 Features["xsave"] = true; 158 159 // Enable CRC32 if SSE4.2 is enabled and CRC32 is not explicitly disabled. 160 I = Features.find("sse4.2"); 161 if (I != Features.end() && I->getValue() && 162 !llvm::is_contained(UpdatedFeaturesVec, "-crc32")) 163 Features["crc32"] = true; 164 165 return true; 166 } 167 168 void X86TargetInfo::setFeatureEnabled(llvm::StringMap<bool> &Features, 169 StringRef Name, bool Enabled) const { 170 if (Name == "sse4") { 171 // We can get here via the __target__ attribute since that's not controlled 172 // via the -msse4/-mno-sse4 command line alias. Handle this the same way 173 // here - turn on the sse4.2 if enabled, turn off the sse4.1 level if 174 // disabled. 175 if (Enabled) 176 Name = "sse4.2"; 177 else 178 Name = "sse4.1"; 179 } 180 181 Features[Name] = Enabled; 182 llvm::X86::updateImpliedFeatures(Name, Enabled, Features); 183 } 184 185 /// handleTargetFeatures - Perform initialization based on the user 186 /// configured set of features. 
187 bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features, 188 DiagnosticsEngine &Diags) { 189 for (const auto &Feature : Features) { 190 if (Feature[0] != '+') 191 continue; 192 193 if (Feature == "+aes") { 194 HasAES = true; 195 } else if (Feature == "+vaes") { 196 HasVAES = true; 197 } else if (Feature == "+pclmul") { 198 HasPCLMUL = true; 199 } else if (Feature == "+vpclmulqdq") { 200 HasVPCLMULQDQ = true; 201 } else if (Feature == "+lzcnt") { 202 HasLZCNT = true; 203 } else if (Feature == "+rdrnd") { 204 HasRDRND = true; 205 } else if (Feature == "+fsgsbase") { 206 HasFSGSBASE = true; 207 } else if (Feature == "+bmi") { 208 HasBMI = true; 209 } else if (Feature == "+bmi2") { 210 HasBMI2 = true; 211 } else if (Feature == "+popcnt") { 212 HasPOPCNT = true; 213 } else if (Feature == "+rtm") { 214 HasRTM = true; 215 } else if (Feature == "+prfchw") { 216 HasPRFCHW = true; 217 } else if (Feature == "+rdseed") { 218 HasRDSEED = true; 219 } else if (Feature == "+adx") { 220 HasADX = true; 221 } else if (Feature == "+tbm") { 222 HasTBM = true; 223 } else if (Feature == "+lwp") { 224 HasLWP = true; 225 } else if (Feature == "+fma") { 226 HasFMA = true; 227 } else if (Feature == "+f16c") { 228 HasF16C = true; 229 } else if (Feature == "+gfni") { 230 HasGFNI = true; 231 } else if (Feature == "+avx512cd") { 232 HasAVX512CD = true; 233 } else if (Feature == "+avx512vpopcntdq") { 234 HasAVX512VPOPCNTDQ = true; 235 } else if (Feature == "+avx512vnni") { 236 HasAVX512VNNI = true; 237 } else if (Feature == "+avx512bf16") { 238 HasAVX512BF16 = true; 239 } else if (Feature == "+avx512er") { 240 HasAVX512ER = true; 241 } else if (Feature == "+avx512fp16") { 242 HasAVX512FP16 = true; 243 HasLegalHalfType = true; 244 } else if (Feature == "+avx512pf") { 245 HasAVX512PF = true; 246 } else if (Feature == "+avx512dq") { 247 HasAVX512DQ = true; 248 } else if (Feature == "+avx512bitalg") { 249 HasAVX512BITALG = true; 250 } else if (Feature == "+avx512bw") { 251 
HasAVX512BW = true; 252 } else if (Feature == "+avx512vl") { 253 HasAVX512VL = true; 254 } else if (Feature == "+avx512vbmi") { 255 HasAVX512VBMI = true; 256 } else if (Feature == "+avx512vbmi2") { 257 HasAVX512VBMI2 = true; 258 } else if (Feature == "+avx512ifma") { 259 HasAVX512IFMA = true; 260 } else if (Feature == "+avx512vp2intersect") { 261 HasAVX512VP2INTERSECT = true; 262 } else if (Feature == "+sha") { 263 HasSHA = true; 264 } else if (Feature == "+sha512") { 265 HasSHA512 = true; 266 } else if (Feature == "+shstk") { 267 HasSHSTK = true; 268 } else if (Feature == "+sm3") { 269 HasSM3 = true; 270 } else if (Feature == "+sm4") { 271 HasSM4 = true; 272 } else if (Feature == "+movbe") { 273 HasMOVBE = true; 274 } else if (Feature == "+sgx") { 275 HasSGX = true; 276 } else if (Feature == "+cx8") { 277 HasCX8 = true; 278 } else if (Feature == "+cx16") { 279 HasCX16 = true; 280 } else if (Feature == "+fxsr") { 281 HasFXSR = true; 282 } else if (Feature == "+xsave") { 283 HasXSAVE = true; 284 } else if (Feature == "+xsaveopt") { 285 HasXSAVEOPT = true; 286 } else if (Feature == "+xsavec") { 287 HasXSAVEC = true; 288 } else if (Feature == "+xsaves") { 289 HasXSAVES = true; 290 } else if (Feature == "+mwaitx") { 291 HasMWAITX = true; 292 } else if (Feature == "+pku") { 293 HasPKU = true; 294 } else if (Feature == "+clflushopt") { 295 HasCLFLUSHOPT = true; 296 } else if (Feature == "+clwb") { 297 HasCLWB = true; 298 } else if (Feature == "+wbnoinvd") { 299 HasWBNOINVD = true; 300 } else if (Feature == "+prefetchi") { 301 HasPREFETCHI = true; 302 } else if (Feature == "+prefetchwt1") { 303 HasPREFETCHWT1 = true; 304 } else if (Feature == "+clzero") { 305 HasCLZERO = true; 306 } else if (Feature == "+cldemote") { 307 HasCLDEMOTE = true; 308 } else if (Feature == "+rdpid") { 309 HasRDPID = true; 310 } else if (Feature == "+rdpru") { 311 HasRDPRU = true; 312 } else if (Feature == "+kl") { 313 HasKL = true; 314 } else if (Feature == "+widekl") { 315 HasWIDEKL = true; 316 
} else if (Feature == "+retpoline-external-thunk") { 317 HasRetpolineExternalThunk = true; 318 } else if (Feature == "+sahf") { 319 HasLAHFSAHF = true; 320 } else if (Feature == "+waitpkg") { 321 HasWAITPKG = true; 322 } else if (Feature == "+movdiri") { 323 HasMOVDIRI = true; 324 } else if (Feature == "+movdir64b") { 325 HasMOVDIR64B = true; 326 } else if (Feature == "+pconfig") { 327 HasPCONFIG = true; 328 } else if (Feature == "+ptwrite") { 329 HasPTWRITE = true; 330 } else if (Feature == "+invpcid") { 331 HasINVPCID = true; 332 } else if (Feature == "+enqcmd") { 333 HasENQCMD = true; 334 } else if (Feature == "+hreset") { 335 HasHRESET = true; 336 } else if (Feature == "+amx-bf16") { 337 HasAMXBF16 = true; 338 } else if (Feature == "+amx-fp16") { 339 HasAMXFP16 = true; 340 } else if (Feature == "+amx-int8") { 341 HasAMXINT8 = true; 342 } else if (Feature == "+amx-tile") { 343 HasAMXTILE = true; 344 } else if (Feature == "+amx-complex") { 345 HasAMXCOMPLEX = true; 346 } else if (Feature == "+cmpccxadd") { 347 HasCMPCCXADD = true; 348 } else if (Feature == "+raoint") { 349 HasRAOINT = true; 350 } else if (Feature == "+avxifma") { 351 HasAVXIFMA = true; 352 } else if (Feature == "+avxneconvert") { 353 HasAVXNECONVERT= true; 354 } else if (Feature == "+avxvnni") { 355 HasAVXVNNI = true; 356 } else if (Feature == "+avxvnniint16") { 357 HasAVXVNNIINT16 = true; 358 } else if (Feature == "+avxvnniint8") { 359 HasAVXVNNIINT8 = true; 360 } else if (Feature == "+serialize") { 361 HasSERIALIZE = true; 362 } else if (Feature == "+tsxldtrk") { 363 HasTSXLDTRK = true; 364 } else if (Feature == "+uintr") { 365 HasUINTR = true; 366 } else if (Feature == "+crc32") { 367 HasCRC32 = true; 368 } else if (Feature == "+x87") { 369 HasX87 = true; 370 } else if (Feature == "+fullbf16") { 371 HasFullBFloat16 = true; 372 } 373 374 X86SSEEnum Level = llvm::StringSwitch<X86SSEEnum>(Feature) 375 .Case("+avx512f", AVX512F) 376 .Case("+avx2", AVX2) 377 .Case("+avx", AVX) 378 .Case("+sse4.2", 
SSE42) 379 .Case("+sse4.1", SSE41) 380 .Case("+ssse3", SSSE3) 381 .Case("+sse3", SSE3) 382 .Case("+sse2", SSE2) 383 .Case("+sse", SSE1) 384 .Default(NoSSE); 385 SSELevel = std::max(SSELevel, Level); 386 387 HasFloat16 = SSELevel >= SSE2; 388 389 // X86 target has bfloat16 emulation support in the backend, where 390 // bfloat16 is treated as a 32-bit float, arithmetic operations are 391 // performed in 32-bit, and the result is converted back to bfloat16. 392 // Truncation and extension between bfloat16 and 32-bit float are supported 393 // by the compiler-rt library. However, native bfloat16 support is currently 394 // not available in the X86 target. Hence, HasFullBFloat16 will be false 395 // until native bfloat16 support is available. HasFullBFloat16 is used to 396 // determine whether to automatically use excess floating point precision 397 // for bfloat16 arithmetic operations in the front-end. 398 HasBFloat16 = SSELevel >= SSE2; 399 400 MMX3DNowEnum ThreeDNowLevel = llvm::StringSwitch<MMX3DNowEnum>(Feature) 401 .Case("+3dnowa", AMD3DNowAthlon) 402 .Case("+3dnow", AMD3DNow) 403 .Case("+mmx", MMX) 404 .Default(NoMMX3DNow); 405 MMX3DNowLevel = std::max(MMX3DNowLevel, ThreeDNowLevel); 406 407 XOPEnum XLevel = llvm::StringSwitch<XOPEnum>(Feature) 408 .Case("+xop", XOP) 409 .Case("+fma4", FMA4) 410 .Case("+sse4a", SSE4A) 411 .Default(NoXOP); 412 XOPLevel = std::max(XOPLevel, XLevel); 413 } 414 415 // LLVM doesn't have a separate switch for fpmath, so only accept it if it 416 // matches the selected sse level. 417 if ((FPMath == FP_SSE && SSELevel < SSE1) || 418 (FPMath == FP_387 && SSELevel >= SSE1)) { 419 Diags.Report(diag::err_target_unsupported_fpmath) 420 << (FPMath == FP_SSE ? "sse" : "387"); 421 return false; 422 } 423 424 // FIXME: We should allow long double type on 32-bits to match with GCC. 425 // This requires backend to be able to lower f80 without x87 first. 
426 if (!HasX87 && LongDoubleFormat == &llvm::APFloat::x87DoubleExtended()) 427 HasLongDouble = false; 428 429 return true; 430 } 431 432 /// X86TargetInfo::getTargetDefines - Return the set of the X86-specific macro 433 /// definitions for this particular subtarget. 434 void X86TargetInfo::getTargetDefines(const LangOptions &Opts, 435 MacroBuilder &Builder) const { 436 // Inline assembly supports X86 flag outputs. 437 Builder.defineMacro("__GCC_ASM_FLAG_OUTPUTS__"); 438 439 std::string CodeModel = getTargetOpts().CodeModel; 440 if (CodeModel == "default") 441 CodeModel = "small"; 442 Builder.defineMacro("__code_model_" + CodeModel + "__"); 443 444 // Target identification. 445 if (getTriple().getArch() == llvm::Triple::x86_64) { 446 Builder.defineMacro("__amd64__"); 447 Builder.defineMacro("__amd64"); 448 Builder.defineMacro("__x86_64"); 449 Builder.defineMacro("__x86_64__"); 450 if (getTriple().getArchName() == "x86_64h") { 451 Builder.defineMacro("__x86_64h"); 452 Builder.defineMacro("__x86_64h__"); 453 } 454 } else { 455 DefineStd(Builder, "i386", Opts); 456 } 457 458 Builder.defineMacro("__SEG_GS"); 459 Builder.defineMacro("__SEG_FS"); 460 Builder.defineMacro("__seg_gs", "__attribute__((address_space(256)))"); 461 Builder.defineMacro("__seg_fs", "__attribute__((address_space(257)))"); 462 463 // Subtarget options. 464 // FIXME: We are hard-coding the tune parameters based on the CPU, but they 465 // truly should be based on -mtune options. 466 using namespace llvm::X86; 467 switch (CPU) { 468 case CK_None: 469 break; 470 case CK_i386: 471 // The rest are coming from the i386 define above. 
472 Builder.defineMacro("__tune_i386__"); 473 break; 474 case CK_i486: 475 case CK_WinChipC6: 476 case CK_WinChip2: 477 case CK_C3: 478 defineCPUMacros(Builder, "i486"); 479 break; 480 case CK_PentiumMMX: 481 Builder.defineMacro("__pentium_mmx__"); 482 Builder.defineMacro("__tune_pentium_mmx__"); 483 [[fallthrough]]; 484 case CK_i586: 485 case CK_Pentium: 486 defineCPUMacros(Builder, "i586"); 487 defineCPUMacros(Builder, "pentium"); 488 break; 489 case CK_Pentium3: 490 case CK_PentiumM: 491 Builder.defineMacro("__tune_pentium3__"); 492 [[fallthrough]]; 493 case CK_Pentium2: 494 case CK_C3_2: 495 Builder.defineMacro("__tune_pentium2__"); 496 [[fallthrough]]; 497 case CK_PentiumPro: 498 case CK_i686: 499 defineCPUMacros(Builder, "i686"); 500 defineCPUMacros(Builder, "pentiumpro"); 501 break; 502 case CK_Pentium4: 503 defineCPUMacros(Builder, "pentium4"); 504 break; 505 case CK_Yonah: 506 case CK_Prescott: 507 case CK_Nocona: 508 defineCPUMacros(Builder, "nocona"); 509 break; 510 case CK_Core2: 511 case CK_Penryn: 512 defineCPUMacros(Builder, "core2"); 513 break; 514 case CK_Bonnell: 515 defineCPUMacros(Builder, "atom"); 516 break; 517 case CK_Silvermont: 518 defineCPUMacros(Builder, "slm"); 519 break; 520 case CK_Goldmont: 521 defineCPUMacros(Builder, "goldmont"); 522 break; 523 case CK_GoldmontPlus: 524 defineCPUMacros(Builder, "goldmont_plus"); 525 break; 526 case CK_Tremont: 527 defineCPUMacros(Builder, "tremont"); 528 break; 529 case CK_Nehalem: 530 case CK_Westmere: 531 case CK_SandyBridge: 532 case CK_IvyBridge: 533 case CK_Haswell: 534 case CK_Broadwell: 535 case CK_SkylakeClient: 536 case CK_SkylakeServer: 537 case CK_Cascadelake: 538 case CK_Cooperlake: 539 case CK_Cannonlake: 540 case CK_IcelakeClient: 541 case CK_Rocketlake: 542 case CK_IcelakeServer: 543 case CK_Tigerlake: 544 case CK_SapphireRapids: 545 case CK_Alderlake: 546 case CK_Raptorlake: 547 case CK_Meteorlake: 548 case CK_Sierraforest: 549 case CK_Grandridge: 550 case CK_Graniterapids: 551 case 
CK_GraniterapidsD: 552 case CK_Emeraldrapids: 553 // FIXME: Historically, we defined this legacy name, it would be nice to 554 // remove it at some point. We've never exposed fine-grained names for 555 // recent primary x86 CPUs, and we should keep it that way. 556 defineCPUMacros(Builder, "corei7"); 557 break; 558 case CK_KNL: 559 defineCPUMacros(Builder, "knl"); 560 break; 561 case CK_KNM: 562 break; 563 case CK_Lakemont: 564 defineCPUMacros(Builder, "i586", /*Tuning*/false); 565 defineCPUMacros(Builder, "pentium", /*Tuning*/false); 566 Builder.defineMacro("__tune_lakemont__"); 567 break; 568 case CK_K6_2: 569 Builder.defineMacro("__k6_2__"); 570 Builder.defineMacro("__tune_k6_2__"); 571 [[fallthrough]]; 572 case CK_K6_3: 573 if (CPU != CK_K6_2) { // In case of fallthrough 574 // FIXME: GCC may be enabling these in cases where some other k6 575 // architecture is specified but -m3dnow is explicitly provided. The 576 // exact semantics need to be determined and emulated here. 577 Builder.defineMacro("__k6_3__"); 578 Builder.defineMacro("__tune_k6_3__"); 579 } 580 [[fallthrough]]; 581 case CK_K6: 582 defineCPUMacros(Builder, "k6"); 583 break; 584 case CK_Athlon: 585 case CK_AthlonXP: 586 defineCPUMacros(Builder, "athlon"); 587 if (SSELevel != NoSSE) { 588 Builder.defineMacro("__athlon_sse__"); 589 Builder.defineMacro("__tune_athlon_sse__"); 590 } 591 break; 592 case CK_K8: 593 case CK_K8SSE3: 594 case CK_x86_64: 595 defineCPUMacros(Builder, "k8"); 596 break; 597 case CK_x86_64_v2: 598 case CK_x86_64_v3: 599 case CK_x86_64_v4: 600 break; 601 case CK_AMDFAM10: 602 defineCPUMacros(Builder, "amdfam10"); 603 break; 604 case CK_BTVER1: 605 defineCPUMacros(Builder, "btver1"); 606 break; 607 case CK_BTVER2: 608 defineCPUMacros(Builder, "btver2"); 609 break; 610 case CK_BDVER1: 611 defineCPUMacros(Builder, "bdver1"); 612 break; 613 case CK_BDVER2: 614 defineCPUMacros(Builder, "bdver2"); 615 break; 616 case CK_BDVER3: 617 defineCPUMacros(Builder, "bdver3"); 618 break; 619 
case CK_BDVER4: 620 defineCPUMacros(Builder, "bdver4"); 621 break; 622 case CK_ZNVER1: 623 defineCPUMacros(Builder, "znver1"); 624 break; 625 case CK_ZNVER2: 626 defineCPUMacros(Builder, "znver2"); 627 break; 628 case CK_ZNVER3: 629 defineCPUMacros(Builder, "znver3"); 630 break; 631 case CK_ZNVER4: 632 defineCPUMacros(Builder, "znver4"); 633 break; 634 case CK_Geode: 635 defineCPUMacros(Builder, "geode"); 636 break; 637 } 638 639 // Target properties. 640 Builder.defineMacro("__REGISTER_PREFIX__", ""); 641 642 // Define __NO_MATH_INLINES on linux/x86 so that we don't get inline 643 // functions in glibc header files that use FP Stack inline asm which the 644 // backend can't deal with (PR879). 645 Builder.defineMacro("__NO_MATH_INLINES"); 646 647 if (HasAES) 648 Builder.defineMacro("__AES__"); 649 650 if (HasVAES) 651 Builder.defineMacro("__VAES__"); 652 653 if (HasPCLMUL) 654 Builder.defineMacro("__PCLMUL__"); 655 656 if (HasVPCLMULQDQ) 657 Builder.defineMacro("__VPCLMULQDQ__"); 658 659 // Note, in 32-bit mode, GCC does not define the macro if -mno-sahf. In LLVM, 660 // the feature flag only applies to 64-bit mode. 
661 if (HasLAHFSAHF || getTriple().getArch() == llvm::Triple::x86) 662 Builder.defineMacro("__LAHF_SAHF__"); 663 664 if (HasLZCNT) 665 Builder.defineMacro("__LZCNT__"); 666 667 if (HasRDRND) 668 Builder.defineMacro("__RDRND__"); 669 670 if (HasFSGSBASE) 671 Builder.defineMacro("__FSGSBASE__"); 672 673 if (HasBMI) 674 Builder.defineMacro("__BMI__"); 675 676 if (HasBMI2) 677 Builder.defineMacro("__BMI2__"); 678 679 if (HasPOPCNT) 680 Builder.defineMacro("__POPCNT__"); 681 682 if (HasRTM) 683 Builder.defineMacro("__RTM__"); 684 685 if (HasPRFCHW) 686 Builder.defineMacro("__PRFCHW__"); 687 688 if (HasRDSEED) 689 Builder.defineMacro("__RDSEED__"); 690 691 if (HasADX) 692 Builder.defineMacro("__ADX__"); 693 694 if (HasTBM) 695 Builder.defineMacro("__TBM__"); 696 697 if (HasLWP) 698 Builder.defineMacro("__LWP__"); 699 700 if (HasMWAITX) 701 Builder.defineMacro("__MWAITX__"); 702 703 if (HasMOVBE) 704 Builder.defineMacro("__MOVBE__"); 705 706 switch (XOPLevel) { 707 case XOP: 708 Builder.defineMacro("__XOP__"); 709 [[fallthrough]]; 710 case FMA4: 711 Builder.defineMacro("__FMA4__"); 712 [[fallthrough]]; 713 case SSE4A: 714 Builder.defineMacro("__SSE4A__"); 715 [[fallthrough]]; 716 case NoXOP: 717 break; 718 } 719 720 if (HasFMA) 721 Builder.defineMacro("__FMA__"); 722 723 if (HasF16C) 724 Builder.defineMacro("__F16C__"); 725 726 if (HasGFNI) 727 Builder.defineMacro("__GFNI__"); 728 729 if (HasAVX512CD) 730 Builder.defineMacro("__AVX512CD__"); 731 if (HasAVX512VPOPCNTDQ) 732 Builder.defineMacro("__AVX512VPOPCNTDQ__"); 733 if (HasAVX512VNNI) 734 Builder.defineMacro("__AVX512VNNI__"); 735 if (HasAVX512BF16) 736 Builder.defineMacro("__AVX512BF16__"); 737 if (HasAVX512ER) 738 Builder.defineMacro("__AVX512ER__"); 739 if (HasAVX512FP16) 740 Builder.defineMacro("__AVX512FP16__"); 741 if (HasAVX512PF) 742 Builder.defineMacro("__AVX512PF__"); 743 if (HasAVX512DQ) 744 Builder.defineMacro("__AVX512DQ__"); 745 if (HasAVX512BITALG) 746 Builder.defineMacro("__AVX512BITALG__"); 747 if 
(HasAVX512BW) 748 Builder.defineMacro("__AVX512BW__"); 749 if (HasAVX512VL) 750 Builder.defineMacro("__AVX512VL__"); 751 if (HasAVX512VBMI) 752 Builder.defineMacro("__AVX512VBMI__"); 753 if (HasAVX512VBMI2) 754 Builder.defineMacro("__AVX512VBMI2__"); 755 if (HasAVX512IFMA) 756 Builder.defineMacro("__AVX512IFMA__"); 757 if (HasAVX512VP2INTERSECT) 758 Builder.defineMacro("__AVX512VP2INTERSECT__"); 759 if (HasSHA) 760 Builder.defineMacro("__SHA__"); 761 if (HasSHA512) 762 Builder.defineMacro("__SHA512__"); 763 764 if (HasFXSR) 765 Builder.defineMacro("__FXSR__"); 766 if (HasXSAVE) 767 Builder.defineMacro("__XSAVE__"); 768 if (HasXSAVEOPT) 769 Builder.defineMacro("__XSAVEOPT__"); 770 if (HasXSAVEC) 771 Builder.defineMacro("__XSAVEC__"); 772 if (HasXSAVES) 773 Builder.defineMacro("__XSAVES__"); 774 if (HasPKU) 775 Builder.defineMacro("__PKU__"); 776 if (HasCLFLUSHOPT) 777 Builder.defineMacro("__CLFLUSHOPT__"); 778 if (HasCLWB) 779 Builder.defineMacro("__CLWB__"); 780 if (HasWBNOINVD) 781 Builder.defineMacro("__WBNOINVD__"); 782 if (HasSHSTK) 783 Builder.defineMacro("__SHSTK__"); 784 if (HasSGX) 785 Builder.defineMacro("__SGX__"); 786 if (HasSM3) 787 Builder.defineMacro("__SM3__"); 788 if (HasSM4) 789 Builder.defineMacro("__SM4__"); 790 if (HasPREFETCHI) 791 Builder.defineMacro("__PREFETCHI__"); 792 if (HasPREFETCHWT1) 793 Builder.defineMacro("__PREFETCHWT1__"); 794 if (HasCLZERO) 795 Builder.defineMacro("__CLZERO__"); 796 if (HasKL) 797 Builder.defineMacro("__KL__"); 798 if (HasWIDEKL) 799 Builder.defineMacro("__WIDEKL__"); 800 if (HasRDPID) 801 Builder.defineMacro("__RDPID__"); 802 if (HasRDPRU) 803 Builder.defineMacro("__RDPRU__"); 804 if (HasCLDEMOTE) 805 Builder.defineMacro("__CLDEMOTE__"); 806 if (HasWAITPKG) 807 Builder.defineMacro("__WAITPKG__"); 808 if (HasMOVDIRI) 809 Builder.defineMacro("__MOVDIRI__"); 810 if (HasMOVDIR64B) 811 Builder.defineMacro("__MOVDIR64B__"); 812 if (HasPCONFIG) 813 Builder.defineMacro("__PCONFIG__"); 814 if (HasPTWRITE) 815 
Builder.defineMacro("__PTWRITE__"); 816 if (HasINVPCID) 817 Builder.defineMacro("__INVPCID__"); 818 if (HasENQCMD) 819 Builder.defineMacro("__ENQCMD__"); 820 if (HasHRESET) 821 Builder.defineMacro("__HRESET__"); 822 if (HasAMXTILE) 823 Builder.defineMacro("__AMX_TILE__"); 824 if (HasAMXINT8) 825 Builder.defineMacro("__AMX_INT8__"); 826 if (HasAMXBF16) 827 Builder.defineMacro("__AMX_BF16__"); 828 if (HasAMXFP16) 829 Builder.defineMacro("__AMX_FP16__"); 830 if (HasAMXCOMPLEX) 831 Builder.defineMacro("__AMX_COMPLEX__"); 832 if (HasCMPCCXADD) 833 Builder.defineMacro("__CMPCCXADD__"); 834 if (HasRAOINT) 835 Builder.defineMacro("__RAOINT__"); 836 if (HasAVXIFMA) 837 Builder.defineMacro("__AVXIFMA__"); 838 if (HasAVXNECONVERT) 839 Builder.defineMacro("__AVXNECONVERT__"); 840 if (HasAVXVNNI) 841 Builder.defineMacro("__AVXVNNI__"); 842 if (HasAVXVNNIINT16) 843 Builder.defineMacro("__AVXVNNIINT16__"); 844 if (HasAVXVNNIINT8) 845 Builder.defineMacro("__AVXVNNIINT8__"); 846 if (HasSERIALIZE) 847 Builder.defineMacro("__SERIALIZE__"); 848 if (HasTSXLDTRK) 849 Builder.defineMacro("__TSXLDTRK__"); 850 if (HasUINTR) 851 Builder.defineMacro("__UINTR__"); 852 if (HasCRC32) 853 Builder.defineMacro("__CRC32__"); 854 855 // Each case falls through to the previous one here. 856 switch (SSELevel) { 857 case AVX512F: 858 Builder.defineMacro("__AVX512F__"); 859 [[fallthrough]]; 860 case AVX2: 861 Builder.defineMacro("__AVX2__"); 862 [[fallthrough]]; 863 case AVX: 864 Builder.defineMacro("__AVX__"); 865 [[fallthrough]]; 866 case SSE42: 867 Builder.defineMacro("__SSE4_2__"); 868 [[fallthrough]]; 869 case SSE41: 870 Builder.defineMacro("__SSE4_1__"); 871 [[fallthrough]]; 872 case SSSE3: 873 Builder.defineMacro("__SSSE3__"); 874 [[fallthrough]]; 875 case SSE3: 876 Builder.defineMacro("__SSE3__"); 877 [[fallthrough]]; 878 case SSE2: 879 Builder.defineMacro("__SSE2__"); 880 Builder.defineMacro("__SSE2_MATH__"); // -mfp-math=sse always implied. 
881 [[fallthrough]]; 882 case SSE1: 883 Builder.defineMacro("__SSE__"); 884 Builder.defineMacro("__SSE_MATH__"); // -mfp-math=sse always implied. 885 [[fallthrough]]; 886 case NoSSE: 887 break; 888 } 889 890 if (Opts.MicrosoftExt && getTriple().getArch() == llvm::Triple::x86) { 891 switch (SSELevel) { 892 case AVX512F: 893 case AVX2: 894 case AVX: 895 case SSE42: 896 case SSE41: 897 case SSSE3: 898 case SSE3: 899 case SSE2: 900 Builder.defineMacro("_M_IX86_FP", Twine(2)); 901 break; 902 case SSE1: 903 Builder.defineMacro("_M_IX86_FP", Twine(1)); 904 break; 905 default: 906 Builder.defineMacro("_M_IX86_FP", Twine(0)); 907 break; 908 } 909 } 910 911 // Each case falls through to the previous one here. 912 switch (MMX3DNowLevel) { 913 case AMD3DNowAthlon: 914 Builder.defineMacro("__3dNOW_A__"); 915 [[fallthrough]]; 916 case AMD3DNow: 917 Builder.defineMacro("__3dNOW__"); 918 [[fallthrough]]; 919 case MMX: 920 Builder.defineMacro("__MMX__"); 921 [[fallthrough]]; 922 case NoMMX3DNow: 923 break; 924 } 925 926 if (CPU >= CK_i486 || CPU == CK_None) { 927 Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1"); 928 Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2"); 929 Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4"); 930 } 931 if (HasCX8) 932 Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8"); 933 if (HasCX16 && getTriple().getArch() == llvm::Triple::x86_64) 934 Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_16"); 935 936 if (HasFloat128) 937 Builder.defineMacro("__SIZEOF_FLOAT128__", "16"); 938 } 939 940 bool X86TargetInfo::isValidFeatureName(StringRef Name) const { 941 return llvm::StringSwitch<bool>(Name) 942 .Case("3dnow", true) 943 .Case("3dnowa", true) 944 .Case("adx", true) 945 .Case("aes", true) 946 .Case("amx-bf16", true) 947 .Case("amx-complex", true) 948 .Case("amx-fp16", true) 949 .Case("amx-int8", true) 950 .Case("amx-tile", true) 951 .Case("avx", true) 952 .Case("avx2", true) 953 .Case("avx512f", true) 954 .Case("avx512cd", 
true)
      .Case("avx512vpopcntdq", true)
      .Case("avx512vnni", true)
      .Case("avx512bf16", true)
      .Case("avx512er", true)
      .Case("avx512fp16", true)
      .Case("avx512pf", true)
      .Case("avx512dq", true)
      .Case("avx512bitalg", true)
      .Case("avx512bw", true)
      .Case("avx512vl", true)
      .Case("avx512vbmi", true)
      .Case("avx512vbmi2", true)
      .Case("avx512ifma", true)
      .Case("avx512vp2intersect", true)
      .Case("avxifma", true)
      .Case("avxneconvert", true)
      .Case("avxvnni", true)
      .Case("avxvnniint16", true)
      .Case("avxvnniint8", true)
      .Case("bmi", true)
      .Case("bmi2", true)
      .Case("cldemote", true)
      .Case("clflushopt", true)
      .Case("clwb", true)
      .Case("clzero", true)
      .Case("cmpccxadd", true)
      .Case("crc32", true)
      .Case("cx16", true)
      .Case("enqcmd", true)
      .Case("f16c", true)
      .Case("fma", true)
      .Case("fma4", true)
      .Case("fsgsbase", true)
      .Case("fxsr", true)
      .Case("general-regs-only", true)
      .Case("gfni", true)
      .Case("hreset", true)
      .Case("invpcid", true)
      .Case("kl", true)
      .Case("widekl", true)
      .Case("lwp", true)
      .Case("lzcnt", true)
      .Case("mmx", true)
      .Case("movbe", true)
      .Case("movdiri", true)
      .Case("movdir64b", true)
      .Case("mwaitx", true)
      .Case("pclmul", true)
      .Case("pconfig", true)
      .Case("pku", true)
      .Case("popcnt", true)
      .Case("prefetchi", true)
      .Case("prefetchwt1", true)
      .Case("prfchw", true)
      .Case("ptwrite", true)
      .Case("raoint", true)
      .Case("rdpid", true)
      .Case("rdpru", true)
      .Case("rdrnd", true)
      .Case("rdseed", true)
      .Case("rtm", true)
      .Case("sahf", true)
      .Case("serialize", true)
      .Case("sgx", true)
      .Case("sha", true)
      .Case("sha512", true)
      .Case("shstk", true)
      .Case("sm3", true)
      .Case("sm4", true)
      .Case("sse", true)
      .Case("sse2", true)
      .Case("sse3", true)
      .Case("ssse3", true)
      .Case("sse4", true)
      .Case("sse4.1", true)
      .Case("sse4.2", true)
      .Case("sse4a", true)
      .Case("tbm", true)
      .Case("tsxldtrk", true)
      .Case("uintr", true)
      .Case("vaes", true)
      .Case("vpclmulqdq", true)
      .Case("wbnoinvd", true)
      .Case("waitpkg", true)
      .Case("x87", true)
      .Case("xop", true)
      .Case("xsave", true)
      .Case("xsavec", true)
      .Case("xsaves", true)
      .Case("xsaveopt", true)
      .Default(false);
}

// Reports whether the feature named by Feature is enabled for this target.
//
// Most names map directly onto the matching Has* flag. The SSE/AVX, MMX/3DNow
// and FMA4/SSE4A/XOP families are answered by comparing SSELevel,
// MMX3DNowLevel and XOPLevel against the named level, so a higher level also
// reports every lower one. "x86" is always true; "x86_32" and "x86_64" depend
// on the architecture of the target triple. Any name not listed yields false.
bool X86TargetInfo::hasFeature(StringRef Feature) const {
  return llvm::StringSwitch<bool>(Feature)
      .Case("adx", HasADX)
      .Case("aes", HasAES)
      .Case("amx-bf16", HasAMXBF16)
      .Case("amx-complex", HasAMXCOMPLEX)
      .Case("amx-fp16", HasAMXFP16)
      .Case("amx-int8", HasAMXINT8)
      .Case("amx-tile", HasAMXTILE)
      .Case("avx", SSELevel >= AVX)
      .Case("avx2", SSELevel >= AVX2)
      .Case("avx512f", SSELevel >= AVX512F)
      .Case("avx512cd", HasAVX512CD)
      .Case("avx512vpopcntdq", HasAVX512VPOPCNTDQ)
      .Case("avx512vnni", HasAVX512VNNI)
      .Case("avx512bf16", HasAVX512BF16)
      .Case("avx512er", HasAVX512ER)
      .Case("avx512fp16", HasAVX512FP16)
      .Case("avx512pf", HasAVX512PF)
      .Case("avx512dq", HasAVX512DQ)
      .Case("avx512bitalg", HasAVX512BITALG)
      .Case("avx512bw", HasAVX512BW)
      .Case("avx512vl", HasAVX512VL)
      .Case("avx512vbmi", HasAVX512VBMI)
      .Case("avx512vbmi2", HasAVX512VBMI2)
      .Case("avx512ifma", HasAVX512IFMA)
      .Case("avx512vp2intersect", HasAVX512VP2INTERSECT)
      .Case("avxifma", HasAVXIFMA)
      .Case("avxneconvert", HasAVXNECONVERT)
      .Case("avxvnni", HasAVXVNNI)
      .Case("avxvnniint16", HasAVXVNNIINT16)
      .Case("avxvnniint8", HasAVXVNNIINT8)
      .Case("bmi", HasBMI)
      .Case("bmi2", HasBMI2)
      .Case("cldemote", HasCLDEMOTE)
      .Case("clflushopt", HasCLFLUSHOPT)
      .Case("clwb", HasCLWB)
      .Case("clzero", HasCLZERO)
      .Case("cmpccxadd", HasCMPCCXADD)
      .Case("crc32", HasCRC32)
      .Case("cx8", HasCX8)
      .Case("cx16", HasCX16)
      .Case("enqcmd", HasENQCMD)
      .Case("f16c", HasF16C)
      .Case("fma", HasFMA)
      .Case("fma4", XOPLevel >= FMA4)
      .Case("fsgsbase", HasFSGSBASE)
      .Case("fxsr", HasFXSR)
      .Case("gfni", HasGFNI)
      .Case("hreset", HasHRESET)
      .Case("invpcid", HasINVPCID)
      .Case("kl", HasKL)
      .Case("widekl", HasWIDEKL)
      .Case("lwp", HasLWP)
      .Case("lzcnt", HasLZCNT)
      .Case("mm3dnow", MMX3DNowLevel >= AMD3DNow)
      .Case("mm3dnowa", MMX3DNowLevel >= AMD3DNowAthlon)
      .Case("mmx", MMX3DNowLevel >= MMX)
      .Case("movbe", HasMOVBE)
      .Case("movdiri", HasMOVDIRI)
      .Case("movdir64b", HasMOVDIR64B)
      .Case("mwaitx", HasMWAITX)
      .Case("pclmul", HasPCLMUL)
      .Case("pconfig", HasPCONFIG)
      .Case("pku", HasPKU)
      .Case("popcnt", HasPOPCNT)
      .Case("prefetchi", HasPREFETCHI)
      .Case("prefetchwt1", HasPREFETCHWT1)
      .Case("prfchw", HasPRFCHW)
      .Case("ptwrite", HasPTWRITE)
      .Case("raoint", HasRAOINT)
      .Case("rdpid", HasRDPID)
      .Case("rdpru", HasRDPRU)
      .Case("rdrnd", HasRDRND)
      .Case("rdseed", HasRDSEED)
      .Case("retpoline-external-thunk", HasRetpolineExternalThunk)
      .Case("rtm", HasRTM)
      .Case("sahf", HasLAHFSAHF)
      .Case("serialize", HasSERIALIZE)
      .Case("sgx", HasSGX)
      .Case("sha", HasSHA)
      .Case("sha512", HasSHA512)
      .Case("shstk", HasSHSTK)
      .Case("sm3", HasSM3)
      .Case("sm4", HasSM4)
      .Case("sse", SSELevel >= SSE1)
      .Case("sse2", SSELevel >= SSE2)
      .Case("sse3", SSELevel >= SSE3)
      .Case("ssse3", SSELevel >= SSSE3)
      .Case("sse4.1", SSELevel >= SSE41)
      .Case("sse4.2", SSELevel >= SSE42)
      .Case("sse4a", XOPLevel >= SSE4A)
      .Case("tbm", HasTBM)
      .Case("tsxldtrk", HasTSXLDTRK)
      .Case("uintr", HasUINTR)
      .Case("vaes", HasVAES)
      .Case("vpclmulqdq", HasVPCLMULQDQ)
      .Case("wbnoinvd", HasWBNOINVD)
      .Case("waitpkg", HasWAITPKG)
      .Case("x86", true)
      .Case("x86_32", getTriple().getArch() == llvm::Triple::x86)
      .Case("x86_64", getTriple().getArch() == llvm::Triple::x86_64)
      .Case("x87", HasX87)
      .Case("xop", XOPLevel >= XOP)
      .Case("xsave", HasXSAVE)
      .Case("xsavec", HasXSAVEC)
      .Case("xsaves", HasXSAVES)
      .Case("xsaveopt", HasXSAVEOPT)
      .Case("fullbf16", HasFullBFloat16)
      .Default(false);
}

// We can't use a generic validation scheme for the features accepted here
// versus subtarget features accepted in the target attribute because the
// bitfield structure that's initialized in the runtime only supports the
// below currently rather than the full range of subtarget features. (See
// X86TargetInfo::hasFeature for a somewhat comprehensive list).
//
// Returns true only when FeatureStr equals the STR of one of the
// X86_FEATURE_COMPAT entries expanded from X86TargetParser.def.
bool X86TargetInfo::validateCpuSupports(StringRef FeatureStr) const {
  return llvm::StringSwitch<bool>(FeatureStr)
#define X86_FEATURE_COMPAT(ENUM, STR, PRIORITY) .Case(STR, true)
#include "llvm/TargetParser/X86TargetParser.def"
      .Default(false);
}

// Maps a feature name onto its llvm::X86::FEATURE_<ENUM> enumerator, using the
// X86_FEATURE_COMPAT entries expanded from X86TargetParser.def. The switch has
// no Default() case, so the name must already be known to be valid.
static llvm::X86::ProcessorFeatures getFeature(StringRef Name) {
  return llvm::StringSwitch<llvm::X86::ProcessorFeatures>(Name)
#define X86_FEATURE_COMPAT(ENUM, STR, PRIORITY)                                \
  .Case(STR, llvm::X86::FEATURE_##ENUM)

#include "llvm/TargetParser/X86TargetParser.def"
      ;
  // Note, this function should only be used after ensuring the value is
  // correct, so it asserts if the value is out of range.
}

// Computes the ordering key used to rank multiversion candidates named by a
// CPU or by a feature. The feature priority occupies the upper bits; the low
// bit is set for CPU names so that a CPU ranks just above its own key feature.
unsigned X86TargetInfo::multiVersionSortPriority(StringRef Name) const {
  // Valid CPUs have a 'key feature' that compares just better than its key
  // feature.
  using namespace llvm::X86;
  CPUKind Kind = parseArchX86(Name);
  if (Kind != CK_None) {
    ProcessorFeatures KeyFeature = getKeyFeature(Kind);
    return (getFeaturePriority(KeyFeature) << 1) + 1;
  }

  // Now we know we have a feature, so get its priority and shift it a few so
  // that we have sufficient room for the CPUs (above).
  return getFeaturePriority(getFeature(Name)) << 1;
}

// Forwards to llvm::X86::validateCPUSpecificCPUDispatch for Name.
bool X86TargetInfo::validateCPUSpecificCPUDispatch(StringRef Name) const {
  return llvm::X86::validateCPUSpecificCPUDispatch(Name);
}

// Forwards to llvm::X86::getCPUDispatchMangling to obtain the mangling
// character associated with the CPU called Name.
char X86TargetInfo::CPUSpecificManglingCharacter(StringRef Name) const {
  return llvm::X86::getCPUDispatchMangling(Name);
}

// Appends to Features every entry that llvm::X86::getFeaturesForCPU reports
// for the CPU called Name. Existing contents of Features are left in place.
void X86TargetInfo::getCPUSpecificCPUDispatchFeatures(
    StringRef Name, llvm::SmallVectorImpl<StringRef> &Features) const {
  SmallVector<StringRef, 32> TargetCPUFeatures;
  // NOTE(review): the trailing `true` presumably asks for '+'-prefixed feature
  // names - confirm against the getFeaturesForCPU declaration.
  llvm::X86::getFeaturesForCPU(Name, TargetCPUFeatures, true);
  for (auto &F : TargetCPUFeatures)
    Features.push_back(F);
}

// We can't use a generic validation scheme for the cpus accepted here
// versus subtarget cpus accepted in the target attribute because the
// variables initialized by the runtime only support the below currently
// rather than the full range of cpus.
1218 bool X86TargetInfo::validateCpuIs(StringRef FeatureStr) const { 1219 return llvm::StringSwitch<bool>(FeatureStr) 1220 #define X86_VENDOR(ENUM, STRING) .Case(STRING, true) 1221 #define X86_CPU_TYPE_ALIAS(ENUM, ALIAS) .Case(ALIAS, true) 1222 #define X86_CPU_TYPE(ENUM, STR) .Case(STR, true) 1223 #define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS) .Case(ALIAS, true) 1224 #define X86_CPU_SUBTYPE(ENUM, STR) .Case(STR, true) 1225 #include "llvm/TargetParser/X86TargetParser.def" 1226 .Default(false); 1227 } 1228 1229 static unsigned matchAsmCCConstraint(const char *Name) { 1230 auto RV = llvm::StringSwitch<unsigned>(Name) 1231 .Case("@cca", 4) 1232 .Case("@ccae", 5) 1233 .Case("@ccb", 4) 1234 .Case("@ccbe", 5) 1235 .Case("@ccc", 4) 1236 .Case("@cce", 4) 1237 .Case("@ccz", 4) 1238 .Case("@ccg", 4) 1239 .Case("@ccge", 5) 1240 .Case("@ccl", 4) 1241 .Case("@ccle", 5) 1242 .Case("@ccna", 5) 1243 .Case("@ccnae", 6) 1244 .Case("@ccnb", 5) 1245 .Case("@ccnbe", 6) 1246 .Case("@ccnc", 5) 1247 .Case("@ccne", 5) 1248 .Case("@ccnz", 5) 1249 .Case("@ccng", 5) 1250 .Case("@ccnge", 6) 1251 .Case("@ccnl", 5) 1252 .Case("@ccnle", 6) 1253 .Case("@ccno", 5) 1254 .Case("@ccnp", 5) 1255 .Case("@ccns", 5) 1256 .Case("@cco", 4) 1257 .Case("@ccp", 4) 1258 .Case("@ccs", 4) 1259 .Default(0); 1260 return RV; 1261 } 1262 1263 bool X86TargetInfo::validateAsmConstraint( 1264 const char *&Name, TargetInfo::ConstraintInfo &Info) const { 1265 switch (*Name) { 1266 default: 1267 return false; 1268 // Constant constraints. 1269 case 'e': // 32-bit signed integer constant for use with sign-extending x86_64 1270 // instructions. 1271 case 'Z': // 32-bit unsigned integer constant for use with zero-extending 1272 // x86_64 instructions. 
1273 case 's': 1274 Info.setRequiresImmediate(); 1275 return true; 1276 case 'I': 1277 Info.setRequiresImmediate(0, 31); 1278 return true; 1279 case 'J': 1280 Info.setRequiresImmediate(0, 63); 1281 return true; 1282 case 'K': 1283 Info.setRequiresImmediate(-128, 127); 1284 return true; 1285 case 'L': 1286 Info.setRequiresImmediate({int(0xff), int(0xffff), int(0xffffffff)}); 1287 return true; 1288 case 'M': 1289 Info.setRequiresImmediate(0, 3); 1290 return true; 1291 case 'N': 1292 Info.setRequiresImmediate(0, 255); 1293 return true; 1294 case 'O': 1295 Info.setRequiresImmediate(0, 127); 1296 return true; 1297 // Register constraints. 1298 case 'Y': // 'Y' is the first character for several 2-character constraints. 1299 // Shift the pointer to the second character of the constraint. 1300 Name++; 1301 switch (*Name) { 1302 default: 1303 return false; 1304 case 'z': // First SSE register. 1305 case '2': 1306 case 't': // Any SSE register, when SSE2 is enabled. 1307 case 'i': // Any SSE register, when SSE2 and inter-unit moves enabled. 1308 case 'm': // Any MMX register, when inter-unit moves enabled. 1309 case 'k': // AVX512 arch mask registers: k1-k7. 1310 Info.setAllowsRegister(); 1311 return true; 1312 } 1313 case 'f': // Any x87 floating point stack register. 1314 // Constraint 'f' cannot be used for output operands. 1315 if (Info.ConstraintStr[0] == '=') 1316 return false; 1317 Info.setAllowsRegister(); 1318 return true; 1319 case 'a': // eax. 1320 case 'b': // ebx. 1321 case 'c': // ecx. 1322 case 'd': // edx. 1323 case 'S': // esi. 1324 case 'D': // edi. 1325 case 'A': // edx:eax. 1326 case 't': // Top of floating point stack. 1327 case 'u': // Second from top of floating point stack. 1328 case 'q': // Any register accessible as [r]l: a, b, c, and d. 1329 case 'y': // Any MMX register. 1330 case 'v': // Any {X,Y,Z}MM register (Arch & context dependent) 1331 case 'x': // Any SSE register. 
1332 case 'k': // Any AVX512 mask register (same as Yk, additionally allows k0 1333 // for intermideate k reg operations). 1334 case 'Q': // Any register accessible as [r]h: a, b, c, and d. 1335 case 'R': // "Legacy" registers: ax, bx, cx, dx, di, si, sp, bp. 1336 case 'l': // "Index" registers: any general register that can be used as an 1337 // index in a base+index memory access. 1338 Info.setAllowsRegister(); 1339 return true; 1340 // Floating point constant constraints. 1341 case 'C': // SSE floating point constant. 1342 case 'G': // x87 floating point constant. 1343 return true; 1344 case '@': 1345 // CC condition changes. 1346 if (auto Len = matchAsmCCConstraint(Name)) { 1347 Name += Len - 1; 1348 Info.setAllowsRegister(); 1349 return true; 1350 } 1351 return false; 1352 } 1353 } 1354 1355 // Below is based on the following information: 1356 // +------------------------------------+-------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ 1357 // | Processor Name | Cache Line Size (Bytes) | Source | 1358 // +------------------------------------+-------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ 1359 // | i386 | 64 | https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf | 1360 // | i486 | 16 | "four doublewords" (doubleword = 32 bits, 4 bits * 32 bits = 16 bytes) https://en.wikichip.org/w/images/d/d3/i486_MICROPROCESSOR_HARDWARE_REFERENCE_MANUAL_%281990%29.pdf and http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.126.4216&rep=rep1&type=pdf (page 29) | 1361 // | i586/Pentium MMX | 32 | https://www.7-cpu.com/cpu/P-MMX.html | 1362 // | i686/Pentium | 32 | https://www.7-cpu.com/cpu/P6.html | 1363 // | Netburst/Pentium4 | 64 | 
https://www.7-cpu.com/cpu/P4-180.html | 1364 // | Atom | 64 | https://www.7-cpu.com/cpu/Atom.html | 1365 // | Westmere | 64 | https://en.wikichip.org/wiki/intel/microarchitectures/sandy_bridge_(client) "Cache Architecture" | 1366 // | Sandy Bridge | 64 | https://en.wikipedia.org/wiki/Sandy_Bridge and https://www.7-cpu.com/cpu/SandyBridge.html | 1367 // | Ivy Bridge | 64 | https://blog.stuffedcow.net/2013/01/ivb-cache-replacement/ and https://www.7-cpu.com/cpu/IvyBridge.html | 1368 // | Haswell | 64 | https://www.7-cpu.com/cpu/Haswell.html | 1369 // | Broadwell | 64 | https://www.7-cpu.com/cpu/Broadwell.html | 1370 // | Skylake (including skylake-avx512) | 64 | https://www.nas.nasa.gov/hecc/support/kb/skylake-processors_550.html "Cache Hierarchy" | 1371 // | Cascade Lake | 64 | https://www.nas.nasa.gov/hecc/support/kb/cascade-lake-processors_579.html "Cache Hierarchy" | 1372 // | Skylake | 64 | https://en.wikichip.org/wiki/intel/microarchitectures/kaby_lake "Memory Hierarchy" | 1373 // | Ice Lake | 64 | https://www.7-cpu.com/cpu/Ice_Lake.html | 1374 // | Knights Landing | 64 | https://software.intel.com/en-us/articles/intel-xeon-phi-processor-7200-family-memory-management-optimizations "The Intel® Xeon Phi™ Processor Architecture" | 1375 // | Knights Mill | 64 | https://software.intel.com/sites/default/files/managed/9e/bc/64-ia-32-architectures-optimization-manual.pdf?countrylabel=Colombia "2.5.5.2 L1 DCache " | 1376 // +------------------------------------+-------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ 1377 std::optional<unsigned> X86TargetInfo::getCPUCacheLineSize() const { 1378 using namespace llvm::X86; 1379 switch (CPU) { 1380 // i386 1381 case CK_i386: 1382 // i486 1383 case CK_i486: 1384 case CK_WinChipC6: 1385 case CK_WinChip2: 1386 case CK_C3: 1387 // Lakemont 1388 case CK_Lakemont: 1389 return 16; 1390 1391 // i586 1392 
case CK_i586: 1393 case CK_Pentium: 1394 case CK_PentiumMMX: 1395 // i686 1396 case CK_PentiumPro: 1397 case CK_i686: 1398 case CK_Pentium2: 1399 case CK_Pentium3: 1400 case CK_PentiumM: 1401 case CK_C3_2: 1402 // K6 1403 case CK_K6: 1404 case CK_K6_2: 1405 case CK_K6_3: 1406 // Geode 1407 case CK_Geode: 1408 return 32; 1409 1410 // Netburst 1411 case CK_Pentium4: 1412 case CK_Prescott: 1413 case CK_Nocona: 1414 // Atom 1415 case CK_Bonnell: 1416 case CK_Silvermont: 1417 case CK_Goldmont: 1418 case CK_GoldmontPlus: 1419 case CK_Tremont: 1420 1421 case CK_Westmere: 1422 case CK_SandyBridge: 1423 case CK_IvyBridge: 1424 case CK_Haswell: 1425 case CK_Broadwell: 1426 case CK_SkylakeClient: 1427 case CK_SkylakeServer: 1428 case CK_Cascadelake: 1429 case CK_Nehalem: 1430 case CK_Cooperlake: 1431 case CK_Cannonlake: 1432 case CK_Tigerlake: 1433 case CK_SapphireRapids: 1434 case CK_IcelakeClient: 1435 case CK_Rocketlake: 1436 case CK_IcelakeServer: 1437 case CK_Alderlake: 1438 case CK_Raptorlake: 1439 case CK_Meteorlake: 1440 case CK_Sierraforest: 1441 case CK_Grandridge: 1442 case CK_Graniterapids: 1443 case CK_GraniterapidsD: 1444 case CK_Emeraldrapids: 1445 case CK_KNL: 1446 case CK_KNM: 1447 // K7 1448 case CK_Athlon: 1449 case CK_AthlonXP: 1450 // K8 1451 case CK_K8: 1452 case CK_K8SSE3: 1453 case CK_AMDFAM10: 1454 // Bobcat 1455 case CK_BTVER1: 1456 case CK_BTVER2: 1457 // Bulldozer 1458 case CK_BDVER1: 1459 case CK_BDVER2: 1460 case CK_BDVER3: 1461 case CK_BDVER4: 1462 // Zen 1463 case CK_ZNVER1: 1464 case CK_ZNVER2: 1465 case CK_ZNVER3: 1466 case CK_ZNVER4: 1467 // Deprecated 1468 case CK_x86_64: 1469 case CK_x86_64_v2: 1470 case CK_x86_64_v3: 1471 case CK_x86_64_v4: 1472 case CK_Yonah: 1473 case CK_Penryn: 1474 case CK_Core2: 1475 return 64; 1476 1477 // The following currently have unknown cache line sizes (but they are probably all 64): 1478 // Core 1479 case CK_None: 1480 return std::nullopt; 1481 } 1482 llvm_unreachable("Unknown CPU kind"); 1483 } 1484 1485 
bool X86TargetInfo::validateOutputSize(const llvm::StringMap<bool> &FeatureMap, 1486 StringRef Constraint, 1487 unsigned Size) const { 1488 // Strip off constraint modifiers. 1489 while (Constraint[0] == '=' || Constraint[0] == '+' || Constraint[0] == '&') 1490 Constraint = Constraint.substr(1); 1491 1492 return validateOperandSize(FeatureMap, Constraint, Size); 1493 } 1494 1495 bool X86TargetInfo::validateInputSize(const llvm::StringMap<bool> &FeatureMap, 1496 StringRef Constraint, 1497 unsigned Size) const { 1498 return validateOperandSize(FeatureMap, Constraint, Size); 1499 } 1500 1501 bool X86TargetInfo::validateOperandSize(const llvm::StringMap<bool> &FeatureMap, 1502 StringRef Constraint, 1503 unsigned Size) const { 1504 switch (Constraint[0]) { 1505 default: 1506 break; 1507 case 'k': 1508 // Registers k0-k7 (AVX512) size limit is 64 bit. 1509 case 'y': 1510 return Size <= 64; 1511 case 'f': 1512 case 't': 1513 case 'u': 1514 return Size <= 128; 1515 case 'Y': 1516 // 'Y' is the first character for several 2-character constraints. 1517 switch (Constraint[1]) { 1518 default: 1519 return false; 1520 case 'm': 1521 // 'Ym' is synonymous with 'y'. 1522 case 'k': 1523 return Size <= 64; 1524 case 'z': 1525 // XMM0/YMM/ZMM0 1526 if (hasFeatureEnabled(FeatureMap, "avx512f")) 1527 // ZMM0 can be used if target supports AVX512F. 1528 return Size <= 512U; 1529 else if (hasFeatureEnabled(FeatureMap, "avx")) 1530 // YMM0 can be used if target supports AVX. 1531 return Size <= 256U; 1532 else if (hasFeatureEnabled(FeatureMap, "sse")) 1533 return Size <= 128U; 1534 return false; 1535 case 'i': 1536 case 't': 1537 case '2': 1538 // 'Yi','Yt','Y2' are synonymous with 'x' when SSE2 is enabled. 1539 if (SSELevel < SSE2) 1540 return false; 1541 break; 1542 } 1543 break; 1544 case 'v': 1545 case 'x': 1546 if (hasFeatureEnabled(FeatureMap, "avx512f")) 1547 // 512-bit zmm registers can be used if target supports AVX512F. 
1548 return Size <= 512U; 1549 else if (hasFeatureEnabled(FeatureMap, "avx")) 1550 // 256-bit ymm registers can be used if target supports AVX. 1551 return Size <= 256U; 1552 return Size <= 128U; 1553 1554 } 1555 1556 return true; 1557 } 1558 1559 std::string X86TargetInfo::convertConstraint(const char *&Constraint) const { 1560 switch (*Constraint) { 1561 case '@': 1562 if (auto Len = matchAsmCCConstraint(Constraint)) { 1563 std::string Converted = "{" + std::string(Constraint, Len) + "}"; 1564 Constraint += Len - 1; 1565 return Converted; 1566 } 1567 return std::string(1, *Constraint); 1568 case 'a': 1569 return std::string("{ax}"); 1570 case 'b': 1571 return std::string("{bx}"); 1572 case 'c': 1573 return std::string("{cx}"); 1574 case 'd': 1575 return std::string("{dx}"); 1576 case 'S': 1577 return std::string("{si}"); 1578 case 'D': 1579 return std::string("{di}"); 1580 case 'p': // Keep 'p' constraint (address). 1581 return std::string("p"); 1582 case 't': // top of floating point stack. 1583 return std::string("{st}"); 1584 case 'u': // second from top of floating point stack. 1585 return std::string("{st(1)}"); // second from top of floating point stack. 1586 case 'Y': 1587 switch (Constraint[1]) { 1588 default: 1589 // Break from inner switch and fall through (copy single char), 1590 // continue parsing after copying the current constraint into 1591 // the return string. 1592 break; 1593 case 'k': 1594 case 'm': 1595 case 'i': 1596 case 't': 1597 case 'z': 1598 case '2': 1599 // "^" hints llvm that this is a 2 letter constraint. 1600 // "Constraint++" is used to promote the string iterator 1601 // to the next constraint. 
1602 return std::string("^") + std::string(Constraint++, 2); 1603 } 1604 [[fallthrough]]; 1605 default: 1606 return std::string(1, *Constraint); 1607 } 1608 } 1609 1610 void X86TargetInfo::fillValidCPUList(SmallVectorImpl<StringRef> &Values) const { 1611 bool Only64Bit = getTriple().getArch() != llvm::Triple::x86; 1612 llvm::X86::fillValidCPUArchList(Values, Only64Bit); 1613 } 1614 1615 void X86TargetInfo::fillValidTuneCPUList(SmallVectorImpl<StringRef> &Values) const { 1616 llvm::X86::fillValidTuneCPUList(Values); 1617 } 1618 1619 ArrayRef<const char *> X86TargetInfo::getGCCRegNames() const { 1620 return llvm::ArrayRef(GCCRegNames); 1621 } 1622 1623 ArrayRef<TargetInfo::AddlRegName> X86TargetInfo::getGCCAddlRegNames() const { 1624 return llvm::ArrayRef(AddlRegNames); 1625 } 1626 1627 ArrayRef<Builtin::Info> X86_32TargetInfo::getTargetBuiltins() const { 1628 return llvm::ArrayRef(BuiltinInfoX86, clang::X86::LastX86CommonBuiltin - 1629 Builtin::FirstTSBuiltin + 1); 1630 } 1631 1632 ArrayRef<Builtin::Info> X86_64TargetInfo::getTargetBuiltins() const { 1633 return llvm::ArrayRef(BuiltinInfoX86, 1634 X86::LastTSBuiltin - Builtin::FirstTSBuiltin); 1635 } 1636