xref: /freebsd/contrib/llvm-project/clang/lib/Basic/Targets/X86.cpp (revision 9f23cbd6cae82fd77edfad7173432fa8dccd0a95)
1 //===--- X86.cpp - Implement X86 target feature support -------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements X86 TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "X86.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/Diagnostic.h"
16 #include "clang/Basic/TargetBuiltins.h"
17 #include "llvm/ADT/StringExtras.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/Support/X86TargetParser.h"
21 #include <optional>
22 
23 namespace clang {
24 namespace targets {
25 
26 static constexpr Builtin::Info BuiltinInfoX86[] = {
27 #define BUILTIN(ID, TYPE, ATTRS)                                               \
28   {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
29 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
30   {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
31 #define TARGET_HEADER_BUILTIN(ID, TYPE, ATTRS, HEADER, LANGS, FEATURE)         \
32   {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::HEADER, LANGS},
33 #include "clang/Basic/BuiltinsX86.def"
34 
35 #define BUILTIN(ID, TYPE, ATTRS)                                               \
36   {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
37 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
38   {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
39 #define TARGET_HEADER_BUILTIN(ID, TYPE, ATTRS, HEADER, LANGS, FEATURE)         \
40   {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::HEADER, LANGS},
41 #include "clang/Basic/BuiltinsX86_64.def"
42 };
43 
// Register names recognised for X86, grouped here by family. The position of
// each name in this array matters: AddlRegNames below refers to entries by
// index (e.g. 0 for "ax", 38 for "r8"), so the order must not change.
static const char *const GCCRegNames[] = {
    // [0, 7]: legacy general-purpose registers.
    "ax", "dx", "cx", "bx", "si", "di", "bp", "sp",
    // [8, 15]: st .. st(7).
    "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)",
    // [16, 21]: miscellaneous names.
    "argp", "flags", "fpcr", "fpsr", "dirflag", "frame",
    // [22, 29]: xmm0 .. xmm7.
    "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
    // [30, 37]: mm0 .. mm7.
    "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
    // [38, 45]: r8 .. r15.
    "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
    // [46, 53]: xmm8 .. xmm15.
    "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15",
    // [54, 69]: ymm0 .. ymm15.
    "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7",
    "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15",
    // [70, 85]: xmm16 .. xmm31.
    "xmm16", "xmm17", "xmm18", "xmm19", "xmm20", "xmm21", "xmm22", "xmm23",
    "xmm24", "xmm25", "xmm26", "xmm27", "xmm28", "xmm29", "xmm30", "xmm31",
    // [86, 101]: ymm16 .. ymm31.
    "ymm16", "ymm17", "ymm18", "ymm19", "ymm20", "ymm21", "ymm22", "ymm23",
    "ymm24", "ymm25", "ymm26", "ymm27", "ymm28", "ymm29", "ymm30", "ymm31",
    // [102, 133]: zmm0 .. zmm31.
    "zmm0", "zmm1", "zmm2", "zmm3", "zmm4", "zmm5", "zmm6", "zmm7",
    "zmm8", "zmm9", "zmm10", "zmm11", "zmm12", "zmm13", "zmm14", "zmm15",
    "zmm16", "zmm17", "zmm18", "zmm19", "zmm20", "zmm21", "zmm22", "zmm23",
    "zmm24", "zmm25", "zmm26", "zmm27", "zmm28", "zmm29", "zmm30", "zmm31",
    // [134, 141]: k0 .. k7.
    "k0", "k1", "k2", "k3", "k4", "k5", "k6", "k7",
    // [142, 146]: cr registers.
    "cr0", "cr2", "cr3", "cr4", "cr8",
    // [147, 152]: dr registers.
    "dr0", "dr1", "dr2", "dr3", "dr6", "dr7",
    // [153, 156]: bnd0 .. bnd3.
    "bnd0", "bnd1", "bnd2", "bnd3",
    // [157, 164]: tmm0 .. tmm7.
    "tmm0", "tmm1", "tmm2", "tmm3", "tmm4", "tmm5", "tmm6", "tmm7",
};
68 
69 const TargetInfo::AddlRegName AddlRegNames[] = {
70     {{"al", "ah", "eax", "rax"}, 0},
71     {{"bl", "bh", "ebx", "rbx"}, 3},
72     {{"cl", "ch", "ecx", "rcx"}, 2},
73     {{"dl", "dh", "edx", "rdx"}, 1},
74     {{"esi", "rsi"}, 4},
75     {{"edi", "rdi"}, 5},
76     {{"esp", "rsp"}, 7},
77     {{"ebp", "rbp"}, 6},
78     {{"r8d", "r8w", "r8b"}, 38},
79     {{"r9d", "r9w", "r9b"}, 39},
80     {{"r10d", "r10w", "r10b"}, 40},
81     {{"r11d", "r11w", "r11b"}, 41},
82     {{"r12d", "r12w", "r12b"}, 42},
83     {{"r13d", "r13w", "r13b"}, 43},
84     {{"r14d", "r14w", "r14b"}, 44},
85     {{"r15d", "r15w", "r15b"}, 45},
86 };
87 
88 } // namespace targets
89 } // namespace clang
90 
91 using namespace clang;
92 using namespace clang::targets;
93 
94 bool X86TargetInfo::setFPMath(StringRef Name) {
95   if (Name == "387") {
96     FPMath = FP_387;
97     return true;
98   }
99   if (Name == "sse") {
100     FPMath = FP_SSE;
101     return true;
102   }
103   return false;
104 }
105 
106 bool X86TargetInfo::initFeatureMap(
107     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
108     const std::vector<std::string> &FeaturesVec) const {
109   // FIXME: This *really* should not be here.
110   // X86_64 always has SSE2.
111   if (getTriple().getArch() == llvm::Triple::x86_64)
112     setFeatureEnabled(Features, "sse2", true);
113 
114   using namespace llvm::X86;
115 
116   SmallVector<StringRef, 16> CPUFeatures;
117   getFeaturesForCPU(CPU, CPUFeatures);
118   for (auto &F : CPUFeatures)
119     setFeatureEnabled(Features, F, true);
120 
121   std::vector<std::string> UpdatedFeaturesVec;
122   for (const auto &Feature : FeaturesVec) {
123     // Expand general-regs-only to -x86, -mmx and -sse
124     if (Feature == "+general-regs-only") {
125       UpdatedFeaturesVec.push_back("-x87");
126       UpdatedFeaturesVec.push_back("-mmx");
127       UpdatedFeaturesVec.push_back("-sse");
128       continue;
129     }
130 
131     UpdatedFeaturesVec.push_back(Feature);
132   }
133 
134   if (!TargetInfo::initFeatureMap(Features, Diags, CPU, UpdatedFeaturesVec))
135     return false;
136 
137   // Can't do this earlier because we need to be able to explicitly enable
138   // or disable these features and the things that they depend upon.
139 
140   // Enable popcnt if sse4.2 is enabled and popcnt is not explicitly disabled.
141   auto I = Features.find("sse4.2");
142   if (I != Features.end() && I->getValue() &&
143       !llvm::is_contained(UpdatedFeaturesVec, "-popcnt"))
144     Features["popcnt"] = true;
145 
146   // Additionally, if SSE is enabled and mmx is not explicitly disabled,
147   // then enable MMX.
148   I = Features.find("sse");
149   if (I != Features.end() && I->getValue() &&
150       !llvm::is_contained(UpdatedFeaturesVec, "-mmx"))
151     Features["mmx"] = true;
152 
153   // Enable xsave if avx is enabled and xsave is not explicitly disabled.
154   I = Features.find("avx");
155   if (I != Features.end() && I->getValue() &&
156       !llvm::is_contained(UpdatedFeaturesVec, "-xsave"))
157     Features["xsave"] = true;
158 
159   // Enable CRC32 if SSE4.2 is enabled and CRC32 is not explicitly disabled.
160   I = Features.find("sse4.2");
161   if (I != Features.end() && I->getValue() &&
162       !llvm::is_contained(UpdatedFeaturesVec, "-crc32"))
163     Features["crc32"] = true;
164 
165   return true;
166 }
167 
168 void X86TargetInfo::setFeatureEnabled(llvm::StringMap<bool> &Features,
169                                       StringRef Name, bool Enabled) const {
170   if (Name == "sse4") {
171     // We can get here via the __target__ attribute since that's not controlled
172     // via the -msse4/-mno-sse4 command line alias. Handle this the same way
173     // here - turn on the sse4.2 if enabled, turn off the sse4.1 level if
174     // disabled.
175     if (Enabled)
176       Name = "sse4.2";
177     else
178       Name = "sse4.1";
179   }
180 
181   Features[Name] = Enabled;
182   llvm::X86::updateImpliedFeatures(Name, Enabled, Features);
183 }
184 
185 /// handleTargetFeatures - Perform initialization based on the user
186 /// configured set of features.
187 bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
188                                          DiagnosticsEngine &Diags) {
189   for (const auto &Feature : Features) {
190     if (Feature[0] != '+')
191       continue;
192 
193     if (Feature == "+aes") {
194       HasAES = true;
195     } else if (Feature == "+vaes") {
196       HasVAES = true;
197     } else if (Feature == "+pclmul") {
198       HasPCLMUL = true;
199     } else if (Feature == "+vpclmulqdq") {
200       HasVPCLMULQDQ = true;
201     } else if (Feature == "+lzcnt") {
202       HasLZCNT = true;
203     } else if (Feature == "+rdrnd") {
204       HasRDRND = true;
205     } else if (Feature == "+fsgsbase") {
206       HasFSGSBASE = true;
207     } else if (Feature == "+bmi") {
208       HasBMI = true;
209     } else if (Feature == "+bmi2") {
210       HasBMI2 = true;
211     } else if (Feature == "+popcnt") {
212       HasPOPCNT = true;
213     } else if (Feature == "+rtm") {
214       HasRTM = true;
215     } else if (Feature == "+prfchw") {
216       HasPRFCHW = true;
217     } else if (Feature == "+rdseed") {
218       HasRDSEED = true;
219     } else if (Feature == "+adx") {
220       HasADX = true;
221     } else if (Feature == "+tbm") {
222       HasTBM = true;
223     } else if (Feature == "+lwp") {
224       HasLWP = true;
225     } else if (Feature == "+fma") {
226       HasFMA = true;
227     } else if (Feature == "+f16c") {
228       HasF16C = true;
229     } else if (Feature == "+gfni") {
230       HasGFNI = true;
231     } else if (Feature == "+avx512cd") {
232       HasAVX512CD = true;
233     } else if (Feature == "+avx512vpopcntdq") {
234       HasAVX512VPOPCNTDQ = true;
235     } else if (Feature == "+avx512vnni") {
236       HasAVX512VNNI = true;
237     } else if (Feature == "+avx512bf16") {
238       HasAVX512BF16 = true;
239     } else if (Feature == "+avx512er") {
240       HasAVX512ER = true;
241     } else if (Feature == "+avx512fp16") {
242       HasAVX512FP16 = true;
243       HasLegalHalfType = true;
244     } else if (Feature == "+avx512pf") {
245       HasAVX512PF = true;
246     } else if (Feature == "+avx512dq") {
247       HasAVX512DQ = true;
248     } else if (Feature == "+avx512bitalg") {
249       HasAVX512BITALG = true;
250     } else if (Feature == "+avx512bw") {
251       HasAVX512BW = true;
252     } else if (Feature == "+avx512vl") {
253       HasAVX512VL = true;
254     } else if (Feature == "+avx512vbmi") {
255       HasAVX512VBMI = true;
256     } else if (Feature == "+avx512vbmi2") {
257       HasAVX512VBMI2 = true;
258     } else if (Feature == "+avx512ifma") {
259       HasAVX512IFMA = true;
260     } else if (Feature == "+avx512vp2intersect") {
261       HasAVX512VP2INTERSECT = true;
262     } else if (Feature == "+sha") {
263       HasSHA = true;
264     } else if (Feature == "+shstk") {
265       HasSHSTK = true;
266     } else if (Feature == "+movbe") {
267       HasMOVBE = true;
268     } else if (Feature == "+sgx") {
269       HasSGX = true;
270     } else if (Feature == "+cx8") {
271       HasCX8 = true;
272     } else if (Feature == "+cx16") {
273       HasCX16 = true;
274     } else if (Feature == "+fxsr") {
275       HasFXSR = true;
276     } else if (Feature == "+xsave") {
277       HasXSAVE = true;
278     } else if (Feature == "+xsaveopt") {
279       HasXSAVEOPT = true;
280     } else if (Feature == "+xsavec") {
281       HasXSAVEC = true;
282     } else if (Feature == "+xsaves") {
283       HasXSAVES = true;
284     } else if (Feature == "+mwaitx") {
285       HasMWAITX = true;
286     } else if (Feature == "+pku") {
287       HasPKU = true;
288     } else if (Feature == "+clflushopt") {
289       HasCLFLUSHOPT = true;
290     } else if (Feature == "+clwb") {
291       HasCLWB = true;
292     } else if (Feature == "+wbnoinvd") {
293       HasWBNOINVD = true;
294     } else if (Feature == "+prefetchi") {
295       HasPREFETCHI = true;
296     } else if (Feature == "+prefetchwt1") {
297       HasPREFETCHWT1 = true;
298     } else if (Feature == "+clzero") {
299       HasCLZERO = true;
300     } else if (Feature == "+cldemote") {
301       HasCLDEMOTE = true;
302     } else if (Feature == "+rdpid") {
303       HasRDPID = true;
304     } else if (Feature == "+rdpru") {
305       HasRDPRU = true;
306     } else if (Feature == "+kl") {
307       HasKL = true;
308     } else if (Feature == "+widekl") {
309       HasWIDEKL = true;
310     } else if (Feature == "+retpoline-external-thunk") {
311       HasRetpolineExternalThunk = true;
312     } else if (Feature == "+sahf") {
313       HasLAHFSAHF = true;
314     } else if (Feature == "+waitpkg") {
315       HasWAITPKG = true;
316     } else if (Feature == "+movdiri") {
317       HasMOVDIRI = true;
318     } else if (Feature == "+movdir64b") {
319       HasMOVDIR64B = true;
320     } else if (Feature == "+pconfig") {
321       HasPCONFIG = true;
322     } else if (Feature == "+ptwrite") {
323       HasPTWRITE = true;
324     } else if (Feature == "+invpcid") {
325       HasINVPCID = true;
326     } else if (Feature == "+enqcmd") {
327       HasENQCMD = true;
328     } else if (Feature == "+hreset") {
329       HasHRESET = true;
330     } else if (Feature == "+amx-bf16") {
331       HasAMXBF16 = true;
332     } else if (Feature == "+amx-fp16") {
333       HasAMXFP16 = true;
334     } else if (Feature == "+amx-int8") {
335       HasAMXINT8 = true;
336     } else if (Feature == "+amx-tile") {
337       HasAMXTILE = true;
338     } else if (Feature == "+cmpccxadd") {
339       HasCMPCCXADD = true;
340     } else if (Feature == "+raoint") {
341       HasRAOINT = true;
342     } else if (Feature == "+avxifma") {
343       HasAVXIFMA = true;
344     } else if (Feature == "+avxneconvert") {
345       HasAVXNECONVERT= true;
346     } else if (Feature == "+avxvnni") {
347       HasAVXVNNI = true;
348     } else if (Feature == "+avxvnniint8") {
349       HasAVXVNNIINT8 = true;
350     } else if (Feature == "+serialize") {
351       HasSERIALIZE = true;
352     } else if (Feature == "+tsxldtrk") {
353       HasTSXLDTRK = true;
354     } else if (Feature == "+uintr") {
355       HasUINTR = true;
356     } else if (Feature == "+crc32") {
357       HasCRC32 = true;
358     } else if (Feature == "+x87") {
359       HasX87 = true;
360     }
361 
362     X86SSEEnum Level = llvm::StringSwitch<X86SSEEnum>(Feature)
363                            .Case("+avx512f", AVX512F)
364                            .Case("+avx2", AVX2)
365                            .Case("+avx", AVX)
366                            .Case("+sse4.2", SSE42)
367                            .Case("+sse4.1", SSE41)
368                            .Case("+ssse3", SSSE3)
369                            .Case("+sse3", SSE3)
370                            .Case("+sse2", SSE2)
371                            .Case("+sse", SSE1)
372                            .Default(NoSSE);
373     SSELevel = std::max(SSELevel, Level);
374 
375     HasFloat16 = SSELevel >= SSE2;
376 
377     HasBFloat16 = SSELevel >= SSE2;
378 
379     MMX3DNowEnum ThreeDNowLevel = llvm::StringSwitch<MMX3DNowEnum>(Feature)
380                                       .Case("+3dnowa", AMD3DNowAthlon)
381                                       .Case("+3dnow", AMD3DNow)
382                                       .Case("+mmx", MMX)
383                                       .Default(NoMMX3DNow);
384     MMX3DNowLevel = std::max(MMX3DNowLevel, ThreeDNowLevel);
385 
386     XOPEnum XLevel = llvm::StringSwitch<XOPEnum>(Feature)
387                          .Case("+xop", XOP)
388                          .Case("+fma4", FMA4)
389                          .Case("+sse4a", SSE4A)
390                          .Default(NoXOP);
391     XOPLevel = std::max(XOPLevel, XLevel);
392   }
393 
394   // LLVM doesn't have a separate switch for fpmath, so only accept it if it
395   // matches the selected sse level.
396   if ((FPMath == FP_SSE && SSELevel < SSE1) ||
397       (FPMath == FP_387 && SSELevel >= SSE1)) {
398     Diags.Report(diag::err_target_unsupported_fpmath)
399         << (FPMath == FP_SSE ? "sse" : "387");
400     return false;
401   }
402 
403   SimdDefaultAlign =
404       hasFeature("avx512f") ? 512 : hasFeature("avx") ? 256 : 128;
405 
406   // FIXME: We should allow long double type on 32-bits to match with GCC.
407   // This requires backend to be able to lower f80 without x87 first.
408   if (!HasX87 && LongDoubleFormat == &llvm::APFloat::x87DoubleExtended())
409     HasLongDouble = false;
410 
411   return true;
412 }
413 
414 /// X86TargetInfo::getTargetDefines - Return the set of the X86-specific macro
415 /// definitions for this particular subtarget.
416 void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
417                                      MacroBuilder &Builder) const {
418   // Inline assembly supports X86 flag outputs.
419   Builder.defineMacro("__GCC_ASM_FLAG_OUTPUTS__");
420 
421   std::string CodeModel = getTargetOpts().CodeModel;
422   if (CodeModel == "default")
423     CodeModel = "small";
424   Builder.defineMacro("__code_model_" + CodeModel + "__");
425 
426   // Target identification.
427   if (getTriple().getArch() == llvm::Triple::x86_64) {
428     Builder.defineMacro("__amd64__");
429     Builder.defineMacro("__amd64");
430     Builder.defineMacro("__x86_64");
431     Builder.defineMacro("__x86_64__");
432     if (getTriple().getArchName() == "x86_64h") {
433       Builder.defineMacro("__x86_64h");
434       Builder.defineMacro("__x86_64h__");
435     }
436   } else {
437     DefineStd(Builder, "i386", Opts);
438   }
439 
440   Builder.defineMacro("__SEG_GS");
441   Builder.defineMacro("__SEG_FS");
442   Builder.defineMacro("__seg_gs", "__attribute__((address_space(256)))");
443   Builder.defineMacro("__seg_fs", "__attribute__((address_space(257)))");
444 
445   // Subtarget options.
446   // FIXME: We are hard-coding the tune parameters based on the CPU, but they
447   // truly should be based on -mtune options.
448   using namespace llvm::X86;
449   switch (CPU) {
450   case CK_None:
451     break;
452   case CK_i386:
453     // The rest are coming from the i386 define above.
454     Builder.defineMacro("__tune_i386__");
455     break;
456   case CK_i486:
457   case CK_WinChipC6:
458   case CK_WinChip2:
459   case CK_C3:
460     defineCPUMacros(Builder, "i486");
461     break;
462   case CK_PentiumMMX:
463     Builder.defineMacro("__pentium_mmx__");
464     Builder.defineMacro("__tune_pentium_mmx__");
465     [[fallthrough]];
466   case CK_i586:
467   case CK_Pentium:
468     defineCPUMacros(Builder, "i586");
469     defineCPUMacros(Builder, "pentium");
470     break;
471   case CK_Pentium3:
472   case CK_PentiumM:
473     Builder.defineMacro("__tune_pentium3__");
474     [[fallthrough]];
475   case CK_Pentium2:
476   case CK_C3_2:
477     Builder.defineMacro("__tune_pentium2__");
478     [[fallthrough]];
479   case CK_PentiumPro:
480   case CK_i686:
481     defineCPUMacros(Builder, "i686");
482     defineCPUMacros(Builder, "pentiumpro");
483     break;
484   case CK_Pentium4:
485     defineCPUMacros(Builder, "pentium4");
486     break;
487   case CK_Yonah:
488   case CK_Prescott:
489   case CK_Nocona:
490     defineCPUMacros(Builder, "nocona");
491     break;
492   case CK_Core2:
493   case CK_Penryn:
494     defineCPUMacros(Builder, "core2");
495     break;
496   case CK_Bonnell:
497     defineCPUMacros(Builder, "atom");
498     break;
499   case CK_Silvermont:
500     defineCPUMacros(Builder, "slm");
501     break;
502   case CK_Goldmont:
503     defineCPUMacros(Builder, "goldmont");
504     break;
505   case CK_GoldmontPlus:
506     defineCPUMacros(Builder, "goldmont_plus");
507     break;
508   case CK_Tremont:
509     defineCPUMacros(Builder, "tremont");
510     break;
511   case CK_Nehalem:
512   case CK_Westmere:
513   case CK_SandyBridge:
514   case CK_IvyBridge:
515   case CK_Haswell:
516   case CK_Broadwell:
517   case CK_SkylakeClient:
518   case CK_SkylakeServer:
519   case CK_Cascadelake:
520   case CK_Cooperlake:
521   case CK_Cannonlake:
522   case CK_IcelakeClient:
523   case CK_Rocketlake:
524   case CK_IcelakeServer:
525   case CK_Tigerlake:
526   case CK_SapphireRapids:
527   case CK_Alderlake:
528   case CK_Raptorlake:
529   case CK_Meteorlake:
530   case CK_Sierraforest:
531   case CK_Grandridge:
532   case CK_Graniterapids:
533   case CK_Emeraldrapids:
534     // FIXME: Historically, we defined this legacy name, it would be nice to
535     // remove it at some point. We've never exposed fine-grained names for
536     // recent primary x86 CPUs, and we should keep it that way.
537     defineCPUMacros(Builder, "corei7");
538     break;
539   case CK_KNL:
540     defineCPUMacros(Builder, "knl");
541     break;
542   case CK_KNM:
543     break;
544   case CK_Lakemont:
545     defineCPUMacros(Builder, "i586", /*Tuning*/false);
546     defineCPUMacros(Builder, "pentium", /*Tuning*/false);
547     Builder.defineMacro("__tune_lakemont__");
548     break;
549   case CK_K6_2:
550     Builder.defineMacro("__k6_2__");
551     Builder.defineMacro("__tune_k6_2__");
552     [[fallthrough]];
553   case CK_K6_3:
554     if (CPU != CK_K6_2) { // In case of fallthrough
555       // FIXME: GCC may be enabling these in cases where some other k6
556       // architecture is specified but -m3dnow is explicitly provided. The
557       // exact semantics need to be determined and emulated here.
558       Builder.defineMacro("__k6_3__");
559       Builder.defineMacro("__tune_k6_3__");
560     }
561     [[fallthrough]];
562   case CK_K6:
563     defineCPUMacros(Builder, "k6");
564     break;
565   case CK_Athlon:
566   case CK_AthlonXP:
567     defineCPUMacros(Builder, "athlon");
568     if (SSELevel != NoSSE) {
569       Builder.defineMacro("__athlon_sse__");
570       Builder.defineMacro("__tune_athlon_sse__");
571     }
572     break;
573   case CK_K8:
574   case CK_K8SSE3:
575   case CK_x86_64:
576     defineCPUMacros(Builder, "k8");
577     break;
578   case CK_x86_64_v2:
579   case CK_x86_64_v3:
580   case CK_x86_64_v4:
581     break;
582   case CK_AMDFAM10:
583     defineCPUMacros(Builder, "amdfam10");
584     break;
585   case CK_BTVER1:
586     defineCPUMacros(Builder, "btver1");
587     break;
588   case CK_BTVER2:
589     defineCPUMacros(Builder, "btver2");
590     break;
591   case CK_BDVER1:
592     defineCPUMacros(Builder, "bdver1");
593     break;
594   case CK_BDVER2:
595     defineCPUMacros(Builder, "bdver2");
596     break;
597   case CK_BDVER3:
598     defineCPUMacros(Builder, "bdver3");
599     break;
600   case CK_BDVER4:
601     defineCPUMacros(Builder, "bdver4");
602     break;
603   case CK_ZNVER1:
604     defineCPUMacros(Builder, "znver1");
605     break;
606   case CK_ZNVER2:
607     defineCPUMacros(Builder, "znver2");
608     break;
609   case CK_ZNVER3:
610     defineCPUMacros(Builder, "znver3");
611     break;
612   case CK_ZNVER4:
613     defineCPUMacros(Builder, "znver4");
614     break;
615   case CK_Geode:
616     defineCPUMacros(Builder, "geode");
617     break;
618   }
619 
620   // Target properties.
621   Builder.defineMacro("__REGISTER_PREFIX__", "");
622 
623   // Define __NO_MATH_INLINES on linux/x86 so that we don't get inline
624   // functions in glibc header files that use FP Stack inline asm which the
625   // backend can't deal with (PR879).
626   Builder.defineMacro("__NO_MATH_INLINES");
627 
628   if (HasAES)
629     Builder.defineMacro("__AES__");
630 
631   if (HasVAES)
632     Builder.defineMacro("__VAES__");
633 
634   if (HasPCLMUL)
635     Builder.defineMacro("__PCLMUL__");
636 
637   if (HasVPCLMULQDQ)
638     Builder.defineMacro("__VPCLMULQDQ__");
639 
640   // Note, in 32-bit mode, GCC does not define the macro if -mno-sahf. In LLVM,
641   // the feature flag only applies to 64-bit mode.
642   if (HasLAHFSAHF || getTriple().getArch() == llvm::Triple::x86)
643     Builder.defineMacro("__LAHF_SAHF__");
644 
645   if (HasLZCNT)
646     Builder.defineMacro("__LZCNT__");
647 
648   if (HasRDRND)
649     Builder.defineMacro("__RDRND__");
650 
651   if (HasFSGSBASE)
652     Builder.defineMacro("__FSGSBASE__");
653 
654   if (HasBMI)
655     Builder.defineMacro("__BMI__");
656 
657   if (HasBMI2)
658     Builder.defineMacro("__BMI2__");
659 
660   if (HasPOPCNT)
661     Builder.defineMacro("__POPCNT__");
662 
663   if (HasRTM)
664     Builder.defineMacro("__RTM__");
665 
666   if (HasPRFCHW)
667     Builder.defineMacro("__PRFCHW__");
668 
669   if (HasRDSEED)
670     Builder.defineMacro("__RDSEED__");
671 
672   if (HasADX)
673     Builder.defineMacro("__ADX__");
674 
675   if (HasTBM)
676     Builder.defineMacro("__TBM__");
677 
678   if (HasLWP)
679     Builder.defineMacro("__LWP__");
680 
681   if (HasMWAITX)
682     Builder.defineMacro("__MWAITX__");
683 
684   if (HasMOVBE)
685     Builder.defineMacro("__MOVBE__");
686 
687   switch (XOPLevel) {
688   case XOP:
689     Builder.defineMacro("__XOP__");
690     [[fallthrough]];
691   case FMA4:
692     Builder.defineMacro("__FMA4__");
693     [[fallthrough]];
694   case SSE4A:
695     Builder.defineMacro("__SSE4A__");
696     [[fallthrough]];
697   case NoXOP:
698     break;
699   }
700 
701   if (HasFMA)
702     Builder.defineMacro("__FMA__");
703 
704   if (HasF16C)
705     Builder.defineMacro("__F16C__");
706 
707   if (HasGFNI)
708     Builder.defineMacro("__GFNI__");
709 
710   if (HasAVX512CD)
711     Builder.defineMacro("__AVX512CD__");
712   if (HasAVX512VPOPCNTDQ)
713     Builder.defineMacro("__AVX512VPOPCNTDQ__");
714   if (HasAVX512VNNI)
715     Builder.defineMacro("__AVX512VNNI__");
716   if (HasAVX512BF16)
717     Builder.defineMacro("__AVX512BF16__");
718   if (HasAVX512ER)
719     Builder.defineMacro("__AVX512ER__");
720   if (HasAVX512FP16)
721     Builder.defineMacro("__AVX512FP16__");
722   if (HasAVX512PF)
723     Builder.defineMacro("__AVX512PF__");
724   if (HasAVX512DQ)
725     Builder.defineMacro("__AVX512DQ__");
726   if (HasAVX512BITALG)
727     Builder.defineMacro("__AVX512BITALG__");
728   if (HasAVX512BW)
729     Builder.defineMacro("__AVX512BW__");
730   if (HasAVX512VL)
731     Builder.defineMacro("__AVX512VL__");
732   if (HasAVX512VBMI)
733     Builder.defineMacro("__AVX512VBMI__");
734   if (HasAVX512VBMI2)
735     Builder.defineMacro("__AVX512VBMI2__");
736   if (HasAVX512IFMA)
737     Builder.defineMacro("__AVX512IFMA__");
738   if (HasAVX512VP2INTERSECT)
739     Builder.defineMacro("__AVX512VP2INTERSECT__");
740   if (HasSHA)
741     Builder.defineMacro("__SHA__");
742 
743   if (HasFXSR)
744     Builder.defineMacro("__FXSR__");
745   if (HasXSAVE)
746     Builder.defineMacro("__XSAVE__");
747   if (HasXSAVEOPT)
748     Builder.defineMacro("__XSAVEOPT__");
749   if (HasXSAVEC)
750     Builder.defineMacro("__XSAVEC__");
751   if (HasXSAVES)
752     Builder.defineMacro("__XSAVES__");
753   if (HasPKU)
754     Builder.defineMacro("__PKU__");
755   if (HasCLFLUSHOPT)
756     Builder.defineMacro("__CLFLUSHOPT__");
757   if (HasCLWB)
758     Builder.defineMacro("__CLWB__");
759   if (HasWBNOINVD)
760     Builder.defineMacro("__WBNOINVD__");
761   if (HasSHSTK)
762     Builder.defineMacro("__SHSTK__");
763   if (HasSGX)
764     Builder.defineMacro("__SGX__");
765   if (HasPREFETCHI)
766     Builder.defineMacro("__PREFETCHI__");
767   if (HasPREFETCHWT1)
768     Builder.defineMacro("__PREFETCHWT1__");
769   if (HasCLZERO)
770     Builder.defineMacro("__CLZERO__");
771   if (HasKL)
772     Builder.defineMacro("__KL__");
773   if (HasWIDEKL)
774     Builder.defineMacro("__WIDEKL__");
775   if (HasRDPID)
776     Builder.defineMacro("__RDPID__");
777   if (HasRDPRU)
778     Builder.defineMacro("__RDPRU__");
779   if (HasCLDEMOTE)
780     Builder.defineMacro("__CLDEMOTE__");
781   if (HasWAITPKG)
782     Builder.defineMacro("__WAITPKG__");
783   if (HasMOVDIRI)
784     Builder.defineMacro("__MOVDIRI__");
785   if (HasMOVDIR64B)
786     Builder.defineMacro("__MOVDIR64B__");
787   if (HasPCONFIG)
788     Builder.defineMacro("__PCONFIG__");
789   if (HasPTWRITE)
790     Builder.defineMacro("__PTWRITE__");
791   if (HasINVPCID)
792     Builder.defineMacro("__INVPCID__");
793   if (HasENQCMD)
794     Builder.defineMacro("__ENQCMD__");
795   if (HasHRESET)
796     Builder.defineMacro("__HRESET__");
797   if (HasAMXTILE)
798     Builder.defineMacro("__AMX_TILE__");
799   if (HasAMXINT8)
800     Builder.defineMacro("__AMX_INT8__");
801   if (HasAMXBF16)
802     Builder.defineMacro("__AMX_BF16__");
803   if (HasAMXFP16)
804     Builder.defineMacro("__AMX_FP16__");
805   if (HasCMPCCXADD)
806     Builder.defineMacro("__CMPCCXADD__");
807   if (HasRAOINT)
808     Builder.defineMacro("__RAOINT__");
809   if (HasAVXIFMA)
810     Builder.defineMacro("__AVXIFMA__");
811   if (HasAVXNECONVERT)
812     Builder.defineMacro("__AVXNECONVERT__");
813   if (HasAVXVNNI)
814     Builder.defineMacro("__AVXVNNI__");
815   if (HasAVXVNNIINT8)
816     Builder.defineMacro("__AVXVNNIINT8__");
817   if (HasSERIALIZE)
818     Builder.defineMacro("__SERIALIZE__");
819   if (HasTSXLDTRK)
820     Builder.defineMacro("__TSXLDTRK__");
821   if (HasUINTR)
822     Builder.defineMacro("__UINTR__");
823   if (HasCRC32)
824     Builder.defineMacro("__CRC32__");
825 
826   // Each case falls through to the previous one here.
827   switch (SSELevel) {
828   case AVX512F:
829     Builder.defineMacro("__AVX512F__");
830     [[fallthrough]];
831   case AVX2:
832     Builder.defineMacro("__AVX2__");
833     [[fallthrough]];
834   case AVX:
835     Builder.defineMacro("__AVX__");
836     [[fallthrough]];
837   case SSE42:
838     Builder.defineMacro("__SSE4_2__");
839     [[fallthrough]];
840   case SSE41:
841     Builder.defineMacro("__SSE4_1__");
842     [[fallthrough]];
843   case SSSE3:
844     Builder.defineMacro("__SSSE3__");
845     [[fallthrough]];
846   case SSE3:
847     Builder.defineMacro("__SSE3__");
848     [[fallthrough]];
849   case SSE2:
850     Builder.defineMacro("__SSE2__");
851     Builder.defineMacro("__SSE2_MATH__"); // -mfp-math=sse always implied.
852     [[fallthrough]];
853   case SSE1:
854     Builder.defineMacro("__SSE__");
855     Builder.defineMacro("__SSE_MATH__"); // -mfp-math=sse always implied.
856     [[fallthrough]];
857   case NoSSE:
858     break;
859   }
860 
861   if (Opts.MicrosoftExt && getTriple().getArch() == llvm::Triple::x86) {
862     switch (SSELevel) {
863     case AVX512F:
864     case AVX2:
865     case AVX:
866     case SSE42:
867     case SSE41:
868     case SSSE3:
869     case SSE3:
870     case SSE2:
871       Builder.defineMacro("_M_IX86_FP", Twine(2));
872       break;
873     case SSE1:
874       Builder.defineMacro("_M_IX86_FP", Twine(1));
875       break;
876     default:
877       Builder.defineMacro("_M_IX86_FP", Twine(0));
878       break;
879     }
880   }
881 
882   // Each case falls through to the previous one here.
883   switch (MMX3DNowLevel) {
884   case AMD3DNowAthlon:
885     Builder.defineMacro("__3dNOW_A__");
886     [[fallthrough]];
887   case AMD3DNow:
888     Builder.defineMacro("__3dNOW__");
889     [[fallthrough]];
890   case MMX:
891     Builder.defineMacro("__MMX__");
892     [[fallthrough]];
893   case NoMMX3DNow:
894     break;
895   }
896 
897   if (CPU >= CK_i486 || CPU == CK_None) {
898     Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1");
899     Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2");
900     Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4");
901   }
902   if (HasCX8)
903     Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8");
904   if (HasCX16 && getTriple().getArch() == llvm::Triple::x86_64)
905     Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_16");
906 
907   if (HasFloat128)
908     Builder.defineMacro("__SIZEOF_FLOAT128__", "16");
909 }
910 
911 bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
912   return llvm::StringSwitch<bool>(Name)
913       .Case("3dnow", true)
914       .Case("3dnowa", true)
915       .Case("adx", true)
916       .Case("aes", true)
917       .Case("amx-bf16", true)
918       .Case("amx-fp16", true)
919       .Case("amx-int8", true)
920       .Case("amx-tile", true)
921       .Case("avx", true)
922       .Case("avx2", true)
923       .Case("avx512f", true)
924       .Case("avx512cd", true)
925       .Case("avx512vpopcntdq", true)
926       .Case("avx512vnni", true)
927       .Case("avx512bf16", true)
928       .Case("avx512er", true)
929       .Case("avx512fp16", true)
930       .Case("avx512pf", true)
931       .Case("avx512dq", true)
932       .Case("avx512bitalg", true)
933       .Case("avx512bw", true)
934       .Case("avx512vl", true)
935       .Case("avx512vbmi", true)
936       .Case("avx512vbmi2", true)
937       .Case("avx512ifma", true)
938       .Case("avx512vp2intersect", true)
939       .Case("avxifma", true)
940       .Case("avxneconvert", true)
941       .Case("avxvnni", true)
942       .Case("avxvnniint8", true)
943       .Case("bmi", true)
944       .Case("bmi2", true)
945       .Case("cldemote", true)
946       .Case("clflushopt", true)
947       .Case("clwb", true)
948       .Case("clzero", true)
949       .Case("cmpccxadd", true)
950       .Case("crc32", true)
951       .Case("cx16", true)
952       .Case("enqcmd", true)
953       .Case("f16c", true)
954       .Case("fma", true)
955       .Case("fma4", true)
956       .Case("fsgsbase", true)
957       .Case("fxsr", true)
958       .Case("general-regs-only", true)
959       .Case("gfni", true)
960       .Case("hreset", true)
961       .Case("invpcid", true)
962       .Case("kl", true)
963       .Case("widekl", true)
964       .Case("lwp", true)
965       .Case("lzcnt", true)
966       .Case("mmx", true)
967       .Case("movbe", true)
968       .Case("movdiri", true)
969       .Case("movdir64b", true)
970       .Case("mwaitx", true)
971       .Case("pclmul", true)
972       .Case("pconfig", true)
973       .Case("pku", true)
974       .Case("popcnt", true)
975       .Case("prefetchi", true)
976       .Case("prefetchwt1", true)
977       .Case("prfchw", true)
978       .Case("ptwrite", true)
979       .Case("raoint", true)
980       .Case("rdpid", true)
981       .Case("rdpru", true)
982       .Case("rdrnd", true)
983       .Case("rdseed", true)
984       .Case("rtm", true)
985       .Case("sahf", true)
986       .Case("serialize", true)
987       .Case("sgx", true)
988       .Case("sha", true)
989       .Case("shstk", true)
990       .Case("sse", true)
991       .Case("sse2", true)
992       .Case("sse3", true)
993       .Case("ssse3", true)
994       .Case("sse4", true)
995       .Case("sse4.1", true)
996       .Case("sse4.2", true)
997       .Case("sse4a", true)
998       .Case("tbm", true)
999       .Case("tsxldtrk", true)
1000       .Case("uintr", true)
1001       .Case("vaes", true)
1002       .Case("vpclmulqdq", true)
1003       .Case("wbnoinvd", true)
1004       .Case("waitpkg", true)
1005       .Case("x87", true)
1006       .Case("xop", true)
1007       .Case("xsave", true)
1008       .Case("xsavec", true)
1009       .Case("xsaves", true)
1010       .Case("xsaveopt", true)
1011       .Default(false);
1012 }
1013 
1014 bool X86TargetInfo::hasFeature(StringRef Feature) const {
1015   return llvm::StringSwitch<bool>(Feature)
1016       .Case("adx", HasADX)
1017       .Case("aes", HasAES)
1018       .Case("amx-bf16", HasAMXBF16)
1019       .Case("amx-fp16", HasAMXFP16)
1020       .Case("amx-int8", HasAMXINT8)
1021       .Case("amx-tile", HasAMXTILE)
1022       .Case("avx", SSELevel >= AVX)
1023       .Case("avx2", SSELevel >= AVX2)
1024       .Case("avx512f", SSELevel >= AVX512F)
1025       .Case("avx512cd", HasAVX512CD)
1026       .Case("avx512vpopcntdq", HasAVX512VPOPCNTDQ)
1027       .Case("avx512vnni", HasAVX512VNNI)
1028       .Case("avx512bf16", HasAVX512BF16)
1029       .Case("avx512er", HasAVX512ER)
1030       .Case("avx512fp16", HasAVX512FP16)
1031       .Case("avx512pf", HasAVX512PF)
1032       .Case("avx512dq", HasAVX512DQ)
1033       .Case("avx512bitalg", HasAVX512BITALG)
1034       .Case("avx512bw", HasAVX512BW)
1035       .Case("avx512vl", HasAVX512VL)
1036       .Case("avx512vbmi", HasAVX512VBMI)
1037       .Case("avx512vbmi2", HasAVX512VBMI2)
1038       .Case("avx512ifma", HasAVX512IFMA)
1039       .Case("avx512vp2intersect", HasAVX512VP2INTERSECT)
1040       .Case("avxifma", HasAVXIFMA)
1041       .Case("avxneconvert", HasAVXNECONVERT)
1042       .Case("avxvnni", HasAVXVNNI)
1043       .Case("avxvnniint8", HasAVXVNNIINT8)
1044       .Case("bmi", HasBMI)
1045       .Case("bmi2", HasBMI2)
1046       .Case("cldemote", HasCLDEMOTE)
1047       .Case("clflushopt", HasCLFLUSHOPT)
1048       .Case("clwb", HasCLWB)
1049       .Case("clzero", HasCLZERO)
1050       .Case("cmpccxadd", HasCMPCCXADD)
1051       .Case("crc32", HasCRC32)
1052       .Case("cx8", HasCX8)
1053       .Case("cx16", HasCX16)
1054       .Case("enqcmd", HasENQCMD)
1055       .Case("f16c", HasF16C)
1056       .Case("fma", HasFMA)
1057       .Case("fma4", XOPLevel >= FMA4)
1058       .Case("fsgsbase", HasFSGSBASE)
1059       .Case("fxsr", HasFXSR)
1060       .Case("gfni", HasGFNI)
1061       .Case("hreset", HasHRESET)
1062       .Case("invpcid", HasINVPCID)
1063       .Case("kl", HasKL)
1064       .Case("widekl", HasWIDEKL)
1065       .Case("lwp", HasLWP)
1066       .Case("lzcnt", HasLZCNT)
1067       .Case("mm3dnow", MMX3DNowLevel >= AMD3DNow)
1068       .Case("mm3dnowa", MMX3DNowLevel >= AMD3DNowAthlon)
1069       .Case("mmx", MMX3DNowLevel >= MMX)
1070       .Case("movbe", HasMOVBE)
1071       .Case("movdiri", HasMOVDIRI)
1072       .Case("movdir64b", HasMOVDIR64B)
1073       .Case("mwaitx", HasMWAITX)
1074       .Case("pclmul", HasPCLMUL)
1075       .Case("pconfig", HasPCONFIG)
1076       .Case("pku", HasPKU)
1077       .Case("popcnt", HasPOPCNT)
1078       .Case("prefetchi", HasPREFETCHI)
1079       .Case("prefetchwt1", HasPREFETCHWT1)
1080       .Case("prfchw", HasPRFCHW)
1081       .Case("ptwrite", HasPTWRITE)
1082       .Case("raoint", HasRAOINT)
1083       .Case("rdpid", HasRDPID)
1084       .Case("rdpru", HasRDPRU)
1085       .Case("rdrnd", HasRDRND)
1086       .Case("rdseed", HasRDSEED)
1087       .Case("retpoline-external-thunk", HasRetpolineExternalThunk)
1088       .Case("rtm", HasRTM)
1089       .Case("sahf", HasLAHFSAHF)
1090       .Case("serialize", HasSERIALIZE)
1091       .Case("sgx", HasSGX)
1092       .Case("sha", HasSHA)
1093       .Case("shstk", HasSHSTK)
1094       .Case("sse", SSELevel >= SSE1)
1095       .Case("sse2", SSELevel >= SSE2)
1096       .Case("sse3", SSELevel >= SSE3)
1097       .Case("ssse3", SSELevel >= SSSE3)
1098       .Case("sse4.1", SSELevel >= SSE41)
1099       .Case("sse4.2", SSELevel >= SSE42)
1100       .Case("sse4a", XOPLevel >= SSE4A)
1101       .Case("tbm", HasTBM)
1102       .Case("tsxldtrk", HasTSXLDTRK)
1103       .Case("uintr", HasUINTR)
1104       .Case("vaes", HasVAES)
1105       .Case("vpclmulqdq", HasVPCLMULQDQ)
1106       .Case("wbnoinvd", HasWBNOINVD)
1107       .Case("waitpkg", HasWAITPKG)
1108       .Case("x86", true)
1109       .Case("x86_32", getTriple().getArch() == llvm::Triple::x86)
1110       .Case("x86_64", getTriple().getArch() == llvm::Triple::x86_64)
1111       .Case("x87", HasX87)
1112       .Case("xop", XOPLevel >= XOP)
1113       .Case("xsave", HasXSAVE)
1114       .Case("xsavec", HasXSAVEC)
1115       .Case("xsaves", HasXSAVES)
1116       .Case("xsaveopt", HasXSAVEOPT)
1117       .Default(false);
1118 }
1119 
1120 // We can't use a generic validation scheme for the features accepted here
1121 // versus subtarget features accepted in the target attribute because the
1122 // bitfield structure that's initialized in the runtime only supports the
1123 // below currently rather than the full range of subtarget features. (See
1124 // X86TargetInfo::hasFeature for a somewhat comprehensive list).
1125 bool X86TargetInfo::validateCpuSupports(StringRef FeatureStr) const {
1126   return llvm::StringSwitch<bool>(FeatureStr)
1127 #define X86_FEATURE_COMPAT(ENUM, STR, PRIORITY) .Case(STR, true)
1128 #include "llvm/TargetParser/X86TargetParser.def"
1129       .Default(false);
1130 }
1131 
1132 static llvm::X86::ProcessorFeatures getFeature(StringRef Name) {
1133   return llvm::StringSwitch<llvm::X86::ProcessorFeatures>(Name)
1134 #define X86_FEATURE_COMPAT(ENUM, STR, PRIORITY)                                \
1135   .Case(STR, llvm::X86::FEATURE_##ENUM)
1136 
1137 #include "llvm/TargetParser/X86TargetParser.def"
1138       ;
1139   // Note, this function should only be used after ensuring the value is
1140   // correct, so it asserts if the value is out of range.
1141 }
1142 
1143 unsigned X86TargetInfo::multiVersionSortPriority(StringRef Name) const {
1144   // Valid CPUs have a 'key feature' that compares just better than its key
1145   // feature.
1146   using namespace llvm::X86;
1147   CPUKind Kind = parseArchX86(Name);
1148   if (Kind != CK_None) {
1149     ProcessorFeatures KeyFeature = getKeyFeature(Kind);
1150     return (getFeaturePriority(KeyFeature) << 1) + 1;
1151   }
1152 
1153   // Now we know we have a feature, so get its priority and shift it a few so
1154   // that we have sufficient room for the CPUs (above).
1155   return getFeaturePriority(getFeature(Name)) << 1;
1156 }
1157 
1158 bool X86TargetInfo::validateCPUSpecificCPUDispatch(StringRef Name) const {
1159   return llvm::StringSwitch<bool>(Name)
1160 #define CPU_SPECIFIC(NAME, TUNE_NAME, MANGLING, FEATURES) .Case(NAME, true)
1161 #define CPU_SPECIFIC_ALIAS(NEW_NAME, TUNE_NAME, NAME) .Case(NEW_NAME, true)
1162 #include "llvm/TargetParser/X86TargetParser.def"
1163       .Default(false);
1164 }
1165 
1166 static StringRef CPUSpecificCPUDispatchNameDealias(StringRef Name) {
1167   return llvm::StringSwitch<StringRef>(Name)
1168 #define CPU_SPECIFIC_ALIAS(NEW_NAME, TUNE_NAME, NAME) .Case(NEW_NAME, NAME)
1169 #include "llvm/TargetParser/X86TargetParser.def"
1170       .Default(Name);
1171 }
1172 
1173 char X86TargetInfo::CPUSpecificManglingCharacter(StringRef Name) const {
1174   return llvm::StringSwitch<char>(CPUSpecificCPUDispatchNameDealias(Name))
1175 #define CPU_SPECIFIC(NAME, TUNE_NAME, MANGLING, FEATURES) .Case(NAME, MANGLING)
1176 #include "llvm/TargetParser/X86TargetParser.def"
1177       .Default(0);
1178 }
1179 
1180 void X86TargetInfo::getCPUSpecificCPUDispatchFeatures(
1181     StringRef Name, llvm::SmallVectorImpl<StringRef> &Features) const {
1182   StringRef WholeList =
1183       llvm::StringSwitch<StringRef>(CPUSpecificCPUDispatchNameDealias(Name))
1184 #define CPU_SPECIFIC(NAME, TUNE_NAME, MANGLING, FEATURES) .Case(NAME, FEATURES)
1185 #include "llvm/TargetParser/X86TargetParser.def"
1186           .Default("");
1187   WholeList.split(Features, ',', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
1188 }
1189 
1190 StringRef X86TargetInfo::getCPUSpecificTuneName(StringRef Name) const {
1191   return llvm::StringSwitch<StringRef>(Name)
1192 #define CPU_SPECIFIC(NAME, TUNE_NAME, MANGLING, FEATURES) .Case(NAME, TUNE_NAME)
1193 #define CPU_SPECIFIC_ALIAS(NEW_NAME, TUNE_NAME, NAME) .Case(NEW_NAME, TUNE_NAME)
1194 #include "llvm/TargetParser/X86TargetParser.def"
1195       .Default("");
1196 }
1197 
1198 // We can't use a generic validation scheme for the cpus accepted here
1199 // versus subtarget cpus accepted in the target attribute because the
1200 // variables intitialized by the runtime only support the below currently
1201 // rather than the full range of cpus.
1202 bool X86TargetInfo::validateCpuIs(StringRef FeatureStr) const {
1203   return llvm::StringSwitch<bool>(FeatureStr)
1204 #define X86_VENDOR(ENUM, STRING) .Case(STRING, true)
1205 #define X86_CPU_TYPE_ALIAS(ENUM, ALIAS) .Case(ALIAS, true)
1206 #define X86_CPU_TYPE(ENUM, STR) .Case(STR, true)
1207 #define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS) .Case(ALIAS, true)
1208 #define X86_CPU_SUBTYPE(ENUM, STR) .Case(STR, true)
1209 #include "llvm/TargetParser/X86TargetParser.def"
1210       .Default(false);
1211 }
1212 
1213 static unsigned matchAsmCCConstraint(const char *&Name) {
1214   auto RV = llvm::StringSwitch<unsigned>(Name)
1215                 .Case("@cca", 4)
1216                 .Case("@ccae", 5)
1217                 .Case("@ccb", 4)
1218                 .Case("@ccbe", 5)
1219                 .Case("@ccc", 4)
1220                 .Case("@cce", 4)
1221                 .Case("@ccz", 4)
1222                 .Case("@ccg", 4)
1223                 .Case("@ccge", 5)
1224                 .Case("@ccl", 4)
1225                 .Case("@ccle", 5)
1226                 .Case("@ccna", 5)
1227                 .Case("@ccnae", 6)
1228                 .Case("@ccnb", 5)
1229                 .Case("@ccnbe", 6)
1230                 .Case("@ccnc", 5)
1231                 .Case("@ccne", 5)
1232                 .Case("@ccnz", 5)
1233                 .Case("@ccng", 5)
1234                 .Case("@ccnge", 6)
1235                 .Case("@ccnl", 5)
1236                 .Case("@ccnle", 6)
1237                 .Case("@ccno", 5)
1238                 .Case("@ccnp", 5)
1239                 .Case("@ccns", 5)
1240                 .Case("@cco", 4)
1241                 .Case("@ccp", 4)
1242                 .Case("@ccs", 4)
1243                 .Default(0);
1244   return RV;
1245 }
1246 
1247 bool X86TargetInfo::validateAsmConstraint(
1248     const char *&Name, TargetInfo::ConstraintInfo &Info) const {
1249   switch (*Name) {
1250   default:
1251     return false;
1252   // Constant constraints.
1253   case 'e': // 32-bit signed integer constant for use with sign-extending x86_64
1254             // instructions.
1255   case 'Z': // 32-bit unsigned integer constant for use with zero-extending
1256             // x86_64 instructions.
1257   case 's':
1258     Info.setRequiresImmediate();
1259     return true;
1260   case 'I':
1261     Info.setRequiresImmediate(0, 31);
1262     return true;
1263   case 'J':
1264     Info.setRequiresImmediate(0, 63);
1265     return true;
1266   case 'K':
1267     Info.setRequiresImmediate(-128, 127);
1268     return true;
1269   case 'L':
1270     Info.setRequiresImmediate({int(0xff), int(0xffff), int(0xffffffff)});
1271     return true;
1272   case 'M':
1273     Info.setRequiresImmediate(0, 3);
1274     return true;
1275   case 'N':
1276     Info.setRequiresImmediate(0, 255);
1277     return true;
1278   case 'O':
1279     Info.setRequiresImmediate(0, 127);
1280     return true;
1281   // Register constraints.
1282   case 'Y': // 'Y' is the first character for several 2-character constraints.
1283     // Shift the pointer to the second character of the constraint.
1284     Name++;
1285     switch (*Name) {
1286     default:
1287       return false;
1288     case 'z': // First SSE register.
1289     case '2':
1290     case 't': // Any SSE register, when SSE2 is enabled.
1291     case 'i': // Any SSE register, when SSE2 and inter-unit moves enabled.
1292     case 'm': // Any MMX register, when inter-unit moves enabled.
1293     case 'k': // AVX512 arch mask registers: k1-k7.
1294       Info.setAllowsRegister();
1295       return true;
1296     }
1297   case 'f': // Any x87 floating point stack register.
1298     // Constraint 'f' cannot be used for output operands.
1299     if (Info.ConstraintStr[0] == '=')
1300       return false;
1301     Info.setAllowsRegister();
1302     return true;
1303   case 'a': // eax.
1304   case 'b': // ebx.
1305   case 'c': // ecx.
1306   case 'd': // edx.
1307   case 'S': // esi.
1308   case 'D': // edi.
1309   case 'A': // edx:eax.
1310   case 't': // Top of floating point stack.
1311   case 'u': // Second from top of floating point stack.
1312   case 'q': // Any register accessible as [r]l: a, b, c, and d.
1313   case 'y': // Any MMX register.
1314   case 'v': // Any {X,Y,Z}MM register (Arch & context dependent)
1315   case 'x': // Any SSE register.
1316   case 'k': // Any AVX512 mask register (same as Yk, additionally allows k0
1317             // for intermideate k reg operations).
1318   case 'Q': // Any register accessible as [r]h: a, b, c, and d.
1319   case 'R': // "Legacy" registers: ax, bx, cx, dx, di, si, sp, bp.
1320   case 'l': // "Index" registers: any general register that can be used as an
1321             // index in a base+index memory access.
1322     Info.setAllowsRegister();
1323     return true;
1324   // Floating point constant constraints.
1325   case 'C': // SSE floating point constant.
1326   case 'G': // x87 floating point constant.
1327     return true;
1328   case '@':
1329     // CC condition changes.
1330     if (auto Len = matchAsmCCConstraint(Name)) {
1331       Name += Len - 1;
1332       Info.setAllowsRegister();
1333       return true;
1334     }
1335     return false;
1336   }
1337 }
1338 
1339 // Below is based on the following information:
1340 // +------------------------------------+-------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+
1341 // |           Processor Name           | Cache Line Size (Bytes) |                                                                            Source                                                                            |
1342 // +------------------------------------+-------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+
1343 // | i386                               |                      64 | https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf                                          |
1344 // | i486                               |                      16 | "four doublewords" (doubleword = 32 bits, 4 * 32 bits = 128 bits = 16 bytes) https://en.wikichip.org/w/images/d/d3/i486_MICROPROCESSOR_HARDWARE_REFERENCE_MANUAL_%281990%29.pdf and http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.126.4216&rep=rep1&type=pdf (page 29) |
1345 // | i586/Pentium MMX                   |                      32 | https://www.7-cpu.com/cpu/P-MMX.html                                                                                                                         |
1346 // | i686/Pentium                       |                      32 | https://www.7-cpu.com/cpu/P6.html                                                                                                                            |
1347 // | Netburst/Pentium4                  |                      64 | https://www.7-cpu.com/cpu/P4-180.html                                                                                                                        |
1348 // | Atom                               |                      64 | https://www.7-cpu.com/cpu/Atom.html                                                                                                                          |
1349 // | Westmere                           |                      64 | https://en.wikichip.org/wiki/intel/microarchitectures/sandy_bridge_(client) "Cache Architecture"                                                             |
1350 // | Sandy Bridge                       |                      64 | https://en.wikipedia.org/wiki/Sandy_Bridge and https://www.7-cpu.com/cpu/SandyBridge.html                                                                    |
1351 // | Ivy Bridge                         |                      64 | https://blog.stuffedcow.net/2013/01/ivb-cache-replacement/ and https://www.7-cpu.com/cpu/IvyBridge.html                                                      |
1352 // | Haswell                            |                      64 | https://www.7-cpu.com/cpu/Haswell.html                                                                                                                       |
1353 // | Broadwell                          |                      64 | https://www.7-cpu.com/cpu/Broadwell.html                                                                                                                     |
1354 // | Skylake (including skylake-avx512) |                      64 | https://www.nas.nasa.gov/hecc/support/kb/skylake-processors_550.html "Cache Hierarchy"                                                                       |
1355 // | Cascade Lake                       |                      64 | https://www.nas.nasa.gov/hecc/support/kb/cascade-lake-processors_579.html "Cache Hierarchy"                                                                  |
1356 // | Kaby Lake                          |                      64 | https://en.wikichip.org/wiki/intel/microarchitectures/kaby_lake "Memory Hierarchy"                                                                           |
1357 // | Ice Lake                           |                      64 | https://www.7-cpu.com/cpu/Ice_Lake.html                                                                                                                      |
1358 // | Knights Landing                    |                      64 | https://software.intel.com/en-us/articles/intel-xeon-phi-processor-7200-family-memory-management-optimizations "The Intel® Xeon Phi™ Processor Architecture" |
1359 // | Knights Mill                       |                      64 | https://software.intel.com/sites/default/files/managed/9e/bc/64-ia-32-architectures-optimization-manual.pdf?countrylabel=Colombia "2.5.5.2 L1 DCache "       |
1360 // +------------------------------------+-------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+
1361 std::optional<unsigned> X86TargetInfo::getCPUCacheLineSize() const {
1362   using namespace llvm::X86;
1363   switch (CPU) {
1364     // i386
1365     case CK_i386:
1366     // i486
1367     case CK_i486:
1368     case CK_WinChipC6:
1369     case CK_WinChip2:
1370     case CK_C3:
1371     // Lakemont
1372     case CK_Lakemont:
1373       return 16;
1374 
1375     // i586
1376     case CK_i586:
1377     case CK_Pentium:
1378     case CK_PentiumMMX:
1379     // i686
1380     case CK_PentiumPro:
1381     case CK_i686:
1382     case CK_Pentium2:
1383     case CK_Pentium3:
1384     case CK_PentiumM:
1385     case CK_C3_2:
1386     // K6
1387     case CK_K6:
1388     case CK_K6_2:
1389     case CK_K6_3:
1390     // Geode
1391     case CK_Geode:
1392       return 32;
1393 
1394     // Netburst
1395     case CK_Pentium4:
1396     case CK_Prescott:
1397     case CK_Nocona:
1398     // Atom
1399     case CK_Bonnell:
1400     case CK_Silvermont:
1401     case CK_Goldmont:
1402     case CK_GoldmontPlus:
1403     case CK_Tremont:
1404 
1405     case CK_Westmere:
1406     case CK_SandyBridge:
1407     case CK_IvyBridge:
1408     case CK_Haswell:
1409     case CK_Broadwell:
1410     case CK_SkylakeClient:
1411     case CK_SkylakeServer:
1412     case CK_Cascadelake:
1413     case CK_Nehalem:
1414     case CK_Cooperlake:
1415     case CK_Cannonlake:
1416     case CK_Tigerlake:
1417     case CK_SapphireRapids:
1418     case CK_IcelakeClient:
1419     case CK_Rocketlake:
1420     case CK_IcelakeServer:
1421     case CK_Alderlake:
1422     case CK_Raptorlake:
1423     case CK_Meteorlake:
1424     case CK_Sierraforest:
1425     case CK_Grandridge:
1426     case CK_Graniterapids:
1427     case CK_Emeraldrapids:
1428     case CK_KNL:
1429     case CK_KNM:
1430     // K7
1431     case CK_Athlon:
1432     case CK_AthlonXP:
1433     // K8
1434     case CK_K8:
1435     case CK_K8SSE3:
1436     case CK_AMDFAM10:
1437     // Bobcat
1438     case CK_BTVER1:
1439     case CK_BTVER2:
1440     // Bulldozer
1441     case CK_BDVER1:
1442     case CK_BDVER2:
1443     case CK_BDVER3:
1444     case CK_BDVER4:
1445     // Zen
1446     case CK_ZNVER1:
1447     case CK_ZNVER2:
1448     case CK_ZNVER3:
1449     case CK_ZNVER4:
1450     // Deprecated
1451     case CK_x86_64:
1452     case CK_x86_64_v2:
1453     case CK_x86_64_v3:
1454     case CK_x86_64_v4:
1455     case CK_Yonah:
1456     case CK_Penryn:
1457     case CK_Core2:
1458       return 64;
1459 
1460     // The following currently have unknown cache line sizes (but they are probably all 64):
1461     // Core
1462     case CK_None:
1463       return std::nullopt;
1464   }
1465   llvm_unreachable("Unknown CPU kind");
1466 }
1467 
1468 bool X86TargetInfo::validateOutputSize(const llvm::StringMap<bool> &FeatureMap,
1469                                        StringRef Constraint,
1470                                        unsigned Size) const {
1471   // Strip off constraint modifiers.
1472   while (Constraint[0] == '=' || Constraint[0] == '+' || Constraint[0] == '&')
1473     Constraint = Constraint.substr(1);
1474 
1475   return validateOperandSize(FeatureMap, Constraint, Size);
1476 }
1477 
1478 bool X86TargetInfo::validateInputSize(const llvm::StringMap<bool> &FeatureMap,
1479                                       StringRef Constraint,
1480                                       unsigned Size) const {
1481   return validateOperandSize(FeatureMap, Constraint, Size);
1482 }
1483 
1484 bool X86TargetInfo::validateOperandSize(const llvm::StringMap<bool> &FeatureMap,
1485                                         StringRef Constraint,
1486                                         unsigned Size) const {
1487   switch (Constraint[0]) {
1488   default:
1489     break;
1490   case 'k':
1491   // Registers k0-k7 (AVX512) size limit is 64 bit.
1492   case 'y':
1493     return Size <= 64;
1494   case 'f':
1495   case 't':
1496   case 'u':
1497     return Size <= 128;
1498   case 'Y':
1499     // 'Y' is the first character for several 2-character constraints.
1500     switch (Constraint[1]) {
1501     default:
1502       return false;
1503     case 'm':
1504       // 'Ym' is synonymous with 'y'.
1505     case 'k':
1506       return Size <= 64;
1507     case 'z':
1508       // XMM0/YMM/ZMM0
1509       if (hasFeatureEnabled(FeatureMap, "avx512f"))
1510         // ZMM0 can be used if target supports AVX512F.
1511         return Size <= 512U;
1512       else if (hasFeatureEnabled(FeatureMap, "avx"))
1513         // YMM0 can be used if target supports AVX.
1514         return Size <= 256U;
1515       else if (hasFeatureEnabled(FeatureMap, "sse"))
1516         return Size <= 128U;
1517       return false;
1518     case 'i':
1519     case 't':
1520     case '2':
1521       // 'Yi','Yt','Y2' are synonymous with 'x' when SSE2 is enabled.
1522       if (SSELevel < SSE2)
1523         return false;
1524       break;
1525     }
1526     break;
1527   case 'v':
1528   case 'x':
1529     if (hasFeatureEnabled(FeatureMap, "avx512f"))
1530       // 512-bit zmm registers can be used if target supports AVX512F.
1531       return Size <= 512U;
1532     else if (hasFeatureEnabled(FeatureMap, "avx"))
1533       // 256-bit ymm registers can be used if target supports AVX.
1534       return Size <= 256U;
1535     return Size <= 128U;
1536 
1537   }
1538 
1539   return true;
1540 }
1541 
1542 std::string X86TargetInfo::convertConstraint(const char *&Constraint) const {
1543   switch (*Constraint) {
1544   case '@':
1545     if (auto Len = matchAsmCCConstraint(Constraint)) {
1546       std::string Converted = "{" + std::string(Constraint, Len) + "}";
1547       Constraint += Len - 1;
1548       return Converted;
1549     }
1550     return std::string(1, *Constraint);
1551   case 'a':
1552     return std::string("{ax}");
1553   case 'b':
1554     return std::string("{bx}");
1555   case 'c':
1556     return std::string("{cx}");
1557   case 'd':
1558     return std::string("{dx}");
1559   case 'S':
1560     return std::string("{si}");
1561   case 'D':
1562     return std::string("{di}");
1563   case 'p': // Keep 'p' constraint (address).
1564     return std::string("p");
1565   case 't': // top of floating point stack.
1566     return std::string("{st}");
1567   case 'u':                        // second from top of floating point stack.
1568     return std::string("{st(1)}"); // second from top of floating point stack.
1569   case 'Y':
1570     switch (Constraint[1]) {
1571     default:
1572       // Break from inner switch and fall through (copy single char),
1573       // continue parsing after copying the current constraint into
1574       // the return string.
1575       break;
1576     case 'k':
1577     case 'm':
1578     case 'i':
1579     case 't':
1580     case 'z':
1581     case '2':
1582       // "^" hints llvm that this is a 2 letter constraint.
1583       // "Constraint++" is used to promote the string iterator
1584       // to the next constraint.
1585       return std::string("^") + std::string(Constraint++, 2);
1586     }
1587     [[fallthrough]];
1588   default:
1589     return std::string(1, *Constraint);
1590   }
1591 }
1592 
1593 void X86TargetInfo::fillValidCPUList(SmallVectorImpl<StringRef> &Values) const {
1594   bool Only64Bit = getTriple().getArch() != llvm::Triple::x86;
1595   llvm::X86::fillValidCPUArchList(Values, Only64Bit);
1596 }
1597 
1598 void X86TargetInfo::fillValidTuneCPUList(SmallVectorImpl<StringRef> &Values) const {
1599   llvm::X86::fillValidTuneCPUList(Values);
1600 }
1601 
1602 ArrayRef<const char *> X86TargetInfo::getGCCRegNames() const {
1603   return llvm::ArrayRef(GCCRegNames);
1604 }
1605 
1606 ArrayRef<TargetInfo::AddlRegName> X86TargetInfo::getGCCAddlRegNames() const {
1607   return llvm::ArrayRef(AddlRegNames);
1608 }
1609 
1610 ArrayRef<Builtin::Info> X86_32TargetInfo::getTargetBuiltins() const {
1611   return llvm::ArrayRef(BuiltinInfoX86, clang::X86::LastX86CommonBuiltin -
1612                                             Builtin::FirstTSBuiltin + 1);
1613 }
1614 
1615 ArrayRef<Builtin::Info> X86_64TargetInfo::getTargetBuiltins() const {
1616   return llvm::ArrayRef(BuiltinInfoX86,
1617                         X86::LastTSBuiltin - Builtin::FirstTSBuiltin);
1618 }
1619