xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86.td (revision a2464ee12761660f50d0b6f59f233949ebcacc87)
1//===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This is a target description file for the Intel i386 architecture, referred
10// to here as the "X86" architecture.
11//
12//===----------------------------------------------------------------------===//
13
14// Get the target-independent interfaces which we are implementing...
15//
16include "llvm/Target/Target.td"
17
18//===----------------------------------------------------------------------===//
19// X86 Subtarget state
20//
21
// Operating-mode selectors. Each one sets its own In*BitMode subtarget field
// to "true".
def Mode64Bit
    : SubtargetFeature<"64bit-mode", "In64BitMode", "true",
                       "64-bit mode (x86_64)">;
def Mode32Bit
    : SubtargetFeature<"32bit-mode", "In32BitMode", "true",
                       "32-bit mode (80386)">;
def Mode16Bit
    : SubtargetFeature<"16bit-mode", "In16BitMode", "true",
                       "16-bit mode (i8086)">;
28
29//===----------------------------------------------------------------------===//
30// X86 Subtarget ISA features
31//===----------------------------------------------------------------------===//
32
// Baseline ISA extensions. Every feature in this group sets a boolean Has*
// subtarget field to "true" and carries no implied-feature list.
def FeatureX87
    : SubtargetFeature<"x87", "HasX87", "true",
                       "Enable X87 float instructions">;

def FeatureNOPL
    : SubtargetFeature<"nopl", "HasNOPL", "true", "Enable NOPL instruction">;

def FeatureCMOV
    : SubtargetFeature<"cmov", "HasCMov", "true",
                       "Enable conditional move instructions">;

def FeatureCMPXCHG8B
    : SubtargetFeature<"cx8", "HasCmpxchg8b", "true",
                       "Support CMPXCHG8B instructions">;

def FeatureCRC32
    : SubtargetFeature<"crc32", "HasCRC32", "true",
                       "Enable SSE 4.2 CRC32 instruction">;

def FeaturePOPCNT
    : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
                       "Support POPCNT instruction">;

def FeatureFXSR
    : SubtargetFeature<"fxsr", "HasFXSR", "true",
                       "Support fxsave/fxrestore instructions">;
53
// XSAVE family. The three variants (xsaveopt, xsavec, xsaves) each list the
// base FeatureXSAVE as an implied feature.
def FeatureXSAVE
    : SubtargetFeature<"xsave", "HasXSAVE", "true",
                       "Support xsave instructions">;

def FeatureXSAVEOPT
    : SubtargetFeature<"xsaveopt", "HasXSAVEOPT", "true",
                       "Support xsaveopt instructions", [FeatureXSAVE]>;

def FeatureXSAVEC
    : SubtargetFeature<"xsavec", "HasXSAVEC", "true",
                       "Support xsavec instructions", [FeatureXSAVE]>;

def FeatureXSAVES
    : SubtargetFeature<"xsaves", "HasXSAVES", "true",
                       "Support xsaves instructions", [FeatureXSAVE]>;
68
// SSE level ladder. All of these write the shared X86SSELevel field, and each
// level lists the one directly below it as an implied feature.
def FeatureSSE1
    : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
                       "Enable SSE instructions">;
def FeatureSSE2
    : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
                       "Enable SSE2 instructions", [FeatureSSE1]>;
def FeatureSSE3
    : SubtargetFeature<"sse3", "X86SSELevel", "SSE3",
                       "Enable SSE3 instructions", [FeatureSSE2]>;
def FeatureSSSE3
    : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3",
                       "Enable SSSE3 instructions", [FeatureSSE3]>;
def FeatureSSE41
    : SubtargetFeature<"sse4.1", "X86SSELevel", "SSE41",
                       "Enable SSE 4.1 instructions", [FeatureSSSE3]>;
def FeatureSSE42
    : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42",
                       "Enable SSE 4.2 instructions", [FeatureSSE41]>;
// MMX is deliberately kept out of the SSE chain: for odd compatibility
// reasons it must be possible to switch MMX off explicitly while SSE and
// later levels stay enabled. MMX and the 3DNow! levels share the
// X863DNowLevel field instead.
def FeatureMMX
    : SubtargetFeature<"mmx", "X863DNowLevel", "MMX",
                       "Enable MMX instructions">;
def Feature3DNow
    : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
                       "Enable 3DNow! instructions", [FeatureMMX]>;
def Feature3DNowA
    : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
                       "Enable 3DNow! Athlon instructions", [Feature3DNow]>;
// Every x86-64 CPU has SSE2, yet SSE2 is intentionally not listed as implied
// here: it can be turned off (e.g. when building OS kernels) without leaving
// 64-bit mode. No other feature should imply this bit. Its purpose is to
// enforce that only 64-bit capable CPUs are selected in 64-bit mode.
def Feature64Bit
    : SubtargetFeature<"64bit", "HasX86_64", "true",
                       "Support 64-bit instructions">;
def FeatureCMPXCHG16B
    : SubtargetFeature<"cx16", "HasCmpxchg16b", "true",
                       "64-bit with cmpxchg16b", [FeatureCMPXCHG8B]>;
def FeatureSSE4A
    : SubtargetFeature<"sse4a", "HasSSE4A", "true",
                       "Support SSE 4a instructions", [FeatureSSE3]>;
109
// AVX levels continue the X86SSELevel ladder on top of SSE 4.2.
def FeatureAVX
    : SubtargetFeature<"avx", "X86SSELevel", "AVX",
                       "Enable AVX instructions", [FeatureSSE42]>;
def FeatureAVX2
    : SubtargetFeature<"avx2", "X86SSELevel", "AVX2",
                       "Enable AVX2 instructions", [FeatureAVX]>;
// Three-operand FMA (fused multiply-add). Sets HasFMA and implies AVX.
// The help text previously read "multiple-add", a typo for "multiply-add".
def FeatureFMA
    : SubtargetFeature<"fma", "HasFMA", "true",
                       "Enable three-operand fused multiply-add",
                       [FeatureAVX]>;
// F16C sets HasF16C and implies AVX. AVX-512 Foundation is the top
// X86SSELevel value defined here and implies AVX2, FMA and F16C.
def FeatureF16C
    : SubtargetFeature<"f16c", "HasF16C", "true",
                       "Support 16-bit floating point conversion instructions",
                       [FeatureAVX]>;
def FeatureAVX512
    : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512F",
                       "Enable AVX-512 instructions",
                       [FeatureAVX2, FeatureFMA, FeatureF16C]>;
// AVX-512 sub-extensions. Most imply FeatureAVX512 directly; VBMI and VBMI2
// imply FeatureBWI instead, and PREFETCHWT1 implies nothing.
def FeatureERI
    : SubtargetFeature<"avx512er", "HasERI", "true",
                       "Enable AVX-512 Exponential and Reciprocal Instructions",
                       [FeatureAVX512]>;
def FeatureCDI
    : SubtargetFeature<"avx512cd", "HasCDI", "true",
                       "Enable AVX-512 Conflict Detection Instructions",
                       [FeatureAVX512]>;
def FeatureVPOPCNTDQ
    : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ", "true",
                       "Enable AVX-512 Population Count Instructions",
                       [FeatureAVX512]>;
def FeaturePFI
    : SubtargetFeature<"avx512pf", "HasPFI", "true",
                       "Enable AVX-512 PreFetch Instructions",
                       [FeatureAVX512]>;
def FeaturePREFETCHWT1
    : SubtargetFeature<"prefetchwt1", "HasPREFETCHWT1", "true",
                       "Prefetch with Intent to Write and T1 Hint">;
def FeatureDQI
    : SubtargetFeature<"avx512dq", "HasDQI", "true",
                       "Enable AVX-512 Doubleword and Quadword Instructions",
                       [FeatureAVX512]>;
def FeatureBWI
    : SubtargetFeature<"avx512bw", "HasBWI", "true",
                       "Enable AVX-512 Byte and Word Instructions",
                       [FeatureAVX512]>;
def FeatureVLX
    : SubtargetFeature<"avx512vl", "HasVLX", "true",
                       "Enable AVX-512 Vector Length eXtensions",
                       [FeatureAVX512]>;
def FeatureVBMI
    : SubtargetFeature<"avx512vbmi", "HasVBMI", "true",
                       "Enable AVX-512 Vector Byte Manipulation Instructions",
                       [FeatureBWI]>;
def FeatureVBMI2
    : SubtargetFeature<
          "avx512vbmi2", "HasVBMI2", "true",
          "Enable AVX-512 further Vector Byte Manipulation Instructions",
          [FeatureBWI]>;
// AVX-512 integer fused multiply-add. Sets HasIFMA and implies AVX-512F.
// The help text previously read "Multiple-Add", a typo for "Multiply-Add".
def FeatureIFMA
    : SubtargetFeature<"avx512ifma", "HasIFMA", "true",
                       "Enable AVX-512 Integer Fused Multiply-Add",
                       [FeatureAVX512]>;
// Protection keys plus further vector extensions. Note that AVXVNNI implies
// only AVX2, whereas the avx512* entries imply AVX-512F or BWI.
def FeaturePKU
    : SubtargetFeature<"pku", "HasPKU", "true", "Enable protection keys">;
def FeatureVNNI
    : SubtargetFeature<"avx512vnni", "HasVNNI", "true",
                       "Enable AVX-512 Vector Neural Network Instructions",
                       [FeatureAVX512]>;
def FeatureAVXVNNI
    : SubtargetFeature<"avxvnni", "HasAVXVNNI", "true",
                       "Support AVX_VNNI encoding", [FeatureAVX2]>;
def FeatureBF16
    : SubtargetFeature<"avx512bf16", "HasBF16", "true",
                       "Support bfloat16 floating point", [FeatureBWI]>;
def FeatureBITALG
    : SubtargetFeature<"avx512bitalg", "HasBITALG", "true",
                       "Enable AVX-512 Bit Algorithms", [FeatureBWI]>;
def FeatureVP2INTERSECT
    : SubtargetFeature<"avx512vp2intersect", "HasVP2INTERSECT", "true",
                       "Enable AVX-512 vp2intersect", [FeatureAVX512]>;
// FIXME: The FP16 scalar intrinsics use the v8f16 type, which ought to be
// guarded by hasVLX; for now FeatureFP16 simply implies VLX.
// FIXME: FP16 conversions between f16 and i64 customize the v8i64 type, which
// ought to be guarded by hasDQI; for now FeatureFP16 simply implies DQI.
def FeatureFP16
    : SubtargetFeature<"avx512fp16", "HasFP16", "true",
                       "Support 16-bit floating point",
                       [FeatureBWI, FeatureVLX, FeatureDQI]>;
// Carry-less multiply and Galois-field arithmetic. PCLMUL and GFNI imply
// SSE2; the VEX-era vpclmulqdq implies both AVX and PCLMUL.
def FeaturePCLMUL
    : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
                       "Enable packed carry-less multiplication instructions",
                       [FeatureSSE2]>;
def FeatureGFNI
    : SubtargetFeature<"gfni", "HasGFNI", "true",
                       "Enable Galois Field Arithmetic Instructions",
                       [FeatureSSE2]>;
def FeatureVPCLMULQDQ
    : SubtargetFeature<"vpclmulqdq", "HasVPCLMULQDQ", "true",
                       "Enable vpclmulqdq instructions",
                       [FeatureAVX, FeaturePCLMUL]>;
// Four-operand FMA (fused multiply-add). Sets HasFMA4 and implies AVX and
// SSE4A. The help text previously read "multiple-add", a typo for
// "multiply-add".
def FeatureFMA4
    : SubtargetFeature<"fma4", "HasFMA4", "true",
                       "Enable four-operand fused multiply-add",
                       [FeatureAVX, FeatureSSE4A]>;
// XOP implies FMA4. AES implies SSE2, and VAES implies AVX plus AES.
// sse-unaligned-mem is a standalone boolean with no implied features.
def FeatureXOP
    : SubtargetFeature<"xop", "HasXOP", "true", "Enable XOP instructions",
                       [FeatureFMA4]>;
def FeatureSSEUnalignedMem
    : SubtargetFeature<"sse-unaligned-mem", "HasSSEUnalignedMem", "true",
                       "Allow unaligned memory operands with SSE instructions">;
def FeatureAES
    : SubtargetFeature<"aes", "HasAES", "true", "Enable AES instructions",
                       [FeatureSSE2]>;
def FeatureVAES
    : SubtargetFeature<
          "vaes", "HasVAES", "true",
          "Promote selected AES instructions to AVX512/AVX registers",
          [FeatureAVX, FeatureAES]>;
// Miscellaneous standalone boolean ISA features; none of them implies any
// other feature.
def FeatureTBM
    : SubtargetFeature<"tbm", "HasTBM", "true", "Enable TBM instructions">;
def FeatureLWP
    : SubtargetFeature<"lwp", "HasLWP", "true", "Enable LWP instructions">;
def FeatureMOVBE
    : SubtargetFeature<"movbe", "HasMOVBE", "true",
                       "Support MOVBE instruction">;
def FeatureRDRAND
    : SubtargetFeature<"rdrnd", "HasRDRAND", "true",
                       "Support RDRAND instruction">;
def FeatureFSGSBase
    : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true",
                       "Support FS/GS Base instructions">;
def FeatureLZCNT
    : SubtargetFeature<"lzcnt", "HasLZCNT", "true",
                       "Support LZCNT instruction">;
def FeatureBMI
    : SubtargetFeature<"bmi", "HasBMI", "true", "Support BMI instructions">;
def FeatureBMI2
    : SubtargetFeature<"bmi2", "HasBMI2", "true",
                       "Support BMI2 instructions">;
def FeatureRTM
    : SubtargetFeature<"rtm", "HasRTM", "true", "Support RTM instructions">;
def FeatureADX
    : SubtargetFeature<"adx", "HasADX", "true", "Support ADX instructions">;
// More standalone boolean ISA features. SHA is the only one in this group
// with an implied feature (SSE2).
def FeatureSHA
    : SubtargetFeature<"sha", "HasSHA", "true", "Enable SHA instructions",
                       [FeatureSSE2]>;
def FeatureSHSTK
    : SubtargetFeature<"shstk", "HasSHSTK", "true",
                       "Support CET Shadow-Stack instructions">;
def FeaturePRFCHW
    : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
                       "Support PRFCHW instructions">;
def FeatureRDSEED
    : SubtargetFeature<"rdseed", "HasRDSEED", "true",
                       "Support RDSEED instruction">;
def FeatureLAHFSAHF
    : SubtargetFeature<"sahf", "HasLAHFSAHF64", "true",
                       "Support LAHF and SAHF instructions in 64-bit mode">;
def FeatureMWAITX
    : SubtargetFeature<"mwaitx", "HasMWAITX", "true",
                       "Enable MONITORX/MWAITX timer functionality">;
def FeatureCLZERO
    : SubtargetFeature<"clzero", "HasCLZERO", "true",
                       "Enable Cache Line Zero">;
def FeatureCLDEMOTE
    : SubtargetFeature<"cldemote", "HasCLDEMOTE", "true",
                       "Enable Cache Demote">;
def FeaturePTWRITE
    : SubtargetFeature<"ptwrite", "HasPTWRITE", "true",
                       "Support ptwrite instruction">;
// AMX: the INT8 and BF16 extensions both imply the base AMX-TILE feature.
def FeatureAMXTILE
    : SubtargetFeature<"amx-tile", "HasAMXTILE", "true",
                       "Support AMX-TILE instructions">;
def FeatureAMXINT8
    : SubtargetFeature<"amx-int8", "HasAMXINT8", "true",
                       "Support AMX-INT8 instructions", [FeatureAMXTILE]>;
def FeatureAMXBF16
    : SubtargetFeature<"amx-bf16", "HasAMXBF16", "true",
                       "Support AMX-BF16 instructions", [FeatureAMXTILE]>;
// System and cache-management features; all are standalone booleans with no
// implied features.
def FeatureINVPCID
    : SubtargetFeature<"invpcid", "HasINVPCID", "true",
                       "Invalidate Process-Context Identifier">;
def FeatureSGX
    : SubtargetFeature<"sgx", "HasSGX", "true",
                       "Enable Software Guard Extensions">;
def FeatureCLFLUSHOPT
    : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true",
                       "Flush A Cache Line Optimized">;
def FeatureCLWB
    : SubtargetFeature<"clwb", "HasCLWB", "true", "Cache Line Write Back">;
def FeatureWBNOINVD
    : SubtargetFeature<"wbnoinvd", "HasWBNOINVD", "true",
                       "Write Back No Invalidate">;
def FeatureRDPID
    : SubtargetFeature<"rdpid", "HasRDPID", "true",
                       "Support RDPID instructions">;
def FeatureWAITPKG
    : SubtargetFeature<"waitpkg", "HasWAITPKG", "true",
                       "Wait and pause enhancements">;
// Newer standalone features. Key Locker is the only chain in this group:
// "kl" implies SSE2 and "widekl" implies "kl".
def FeatureENQCMD
    : SubtargetFeature<"enqcmd", "HasENQCMD", "true",
                       "Has ENQCMD instructions">;
def FeatureKL
    : SubtargetFeature<"kl", "HasKL", "true",
                       "Support Key Locker kl Instructions", [FeatureSSE2]>;
def FeatureWIDEKL
    : SubtargetFeature<"widekl", "HasWIDEKL", "true",
                       "Support Key Locker wide Instructions", [FeatureKL]>;
def FeatureHRESET
    : SubtargetFeature<"hreset", "HasHRESET", "true",
                       "Has hreset instruction">;
def FeatureSERIALIZE
    : SubtargetFeature<"serialize", "HasSERIALIZE", "true",
                       "Has serialize instruction">;
def FeatureTSXLDTRK
    : SubtargetFeature<"tsxldtrk", "HasTSXLDTRK", "true",
                       "Support TSXLDTRK instructions">;
def FeatureUINTR
    : SubtargetFeature<"uintr", "HasUINTR", "true",
                       "Has UINTR Instructions">;
def FeaturePCONFIG
    : SubtargetFeature<"pconfig", "HasPCONFIG", "true",
                       "platform configuration instruction">;
def FeatureMOVDIRI
    : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
                       "Support movdiri instruction">;
def FeatureMOVDIR64B
    : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true",
                       "Support movdir64b instruction">;
291
// Enhanced REP MOVSB/STOSB ("string operations") is available on Ivy Bridge
// and later; see "REP String Enhancement" in the Intel Software Development
// Manual. In essence REP MOVSB then copies with the largest available size
// rather than byte by byte, so it is at least as fast as REPMOVS{W,D,Q}.
def FeatureERMSB : SubtargetFeature<"ermsb", "HasERMSB", "true",
                                    "REP MOVS/STOS are fast">;

// Fast Short REP MOV is available on Icelake and later.
def FeatureFSRM : SubtargetFeature<"fsrm", "HasFSRM", "true",
                                   "REP MOVSB of short lengths is faster">;

// Sets UseSoftFloat.
def FeatureSoftFloat : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
                                        "Use software floating point features">;
311
312//===----------------------------------------------------------------------===//
313// X86 Subtarget Security Mitigation features
314//===----------------------------------------------------------------------===//
315
// Mitigate potential Spectre v2 attacks on indirect calls by lowering them
// through a special construct known as a `retpoline`.
def FeatureRetpolineIndirectCalls : SubtargetFeature<
    "retpoline-indirect-calls", "UseRetpolineIndirectCalls", "true",
    "Remove speculation of indirect calls from the generated code">;

// Mitigate potential Spectre v2 attacks on indirect branches and switches by
// lowering them either to conditional branch trees or to a `retpoline`.
def FeatureRetpolineIndirectBranches : SubtargetFeature<
    "retpoline-indirect-branches", "UseRetpolineIndirectBranches", "true",
    "Remove speculation of indirect branches from the generated code">;

// Deprecated umbrella switch: implies both `retpoline-indirect-calls` and
// `retpoline-indirect-branches`.
def FeatureRetpoline : SubtargetFeature<
    "retpoline", "DeprecatedUseRetpoline", "true",
    "Remove speculation of indirect branches from the "
    "generated code, either by avoiding them entirely or "
    "lowering them with a speculation blocking construct",
    [FeatureRetpolineIndirectCalls, FeatureRetpolineIndirectBranches]>;

// Use externally supplied thunks for the emitted retpoline calls, so that
// users can provide custom thunk definitions in highly specialized
// environments (for example a kernel that hot-patches at boot time).
def FeatureRetpolineExternalThunk : SubtargetFeature<
    "retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
    "When lowering an indirect call or branch using a `retpoline`, rely "
    "on the specified user provided thunk rather than emitting one "
    "ourselves. Only has effect when combined with some other retpoline "
    "feature",
    [FeatureRetpolineIndirectCalls]>;
351
// LVI mitigation for indirect calls/branches and for call returns.
def FeatureLVIControlFlowIntegrity : SubtargetFeature<
    "lvi-cfi", "UseLVIControlFlowIntegrity", "true",
    "Prevent indirect calls/branches from using a memory operand, and "
    "precede all indirect calls/branches from a register with an "
    "LFENCE instruction to serialize control flow. Also decompose RET "
    "instructions into a POP+LFENCE+JMP sequence.">;

// SESES: mitigation for speculative execution attacks. Implies lvi-cfi.
def FeatureSpeculativeExecutionSideEffectSuppression : SubtargetFeature<
    "seses", "UseSpeculativeExecutionSideEffectSuppression", "true",
    "Prevent speculative execution side channel timing attacks by "
    "inserting a speculation barrier before memory reads, memory writes, "
    "and conditional branches. Implies LVI Control Flow integrity.",
    [FeatureLVIControlFlowIntegrity]>;

// LVI mitigation for data loads.
def FeatureLVILoadHardening : SubtargetFeature<
    "lvi-load-hardening", "UseLVILoadHardening", "true",
    "Insert LFENCE instructions to prevent data speculatively injected "
    "into loads from being used maliciously.">;

// Sets AllowTaggedGlobals.
def FeatureTaggedGlobals : SubtargetFeature<
    "tagged-globals", "AllowTaggedGlobals", "true",
    "Use an instruction sequence for taking the address of a global "
    "that allows a memory tag in the upper address bits.">;
382
383//===----------------------------------------------------------------------===//
384// X86 Subtarget Tuning features
385//===----------------------------------------------------------------------===//
386
// Tuning flags for slow instructions, unaligned access, stack adjustment and
// narrow divides. None of them implies another feature.
def TuningSlowSHLD
    : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
                       "SHLD instruction is slow">;

def TuningSlowPMULLD
    : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
                       "PMULLD instruction is slow">;

def TuningSlowPMADDWD
    : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow", "true",
                       "PMADDWD is slower than PMULLD">;

// FIXME: CPUs without SSE should not get this flag.
def TuningSlowUAMem16
    : SubtargetFeature<"slow-unaligned-mem-16", "IsUAMem16Slow", "true",
                       "Slow unaligned 16-byte memory access">;

def TuningSlowUAMem32
    : SubtargetFeature<"slow-unaligned-mem-32", "IsUAMem32Slow", "true",
                       "Slow unaligned 32-byte memory access">;

def TuningLEAForSP
    : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
                       "Use LEA for adjusting the stack pointer">;

def TuningSlowDivide32
    : SubtargetFeature<"idivl-to-divb", "HasSlowDivide32", "true",
                       "Use 8-bit divide for positive values less than 256">;

def TuningSlowDivide64
    : SubtargetFeature<"idivq-to-divl", "HasSlowDivide64", "true",
                       "Use 32-bit divide for positive values less than 2^32">;

def TuningPadShortFunctions
    : SubtargetFeature<"pad-short-functions", "PadShortFunctions", "true",
                       "Pad short functions">;
420
// Some processors are slow on instructions that implicitly take two memory
// operands. In practice that means CALL, PUSH and POP with a memory operand
// should be replaced by a MOV followed by the register form of
// CALL/PUSH/POP.
def TuningSlowTwoMemOps
    : SubtargetFeature<"slow-two-mem-ops", "SlowTwoMemOps", "true",
                       "Two memory operand instructions are slow">;

def TuningLEAUsesAG
    : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
                       "LEA instruction needs inputs at AG stage">;

def TuningSlowLEA
    : SubtargetFeature<"slow-lea", "SlowLEA", "true",
                       "LEA instruction with certain arguments is slow">;

def TuningSlow3OpsLEA
    : SubtargetFeature<
          "slow-3ops-lea", "Slow3OpsLEA", "true",
          "LEA instruction with 3 ops or certain registers is slow">;

def TuningSlowIncDec
    : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
                       "INC and DEC instructions are slower than ADD and SUB">;

def TuningPOPCNTFalseDeps
    : SubtargetFeature<"false-deps-popcnt", "HasPOPCNTFalseDeps", "true",
                       "POPCNT has a false dependency on dest register">;

def TuningLZCNTFalseDeps
    : SubtargetFeature<"false-deps-lzcnt-tzcnt", "HasLZCNTFalseDeps", "true",
                       "LZCNT/TZCNT have a false dependency on dest register">;
447
// On recent (port-bound) X86 processors it's preferable to fold several
// fixed shuffles into a single shuffle that uses a variable mask.
def TuningFastVariableCrossLaneShuffle : SubtargetFeature<
    "fast-variable-crosslane-shuffle", "HasFastVariableCrossLaneShuffle",
    "true", "Cross-lane shuffles with variable masks are fast">;
def TuningFastVariablePerLaneShuffle : SubtargetFeature<
    "fast-variable-perlane-shuffle", "HasFastVariablePerLaneShuffle",
    "true", "Per-lane shuffles with variable masks are fast">;

// Some X86 processors want a vzeroupper instruction after ymm/zmm registers
// have been used and before running code that may use SSE instructions.
def TuningInsertVZEROUPPER : SubtargetFeature<
    "vzeroupper", "InsertVZEROUPPER", "true",
    "Should insert vzeroupper instructions">;
465
// Enable TuningFastScalarFSQRT when scalar FSQRT has lower latency than the
// equivalent NR code, and TuningFastVectorFSQRT when vector FSQRT has higher
// throughput than the equivalent NR code. Rationale: throughput-bound code
// tends to be vectorized, so throughput of SQRT matters for vector code;
// code that stays scalar probably carries some dependency, so reducing
// latency matters more there.
def TuningFastScalarFSQRT : SubtargetFeature<
    "fast-scalar-fsqrt", "HasFastScalarFSQRT", "true",
    "Scalar SQRT is fast (disable Newton-Raphson)">;
def TuningFastVectorFSQRT : SubtargetFeature<
    "fast-vector-fsqrt", "HasFastVectorFSQRT", "true",
    "Vector SQRT is fast (disable Newton-Raphson)">;
479
// When lzcnt matches the latency/throughput of most simple integer ops it
// can stand in for test/set sequences.
def TuningFastLZCNT : SubtargetFeature<
    "fast-lzcnt", "HasFastLZCNT", "true",
    "LZCNT instructions are as fast as most simple integer ops">;

// The target efficiently decodes NOPs up to 7 bytes long.
def TuningFast7ByteNOP : SubtargetFeature<
    "fast-7bytenop", "HasFast7ByteNOP", "true",
    "Target can quickly decode up to 7 byte NOPs">;

// The target efficiently decodes NOPs up to 11 bytes long.
def TuningFast11ByteNOP : SubtargetFeature<
    "fast-11bytenop", "HasFast11ByteNOP", "true",
    "Target can quickly decode up to 11 byte NOPs">;

// The target efficiently decodes NOPs up to 15 bytes long.
def TuningFast15ByteNOP : SubtargetFeature<
    "fast-15bytenop", "HasFast15ByteNOP", "true",
    "Target can quickly decode up to 15 byte NOPs">;
504
// From Sandy Bridge on, SHLD with the same source for both inputs can
// implement a rotate, which sidesteps the partial flag update of the regular
// rotate instructions.
def TuningFastSHLDRotate : SubtargetFeature<
    "fast-shld-rotate", "HasFastSHLDRotate", "true",
    "SHLD can be used as a faster rotate">;

// From Bulldozer on, CMP/TEST (and no other instructions) can be merged with
// conditional branches.
def TuningBranchFusion : SubtargetFeature<
    "branchfusion", "HasBranchFusion", "true",
    "CMP/TEST can be fused with conditional branches">;

// From Sandy Bridge on, many instructions can be fused with a conditional
// branch and travel through the CPU as one operation.
def TuningMacroFusion : SubtargetFeature<
    "macrofusion", "HasMacroFusion", "true",
    "Various instructions can be fused with conditional branches">;

// Gather exists since Haswell (the AVX2 set), so technically it could be
// emitted for every AVX2 processor, but its overhead on HSW is high. Skylake
// Client has faster gathers than HSW, with performance similar to Skylake
// Server (AVX-512).
def TuningFastGather : SubtargetFeature<
    "fast-gather", "HasFastGather", "true",
    "Indicates if gather is reasonably fast">;
533
534def TuningPrefer128Bit
535    : SubtargetFeature<"prefer-128-bit", "Prefer128Bit", "true",
536                       "Prefer 128-bit AVX instructions">;
537
538def TuningPrefer256Bit
539    : SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true",
540                       "Prefer 256-bit AVX instructions">;
541
// Tuning flag "prefer-mask-registers": favour AVX512 mask registers over
// PTEST/MOVMSK.
def TuningPreferMaskRegisters : SubtargetFeature<
    "prefer-mask-registers", "PreferMaskRegisters", "true",
    "Prefer AVX512 mask registers over PTEST/MOVMSK">;
545
// Tuning flag "fast-bextr": BEXTR is a single uop with good throughput.
// The adjacent string literals below concatenate into one description.
def TuningFastBEXTR
    : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true",
                       "Indicates that the BEXTR instruction is implemented "
                       "as a single uop with good throughput">;
549
// When a CPU implements the horizontal operations (introduced with SSE3) with
// better latency/throughput than the alternative sequence, fold vector math
// operations plus shuffles into horizontal math instructions.
def TuningFastHorizontalOps
    : SubtargetFeature<"fast-hops", "HasFastHorizontalOps", "true",
                       "Prefer horizontal vector math instructions "
                       "(haddp, phsub, etc.) over "
                       "normal vector instructions with shuffles">;
558
// Tuning flag "fast-scalar-shift-masks": for scalars, favour a left/right
// logical shift pair over a shift+and pair.
def TuningFastScalarShiftMasks
    : SubtargetFeature<"fast-scalar-shift-masks", "HasFastScalarShiftMasks",
                       "true",
                       "Prefer a left/right scalar logical shift pair over a "
                       "shift+and pair">;
563
// Tuning flag "fast-vector-shift-masks": for vectors, favour a left/right
// logical shift pair over a shift+and pair.
def TuningFastVectorShiftMasks
    : SubtargetFeature<"fast-vector-shift-masks", "HasFastVectorShiftMasks",
                       "true",
                       "Prefer a left/right vector logical shift pair over a "
                       "shift+and pair">;
568
// Tuning flag "fast-movbe": favour MOVBE over a single-use load followed by
// bswap, or a single-use bswap followed by a store.
def TuningFastMOVBE : SubtargetFeature<
    "fast-movbe", "HasFastMOVBE", "true",
    "Prefer a movbe over a single-use load + bswap / "
    "single-use bswap + store">;
572
// Tuning flag "use-slm-arith-costs": select the Silvermont-specific
// arithmetic costs.
def TuningUseSLMArithCosts : SubtargetFeature<
    "use-slm-arith-costs", "UseSLMArithCosts", "true",
    "Use Silvermont specific arithmetic costs">;
576
// Tuning flag "use-glm-div-sqrt-costs": select the Goldmont-specific
// floating point div/sqrt costs.
def TuningUseGLMDivSqrtCosts : SubtargetFeature<
    "use-glm-div-sqrt-costs", "UseGLMDivSqrtCosts", "true",
    "Use Goldmont specific floating point div/sqrt costs">;
580
// Turns on alias analysis while generating code.
def FeatureUseAA
    : SubtargetFeature<"use-aa", "UseAA", "true",
                       "Use alias analysis during codegen">;
584
585//===----------------------------------------------------------------------===//
586// X86 CPU Families
587// TODO: Remove these - use general tuning features to determine codegen.
588//===----------------------------------------------------------------------===//
589
// Bonnell. Unnamed feature (empty name and description) carrying the
// X86ProcFamily value IntelAtom.
def ProcIntelAtom
    : SubtargetFeature<"", "X86ProcFamily", "IntelAtom", "">;
592
593//===----------------------------------------------------------------------===//
594// Register File Description
595//===----------------------------------------------------------------------===//
596
597include "X86RegisterInfo.td"
598include "X86RegisterBanks.td"
599
600//===----------------------------------------------------------------------===//
601// Instruction Descriptions
602//===----------------------------------------------------------------------===//
603
604include "X86Schedule.td"
605include "X86InstrInfo.td"
606include "X86SchedPredicates.td"
607
// Instruction-info record for the X86 target; it adds no fields beyond those
// of the InstrInfo class.
def X86InstrInfo
    : InstrInfo;
609
610//===----------------------------------------------------------------------===//
611// X86 Scheduler Models
612//===----------------------------------------------------------------------===//
613
614include "X86ScheduleAtom.td"
615include "X86SchedSandyBridge.td"
616include "X86SchedHaswell.td"
617include "X86SchedBroadwell.td"
618include "X86ScheduleSLM.td"
619include "X86ScheduleZnver1.td"
620include "X86ScheduleZnver2.td"
621include "X86ScheduleZnver3.td"
622include "X86ScheduleBdVer2.td"
623include "X86ScheduleBtVer2.td"
624include "X86SchedSkylakeClient.td"
625include "X86SchedSkylakeServer.td"
626include "X86SchedIceLake.td"
627
628//===----------------------------------------------------------------------===//
629// X86 Processor Feature Lists
630//===----------------------------------------------------------------------===//
631
632def ProcessorFeatures {
633  // x86-64 and x86-64-v[234]
634  list<SubtargetFeature> X86_64V1Features = [
635    FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX, FeatureSSE2,
636    FeatureFXSR, FeatureNOPL, Feature64Bit
637  ];
638  list<SubtargetFeature> X86_64V2Features = !listconcat(X86_64V1Features, [
639    FeatureCMPXCHG16B, FeatureLAHFSAHF, FeatureCRC32, FeaturePOPCNT,
640    FeatureSSE42
641  ]);
642  list<SubtargetFeature> X86_64V3Features = !listconcat(X86_64V2Features, [
643    FeatureAVX2, FeatureBMI, FeatureBMI2, FeatureF16C, FeatureFMA, FeatureLZCNT,
644    FeatureMOVBE, FeatureXSAVE
645  ]);
646  list<SubtargetFeature> X86_64V4Features = !listconcat(X86_64V3Features, [
647    FeatureBWI,
648    FeatureCDI,
649    FeatureDQI,
650    FeatureVLX,
651  ]);
652
653  // Nehalem
654  list<SubtargetFeature> NHMFeatures = X86_64V2Features;
655  list<SubtargetFeature> NHMTuning = [TuningMacroFusion,
656                                      TuningInsertVZEROUPPER];
657
658  // Westmere
659  list<SubtargetFeature> WSMAdditionalFeatures = [FeaturePCLMUL];
660  list<SubtargetFeature> WSMTuning = NHMTuning;
661  list<SubtargetFeature> WSMFeatures =
662    !listconcat(NHMFeatures, WSMAdditionalFeatures);
663
664  // Sandybridge
665  list<SubtargetFeature> SNBAdditionalFeatures = [FeatureAVX,
666                                                  FeatureXSAVE,
667                                                  FeatureXSAVEOPT];
668  list<SubtargetFeature> SNBTuning = [TuningMacroFusion,
669                                      TuningSlow3OpsLEA,
670                                      TuningSlowDivide64,
671                                      TuningSlowUAMem32,
672                                      TuningFastScalarFSQRT,
673                                      TuningFastSHLDRotate,
674                                      TuningFast15ByteNOP,
675                                      TuningPOPCNTFalseDeps,
676                                      TuningInsertVZEROUPPER];
677  list<SubtargetFeature> SNBFeatures =
678    !listconcat(WSMFeatures, SNBAdditionalFeatures);
679
680  // Ivybridge
681  list<SubtargetFeature> IVBAdditionalFeatures = [FeatureRDRAND,
682                                                  FeatureF16C,
683                                                  FeatureFSGSBase];
684  list<SubtargetFeature> IVBTuning = SNBTuning;
685  list<SubtargetFeature> IVBFeatures =
686    !listconcat(SNBFeatures, IVBAdditionalFeatures);
687
688  // Haswell
689  list<SubtargetFeature> HSWAdditionalFeatures = [FeatureAVX2,
690                                                  FeatureBMI,
691                                                  FeatureBMI2,
692                                                  FeatureERMSB,
693                                                  FeatureFMA,
694                                                  FeatureINVPCID,
695                                                  FeatureLZCNT,
696                                                  FeatureMOVBE];
697  list<SubtargetFeature> HSWTuning = [TuningMacroFusion,
698                                      TuningSlow3OpsLEA,
699                                      TuningSlowDivide64,
700                                      TuningFastScalarFSQRT,
701                                      TuningFastSHLDRotate,
702                                      TuningFast15ByteNOP,
703                                      TuningFastVariableCrossLaneShuffle,
704                                      TuningFastVariablePerLaneShuffle,
705                                      TuningPOPCNTFalseDeps,
706                                      TuningLZCNTFalseDeps,
707                                      TuningInsertVZEROUPPER];
708  list<SubtargetFeature> HSWFeatures =
709    !listconcat(IVBFeatures, HSWAdditionalFeatures);
710
711  // Broadwell
712  list<SubtargetFeature> BDWAdditionalFeatures = [FeatureADX,
713                                                  FeatureRDSEED,
714                                                  FeaturePRFCHW];
715  list<SubtargetFeature> BDWTuning = HSWTuning;
716  list<SubtargetFeature> BDWFeatures =
717    !listconcat(HSWFeatures, BDWAdditionalFeatures);
718
719  // Skylake
720  list<SubtargetFeature> SKLAdditionalFeatures = [FeatureAES,
721                                                  FeatureXSAVEC,
722                                                  FeatureXSAVES,
723                                                  FeatureCLFLUSHOPT];
724  list<SubtargetFeature> SKLTuning = [TuningFastGather,
725                                      TuningMacroFusion,
726                                      TuningSlow3OpsLEA,
727                                      TuningSlowDivide64,
728                                      TuningFastScalarFSQRT,
729                                      TuningFastVectorFSQRT,
730                                      TuningFastSHLDRotate,
731                                      TuningFast15ByteNOP,
732                                      TuningFastVariableCrossLaneShuffle,
733                                      TuningFastVariablePerLaneShuffle,
734                                      TuningPOPCNTFalseDeps,
735                                      TuningInsertVZEROUPPER];
736  list<SubtargetFeature> SKLFeatures =
737    !listconcat(BDWFeatures, SKLAdditionalFeatures);
738
739  // Skylake-AVX512
740  list<SubtargetFeature> SKXAdditionalFeatures = [FeatureAES,
741                                                  FeatureXSAVEC,
742                                                  FeatureXSAVES,
743                                                  FeatureCLFLUSHOPT,
744                                                  FeatureAVX512,
745                                                  FeatureCDI,
746                                                  FeatureDQI,
747                                                  FeatureBWI,
748                                                  FeatureVLX,
749                                                  FeaturePKU,
750                                                  FeatureCLWB];
751  list<SubtargetFeature> SKXTuning = [TuningFastGather,
752                                      TuningMacroFusion,
753                                      TuningSlow3OpsLEA,
754                                      TuningSlowDivide64,
755                                      TuningFastScalarFSQRT,
756                                      TuningFastVectorFSQRT,
757                                      TuningFastSHLDRotate,
758                                      TuningFast15ByteNOP,
759                                      TuningFastVariableCrossLaneShuffle,
760                                      TuningFastVariablePerLaneShuffle,
761                                      TuningPrefer256Bit,
762                                      TuningPOPCNTFalseDeps,
763                                      TuningInsertVZEROUPPER];
764  list<SubtargetFeature> SKXFeatures =
765    !listconcat(BDWFeatures, SKXAdditionalFeatures);
766
767  // Cascadelake
768  list<SubtargetFeature> CLXAdditionalFeatures = [FeatureVNNI];
769  list<SubtargetFeature> CLXTuning = SKXTuning;
770  list<SubtargetFeature> CLXFeatures =
771    !listconcat(SKXFeatures, CLXAdditionalFeatures);
772
773  // Cooperlake
774  list<SubtargetFeature> CPXAdditionalFeatures = [FeatureBF16];
775  list<SubtargetFeature> CPXTuning = SKXTuning;
776  list<SubtargetFeature> CPXFeatures =
777    !listconcat(CLXFeatures, CPXAdditionalFeatures);
778
779  // Cannonlake
780  list<SubtargetFeature> CNLAdditionalFeatures = [FeatureAVX512,
781                                                  FeatureCDI,
782                                                  FeatureDQI,
783                                                  FeatureBWI,
784                                                  FeatureVLX,
785                                                  FeaturePKU,
786                                                  FeatureVBMI,
787                                                  FeatureIFMA,
788                                                  FeatureSHA];
789  list<SubtargetFeature> CNLTuning = [TuningFastGather,
790                                      TuningMacroFusion,
791                                      TuningSlow3OpsLEA,
792                                      TuningSlowDivide64,
793                                      TuningFastScalarFSQRT,
794                                      TuningFastVectorFSQRT,
795                                      TuningFastSHLDRotate,
796                                      TuningFast15ByteNOP,
797                                      TuningFastVariableCrossLaneShuffle,
798                                      TuningFastVariablePerLaneShuffle,
799                                      TuningPrefer256Bit,
800                                      TuningInsertVZEROUPPER];
801  list<SubtargetFeature> CNLFeatures =
802    !listconcat(SKLFeatures, CNLAdditionalFeatures);
803
804  // Icelake
805  list<SubtargetFeature> ICLAdditionalFeatures = [FeatureBITALG,
806                                                  FeatureVAES,
807                                                  FeatureVBMI2,
808                                                  FeatureVNNI,
809                                                  FeatureVPCLMULQDQ,
810                                                  FeatureVPOPCNTDQ,
811                                                  FeatureGFNI,
812                                                  FeatureRDPID,
813                                                  FeatureFSRM];
814  list<SubtargetFeature> ICLTuning = [TuningFastGather,
815                                      TuningMacroFusion,
816                                      TuningSlow3OpsLEA,
817                                      TuningSlowDivide64,
818                                      TuningFastScalarFSQRT,
819                                      TuningFastVectorFSQRT,
820                                      TuningFastSHLDRotate,
821                                      TuningFast15ByteNOP,
822                                      TuningFastVariableCrossLaneShuffle,
823                                      TuningFastVariablePerLaneShuffle,
824                                      TuningPrefer256Bit,
825                                      TuningInsertVZEROUPPER];
826  list<SubtargetFeature> ICLFeatures =
827    !listconcat(CNLFeatures, ICLAdditionalFeatures);
828
829  // Icelake Server
830  list<SubtargetFeature> ICXAdditionalFeatures = [FeaturePCONFIG,
831                                                  FeatureCLWB,
832                                                  FeatureWBNOINVD];
833  list<SubtargetFeature> ICXTuning = ICLTuning;
834  list<SubtargetFeature> ICXFeatures =
835    !listconcat(ICLFeatures, ICXAdditionalFeatures);
836
837  // Tigerlake
838  list<SubtargetFeature> TGLAdditionalFeatures = [FeatureVP2INTERSECT,
839                                                  FeatureCLWB,
840                                                  FeatureMOVDIRI,
841                                                  FeatureMOVDIR64B,
842                                                  FeatureSHSTK];
843  list<SubtargetFeature> TGLTuning = ICLTuning;
844  list<SubtargetFeature> TGLFeatures =
845    !listconcat(ICLFeatures, TGLAdditionalFeatures );
846
847  // Sapphirerapids
848  list<SubtargetFeature> SPRAdditionalFeatures = [FeatureAMXTILE,
849                                                  FeatureAMXINT8,
850                                                  FeatureAMXBF16,
851                                                  FeatureBF16,
852                                                  FeatureSERIALIZE,
853                                                  FeatureCLDEMOTE,
854                                                  FeatureWAITPKG,
855                                                  FeaturePTWRITE,
856                                                  FeatureFP16,
857                                                  FeatureAVXVNNI,
858                                                  FeatureTSXLDTRK,
859                                                  FeatureENQCMD,
860                                                  FeatureSHSTK,
861                                                  FeatureVP2INTERSECT,
862                                                  FeatureMOVDIRI,
863                                                  FeatureMOVDIR64B,
864                                                  FeatureUINTR];
865  list<SubtargetFeature> SPRTuning = ICXTuning;
866  list<SubtargetFeature> SPRFeatures =
867    !listconcat(ICXFeatures, SPRAdditionalFeatures);
868
869  // Atom
870  list<SubtargetFeature> AtomFeatures = [FeatureX87,
871                                         FeatureCMPXCHG8B,
872                                         FeatureCMOV,
873                                         FeatureMMX,
874                                         FeatureSSSE3,
875                                         FeatureFXSR,
876                                         FeatureNOPL,
877                                         Feature64Bit,
878                                         FeatureCMPXCHG16B,
879                                         FeatureMOVBE,
880                                         FeatureLAHFSAHF];
881  list<SubtargetFeature> AtomTuning = [ProcIntelAtom,
882                                       TuningSlowUAMem16,
883                                       TuningLEAForSP,
884                                       TuningSlowDivide32,
885                                       TuningSlowDivide64,
886                                       TuningSlowTwoMemOps,
887                                       TuningLEAUsesAG,
888                                       TuningPadShortFunctions,
889                                       TuningInsertVZEROUPPER];
890
891  // Silvermont
892  list<SubtargetFeature> SLMAdditionalFeatures = [FeatureSSE42,
893                                                  FeatureCRC32,
894                                                  FeaturePOPCNT,
895                                                  FeaturePCLMUL,
896                                                  FeaturePRFCHW,
897                                                  FeatureRDRAND];
898  list<SubtargetFeature> SLMTuning = [TuningUseSLMArithCosts,
899                                      TuningSlowTwoMemOps,
900                                      TuningSlowLEA,
901                                      TuningSlowIncDec,
902                                      TuningSlowDivide64,
903                                      TuningSlowPMULLD,
904                                      TuningFast7ByteNOP,
905                                      TuningFastMOVBE,
906                                      TuningPOPCNTFalseDeps,
907                                      TuningInsertVZEROUPPER];
908  list<SubtargetFeature> SLMFeatures =
909    !listconcat(AtomFeatures, SLMAdditionalFeatures);
910
911  // Goldmont
912  list<SubtargetFeature> GLMAdditionalFeatures = [FeatureAES,
913                                                  FeatureSHA,
914                                                  FeatureRDSEED,
915                                                  FeatureXSAVE,
916                                                  FeatureXSAVEOPT,
917                                                  FeatureXSAVEC,
918                                                  FeatureXSAVES,
919                                                  FeatureCLFLUSHOPT,
920                                                  FeatureFSGSBase];
921  list<SubtargetFeature> GLMTuning = [TuningUseGLMDivSqrtCosts,
922                                      TuningSlowTwoMemOps,
923                                      TuningSlowLEA,
924                                      TuningSlowIncDec,
925                                      TuningFastMOVBE,
926                                      TuningPOPCNTFalseDeps,
927                                      TuningInsertVZEROUPPER];
928  list<SubtargetFeature> GLMFeatures =
929    !listconcat(SLMFeatures, GLMAdditionalFeatures);
930
931  // Goldmont Plus
932  list<SubtargetFeature> GLPAdditionalFeatures = [FeaturePTWRITE,
933                                                  FeatureRDPID];
934  list<SubtargetFeature> GLPTuning = [TuningUseGLMDivSqrtCosts,
935                                      TuningSlowTwoMemOps,
936                                      TuningSlowLEA,
937                                      TuningSlowIncDec,
938                                      TuningFastMOVBE,
939                                      TuningInsertVZEROUPPER];
940  list<SubtargetFeature> GLPFeatures =
941    !listconcat(GLMFeatures, GLPAdditionalFeatures);
942
943  // Tremont
944  list<SubtargetFeature> TRMAdditionalFeatures = [FeatureCLWB,
945                                                  FeatureGFNI];
946  list<SubtargetFeature> TRMTuning = GLPTuning;
947  list<SubtargetFeature> TRMFeatures =
948    !listconcat(GLPFeatures, TRMAdditionalFeatures);
949
950  // Alderlake
951  list<SubtargetFeature> ADLAdditionalFeatures = [FeatureSERIALIZE,
952                                                  FeaturePCONFIG,
953                                                  FeatureSHSTK,
954                                                  FeatureWIDEKL,
955                                                  FeatureINVPCID,
956                                                  FeatureADX,
957                                                  FeatureFMA,
958                                                  FeatureVAES,
959                                                  FeatureVPCLMULQDQ,
960                                                  FeatureF16C,
961                                                  FeatureBMI,
962                                                  FeatureBMI2,
963                                                  FeatureLZCNT,
964                                                  FeatureAVXVNNI,
965                                                  FeaturePKU,
966                                                  FeatureHRESET,
967                                                  FeatureCLDEMOTE,
968                                                  FeatureMOVDIRI,
969                                                  FeatureMOVDIR64B,
970                                                  FeatureWAITPKG];
971  list<SubtargetFeature> ADLTuning = SKLTuning;
972  list<SubtargetFeature> ADLFeatures =
973    !listconcat(TRMFeatures, ADLAdditionalFeatures);
974
975  // Knights Landing
976  list<SubtargetFeature> KNLFeatures = [FeatureX87,
977                                        FeatureCMPXCHG8B,
978                                        FeatureCMOV,
979                                        FeatureMMX,
980                                        FeatureFXSR,
981                                        FeatureNOPL,
982                                        Feature64Bit,
983                                        FeatureCMPXCHG16B,
984                                        FeatureCRC32,
985                                        FeaturePOPCNT,
986                                        FeaturePCLMUL,
987                                        FeatureXSAVE,
988                                        FeatureXSAVEOPT,
989                                        FeatureLAHFSAHF,
990                                        FeatureAES,
991                                        FeatureRDRAND,
992                                        FeatureF16C,
993                                        FeatureFSGSBase,
994                                        FeatureAVX512,
995                                        FeatureERI,
996                                        FeatureCDI,
997                                        FeaturePFI,
998                                        FeaturePREFETCHWT1,
999                                        FeatureADX,
1000                                        FeatureRDSEED,
1001                                        FeatureMOVBE,
1002                                        FeatureLZCNT,
1003                                        FeatureBMI,
1004                                        FeatureBMI2,
1005                                        FeatureFMA,
1006                                        FeaturePRFCHW];
1007  list<SubtargetFeature> KNLTuning = [TuningSlowDivide64,
1008                                      TuningSlow3OpsLEA,
1009                                      TuningSlowIncDec,
1010                                      TuningSlowTwoMemOps,
1011                                      TuningPreferMaskRegisters,
1012                                      TuningFastGather,
1013                                      TuningFastMOVBE,
1014                                      TuningSlowPMADDWD];
1015  // TODO Add AVX5124FMAPS/AVX5124VNNIW features
1016  list<SubtargetFeature> KNMFeatures =
1017    !listconcat(KNLFeatures, [FeatureVPOPCNTDQ]);
1018
1019  // Barcelona
1020  list<SubtargetFeature> BarcelonaFeatures = [FeatureX87,
1021                                              FeatureCMPXCHG8B,
1022                                              FeatureSSE4A,
1023                                              Feature3DNowA,
1024                                              FeatureFXSR,
1025                                              FeatureNOPL,
1026                                              FeatureCMPXCHG16B,
1027                                              FeaturePRFCHW,
1028                                              FeatureLZCNT,
1029                                              FeaturePOPCNT,
1030                                              FeatureLAHFSAHF,
1031                                              FeatureCMOV,
1032                                              Feature64Bit];
1033  list<SubtargetFeature> BarcelonaTuning = [TuningFastScalarShiftMasks,
1034                                            TuningSlowSHLD,
1035                                            TuningInsertVZEROUPPER];
1036
1037  // Bobcat
1038  list<SubtargetFeature> BtVer1Features = [FeatureX87,
1039                                           FeatureCMPXCHG8B,
1040                                           FeatureCMOV,
1041                                           FeatureMMX,
1042                                           FeatureSSSE3,
1043                                           FeatureSSE4A,
1044                                           FeatureFXSR,
1045                                           FeatureNOPL,
1046                                           Feature64Bit,
1047                                           FeatureCMPXCHG16B,
1048                                           FeaturePRFCHW,
1049                                           FeatureLZCNT,
1050                                           FeaturePOPCNT,
1051                                           FeatureLAHFSAHF];
1052  list<SubtargetFeature> BtVer1Tuning = [TuningFast15ByteNOP,
1053                                         TuningFastScalarShiftMasks,
1054                                         TuningFastVectorShiftMasks,
1055                                         TuningSlowSHLD,
1056                                         TuningInsertVZEROUPPER];
1057
1058  // Jaguar
1059  list<SubtargetFeature> BtVer2AdditionalFeatures = [FeatureAVX,
1060                                                     FeatureAES,
1061                                                     FeatureCRC32,
1062                                                     FeaturePCLMUL,
1063                                                     FeatureBMI,
1064                                                     FeatureF16C,
1065                                                     FeatureMOVBE,
1066                                                     FeatureXSAVE,
1067                                                     FeatureXSAVEOPT];
1068  list<SubtargetFeature> BtVer2Tuning = [TuningFastLZCNT,
1069                                         TuningFastBEXTR,
1070                                         TuningFastHorizontalOps,
1071                                         TuningFast15ByteNOP,
1072                                         TuningFastScalarShiftMasks,
1073                                         TuningFastVectorShiftMasks,
1074                                         TuningFastMOVBE,
1075                                         TuningSlowSHLD];
1076  list<SubtargetFeature> BtVer2Features =
1077    !listconcat(BtVer1Features, BtVer2AdditionalFeatures);
1078
1079  // Bulldozer
1080  list<SubtargetFeature> BdVer1Features = [FeatureX87,
1081                                           FeatureCMPXCHG8B,
1082                                           FeatureCMOV,
1083                                           FeatureXOP,
1084                                           Feature64Bit,
1085                                           FeatureCMPXCHG16B,
1086                                           FeatureAES,
1087                                           FeatureCRC32,
1088                                           FeaturePRFCHW,
1089                                           FeaturePCLMUL,
1090                                           FeatureMMX,
1091                                           FeatureFXSR,
1092                                           FeatureNOPL,
1093                                           FeatureLZCNT,
1094                                           FeaturePOPCNT,
1095                                           FeatureXSAVE,
1096                                           FeatureLWP,
1097                                           FeatureLAHFSAHF];
1098  list<SubtargetFeature> BdVer1Tuning = [TuningSlowSHLD,
1099                                         TuningFast11ByteNOP,
1100                                         TuningFastScalarShiftMasks,
1101                                         TuningBranchFusion,
1102                                         TuningInsertVZEROUPPER];
1103
1104  // PileDriver
1105  list<SubtargetFeature> BdVer2AdditionalFeatures = [FeatureF16C,
1106                                                     FeatureBMI,
1107                                                     FeatureTBM,
1108                                                     FeatureFMA];
1109  list<SubtargetFeature> BdVer2AdditionalTuning = [TuningFastBEXTR,
1110                                                   TuningFastMOVBE];
1111  list<SubtargetFeature> BdVer2Tuning =
1112    !listconcat(BdVer1Tuning, BdVer2AdditionalTuning);
1113  list<SubtargetFeature> BdVer2Features =
1114    !listconcat(BdVer1Features, BdVer2AdditionalFeatures);
1115
1116  // Steamroller
1117  list<SubtargetFeature> BdVer3AdditionalFeatures = [FeatureXSAVEOPT,
1118                                                     FeatureFSGSBase];
1119  list<SubtargetFeature> BdVer3Tuning = BdVer2Tuning;
1120  list<SubtargetFeature> BdVer3Features =
1121    !listconcat(BdVer2Features, BdVer3AdditionalFeatures);
1122
1123  // Excavator
1124  list<SubtargetFeature> BdVer4AdditionalFeatures = [FeatureAVX2,
1125                                                     FeatureBMI2,
1126                                                     FeatureMOVBE,
1127                                                     FeatureRDRAND,
1128                                                     FeatureMWAITX];
1129  list<SubtargetFeature> BdVer4Tuning = BdVer3Tuning;
1130  list<SubtargetFeature> BdVer4Features =
1131    !listconcat(BdVer3Features, BdVer4AdditionalFeatures);
1132
1133
1134  // AMD Zen Processors common ISAs
1135  list<SubtargetFeature> ZNFeatures = [FeatureADX,
1136                                       FeatureAES,
1137                                       FeatureAVX2,
1138                                       FeatureBMI,
1139                                       FeatureBMI2,
1140                                       FeatureCLFLUSHOPT,
1141                                       FeatureCLZERO,
1142                                       FeatureCMOV,
1143                                       Feature64Bit,
1144                                       FeatureCMPXCHG16B,
1145                                       FeatureCRC32,
1146                                       FeatureF16C,
1147                                       FeatureFMA,
1148                                       FeatureFSGSBase,
1149                                       FeatureFXSR,
1150                                       FeatureNOPL,
1151                                       FeatureLAHFSAHF,
1152                                       FeatureLZCNT,
1153                                       FeatureMMX,
1154                                       FeatureMOVBE,
1155                                       FeatureMWAITX,
1156                                       FeaturePCLMUL,
1157                                       FeaturePOPCNT,
1158                                       FeaturePRFCHW,
1159                                       FeatureRDRAND,
1160                                       FeatureRDSEED,
1161                                       FeatureSHA,
1162                                       FeatureSSE4A,
1163                                       FeatureX87,
1164                                       FeatureXSAVE,
1165                                       FeatureXSAVEC,
1166                                       FeatureXSAVEOPT,
1167                                       FeatureXSAVES];
1168  list<SubtargetFeature> ZNTuning = [TuningFastLZCNT,
1169                                     TuningFastBEXTR,
1170                                     TuningFast15ByteNOP,
1171                                     TuningBranchFusion,
1172                                     TuningFastScalarFSQRT,
1173                                     TuningFastVectorFSQRT,
1174                                     TuningFastScalarShiftMasks,
1175                                     TuningFastMOVBE,
1176                                     TuningSlowSHLD,
1177                                     TuningInsertVZEROUPPER];
1178  list<SubtargetFeature> ZN2AdditionalFeatures = [FeatureCLWB,
1179                                                  FeatureRDPID,
1180                                                  FeatureWBNOINVD];
1181  list<SubtargetFeature> ZN2Tuning = ZNTuning;
1182  list<SubtargetFeature> ZN2Features =
1183    !listconcat(ZNFeatures, ZN2AdditionalFeatures);
1184  list<SubtargetFeature> ZN3AdditionalFeatures = [FeatureFSRM,
1185                                                  FeatureINVPCID,
1186                                                  FeaturePKU,
1187                                                  FeatureVAES,
1188                                                  FeatureVPCLMULQDQ];
1189  list<SubtargetFeature> ZN3AdditionalTuning =
1190    [TuningMacroFusion,
1191     TuningFastVariablePerLaneShuffle];
1192  list<SubtargetFeature> ZN3Tuning =
1193    !listconcat(ZNTuning, ZN3AdditionalTuning);
1194  list<SubtargetFeature> ZN3Features =
1195    !listconcat(ZN2Features, ZN3AdditionalFeatures);
1196}
1197
1198//===----------------------------------------------------------------------===//
1199// X86 processors supported.
1200//===----------------------------------------------------------------------===//
1201
// Processor description that always uses GenericModel as its scheduling
// model; Name, Features and TuneFeatures are passed through to ProcessorModel
// unchanged.
class Proc<string Name,
           list<SubtargetFeature> Features,
           list<SubtargetFeature> TuneFeatures>
    : ProcessorModel<Name, GenericModel, Features, TuneFeatures>;
1205
// Processor description with an explicitly chosen scheduling model; all four
// arguments are passed through to ProcessorModel unchanged.
class ProcModel<string Name,
                SchedMachineModel Model,
                list<SubtargetFeature> Features,
                list<SubtargetFeature> TuneFeatures>
    : ProcessorModel<Name, Model, Features, TuneFeatures>;
1210
// "generic" is the default llc CPU.
// NOTE: CMPXCHG8B is listed for legacy compatibility, so that it is only
// disabled when i386/i486 is specifically requested.
// NOTE: 64Bit is listed because the X86Subtarget constructor checks that any
// CPU used in 64-bit mode has Feature64Bit enabled. It has no effect on code
// generation.
def : ProcModel<"generic", SandyBridgeModel,
                [FeatureX87, FeatureCMPXCHG8B, Feature64Bit], [
  TuningSlow3OpsLEA,
  TuningSlowDivide64,
  TuningSlowIncDec,
  TuningMacroFusion,
  TuningInsertVZEROUPPER
]>;
1223
// i386 and i486 enable only x87.
foreach CPU = ["i386", "i486"] in {
  def : Proc<CPU, [FeatureX87],
             [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}

// i586 and pentium additionally enable CMPXCHG8B.
foreach CPU = ["i586", "pentium"] in {
  def : Proc<CPU, [FeatureX87, FeatureCMPXCHG8B],
             [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}

// pentium-mmx additionally enables MMX.
def : Proc<"pentium-mmx", [FeatureX87, FeatureCMPXCHG8B, FeatureMMX],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1234
// i686 enables CMOV in addition to x87 and CMPXCHG8B.
def : Proc<"i686", [
  FeatureX87,
  FeatureCMPXCHG8B,
  FeatureCMOV
],
[
  TuningSlowUAMem16,
  TuningInsertVZEROUPPER
]>;

// pentiumpro is i686 plus NOPL.
def : Proc<"pentiumpro", [
  FeatureX87,
  FeatureCMPXCHG8B,
  FeatureCMOV,
  FeatureNOPL
],
[
  TuningSlowUAMem16,
  TuningInsertVZEROUPPER
]>;
1240
// pentium2 enables MMX and FXSR on top of the pentiumpro feature set.
def : Proc<"pentium2", [
  FeatureX87,
  FeatureCMPXCHG8B,
  FeatureMMX,
  FeatureCMOV,
  FeatureFXSR,
  FeatureNOPL
],
[
  TuningSlowUAMem16,
  TuningInsertVZEROUPPER
]>;
1244
// pentium3 and pentium3m get identical definitions (SSE1 class).
foreach CPU = ["pentium3", "pentium3m"] in {
  def : Proc<CPU,
             [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE1,
              FeatureFXSR, FeatureNOPL, FeatureCMOV],
             [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
1250
1251// Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
1252// The intent is to enable it for pentium4 which is the current default
1253// processor in a vanilla 32-bit clang compilation when no specific
1254// architecture is specified.  This generally gives a nice performance
1255// increase on silvermont, with largely neutral behavior on other
1256// contemporary large core processors.
1257// pentium-m, pentium4m, prescott and nocona are included as a preventative
1258// measure to avoid performance surprises, in case clang's default cpu
1259// changes slightly.
1260
// pentium-m: SSE2 class CPU scheduled with GenericPostRAModel.
def : ProcModel<"pentium-m", GenericPostRAModel, [
  FeatureX87,
  FeatureCMPXCHG8B,
  FeatureMMX,
  FeatureSSE2,
  FeatureFXSR,
  FeatureNOPL,
  FeatureCMOV
],
[
  TuningSlowUAMem16,
  TuningInsertVZEROUPPER
]>;
1265
// pentium4 and pentium4m get identical definitions: SSE2 class CPUs scheduled
// with GenericPostRAModel.
foreach CPU = ["pentium4", "pentium4m"] in {
  def : ProcModel<CPU, GenericPostRAModel, [
    FeatureX87,
    FeatureCMPXCHG8B,
    FeatureMMX,
    FeatureSSE2,
    FeatureFXSR,
    FeatureNOPL,
    FeatureCMOV
  ],
  [
    TuningSlowUAMem16,
    TuningInsertVZEROUPPER
  ]>;
}
1272
// Intel Quark. CMPXCHG8B is the only ISA feature enabled here; in particular
// FeatureX87 is not part of the list.
def : Proc<"lakemont",
           [FeatureCMPXCHG8B],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1276
// Intel Core Duo: SSE3 class CPU scheduled with SandyBridgeModel.
def : ProcModel<"yonah", SandyBridgeModel, [
  FeatureX87,
  FeatureCMPXCHG8B,
  FeatureMMX,
  FeatureSSE3,
  FeatureFXSR,
  FeatureNOPL,
  FeatureCMOV
],
[
  TuningSlowUAMem16,
  TuningInsertVZEROUPPER
]>;
1282
// NetBurst: SSE3 class CPU scheduled with GenericPostRAModel.
def : ProcModel<"prescott", GenericPostRAModel, [
  FeatureX87,
  FeatureCMPXCHG8B,
  FeatureMMX,
  FeatureSSE3,
  FeatureFXSR,
  FeatureNOPL,
  FeatureCMOV
],
[
  TuningSlowUAMem16,
  TuningInsertVZEROUPPER
]>;
// nocona: SSE3 class CPU with 64-bit support and CMPXCHG16B, scheduled with
// GenericPostRAModel.
def : ProcModel<"nocona", GenericPostRAModel,
                [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX,
                 FeatureSSE3, FeatureFXSR, FeatureNOPL, Feature64Bit,
                 FeatureCMPXCHG16B],
                [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1303
// Intel Core 2 Solo/Duo: SSSE3 class CPU scheduled with SandyBridgeModel.
def : ProcModel<"core2", SandyBridgeModel,
                [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX,
                 FeatureSSSE3, FeatureFXSR, FeatureNOPL, Feature64Bit,
                 FeatureCMPXCHG16B, FeatureLAHFSAHF],
                [TuningMacroFusion, TuningSlowUAMem16,
                 TuningInsertVZEROUPPER]>;
// penryn: same shape as core2, but with SSE4.1 as the SSE level.
def : ProcModel<"penryn", SandyBridgeModel,
                [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX,
                 FeatureSSE41, FeatureFXSR, FeatureNOPL, Feature64Bit,
                 FeatureCMPXCHG16B, FeatureLAHFSAHF],
                [TuningMacroFusion, TuningSlowUAMem16,
                 TuningInsertVZEROUPPER]>;
1339
// Atom CPUs. "bonnell" and "atom" get identical definitions.
foreach CPU = ["bonnell", "atom"] in {
  def : ProcModel<CPU, AtomModel,
                  ProcessorFeatures.AtomFeatures,
                  ProcessorFeatures.AtomTuning>;
}
1345
// "silvermont" and "slm" get identical definitions.
foreach CPU = ["silvermont", "slm"] in {
  def : ProcModel<CPU, SLMModel,
                  ProcessorFeatures.SLMFeatures,
                  ProcessorFeatures.SLMTuning>;
}
1350
// goldmont, goldmont-plus and tremont all use SLMModel as their scheduling
// model, each with its own feature and tuning lists.
def : ProcModel<"goldmont", SLMModel,
                ProcessorFeatures.GLMFeatures,
                ProcessorFeatures.GLMTuning>;
def : ProcModel<"goldmont-plus", SLMModel,
                ProcessorFeatures.GLPFeatures,
                ProcessorFeatures.GLPTuning>;
def : ProcModel<"tremont", SLMModel,
                ProcessorFeatures.TRMFeatures,
                ProcessorFeatures.TRMTuning>;
1357
// "Arrandale" along with corei3 and corei5.
// "nehalem" and "corei7" get identical definitions.
foreach CPU = ["nehalem", "corei7"] in {
  def : ProcModel<CPU, SandyBridgeModel,
                  ProcessorFeatures.NHMFeatures,
                  ProcessorFeatures.NHMTuning>;
}
1363
// Westmere is the corei3/i5/i7 path from nehalem to sandybridge.
def : ProcModel<"westmere", SandyBridgeModel,
                ProcessorFeatures.WSMFeatures,
                ProcessorFeatures.WSMTuning>;
1367
// "sandybridge" and "corei7-avx" get identical definitions.
foreach CPU = ["sandybridge", "corei7-avx"] in {
  def : ProcModel<CPU, SandyBridgeModel,
                  ProcessorFeatures.SNBFeatures,
                  ProcessorFeatures.SNBTuning>;
}
1372
// "ivybridge" and "core-avx-i" get identical definitions; the scheduling
// model is SandyBridgeModel.
foreach CPU = ["ivybridge", "core-avx-i"] in {
  def : ProcModel<CPU, SandyBridgeModel,
                  ProcessorFeatures.IVBFeatures,
                  ProcessorFeatures.IVBTuning>;
}
1377
// "haswell" and "core-avx2" get identical definitions.
foreach CPU = ["haswell", "core-avx2"] in {
  def : ProcModel<CPU, HaswellModel,
                  ProcessorFeatures.HSWFeatures,
                  ProcessorFeatures.HSWTuning>;
}
1382
// broadwell has its own scheduling model (BroadwellModel).
def : ProcModel<"broadwell", BroadwellModel,
                ProcessorFeatures.BDWFeatures,
                ProcessorFeatures.BDWTuning>;
1385
// skylake is scheduled with SkylakeClientModel.
def : ProcModel<"skylake", SkylakeClientModel,
                ProcessorFeatures.SKLFeatures,
                ProcessorFeatures.SKLTuning>;
1388
// FIXME: define KNL scheduler model. Until then knl and knm are scheduled
// with HaswellModel. knm has its own feature list but reuses KNLTuning.
def : ProcModel<"knl", HaswellModel,
                ProcessorFeatures.KNLFeatures,
                ProcessorFeatures.KNLTuning>;
def : ProcModel<"knm", HaswellModel,
                ProcessorFeatures.KNMFeatures,
                ProcessorFeatures.KNLTuning>;
1394
// "skylake-avx512" and "skx" get identical definitions.
foreach CPU = ["skylake-avx512", "skx"] in {
  def : ProcModel<CPU, SkylakeServerModel,
                  ProcessorFeatures.SKXFeatures,
                  ProcessorFeatures.SKXTuning>;
}
1399
// Processors scheduled with SkylakeServerModel.
def : ProcModel<"cascadelake", SkylakeServerModel,
                ProcessorFeatures.CLXFeatures,
                ProcessorFeatures.CLXTuning>;
def : ProcModel<"cooperlake", SkylakeServerModel,
                ProcessorFeatures.CPXFeatures,
                ProcessorFeatures.CPXTuning>;
def : ProcModel<"cannonlake", SkylakeServerModel,
                ProcessorFeatures.CNLFeatures,
                ProcessorFeatures.CNLTuning>;

// Processors scheduled with IceLakeModel. icelake-client and rocketlake get
// identical definitions.
foreach CPU = ["icelake-client", "rocketlake"] in {
  def : ProcModel<CPU, IceLakeModel,
                  ProcessorFeatures.ICLFeatures,
                  ProcessorFeatures.ICLTuning>;
}
def : ProcModel<"icelake-server", IceLakeModel,
                ProcessorFeatures.ICXFeatures,
                ProcessorFeatures.ICXTuning>;
def : ProcModel<"tigerlake", IceLakeModel,
                ProcessorFeatures.TGLFeatures,
                ProcessorFeatures.TGLTuning>;

// sapphirerapids uses SkylakeServerModel and alderlake uses
// SkylakeClientModel as the scheduling model.
def : ProcModel<"sapphirerapids", SkylakeServerModel,
                ProcessorFeatures.SPRFeatures,
                ProcessorFeatures.SPRTuning>;
def : ProcModel<"alderlake", SkylakeClientModel,
                ProcessorFeatures.ADLFeatures,
                ProcessorFeatures.ADLTuning>;
1418
1419// AMD CPUs.
1420
// k6 enables MMX; k6-2 and k6-3 get identical definitions that list 3DNow
// instead.
def : Proc<"k6", [FeatureX87, FeatureCMPXCHG8B, FeatureMMX],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
foreach CPU = ["k6-2", "k6-3"] in {
  def : Proc<CPU, [FeatureX87, FeatureCMPXCHG8B, Feature3DNow],
             [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
1427
// "athlon" and "athlon-tbird" get identical definitions.
foreach CPU = ["athlon", "athlon-tbird"] in {
  def : Proc<CPU, [
    FeatureX87,
    FeatureCMPXCHG8B,
    FeatureCMOV,
    Feature3DNowA,
    FeatureNOPL
  ],
  [
    TuningSlowSHLD,
    TuningSlowUAMem16,
    TuningInsertVZEROUPPER
  ]>;
}
1433
// "athlon-4", "athlon-xp" and "athlon-mp" get identical definitions (SSE1
// class with FXSR).
foreach CPU = ["athlon-4", "athlon-xp", "athlon-mp"] in {
  def : Proc<CPU, [
    FeatureX87,
    FeatureCMPXCHG8B,
    FeatureCMOV,
    FeatureSSE1,
    Feature3DNowA,
    FeatureFXSR,
    FeatureNOPL
  ],
  [
    TuningSlowSHLD,
    TuningSlowUAMem16,
    TuningInsertVZEROUPPER
  ]>;
}
1439
// "k8", "opteron", "athlon64" and "athlon-fx" get identical definitions (SSE2
// class with 64-bit support).
foreach CPU = ["k8", "opteron", "athlon64", "athlon-fx"] in {
  def : Proc<CPU, [
    FeatureX87,
    FeatureCMPXCHG8B,
    FeatureSSE2,
    Feature3DNowA,
    FeatureFXSR,
    FeatureNOPL,
    Feature64Bit,
    FeatureCMOV
  ],
  [
    TuningFastScalarShiftMasks,
    TuningSlowSHLD,
    TuningSlowUAMem16,
    TuningInsertVZEROUPPER
  ]>;
}
1446
// "k8-sse3", "opteron-sse3" and "athlon64-sse3" get identical definitions
// (SSE3 class with 64-bit support and CMPXCHG16B).
foreach CPU = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
  def : Proc<CPU, [
    FeatureX87,
    FeatureCMPXCHG8B,
    FeatureSSE3,
    Feature3DNowA,
    FeatureFXSR,
    FeatureNOPL,
    FeatureCMPXCHG16B,
    FeatureCMOV,
    Feature64Bit
  ],
  [
    TuningFastScalarShiftMasks,
    TuningSlowSHLD,
    TuningSlowUAMem16,
    TuningInsertVZEROUPPER
  ]>;
}
1454
// "amdfam10" and "barcelona" get identical definitions.
foreach CPU = ["amdfam10", "barcelona"] in {
  def : Proc<CPU,
             ProcessorFeatures.BarcelonaFeatures,
             ProcessorFeatures.BarcelonaTuning>;
}
1459
// Bobcat (no dedicated scheduling model is named; Proc supplies GenericModel).
def : Proc<"btver1",
           ProcessorFeatures.BtVer1Features,
           ProcessorFeatures.BtVer1Tuning>;

// Jaguar (scheduled with BtVer2Model).
def : ProcModel<"btver2", BtVer2Model,
                ProcessorFeatures.BtVer2Features,
                ProcessorFeatures.BtVer2Tuning>;
1466
// Bulldozer (scheduled with BdVer2Model).
def : ProcModel<"bdver1", BdVer2Model,
                ProcessorFeatures.BdVer1Features,
                ProcessorFeatures.BdVer1Tuning>;

// Piledriver (scheduled with BdVer2Model).
def : ProcModel<"bdver2", BdVer2Model,
                ProcessorFeatures.BdVer2Features,
                ProcessorFeatures.BdVer2Tuning>;

// Steamroller (defined through Proc, i.e. with GenericModel).
def : Proc<"bdver3",
           ProcessorFeatures.BdVer3Features,
           ProcessorFeatures.BdVer3Tuning>;

// Excavator (defined through Proc, i.e. with GenericModel).
def : Proc<"bdver4",
           ProcessorFeatures.BdVer4Features,
           ProcessorFeatures.BdVer4Tuning>;
1479
// AMD Zen processors; each generation has its own scheduling model.
def : ProcModel<"znver1", Znver1Model,
                ProcessorFeatures.ZNFeatures,
                ProcessorFeatures.ZNTuning>;
def : ProcModel<"znver2", Znver2Model,
                ProcessorFeatures.ZN2Features,
                ProcessorFeatures.ZN2Tuning>;
def : ProcModel<"znver3", Znver3Model,
                ProcessorFeatures.ZN3Features,
                ProcessorFeatures.ZN3Tuning>;
1486
// geode enables x87, CMPXCHG8B and 3DNowA.
def : Proc<"geode", [
  FeatureX87,
  FeatureCMPXCHG8B,
  Feature3DNowA
],
[
  TuningSlowUAMem16,
  TuningInsertVZEROUPPER
]>;
1489
// winchip-c6 enables x87 and MMX only (no CMPXCHG8B in the list).
def : Proc<"winchip-c6", [FeatureX87, FeatureMMX],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

// winchip2 and c3 get identical definitions: x87 plus 3DNow.
foreach CPU = ["winchip2", "c3"] in {
  def : Proc<CPU, [FeatureX87, Feature3DNow],
             [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}

// c3-2 is an SSE1 class definition with CMPXCHG8B, FXSR and CMOV.
def : Proc<"c3-2",
           [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE1,
            FeatureFXSR, FeatureCMOV],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1499
// Generic 64-bit x86 processor model. It aims to be good for modern chips
// without enabling instruction set encodings past the basic SSE2 and 64-bit
// ones: it disables things that are slow on any mainstream, modern 64-bit x86
// chip and enables features that are generally beneficial.
//
// The Sandy Bridge model is currently the default scheduling model because it
// is used across Nehalem, Westmere, Sandy Bridge, and Ivy Bridge, which covers
// a huge swath of x86 processors. If specific scheduling knobs need to be
// tuned differently for AMD chips, we might consider forming a common base
// for them.
def : ProcModel<"x86-64", SandyBridgeModel,
                ProcessorFeatures.X86_64V1Features,
                [TuningSlow3OpsLEA, TuningSlowDivide64, TuningSlowIncDec,
                 TuningMacroFusion, TuningInsertVZEROUPPER]>;
1518
// x86-64 micro-architecture levels. Each level has its own feature list and
// borrows the scheduling model and tuning list of an existing processor.

// v2 borrows SandyBridgeModel and the SNB tuning list.
def : ProcModel<"x86-64-v2", SandyBridgeModel,
                ProcessorFeatures.X86_64V2Features,
                ProcessorFeatures.SNBTuning>;

// v3 is close to Haswell.
def : ProcModel<"x86-64-v3", HaswellModel,
                ProcessorFeatures.X86_64V3Features,
                ProcessorFeatures.HSWTuning>;

// v4 is close to the AVX-512 level implemented by Xeon Scalable Processors.
def : ProcModel<"x86-64-v4", SkylakeServerModel,
                ProcessorFeatures.X86_64V4Features,
                ProcessorFeatures.SKXTuning>;
1528
1529//===----------------------------------------------------------------------===//
1530// Calling Conventions
1531//===----------------------------------------------------------------------===//
1532
1533include "X86CallingConv.td"
1534
1535
1536//===----------------------------------------------------------------------===//
1537// Assembly Parser
1538//===----------------------------------------------------------------------===//
1539
// Assembly parser variant 0, named "att".
def ATTAsmParserVariant : AsmParserVariant {
  let Variant = 0;
  let Name = "att";

  // Delimiter used to discard comments in assembly strings.
  let CommentDelimiter = "#";

  // Prefix used to recognize hard coded registers.
  let RegisterPrefix = "%";
}
1552
// Assembly parser variant 1, named "intel".
def IntelAsmParserVariant : AsmParserVariant {
  let Variant = 1;
  let Name = "intel";

  // Delimiter used to discard comments in assembly strings.
  let CommentDelimiter = ";";

  // Prefix used to recognize hard coded registers; empty for this variant.
  let RegisterPrefix = "";
}
1565
1566//===----------------------------------------------------------------------===//
1567// Assembly Printers
1568//===----------------------------------------------------------------------===//
1569
// The X86 target supports two different syntaxes for emitting machine code.
// The choice is controlled by the -x86-asm-syntax={att|intel} option.
//
// Writer for variant 0, implemented by the ATTInstPrinter class.
def ATTAsmWriter : AsmWriter {
  let AsmWriterClassName = "ATTInstPrinter";
  let Variant = 0;
}
// Writer for variant 1, implemented by the IntelInstPrinter class.
def IntelAsmWriter : AsmWriter {
  let AsmWriterClassName = "IntelInstPrinter";
  let Variant = 1;
}
1580
// Top-level target record: names the instruction set, registers both assembly
// parser variants and both assembly writers, and allows register renaming.
def X86 : Target {
  // Information about the instructions...
  let InstructionSet = X86InstrInfo;

  let AssemblyParserVariants = [
    ATTAsmParserVariant,
    IntelAsmParserVariant
  ];
  let AssemblyWriters = [
    ATTAsmWriter,
    IntelAsmWriter
  ];

  let AllowRegisterRenaming = 1;
}
1588
1589//===----------------------------------------------------------------------===//
1590// Pfm Counters
1591//===----------------------------------------------------------------------===//
1592
1593include "X86PfmCounters.td"
1594