xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86.td (revision 8311bc5f17dec348749f763b82dfe2737bc53cd7)
//===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This is a target description file for the Intel i386 architecture, referred
// to here as the "X86" architecture.
//
//===----------------------------------------------------------------------===//
13
// Get the target-independent interfaces which we are implementing...
//
16include "llvm/Target/Target.td"
17
//===----------------------------------------------------------------------===//
// X86 Subtarget state
//

// Execution-mode selectors. They describe only the mode the code runs in and
// are independent of any specific ABI / programming model.
def Is64Bit
    : SubtargetFeature<"64bit-mode", "Is64Bit", "true",
                       "64-bit mode (x86_64)">;
def Is32Bit
    : SubtargetFeature<"32bit-mode", "Is32Bit", "true",
                       "32-bit mode (80386)">;
def Is16Bit
    : SubtargetFeature<"16bit-mode", "Is16Bit", "true",
                       "16-bit mode (i8086)">;

//===----------------------------------------------------------------------===//
// X86 Subtarget ISA features
//===----------------------------------------------------------------------===//

// Stand-alone ISA extensions: no feature in this group implies another one.
def FeatureX87
    : SubtargetFeature<"x87", "HasX87", "true",
                       "Enable X87 float instructions">;

def FeatureNOPL
    : SubtargetFeature<"nopl", "HasNOPL", "true",
                       "Enable NOPL instruction (generally pentium pro+)">;

def FeatureCMOV
    : SubtargetFeature<"cmov", "HasCMOV", "true",
                       "Enable conditional move instructions">;

def FeatureCX8
    : SubtargetFeature<"cx8", "HasCX8", "true",
                       "Support CMPXCHG8B instructions">;

// Tracked by its own flag (HasCRC32) rather than through the SSE level.
def FeatureCRC32
    : SubtargetFeature<"crc32", "HasCRC32", "true",
                       "Enable SSE 4.2 CRC32 instruction (used when SSE4.2 is supported but function is GPR only)">;

def FeaturePOPCNT
    : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
                       "Support POPCNT instruction">;

def FeatureFXSR
    : SubtargetFeature<"fxsr", "HasFXSR", "true",
                       "Support fxsave/fxrestore instructions">;

// XSAVE and its variants. Every variant implies the base "xsave" feature.
def FeatureXSAVE
    : SubtargetFeature<"xsave", "HasXSAVE", "true",
                       "Support xsave instructions">;

def FeatureXSAVEOPT
    : SubtargetFeature<"xsaveopt", "HasXSAVEOPT", "true",
                       "Support xsaveopt instructions",
                       [FeatureXSAVE]>;

def FeatureXSAVEC
    : SubtargetFeature<"xsavec", "HasXSAVEC", "true",
                       "Support xsavec instructions",
                       [FeatureXSAVE]>;

def FeatureXSAVES
    : SubtargetFeature<"xsaves", "HasXSAVES", "true",
                       "Support xsaves instructions",
                       [FeatureXSAVE]>;

// SSE levels. All of them write the shared X86SSELevel field, and each level
// implies the one directly below it, forming the chain
// SSE1 <- SSE2 <- SSE3 <- SSSE3 <- SSE4.1 <- SSE4.2.
def FeatureSSE1
    : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
                       "Enable SSE instructions">;
def FeatureSSE2
    : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
                       "Enable SSE2 instructions",
                       [FeatureSSE1]>;
def FeatureSSE3
    : SubtargetFeature<"sse3", "X86SSELevel", "SSE3",
                       "Enable SSE3 instructions",
                       [FeatureSSE2]>;
def FeatureSSSE3
    : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3",
                       "Enable SSSE3 instructions",
                       [FeatureSSE3]>;
def FeatureSSE41
    : SubtargetFeature<"sse4.1", "X86SSELevel", "SSE41",
                       "Enable SSE 4.1 instructions",
                       [FeatureSSSE3]>;
def FeatureSSE42
    : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42",
                       "Enable SSE 4.2 instructions",
                       [FeatureSSE41]>;
// MMX is deliberately kept out of the SSE feature chain: for odd compatibility
// reasons it must be possible to switch MMX off explicitly while SSE and later
// levels stay enabled.
//
// MMX, 3DNow! and 3DNow! Athlon share the X863DNowLevel field; each level
// implies the one below it.
def FeatureMMX
    : SubtargetFeature<"mmx", "X863DNowLevel", "MMX",
                       "Enable MMX instructions">;
def Feature3DNow
    : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
                       "Enable 3DNow! instructions",
                       [FeatureMMX]>;
def Feature3DNowA
    : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
                       "Enable 3DNow! Athlon instructions",
                       [Feature3DNow]>;
// Every x86-64 CPU has SSE2, yet SSE2 is intentionally not listed as implied
// here: it must stay possible to disable SSE2 (e.g. when compiling OS kernels)
// without leaving 64-bit mode. No other feature should imply this bit; it
// exists to enforce that only 64-bit capable CPUs are used in 64-bit mode.
def FeatureX86_64
    : SubtargetFeature<"64bit", "HasX86_64", "true",
                       "Support 64-bit instructions">;

// cmpxchg16b support; implies cmpxchg8b.
def FeatureCX16
    : SubtargetFeature<"cx16", "HasCX16", "true",
                       "64-bit with cmpxchg16b (this is true for most x86-64 chips, but not the first AMD chips)",
                       [FeatureCX8]>;

// SSE4a is tracked by its own flag and implies SSE3.
def FeatureSSE4A
    : SubtargetFeature<"sse4a", "HasSSE4A", "true",
                       "Support SSE 4a instructions",
                       [FeatureSSE3]>;

// AVX levels continue the X86SSELevel chain: AVX implies SSE4.2 and AVX2
// implies AVX.
def FeatureAVX
    : SubtargetFeature<"avx", "X86SSELevel", "AVX",
                       "Enable AVX instructions",
                       [FeatureSSE42]>;
def FeatureAVX2
    : SubtargetFeature<"avx2", "X86SSELevel", "AVX2",
                       "Enable AVX2 instructions",
                       [FeatureAVX]>;
// Three-operand FMA; implies AVX. The help text previously read
// "multiple-add", a typo for "multiply-add".
def FeatureFMA     : SubtargetFeature<"fma", "HasFMA", "true",
                                      "Enable three-operand fused multiply-add",
                                      [FeatureAVX]>;
// Half-precision conversion instructions; implies AVX.
def FeatureF16C
    : SubtargetFeature<"f16c", "HasF16C", "true",
                       "Support 16-bit floating point conversion instructions",
                       [FeatureAVX]>;

// AVX-512 foundation: the top X86SSELevel value defined in this section. It
// implies AVX2, FMA and F16C.
def FeatureAVX512
    : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512",
                       "Enable AVX-512 instructions",
                       [FeatureAVX2, FeatureFMA, FeatureF16C]>;

// AVX-512 extensions that each imply the AVX-512 foundation.
def FeatureERI
    : SubtargetFeature<"avx512er", "HasERI", "true",
                       "Enable AVX-512 Exponential and Reciprocal Instructions",
                       [FeatureAVX512]>;
def FeatureCDI
    : SubtargetFeature<"avx512cd", "HasCDI", "true",
                       "Enable AVX-512 Conflict Detection Instructions",
                       [FeatureAVX512]>;
def FeatureVPOPCNTDQ
    : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ", "true",
                       "Enable AVX-512 Population Count Instructions",
                       [FeatureAVX512]>;
def FeaturePFI
    : SubtargetFeature<"avx512pf", "HasPFI", "true",
                       "Enable AVX-512 PreFetch Instructions",
                       [FeatureAVX512]>;
// Prefetch-hint features; neither implies any other feature.
def FeaturePREFETCHI
    : SubtargetFeature<"prefetchi", "HasPREFETCHI", "true",
                       "Prefetch instruction with T0 or T1 Hint">;
def FeaturePREFETCHWT1
    : SubtargetFeature<"prefetchwt1", "HasPREFETCHWT1", "true",
                       "Prefetch with Intent to Write and T1 Hint">;
// AVX-512 DQ / BW / VL: each implies the AVX-512 foundation.
def FeatureDQI
    : SubtargetFeature<"avx512dq", "HasDQI", "true",
                       "Enable AVX-512 Doubleword and Quadword Instructions",
                       [FeatureAVX512]>;
def FeatureBWI
    : SubtargetFeature<"avx512bw", "HasBWI", "true",
                       "Enable AVX-512 Byte and Word Instructions",
                       [FeatureAVX512]>;
def FeatureVLX
    : SubtargetFeature<"avx512vl", "HasVLX", "true",
                       "Enable AVX-512 Vector Length eXtensions",
                       [FeatureAVX512]>;

// Byte-manipulation extensions; both imply AVX-512 BW.
def FeatureVBMI
    : SubtargetFeature<"avx512vbmi", "HasVBMI", "true",
                       "Enable AVX-512 Vector Byte Manipulation Instructions",
                       [FeatureBWI]>;
def FeatureVBMI2
    : SubtargetFeature<"avx512vbmi2", "HasVBMI2", "true",
                       "Enable AVX-512 further Vector Byte Manipulation Instructions",
                       [FeatureBWI]>;

// AVX-IFMA implies AVX2 only; it does not imply the AVX-512 foundation.
def FeatureAVXIFMA
    : SubtargetFeature<"avxifma", "HasAVXIFMA", "true",
                       "Enable AVX-IFMA",
                       [FeatureAVX2]>;
// AVX-512 IFMA; implies the AVX-512 foundation. The help text previously read
// "Multiple-Add", a typo for "Multiply-Add".
def FeatureIFMA     : SubtargetFeature<"avx512ifma", "HasIFMA", "true",
                      "Enable AVX-512 Integer Fused Multiply-Add",
                                      [FeatureAVX512]>;
// Protection keys; implies nothing.
def FeaturePKU
    : SubtargetFeature<"pku", "HasPKU", "true",
                       "Enable protection keys">;

// VNNI comes in two flavours: the AVX-512 one implies the AVX-512 foundation,
// the AVX_VNNI one implies only AVX2.
def FeatureVNNI
    : SubtargetFeature<"avx512vnni", "HasVNNI", "true",
                       "Enable AVX-512 Vector Neural Network Instructions",
                       [FeatureAVX512]>;
def FeatureAVXVNNI
    : SubtargetFeature<"avxvnni", "HasAVXVNNI", "true",
                       "Support AVX_VNNI encoding",
                       [FeatureAVX2]>;

// BF16 and BITALG both imply AVX-512 BW.
def FeatureBF16
    : SubtargetFeature<"avx512bf16", "HasBF16", "true",
                       "Support bfloat16 floating point",
                       [FeatureBWI]>;
def FeatureBITALG
    : SubtargetFeature<"avx512bitalg", "HasBITALG", "true",
                       "Enable AVX-512 Bit Algorithms",
                       [FeatureBWI]>;

def FeatureVP2INTERSECT
    : SubtargetFeature<"avx512vp2intersect", "HasVP2INTERSECT", "true",
                       "Enable AVX-512 vp2intersect",
                       [FeatureAVX512]>;
// AVX-512 FP16 implies BW, VL and DQ. The VL and DQ implications are
// workarounds:
// FIXME: FP16 scalar intrinsics use the type v8f16, which is supposed to be
// guarded by hasVLX, so VLX is implied here for now.
// FIXME: FP16 conversion between f16 and i64 customizes the type v8i64, which
// is supposed to be guarded by hasDQI, so DQI is implied here for now.
def FeatureFP16
    : SubtargetFeature<"avx512fp16", "HasFP16", "true",
                       "Support 16-bit floating point",
                       [FeatureBWI, FeatureVLX, FeatureDQI]>;

// AVX-VNNI integer variants; both imply AVX2.
def FeatureAVXVNNIINT8
    : SubtargetFeature<"avxvnniint8", "HasAVXVNNIINT8", "true",
                       "Enable AVX-VNNI-INT8",
                       [FeatureAVX2]>;
def FeatureAVXVNNIINT16
    : SubtargetFeature<"avxvnniint16", "HasAVXVNNIINT16", "true",
                       "Enable AVX-VNNI-INT16",
                       [FeatureAVX2]>;
// Carry-less multiply and Galois-field arithmetic; both imply SSE2.
def FeaturePCLMUL
    : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
                       "Enable packed carry-less multiplication instructions",
                       [FeatureSSE2]>;
def FeatureGFNI
    : SubtargetFeature<"gfni", "HasGFNI", "true",
                       "Enable Galois Field Arithmetic Instructions",
                       [FeatureSSE2]>;

// Vector form of PCLMUL; implies both AVX and PCLMUL.
def FeatureVPCLMULQDQ
    : SubtargetFeature<"vpclmulqdq", "HasVPCLMULQDQ", "true",
                       "Enable vpclmulqdq instructions",
                       [FeatureAVX, FeaturePCLMUL]>;
// Four-operand FMA; implies AVX and SSE4a. The help text previously read
// "multiple-add", a typo for "multiply-add".
def FeatureFMA4    : SubtargetFeature<"fma4", "HasFMA4", "true",
                                      "Enable four-operand fused multiply-add",
                                      [FeatureAVX, FeatureSSE4A]>;
// XOP implies FMA4.
def FeatureXOP
    : SubtargetFeature<"xop", "HasXOP", "true",
                       "Enable XOP instructions",
                       [FeatureFMA4]>;

def FeatureSSEUnalignedMem
    : SubtargetFeature<"sse-unaligned-mem", "HasSSEUnalignedMem", "true",
                       "Allow unaligned memory operands with SSE instructions (this may require setting a configuration bit in the processor)">;

// AES implies SSE2; its vector form VAES implies both AVX and AES.
def FeatureAES
    : SubtargetFeature<"aes", "HasAES", "true",
                       "Enable AES instructions",
                       [FeatureSSE2]>;
def FeatureVAES
    : SubtargetFeature<"vaes", "HasVAES", "true",
                       "Promote selected AES instructions to AVX512/AVX registers",
                       [FeatureAVX, FeatureAES]>;
// Independent boolean ISA extensions; none of them implies another feature.
def FeatureTBM
    : SubtargetFeature<"tbm", "HasTBM", "true",
                       "Enable TBM instructions">;
def FeatureLWP
    : SubtargetFeature<"lwp", "HasLWP", "true",
                       "Enable LWP instructions">;
def FeatureMOVBE
    : SubtargetFeature<"movbe", "HasMOVBE", "true",
                       "Support MOVBE instruction">;
// Note: the feature string is spelled "rdrnd", not "rdrand".
def FeatureRDRAND
    : SubtargetFeature<"rdrnd", "HasRDRAND", "true",
                       "Support RDRAND instruction">;
def FeatureFSGSBase
    : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true",
                       "Support FS/GS Base instructions">;
def FeatureLZCNT
    : SubtargetFeature<"lzcnt", "HasLZCNT", "true",
                       "Support LZCNT instruction">;
def FeatureBMI
    : SubtargetFeature<"bmi", "HasBMI", "true",
                       "Support BMI instructions">;
def FeatureBMI2
    : SubtargetFeature<"bmi2", "HasBMI2", "true",
                       "Support BMI2 instructions">;
def FeatureRTM
    : SubtargetFeature<"rtm", "HasRTM", "true",
                       "Support RTM instructions">;
def FeatureADX
    : SubtargetFeature<"adx", "HasADX", "true",
                       "Support ADX instructions">;
// SHA implies SSE2.
def FeatureSHA
    : SubtargetFeature<"sha", "HasSHA", "true",
                       "Enable SHA instructions",
                       [FeatureSSE2]>;
// NOTE(review): sha512 (and sm4 below) imply only AVX here -- confirm against
// the ISA reference whether AVX2 is the intended prerequisite.
def FeatureSHA512
    : SubtargetFeature<"sha512", "HasSHA512", "true",
                       "Support SHA512 instructions",
                       [FeatureAVX]>;

// The processor supports CET SHSTK: Control-Flow Enforcement Technology using
// a Shadow Stack.
def FeatureSHSTK
    : SubtargetFeature<"shstk", "HasSHSTK", "true",
                       "Support CET Shadow-Stack instructions">;

// SM3 / SM4 each imply AVX.
def FeatureSM3
    : SubtargetFeature<"sm3", "HasSM3", "true",
                       "Support SM3 instructions",
                       [FeatureAVX]>;
def FeatureSM4
    : SubtargetFeature<"sm4", "HasSM4", "true",
                       "Support SM4 instructions",
                       [FeatureAVX]>;
// Independent boolean ISA extensions; none of them implies another feature.
def FeaturePRFCHW
    : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
                       "Support PRFCHW instructions">;
def FeatureRDSEED
    : SubtargetFeature<"rdseed", "HasRDSEED", "true",
                       "Support RDSEED instruction">;
// Note: the feature string is just "sahf" although it covers both LAHF and
// SAHF in 64-bit mode.
def FeatureLAHFSAHF64
    : SubtargetFeature<"sahf", "HasLAHFSAHF64", "true",
                       "Support LAHF and SAHF instructions in 64-bit mode">;
def FeatureMWAITX
    : SubtargetFeature<"mwaitx", "HasMWAITX", "true",
                       "Enable MONITORX/MWAITX timer functionality">;
def FeatureCLZERO
    : SubtargetFeature<"clzero", "HasCLZERO", "true",
                       "Enable Cache Line Zero">;
def FeatureCLDEMOTE
    : SubtargetFeature<"cldemote", "HasCLDEMOTE", "true",
                       "Enable Cache Line Demote">;
def FeaturePTWRITE
    : SubtargetFeature<"ptwrite", "HasPTWRITE", "true",
                       "Support ptwrite instruction">;
// AMX family. AMX-TILE is the base feature; every other AMX extension below
// implies it.
def FeatureAMXTILE
    : SubtargetFeature<"amx-tile", "HasAMXTILE", "true",
                       "Support AMX-TILE instructions">;
def FeatureAMXINT8
    : SubtargetFeature<"amx-int8", "HasAMXINT8", "true",
                       "Support AMX-INT8 instructions",
                       [FeatureAMXTILE]>;
def FeatureAMXBF16
    : SubtargetFeature<"amx-bf16", "HasAMXBF16", "true",
                       "Support AMX-BF16 instructions",
                       [FeatureAMXTILE]>;
def FeatureAMXFP16
    : SubtargetFeature<"amx-fp16", "HasAMXFP16", "true",
                       "Support AMX amx-fp16 instructions",
                       [FeatureAMXTILE]>;
def FeatureAMXCOMPLEX
    : SubtargetFeature<"amx-complex", "HasAMXCOMPLEX", "true",
                       "Support AMX-COMPLEX instructions",
                       [FeatureAMXTILE]>;
def FeatureCMPCCXADD
    : SubtargetFeature<"cmpccxadd", "HasCMPCCXADD", "true",
                       "Support CMPCCXADD instructions">;
// The implied-feature list is spelled out explicitly here, and it is empty.
def FeatureRAOINT
    : SubtargetFeature<"raoint", "HasRAOINT", "true",
                       "Support RAO-INT instructions",
                       []>;
// AVX-NE-CONVERT implies AVX2.
def FeatureAVXNECONVERT
    : SubtargetFeature<"avxneconvert", "HasAVXNECONVERT", "true",
                       "Support AVX-NE-CONVERT instructions",
                       [FeatureAVX2]>;
// Independent boolean ISA extensions; none of them implies another feature.
def FeatureINVPCID
    : SubtargetFeature<"invpcid", "HasINVPCID", "true",
                       "Invalidate Process-Context Identifier">;
def FeatureSGX
    : SubtargetFeature<"sgx", "HasSGX", "true",
                       "Enable Software Guard Extensions">;
def FeatureCLFLUSHOPT
    : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true",
                       "Flush A Cache Line Optimized">;
def FeatureCLWB
    : SubtargetFeature<"clwb", "HasCLWB", "true",
                       "Cache Line Write Back">;
def FeatureWBNOINVD
    : SubtargetFeature<"wbnoinvd", "HasWBNOINVD", "true",
                       "Write Back No Invalidate">;
def FeatureRDPID
    : SubtargetFeature<"rdpid", "HasRDPID", "true",
                       "Support RDPID instructions">;
def FeatureRDPRU
    : SubtargetFeature<"rdpru", "HasRDPRU", "true",
                       "Support RDPRU instructions">;
def FeatureWAITPKG
    : SubtargetFeature<"waitpkg", "HasWAITPKG", "true",
                       "Wait and pause enhancements">;
def FeatureENQCMD
    : SubtargetFeature<"enqcmd", "HasENQCMD", "true",
                       "Has ENQCMD instructions">;
// Key Locker: the base feature implies SSE2 and the wide variant implies the
// base feature.
def FeatureKL
    : SubtargetFeature<"kl", "HasKL", "true",
                       "Support Key Locker kl Instructions",
                       [FeatureSSE2]>;
def FeatureWIDEKL
    : SubtargetFeature<"widekl", "HasWIDEKL", "true",
                       "Support Key Locker wide Instructions",
                       [FeatureKL]>;

// Independent boolean ISA extensions; none of them implies another feature.
def FeatureHRESET
    : SubtargetFeature<"hreset", "HasHRESET", "true",
                       "Has hreset instruction">;
def FeatureSERIALIZE
    : SubtargetFeature<"serialize", "HasSERIALIZE", "true",
                       "Has serialize instruction">;
def FeatureTSXLDTRK
    : SubtargetFeature<"tsxldtrk", "HasTSXLDTRK", "true",
                       "Support TSXLDTRK instructions">;
def FeatureUINTR
    : SubtargetFeature<"uintr", "HasUINTR", "true",
                       "Has UINTR Instructions">;
def FeaturePCONFIG
    : SubtargetFeature<"pconfig", "HasPCONFIG", "true",
                       "platform configuration instruction">;
def FeatureMOVDIRI
    : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
                       "Support movdiri instruction (direct store integer)">;
def FeatureMOVDIR64B
    : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true",
                       "Support movdir64b instruction (direct store 64 bytes)">;

// Enhanced REP MOVSB / STOSB ("string operations"), available on Ivy Bridge
// and newer processors. See "REP String Enhancement" in the Intel Software
// Development Manual. In essence, REP MOVSB copies using the largest available
// size rather than byte by byte, which makes it at least as fast as
// REPMOVS{W,D,Q}.
def FeatureERMSB : SubtargetFeature<"ermsb", "HasERMSB", "true",
                                    "REP MOVS/STOS are fast">;

// Fast Short REP MOV, available on Icelake and newer processors.
def FeatureFSRM : SubtargetFeature<"fsrm", "HasFSRM", "true",
                                   "REP MOVSB of short lengths is faster">;

// Sets the UseSoftFloat subtarget field.
def FeatureSoftFloat : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
                                        "Use software floating point features">;

//===----------------------------------------------------------------------===//
// X86 Subtarget Security Mitigation features
//===----------------------------------------------------------------------===//

// Mitigate potential Spectre v2 attacks on indirect calls by lowering them
// through a special construct called a `retpoline`.
def FeatureRetpolineIndirectCalls : SubtargetFeature<
    "retpoline-indirect-calls", "UseRetpolineIndirectCalls", "true",
    "Remove speculation of indirect calls from the generated code">;

// Mitigate potential Spectre v2 attacks on indirect branches and switches by
// lowering them either as conditional branch trees or through a `retpoline`.
def FeatureRetpolineIndirectBranches : SubtargetFeature<
    "retpoline-indirect-branches", "UseRetpolineIndirectBranches", "true",
    "Remove speculation of indirect branches from the generated code">;

// Deprecated umbrella feature: implies both `retpoline-indirect-calls` and
// `retpoline-indirect-branches`.
def FeatureRetpoline : SubtargetFeature<
    "retpoline", "DeprecatedUseRetpoline", "true",
    "Remove speculation of indirect branches from the "
    "generated code, either by avoiding them entirely or "
    "lowering them with a speculation blocking construct",
    [FeatureRetpolineIndirectCalls, FeatureRetpolineIndirectBranches]>;

// Use externally provided thunks for the emitted retpoline calls, so users can
// supply custom thunk definitions in highly specialized environments such as a
// kernel that does boot-time hot patching. Implies `retpoline-indirect-calls`.
def FeatureRetpolineExternalThunk : SubtargetFeature<
    "retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
    "When lowering an indirect call or branch using a `retpoline`, rely "
    "on the specified user provided thunk rather than emitting one "
    "ourselves. Only has effect when combined with some other retpoline "
    "feature",
    [FeatureRetpolineIndirectCalls]>;

// LVI mitigation for indirect calls/branches and for call returns.
def FeatureLVIControlFlowIntegrity : SubtargetFeature<
    "lvi-cfi", "UseLVIControlFlowIntegrity", "true",
    "Prevent indirect calls/branches from using a memory operand, and "
    "precede all indirect calls/branches from a register with an "
    "LFENCE instruction to serialize control flow. Also decompose RET "
    "instructions into a POP+LFENCE+JMP sequence.">;

// SESES: mitigation against speculative execution attacks. Implies `lvi-cfi`.
def FeatureSpeculativeExecutionSideEffectSuppression : SubtargetFeature<
    "seses", "UseSpeculativeExecutionSideEffectSuppression", "true",
    "Prevent speculative execution side channel timing attacks by "
    "inserting a speculation barrier before memory reads, memory writes, "
    "and conditional branches. Implies LVI Control Flow integrity.",
    [FeatureLVIControlFlowIntegrity]>;

// LVI mitigation for data loads.
def FeatureLVILoadHardening : SubtargetFeature<
    "lvi-load-hardening", "UseLVILoadHardening", "true",
    "Insert LFENCE instructions to prevent data speculatively injected "
    "into loads from being used maliciously.">;

// Sets the AllowTaggedGlobals subtarget field.
def FeatureTaggedGlobals : SubtargetFeature<
    "tagged-globals", "AllowTaggedGlobals", "true",
    "Use an instruction sequence for taking the address of a global "
    "that allows a memory tag in the upper address bits.">;

// Codegen mitigations for the Straight Line Speculation (SLS) vulnerability,
// selectable separately for RET and for indirect JMP instructions.
def FeatureHardenSlsRet : SubtargetFeature<
    "harden-sls-ret", "HardenSlsRet", "true",
    "Harden against straight line speculation across RET instructions.">;

def FeatureHardenSlsIJmp : SubtargetFeature<
    "harden-sls-ijmp", "HardenSlsIJmp", "true",
    "Harden against straight line speculation across indirect JMP instructions.">;

//===----------------------------------------------------------------------===//
// X86 Subtarget Tuning features
//===----------------------------------------------------------------------===//

// Instruction-selection preferences and per-instruction slowness flags. None
// of these implies another feature.
def TuningPreferMovmskOverVTest
    : SubtargetFeature<"prefer-movmsk-over-vtest",
                       "PreferMovmskOverVTest", "true",
                       "Prefer movmsk over vtest instruction">;

def TuningSlowSHLD
    : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
                       "SHLD instruction is slow">;

def TuningSlowPMULLD
    : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
                       "PMULLD instruction is slow (compared to PMULLW/PMULHW and PMULUDQ)">;

def TuningSlowPMADDWD
    : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow", "true",
                       "PMADDWD is slower than PMULLD">;

// FIXME: This should not apply to CPUs that do not have SSE.
def TuningSlowUAMem16
    : SubtargetFeature<"slow-unaligned-mem-16",
                       "IsUnalignedMem16Slow", "true",
                       "Slow unaligned 16-byte memory access">;

def TuningSlowUAMem32
    : SubtargetFeature<"slow-unaligned-mem-32",
                       "IsUnalignedMem32Slow", "true",
                       "Slow unaligned 32-byte memory access">;

def TuningLEAForSP
    : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
                       "Use LEA for adjusting the stack pointer (this is an optimization for Intel Atom processors)">;

// Set when 8-bit divisions are significantly faster than 32-bit divisions and
// should be used whenever possible.
def TuningSlowDivide32
    : SubtargetFeature<"idivl-to-divb", "HasSlowDivide32", "true",
                       "Use 8-bit divide for positive values less than 256">;

// Set when 32-bit divisions are significantly faster than 64-bit divisions and
// should be used whenever possible.
def TuningSlowDivide64
    : SubtargetFeature<"idivq-to-divl", "HasSlowDivide64", "true",
                       "Use 32-bit divide for positive values less than 2^32">;

def TuningPadShortFunctions
    : SubtargetFeature<"pad-short-functions", "PadShortFunctions", "true",
                       "Pad short functions (to prevent a stall when returning too early)">;

// Some processors are slow on instructions that implicitly take two memory
// operands. In practice this means CALL, PUSH and POP with memory operands
// should be avoided in favor of a MOV followed by a register CALL/PUSH/POP.
def TuningSlowTwoMemOps
    : SubtargetFeature<"slow-two-mem-ops", "SlowTwoMemOps", "true",
                       "Two memory operand instructions are slow">;

// Set when the inputs of an LEA instruction have to be ready at address
// generation (AG) time.
def TuningLEAUsesAG
    : SubtargetFeature<"lea-uses-ag", "LeaUsesAG", "true",
                       "LEA instruction needs inputs at AG stage">;

def TuningSlowLEA
    : SubtargetFeature<"slow-lea", "SlowLEA", "true",
                       "LEA instruction with certain arguments is slow">;

// Applies to an LEA that has all three source operands (base, index and
// offset), or that uses base and index registers where the base is EBP, RBP,
// or R13.
def TuningSlow3OpsLEA
    : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
                       "LEA instruction with 3 ops or certain registers is slow">;

// Set when INC and DEC instructions are slow when writing to flags.
def TuningSlowIncDec
    : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
                       "INC and DEC instructions are slower than ADD and SUB">;

505def TuningPOPCNTFalseDeps : SubtargetFeature<"false-deps-popcnt",
506                                     "HasPOPCNTFalseDeps", "true",
507                                     "POPCNT has a false dependency on dest register">;
508
509def TuningLZCNTFalseDeps : SubtargetFeature<"false-deps-lzcnt-tzcnt",
510                                     "HasLZCNTFalseDeps", "true",
511                                     "LZCNT/TZCNT have a false dependency on dest register">;
512
513def TuningMULCFalseDeps : SubtargetFeature<"false-deps-mulc",
514                               "HasMULCFalseDeps", "true",
515                               "VF[C]MULCPH/SH has a false dependency on dest register">;
516
517def TuningPERMFalseDeps : SubtargetFeature<"false-deps-perm",
518                               "HasPERMFalseDeps", "true",
519                               "VPERMD/Q/PS/PD has a false dependency on dest register">;
520
// True if VRANGEPD/VRANGEPS/VRANGESD/VRANGESS have a false dependency on
// their destination register.
// Feature string "false-deps-range"; subtarget field HasRANGEFalseDeps. Listed
// in the SPRAdditionalTuning set of ProcessorFeatures.
521def TuningRANGEFalseDeps : SubtargetFeature<"false-deps-range",
522                               "HasRANGEFalseDeps", "true",
523                               "VRANGEPD/PS/SD/SS has a false dependency on dest register">;
524
// True if the VGETMANT family has a false dependency on the destination
// register: per the description, the scalar SS/SD/SH forms and the
// memory-operand forms of the packed PS/PD instructions.
// Feature string "false-deps-getmant"; subtarget field HasGETMANTFalseDeps.
// Listed in the SPRAdditionalTuning set of ProcessorFeatures.
// The description previously misspelled the packed mnemonic as "VGETMANDPS".
525def TuningGETMANTFalseDeps : SubtargetFeature<"false-deps-getmant",
526                               "HasGETMANTFalseDeps", "true",
527                               "VGETMANTSS/SD/SH and VGETMANTPS/PD(memory version) has a"
528                               " false dependency on dest register">;
529
// True if VPMULLQ has a false dependency on its destination register.
// Feature string "false-deps-mullq"; subtarget field HasMULLQFalseDeps. Listed
// in the SPRAdditionalTuning set of ProcessorFeatures.
530def TuningMULLQFalseDeps : SubtargetFeature<"false-deps-mullq",
531                               "HasMULLQFalseDeps", "true",
532                               "VPMULLQ has a false dependency on dest register">;
533
// True if an SBB whose two operands are the same register carries no
// dependency on that register's previous value.
// Feature string "sbb-dep-breaking"; subtarget field HasSBBDepBreaking.
534def TuningSBBDepBreaking : SubtargetFeature<"sbb-dep-breaking",
535                                     "HasSBBDepBreaking", "true",
536                                     "SBB with same register has no source dependency">;
537
538// On recent X86 (port bound) processors, it's preferable to combine to a single shuffle
539// using a variable mask over multiple fixed shuffles.
// This feature covers the cross-lane case. Feature string
// "fast-variable-crosslane-shuffle"; subtarget field
// HasFastVariableCrossLaneShuffle.
540def TuningFastVariableCrossLaneShuffle
541    : SubtargetFeature<"fast-variable-crosslane-shuffle",
542                       "HasFastVariableCrossLaneShuffle",
543                       "true", "Cross-lane shuffles with variable masks are fast">;
// Per-lane counterpart of TuningFastVariableCrossLaneShuffle: true if
// variable-mask shuffles that stay within a lane are fast.
// Feature string "fast-variable-perlane-shuffle"; subtarget field
// HasFastVariablePerLaneShuffle.
544def TuningFastVariablePerLaneShuffle
545    : SubtargetFeature<"fast-variable-perlane-shuffle",
546                       "HasFastVariablePerLaneShuffle",
547                       "true", "Per-lane shuffles with variable masks are fast">;
548
549// Goldmont / Tremont (Atom in general) have no bypass delay.
// Feature string "no-bypass-delay"; subtarget field NoDomainDelay. Unlike the
// -mov/-blend/-shuffle variants below, this one is not limited to one
// instruction kind.
550def TuningNoDomainDelay : SubtargetFeature<"no-bypass-delay",
551                                   "NoDomainDelay","true",
552                                   "Has no bypass delay when using the 'wrong' domain">;
553
554// Many processors (Nehalem+ on Intel) have no bypass delay when
555// using the wrong mov type.
// Feature string "no-bypass-delay-mov"; subtarget field NoDomainDelayMov.
556def TuningNoDomainDelayMov : SubtargetFeature<"no-bypass-delay-mov",
557                                   "NoDomainDelayMov","true",
558                                   "Has no bypass delay when using the 'wrong' mov type">;
559
560// Newer processors (Skylake+ on Intel) have no bypass delay when
561// using the wrong blend type.
// Feature string "no-bypass-delay-blend"; subtarget field NoDomainDelayBlend.
562def TuningNoDomainDelayBlend : SubtargetFeature<"no-bypass-delay-blend",
563                                   "NoDomainDelayBlend","true",
564                                   "Has no bypass delay when using the 'wrong' blend type">;
565
566// Newer processors (Haswell+ on Intel) have no bypass delay when
567// using the wrong shuffle type.
// Feature string "no-bypass-delay-shuffle"; subtarget field
// NoDomainDelayShuffle.
568def TuningNoDomainDelayShuffle : SubtargetFeature<"no-bypass-delay-shuffle",
569                                   "NoDomainDelayShuffle","true",
570                                   "Has no bypass delay when using the 'wrong' shuffle type">;
571
572// Prefer lowering shuffles on AVX512 targets (e.g. Skylake Server) to
573// imm shifts/rotate if they can use more ports than regular shuffles.
// Note the three names differ: the def is TuningPreferShiftShuffle, the
// feature string is "faster-shift-than-shuffle", and the subtarget field is
// PreferLowerShuffleAsShift.
574def TuningPreferShiftShuffle : SubtargetFeature<"faster-shift-than-shuffle",
575                                   "PreferLowerShuffleAsShift", "true",
576                                   "Shifts are faster (or as fast) as shuffle">;
577
// True if vector shifts run at 2 per cycle rather than 1 per cycle (per the
// description). Subtarget field FastImmVectorShift.
// NOTE(review): the feature string "tuning-fast-imm-vector-shift" carries a
// "tuning-" prefix that no other tuning feature string in this section uses;
// it is user-visible, so confirm before renaming.
578def TuningFastImmVectorShift : SubtargetFeature<"tuning-fast-imm-vector-shift",
579                                   "FastImmVectorShift", "true",
580                                   "Vector shifts are fast (2/cycle) as opposed to slow (1/cycle)">;
581
582// On some X86 processors, a vzeroupper instruction should be inserted after
583// using ymm/zmm registers before executing code that may use SSE instructions.
// Feature string "vzeroupper"; subtarget field InsertVZEROUPPER. Every tuning
// list visible in ProcessorFeatures below includes it.
584def TuningInsertVZEROUPPER
585    : SubtargetFeature<"vzeroupper",
586                       "InsertVZEROUPPER",
587                       "true", "Should insert vzeroupper instructions">;
588
589// TuningFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency
590// than the corresponding NR code. TuningFastVectorFSQRT should be enabled if
591// vector FSQRT has higher throughput than the corresponding NR code.
592// The idea is that throughput bound code is likely to be vectorized, so for
593// vectorized code we should care about the throughput of SQRT operations.
594// But if the code is scalar that probably means that the code has some kind of
595// dependency and we should care more about reducing the latency.
596
597// True if hardware SQRTSS instruction is at least as fast (latency) as
598// RSQRTSS followed by a Newton-Raphson iteration.
// Feature string "fast-scalar-fsqrt"; subtarget field HasFastScalarFSQRT.
599def TuningFastScalarFSQRT
600    : SubtargetFeature<"fast-scalar-fsqrt", "HasFastScalarFSQRT",
601                       "true", "Scalar SQRT is fast (disable Newton-Raphson)">;
602// True if hardware SQRTPS/VSQRTPS instructions are at least as fast
603// (throughput) as RSQRTPS/VRSQRTPS followed by a Newton-Raphson iteration.
// Feature string "fast-vector-fsqrt"; subtarget field HasFastVectorFSQRT.
604def TuningFastVectorFSQRT
605    : SubtargetFeature<"fast-vector-fsqrt", "HasFastVectorFSQRT",
606                       "true", "Vector SQRT is fast (disable Newton-Raphson)">;
607
608// If lzcnt has equivalent latency/throughput to most simple integer ops, it can
609// be used to replace test/set sequences.
// Feature string "fast-lzcnt"; subtarget field HasFastLZCNT.
610def TuningFastLZCNT
611    : SubtargetFeature<
612          "fast-lzcnt", "HasFastLZCNT", "true",
613          "LZCNT instructions are as fast as most simple integer ops">;
614
615// True if the target can efficiently decode NOPs up to 7 bytes in length.
// Feature string "fast-7bytenop"; subtarget field HasFast7ByteNOP.
616def TuningFast7ByteNOP
617    : SubtargetFeature<
618          "fast-7bytenop", "HasFast7ByteNOP", "true",
619          "Target can quickly decode up to 7 byte NOPs">;
620
621// True if the target can efficiently decode NOPs up to 11 bytes in length.
// Feature string "fast-11bytenop"; subtarget field HasFast11ByteNOP.
622def TuningFast11ByteNOP
623    : SubtargetFeature<
624          "fast-11bytenop", "HasFast11ByteNOP", "true",
625          "Target can quickly decode up to 11 byte NOPs">;
626
627// True if the target can efficiently decode NOPs up to 15 bytes in length.
// Feature string "fast-15bytenop"; subtarget field HasFast15ByteNOP.
628def TuningFast15ByteNOP
629    : SubtargetFeature<
630          "fast-15bytenop", "HasFast15ByteNOP", "true",
631          "Target can quickly decode up to 15 byte NOPs">;
632
633// Sandy Bridge and newer processors can use SHLD with the same source on both
634// inputs to implement rotate to avoid the partial flag update of the normal
635// rotate instructions.
// Feature string "fast-shld-rotate"; subtarget field HasFastSHLDRotate.
636def TuningFastSHLDRotate
637    : SubtargetFeature<
638          "fast-shld-rotate", "HasFastSHLDRotate", "true",
639          "SHLD can be used as a faster rotate">;
640
641// Bulldozer and newer processors can merge CMP/TEST (but not other
642// instructions) with conditional branches.
// Narrower than TuningMacroFusion, which covers "various instructions".
// Feature string "branchfusion"; subtarget field HasBranchFusion.
643def TuningBranchFusion
644    : SubtargetFeature<"branchfusion", "HasBranchFusion", "true",
645                 "CMP/TEST can be fused with conditional branches">;
646
647// Sandy Bridge and newer processors have many instructions that can be
648// fused with conditional branches and pass through the CPU as a single
649// operation.
// Feature string "macrofusion"; subtarget field HasMacroFusion.
650def TuningMacroFusion
651    : SubtargetFeature<"macrofusion", "HasMacroFusion", "true",
652                 "Various instructions can be fused with conditional branches">;
653
654// Gather is available since Haswell (AVX2 set). So technically, we can
655// generate Gathers on all AVX2 processors. But the overhead on HSW is high.
656// Skylake Client processor has faster Gathers than HSW and performance is
657// similar to Skylake Server (AVX-512).
// Feature string "fast-gather"; subtarget field HasFastGather. Consistent with
// the note above, HSWTuning below omits it while SKLTuning and SKXTuning
// include it.
658def TuningFastGather
659    : SubtargetFeature<"fast-gather", "HasFastGather", "true",
660                       "Indicates if gather is reasonably fast (this is true for Skylake client and all AVX-512 CPUs)">;
661
// Inverted-sense feature: enabling "prefer-no-gather" sets the subtarget field
// PreferGather to "false" (most features in this file set their field to
// "true").
// NOTE(review): this implies PreferGather defaults to true in the subtarget;
// that default is not visible in this file - confirm.
662def TuningPreferNoGather
663    : SubtargetFeature<"prefer-no-gather", "PreferGather", "false",
664                       "Prefer no gather instructions">;
// Inverted-sense feature: enabling "prefer-no-scatter" sets the subtarget
// field PreferScatter to "false" (most features in this file set their field
// to "true").
// NOTE(review): this implies PreferScatter defaults to true in the subtarget;
// that default is not visible in this file - confirm.
665def TuningPreferNoScatter
666    : SubtargetFeature<"prefer-no-scatter", "PreferScatter", "false",
667                       "Prefer no scatter instructions">;
668
// True if 128-bit AVX instructions should be preferred on this target.
// Feature string "prefer-128-bit"; subtarget field Prefer128Bit.
// See TuningAllowLight256Bit for the 256-bit load/store exception.
669def TuningPrefer128Bit
670    : SubtargetFeature<"prefer-128-bit", "Prefer128Bit", "true",
671                       "Prefer 128-bit AVX instructions">;
672
// True if 256-bit AVX instructions should be preferred on this target.
// Feature string "prefer-256-bit"; subtarget field Prefer256Bit. Listed in
// e.g. the SKXTuning, CNLTuning and ICLTuning sets of ProcessorFeatures.
673def TuningPrefer256Bit
674    : SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true",
675                       "Prefer 256-bit AVX instructions">;
676
// True if 256-bit loads/stores may still be generated on a target that
// otherwise prefers 128-bit vectors.
// Feature string "allow-light-256-bit"; subtarget field AllowLight256Bit.
677def TuningAllowLight256Bit
678    : SubtargetFeature<"allow-light-256-bit", "AllowLight256Bit", "true",
679                       "Enable generation of 256-bit load/stores even if we prefer 128-bit">;
680
// True if AVX512 mask registers should be preferred over PTEST/MOVMSK.
// Feature string "prefer-mask-registers"; subtarget field PreferMaskRegisters.
681def TuningPreferMaskRegisters
682    : SubtargetFeature<"prefer-mask-registers", "PreferMaskRegisters", "true",
683                       "Prefer AVX512 mask registers over PTEST/MOVMSK">;
684
// True if BEXTR is implemented as a single uop with good throughput.
// Feature string "fast-bextr"; subtarget field HasFastBEXTR.
// The description is split across two adjacent string literals; the first
// ends with a space so the joined text reads correctly.
685def TuningFastBEXTR : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true",
686          "Indicates that the BEXTR instruction is implemented as a single uop "
687          "with good throughput">;
688
689// Combine vector math operations with shuffles into horizontal math
690// instructions if a CPU implements horizontal operations (introduced with
691// SSE3) with better latency/throughput than the alternative sequence.
// Feature string "fast-hops"; subtarget field HasFastHorizontalOps.
692def TuningFastHorizontalOps
693    : SubtargetFeature<
694        "fast-hops", "HasFastHorizontalOps", "true",
695        "Prefer horizontal vector math instructions (haddp, phsub, etc.) over "
696        "normal vector instructions with shuffles">;
697
// True if, for scalars, a left/right logical shift pair is preferred over a
// shift followed by an AND.
// Feature string "fast-scalar-shift-masks"; subtarget field
// HasFastScalarShiftMasks.
698def TuningFastScalarShiftMasks
699    : SubtargetFeature<
700        "fast-scalar-shift-masks", "HasFastScalarShiftMasks", "true",
701        "Prefer a left/right scalar logical shift pair over a shift+and pair">;
702
// True if, for vectors, a left/right logical shift pair is preferred over a
// shift followed by an AND.
// Feature string "fast-vector-shift-masks"; subtarget field
// HasFastVectorShiftMasks.
703def TuningFastVectorShiftMasks
704    : SubtargetFeature<
705        "fast-vector-shift-masks", "HasFastVectorShiftMasks", "true",
706        "Prefer a left/right vector logical shift pair over a shift+and pair">;
707
// True if MOVBE should be preferred over a single-use load + bswap or a
// single-use bswap + store.
// Feature string "fast-movbe"; subtarget field HasFastMOVBE. Listed in e.g.
// the SLMTuning, GLMTuning and GLPTuning sets of ProcessorFeatures.
708def TuningFastMOVBE
709    : SubtargetFeature<"fast-movbe", "HasFastMOVBE", "true",
710    "Prefer a movbe over a single-use load + bswap / single-use bswap + store">;
711
// Selects Silvermont-specific arithmetic costs.
// Feature string "use-slm-arith-costs"; subtarget field UseSLMArithCosts.
// Listed in the SLMTuning set of ProcessorFeatures.
712def TuningUseSLMArithCosts
713    : SubtargetFeature<"use-slm-arith-costs", "UseSLMArithCosts", "true",
714        "Use Silvermont specific arithmetic costs">;
715
// Selects Goldmont-specific floating point div/sqrt costs.
// Feature string "use-glm-div-sqrt-costs"; subtarget field UseGLMDivSqrtCosts.
// Listed in the GLMTuning and GLPTuning sets of ProcessorFeatures.
716def TuningUseGLMDivSqrtCosts
717    : SubtargetFeature<"use-glm-div-sqrt-costs", "UseGLMDivSqrtCosts", "true",
718        "Use Goldmont specific floating point div/sqrt costs">;
719
720//===----------------------------------------------------------------------===//
721// X86 CPU Families
722// TODO: Remove these - use general tuning features to determine codegen.
723//===----------------------------------------------------------------------===//
724
725// Bonnell
// CPU-family marker rather than a tuning knob: sets the subtarget field IsAtom.
// The feature string is empty, so presumably it cannot be selected by name and
// is only enabled through processor tuning lists such as AtomTuning - TODO
// confirm.
726def ProcIntelAtom : SubtargetFeature<"", "IsAtom", "true", "Is Intel Atom processor">;
727
728//===----------------------------------------------------------------------===//
729// Register File Description
730//===----------------------------------------------------------------------===//
731
732include "X86RegisterInfo.td"
733include "X86RegisterBanks.td"
734
735//===----------------------------------------------------------------------===//
736// Instruction Descriptions
737//===----------------------------------------------------------------------===//
738
739include "X86Schedule.td"
740include "X86InstrInfo.td"
741include "X86SchedPredicates.td"
742
// The X86 instruction-set description record: a plain instance of the
// target-independent InstrInfo class with no fields overridden here.
743def X86InstrInfo : InstrInfo;
744
745//===----------------------------------------------------------------------===//
746// X86 Scheduler Models
747//===----------------------------------------------------------------------===//
748
749include "X86ScheduleAtom.td"
750include "X86SchedSandyBridge.td"
751include "X86SchedHaswell.td"
752include "X86SchedBroadwell.td"
753include "X86ScheduleSLM.td"
754include "X86ScheduleZnver1.td"
755include "X86ScheduleZnver2.td"
756include "X86ScheduleZnver3.td"
757include "X86ScheduleZnver4.td"
758include "X86ScheduleBdVer2.td"
759include "X86ScheduleBtVer2.td"
760include "X86SchedSkylakeClient.td"
761include "X86SchedSkylakeServer.td"
762include "X86SchedIceLake.td"
763include "X86SchedAlderlakeP.td"
764include "X86SchedSapphireRapids.td"
765
766//===----------------------------------------------------------------------===//
767// X86 Processor Feature Lists
768//===----------------------------------------------------------------------===//
769
770def ProcessorFeatures {
771  // x86-64 micro-architecture levels: x86-64 and x86-64-v[234]
772  list<SubtargetFeature> X86_64V1Features = [
773    FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX, FeatureSSE2,
774    FeatureFXSR, FeatureNOPL, FeatureX86_64,
775  ];
776  list<SubtargetFeature> X86_64V1Tuning = [
777    TuningMacroFusion,
778    TuningSlow3OpsLEA,
779    TuningSlowDivide64,
780    TuningSlowIncDec,
781    TuningInsertVZEROUPPER
782  ];
783
784  list<SubtargetFeature> X86_64V2Features = !listconcat(X86_64V1Features, [
785    FeatureCX16, FeatureLAHFSAHF64, FeatureCRC32, FeaturePOPCNT,
786    FeatureSSE42
787  ]);
788  list<SubtargetFeature> X86_64V2Tuning = [
789    TuningMacroFusion,
790    TuningSlow3OpsLEA,
791    TuningSlowDivide64,
792    TuningSlowUAMem32,
793    TuningFastScalarFSQRT,
794    TuningFastSHLDRotate,
795    TuningFast15ByteNOP,
796    TuningPOPCNTFalseDeps,
797    TuningInsertVZEROUPPER
798  ];
799
800  list<SubtargetFeature> X86_64V3Features = !listconcat(X86_64V2Features, [
801    FeatureAVX2, FeatureBMI, FeatureBMI2, FeatureF16C, FeatureFMA, FeatureLZCNT,
802    FeatureMOVBE, FeatureXSAVE
803  ]);
804  list<SubtargetFeature> X86_64V3Tuning = [
805    TuningMacroFusion,
806    TuningSlow3OpsLEA,
807    TuningSlowDivide64,
808    TuningFastScalarFSQRT,
809    TuningFastSHLDRotate,
810    TuningFast15ByteNOP,
811    TuningFastVariableCrossLaneShuffle,
812    TuningFastVariablePerLaneShuffle,
813    TuningPOPCNTFalseDeps,
814    TuningLZCNTFalseDeps,
815    TuningInsertVZEROUPPER,
816    TuningAllowLight256Bit
817  ];
818
819  list<SubtargetFeature> X86_64V4Features = !listconcat(X86_64V3Features, [
820    FeatureBWI,
821    FeatureCDI,
822    FeatureDQI,
823    FeatureVLX,
824  ]);
825  list<SubtargetFeature> X86_64V4Tuning = [
826    TuningMacroFusion,
827    TuningSlow3OpsLEA,
828    TuningSlowDivide64,
829    TuningFastScalarFSQRT,
830    TuningFastVectorFSQRT,
831    TuningFastSHLDRotate,
832    TuningFast15ByteNOP,
833    TuningFastVariableCrossLaneShuffle,
834    TuningFastVariablePerLaneShuffle,
835    TuningPrefer256Bit,
836    TuningFastGather,
837    TuningPOPCNTFalseDeps,
838    TuningInsertVZEROUPPER,
839    TuningAllowLight256Bit
840  ];
841
842  // Nehalem
843  list<SubtargetFeature> NHMFeatures = X86_64V2Features;
844  list<SubtargetFeature> NHMTuning = [TuningMacroFusion,
845                                      TuningInsertVZEROUPPER,
846                                      TuningNoDomainDelayMov];
847
848  // Westmere
849  list<SubtargetFeature> WSMAdditionalFeatures = [FeaturePCLMUL];
850  list<SubtargetFeature> WSMTuning = NHMTuning;
851  list<SubtargetFeature> WSMFeatures =
852    !listconcat(NHMFeatures, WSMAdditionalFeatures);
853
854  // Sandybridge
855  list<SubtargetFeature> SNBAdditionalFeatures = [FeatureAVX,
856                                                  FeatureXSAVE,
857                                                  FeatureXSAVEOPT];
858  list<SubtargetFeature> SNBTuning = [TuningMacroFusion,
859                                      TuningSlow3OpsLEA,
860                                      TuningSlowDivide64,
861                                      TuningSlowUAMem32,
862                                      TuningFastScalarFSQRT,
863                                      TuningFastSHLDRotate,
864                                      TuningFast15ByteNOP,
865                                      TuningPOPCNTFalseDeps,
866                                      TuningInsertVZEROUPPER,
867                                      TuningNoDomainDelayMov];
868  list<SubtargetFeature> SNBFeatures =
869    !listconcat(WSMFeatures, SNBAdditionalFeatures);
870
871  // Ivybridge
872  list<SubtargetFeature> IVBAdditionalFeatures = [FeatureRDRAND,
873                                                  FeatureF16C,
874                                                  FeatureFSGSBase];
875  list<SubtargetFeature> IVBTuning = SNBTuning;
876  list<SubtargetFeature> IVBFeatures =
877    !listconcat(SNBFeatures, IVBAdditionalFeatures);
878
879  // Haswell
880  list<SubtargetFeature> HSWAdditionalFeatures = [FeatureAVX2,
881                                                  FeatureBMI,
882                                                  FeatureBMI2,
883                                                  FeatureERMSB,
884                                                  FeatureFMA,
885                                                  FeatureINVPCID,
886                                                  FeatureLZCNT,
887                                                  FeatureMOVBE];
888  list<SubtargetFeature> HSWTuning = [TuningMacroFusion,
889                                      TuningSlow3OpsLEA,
890                                      TuningSlowDivide64,
891                                      TuningFastScalarFSQRT,
892                                      TuningFastSHLDRotate,
893                                      TuningFast15ByteNOP,
894                                      TuningFastVariableCrossLaneShuffle,
895                                      TuningFastVariablePerLaneShuffle,
896                                      TuningPOPCNTFalseDeps,
897                                      TuningLZCNTFalseDeps,
898                                      TuningInsertVZEROUPPER,
899                                      TuningAllowLight256Bit,
900                                      TuningNoDomainDelayMov,
901                                      TuningNoDomainDelayShuffle];
902  list<SubtargetFeature> HSWFeatures =
903    !listconcat(IVBFeatures, HSWAdditionalFeatures);
904
905  // Broadwell
906  list<SubtargetFeature> BDWAdditionalFeatures = [FeatureADX,
907                                                  FeatureRDSEED,
908                                                  FeaturePRFCHW];
909  list<SubtargetFeature> BDWTuning = HSWTuning;
910  list<SubtargetFeature> BDWFeatures =
911    !listconcat(HSWFeatures, BDWAdditionalFeatures);
912
913  // Skylake
914  list<SubtargetFeature> SKLAdditionalFeatures = [FeatureAES,
915                                                  FeatureXSAVEC,
916                                                  FeatureXSAVES,
917                                                  FeatureCLFLUSHOPT];
918  list<SubtargetFeature> SKLTuning = [TuningFastGather,
919                                      TuningMacroFusion,
920                                      TuningSlow3OpsLEA,
921                                      TuningSlowDivide64,
922                                      TuningFastScalarFSQRT,
923                                      TuningFastVectorFSQRT,
924                                      TuningFastSHLDRotate,
925                                      TuningFast15ByteNOP,
926                                      TuningFastVariableCrossLaneShuffle,
927                                      TuningFastVariablePerLaneShuffle,
928                                      TuningPOPCNTFalseDeps,
929                                      TuningInsertVZEROUPPER,
930                                      TuningAllowLight256Bit,
931                                      TuningNoDomainDelayMov,
932                                      TuningNoDomainDelayShuffle,
933                                      TuningNoDomainDelayBlend];
934  list<SubtargetFeature> SKLFeatures =
935    !listconcat(BDWFeatures, SKLAdditionalFeatures);
936
937  // Skylake-AVX512
938  list<SubtargetFeature> SKXAdditionalFeatures = [FeatureAES,
939                                                  FeatureXSAVEC,
940                                                  FeatureXSAVES,
941                                                  FeatureCLFLUSHOPT,
942                                                  FeatureAVX512,
943                                                  FeatureCDI,
944                                                  FeatureDQI,
945                                                  FeatureBWI,
946                                                  FeatureVLX,
947                                                  FeaturePKU,
948                                                  FeatureCLWB];
949  list<SubtargetFeature> SKXTuning = [TuningFastGather,
950                                      TuningMacroFusion,
951                                      TuningSlow3OpsLEA,
952                                      TuningSlowDivide64,
953                                      TuningFastScalarFSQRT,
954                                      TuningFastVectorFSQRT,
955                                      TuningFastSHLDRotate,
956                                      TuningFast15ByteNOP,
957                                      TuningFastVariableCrossLaneShuffle,
958                                      TuningFastVariablePerLaneShuffle,
959                                      TuningPrefer256Bit,
960                                      TuningPOPCNTFalseDeps,
961                                      TuningInsertVZEROUPPER,
962                                      TuningAllowLight256Bit,
963                                      TuningPreferShiftShuffle,
964                                      TuningNoDomainDelayMov,
965                                      TuningNoDomainDelayShuffle,
966                                      TuningNoDomainDelayBlend,
967                                      TuningFastImmVectorShift];
968  list<SubtargetFeature> SKXFeatures =
969    !listconcat(BDWFeatures, SKXAdditionalFeatures);
970
971  // Cascadelake
972  list<SubtargetFeature> CLXAdditionalFeatures = [FeatureVNNI];
973  list<SubtargetFeature> CLXTuning = SKXTuning;
974  list<SubtargetFeature> CLXFeatures =
975    !listconcat(SKXFeatures, CLXAdditionalFeatures);
976
977  // Cooperlake
978  list<SubtargetFeature> CPXAdditionalFeatures = [FeatureBF16];
979  list<SubtargetFeature> CPXTuning = SKXTuning;
980  list<SubtargetFeature> CPXFeatures =
981    !listconcat(CLXFeatures, CPXAdditionalFeatures);
982
983  // Cannonlake
984  list<SubtargetFeature> CNLAdditionalFeatures = [FeatureAVX512,
985                                                  FeatureCDI,
986                                                  FeatureDQI,
987                                                  FeatureBWI,
988                                                  FeatureVLX,
989                                                  FeaturePKU,
990                                                  FeatureVBMI,
991                                                  FeatureIFMA,
992                                                  FeatureSHA];
993  list<SubtargetFeature> CNLTuning = [TuningFastGather,
994                                      TuningMacroFusion,
995                                      TuningSlow3OpsLEA,
996                                      TuningSlowDivide64,
997                                      TuningFastScalarFSQRT,
998                                      TuningFastVectorFSQRT,
999                                      TuningFastSHLDRotate,
1000                                      TuningFast15ByteNOP,
1001                                      TuningFastVariableCrossLaneShuffle,
1002                                      TuningFastVariablePerLaneShuffle,
1003                                      TuningPrefer256Bit,
1004                                      TuningInsertVZEROUPPER,
1005                                      TuningAllowLight256Bit,
1006                                      TuningNoDomainDelayMov,
1007                                      TuningNoDomainDelayShuffle,
1008                                      TuningNoDomainDelayBlend,
1009                                      TuningFastImmVectorShift];
1010  list<SubtargetFeature> CNLFeatures =
1011    !listconcat(SKLFeatures, CNLAdditionalFeatures);
1012
1013  // Icelake
1014  list<SubtargetFeature> ICLAdditionalFeatures = [FeatureBITALG,
1015                                                  FeatureVAES,
1016                                                  FeatureVBMI2,
1017                                                  FeatureVNNI,
1018                                                  FeatureVPCLMULQDQ,
1019                                                  FeatureVPOPCNTDQ,
1020                                                  FeatureGFNI,
1021                                                  FeatureRDPID,
1022                                                  FeatureFSRM];
1023  list<SubtargetFeature> ICLTuning = [TuningFastGather,
1024                                      TuningMacroFusion,
1025                                      TuningSlowDivide64,
1026                                      TuningFastScalarFSQRT,
1027                                      TuningFastVectorFSQRT,
1028                                      TuningFastSHLDRotate,
1029                                      TuningFast15ByteNOP,
1030                                      TuningFastVariableCrossLaneShuffle,
1031                                      TuningFastVariablePerLaneShuffle,
1032                                      TuningPrefer256Bit,
1033                                      TuningInsertVZEROUPPER,
1034                                      TuningAllowLight256Bit,
1035                                      TuningNoDomainDelayMov,
1036                                      TuningNoDomainDelayShuffle,
1037                                      TuningNoDomainDelayBlend,
1038                                      TuningFastImmVectorShift];
1039  list<SubtargetFeature> ICLFeatures =
1040    !listconcat(CNLFeatures, ICLAdditionalFeatures);
1041
1042  // Icelake Server
1043  list<SubtargetFeature> ICXAdditionalFeatures = [FeaturePCONFIG,
1044                                                  FeatureCLWB,
1045                                                  FeatureWBNOINVD];
1046  list<SubtargetFeature> ICXTuning = ICLTuning;
1047  list<SubtargetFeature> ICXFeatures =
1048    !listconcat(ICLFeatures, ICXAdditionalFeatures);
1049
1050  // Tigerlake
1051  list<SubtargetFeature> TGLAdditionalFeatures = [FeatureVP2INTERSECT,
1052                                                  FeatureCLWB,
1053                                                  FeatureMOVDIRI,
1054                                                  FeatureMOVDIR64B,
1055                                                  FeatureSHSTK];
1056  list<SubtargetFeature> TGLTuning = ICLTuning;
1057  list<SubtargetFeature> TGLFeatures =
1058    !listconcat(ICLFeatures, TGLAdditionalFeatures );
1059
1060  // Sapphirerapids
1061  list<SubtargetFeature> SPRAdditionalFeatures = [FeatureAMXTILE,
1062                                                  FeatureAMXINT8,
1063                                                  FeatureAMXBF16,
1064                                                  FeatureBF16,
1065                                                  FeatureSERIALIZE,
1066                                                  FeatureCLDEMOTE,
1067                                                  FeatureWAITPKG,
1068                                                  FeaturePTWRITE,
1069                                                  FeatureFP16,
1070                                                  FeatureAVXVNNI,
1071                                                  FeatureTSXLDTRK,
1072                                                  FeatureENQCMD,
1073                                                  FeatureSHSTK,
1074                                                  FeatureMOVDIRI,
1075                                                  FeatureMOVDIR64B,
1076                                                  FeatureUINTR];
1077  list<SubtargetFeature> SPRAdditionalTuning = [TuningMULCFalseDeps,
1078                                                TuningPERMFalseDeps,
1079                                                TuningRANGEFalseDeps,
1080                                                TuningGETMANTFalseDeps,
1081                                                TuningMULLQFalseDeps];
1082  list<SubtargetFeature> SPRTuning = !listconcat(ICXTuning, SPRAdditionalTuning);
1083  list<SubtargetFeature> SPRFeatures =
1084    !listconcat(ICXFeatures, SPRAdditionalFeatures);
1085
1086  // Graniterapids
1087  list<SubtargetFeature> GNRAdditionalFeatures = [FeatureAMXFP16,
1088                                                  FeaturePREFETCHI];
1089  list<SubtargetFeature> GNRFeatures =
1090    !listconcat(SPRFeatures, GNRAdditionalFeatures);
1091
1092  // Graniterapids D
1093  list<SubtargetFeature> GNRDAdditionalFeatures = [FeatureAMXCOMPLEX];
1094  list<SubtargetFeature> GNRDFeatures =
1095    !listconcat(GNRFeatures, GNRDAdditionalFeatures);
1096
1097  // Atom
1098  list<SubtargetFeature> AtomFeatures = [FeatureX87,
1099                                         FeatureCX8,
1100                                         FeatureCMOV,
1101                                         FeatureMMX,
1102                                         FeatureSSSE3,
1103                                         FeatureFXSR,
1104                                         FeatureNOPL,
1105                                         FeatureX86_64,
1106                                         FeatureCX16,
1107                                         FeatureMOVBE,
1108                                         FeatureLAHFSAHF64];
1109  list<SubtargetFeature> AtomTuning = [ProcIntelAtom,
1110                                       TuningSlowUAMem16,
1111                                       TuningLEAForSP,
1112                                       TuningSlowDivide32,
1113                                       TuningSlowDivide64,
1114                                       TuningSlowTwoMemOps,
1115                                       TuningLEAUsesAG,
1116                                       TuningPadShortFunctions,
1117                                       TuningInsertVZEROUPPER,
1118                                       TuningNoDomainDelay];
1119
1120  // Silvermont
1121  list<SubtargetFeature> SLMAdditionalFeatures = [FeatureSSE42,
1122                                                  FeatureCRC32,
1123                                                  FeaturePOPCNT,
1124                                                  FeaturePCLMUL,
1125                                                  FeaturePRFCHW,
1126                                                  FeatureRDRAND];
1127  list<SubtargetFeature> SLMTuning = [TuningUseSLMArithCosts,
1128                                      TuningSlowTwoMemOps,
1129                                      TuningSlowLEA,
1130                                      TuningSlowIncDec,
1131                                      TuningSlowDivide64,
1132                                      TuningSlowPMULLD,
1133                                      TuningFast7ByteNOP,
1134                                      TuningFastMOVBE,
1135                                      TuningPOPCNTFalseDeps,
1136                                      TuningInsertVZEROUPPER,
1137                                      TuningNoDomainDelay];
1138  list<SubtargetFeature> SLMFeatures =
1139    !listconcat(AtomFeatures, SLMAdditionalFeatures);
1140
1141  // Goldmont
1142  list<SubtargetFeature> GLMAdditionalFeatures = [FeatureAES,
1143                                                  FeatureSHA,
1144                                                  FeatureRDSEED,
1145                                                  FeatureXSAVE,
1146                                                  FeatureXSAVEOPT,
1147                                                  FeatureXSAVEC,
1148                                                  FeatureXSAVES,
1149                                                  FeatureCLFLUSHOPT,
1150                                                  FeatureFSGSBase];
1151  list<SubtargetFeature> GLMTuning = [TuningUseGLMDivSqrtCosts,
1152                                      TuningSlowTwoMemOps,
1153                                      TuningSlowLEA,
1154                                      TuningSlowIncDec,
1155                                      TuningFastMOVBE,
1156                                      TuningPOPCNTFalseDeps,
1157                                      TuningInsertVZEROUPPER,
1158                                      TuningNoDomainDelay];
1159  list<SubtargetFeature> GLMFeatures =
1160    !listconcat(SLMFeatures, GLMAdditionalFeatures);
1161
1162  // Goldmont Plus
1163  list<SubtargetFeature> GLPAdditionalFeatures = [FeaturePTWRITE,
1164                                                  FeatureRDPID];
1165  list<SubtargetFeature> GLPTuning = [TuningUseGLMDivSqrtCosts,
1166                                      TuningSlowTwoMemOps,
1167                                      TuningSlowLEA,
1168                                      TuningSlowIncDec,
1169                                      TuningFastMOVBE,
1170                                      TuningInsertVZEROUPPER,
1171                                      TuningNoDomainDelay];
1172  list<SubtargetFeature> GLPFeatures =
1173    !listconcat(GLMFeatures, GLPAdditionalFeatures);
1174
1175  // Tremont
1176  list<SubtargetFeature> TRMAdditionalFeatures = [FeatureCLWB,
1177                                                  FeatureGFNI];
1178  list<SubtargetFeature> TRMTuning = GLPTuning;
1179  list<SubtargetFeature> TRMFeatures =
1180    !listconcat(GLPFeatures, TRMAdditionalFeatures);
1181
1182  // Alderlake
1183  list<SubtargetFeature> ADLAdditionalFeatures = [FeatureSERIALIZE,
1184                                                  FeaturePCONFIG,
1185                                                  FeatureSHSTK,
1186                                                  FeatureWIDEKL,
1187                                                  FeatureINVPCID,
1188                                                  FeatureADX,
1189                                                  FeatureFMA,
1190                                                  FeatureVAES,
1191                                                  FeatureVPCLMULQDQ,
1192                                                  FeatureF16C,
1193                                                  FeatureBMI,
1194                                                  FeatureBMI2,
1195                                                  FeatureLZCNT,
1196                                                  FeatureAVXVNNI,
1197                                                  FeaturePKU,
1198                                                  FeatureHRESET,
1199                                                  FeatureCLDEMOTE,
1200                                                  FeatureMOVDIRI,
1201                                                  FeatureMOVDIR64B,
1202                                                  FeatureWAITPKG];
1203  list<SubtargetFeature> ADLAdditionalTuning = [TuningPERMFalseDeps,
1204                                                TuningPreferMovmskOverVTest,
1205                                                TuningFastImmVectorShift];
1206  list<SubtargetFeature> ADLTuning = !listconcat(SKLTuning, ADLAdditionalTuning);
1207  list<SubtargetFeature> ADLFeatures =
1208    !listconcat(TRMFeatures, ADLAdditionalFeatures);
1209
1210  // Sierraforest
1211  list<SubtargetFeature> SRFAdditionalFeatures = [FeatureCMPCCXADD,
1212                                                  FeatureAVXIFMA,
1213                                                  FeatureAVXNECONVERT,
1214                                                  FeatureENQCMD,
1215                                                  FeatureUINTR,
1216                                                  FeatureAVXVNNIINT8];
1217  list<SubtargetFeature> SRFFeatures =
1218    !listconcat(ADLFeatures, SRFAdditionalFeatures);
1219
1220  // Grandridge
1221  list<SubtargetFeature> GRRAdditionalFeatures = [FeatureRAOINT];
1222  list<SubtargetFeature> GRRFeatures =
1223    !listconcat(SRFFeatures, GRRAdditionalFeatures);
1224
1225  // Knights Landing
1226  list<SubtargetFeature> KNLFeatures = [FeatureX87,
1227                                        FeatureCX8,
1228                                        FeatureCMOV,
1229                                        FeatureMMX,
1230                                        FeatureFXSR,
1231                                        FeatureNOPL,
1232                                        FeatureX86_64,
1233                                        FeatureCX16,
1234                                        FeatureCRC32,
1235                                        FeaturePOPCNT,
1236                                        FeaturePCLMUL,
1237                                        FeatureXSAVE,
1238                                        FeatureXSAVEOPT,
1239                                        FeatureLAHFSAHF64,
1240                                        FeatureAES,
1241                                        FeatureRDRAND,
1242                                        FeatureF16C,
1243                                        FeatureFSGSBase,
1244                                        FeatureAVX512,
1245                                        FeatureERI,
1246                                        FeatureCDI,
1247                                        FeaturePFI,
1248                                        FeaturePREFETCHWT1,
1249                                        FeatureADX,
1250                                        FeatureRDSEED,
1251                                        FeatureMOVBE,
1252                                        FeatureLZCNT,
1253                                        FeatureBMI,
1254                                        FeatureBMI2,
1255                                        FeatureFMA,
1256                                        FeaturePRFCHW];
1257  list<SubtargetFeature> KNLTuning = [TuningSlowDivide64,
1258                                      TuningSlow3OpsLEA,
1259                                      TuningSlowIncDec,
1260                                      TuningSlowTwoMemOps,
1261                                      TuningPreferMaskRegisters,
1262                                      TuningFastGather,
1263                                      TuningFastMOVBE,
1264                                      TuningSlowPMADDWD];
1265  // TODO Add AVX5124FMAPS/AVX5124VNNIW features
1266  list<SubtargetFeature> KNMFeatures =
1267    !listconcat(KNLFeatures, [FeatureVPOPCNTDQ]);
1268
1269  // Barcelona
1270  list<SubtargetFeature> BarcelonaFeatures = [FeatureX87,
1271                                              FeatureCX8,
1272                                              FeatureSSE4A,
1273                                              Feature3DNowA,
1274                                              FeatureFXSR,
1275                                              FeatureNOPL,
1276                                              FeatureCX16,
1277                                              FeaturePRFCHW,
1278                                              FeatureLZCNT,
1279                                              FeaturePOPCNT,
1280                                              FeatureLAHFSAHF64,
1281                                              FeatureCMOV,
1282                                              FeatureX86_64];
1283  list<SubtargetFeature> BarcelonaTuning = [TuningFastScalarShiftMasks,
1284                                            TuningSlowSHLD,
1285                                            TuningSBBDepBreaking,
1286                                            TuningInsertVZEROUPPER];
1287
1288  // Bobcat
1289  list<SubtargetFeature> BtVer1Features = [FeatureX87,
1290                                           FeatureCX8,
1291                                           FeatureCMOV,
1292                                           FeatureMMX,
1293                                           FeatureSSSE3,
1294                                           FeatureSSE4A,
1295                                           FeatureFXSR,
1296                                           FeatureNOPL,
1297                                           FeatureX86_64,
1298                                           FeatureCX16,
1299                                           FeaturePRFCHW,
1300                                           FeatureLZCNT,
1301                                           FeaturePOPCNT,
1302                                           FeatureLAHFSAHF64];
1303  list<SubtargetFeature> BtVer1Tuning = [TuningFast15ByteNOP,
1304                                         TuningFastScalarShiftMasks,
1305                                         TuningFastVectorShiftMasks,
1306                                         TuningSlowSHLD,
1307                                         TuningSBBDepBreaking,
1308                                         TuningInsertVZEROUPPER];
1309
1310  // Jaguar
1311  list<SubtargetFeature> BtVer2AdditionalFeatures = [FeatureAVX,
1312                                                     FeatureAES,
1313                                                     FeatureCRC32,
1314                                                     FeaturePCLMUL,
1315                                                     FeatureBMI,
1316                                                     FeatureF16C,
1317                                                     FeatureMOVBE,
1318                                                     FeatureXSAVE,
1319                                                     FeatureXSAVEOPT];
1320  list<SubtargetFeature> BtVer2Tuning = [TuningFastLZCNT,
1321                                         TuningFastBEXTR,
1322                                         TuningFastHorizontalOps,
1323                                         TuningFast15ByteNOP,
1324                                         TuningFastScalarShiftMasks,
1325                                         TuningFastVectorShiftMasks,
1326                                         TuningFastMOVBE,
1327                                         TuningSBBDepBreaking,
1328                                         TuningSlowSHLD];
1329  list<SubtargetFeature> BtVer2Features =
1330    !listconcat(BtVer1Features, BtVer2AdditionalFeatures);
1331
1332  // Bulldozer
1333  list<SubtargetFeature> BdVer1Features = [FeatureX87,
1334                                           FeatureCX8,
1335                                           FeatureCMOV,
1336                                           FeatureXOP,
1337                                           FeatureX86_64,
1338                                           FeatureCX16,
1339                                           FeatureAES,
1340                                           FeatureCRC32,
1341                                           FeaturePRFCHW,
1342                                           FeaturePCLMUL,
1343                                           FeatureMMX,
1344                                           FeatureFXSR,
1345                                           FeatureNOPL,
1346                                           FeatureLZCNT,
1347                                           FeaturePOPCNT,
1348                                           FeatureXSAVE,
1349                                           FeatureLWP,
1350                                           FeatureLAHFSAHF64];
1351  list<SubtargetFeature> BdVer1Tuning = [TuningSlowSHLD,
1352                                         TuningFast11ByteNOP,
1353                                         TuningFastScalarShiftMasks,
1354                                         TuningBranchFusion,
1355                                         TuningSBBDepBreaking,
1356                                         TuningInsertVZEROUPPER];
1357
1358  // PileDriver
1359  list<SubtargetFeature> BdVer2AdditionalFeatures = [FeatureF16C,
1360                                                     FeatureBMI,
1361                                                     FeatureTBM,
1362                                                     FeatureFMA];
1363  list<SubtargetFeature> BdVer2AdditionalTuning = [TuningFastBEXTR,
1364                                                   TuningFastMOVBE];
1365  list<SubtargetFeature> BdVer2Tuning =
1366    !listconcat(BdVer1Tuning, BdVer2AdditionalTuning);
1367  list<SubtargetFeature> BdVer2Features =
1368    !listconcat(BdVer1Features, BdVer2AdditionalFeatures);
1369
1370  // Steamroller
1371  list<SubtargetFeature> BdVer3AdditionalFeatures = [FeatureXSAVEOPT,
1372                                                     FeatureFSGSBase];
1373  list<SubtargetFeature> BdVer3Tuning = BdVer2Tuning;
1374  list<SubtargetFeature> BdVer3Features =
1375    !listconcat(BdVer2Features, BdVer3AdditionalFeatures);
1376
1377  // Excavator
1378  list<SubtargetFeature> BdVer4AdditionalFeatures = [FeatureAVX2,
1379                                                     FeatureBMI2,
1380                                                     FeatureMOVBE,
1381                                                     FeatureRDRAND,
1382                                                     FeatureMWAITX];
1383  list<SubtargetFeature> BdVer4Tuning = BdVer3Tuning;
1384  list<SubtargetFeature> BdVer4Features =
1385    !listconcat(BdVer3Features, BdVer4AdditionalFeatures);
1386
1387
1388  // AMD Zen Processors common ISAs
1389  list<SubtargetFeature> ZNFeatures = [FeatureADX,
1390                                       FeatureAES,
1391                                       FeatureAVX2,
1392                                       FeatureBMI,
1393                                       FeatureBMI2,
1394                                       FeatureCLFLUSHOPT,
1395                                       FeatureCLZERO,
1396                                       FeatureCMOV,
1397                                       FeatureX86_64,
1398                                       FeatureCX16,
1399                                       FeatureCRC32,
1400                                       FeatureF16C,
1401                                       FeatureFMA,
1402                                       FeatureFSGSBase,
1403                                       FeatureFXSR,
1404                                       FeatureNOPL,
1405                                       FeatureLAHFSAHF64,
1406                                       FeatureLZCNT,
1407                                       FeatureMMX,
1408                                       FeatureMOVBE,
1409                                       FeatureMWAITX,
1410                                       FeaturePCLMUL,
1411                                       FeaturePOPCNT,
1412                                       FeaturePRFCHW,
1413                                       FeatureRDRAND,
1414                                       FeatureRDSEED,
1415                                       FeatureSHA,
1416                                       FeatureSSE4A,
1417                                       FeatureX87,
1418                                       FeatureXSAVE,
1419                                       FeatureXSAVEC,
1420                                       FeatureXSAVEOPT,
1421                                       FeatureXSAVES];
1422  list<SubtargetFeature> ZNTuning = [TuningFastLZCNT,
1423                                     TuningFastBEXTR,
1424                                     TuningFast15ByteNOP,
1425                                     TuningBranchFusion,
1426                                     TuningFastScalarFSQRT,
1427                                     TuningFastVectorFSQRT,
1428                                     TuningFastScalarShiftMasks,
1429                                     TuningFastVariablePerLaneShuffle,
1430                                     TuningFastMOVBE,
1431                                     TuningSlowSHLD,
1432                                     TuningSBBDepBreaking,
1433                                     TuningInsertVZEROUPPER,
1434                                     TuningAllowLight256Bit];
1435  list<SubtargetFeature> ZN2AdditionalFeatures = [FeatureCLWB,
1436                                                  FeatureRDPID,
1437                                                  FeatureRDPRU,
1438                                                  FeatureWBNOINVD];
1439  list<SubtargetFeature> ZN2Tuning = ZNTuning;
1440  list<SubtargetFeature> ZN2Features =
1441    !listconcat(ZNFeatures, ZN2AdditionalFeatures);
1442  list<SubtargetFeature> ZN3AdditionalFeatures = [FeatureFSRM,
1443                                                  FeatureINVPCID,
1444                                                  FeaturePKU,
1445                                                  FeatureVAES,
1446                                                  FeatureVPCLMULQDQ];
1447  list<SubtargetFeature> ZN3AdditionalTuning = [TuningMacroFusion];
1448  list<SubtargetFeature> ZN3Tuning =
1449    !listconcat(ZN2Tuning, ZN3AdditionalTuning);
1450  list<SubtargetFeature> ZN3Features =
1451    !listconcat(ZN2Features, ZN3AdditionalFeatures);
1452  list<SubtargetFeature> ZN4Tuning = ZN3Tuning;
1453  list<SubtargetFeature> ZN4AdditionalFeatures = [FeatureAVX512,
1454                                                  FeatureCDI,
1455                                                  FeatureDQI,
1456                                                  FeatureBWI,
1457                                                  FeatureVLX,
1458                                                  FeatureVBMI,
1459                                                  FeatureVBMI2,
1460                                                  FeatureIFMA,
1461                                                  FeatureVNNI,
1462                                                  FeatureBITALG,
1463                                                  FeatureGFNI,
1464                                                  FeatureBF16,
1465                                                  FeatureSHSTK,
1466                                                  FeatureVPOPCNTDQ];
1467  list<SubtargetFeature> ZN4Features =
1468    !listconcat(ZN3Features, ZN4AdditionalFeatures);
1469}
1470
1471//===----------------------------------------------------------------------===//
1472// X86 processors supported.
1473//===----------------------------------------------------------------------===//
1474
// Processor definition that always uses GenericModel as its scheduling
// model. Features and TuneFeatures are forwarded to ProcessorModel as-is.
class Proc<string Name,
           list<SubtargetFeature> Features,
           list<SubtargetFeature> TuneFeatures>
    : ProcessorModel<Name, GenericModel, Features, TuneFeatures>;
1478
// Processor definition with an explicitly chosen scheduling model. All four
// arguments are forwarded to ProcessorModel unchanged.
class ProcModel<string Name,
                SchedMachineModel Model,
                list<SubtargetFeature> Features,
                list<SubtargetFeature> TuneFeatures>
    : ProcessorModel<Name, Model, Features, TuneFeatures>;
1483
// Notes on the "generic" processor:
//  * CMPXCHG8B is included for legacy compatibility, so that it is only
//    disabled if i386/i486 is specifically requested.
//  * 64Bit is included because "generic" is the default llc CPU. The
//    X86Subtarget constructor checks that any CPU used in 64-bit mode has
//    FeatureX86_64 enabled. It has no effect on code generation.
//  * As a default tuning, "generic" aims to produce code optimized for the
//    most common X86 processors. The tunings might be changed over time. It
//    is recommended to use "tune-cpu"="x86-64" in function attribute for
//    consistency.
def : ProcModel<"generic", SandyBridgeModel,
                [FeatureX87, FeatureCX8, FeatureX86_64],
                [TuningSlow3OpsLEA, TuningSlowDivide64, TuningMacroFusion,
                 TuningFastScalarFSQRT, TuningFast15ByteNOP,
                 TuningInsertVZEROUPPER]>;
1500
// i386 through pentium3. Every entry uses Proc (and therefore GenericModel)
// with the same two tuning flags; only the ISA feature list grows.
foreach P = ["i386", "i486"] in
  def : Proc<P, [FeatureX87],
                [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

foreach P = ["i586", "pentium"] in
  def : Proc<P, [FeatureX87, FeatureCX8],
                [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

foreach P = ["pentium-mmx", "pentium_mmx"] in
  def : Proc<P, [FeatureX87, FeatureCX8, FeatureMMX],
                [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

def : Proc<"i686", [FeatureX87, FeatureCX8, FeatureCMOV],
                   [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

foreach P = ["pentiumpro", "pentium_pro"] in
  def : Proc<P, [FeatureX87, FeatureCX8, FeatureCMOV, FeatureNOPL],
                [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

foreach P = ["pentium2", "pentium_ii"] in
  def : Proc<P, [FeatureX87, FeatureCX8, FeatureMMX, FeatureCMOV,
                 FeatureFXSR, FeatureNOPL],
                [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

foreach P = ["pentium3", "pentium3m", "pentium_iii_no_xmm_regs",
             "pentium_iii"] in
  def : Proc<P, [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE1,
                 FeatureFXSR, FeatureNOPL, FeatureCMOV],
                [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1529
// SSE2 and SSE3 class CPUs get the PostRAScheduler (GenericPostRAModel).
//
// The intent is to enable it for pentium4, which is the current default
// processor in a vanilla 32-bit clang compilation when no specific
// architecture is specified. This generally gives a nice performance
// increase on silvermont, with largely neutral behavior on other
// contemporary large core processors.
//
// pentium-m, pentium4m, prescott and nocona are included as a preventative
// measure to avoid performance surprises, in case clang's default cpu
// changes slightly.

foreach P = ["pentium_m", "pentium-m"] in
  def : ProcModel<P, GenericPostRAModel,
                  [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE2,
                   FeatureFXSR, FeatureNOPL, FeatureCMOV],
                  [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

foreach P = ["pentium4", "pentium4m", "pentium_4"] in
  def : ProcModel<P, GenericPostRAModel,
                  [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE2,
                   FeatureFXSR, FeatureNOPL, FeatureCMOV],
                  [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1553
// Intel Quark.
// Note that FeatureX87 is not in the feature list; CX8 is the only feature.
def : Proc<"lakemont",
           [FeatureCX8],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1557
// Intel Core Duo.
// Scheduled with SandyBridgeModel.
def : ProcModel<"yonah", SandyBridgeModel,
                [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE3, FeatureFXSR,
                 FeatureNOPL, FeatureCMOV],
                [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1563
// NetBurst.
// Both entries use GenericPostRAModel. "nocona" has the same features as
// "prescott" plus X86_64 and CX16.
foreach P = ["prescott", "pentium_4_sse3"] in
  def : ProcModel<P, GenericPostRAModel,
                  [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE3,
                   FeatureFXSR, FeatureNOPL, FeatureCMOV],
                  [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

def : ProcModel<"nocona", GenericPostRAModel,
                [FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX,
                 FeatureSSE3, FeatureFXSR, FeatureNOPL, FeatureX86_64,
                 FeatureCX16],
                [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1586
// Intel Core 2 Solo/Duo.
// The two groups differ only in their SSE level (SSSE3 vs. SSE4.1); both are
// scheduled with SandyBridgeModel and share the same tuning flags.
foreach P = ["core2", "core_2_duo_ssse3"] in
  def : ProcModel<P, SandyBridgeModel,
                  [FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX,
                   FeatureSSSE3, FeatureFXSR, FeatureNOPL, FeatureX86_64,
                   FeatureCX16, FeatureLAHFSAHF64],
                  [TuningMacroFusion, TuningSlowUAMem16,
                   TuningInsertVZEROUPPER]>;

foreach P = ["penryn", "core_2_duo_sse4_1"] in
  def : ProcModel<P, SandyBridgeModel,
                  [FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX,
                   FeatureSSE41, FeatureFXSR, FeatureNOPL, FeatureX86_64,
                   FeatureCX16, FeatureLAHFSAHF64],
                  [TuningMacroFusion, TuningSlowUAMem16,
                   TuningInsertVZEROUPPER]>;
1626
// Atom CPUs.
foreach P = ["bonnell", "atom"] in
  def : ProcModel<P, AtomModel,
                  ProcessorFeatures.AtomFeatures,
                  ProcessorFeatures.AtomTuning>;

foreach P = ["silvermont", "slm", "atom_sse4_2"] in
  def : ProcModel<P, SLMModel,
                  ProcessorFeatures.SLMFeatures,
                  ProcessorFeatures.SLMTuning>;

// "atom_sse4_2_movbe" pairs the Goldmont feature list with the Silvermont
// tuning list.
def : ProcModel<"atom_sse4_2_movbe", SLMModel,
                ProcessorFeatures.GLMFeatures,
                ProcessorFeatures.SLMTuning>;
def : ProcModel<"goldmont", SLMModel,
                ProcessorFeatures.GLMFeatures,
                ProcessorFeatures.GLMTuning>;

foreach P = ["goldmont_plus", "goldmont-plus"] in
  def : ProcModel<P, SLMModel,
                  ProcessorFeatures.GLPFeatures,
                  ProcessorFeatures.GLPTuning>;

def : ProcModel<"tremont", SLMModel,
                ProcessorFeatures.TRMFeatures,
                ProcessorFeatures.TRMTuning>;

// Both entries below use AlderlakePModel together with the Tremont tuning
// list.
def : ProcModel<"sierraforest", AlderlakePModel,
                ProcessorFeatures.SRFFeatures,
                ProcessorFeatures.TRMTuning>;
def : ProcModel<"grandridge", AlderlakePModel,
                ProcessorFeatures.GRRFeatures,
                ProcessorFeatures.TRMTuning>;
1652
// "Arrandale" along with corei3 and corei5
foreach P = ["nehalem", "corei7", "core_i7_sse4_2"] in
  def : ProcModel<P, SandyBridgeModel,
                  ProcessorFeatures.NHMFeatures,
                  ProcessorFeatures.NHMTuning>;

// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
foreach P = ["westmere", "core_aes_pclmulqdq"] in
  def : ProcModel<P, SandyBridgeModel,
                  ProcessorFeatures.WSMFeatures,
                  ProcessorFeatures.WSMTuning>;

foreach P = ["sandybridge", "corei7-avx", "core_2nd_gen_avx"] in
  def : ProcModel<P, SandyBridgeModel,
                  ProcessorFeatures.SNBFeatures,
                  ProcessorFeatures.SNBTuning>;

foreach P = ["ivybridge", "core-avx-i", "core_3rd_gen_avx"] in
  def : ProcModel<P, SandyBridgeModel,
                  ProcessorFeatures.IVBFeatures,
                  ProcessorFeatures.IVBTuning>;

foreach P = ["haswell", "core-avx2", "core_4th_gen_avx",
             "core_4th_gen_avx_tsx"] in
  def : ProcModel<P, HaswellModel,
                  ProcessorFeatures.HSWFeatures,
                  ProcessorFeatures.HSWTuning>;

foreach P = ["broadwell", "core_5th_gen_avx", "core_5th_gen_avx_tsx"] in
  def : ProcModel<P, BroadwellModel,
                  ProcessorFeatures.BDWFeatures,
                  ProcessorFeatures.BDWTuning>;

def : ProcModel<"skylake", SkylakeClientModel,
                ProcessorFeatures.SKLFeatures,
                ProcessorFeatures.SKLTuning>;
1687
// FIXME: define KNL scheduler model
// Until then HaswellModel is used; "knm" shares the KNL tuning list.
foreach P = ["knl", "mic_avx512"] in
  def : ProcModel<P, HaswellModel,
                  ProcessorFeatures.KNLFeatures,
                  ProcessorFeatures.KNLTuning>;

def : ProcModel<"knm", HaswellModel,
                ProcessorFeatures.KNMFeatures,
                ProcessorFeatures.KNLTuning>;
1695
// Entries from skylake-avx512 through graniterapids-d. Each one selects a
// scheduling model plus feature and tuning lists from ProcessorFeatures.
foreach P = ["skylake-avx512", "skx", "skylake_avx512"] in
  def : ProcModel<P, SkylakeServerModel,
                  ProcessorFeatures.SKXFeatures,
                  ProcessorFeatures.SKXTuning>;

def : ProcModel<"cascadelake", SkylakeServerModel,
                ProcessorFeatures.CLXFeatures,
                ProcessorFeatures.CLXTuning>;
def : ProcModel<"cooperlake", SkylakeServerModel,
                ProcessorFeatures.CPXFeatures,
                ProcessorFeatures.CPXTuning>;
def : ProcModel<"cannonlake", SkylakeServerModel,
                ProcessorFeatures.CNLFeatures,
                ProcessorFeatures.CNLTuning>;

foreach P = ["icelake-client", "icelake_client"] in
  def : ProcModel<P, IceLakeModel,
                  ProcessorFeatures.ICLFeatures,
                  ProcessorFeatures.ICLTuning>;

// "rocketlake" reuses the Ice Lake client feature and tuning lists.
def : ProcModel<"rocketlake", IceLakeModel,
                ProcessorFeatures.ICLFeatures,
                ProcessorFeatures.ICLTuning>;

foreach P = ["icelake-server", "icelake_server"] in
  def : ProcModel<P, IceLakeModel,
                  ProcessorFeatures.ICXFeatures,
                  ProcessorFeatures.ICXTuning>;

def : ProcModel<"tigerlake", IceLakeModel,
                ProcessorFeatures.TGLFeatures,
                ProcessorFeatures.TGLTuning>;
def : ProcModel<"sapphirerapids", SapphireRapidsModel,
                ProcessorFeatures.SPRFeatures,
                ProcessorFeatures.SPRTuning>;

// All three share the Alderlake scheduling model, features and tuning.
foreach P = ["alderlake", "raptorlake", "meteorlake"] in
  def : ProcModel<P, AlderlakePModel,
                  ProcessorFeatures.ADLFeatures,
                  ProcessorFeatures.ADLTuning>;

// The remaining entries use the Sapphire Rapids scheduling model and tuning.
def : ProcModel<"graniterapids", SapphireRapidsModel,
                ProcessorFeatures.GNRFeatures,
                ProcessorFeatures.SPRTuning>;
def : ProcModel<"emeraldrapids", SapphireRapidsModel,
                ProcessorFeatures.SPRFeatures,
                ProcessorFeatures.SPRTuning>;

foreach P = ["graniterapids-d", "graniterapids_d"] in
  def : ProcModel<P, SapphireRapidsModel,
                  ProcessorFeatures.GNRDFeatures,
                  ProcessorFeatures.SPRTuning>;
1735
// AMD CPUs.
// k6 through k8-sse3 are all defined with Proc, i.e. with GenericModel.

def : Proc<"k6", [FeatureX87, FeatureCX8, FeatureMMX],
                 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

foreach P = ["k6-2", "k6-3"] in
  def : Proc<P, [FeatureX87, FeatureCX8, Feature3DNow],
                [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

foreach P = ["athlon", "athlon-tbird"] in
  def : Proc<P, [FeatureX87, FeatureCX8, FeatureCMOV, Feature3DNowA,
                 FeatureNOPL],
                [TuningSlowSHLD, TuningSlowUAMem16,
                 TuningInsertVZEROUPPER]>;

foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in
  def : Proc<P, [FeatureX87, FeatureCX8, FeatureCMOV, FeatureSSE1,
                 Feature3DNowA, FeatureFXSR, FeatureNOPL],
                [TuningSlowSHLD, TuningSlowUAMem16,
                 TuningInsertVZEROUPPER]>;

foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in
  def : Proc<P, [FeatureX87, FeatureCX8, FeatureSSE2, Feature3DNowA,
                 FeatureFXSR, FeatureNOPL, FeatureX86_64, FeatureCMOV],
                [TuningFastScalarShiftMasks, TuningSlowSHLD,
                 TuningSlowUAMem16, TuningSBBDepBreaking,
                 TuningInsertVZEROUPPER]>;

foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in
  def : Proc<P, [FeatureX87, FeatureCX8, FeatureSSE3, Feature3DNowA,
                 FeatureFXSR, FeatureNOPL, FeatureCX16, FeatureCMOV,
                 FeatureX86_64],
                [TuningFastScalarShiftMasks, TuningSlowSHLD,
                 TuningSlowUAMem16, TuningSBBDepBreaking,
                 TuningInsertVZEROUPPER]>;
1771
// "amdfam10" and "barcelona" are two names for the same definition; both take
// their feature and tuning lists from the Barcelona entries.
foreach CPUName = ["amdfam10", "barcelona"] in {
  def : Proc<CPUName,
             ProcessorFeatures.BarcelonaFeatures,
             ProcessorFeatures.BarcelonaTuning>;
}
1776
// Bobcat (btver1): this definition does not name a scheduling model.
def : Proc<"btver1",
           ProcessorFeatures.BtVer1Features,
           ProcessorFeatures.BtVer1Tuning>;

// Jaguar (btver2): scheduled with BtVer2Model.
def : ProcModel<"btver2", BtVer2Model,
                ProcessorFeatures.BtVer2Features,
                ProcessorFeatures.BtVer2Tuning>;
1783
// Bulldozer (bdver1): note that it is scheduled with BdVer2Model.
def : ProcModel<"bdver1", BdVer2Model,
                ProcessorFeatures.BdVer1Features,
                ProcessorFeatures.BdVer1Tuning>;

// Piledriver (bdver2).
def : ProcModel<"bdver2", BdVer2Model,
                ProcessorFeatures.BdVer2Features,
                ProcessorFeatures.BdVer2Tuning>;

// Steamroller (bdver3): this definition does not name a scheduling model.
def : Proc<"bdver3",
           ProcessorFeatures.BdVer3Features,
           ProcessorFeatures.BdVer3Tuning>;

// Excavator (bdver4): this definition does not name a scheduling model.
def : Proc<"bdver4",
           ProcessorFeatures.BdVer4Features,
           ProcessorFeatures.BdVer4Tuning>;
1796
// znver1 through znver4: each generation has its own scheduling model,
// feature list and tuning list.
def : ProcModel<"znver1", Znver1Model,
                ProcessorFeatures.ZNFeatures,
                ProcessorFeatures.ZNTuning>;
def : ProcModel<"znver2", Znver2Model,
                ProcessorFeatures.ZN2Features,
                ProcessorFeatures.ZN2Tuning>;
def : ProcModel<"znver3", Znver3Model,
                ProcessorFeatures.ZN3Features,
                ProcessorFeatures.ZN3Tuning>;
def : ProcModel<"znver4", Znver4Model,
                ProcessorFeatures.ZN4Features,
                ProcessorFeatures.ZN4Tuning>;
1805
// "geode" lists x87, CX8 and 3DNowA.
def : Proc<"geode",
           [FeatureX87, FeatureCX8, Feature3DNowA],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1808
// "winchip-c6" lists MMX; "winchip2" and "c3" are identical and list 3DNow;
// "c3-2" lists MMX, SSE1, FXSR and CMOV. All four share one tuning list.
def : Proc<"winchip-c6",
           [FeatureX87, FeatureMMX],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
foreach CPUName = ["winchip2", "c3"] in {
  def : Proc<CPUName,
             [FeatureX87, Feature3DNow],
             [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
def : Proc<"c3-2",
           [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE1, FeatureFXSR,
            FeatureCMOV],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1818
1819// We also provide a generic 64-bit specific x86 processor model which tries to
1820// be good for modern chips without enabling instruction set encodings past the
1821// basic SSE2 and 64-bit ones. It disables slow things from any mainstream and
1822// modern 64-bit x86 chip, and enables features that are generally beneficial.
1823//
1824// We currently use the Sandy Bridge model as the default scheduling model as
1825// we use it across Nehalem, Westmere, Sandy Bridge, and Ivy Bridge which
1826// covers a huge swath of x86 processors. If there are specific scheduling
1827// knobs which need to be tuned differently for AMD chips, we might consider
1828// forming a common base for them.
// Baseline generic 64-bit processor, scheduled with the Sandy Bridge model.
def : ProcModel<"x86-64", SandyBridgeModel,
                ProcessorFeatures.X86_64V1Features,
                ProcessorFeatures.X86_64V1Tuning>;

// x86-64-v2: close to Sandy Bridge.
def : ProcModel<"x86-64-v2", SandyBridgeModel,
                ProcessorFeatures.X86_64V2Features,
                ProcessorFeatures.X86_64V2Tuning>;

// x86-64-v3: close to Haswell.
def : ProcModel<"x86-64-v3", HaswellModel,
                ProcessorFeatures.X86_64V3Features,
                ProcessorFeatures.X86_64V3Tuning>;

// x86-64-v4: close to the AVX-512 level implemented by Xeon Scalable
// Processors.
def : ProcModel<"x86-64-v4", SkylakeServerModel,
                ProcessorFeatures.X86_64V4Features,
                ProcessorFeatures.X86_64V4Tuning>;
1840
1841//===----------------------------------------------------------------------===//
1842// Calling Conventions
1843//===----------------------------------------------------------------------===//
1844
1845include "X86CallingConv.td"
1846
1847
1848//===----------------------------------------------------------------------===//
1849// Assembly Parser
1850//===----------------------------------------------------------------------===//
1851
// Assembly parser variant 0, named "att".
def ATTAsmParserVariant : AsmParserVariant {
  let Variant = 0;
  let Name = "att";

  // '#' introduces a comment in assembly strings; comments are discarded.
  let CommentDelimiter = "#";

  // Hard-coded registers are recognized by their '%' prefix.
  let RegisterPrefix = "%";
}
1864
// Assembly parser variant 1, named "intel".
def IntelAsmParserVariant : AsmParserVariant {
  let Variant = 1;
  let Name = "intel";

  // ';' introduces a comment in assembly strings; comments are discarded.
  let CommentDelimiter = ";";

  // Hard-coded registers carry no prefix in this variant.
  let RegisterPrefix = "";
}
1877
1878//===----------------------------------------------------------------------===//
1879// Assembly Printers
1880//===----------------------------------------------------------------------===//
1881
// The X86 target supports two different syntaxes for emitting machine code.
// The syntax is selected with the -x86-asm-syntax={att|intel} option.
// Assembly writer for variant 0, implemented by the ATTInstPrinter class.
def ATTAsmWriter : AsmWriter {
  let AsmWriterClassName = "ATTInstPrinter";
  let Variant = 0;
}
// Assembly writer for variant 1, implemented by the IntelInstPrinter class.
def IntelAsmWriter : AsmWriter {
  let AsmWriterClassName = "IntelInstPrinter";
  let Variant = 1;
}
1892
// Top-level target record: ties together the instruction set, the two
// assembly parser variants and the two assembly writers.
def X86 : Target {
  // Information about the instructions.
  let InstructionSet = X86InstrInfo;

  // Parser variants and writers are listed in the same order: ATT, then Intel.
  let AssemblyParserVariants = [
    ATTAsmParserVariant,
    IntelAsmParserVariant
  ];
  let AssemblyWriters = [
    ATTAsmWriter,
    IntelAsmWriter
  ];

  let AllowRegisterRenaming = 1;
}
1900
1901//===----------------------------------------------------------------------===//
1902// Pfm Counters
1903//===----------------------------------------------------------------------===//
1904
1905include "X86PfmCounters.td"
1906