xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86.td (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1//===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This is a target description file for the Intel i386 architecture, referred
10// to here as the "X86" architecture.
11//
12//===----------------------------------------------------------------------===//
13
14// Get the target-independent interfaces which we are implementing...
15//
16include "llvm/Target/Target.td"
17
18//===----------------------------------------------------------------------===//
19// X86 Subtarget state
20//
// Operating-mode flags, disregarding the specific ABI / programming model.
// One flag per processor operating mode.
def Is64Bit
    : SubtargetFeature<"64bit-mode", "Is64Bit", "true",
                       "64-bit mode (x86_64)">;
def Is32Bit
    : SubtargetFeature<"32bit-mode", "Is32Bit", "true",
                       "32-bit mode (80386)">;
def Is16Bit
    : SubtargetFeature<"16bit-mode", "Is16Bit", "true",
                       "16-bit mode (i8086)">;
28
29//===----------------------------------------------------------------------===//
30// X86 Subtarget ISA features
31//===----------------------------------------------------------------------===//
32
// Stand-alone ISA features; none of the records below implies another one.
def FeatureX87
    : SubtargetFeature<"x87", "HasX87", "true",
                       "Enable X87 float instructions">;

def FeatureNOPL
    : SubtargetFeature<"nopl", "HasNOPL", "true",
                       "Enable NOPL instruction (generally pentium pro+)">;

def FeatureCMOV
    : SubtargetFeature<"cmov", "HasCMOV", "true",
                       "Enable conditional move instructions">;

def FeatureCX8
    : SubtargetFeature<"cx8", "HasCX8", "true",
                       "Support CMPXCHG8B instructions">;

def FeatureCRC32
    : SubtargetFeature<"crc32", "HasCRC32", "true",
                       "Enable SSE 4.2 CRC32 instruction (used when SSE4.2 is supported but function is GPR only)">;

def FeaturePOPCNT
    : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
                       "Support POPCNT instruction">;
50
// Processor-state save/restore features. Every XSAVE variant implies the
// base "xsave" feature.
def FeatureFXSR
    : SubtargetFeature<"fxsr", "HasFXSR", "true",
                       "Support fxsave/fxrestore instructions">;

def FeatureXSAVE
    : SubtargetFeature<"xsave", "HasXSAVE", "true",
                       "Support xsave instructions">;

def FeatureXSAVEOPT
    : SubtargetFeature<"xsaveopt", "HasXSAVEOPT", "true",
                       "Support xsaveopt instructions",
                       [FeatureXSAVE]>;

def FeatureXSAVEC
    : SubtargetFeature<"xsavec", "HasXSAVEC", "true",
                       "Support xsavec instructions",
                       [FeatureXSAVE]>;

def FeatureXSAVES
    : SubtargetFeature<"xsaves", "HasXSAVES", "true",
                       "Support xsaves instructions",
                       [FeatureXSAVE]>;
68
// SSE levels. Each record assigns a value to X86SSELevel and implies the level
// directly below it, forming the chain SSE1 <- SSE2 <- SSE3 <- SSSE3 <- SSE4.1
// <- SSE4.2.
def FeatureSSE1
    : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
                       "Enable SSE instructions">;
def FeatureSSE2
    : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
                       "Enable SSE2 instructions",
                       [FeatureSSE1]>;
def FeatureSSE3
    : SubtargetFeature<"sse3", "X86SSELevel", "SSE3",
                       "Enable SSE3 instructions",
                       [FeatureSSE2]>;
def FeatureSSSE3
    : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3",
                       "Enable SSSE3 instructions",
                       [FeatureSSE3]>;
def FeatureSSE41
    : SubtargetFeature<"sse4.1", "X86SSELevel", "SSE41",
                       "Enable SSE 4.1 instructions",
                       [FeatureSSSE3]>;
def FeatureSSE42
    : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42",
                       "Enable SSE 4.2 instructions",
                       [FeatureSSE41]>;
// MMX is kept out of the SSE feature chain: for odd compatibility reasons it
// must be possible to switch MMX off explicitly while SSE and later stay on.
def FeatureMMX
    : SubtargetFeature<"mmx", "HasMMX", "true",
                       "Enable MMX instructions">;
// Although all x86-64 hardware has SSE2, SSE2 is not listed as implied here,
// because it can be disabled (e.g. when compiling OS kernels) without leaving
// 64-bit mode. No other feature should imply this bit; it exists to enforce
// that only 64-bit capable CPUs are used in 64-bit mode.
def FeatureX86_64
    : SubtargetFeature<"64bit", "HasX86_64", "true",
                       "Support 64-bit instructions">;
// cmpxchg16b support; implies cx8.
def FeatureCX16
    : SubtargetFeature<"cx16", "HasCX16", "true",
                       "64-bit with cmpxchg16b (this is true for most x86-64 chips, but not the first AMD chips)",
                       [FeatureCX8]>;
// SSE4a; implies SSE3.
def FeatureSSE4A
    : SubtargetFeature<"sse4a", "HasSSE4A", "true",
                       "Support SSE 4a instructions",
                       [FeatureSSE3]>;
103
// AVX levels continue the X86SSELevel chain: AVX implies SSE4.2 and AVX2
// implies AVX.
def FeatureAVX
    : SubtargetFeature<"avx", "X86SSELevel", "AVX",
                       "Enable AVX instructions",
                       [FeatureSSE42]>;
def FeatureAVX2
    : SubtargetFeature<"avx2", "X86SSELevel", "AVX2",
                       "Enable AVX2 instructions",
                       [FeatureAVX]>;
// Three-operand fused multiply-add; implies AVX.
// The description string is user-visible help text, so it spells the
// operation correctly as "multiply-add".
def FeatureFMA     : SubtargetFeature<"fma", "HasFMA", "true",
                                      "Enable three-operand fused multiply-add",
                                      [FeatureAVX]>;
// F16C conversions; implies AVX.
def FeatureF16C
    : SubtargetFeature<"f16c", "HasF16C", "true",
                       "Support 16-bit floating point conversion instructions",
                       [FeatureAVX]>;
// ZMM / 64-bit mask availability is its own bit and implies nothing.
def FeatureEVEX512
    : SubtargetFeature<"evex512", "HasEVEX512", "true",
                       "Support ZMM and 64-bit mask instructions">;
// AVX-512 foundation: sets X86SSELevel to AVX512 and implies AVX2, FMA and
// F16C.
def FeatureAVX512
    : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512",
                       "Enable AVX-512 instructions",
                       [FeatureAVX2, FeatureFMA, FeatureF16C]>;
// The following AVX-512 extensions imply the AVX-512 foundation.
def FeatureCDI
    : SubtargetFeature<"avx512cd", "HasCDI", "true",
                       "Enable AVX-512 Conflict Detection Instructions",
                       [FeatureAVX512]>;
def FeatureVPOPCNTDQ
    : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ", "true",
                       "Enable AVX-512 Population Count Instructions",
                       [FeatureAVX512]>;
// Instruction prefetch hints; implies nothing.
def FeaturePREFETCHI
    : SubtargetFeature<"prefetchi", "HasPREFETCHI", "true",
                       "Prefetch instruction with T0 or T1 Hint">;
// DQ, BW and VL extensions each imply the AVX-512 foundation.
def FeatureDQI
    : SubtargetFeature<"avx512dq", "HasDQI", "true",
                       "Enable AVX-512 Doubleword and Quadword Instructions",
                       [FeatureAVX512]>;
def FeatureBWI
    : SubtargetFeature<"avx512bw", "HasBWI", "true",
                       "Enable AVX-512 Byte and Word Instructions",
                       [FeatureAVX512]>;
def FeatureVLX
    : SubtargetFeature<"avx512vl", "HasVLX", "true",
                       "Enable AVX-512 Vector Length eXtensions",
                       [FeatureAVX512]>;
// Both VBMI generations imply the byte/word extension (BWI).
def FeatureVBMI
    : SubtargetFeature<"avx512vbmi", "HasVBMI", "true",
                       "Enable AVX-512 Vector Byte Manipulation Instructions",
                       [FeatureBWI]>;
def FeatureVBMI2
    : SubtargetFeature<"avx512vbmi2", "HasVBMI2", "true",
                       "Enable AVX-512 further Vector Byte Manipulation Instructions",
                       [FeatureBWI]>;
// AVX-IFMA implies AVX2 only, not the AVX-512 foundation.
def FeatureAVXIFMA
    : SubtargetFeature<"avxifma", "HasAVXIFMA", "true",
                       "Enable AVX-IFMA",
                       [FeatureAVX2]>;
// AVX-512 integer fused multiply-add; implies the AVX-512 foundation.
// The description string is user-visible help text, so it spells the
// operation correctly as "Multiply-Add".
def FeatureIFMA     : SubtargetFeature<"avx512ifma", "HasIFMA", "true",
                      "Enable AVX-512 Integer Fused Multiply-Add",
                                      [FeatureAVX512]>;
// Protection keys; implies nothing.
def FeaturePKU
    : SubtargetFeature<"pku", "HasPKU", "true",
                       "Enable protection keys">;
// VNNI exists in an AVX-512 flavour (implies AVX-512 foundation) and an AVX
// flavour (implies AVX2).
def FeatureVNNI
    : SubtargetFeature<"avx512vnni", "HasVNNI", "true",
                       "Enable AVX-512 Vector Neural Network Instructions",
                       [FeatureAVX512]>;
def FeatureAVXVNNI
    : SubtargetFeature<"avxvnni", "HasAVXVNNI", "true",
                       "Support AVX_VNNI encoding",
                       [FeatureAVX2]>;
// bfloat16 and bit-algorithm extensions imply BWI.
def FeatureBF16
    : SubtargetFeature<"avx512bf16", "HasBF16", "true",
                       "Support bfloat16 floating point",
                       [FeatureBWI]>;
def FeatureBITALG
    : SubtargetFeature<"avx512bitalg", "HasBITALG", "true",
                       "Enable AVX-512 Bit Algorithms",
                       [FeatureBWI]>;
// vp2intersect implies the AVX-512 foundation.
def FeatureVP2INTERSECT
    : SubtargetFeature<"avx512vp2intersect", "HasVP2INTERSECT", "true",
                       "Enable AVX-512 vp2intersect",
                       [FeatureAVX512]>;
// FIXME: The FP16 scalar intrinsics use type v8f16, which ought to be guarded
// by hasVLX; for now FeatureFP16 simply implies VLX.
// FIXME: FP16 <-> i64 conversion custom-lowers type v8i64, which ought to be
// guarded by hasDQI; for now FeatureFP16 simply implies DQI.
def FeatureFP16
    : SubtargetFeature<"avx512fp16", "HasFP16", "true",
                       "Support 16-bit floating point",
                       [FeatureBWI, FeatureVLX, FeatureDQI]>;
// AVX-VNNI integer variants; both imply AVX2.
def FeatureAVXVNNIINT8
    : SubtargetFeature<"avxvnniint8", "HasAVXVNNIINT8", "true",
                       "Enable AVX-VNNI-INT8",
                       [FeatureAVX2]>;
def FeatureAVXVNNIINT16
    : SubtargetFeature<"avxvnniint16", "HasAVXVNNIINT16", "true",
                       "Enable AVX-VNNI-INT16",
                       [FeatureAVX2]>;
// Carry-less multiply and Galois-field arithmetic; both imply SSE2.
def FeaturePCLMUL
    : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
                       "Enable packed carry-less multiplication instructions",
                       [FeatureSSE2]>;
def FeatureGFNI
    : SubtargetFeature<"gfni", "HasGFNI", "true",
                       "Enable Galois Field Arithmetic Instructions",
                       [FeatureSSE2]>;
// Vector carry-less multiply; implies AVX and PCLMUL.
def FeatureVPCLMULQDQ
    : SubtargetFeature<"vpclmulqdq", "HasVPCLMULQDQ", "true",
                       "Enable vpclmulqdq instructions",
                       [FeatureAVX, FeaturePCLMUL]>;
// Four-operand fused multiply-add; implies AVX and SSE4a.
// The description string is user-visible help text, so it spells the
// operation correctly as "multiply-add".
def FeatureFMA4    : SubtargetFeature<"fma4", "HasFMA4", "true",
                                      "Enable four-operand fused multiply-add",
                                      [FeatureAVX, FeatureSSE4A]>;
// XOP implies FMA4.
def FeatureXOP
    : SubtargetFeature<"xop", "HasXOP", "true",
                       "Enable XOP instructions",
                       [FeatureFMA4]>;
// Permits unaligned memory operands on SSE instructions; implies nothing.
def FeatureSSEUnalignedMem
    : SubtargetFeature<"sse-unaligned-mem", "HasSSEUnalignedMem", "true",
                       "Allow unaligned memory operands with SSE instructions (this may require setting a configuration bit in the processor)">;
// AES implies SSE2; the vector form additionally implies AVX2.
def FeatureAES
    : SubtargetFeature<"aes", "HasAES", "true",
                       "Enable AES instructions",
                       [FeatureSSE2]>;
def FeatureVAES
    : SubtargetFeature<"vaes", "HasVAES", "true",
                       "Promote selected AES instructions to AVX512/AVX registers",
                       [FeatureAVX2, FeatureAES]>;
// Independent instruction-set extensions; none of these records implies
// another feature.
def FeatureTBM
    : SubtargetFeature<"tbm", "HasTBM", "true",
                       "Enable TBM instructions">;
def FeatureLWP
    : SubtargetFeature<"lwp", "HasLWP", "true",
                       "Enable LWP instructions">;
def FeatureMOVBE
    : SubtargetFeature<"movbe", "HasMOVBE", "true",
                       "Support MOVBE instruction">;
// Note: the feature string is "rdrnd", not "rdrand".
def FeatureRDRAND
    : SubtargetFeature<"rdrnd", "HasRDRAND", "true",
                       "Support RDRAND instruction">;
def FeatureFSGSBase
    : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true",
                       "Support FS/GS Base instructions">;
def FeatureLZCNT
    : SubtargetFeature<"lzcnt", "HasLZCNT", "true",
                       "Support LZCNT instruction">;
def FeatureBMI
    : SubtargetFeature<"bmi", "HasBMI", "true",
                       "Support BMI instructions">;
def FeatureBMI2
    : SubtargetFeature<"bmi2", "HasBMI2", "true",
                       "Support BMI2 instructions">;
def FeatureRTM
    : SubtargetFeature<"rtm", "HasRTM", "true",
                       "Support RTM instructions">;
def FeatureADX
    : SubtargetFeature<"adx", "HasADX", "true",
                       "Support ADX instructions">;
// SHA implies SSE2; SHA512 implies AVX2.
def FeatureSHA
    : SubtargetFeature<"sha", "HasSHA", "true",
                       "Enable SHA instructions",
                       [FeatureSSE2]>;
def FeatureSHA512
    : SubtargetFeature<"sha512", "HasSHA512", "true",
                       "Support SHA512 instructions",
                       [FeatureAVX2]>;
// CET SHSTK: Control-Flow Enforcement Technology using Shadow Stack.
def FeatureSHSTK
    : SubtargetFeature<"shstk", "HasSHSTK", "true",
                       "Support CET Shadow-Stack instructions">;
// SM3 implies AVX; SM4 implies AVX2.
def FeatureSM3
    : SubtargetFeature<"sm3", "HasSM3", "true",
                       "Support SM3 instructions",
                       [FeatureAVX]>;
def FeatureSM4
    : SubtargetFeature<"sm4", "HasSM4", "true",
                       "Support SM4 instructions",
                       [FeatureAVX2]>;
// Independent features; none of these records implies another feature.
def FeaturePRFCHW
    : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
                       "Support PRFCHW instructions">;
def FeatureRDSEED
    : SubtargetFeature<"rdseed", "HasRDSEED", "true",
                       "Support RDSEED instruction">;
// Note: the feature string is "sahf" although the record covers both LAHF
// and SAHF in 64-bit mode.
def FeatureLAHFSAHF64
    : SubtargetFeature<"sahf", "HasLAHFSAHF64", "true",
                       "Support LAHF and SAHF instructions in 64-bit mode">;
def FeatureMWAITX
    : SubtargetFeature<"mwaitx", "HasMWAITX", "true",
                       "Enable MONITORX/MWAITX timer functionality">;
def FeatureCLZERO
    : SubtargetFeature<"clzero", "HasCLZERO", "true",
                       "Enable Cache Line Zero">;
def FeatureCLDEMOTE
    : SubtargetFeature<"cldemote", "HasCLDEMOTE", "true",
                       "Enable Cache Line Demote">;
def FeaturePTWRITE
    : SubtargetFeature<"ptwrite", "HasPTWRITE", "true",
                       "Support ptwrite instruction">;
// AMX family: amx-tile is the base feature and every other AMX record
// implies it.
def FeatureAMXTILE
    : SubtargetFeature<"amx-tile", "HasAMXTILE", "true",
                       "Support AMX-TILE instructions">;
def FeatureAMXINT8
    : SubtargetFeature<"amx-int8", "HasAMXINT8", "true",
                       "Support AMX-INT8 instructions",
                       [FeatureAMXTILE]>;
def FeatureAMXBF16
    : SubtargetFeature<"amx-bf16", "HasAMXBF16", "true",
                       "Support AMX-BF16 instructions",
                       [FeatureAMXTILE]>;
def FeatureAMXFP16
    : SubtargetFeature<"amx-fp16", "HasAMXFP16", "true",
                       "Support AMX amx-fp16 instructions",
                       [FeatureAMXTILE]>;
def FeatureAMXCOMPLEX
    : SubtargetFeature<"amx-complex", "HasAMXCOMPLEX", "true",
                       "Support AMX-COMPLEX instructions",
                       [FeatureAMXTILE]>;
def FeatureCMPCCXADD
    : SubtargetFeature<"cmpccxadd", "HasCMPCCXADD", "true",
                       "Support CMPCCXADD instructions">;
// The implied-feature list is spelled out explicitly and is empty.
def FeatureRAOINT
    : SubtargetFeature<"raoint", "HasRAOINT", "true",
                       "Support RAO-INT instructions",
                       []>;
// AVX-NE-CONVERT implies AVX2.
def FeatureAVXNECONVERT
    : SubtargetFeature<"avxneconvert", "HasAVXNECONVERT", "true",
                       "Support AVX-NE-CONVERT instructions",
                       [FeatureAVX2]>;
// Independent features; none of these records implies another feature.
def FeatureINVPCID
    : SubtargetFeature<"invpcid", "HasINVPCID", "true",
                       "Invalidate Process-Context Identifier">;
def FeatureSGX
    : SubtargetFeature<"sgx", "HasSGX", "true",
                       "Enable Software Guard Extensions">;
def FeatureCLFLUSHOPT
    : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true",
                       "Flush A Cache Line Optimized">;
def FeatureCLWB
    : SubtargetFeature<"clwb", "HasCLWB", "true",
                       "Cache Line Write Back">;
def FeatureWBNOINVD
    : SubtargetFeature<"wbnoinvd", "HasWBNOINVD", "true",
                       "Write Back No Invalidate">;
def FeatureRDPID
    : SubtargetFeature<"rdpid", "HasRDPID", "true",
                       "Support RDPID instructions">;
def FeatureRDPRU
    : SubtargetFeature<"rdpru", "HasRDPRU", "true",
                       "Support RDPRU instructions">;
def FeatureWAITPKG
    : SubtargetFeature<"waitpkg", "HasWAITPKG", "true",
                       "Wait and pause enhancements">;
def FeatureENQCMD
    : SubtargetFeature<"enqcmd", "HasENQCMD", "true",
                       "Has ENQCMD instructions">;
// Key Locker: "kl" implies SSE2, and "widekl" implies "kl".
def FeatureKL
    : SubtargetFeature<"kl", "HasKL", "true",
                       "Support Key Locker kl Instructions",
                       [FeatureSSE2]>;
def FeatureWIDEKL
    : SubtargetFeature<"widekl", "HasWIDEKL", "true",
                       "Support Key Locker wide Instructions",
                       [FeatureKL]>;
// Independent features; none of these records implies another feature.
def FeatureHRESET
    : SubtargetFeature<"hreset", "HasHRESET", "true",
                       "Has hreset instruction">;
def FeatureSERIALIZE
    : SubtargetFeature<"serialize", "HasSERIALIZE", "true",
                       "Has serialize instruction">;
def FeatureTSXLDTRK
    : SubtargetFeature<"tsxldtrk", "HasTSXLDTRK", "true",
                       "Support TSXLDTRK instructions">;
def FeatureUINTR
    : SubtargetFeature<"uintr", "HasUINTR", "true",
                       "Has UINTR Instructions">;
def FeatureUSERMSR
    : SubtargetFeature<"usermsr", "HasUSERMSR", "true",
                       "Support USERMSR instructions">;
def FeaturePCONFIG
    : SubtargetFeature<"pconfig", "HasPCONFIG", "true",
                       "platform configuration instruction">;
def FeatureMOVDIRI
    : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
                       "Support movdiri instruction (direct store integer)">;
def FeatureMOVDIR64B
    : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true",
                       "Support movdir64b instruction (direct store 64 bytes)">;
// AVX10.1 (up to 256-bit) is expressed purely as a bundle of implied AVX-512
// era features.
def FeatureAVX10_1
    : SubtargetFeature<"avx10.1-256", "HasAVX10_1", "true",
                       "Support AVX10.1 up to 256-bit instruction",
                       [FeatureCDI, FeatureVBMI, FeatureIFMA, FeatureVNNI,
                        FeatureBF16, FeatureVPOPCNTDQ, FeatureVBMI2,
                        FeatureBITALG, FeatureVAES, FeatureVPCLMULQDQ,
                        FeatureFP16]>;
// The 512-bit flavour adds EVEX512 on top of the 256-bit bundle.
def FeatureAVX10_1_512
    : SubtargetFeature<"avx10.1-512", "HasAVX10_1_512", "true",
                       "Support AVX10.1 up to 512-bit instruction",
                       [FeatureAVX10_1, FeatureEVEX512]>;
// Independent features; none of these records implies another feature.
def FeatureEGPR
    : SubtargetFeature<"egpr", "HasEGPR", "true",
                       "Support extended general purpose register">;
def FeaturePush2Pop2
    : SubtargetFeature<"push2pop2", "HasPush2Pop2", "true",
                       "Support PUSH2/POP2 instructions">;
def FeaturePPX
    : SubtargetFeature<"ppx", "HasPPX", "true",
                       "Support Push-Pop Acceleration">;
def FeatureNDD
    : SubtargetFeature<"ndd", "HasNDD", "true",
                       "Support non-destructive destination">;
def FeatureCCMP
    : SubtargetFeature<"ccmp", "HasCCMP", "true",
                       "Support conditional cmp & test instructions">;
def FeatureNF
    : SubtargetFeature<"nf", "HasNF", "true",
                       "Support status flags update suppression">;
def FeatureCF
    : SubtargetFeature<"cf", "HasCF", "true",
                       "Support conditional faulting">;
def FeatureZU
    : SubtargetFeature<"zu", "HasZU", "true",
                       "Support zero-upper SETcc/IMUL">;
// Opt-in switch for GPR32 usage inside inline assembly.
def FeatureUseGPR32InInlineAsm : SubtargetFeature<
    "inline-asm-use-gpr32", "UseInlineAsmGPR32", "true",
    "Enable use of GPR32 in inline assembly for APX">;
348
// Enhanced REP MOVSB and STOSB ("string operations") are available on Ivy
// Bridge and newer; see "REP String Enhancement" in the Intel Software
// Development Manual. With this feature REP MOVSB copies using the largest
// available size rather than byte by byte, so it is at least as fast as
// REPMOVS{W,D,Q}.
def FeatureERMSB : SubtargetFeature<"ermsb", "HasERMSB", "true",
                                    "REP MOVS/STOS are fast">;

// Fast Short REP MOV is available on Icelake and newer.
def FeatureFSRM : SubtargetFeature<"fsrm", "HasFSRM", "true",
                                   "REP MOVSB of short lengths is faster">;
364
// Selects software floating point; implies nothing.
def FeatureSoftFloat : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
                                        "Use software floating point features">;
368
369//===----------------------------------------------------------------------===//
370// X86 Subtarget Security Mitigation features
371//===----------------------------------------------------------------------===//
372
// Mitigate potential Spectre v2 attacks on indirect calls by lowering them
// through a special construct called a `retpoline`.
def FeatureRetpolineIndirectCalls : SubtargetFeature<
    "retpoline-indirect-calls", "UseRetpolineIndirectCalls", "true",
    "Remove speculation of indirect calls from the generated code">;

// Mitigate potential Spectre v2 attacks on indirect branches and switches by
// lowering them either to conditional branch trees or through a `retpoline`.
def FeatureRetpolineIndirectBranches : SubtargetFeature<
    "retpoline-indirect-branches", "UseRetpolineIndirectBranches", "true",
    "Remove speculation of indirect branches from the generated code">;

// Deprecated umbrella feature: turns on both `retpoline-indirect-calls` and
// `retpoline-indirect-branches`.
def FeatureRetpoline : SubtargetFeature<
    "retpoline", "DeprecatedUseRetpoline", "true",
    "Remove speculation of indirect branches from the "
    "generated code, either by avoiding them entirely or "
    "lowering them with a speculation blocking construct",
    [FeatureRetpolineIndirectCalls, FeatureRetpolineIndirectBranches]>;
397
// Use externally supplied thunks for emitted retpoline calls, so that highly
// specialized environments (such as a kernel that does boot-time hot
// patching) can provide their own thunk definitions.
// Implies `retpoline-indirect-calls`.
def FeatureRetpolineExternalThunk : SubtargetFeature<
    "retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
    "When lowering an indirect call or branch using a `retpoline`, rely "
    "on the specified user provided thunk rather than emitting one "
    "ourselves. Only has effect when combined with some other retpoline "
    "feature",
    [FeatureRetpolineIndirectCalls]>;
408
// LVI mitigation for indirect calls/branches and call returns.
def FeatureLVIControlFlowIntegrity : SubtargetFeature<
    "lvi-cfi", "UseLVIControlFlowIntegrity", "true",
    "Prevent indirect calls/branches from using a memory operand, and "
    "precede all indirect calls/branches from a register with an "
    "LFENCE instruction to serialize control flow. Also decompose RET "
    "instructions into a POP+LFENCE+JMP sequence.">;

// SESES: speculative-execution attack mitigation. Implies `lvi-cfi`.
def FeatureSpeculativeExecutionSideEffectSuppression : SubtargetFeature<
    "seses", "UseSpeculativeExecutionSideEffectSuppression", "true",
    "Prevent speculative execution side channel timing attacks by "
    "inserting a speculation barrier before memory reads, memory writes, "
    "and conditional branches. Implies LVI Control Flow integrity.",
    [FeatureLVIControlFlowIntegrity]>;

// LVI mitigation for data loads.
def FeatureLVILoadHardening : SubtargetFeature<
    "lvi-load-hardening", "UseLVILoadHardening", "true",
    "Insert LFENCE instructions to prevent data speculatively injected "
    "into loads from being used maliciously.">;
433
// Permits memory tags in the upper address bits of global addresses.
def FeatureTaggedGlobals : SubtargetFeature<
    "tagged-globals", "AllowTaggedGlobals", "true",
    "Use an instruction sequence for taking the address of a global "
    "that allows a memory tag in the upper address bits.">;
439
// Codegen mitigations for the Straight Line Speculation vulnerability, one
// switch for RET and one for indirect JMP.
def FeatureHardenSlsRet : SubtargetFeature<
    "harden-sls-ret", "HardenSlsRet", "true",
    "Harden against straight line speculation across RET instructions.">;

def FeatureHardenSlsIJmp : SubtargetFeature<
    "harden-sls-ijmp", "HardenSlsIJmp", "true",
    "Harden against straight line speculation across indirect JMP instructions.">;
450
451//===----------------------------------------------------------------------===//
452// X86 Subtarget Tuning features
453//===----------------------------------------------------------------------===//
// Instruction-selection preferences and slow-instruction markers.
def TuningPreferMovmskOverVTest
    : SubtargetFeature<"prefer-movmsk-over-vtest", "PreferMovmskOverVTest",
                       "true",
                       "Prefer movmsk over vtest instruction">;

def TuningSlowSHLD
    : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
                       "SHLD instruction is slow">;

def TuningSlowPMULLD
    : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
                       "PMULLD instruction is slow (compared to PMULLW/PMULHW and PMULUDQ)">;

def TuningSlowPMADDWD
    : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow", "true",
                       "PMADDWD is slower than PMULLD">;
467
// FIXME: CPUs without SSE should not get this tuning flag.
def TuningSlowUAMem16
    : SubtargetFeature<"slow-unaligned-mem-16", "IsUnalignedMem16Slow", "true",
                       "Slow unaligned 16-byte memory access">;

def TuningSlowUAMem32
    : SubtargetFeature<"slow-unaligned-mem-32", "IsUnalignedMem32Slow", "true",
                       "Slow unaligned 32-byte memory access">;
476
// Adjust the stack pointer with LEA.
def TuningLEAForSP
    : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
                       "Use LEA for adjusting the stack pointer (this is an optimization for Intel Atom processors)">;
479
// Set when 8-bit division is significantly faster than 32-bit division, so
// the narrower form should be used whenever possible.
def TuningSlowDivide32
    : SubtargetFeature<"idivl-to-divb", "HasSlowDivide32", "true",
                       "Use 8-bit divide for positive values less than 256">;

// Set when 32-bit division is significantly faster than 64-bit division, so
// the narrower form should be used whenever possible.
def TuningSlowDivide64
    : SubtargetFeature<"idivq-to-divl", "HasSlowDivide64", "true",
                       "Use 32-bit divide for positive values less than 2^32">;
491
def TuningPadShortFunctions
    : SubtargetFeature<"pad-short-functions", "PadShortFunctions", "true",
                       "Pad short functions (to prevent a stall when returning too early)">;

// Some processors are slow on instructions that implicitly take two memory
// operands. In practice CALL, PUSH and POP with memory operands should then
// be replaced by a MOV followed by the register form of CALL/PUSH/POP.
def TuningSlowTwoMemOps
    : SubtargetFeature<"slow-two-mem-ops", "SlowTwoMemOps", "true",
                       "Two memory operand instructions are slow">;
502
// Set when the inputs of an LEA instruction must already be ready at address
// generation (AG) time.
def TuningLEAUsesAG
    : SubtargetFeature<"lea-uses-ag", "LeaUsesAG", "true",
                       "LEA instruction needs inputs at AG stage">;

def TuningSlowLEA
    : SubtargetFeature<"slow-lea", "SlowLEA", "true",
                       "LEA instruction with certain arguments is slow">;

// Set when LEA is slow if it has all three source operands (base, index and
// offset), or if it uses base and index registers where the base is EBP, RBP,
// or R13.
def TuningSlow3OpsLEA
    : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
                       "LEA instruction with 3 ops or certain registers is slow">;
516
517// True if INC and DEC instructions are slow when writing to flags, i.e.
// (per the description) slower than the equivalent ADD and SUB.
518def TuningSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
519                                   "INC and DEC instructions are slower than ADD and SUB">;
520
// False-dependency tunings. Each flag below records that the instruction(s)
// named in its description have a false dependency on the destination
// register.
// NOTE(review): presumably the backend uses these to break the dependency
// before emitting the instruction -- confirm against the users of the fields.

// POPCNT. Sets HasPOPCNTFalseDeps.
521def TuningPOPCNTFalseDeps : SubtargetFeature<"false-deps-popcnt",
522                                     "HasPOPCNTFalseDeps", "true",
523                                     "POPCNT has a false dependency on dest register">;
524
// LZCNT and TZCNT share a single flag. Sets HasLZCNTFalseDeps.
525def TuningLZCNTFalseDeps : SubtargetFeature<"false-deps-lzcnt-tzcnt",
526                                     "HasLZCNTFalseDeps", "true",
527                                     "LZCNT/TZCNT have a false dependency on dest register">;
528
// VFMULCPH/SH and VFCMULCPH/SH. Sets HasMULCFalseDeps.
529def TuningMULCFalseDeps : SubtargetFeature<"false-deps-mulc",
530                               "HasMULCFalseDeps", "true",
531                               "VF[C]MULCPH/SH has a false dependency on dest register">;
532
// VPERMD/Q/PS/PD. Sets HasPERMFalseDeps.
533def TuningPERMFalseDeps : SubtargetFeature<"false-deps-perm",
534                               "HasPERMFalseDeps", "true",
535                               "VPERMD/Q/PS/PD has a false dependency on dest register">;
536
// VRANGEPD/PS/SD/SS. Sets HasRANGEFalseDeps.
537def TuningRANGEFalseDeps : SubtargetFeature<"false-deps-range",
538                               "HasRANGEFalseDeps", "true",
539                               "VRANGEPD/PS/SD/SS has a false dependency on dest register">;
540
// True if the GETMANT family has a false dependency on the destination
// register. Per the description this covers VGETMANTSS/SD/SH and the memory
// forms of VGETMANTPS/PD. Sets HasGETMANTFalseDeps.
// The description previously spelled the packed mnemonic "VGETMANDPS", which
// is not an instruction; only that user-visible help text is corrected.
541def TuningGETMANTFalseDeps : SubtargetFeature<"false-deps-getmant",
542                               "HasGETMANTFalseDeps", "true",
543                               "VGETMANTSS/SD/SH and VGETMANTPS/PD(memory version) has a"
544                               " false dependency on dest register">;
545
// True if VPMULLQ has a false dependency on the destination register.
// Sets HasMULLQFalseDeps.
546def TuningMULLQFalseDeps : SubtargetFeature<"false-deps-mullq",
547                               "HasMULLQFalseDeps", "true",
548                               "VPMULLQ has a false dependency on dest register">;
549
// True if SBB with the same register for both operands has no dependency on
// that source register (per the description). Sets HasSBBDepBreaking.
550def TuningSBBDepBreaking : SubtargetFeature<"sbb-dep-breaking",
551                                     "HasSBBDepBreaking", "true",
552                                     "SBB with same register has no source dependency">;
553
554// On recent X86 (port bound) processors, it's preferable to combine to a single
555// shuffle using a variable mask over multiple fixed shuffles.
// The cross-lane and per-lane cases are tracked by separate flags.
556def TuningFastVariableCrossLaneShuffle
557    : SubtargetFeature<"fast-variable-crosslane-shuffle",
558                       "HasFastVariableCrossLaneShuffle",
559                       "true", "Cross-lane shuffles with variable masks are fast">;
560def TuningFastVariablePerLaneShuffle
561    : SubtargetFeature<"fast-variable-perlane-shuffle",
562                       "HasFastVariablePerLaneShuffle",
563                       "true", "Per-lane shuffles with variable masks are fast">;
564
// Bypass-delay tunings. TuningNoDomainDelay covers use of the 'wrong' domain
// in general; the Mov/Blend/Shuffle variants cover only that operation kind.

565// Goldmont / Tremont (Atom in general) have no bypass delay.
566def TuningNoDomainDelay : SubtargetFeature<"no-bypass-delay",
567                                   "NoDomainDelay","true",
568                                   "Has no bypass delay when using the 'wrong' domain">;
569
570// Many processors (Nehalem+ on Intel) have no bypass delay when
571// using the wrong mov type.
572def TuningNoDomainDelayMov : SubtargetFeature<"no-bypass-delay-mov",
573                                   "NoDomainDelayMov","true",
574                                   "Has no bypass delay when using the 'wrong' mov type">;
575
576// Newer processors (Skylake+ on Intel) have no bypass delay when
577// using the wrong blend type.
578def TuningNoDomainDelayBlend : SubtargetFeature<"no-bypass-delay-blend",
579                                   "NoDomainDelayBlend","true",
580                                   "Has no bypass delay when using the 'wrong' blend type">;
581
582// Newer processors (Haswell+ on Intel) have no bypass delay when
583// using the wrong shuffle type.
584def TuningNoDomainDelayShuffle : SubtargetFeature<"no-bypass-delay-shuffle",
585                                   "NoDomainDelayShuffle","true",
586                                   "Has no bypass delay when using the 'wrong' shuffle type">;
587
588// Prefer lowering shuffles on AVX512 targets (e.g. Skylake Server) to
589// imm shifts/rotate if they can use more ports than regular shuffles.
// Note the field name differs from the def name: this sets
// PreferLowerShuffleAsShift.
590def TuningPreferShiftShuffle : SubtargetFeature<"faster-shift-than-shuffle",
591                                   "PreferLowerShuffleAsShift", "true",
592                                   "Shifts are faster (or as fast) as shuffle">;
593
// True if vector shifts are fast: 2/cycle as opposed to 1/cycle, per the
// description (the name suggests this concerns immediate-count shifts).
// NOTE(review): unlike the neighbouring features, the feature string carries
// a "tuning-" prefix. It is user-visible, so it is left as-is here.
594def TuningFastImmVectorShift : SubtargetFeature<"tuning-fast-imm-vector-shift",
595                                   "FastImmVectorShift", "true",
596                                   "Vector shifts are fast (2/cycle) as opposed to slow (1/cycle)">;
597
598// On some X86 processors, a vzeroupper instruction should be inserted after
599// using ymm/zmm registers before executing code that may use SSE instructions.
// Feature string "vzeroupper"; sets InsertVZEROUPPER.
600def TuningInsertVZEROUPPER
601    : SubtargetFeature<"vzeroupper",
602                       "InsertVZEROUPPER",
603                       "true", "Should insert vzeroupper instructions">;
604
605// TuningFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency
606// than the corresponding Newton-Raphson (NR) code. TuningFastVectorFSQRT should
607// be enabled if vector FSQRT has higher throughput than the corresponding NR code.
608// The idea is that throughput bound code is likely to be vectorized, so for
609// vectorized code we should care about the throughput of SQRT operations.
610// But if the code is scalar that probably means that the code has some kind of
611// dependency and we should care more about reducing the latency.
612
613// True if hardware SQRTSS instruction is at least as fast (latency) as
614// RSQRTSS followed by a Newton-Raphson iteration.
615def TuningFastScalarFSQRT
616    : SubtargetFeature<"fast-scalar-fsqrt", "HasFastScalarFSQRT",
617                       "true", "Scalar SQRT is fast (disable Newton-Raphson)">;
618// True if hardware SQRTPS/VSQRTPS instructions are at least as fast
619// (throughput) as RSQRTPS/VRSQRTPS followed by a Newton-Raphson iteration.
620def TuningFastVectorFSQRT
621    : SubtargetFeature<"fast-vector-fsqrt", "HasFastVectorFSQRT",
622                       "true", "Vector SQRT is fast (disable Newton-Raphson)">;
623
624// If lzcnt has equivalent latency/throughput to most simple integer ops, it can
625// be used to replace test/set sequences.
// Feature string "fast-lzcnt"; sets HasFastLZCNT.
626def TuningFastLZCNT
627    : SubtargetFeature<
628          "fast-lzcnt", "HasFastLZCNT", "true",
629          "LZCNT instructions are as fast as most simple integer ops">;
630
// Each flag below gives a maximum NOP length (7, 11 or 15 bytes) that the
// target can decode quickly.

631// If the target can efficiently decode NOPs up to 7 bytes in length.
632def TuningFast7ByteNOP
633    : SubtargetFeature<
634          "fast-7bytenop", "HasFast7ByteNOP", "true",
635          "Target can quickly decode up to 7 byte NOPs">;
636
637// If the target can efficiently decode NOPs up to 11 bytes in length.
638def TuningFast11ByteNOP
639    : SubtargetFeature<
640          "fast-11bytenop", "HasFast11ByteNOP", "true",
641          "Target can quickly decode up to 11 byte NOPs">;
642
643// If the target can efficiently decode NOPs up to 15 bytes in length.
644def TuningFast15ByteNOP
645    : SubtargetFeature<
646          "fast-15bytenop", "HasFast15ByteNOP", "true",
647          "Target can quickly decode up to 15 byte NOPs">;
648
649// Sandy Bridge and newer processors can use SHLD with the same source on both
650// inputs to implement rotate to avoid the partial flag update of the normal
651// rotate instructions.
// Feature string "fast-shld-rotate"; sets HasFastSHLDRotate.
652def TuningFastSHLDRotate
653    : SubtargetFeature<
654          "fast-shld-rotate", "HasFastSHLDRotate", "true",
655          "SHLD can be used as a faster rotate">;
656
// Two distinct fusion flags: "branchfusion" covers only CMP/TEST + branch,
// while "macrofusion" covers the wider set described further down.

657// Bulldozer and newer processors can merge CMP/TEST (but not other
658// instructions) with conditional branches.
659def TuningBranchFusion
660    : SubtargetFeature<"branchfusion", "HasBranchFusion", "true",
661                 "CMP/TEST can be fused with conditional branches">;
662
663// Sandy Bridge and newer processors have many instructions that can be
664// fused with conditional branches and pass through the CPU as a single
665// operation.
666def TuningMacroFusion
667    : SubtargetFeature<"macrofusion", "HasMacroFusion", "true",
668                 "Various instructions can be fused with conditional branches">;
669
670// Gather is available since Haswell (AVX2 set). So technically, we can
671// generate Gathers on all AVX2 processors. But the overhead on HSW is high.
672// Skylake Client processor has faster Gathers than HSW and performance is
673// similar to Skylake Server (AVX-512).
// Feature string "fast-gather"; sets HasFastGather.
674def TuningFastGather
675    : SubtargetFeature<"fast-gather", "HasFastGather", "true",
676                       "Indicates if gather is reasonably fast (this is true for Skylake client and all AVX-512 CPUs)">;
677
678// Generate vpdpwssd instead of the vpmaddwd+vpaddd sequence.
// Feature string "fast-dpwssd"; sets HasFastDPWSSD.
679def TuningFastDPWSSD
680    : SubtargetFeature<
681          "fast-dpwssd", "HasFastDPWSSD", "true",
682          "Prefer vpdpwssd instruction over vpmaddwd+vpaddd instruction sequence">;
683
// Note the inverted sense of the next two features: enabling
// "prefer-no-gather" / "prefer-no-scatter" sets PreferGather / PreferScatter
// to "false" rather than setting a flag to "true".
684def TuningPreferNoGather
685    : SubtargetFeature<"prefer-no-gather", "PreferGather", "false",
686                       "Prefer no gather instructions">;
687def TuningPreferNoScatter
688    : SubtargetFeature<"prefer-no-scatter", "PreferScatter", "false",
689                       "Prefer no scatter instructions">;
690
// Preferred vector width tunings.

// Prefer 128-bit AVX instructions. Sets Prefer128Bit.
691def TuningPrefer128Bit
692    : SubtargetFeature<"prefer-128-bit", "Prefer128Bit", "true",
693                       "Prefer 128-bit AVX instructions">;
694
// Prefer 256-bit AVX instructions. Sets Prefer256Bit.
695def TuningPrefer256Bit
696    : SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true",
697                       "Prefer 256-bit AVX instructions">;
698
// Allow 256-bit loads/stores to be generated even when 128-bit is preferred
// (per the description). Sets AllowLight256Bit.
699def TuningAllowLight256Bit
700    : SubtargetFeature<"allow-light-256-bit", "AllowLight256Bit", "true",
701                       "Enable generation of 256-bit load/stores even if we prefer 128-bit">;
702
// Prefer AVX512 mask registers over PTEST/MOVMSK. Sets PreferMaskRegisters.
703def TuningPreferMaskRegisters
704    : SubtargetFeature<"prefer-mask-registers", "PreferMaskRegisters", "true",
705                       "Prefer AVX512 mask registers over PTEST/MOVMSK">;
706
// True if BEXTR is implemented as a single uop with good throughput (per the
// description). Sets HasFastBEXTR.
707def TuningFastBEXTR : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true",
708          "Indicates that the BEXTR instruction is implemented as a single uop "
709          "with good throughput">;
710
711// Combine vector math operations with shuffles into horizontal math
712// instructions if a CPU implements horizontal operations (introduced with
713// SSE3) with better latency/throughput than the alternative sequence.
// Feature string "fast-hops"; sets HasFastHorizontalOps.
714def TuningFastHorizontalOps
715    : SubtargetFeature<
716        "fast-hops", "HasFastHorizontalOps", "true",
717        "Prefer horizontal vector math instructions (haddp, phsub, etc.) over "
718        "normal vector instructions with shuffles">;
719
// Prefer a left/right logical shift pair over a shift+and pair. The scalar
// and vector cases are tracked by separate flags.
720def TuningFastScalarShiftMasks
721    : SubtargetFeature<
722        "fast-scalar-shift-masks", "HasFastScalarShiftMasks", "true",
723        "Prefer a left/right scalar logical shift pair over a shift+and pair">;
724
725def TuningFastVectorShiftMasks
726    : SubtargetFeature<
727        "fast-vector-shift-masks", "HasFastVectorShiftMasks", "true",
728        "Prefer a left/right vector logical shift pair over a shift+and pair">;
729
// Prefer MOVBE over a single-use load + bswap, or a single-use bswap + store.
// Sets HasFastMOVBE.
730def TuningFastMOVBE
731    : SubtargetFeature<"fast-movbe", "HasFastMOVBE", "true",
732    "Prefer a movbe over a single-use load + bswap / single-use bswap + store">;
733
// Prefer an i16 instruction with an i16 immediate over extending the
// operation to i32. Sets HasFastImm16.
734def TuningFastImm16
735    : SubtargetFeature<"fast-imm16", "HasFastImm16", "true",
736    "Prefer a i16 instruction with i16 immediate over extension to i32">;
737
// Microarchitecture-specific cost selection.

// Use Silvermont (SLM) specific arithmetic costs. Sets UseSLMArithCosts.
738def TuningUseSLMArithCosts
739    : SubtargetFeature<"use-slm-arith-costs", "UseSLMArithCosts", "true",
740        "Use Silvermont specific arithmetic costs">;
741
// Use Goldmont (GLM) specific floating point div/sqrt costs.
// Sets UseGLMDivSqrtCosts.
742def TuningUseGLMDivSqrtCosts
743    : SubtargetFeature<"use-glm-div-sqrt-costs", "UseGLMDivSqrtCosts", "true",
744        "Use Goldmont specific floating point div/sqrt costs">;
745
746// Starting with the Redwood Cove architecture, branches support a
747// branch-taken hint (i.e., instruction prefix 3EH).
// Feature string "branch-hint"; sets HasBranchHint.
748def TuningBranchHint: SubtargetFeature<"branch-hint", "HasBranchHint", "true",
749                                        "Target has branch hint feature">;
750
751//===----------------------------------------------------------------------===//
752// X86 CPU Families
753// TODO: Remove these - use general tuning features to determine codegen.
754//===----------------------------------------------------------------------===//
755
756// Bonnell
// Sets IsAtom. The feature string is empty.
// NOTE(review): presumably the empty string keeps this flag from being
// selected by name, leaving it reachable only through tuning lists such as
// AtomTuning -- confirm.
757def ProcIntelAtom : SubtargetFeature<"", "IsAtom", "true", "Is Intel Atom processor">;
758
759//===----------------------------------------------------------------------===//
760// Register File Description
761//===----------------------------------------------------------------------===//
762
763include "X86RegisterInfo.td"
764include "X86RegisterBanks.td"
765
766//===----------------------------------------------------------------------===//
767// Instruction Descriptions
768//===----------------------------------------------------------------------===//
769
770include "X86Schedule.td"
771include "X86InstrInfo.td"
772include "X86SchedPredicates.td"
773
// The X86 InstrInfo record; it is declared with no field overrides.
774def X86InstrInfo : InstrInfo;
775
776//===----------------------------------------------------------------------===//
777// X86 Scheduler Models
778//===----------------------------------------------------------------------===//
779
780include "X86ScheduleAtom.td"
781include "X86SchedSandyBridge.td"
782include "X86SchedHaswell.td"
783include "X86SchedBroadwell.td"
784include "X86ScheduleSLM.td"
785include "X86ScheduleZnver1.td"
786include "X86ScheduleZnver2.td"
787include "X86ScheduleZnver3.td"
788include "X86ScheduleZnver4.td"
789include "X86ScheduleBdVer2.td"
790include "X86ScheduleBtVer2.td"
791include "X86SchedSkylakeClient.td"
792include "X86SchedSkylakeServer.td"
793include "X86SchedIceLake.td"
794include "X86SchedAlderlakeP.td"
795include "X86SchedSapphireRapids.td"
796
797//===----------------------------------------------------------------------===//
798// X86 Processor Feature Lists
799//===----------------------------------------------------------------------===//
800
801def ProcessorFeatures {
802  // x86-64 micro-architecture levels: x86-64 and x86-64-v[234]
803  list<SubtargetFeature> X86_64V1Features = [
804    FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX, FeatureSSE2,
805    FeatureFXSR, FeatureNOPL, FeatureX86_64,
806  ];
807  list<SubtargetFeature> X86_64V1Tuning = [
808    TuningMacroFusion,
809    TuningSlow3OpsLEA,
810    TuningSlowDivide64,
811    TuningSlowIncDec,
812    TuningInsertVZEROUPPER
813  ];
814
815  list<SubtargetFeature> X86_64V2Features = !listconcat(X86_64V1Features, [
816    FeatureCX16, FeatureLAHFSAHF64, FeatureCRC32, FeaturePOPCNT,
817    FeatureSSE42
818  ]);
819  list<SubtargetFeature> X86_64V2Tuning = [
820    TuningMacroFusion,
821    TuningSlow3OpsLEA,
822    TuningSlowDivide64,
823    TuningSlowUAMem32,
824    TuningFastScalarFSQRT,
825    TuningFastSHLDRotate,
826    TuningFast15ByteNOP,
827    TuningPOPCNTFalseDeps,
828    TuningInsertVZEROUPPER
829  ];
830
831  list<SubtargetFeature> X86_64V3Features = !listconcat(X86_64V2Features, [
832    FeatureAVX2, FeatureBMI, FeatureBMI2, FeatureF16C, FeatureFMA, FeatureLZCNT,
833    FeatureMOVBE, FeatureXSAVE
834  ]);
835  list<SubtargetFeature> X86_64V3Tuning = [
836    TuningMacroFusion,
837    TuningSlow3OpsLEA,
838    TuningSlowDivide64,
839    TuningFastScalarFSQRT,
840    TuningFastSHLDRotate,
841    TuningFast15ByteNOP,
842    TuningFastVariableCrossLaneShuffle,
843    TuningFastVariablePerLaneShuffle,
844    TuningPOPCNTFalseDeps,
845    TuningLZCNTFalseDeps,
846    TuningInsertVZEROUPPER,
847    TuningAllowLight256Bit
848  ];
849
850  list<SubtargetFeature> X86_64V4Features = !listconcat(X86_64V3Features, [
851    FeatureEVEX512,
852    FeatureBWI,
853    FeatureCDI,
854    FeatureDQI,
855    FeatureVLX,
856  ]);
857  list<SubtargetFeature> X86_64V4Tuning = [
858    TuningMacroFusion,
859    TuningSlow3OpsLEA,
860    TuningSlowDivide64,
861    TuningFastScalarFSQRT,
862    TuningFastVectorFSQRT,
863    TuningFastSHLDRotate,
864    TuningFast15ByteNOP,
865    TuningFastVariableCrossLaneShuffle,
866    TuningFastVariablePerLaneShuffle,
867    TuningPrefer256Bit,
868    TuningFastGather,
869    TuningPOPCNTFalseDeps,
870    TuningInsertVZEROUPPER,
871    TuningAllowLight256Bit
872  ];
873
874  // Nehalem
875  list<SubtargetFeature> NHMFeatures = X86_64V2Features;
876  list<SubtargetFeature> NHMTuning = [TuningMacroFusion,
877                                      TuningSlowDivide64,
878                                      TuningInsertVZEROUPPER,
879                                      TuningNoDomainDelayMov];
880
881  // Westmere
882  list<SubtargetFeature> WSMAdditionalFeatures = [FeaturePCLMUL];
883  list<SubtargetFeature> WSMTuning = NHMTuning;
884  list<SubtargetFeature> WSMFeatures =
885    !listconcat(NHMFeatures, WSMAdditionalFeatures);
886
887  // Sandybridge
888  list<SubtargetFeature> SNBAdditionalFeatures = [FeatureAVX,
889                                                  FeatureXSAVE,
890                                                  FeatureXSAVEOPT];
891  list<SubtargetFeature> SNBTuning = [TuningMacroFusion,
892                                      TuningSlow3OpsLEA,
893                                      TuningSlowDivide64,
894                                      TuningSlowUAMem32,
895                                      TuningFastScalarFSQRT,
896                                      TuningFastSHLDRotate,
897                                      TuningFast15ByteNOP,
898                                      TuningPOPCNTFalseDeps,
899                                      TuningInsertVZEROUPPER,
900                                      TuningNoDomainDelayMov];
901  list<SubtargetFeature> SNBFeatures =
902    !listconcat(WSMFeatures, SNBAdditionalFeatures);
903
904  // Ivybridge
905  list<SubtargetFeature> IVBAdditionalFeatures = [FeatureRDRAND,
906                                                  FeatureF16C,
907                                                  FeatureFSGSBase];
908  list<SubtargetFeature> IVBTuning = SNBTuning;
909  list<SubtargetFeature> IVBFeatures =
910    !listconcat(SNBFeatures, IVBAdditionalFeatures);
911
912  // Haswell
913  list<SubtargetFeature> HSWAdditionalFeatures = [FeatureAVX2,
914                                                  FeatureBMI,
915                                                  FeatureBMI2,
916                                                  FeatureERMSB,
917                                                  FeatureFMA,
918                                                  FeatureINVPCID,
919                                                  FeatureLZCNT,
920                                                  FeatureMOVBE];
921  list<SubtargetFeature> HSWTuning = [TuningMacroFusion,
922                                      TuningSlow3OpsLEA,
923                                      TuningSlowDivide64,
924                                      TuningFastScalarFSQRT,
925                                      TuningFastSHLDRotate,
926                                      TuningFast15ByteNOP,
927                                      TuningFastVariableCrossLaneShuffle,
928                                      TuningFastVariablePerLaneShuffle,
929                                      TuningPOPCNTFalseDeps,
930                                      TuningLZCNTFalseDeps,
931                                      TuningInsertVZEROUPPER,
932                                      TuningAllowLight256Bit,
933                                      TuningNoDomainDelayMov,
934                                      TuningNoDomainDelayShuffle];
935  list<SubtargetFeature> HSWFeatures =
936    !listconcat(IVBFeatures, HSWAdditionalFeatures);
937
938  // Broadwell
939  list<SubtargetFeature> BDWAdditionalFeatures = [FeatureADX,
940                                                  FeatureRDSEED,
941                                                  FeaturePRFCHW];
942  list<SubtargetFeature> BDWTuning = HSWTuning;
943  list<SubtargetFeature> BDWFeatures =
944    !listconcat(HSWFeatures, BDWAdditionalFeatures);
945
946  // Skylake
947  list<SubtargetFeature> SKLAdditionalFeatures = [FeatureAES,
948                                                  FeatureXSAVEC,
949                                                  FeatureXSAVES,
950                                                  FeatureCLFLUSHOPT];
951  list<SubtargetFeature> SKLTuning = [TuningFastGather,
952                                      TuningMacroFusion,
953                                      TuningSlow3OpsLEA,
954                                      TuningSlowDivide64,
955                                      TuningFastScalarFSQRT,
956                                      TuningFastVectorFSQRT,
957                                      TuningFastSHLDRotate,
958                                      TuningFast15ByteNOP,
959                                      TuningFastVariableCrossLaneShuffle,
960                                      TuningFastVariablePerLaneShuffle,
961                                      TuningPOPCNTFalseDeps,
962                                      TuningInsertVZEROUPPER,
963                                      TuningAllowLight256Bit,
964                                      TuningNoDomainDelayMov,
965                                      TuningNoDomainDelayShuffle,
966                                      TuningNoDomainDelayBlend];
967  list<SubtargetFeature> SKLFeatures =
968    !listconcat(BDWFeatures, SKLAdditionalFeatures);
969
970  // Skylake-AVX512
971  list<SubtargetFeature> SKXAdditionalFeatures = [FeatureAES,
972                                                  FeatureXSAVEC,
973                                                  FeatureXSAVES,
974                                                  FeatureCLFLUSHOPT,
975                                                  FeatureAVX512,
976                                                  FeatureEVEX512,
977                                                  FeatureCDI,
978                                                  FeatureDQI,
979                                                  FeatureBWI,
980                                                  FeatureVLX,
981                                                  FeaturePKU,
982                                                  FeatureCLWB];
983  list<SubtargetFeature> SKXTuning = [TuningFastGather,
984                                      TuningMacroFusion,
985                                      TuningSlow3OpsLEA,
986                                      TuningSlowDivide64,
987                                      TuningFastScalarFSQRT,
988                                      TuningFastVectorFSQRT,
989                                      TuningFastSHLDRotate,
990                                      TuningFast15ByteNOP,
991                                      TuningFastVariableCrossLaneShuffle,
992                                      TuningFastVariablePerLaneShuffle,
993                                      TuningPrefer256Bit,
994                                      TuningPOPCNTFalseDeps,
995                                      TuningInsertVZEROUPPER,
996                                      TuningAllowLight256Bit,
997                                      TuningPreferShiftShuffle,
998                                      TuningNoDomainDelayMov,
999                                      TuningNoDomainDelayShuffle,
1000                                      TuningNoDomainDelayBlend,
1001                                      TuningFastImmVectorShift];
1002  list<SubtargetFeature> SKXFeatures =
1003    !listconcat(BDWFeatures, SKXAdditionalFeatures);
1004
1005  // Cascadelake
1006  list<SubtargetFeature> CLXAdditionalFeatures = [FeatureVNNI];
1007  list<SubtargetFeature> CLXTuning = SKXTuning;
1008  list<SubtargetFeature> CLXFeatures =
1009    !listconcat(SKXFeatures, CLXAdditionalFeatures);
1010
1011  // Cooperlake
1012  list<SubtargetFeature> CPXAdditionalFeatures = [FeatureBF16];
1013  list<SubtargetFeature> CPXTuning = SKXTuning;
1014  list<SubtargetFeature> CPXFeatures =
1015    !listconcat(CLXFeatures, CPXAdditionalFeatures);
1016
1017  // Cannonlake
1018  list<SubtargetFeature> CNLAdditionalFeatures = [FeatureAVX512,
1019                                                  FeatureEVEX512,
1020                                                  FeatureCDI,
1021                                                  FeatureDQI,
1022                                                  FeatureBWI,
1023                                                  FeatureVLX,
1024                                                  FeaturePKU,
1025                                                  FeatureVBMI,
1026                                                  FeatureIFMA,
1027                                                  FeatureSHA];
1028  list<SubtargetFeature> CNLTuning = [TuningFastGather,
1029                                      TuningMacroFusion,
1030                                      TuningSlow3OpsLEA,
1031                                      TuningSlowDivide64,
1032                                      TuningFastScalarFSQRT,
1033                                      TuningFastVectorFSQRT,
1034                                      TuningFastSHLDRotate,
1035                                      TuningFast15ByteNOP,
1036                                      TuningFastVariableCrossLaneShuffle,
1037                                      TuningFastVariablePerLaneShuffle,
1038                                      TuningPrefer256Bit,
1039                                      TuningInsertVZEROUPPER,
1040                                      TuningAllowLight256Bit,
1041                                      TuningNoDomainDelayMov,
1042                                      TuningNoDomainDelayShuffle,
1043                                      TuningNoDomainDelayBlend,
1044                                      TuningFastImmVectorShift];
1045  list<SubtargetFeature> CNLFeatures =
1046    !listconcat(SKLFeatures, CNLAdditionalFeatures);
1047
1048  // Icelake
1049  list<SubtargetFeature> ICLAdditionalFeatures = [FeatureBITALG,
1050                                                  FeatureVAES,
1051                                                  FeatureVBMI2,
1052                                                  FeatureVNNI,
1053                                                  FeatureVPCLMULQDQ,
1054                                                  FeatureVPOPCNTDQ,
1055                                                  FeatureGFNI,
1056                                                  FeatureRDPID,
1057                                                  FeatureFSRM];
1058  list<SubtargetFeature> ICLTuning = [TuningFastGather,
1059                                      TuningMacroFusion,
1060                                      TuningSlowDivide64,
1061                                      TuningFastScalarFSQRT,
1062                                      TuningFastVectorFSQRT,
1063                                      TuningFastSHLDRotate,
1064                                      TuningFast15ByteNOP,
1065                                      TuningFastVariableCrossLaneShuffle,
1066                                      TuningFastVariablePerLaneShuffle,
1067                                      TuningPrefer256Bit,
1068                                      TuningInsertVZEROUPPER,
1069                                      TuningAllowLight256Bit,
1070                                      TuningNoDomainDelayMov,
1071                                      TuningNoDomainDelayShuffle,
1072                                      TuningNoDomainDelayBlend,
1073                                      TuningFastImmVectorShift];
1074  list<SubtargetFeature> ICLFeatures =
1075    !listconcat(CNLFeatures, ICLAdditionalFeatures);
1076
1077  // Icelake Server
1078  list<SubtargetFeature> ICXAdditionalFeatures = [FeaturePCONFIG,
1079                                                  FeatureCLWB,
1080                                                  FeatureWBNOINVD];
1081  list<SubtargetFeature> ICXTuning = ICLTuning;
1082  list<SubtargetFeature> ICXFeatures =
1083    !listconcat(ICLFeatures, ICXAdditionalFeatures);
1084
1085  // Tigerlake
1086  list<SubtargetFeature> TGLAdditionalFeatures = [FeatureVP2INTERSECT,
1087                                                  FeatureCLWB,
1088                                                  FeatureMOVDIRI,
1089                                                  FeatureMOVDIR64B,
1090                                                  FeatureSHSTK];
1091  list<SubtargetFeature> TGLTuning = ICLTuning;
1092  list<SubtargetFeature> TGLFeatures =
1093    !listconcat(ICLFeatures, TGLAdditionalFeatures );
1094
1095  // Sapphirerapids
1096  list<SubtargetFeature> SPRAdditionalFeatures = [FeatureAMXTILE,
1097                                                  FeatureAMXINT8,
1098                                                  FeatureAMXBF16,
1099                                                  FeatureBF16,
1100                                                  FeatureSERIALIZE,
1101                                                  FeatureCLDEMOTE,
1102                                                  FeatureWAITPKG,
1103                                                  FeaturePTWRITE,
1104                                                  FeatureFP16,
1105                                                  FeatureAVXVNNI,
1106                                                  FeatureTSXLDTRK,
1107                                                  FeatureENQCMD,
1108                                                  FeatureSHSTK,
1109                                                  FeatureMOVDIRI,
1110                                                  FeatureMOVDIR64B,
1111                                                  FeatureUINTR];
1112  list<SubtargetFeature> SPRAdditionalTuning = [TuningMULCFalseDeps,
1113                                                TuningPERMFalseDeps,
1114                                                TuningRANGEFalseDeps,
1115                                                TuningGETMANTFalseDeps,
1116                                                TuningMULLQFalseDeps];
1117  list<SubtargetFeature> SPRTuning = !listconcat(ICXTuning, SPRAdditionalTuning);
1118  list<SubtargetFeature> SPRFeatures =
1119    !listconcat(ICXFeatures, SPRAdditionalFeatures);
1120
1121  // Graniterapids
1122  list<SubtargetFeature> GNRAdditionalFeatures = [FeatureAMXFP16,
1123                                                  FeaturePREFETCHI];
1124  list<SubtargetFeature> GNRFeatures =
1125    !listconcat(SPRFeatures, GNRAdditionalFeatures);
1126  list<SubtargetFeature> GNRAdditionalTuning = [TuningBranchHint];
1127  list<SubtargetFeature> GNRTuning = !listconcat(SPRTuning, GNRAdditionalTuning);
1128
1129  // Graniterapids D
1130  list<SubtargetFeature> GNRDAdditionalFeatures = [FeatureAMXCOMPLEX];
1131  list<SubtargetFeature> GNRDFeatures =
1132    !listconcat(GNRFeatures, GNRDAdditionalFeatures);
1133
1134  // Atom
      // Both lists are written out in full. AtomFeatures is the base that
      // SLMFeatures is concatenated onto; AtomTuning is not reused by any of the
      // later Atom-family tuning lists in this record.
1135  list<SubtargetFeature> AtomFeatures = [FeatureX87,
1136                                         FeatureCX8,
1137                                         FeatureCMOV,
1138                                         FeatureMMX,
1139                                         FeatureSSSE3,
1140                                         FeatureFXSR,
1141                                         FeatureNOPL,
1142                                         FeatureX86_64,
1143                                         FeatureCX16,
1144                                         FeatureMOVBE,
1145                                         FeatureLAHFSAHF64];
1146  list<SubtargetFeature> AtomTuning = [ProcIntelAtom,
1147                                       TuningSlowUAMem16,
1148                                       TuningLEAForSP,
1149                                       TuningSlowDivide32,
1150                                       TuningSlowDivide64,
1151                                       TuningSlowTwoMemOps,
1152                                       TuningFastImm16,
1153                                       TuningLEAUsesAG,
1154                                       TuningPadShortFunctions,
1155                                       TuningInsertVZEROUPPER,
1156                                       TuningNoDomainDelay];
1157
1158  // Silvermont
      // Features: AtomFeatures plus SLMAdditionalFeatures.
      // Tuning:   SLMTuning is a standalone list (it is not derived from
      //           AtomTuning).
1159  list<SubtargetFeature> SLMAdditionalFeatures = [FeatureSSE42,
1160                                                  FeatureCRC32,
1161                                                  FeaturePOPCNT,
1162                                                  FeaturePCLMUL,
1163                                                  FeaturePRFCHW,
1164                                                  FeatureRDRAND];
1165  list<SubtargetFeature> SLMTuning = [TuningUseSLMArithCosts,
1166                                      TuningSlowTwoMemOps,
1167                                      TuningSlowLEA,
1168                                      TuningSlowIncDec,
1169                                      TuningSlowDivide64,
1170                                      TuningSlowPMULLD,
1171                                      TuningFast7ByteNOP,
1172                                      TuningFastMOVBE,
1173                                      TuningFastImm16,
1174                                      TuningPOPCNTFalseDeps,
1175                                      TuningInsertVZEROUPPER,
1176                                      TuningNoDomainDelay];
1177  list<SubtargetFeature> SLMFeatures =
1178    !listconcat(AtomFeatures, SLMAdditionalFeatures);
1179
1180  // Goldmont
      // Features: SLMFeatures plus GLMAdditionalFeatures.
      // Tuning:   standalone list. Relative to SLMTuning it uses
      //           TuningUseGLMDivSqrtCosts in place of TuningUseSLMArithCosts and
      //           omits TuningSlowDivide64, TuningSlowPMULLD and
      //           TuningFast7ByteNOP.
1181  list<SubtargetFeature> GLMAdditionalFeatures = [FeatureAES,
1182                                                  FeatureSHA,
1183                                                  FeatureRDSEED,
1184                                                  FeatureXSAVE,
1185                                                  FeatureXSAVEOPT,
1186                                                  FeatureXSAVEC,
1187                                                  FeatureXSAVES,
1188                                                  FeatureCLFLUSHOPT,
1189                                                  FeatureFSGSBase];
1190  list<SubtargetFeature> GLMTuning = [TuningUseGLMDivSqrtCosts,
1191                                      TuningSlowTwoMemOps,
1192                                      TuningSlowLEA,
1193                                      TuningSlowIncDec,
1194                                      TuningFastMOVBE,
1195                                      TuningFastImm16,
1196                                      TuningPOPCNTFalseDeps,
1197                                      TuningInsertVZEROUPPER,
1198                                      TuningNoDomainDelay];
1199  list<SubtargetFeature> GLMFeatures =
1200    !listconcat(SLMFeatures, GLMAdditionalFeatures);
1201
1202  // Goldmont Plus
      // Features: GLMFeatures plus FeaturePTWRITE and FeatureRDPID.
      // Tuning:   standalone list; identical to GLMTuning except that
      //           TuningPOPCNTFalseDeps is not present.
1203  list<SubtargetFeature> GLPAdditionalFeatures = [FeaturePTWRITE,
1204                                                  FeatureRDPID];
1205  list<SubtargetFeature> GLPTuning = [TuningUseGLMDivSqrtCosts,
1206                                      TuningSlowTwoMemOps,
1207                                      TuningSlowLEA,
1208                                      TuningSlowIncDec,
1209                                      TuningFastMOVBE,
1210                                      TuningFastImm16,
1211                                      TuningInsertVZEROUPPER,
1212                                      TuningNoDomainDelay];
1213  list<SubtargetFeature> GLPFeatures =
1214    !listconcat(GLMFeatures, GLPAdditionalFeatures);
1215
1216  // Tremont
      // Features: GLPFeatures plus FeatureCLWB and FeatureGFNI.
      // Tuning:   TRMTuning is simply another name for GLPTuning.
1217  list<SubtargetFeature> TRMAdditionalFeatures = [FeatureCLWB,
1218                                                  FeatureGFNI];
1219  list<SubtargetFeature> TRMTuning = GLPTuning;
1220  list<SubtargetFeature> TRMFeatures =
1221    !listconcat(GLPFeatures, TRMAdditionalFeatures);
1222
1223  // Alderlake
      // Features: TRMFeatures plus ADLAdditionalFeatures.
      // Tuning:   SKLTuning plus ADLAdditionalTuning. Note the two lists have
      //           different bases: the feature list grows from Tremont while the
      //           tuning list grows from SKLTuning rather than TRMTuning.
1224  list<SubtargetFeature> ADLAdditionalFeatures = [FeatureSERIALIZE,
1225                                                  FeaturePCONFIG,
1226                                                  FeatureSHSTK,
1227                                                  FeatureWIDEKL,
1228                                                  FeatureINVPCID,
1229                                                  FeatureADX,
1230                                                  FeatureFMA,
1231                                                  FeatureVAES,
1232                                                  FeatureVPCLMULQDQ,
1233                                                  FeatureF16C,
1234                                                  FeatureBMI,
1235                                                  FeatureBMI2,
1236                                                  FeatureLZCNT,
1237                                                  FeatureAVXVNNI,
1238                                                  FeaturePKU,
1239                                                  FeatureHRESET,
1240                                                  FeatureCLDEMOTE,
1241                                                  FeatureMOVDIRI,
1242                                                  FeatureMOVDIR64B,
1243                                                  FeatureWAITPKG];
1244  list<SubtargetFeature> ADLAdditionalTuning = [TuningPERMFalseDeps,
1245                                                TuningPreferMovmskOverVTest,
1246                                                TuningFastImmVectorShift];
1247  list<SubtargetFeature> ADLTuning = !listconcat(SKLTuning, ADLAdditionalTuning);
1248  list<SubtargetFeature> ADLFeatures =
1249    !listconcat(TRMFeatures, ADLAdditionalFeatures);
1250
1251  // Gracemont
      // Tuning-only entry: a standalone tuning list with no matching GRT feature
      // list defined next to it.
1252  list<SubtargetFeature> GRTTuning = [TuningMacroFusion,
1253                                      TuningSlow3OpsLEA,
1254                                      TuningFastScalarFSQRT,
1255                                      TuningFastVectorFSQRT,
1256                                      TuningFast15ByteNOP,
1257                                      TuningFastVariablePerLaneShuffle,
1258                                      TuningPOPCNTFalseDeps,
1259                                      TuningInsertVZEROUPPER];
1260
1261  // Sierraforest
      // Features: ADLFeatures plus SRFAdditionalFeatures. No SRF-specific tuning
      // list is defined next to it.
1262  list<SubtargetFeature> SRFAdditionalFeatures = [FeatureCMPCCXADD,
1263                                                  FeatureAVXIFMA,
1264                                                  FeatureAVXNECONVERT,
1265                                                  FeatureENQCMD,
1266                                                  FeatureUINTR,
1267                                                  FeatureAVXVNNIINT8];
1268  list<SubtargetFeature> SRFFeatures =
1269    !listconcat(ADLFeatures, SRFAdditionalFeatures);
1270
1271  // Arrowlake S
      // Features: SRFFeatures plus ARLSAdditionalFeatures. ARLSFeatures is in turn
      // the base of both PTLFeatures and CWFFeatures.
1272  list<SubtargetFeature> ARLSAdditionalFeatures = [FeatureAVXVNNIINT16,
1273                                                   FeatureSHA512,
1274                                                   FeatureSM3,
1275                                                   FeatureSM4];
1276  list<SubtargetFeature> ARLSFeatures =
1277    !listconcat(SRFFeatures, ARLSAdditionalFeatures);
1278
1279  // Pantherlake
      // Features: ARLSFeatures plus FeaturePREFETCHI.
1280  list<SubtargetFeature> PTLAdditionalFeatures = [FeaturePREFETCHI];
1281  list<SubtargetFeature> PTLFeatures =
1282    !listconcat(ARLSFeatures, PTLAdditionalFeatures);
1283
1284
1285  // Clearwaterforest
      // Features: ARLSFeatures plus FeaturePREFETCHI and FeatureUSERMSR. The base
      // is ARLSFeatures directly, not PTLFeatures.
1286  list<SubtargetFeature> CWFAdditionalFeatures = [FeaturePREFETCHI,
1287                                                  FeatureUSERMSR];
1288  list<SubtargetFeature> CWFFeatures =
1289    !listconcat(ARLSFeatures, CWFAdditionalFeatures);
1290
1291  // Knights Landing
      // KNLFeatures and KNLTuning are written out in full rather than derived
      // from another processor's lists. KNMFeatures (at the end of this group) is
      // KNLFeatures plus FeatureVPOPCNTDQ; no separate KNM tuning list exists.
1292  list<SubtargetFeature> KNLFeatures = [FeatureX87,
1293                                        FeatureCX8,
1294                                        FeatureCMOV,
1295                                        FeatureMMX,
1296                                        FeatureFXSR,
1297                                        FeatureNOPL,
1298                                        FeatureX86_64,
1299                                        FeatureCX16,
1300                                        FeatureCRC32,
1301                                        FeaturePOPCNT,
1302                                        FeaturePCLMUL,
1303                                        FeatureXSAVE,
1304                                        FeatureXSAVEOPT,
1305                                        FeatureLAHFSAHF64,
1306                                        FeatureAES,
1307                                        FeatureRDRAND,
1308                                        FeatureF16C,
1309                                        FeatureFSGSBase,
1310                                        FeatureAVX512,
1311                                        FeatureEVEX512,
1312                                        FeatureCDI,
1313                                        FeatureADX,
1314                                        FeatureRDSEED,
1315                                        FeatureMOVBE,
1316                                        FeatureLZCNT,
1317                                        FeatureBMI,
1318                                        FeatureBMI2,
1319                                        FeatureFMA,
1320                                        FeaturePRFCHW];
1321  list<SubtargetFeature> KNLTuning = [TuningSlowDivide64,
1322                                      TuningSlow3OpsLEA,
1323                                      TuningSlowIncDec,
1324                                      TuningSlowTwoMemOps,
1325                                      TuningPreferMaskRegisters,
1326                                      TuningFastGather,
1327                                      TuningFastMOVBE,
1328                                      TuningFastImm16,
1329                                      TuningSlowPMADDWD];
1330  // TODO Add AVX5124FMAPS/AVX5124VNNIW features
1331  list<SubtargetFeature> KNMFeatures =
1332    !listconcat(KNLFeatures, [FeatureVPOPCNTDQ]);
1333
1334  // Barcelona
      // Standalone feature and tuning lists; nothing else in this record is
      // concatenated onto them.
1335  list<SubtargetFeature> BarcelonaFeatures = [FeatureX87,
1336                                              FeatureCX8,
1337                                              FeatureSSE4A,
1338                                              FeatureFXSR,
1339                                              FeatureNOPL,
1340                                              FeatureCX16,
1341                                              FeaturePRFCHW,
1342                                              FeatureLZCNT,
1343                                              FeaturePOPCNT,
1344                                              FeatureLAHFSAHF64,
1345                                              FeatureCMOV,
1346                                              FeatureX86_64];
1347  list<SubtargetFeature> BarcelonaTuning = [TuningFastScalarShiftMasks,
1348                                            TuningSlowDivide64,
1349                                            TuningSlowSHLD,
1350                                            TuningSBBDepBreaking,
1351                                            TuningInsertVZEROUPPER];
1352
1353  // Bobcat
      // BtVer1Features is the base that BtVer2Features (Jaguar) is concatenated
      // onto. BtVer1Tuning is a standalone list.
1354  list<SubtargetFeature> BtVer1Features = [FeatureX87,
1355                                           FeatureCX8,
1356                                           FeatureCMOV,
1357                                           FeatureMMX,
1358                                           FeatureSSSE3,
1359                                           FeatureSSE4A,
1360                                           FeatureFXSR,
1361                                           FeatureNOPL,
1362                                           FeatureX86_64,
1363                                           FeatureCX16,
1364                                           FeaturePRFCHW,
1365                                           FeatureLZCNT,
1366                                           FeaturePOPCNT,
1367                                           FeatureLAHFSAHF64];
1368  list<SubtargetFeature> BtVer1Tuning = [TuningFast15ByteNOP,
1369                                         TuningFastScalarShiftMasks,
1370                                         TuningFastVectorShiftMasks,
1371                                         TuningSlowDivide64,
1372                                         TuningSlowSHLD,
1373                                         TuningFastImm16,
1374                                         TuningSBBDepBreaking,
1375                                         TuningInsertVZEROUPPER];
1376
1377  // Jaguar
      // Features: BtVer1Features plus BtVer2AdditionalFeatures.
      // Tuning:   standalone list, not derived from BtVer1Tuning. Unlike
      //           BtVer1Tuning it does not contain TuningInsertVZEROUPPER.
1378  list<SubtargetFeature> BtVer2AdditionalFeatures = [FeatureAVX,
1379                                                     FeatureAES,
1380                                                     FeatureCRC32,
1381                                                     FeaturePCLMUL,
1382                                                     FeatureBMI,
1383                                                     FeatureF16C,
1384                                                     FeatureMOVBE,
1385                                                     FeatureXSAVE,
1386                                                     FeatureXSAVEOPT];
1387  list<SubtargetFeature> BtVer2Tuning = [TuningFastLZCNT,
1388                                         TuningFastBEXTR,
1389                                         TuningFastHorizontalOps,
1390                                         TuningFast15ByteNOP,
1391                                         TuningFastScalarShiftMasks,
1392                                         TuningFastVectorShiftMasks,
1393                                         TuningFastMOVBE,
1394                                         TuningFastImm16,
1395                                         TuningSBBDepBreaking,
1396                                         TuningSlowDivide64,
1397                                         TuningSlowSHLD];
1398  list<SubtargetFeature> BtVer2Features =
1399    !listconcat(BtVer1Features, BtVer2AdditionalFeatures);
1400
1401  // Bulldozer
      // Root of the BdVer chain: BdVer2 through BdVer4 each extend the previous
      // generation's feature list, and their tuning lists derive from
      // BdVer1Tuning.
1402  list<SubtargetFeature> BdVer1Features = [FeatureX87,
1403                                           FeatureCX8,
1404                                           FeatureCMOV,
1405                                           FeatureXOP,
1406                                           FeatureX86_64,
1407                                           FeatureCX16,
1408                                           FeatureAES,
1409                                           FeatureCRC32,
1410                                           FeaturePRFCHW,
1411                                           FeaturePCLMUL,
1412                                           FeatureMMX,
1413                                           FeatureFXSR,
1414                                           FeatureNOPL,
1415                                           FeatureLZCNT,
1416                                           FeaturePOPCNT,
1417                                           FeatureXSAVE,
1418                                           FeatureLWP,
1419                                           FeatureLAHFSAHF64];
1420  list<SubtargetFeature> BdVer1Tuning = [TuningSlowSHLD,
1421                                         TuningSlowDivide64,
1422                                         TuningFast11ByteNOP,
1423                                         TuningFastScalarShiftMasks,
1424                                         TuningBranchFusion,
1425                                         TuningSBBDepBreaking,
1426                                         TuningInsertVZEROUPPER];
1427
1428  // PileDriver
      // Features: BdVer1Features plus BdVer2AdditionalFeatures.
      // Tuning:   BdVer1Tuning plus TuningFastBEXTR and TuningFastMOVBE.
1429  list<SubtargetFeature> BdVer2AdditionalFeatures = [FeatureF16C,
1430                                                     FeatureBMI,
1431                                                     FeatureTBM,
1432                                                     FeatureFMA];
1433  list<SubtargetFeature> BdVer2AdditionalTuning = [TuningFastBEXTR,
1434                                                   TuningFastMOVBE];
1435  list<SubtargetFeature> BdVer2Tuning =
1436    !listconcat(BdVer1Tuning, BdVer2AdditionalTuning);
1437  list<SubtargetFeature> BdVer2Features =
1438    !listconcat(BdVer1Features, BdVer2AdditionalFeatures);
1439
1440  // Steamroller
      // Features: BdVer2Features plus FeatureXSAVEOPT and FeatureFSGSBase.
      // Tuning:   BdVer3Tuning is simply another name for BdVer2Tuning.
1441  list<SubtargetFeature> BdVer3AdditionalFeatures = [FeatureXSAVEOPT,
1442                                                     FeatureFSGSBase];
1443  list<SubtargetFeature> BdVer3Tuning = BdVer2Tuning;
1444  list<SubtargetFeature> BdVer3Features =
1445    !listconcat(BdVer2Features, BdVer3AdditionalFeatures);
1446
1447  // Excavator
      // Features: BdVer3Features plus BdVer4AdditionalFeatures.
      // Tuning:   BdVer4Tuning is simply another name for BdVer3Tuning (and so,
      //           transitively, for BdVer2Tuning).
1448  list<SubtargetFeature> BdVer4AdditionalFeatures = [FeatureAVX2,
1449                                                     FeatureBMI2,
1450                                                     FeatureMOVBE,
1451                                                     FeatureRDRAND,
1452                                                     FeatureMWAITX];
1453  list<SubtargetFeature> BdVer4Tuning = BdVer3Tuning;
1454  list<SubtargetFeature> BdVer4Features =
1455    !listconcat(BdVer3Features, BdVer4AdditionalFeatures);
1456
1457
1458  // AMD Zen Processors common ISAs
      // ZNFeatures / ZNTuning are the root lists of the Zen chain; every later
      // ZN<n> list in this group is built by concatenating onto its predecessor.
1459  list<SubtargetFeature> ZNFeatures = [FeatureADX,
1460                                       FeatureAES,
1461                                       FeatureAVX2,
1462                                       FeatureBMI,
1463                                       FeatureBMI2,
1464                                       FeatureCLFLUSHOPT,
1465                                       FeatureCLZERO,
1466                                       FeatureCMOV,
1467                                       FeatureX86_64,
1468                                       FeatureCX16,
1469                                       FeatureCRC32,
1470                                       FeatureF16C,
1471                                       FeatureFMA,
1472                                       FeatureFSGSBase,
1473                                       FeatureFXSR,
1474                                       FeatureNOPL,
1475                                       FeatureLAHFSAHF64,
1476                                       FeatureLZCNT,
1477                                       FeatureMMX,
1478                                       FeatureMOVBE,
1479                                       FeatureMWAITX,
1480                                       FeaturePCLMUL,
1481                                       FeaturePOPCNT,
1482                                       FeaturePRFCHW,
1483                                       FeatureRDRAND,
1484                                       FeatureRDSEED,
1485                                       FeatureSHA,
1486                                       FeatureSSE4A,
1487                                       FeatureX87,
1488                                       FeatureXSAVE,
1489                                       FeatureXSAVEC,
1490                                       FeatureXSAVEOPT,
1491                                       FeatureXSAVES];
1492  list<SubtargetFeature> ZNTuning = [TuningFastLZCNT,
1493                                     TuningFastBEXTR,
1494                                     TuningFast15ByteNOP,
1495                                     TuningBranchFusion,
1496                                     TuningFastScalarFSQRT,
1497                                     TuningFastVectorFSQRT,
1498                                     TuningFastScalarShiftMasks,
1499                                     TuningFastVariablePerLaneShuffle,
1500                                     TuningFastMOVBE,
1501                                     TuningFastImm16,
1502                                     TuningSlowDivide64,
1503                                     TuningSlowSHLD,
1504                                     TuningSBBDepBreaking,
1505                                     TuningInsertVZEROUPPER,
1506                                     TuningAllowLight256Bit];
      // ZN2: features are ZNFeatures plus ZN2AdditionalFeatures; ZN2Tuning is
      // simply another name for ZNTuning.
1507  list<SubtargetFeature> ZN2AdditionalFeatures = [FeatureCLWB,
1508                                                  FeatureRDPID,
1509                                                  FeatureRDPRU,
1510                                                  FeatureWBNOINVD];
1511  list<SubtargetFeature> ZN2Tuning = ZNTuning;
1512  list<SubtargetFeature> ZN2Features =
1513    !listconcat(ZNFeatures, ZN2AdditionalFeatures);
      // ZN3: features are ZN2Features plus ZN3AdditionalFeatures; tuning is
      // ZN2Tuning plus TuningMacroFusion.
1514  list<SubtargetFeature> ZN3AdditionalFeatures = [FeatureFSRM,
1515                                                  FeatureINVPCID,
1516                                                  FeaturePKU,
1517                                                  FeatureVAES,
1518                                                  FeatureVPCLMULQDQ];
1519  list<SubtargetFeature> ZN3AdditionalTuning = [TuningMacroFusion];
1520  list<SubtargetFeature> ZN3Tuning =
1521    !listconcat(ZN2Tuning, ZN3AdditionalTuning);
1522  list<SubtargetFeature> ZN3Features =
1523    !listconcat(ZN2Features, ZN3AdditionalFeatures);
1524
1525
1526  list<SubtargetFeature> ZN4AdditionalTuning = [TuningFastDPWSSD];
      // ZN4: tuning is ZN3Tuning plus TuningFastDPWSSD; features are ZN3Features
      // plus the ZN4AdditionalFeatures list below.
1527  list<SubtargetFeature> ZN4Tuning =
1528    !listconcat(ZN3Tuning, ZN4AdditionalTuning);
1529  list<SubtargetFeature> ZN4AdditionalFeatures = [FeatureAVX512,
1530                                                  FeatureEVEX512,
1531                                                  FeatureCDI,
1532                                                  FeatureDQI,
1533                                                  FeatureBWI,
1534                                                  FeatureVLX,
1535                                                  FeatureVBMI,
1536                                                  FeatureVBMI2,
1537                                                  FeatureIFMA,
1538                                                  FeatureVNNI,
1539                                                  FeatureBITALG,
1540                                                  FeatureGFNI,
1541                                                  FeatureBF16,
1542                                                  FeatureSHSTK,
1543                                                  FeatureVPOPCNTDQ];
1544  list<SubtargetFeature> ZN4Features =
1545    !listconcat(ZN3Features, ZN4AdditionalFeatures);
1546
1547
1548  list<SubtargetFeature> ZN5Tuning = ZN4Tuning;
      // ZN5: ZN5Tuning (above) is simply another name for ZN4Tuning; features are
      // ZN4Features plus the ZN5AdditionalFeatures list below.
      // NOTE(review): FeatureVNNI is already a member of ZN4AdditionalFeatures and
      // therefore of ZN4Features, so listing it again here looks redundant.
      // Presumably harmless (the concatenated list just names it twice) -- confirm
      // before removing.
1549  list<SubtargetFeature> ZN5AdditionalFeatures = [FeatureVNNI,
1550                                                  FeatureMOVDIRI,
1551                                                  FeatureMOVDIR64B,
1552                                                  FeatureVP2INTERSECT,
1553                                                  FeaturePREFETCHI,
1554                                                  FeatureAVXVNNI
1555                                                  ];
1556  list<SubtargetFeature> ZN5Features =
1557    !listconcat(ZN4Features, ZN5AdditionalFeatures);
1558
1559}
1560
1561//===----------------------------------------------------------------------===//
1562// X86 processors supported.
1563//===----------------------------------------------------------------------===//
1564
1565class Proc<string Name, list<SubtargetFeature> Features,
1566           list<SubtargetFeature> TuneFeatures>
    // Shorthand for a ProcessorModel whose scheduling-model argument is always
    // GenericModel; Name, Features and TuneFeatures are forwarded unchanged.
1567 : ProcessorModel<Name, GenericModel, Features, TuneFeatures>;
1568
1569class ProcModel<string Name, SchedMachineModel Model,
1570                list<SubtargetFeature> Features,
1571                list<SubtargetFeature> TuneFeatures>
    // Like Proc, but the caller chooses the SchedMachineModel. All four
    // arguments are forwarded to ProcessorModel in the same order.
1572 : ProcessorModel<Name, Model, Features, TuneFeatures>;
1573
1574// NOTE: CMPXCHG8B is here for legacy compatibility so that it is only disabled
1575// if i386/i486 is specifically requested.
1576// NOTE: 64Bit is here as "generic" is the default llc CPU. The X86Subtarget
1577// constructor checks that any CPU used in 64-bit mode has FeatureX86_64
1578// enabled. It has no effect on code generation.
1579// NOTE: As a default tuning, "generic" aims to produce code optimized for the
1580// most common X86 processors. The tunings might be changed over time. It is
1581// recommended to use "tune-cpu"="x86-64" in function attribute for consistency.
    // Concretely: features are X87, CX8 and X86_64 only; scheduling uses
    // SandyBridgeModel; the tuning flags are the six listed below.
1582def : ProcModel<"generic", SandyBridgeModel,
1583                [FeatureX87, FeatureCX8, FeatureX86_64],
1584                [TuningSlow3OpsLEA,
1585                 TuningSlowDivide64,
1586                 TuningMacroFusion,
1587                 TuningFastScalarFSQRT,
1588                 TuningFast15ByteNOP,
1589                 TuningInsertVZEROUPPER]>;
1590
1591def : Proc<"i386",            [FeatureX87],
1592                              [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
    // Every CPU from i386 through pentium3 is declared with Proc (so it gets
    // GenericModel) and the same tuning pair; only the feature list grows.
    // i386 and i486 are the two entries without FeatureCX8.
1593def : Proc<"i486",            [FeatureX87],
1594                              [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1595def : Proc<"i586",            [FeatureX87, FeatureCX8],
1596                              [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1597def : Proc<"pentium",         [FeatureX87, FeatureCX8],
1598                              [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
    // Each foreach below registers one identical definition under several
    // alternative CPU name spellings.
1599foreach P = ["pentium-mmx", "pentium_mmx"] in {
1600  def : Proc<P, [FeatureX87, FeatureCX8, FeatureMMX],
1601                [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1602}
1603def : Proc<"i686", [FeatureX87, FeatureCX8, FeatureCMOV],
1604                   [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1605foreach P = ["pentiumpro", "pentium_pro"] in {
1606  def : Proc<P, [FeatureX87, FeatureCX8, FeatureCMOV, FeatureNOPL],
1607                [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1608}
1609foreach P = ["pentium2", "pentium_ii"] in {
1610  def : Proc<P, [FeatureX87, FeatureCX8, FeatureMMX, FeatureCMOV,
1611                          FeatureFXSR, FeatureNOPL],
1612                        [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1613}
1614foreach P = ["pentium3", "pentium3m", "pentium_iii_no_xmm_regs", "pentium_iii"] in {
1615  def : Proc<P, [FeatureX87, FeatureCX8, FeatureMMX,
1616                 FeatureSSE1, FeatureFXSR, FeatureNOPL, FeatureCMOV],
1617                [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1618}
1619
1620// Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
1621// The intent is to enable it for pentium4 which is the current default
1622// processor in a vanilla 32-bit clang compilation when no specific
1623// architecture is specified.  This generally gives a nice performance
1624// increase on silvermont, with largely neutral behavior on other
1625// contemporary large core processors.
1626// pentium-m, pentium4m, prescott and nocona are included as a preventative
1627// measure to avoid performance surprises, in case clang's default cpu
1628// changes slightly.
    // In this file that means these CPUs are declared with ProcModel and
    // GenericPostRAModel instead of Proc. The pentium-m and pentium4 groups
    // below use identical feature and tuning lists.
1629
1630foreach P = ["pentium_m", "pentium-m"] in {
1631def : ProcModel<P, GenericPostRAModel,
1632                [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE2,
1633                FeatureFXSR, FeatureNOPL, FeatureCMOV],
1634                [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1635}
1636
1637foreach P = ["pentium4", "pentium4m", "pentium_4"] in {
1638  def : ProcModel<P, GenericPostRAModel,
1639                  [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE2,
1640                   FeatureFXSR, FeatureNOPL, FeatureCMOV],
1641                  [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1642}
1643
1644// Intel Quark.
    // NOTE(review): this is the only processor definition with an inline
    // feature list here that omits FeatureX87 -- presumably deliberate; confirm.
1645def : Proc<"lakemont", [FeatureCX8],
1646                       [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1647
1648// Intel Core Duo.
    // Same feature set as the prescott definition that follows (SSE3 level, no
    // FeatureX86_64), but scheduled with SandyBridgeModel.
1649def : ProcModel<"yonah", SandyBridgeModel,
1650                [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE3,
1651                 FeatureFXSR, FeatureNOPL, FeatureCMOV],
1652                [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1653
1654// NetBurst.
    // Both definitions use GenericPostRAModel and the same tuning pair. nocona
    // has the prescott feature set plus FeatureX86_64 and FeatureCX16.
1655foreach P = ["prescott", "pentium_4_sse3"] in {
1656  def : ProcModel<P, GenericPostRAModel,
1657                  [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE3,
1658                  FeatureFXSR, FeatureNOPL, FeatureCMOV],
1659                  [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1660}
1661def : ProcModel<"nocona", GenericPostRAModel, [
1662  FeatureX87,
1663  FeatureCX8,
1664  FeatureCMOV,
1665  FeatureMMX,
1666  FeatureSSE3,
1667  FeatureFXSR,
1668  FeatureNOPL,
1669  FeatureX86_64,
1670  FeatureCX16,
1671],
1672[
1673  TuningSlowUAMem16,
1674  TuningInsertVZEROUPPER
1675]>;
1676
1677// Intel Core 2 Solo/Duo.
    // The core2 and penryn groups are identical except for the SSE level
    // (FeatureSSSE3 vs. FeatureSSE41). Both are scheduled with SandyBridgeModel
    // and share the same three tuning flags.
1678foreach P = ["core2", "core_2_duo_ssse3"] in {
1679def : ProcModel<P, SandyBridgeModel, [
1680  FeatureX87,
1681  FeatureCX8,
1682  FeatureCMOV,
1683  FeatureMMX,
1684  FeatureSSSE3,
1685  FeatureFXSR,
1686  FeatureNOPL,
1687  FeatureX86_64,
1688  FeatureCX16,
1689  FeatureLAHFSAHF64
1690],
1691[
1692  TuningMacroFusion,
1693  TuningSlowUAMem16,
1694  TuningInsertVZEROUPPER
1695]>;
1696}
1697foreach P = ["penryn", "core_2_duo_sse4_1"] in {
1698def : ProcModel<P, SandyBridgeModel, [
1699  FeatureX87,
1700  FeatureCX8,
1701  FeatureCMOV,
1702  FeatureMMX,
1703  FeatureSSE41,
1704  FeatureFXSR,
1705  FeatureNOPL,
1706  FeatureX86_64,
1707  FeatureCX16,
1708  FeatureLAHFSAHF64
1709],
1710[
1711  TuningMacroFusion,
1712  TuningSlowUAMem16,
1713  TuningInsertVZEROUPPER
1714]>;
1715}
1716
1717// Atom CPUs.
    // From here on, feature and tuning lists come from the ProcessorFeatures
    // record instead of being spelled inline. Only bonnell/atom use AtomModel;
    // every later entry in this group is scheduled with SLMModel.
1718foreach P = ["bonnell", "atom"] in {
1719  def : ProcModel<P, AtomModel, ProcessorFeatures.AtomFeatures,
1720                  ProcessorFeatures.AtomTuning>;
1721}
1722
1723foreach P = ["silvermont", "slm", "atom_sse4_2"] in {
1724  def : ProcModel<P, SLMModel, ProcessorFeatures.SLMFeatures,
1725                  ProcessorFeatures.SLMTuning>;
1726}
1727
    // atom_sse4_2_movbe pairs the Goldmont feature list with the Silvermont
    // tuning list, whereas goldmont itself uses GLMTuning.
1728def : ProcModel<"atom_sse4_2_movbe", SLMModel, ProcessorFeatures.GLMFeatures,
1729                ProcessorFeatures.SLMTuning>;
1730def : ProcModel<"goldmont", SLMModel, ProcessorFeatures.GLMFeatures,
1731                ProcessorFeatures.GLMTuning>;
1732foreach P = ["goldmont_plus", "goldmont-plus"] in {
1733  def : ProcModel<P, SLMModel, ProcessorFeatures.GLPFeatures,
1734                  ProcessorFeatures.GLPTuning>;
1735}
1736def : ProcModel<"tremont", SLMModel, ProcessorFeatures.TRMFeatures,
1737                ProcessorFeatures.TRMTuning>;
1738
1739// "Arrandale" along with corei3 and corei5
    // nehalem, westmere, sandybridge and ivybridge are all scheduled with
    // SandyBridgeModel; haswell, broadwell and skylake each have their own model.
1740foreach P = ["nehalem", "corei7", "core_i7_sse4_2"] in {
1741  def : ProcModel<P, SandyBridgeModel, ProcessorFeatures.NHMFeatures,
1742                  ProcessorFeatures.NHMTuning>;
1743}
1744
1745// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
1746foreach P = ["westmere", "core_aes_pclmulqdq"] in {
1747  def : ProcModel<P, SandyBridgeModel, ProcessorFeatures.WSMFeatures,
1748                  ProcessorFeatures.WSMTuning>;
1749}
1750
1751foreach P = ["sandybridge", "corei7-avx", "core_2nd_gen_avx"] in {
1752  def : ProcModel<P, SandyBridgeModel, ProcessorFeatures.SNBFeatures,
1753                  ProcessorFeatures.SNBTuning>;
1754}
1755
1756foreach P = ["ivybridge", "core-avx-i", "core_3rd_gen_avx"] in {
1757  def : ProcModel<P, SandyBridgeModel, ProcessorFeatures.IVBFeatures,
1758                  ProcessorFeatures.IVBTuning>;
1759}
1760
1761foreach P = ["haswell", "core-avx2", "core_4th_gen_avx", "core_4th_gen_avx_tsx"] in {
1762  def : ProcModel<P, HaswellModel, ProcessorFeatures.HSWFeatures,
1763                  ProcessorFeatures.HSWTuning>;
1764}
1765
1766foreach P = ["broadwell", "core_5th_gen_avx", "core_5th_gen_avx_tsx"] in {
1767  def : ProcModel<P, BroadwellModel, ProcessorFeatures.BDWFeatures,
1768                  ProcessorFeatures.BDWTuning>;
1769}
1770
1771def : ProcModel<"skylake", SkylakeClientModel, ProcessorFeatures.SKLFeatures,
1772                ProcessorFeatures.SKLTuning>;
1773
// Knights Landing / Knights Mill and Skylake-AVX512 processor names.
// Until the FIXME below is resolved, the KNL and KNM entries borrow
// HaswellModel for scheduling.
1774// FIXME: define KNL scheduler model
1775foreach P = ["knl", "mic_avx512"] in {
1776  def : ProcModel<P, HaswellModel, ProcessorFeatures.KNLFeatures,
1777                  ProcessorFeatures.KNLTuning>;
1778}
// "knm" has its own feature list (KNMFeatures) but reuses the KNL tuning list.
1779def : ProcModel<"knm", HaswellModel, ProcessorFeatures.KNMFeatures,
1780                ProcessorFeatures.KNLTuning>;
1781
// Three accepted spellings for the same Skylake server definition.
1782foreach P = ["skylake-avx512", "skx", "skylake_avx512"] in {
1783  def : ProcModel<P, SkylakeServerModel, ProcessorFeatures.SKXFeatures,
1784                  ProcessorFeatures.SKXTuning>;
1785}
1786
// Intel processor names from Cascade Lake through Granite Rapids-D. Several
// entries share a scheduling model and/or feature and tuning lists with a
// neighbouring processor; the notes below call out each such reuse.
// Cascade Lake, Cooper Lake and Cannon Lake all schedule with
// SkylakeServerModel.
1787def : ProcModel<"cascadelake", SkylakeServerModel,
1788                ProcessorFeatures.CLXFeatures, ProcessorFeatures.CLXTuning>;
1789def : ProcModel<"cooperlake", SkylakeServerModel,
1790                ProcessorFeatures.CPXFeatures, ProcessorFeatures.CPXTuning>;
1791def : ProcModel<"cannonlake", SkylakeServerModel,
1792                ProcessorFeatures.CNLFeatures, ProcessorFeatures.CNLTuning>;
// The Ice Lake client/server, Rocket Lake and Tiger Lake entries all use
// IceLakeModel.
1793foreach P = ["icelake-client", "icelake_client"] in {
1794def : ProcModel<P, IceLakeModel,
1795                ProcessorFeatures.ICLFeatures, ProcessorFeatures.ICLTuning>;
1796}
// "rocketlake" takes exactly the same arguments as the icelake-client entries.
1797def : ProcModel<"rocketlake", IceLakeModel,
1798                ProcessorFeatures.ICLFeatures, ProcessorFeatures.ICLTuning>;
1799foreach P = ["icelake-server", "icelake_server"] in {
1800def : ProcModel<P, IceLakeModel,
1801                ProcessorFeatures.ICXFeatures, ProcessorFeatures.ICXTuning>;
1802}
1803def : ProcModel<"tigerlake", IceLakeModel,
1804                ProcessorFeatures.TGLFeatures, ProcessorFeatures.TGLTuning>;
1805def : ProcModel<"sapphirerapids", SapphireRapidsModel,
1806                ProcessorFeatures.SPRFeatures, ProcessorFeatures.SPRTuning>;
// Every entry from "alderlake" through "clearwaterforest" schedules with
// AlderlakePModel.
1807def : ProcModel<"alderlake", AlderlakePModel,
1808                ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
1809// FIXME: Use Gracemont Schedule Model when it is ready.
// "gracemont" uses the ADL feature list but its own GRT tuning list.
1810def : ProcModel<"gracemont", AlderlakePModel,
1811                ProcessorFeatures.ADLFeatures, ProcessorFeatures.GRTTuning>;
// "sierraforest"/"grandridge" combine the SRF feature list with GRT tuning.
1812foreach P = ["sierraforest", "grandridge"] in {
1813  def : ProcModel<P, AlderlakePModel, ProcessorFeatures.SRFFeatures,
1814                ProcessorFeatures.GRTTuning>;
1815}
// "raptorlake" and "meteorlake" take exactly the same arguments as "alderlake".
1816def : ProcModel<"raptorlake", AlderlakePModel,
1817                ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
1818def : ProcModel<"meteorlake", AlderlakePModel,
1819                ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
// "arrowlake" pairs the SRF feature list with the ADL tuning list.
1820def : ProcModel<"arrowlake", AlderlakePModel,
1821                ProcessorFeatures.SRFFeatures, ProcessorFeatures.ADLTuning>;
// "lunarlake" shares one definition with the two arrowlake-s spellings.
1822foreach P = ["arrowlake-s", "arrowlake_s", "lunarlake"] in {
1823def : ProcModel<P, AlderlakePModel,
1824                ProcessorFeatures.ARLSFeatures, ProcessorFeatures.ADLTuning>;
1825}
1826def : ProcModel<"pantherlake", AlderlakePModel,
1827                ProcessorFeatures.PTLFeatures, ProcessorFeatures.ADLTuning>;
1828def : ProcModel<"clearwaterforest", AlderlakePModel,
1829                ProcessorFeatures.CWFFeatures, ProcessorFeatures.ADLTuning>;
// "emeraldrapids" takes exactly the same arguments as "sapphirerapids".
1830def : ProcModel<"emeraldrapids", SapphireRapidsModel,
1831                ProcessorFeatures.SPRFeatures, ProcessorFeatures.SPRTuning>;
// Granite Rapids and Granite Rapids-D share GNRTuning and SapphireRapidsModel
// but reference different feature lists (GNRFeatures vs. GNRDFeatures).
1832def : ProcModel<"graniterapids", SapphireRapidsModel,
1833                ProcessorFeatures.GNRFeatures, ProcessorFeatures.GNRTuning>;
1834foreach P = ["graniterapids-d", "graniterapids_d"] in {
1835def : ProcModel<P, SapphireRapidsModel,
1836                ProcessorFeatures.GNRDFeatures, ProcessorFeatures.GNRTuning>;
1837}
1838
1839// AMD CPUs.
// The entries in this group use Proc, which is given only a name, a feature
// list and a tuning list here (no scheduling-model argument). Except for
// amdfam10/barcelona, the lists are written inline rather than taken from
// ProcessorFeatures.
1840
// "k6-2" and "k6-3" have identical lists; they differ from "k6" only by the
// added FeaturePRFCHW.
1841def : Proc<"k6",   [FeatureX87, FeatureCX8, FeatureMMX],
1842                   [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1843def : Proc<"k6-2", [FeatureX87, FeatureCX8, FeatureMMX, FeaturePRFCHW],
1844                   [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1845def : Proc<"k6-3", [FeatureX87, FeatureCX8, FeatureMMX, FeaturePRFCHW],
1846                   [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1847
1848foreach P = ["athlon", "athlon-tbird"] in {
1849  def : Proc<P, [FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX, FeaturePRFCHW,
1850                 FeatureNOPL],
1851                [TuningSlowSHLD, TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1852}
1853
// Same as the athlon group above plus FeatureSSE1 and FeatureFXSR; the tuning
// list is unchanged.
1854foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in {
1855  def : Proc<P, [FeatureX87, FeatureCX8, FeatureCMOV,
1856                 FeatureSSE1, FeatureMMX, FeaturePRFCHW, FeatureFXSR, FeatureNOPL],
1857                [TuningSlowSHLD, TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1858}
1859
// First group in this section to list FeatureX86_64.
1860foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in {
1861  def : Proc<P, [FeatureX87, FeatureCX8, FeatureSSE2, FeatureMMX, FeaturePRFCHW,
1862                 FeatureFXSR, FeatureNOPL, FeatureX86_64, FeatureCMOV],
1863                [TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16,
1864                 TuningSBBDepBreaking, TuningInsertVZEROUPPER]>;
1865}
1866
// Relative to the k8 group: FeatureSSE3 replaces FeatureSSE2 and FeatureCX16
// is added; the tuning list is identical.
1867foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
1868  def : Proc<P, [FeatureX87, FeatureCX8, FeatureSSE3, FeatureMMX, FeaturePRFCHW,
1869                 FeatureFXSR, FeatureNOPL, FeatureCX16, FeatureCMOV,
1870                 FeatureX86_64],
1871                [TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16,
1872                 TuningSBBDepBreaking, TuningInsertVZEROUPPER]>;
1873}
1874
// "amdfam10" and "barcelona" are aliases using the shared Barcelona lists.
1875foreach P = ["amdfam10", "barcelona"] in {
1876  def : Proc<P, ProcessorFeatures.BarcelonaFeatures,
1877             ProcessorFeatures.BarcelonaTuning>;
1878}
1879
// AMD btver*, bdver* and znver* processor names. Entries written with
// ProcModel name a scheduling model; entries written with Proc (btver1,
// bdver3, bdver4) pass only a name, feature list and tuning list.
1880// Bobcat
1881def : Proc<"btver1", ProcessorFeatures.BtVer1Features,
1882           ProcessorFeatures.BtVer1Tuning>;
1883// Jaguar
1884def : ProcModel<"btver2", BtVer2Model, ProcessorFeatures.BtVer2Features,
1885                ProcessorFeatures.BtVer2Tuning>;
1886
1887// Bulldozer
// NOTE: bdver1 schedules with BdVer2Model; no BdVer1Model is referenced here.
1888def : ProcModel<"bdver1", BdVer2Model, ProcessorFeatures.BdVer1Features,
1889                ProcessorFeatures.BdVer1Tuning>;
1890// Piledriver
1891def : ProcModel<"bdver2", BdVer2Model, ProcessorFeatures.BdVer2Features,
1892                ProcessorFeatures.BdVer2Tuning>;
1893// Steamroller
1894def : Proc<"bdver3", ProcessorFeatures.BdVer3Features,
1895           ProcessorFeatures.BdVer3Tuning>;
1896// Excavator
1897def : Proc<"bdver4", ProcessorFeatures.BdVer4Features,
1898           ProcessorFeatures.BdVer4Tuning>;
1899
// znver1 through znver4 each use a same-numbered scheduling model, feature
// list and tuning list.
1900def : ProcModel<"znver1", Znver1Model, ProcessorFeatures.ZNFeatures,
1901                ProcessorFeatures.ZNTuning>;
1902def : ProcModel<"znver2", Znver2Model, ProcessorFeatures.ZN2Features,
1903                ProcessorFeatures.ZN2Tuning>;
1904def : ProcModel<"znver3", Znver3Model, ProcessorFeatures.ZN3Features,
1905                ProcessorFeatures.ZN3Tuning>;
1906def : ProcModel<"znver4", Znver4Model, ProcessorFeatures.ZN4Features,
1907           ProcessorFeatures.ZN4Tuning>;
// NOTE: znver5 has its own feature and tuning lists but schedules with
// Znver4Model.
1908def : ProcModel<"znver5", Znver4Model, ProcessorFeatures.ZN5Features,
1909                ProcessorFeatures.ZN5Tuning>;
1910
// Miscellaneous processor names defined with inline feature lists. None of
// them lists FeatureX86_64 or names a scheduling model, and all share the
// same tuning list: [TuningSlowUAMem16, TuningInsertVZEROUPPER].
1911def : Proc<"geode",           [FeatureX87, FeatureCX8, FeatureMMX, FeaturePRFCHW],
1912                              [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1913
// "winchip2" and "c3" have identical lists; "winchip-c6" is the same minus
// FeaturePRFCHW.
1914def : Proc<"winchip-c6",      [FeatureX87, FeatureMMX],
1915                              [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1916def : Proc<"winchip2",        [FeatureX87, FeatureMMX, FeaturePRFCHW],
1917                              [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1918def : Proc<"c3",              [FeatureX87, FeatureMMX, FeaturePRFCHW],
1919                              [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
// "c3-2" drops FeaturePRFCHW relative to "c3" and adds FeatureCX8,
// FeatureSSE1, FeatureFXSR and FeatureCMOV.
1920def : Proc<"c3-2",            [FeatureX87, FeatureCX8, FeatureMMX,
1921                               FeatureSSE1, FeatureFXSR, FeatureCMOV],
1922                              [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1923
// Generic, vendor-neutral processor names: "x86-64" and its -v2/-v3/-v4
// variants. Each one borrows the scheduling model of an Intel processor
// defined earlier in this file, as the per-entry comments indicate.
1924// We also provide a generic 64-bit specific x86 processor model which tries to
1925// be good for modern chips without enabling instruction set encodings past the
1926// basic SSE2 and 64-bit ones. It disables slow things from any mainstream and
1927// modern 64-bit x86 chip, and enables features that are generally beneficial.
1928//
1929// We currently use the Sandy Bridge model as the default scheduling model as
1930// we use it across Nehalem, Westmere, Sandy Bridge, and Ivy Bridge which
1931// covers a huge swath of x86 processors. If there are specific scheduling
1932// knobs which need to be tuned differently for AMD chips, we might consider
1933// forming a common base for them.
1934def : ProcModel<"x86-64", SandyBridgeModel, ProcessorFeatures.X86_64V1Features,
1935                ProcessorFeatures.X86_64V1Tuning>;
1936// Close to Sandybridge.
1937def : ProcModel<"x86-64-v2", SandyBridgeModel, ProcessorFeatures.X86_64V2Features,
1938                ProcessorFeatures.X86_64V2Tuning>;
1939// Close to Haswell.
1940def : ProcModel<"x86-64-v3", HaswellModel, ProcessorFeatures.X86_64V3Features,
1941                ProcessorFeatures.X86_64V3Tuning>;
1942// Close to the AVX-512 level implemented by Xeon Scalable Processors.
// Uses SkylakeServerModel, the same model as the skylake-avx512 entries.
1943def : ProcModel<"x86-64-v4", SkylakeServerModel, ProcessorFeatures.X86_64V4Features,
1944                ProcessorFeatures.X86_64V4Tuning>;
1945
1946//===----------------------------------------------------------------------===//
1947// Calling Conventions
1948//===----------------------------------------------------------------------===//
1949
1950include "X86CallingConv.td"
1951
1952
1953//===----------------------------------------------------------------------===//
1954// Assembly Parser
1955//===----------------------------------------------------------------------===//
1956
// Assembly parser variant for AT&T syntax: variant number 0, named "att",
// with "#" as the comment delimiter and "%" as the register prefix.
1957def ATTAsmParserVariant : AsmParserVariant {
  // Variant number; the ATT assembly writer in this file also uses 0.
1958  int Variant = 0;
1959
1960  // Variant name.
1961  string Name = "att";
1962
1963  // Discard comments in assembly strings.
1964  string CommentDelimiter = "#";
1965
1966  // Recognize hard coded registers.
1967  string RegisterPrefix = "%";
1968}
1969
// Assembly parser variant for Intel syntax: variant number 1, named "intel",
// with ";" as the comment delimiter and no register prefix.
1970def IntelAsmParserVariant : AsmParserVariant {
  // Variant number; the Intel assembly writer in this file also uses 1.
1971  int Variant = 1;
1972
1973  // Variant name.
1974  string Name = "intel";
1975
1976  // Discard comments in assembly strings.
1977  string CommentDelimiter = ";";
1978
1979  // Recognize hard coded registers.
  // Empty string: register names carry no prefix character in this variant.
1980  string RegisterPrefix = "";
1981}
1982
1983//===----------------------------------------------------------------------===//
1984// Assembly Printers
1985//===----------------------------------------------------------------------===//
1986
1987// The X86 target supports two different syntaxes for emitting machine code.
1988// This is controlled by the -x86-asm-syntax={att|intel} option.
// AT&T-syntax writer: variant 0, implemented by the class named
// "ATTInstPrinter".
1989def ATTAsmWriter : AsmWriter {
1990  string AsmWriterClassName  = "ATTInstPrinter";
  // Same number as the "att" parser variant.
1991  int Variant = 0;
1992}
// Intel-syntax writer: variant 1, implemented by the class named
// "IntelInstPrinter".
1993def IntelAsmWriter : AsmWriter {
1994  string AsmWriterClassName  = "IntelInstPrinter";
  // Same number as the "intel" parser variant.
1995  int Variant = 1;
1996}
1997
// Top-level Target record for X86. It ties together the instruction set
// description with the assembly parser variants and assembly writers.
1998def X86 : Target {
1999  // Information about the instructions...
2000  let InstructionSet = X86InstrInfo;
  // Parser variants and writers are listed in the same order (ATT first,
  // Intel second), matching their Variant numbers 0 and 1.
2001  let AssemblyParserVariants = [ATTAsmParserVariant, IntelAsmParserVariant];
2002  let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter];
  // NOTE(review): presumably allows register-renaming optimizations for this
  // target; confirm against the Target class definition in Target.td.
2003  let AllowRegisterRenaming = 1;
2004}
2005
2006//===----------------------------------------------------------------------===//
2007// Pfm Counters
2008//===----------------------------------------------------------------------===//
2009
2010include "X86PfmCounters.td"
2011