xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86.td (revision 59144db3fca192c4637637dfe6b5a5d98632cd47)
1//===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This is a target description file for the Intel i386 architecture, referred
10// to here as the "X86" architecture.
11//
12//===----------------------------------------------------------------------===//
13
14// Get the target-independent interfaces which we are implementing...
15//
16include "llvm/Target/Target.td"
17
18//===----------------------------------------------------------------------===//
19// X86 Subtarget state
20//
21// disregarding specific ABI / programming model
// Execution-mode selectors. Each one sets the same-named subtarget flag to
// "true"; none of them implies any other feature.
def Is64Bit
    : SubtargetFeature<"64bit-mode", "Is64Bit", "true",
                       "64-bit mode (x86_64)">;
def Is32Bit
    : SubtargetFeature<"32bit-mode", "Is32Bit", "true",
                       "32-bit mode (80386)">;
def Is16Bit
    : SubtargetFeature<"16bit-mode", "Is16Bit", "true",
                       "16-bit mode (i8086)">;
28
29//===----------------------------------------------------------------------===//
30// X86 Subtarget ISA features
31//===----------------------------------------------------------------------===//
32
// Stand-alone ISA features: each sets one boolean subtarget flag and carries
// no implied-feature list.
def FeatureX87
    : SubtargetFeature<"x87", "HasX87", "true",
                       "Enable X87 float instructions">;

def FeatureNOPL
    : SubtargetFeature<"nopl", "HasNOPL", "true",
                       "Enable NOPL instruction (generally pentium pro+)">;

def FeatureCMOV
    : SubtargetFeature<"cmov", "HasCMOV", "true",
                       "Enable conditional move instructions">;

def FeatureCX8
    : SubtargetFeature<"cx8", "HasCX8", "true",
                       "Support CMPXCHG8B instructions">;

def FeatureCRC32
    : SubtargetFeature<"crc32", "HasCRC32", "true",
                       "Enable SSE 4.2 CRC32 instruction (used when SSE4.2 is supported but function is GPR only)">;

def FeaturePOPCNT
    : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
                       "Support POPCNT instruction">;

def FeatureFXSR
    : SubtargetFeature<"fxsr", "HasFXSR", "true",
                       "Support fxsave/fxrestore instructions">;
53
// XSAVE family. The three variants (opt / c / s) each imply the base
// FeatureXSAVE.
def FeatureXSAVE
    : SubtargetFeature<"xsave", "HasXSAVE", "true",
                       "Support xsave instructions">;

def FeatureXSAVEOPT
    : SubtargetFeature<"xsaveopt", "HasXSAVEOPT", "true",
                       "Support xsaveopt instructions", [FeatureXSAVE]>;

def FeatureXSAVEC
    : SubtargetFeature<"xsavec", "HasXSAVEC", "true",
                       "Support xsavec instructions", [FeatureXSAVE]>;

def FeatureXSAVES
    : SubtargetFeature<"xsaves", "HasXSAVES", "true",
                       "Support xsaves instructions", [FeatureXSAVE]>;
68
// SSE level chain. Every entry assigns a value to X86SSELevel and implies the
// level immediately below it, so enabling one level pulls in all lower ones.
def FeatureSSE1
    : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
                       "Enable SSE instructions">;
def FeatureSSE2
    : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
                       "Enable SSE2 instructions", [FeatureSSE1]>;
def FeatureSSE3
    : SubtargetFeature<"sse3", "X86SSELevel", "SSE3",
                       "Enable SSE3 instructions", [FeatureSSE2]>;
def FeatureSSSE3
    : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3",
                       "Enable SSSE3 instructions", [FeatureSSE3]>;
def FeatureSSE41
    : SubtargetFeature<"sse4.1", "X86SSELevel", "SSE41",
                       "Enable SSE 4.1 instructions", [FeatureSSSE3]>;
def FeatureSSE42
    : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42",
                       "Enable SSE 4.2 instructions", [FeatureSSE41]>;
86// The MMX subtarget feature is separate from the rest of the SSE features
87// because it's important (for odd compatibility reasons) to be able to
88// turn it off explicitly while allowing SSE+ to be on.
def FeatureMMX
    : SubtargetFeature<"mmx", "X863DNowLevel", "MMX",
                       "Enable MMX instructions">;
// The 3DNow! levels share the X863DNowLevel field with MMX and chain upward:
// 3dnowa implies 3dnow, which implies mmx.
def Feature3DNow
    : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
                       "Enable 3DNow! instructions", [FeatureMMX]>;
def Feature3DNowA
    : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
                       "Enable 3DNow! Athlon instructions", [Feature3DNow]>;
97// All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied
98// feature, because SSE2 can be disabled (e.g. for compiling OS kernels)
99// without disabling 64-bit mode. Nothing should imply this feature bit. It
100// is used to enforce that only 64-bit capable CPUs are used in 64-bit mode.
def FeatureX86_64
    : SubtargetFeature<"64bit", "HasX86_64", "true",
                       "Support 64-bit instructions">;
// cmpxchg16b support; implies the 8-byte variant (FeatureCX8).
def FeatureCX16
    : SubtargetFeature<"cx16", "HasCX16", "true",
                       "64-bit with cmpxchg16b (this is true for most x86-64 chips, but not the first AMD chips)",
                       [FeatureCX8]>;
// SSE4a has its own boolean flag rather than an X86SSELevel value; it implies
// SSE3.
def FeatureSSE4A
    : SubtargetFeature<"sse4a", "HasSSE4A", "true",
                       "Support SSE 4a instructions", [FeatureSSE3]>;
109
// AVX and AVX2 continue the X86SSELevel chain on top of SSE4.2.
def FeatureAVX
    : SubtargetFeature<"avx", "X86SSELevel", "AVX",
                       "Enable AVX instructions", [FeatureSSE42]>;
def FeatureAVX2
    : SubtargetFeature<"avx2", "X86SSELevel", "AVX2",
                       "Enable AVX2 instructions", [FeatureAVX]>;
// Three-operand FMA: sets HasFMA and implies AVX.
// The description previously read "fused multiple-add"; the operation is a
// fused multiply-add, so the help text is corrected here.
def FeatureFMA     : SubtargetFeature<"fma", "HasFMA", "true",
                                      "Enable three-operand fused multiply-add",
                                      [FeatureAVX]>;
// Half-precision conversion instructions; implies AVX.
def FeatureF16C
    : SubtargetFeature<"f16c", "HasF16C", "true",
                       "Support 16-bit floating point conversion instructions",
                       [FeatureAVX]>;
def FeatureEVEX512
    : SubtargetFeature<"evex512", "HasEVEX512", "true",
                       "Support ZMM and 64-bit mask instructions">;
// AVX-512 foundation: top value of the X86SSELevel chain in this section;
// implies AVX2, FMA and F16C.
def FeatureAVX512
    : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512",
                       "Enable AVX-512 instructions",
                       [FeatureAVX2, FeatureFMA, FeatureF16C]>;
// The following AVX-512 extensions each set their own flag and imply
// FeatureAVX512.
def FeatureERI
    : SubtargetFeature<"avx512er", "HasERI", "true",
                       "Enable AVX-512 Exponential and Reciprocal Instructions",
                       [FeatureAVX512]>;
def FeatureCDI
    : SubtargetFeature<"avx512cd", "HasCDI", "true",
                       "Enable AVX-512 Conflict Detection Instructions",
                       [FeatureAVX512]>;
def FeatureVPOPCNTDQ
    : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ", "true",
                       "Enable AVX-512 Population Count Instructions",
                       [FeatureAVX512]>;
def FeaturePFI
    : SubtargetFeature<"avx512pf", "HasPFI", "true",
                       "Enable AVX-512 PreFetch Instructions",
                       [FeatureAVX512]>;
// Prefetch-hint features; neither implies another feature.
def FeaturePREFETCHI
    : SubtargetFeature<"prefetchi", "HasPREFETCHI", "true",
                       "Prefetch instruction with T0 or T1 Hint">;
def FeaturePREFETCHWT1
    : SubtargetFeature<"prefetchwt1", "HasPREFETCHWT1", "true",
                       "Prefetch with Intent to Write and T1 Hint">;
// DQ / BW / VL extensions imply the AVX-512 foundation directly.
def FeatureDQI
    : SubtargetFeature<"avx512dq", "HasDQI", "true",
                       "Enable AVX-512 Doubleword and Quadword Instructions",
                       [FeatureAVX512]>;
def FeatureBWI
    : SubtargetFeature<"avx512bw", "HasBWI", "true",
                       "Enable AVX-512 Byte and Word Instructions",
                       [FeatureAVX512]>;
def FeatureVLX
    : SubtargetFeature<"avx512vl", "HasVLX", "true",
                       "Enable AVX-512 Vector Length eXtensions",
                       [FeatureAVX512]>;
// Both VBMI flavours imply BWI (and therefore AVX-512 through it).
def FeatureVBMI
    : SubtargetFeature<"avx512vbmi", "HasVBMI", "true",
                       "Enable AVX-512 Vector Byte Manipulation Instructions",
                       [FeatureBWI]>;
def FeatureVBMI2
    : SubtargetFeature<"avx512vbmi2", "HasVBMI2", "true",
                       "Enable AVX-512 further Vector Byte Manipulation Instructions",
                       [FeatureBWI]>;
// AVX-IFMA implies only AVX2, not AVX-512.
def FeatureAVXIFMA
    : SubtargetFeature<"avxifma", "HasAVXIFMA", "true",
                       "Enable AVX-IFMA", [FeatureAVX2]>;
// AVX-512 IFMA: sets HasIFMA and implies the AVX-512 foundation.
// The description previously read "Fused Multiple-Add"; corrected to
// "Fused Multiply-Add".
def FeatureIFMA     : SubtargetFeature<"avx512ifma", "HasIFMA", "true",
                      "Enable AVX-512 Integer Fused Multiply-Add",
                                      [FeatureAVX512]>;
def FeaturePKU
    : SubtargetFeature<"pku", "HasPKU", "true",
                       "Enable protection keys">;
// VNNI exists in an AVX-512 form (implies AVX512) and a VEX-encoded form
// (implies only AVX2).
def FeatureVNNI
    : SubtargetFeature<"avx512vnni", "HasVNNI", "true",
                       "Enable AVX-512 Vector Neural Network Instructions",
                       [FeatureAVX512]>;
def FeatureAVXVNNI
    : SubtargetFeature<"avxvnni", "HasAVXVNNI", "true",
                       "Support AVX_VNNI encoding", [FeatureAVX2]>;
// BF16 and BITALG both imply BWI.
def FeatureBF16
    : SubtargetFeature<"avx512bf16", "HasBF16", "true",
                       "Support bfloat16 floating point", [FeatureBWI]>;
def FeatureBITALG
    : SubtargetFeature<"avx512bitalg", "HasBITALG", "true",
                       "Enable AVX-512 Bit Algorithms", [FeatureBWI]>;
def FeatureVP2INTERSECT
    : SubtargetFeature<"avx512vp2intersect", "HasVP2INTERSECT", "true",
                       "Enable AVX-512 vp2intersect", [FeatureAVX512]>;
184// FIXME: FP16 scalar intrinsics use the type v8f16, which is supposed to be
185// guarded under condition hasVLX. So we imply it in FeatureFP16 currently.
186// FIXME: FP16 conversion between f16 and i64 customize type v8i64, which is
187// supposed to be guarded under condition hasDQI. So we imply it in FeatureFP16
188// currently.
def FeatureFP16
    : SubtargetFeature<"avx512fp16", "HasFP16", "true",
                       "Support 16-bit floating point",
                       [FeatureBWI, FeatureVLX, FeatureDQI]>;
// VEX-encoded VNNI integer variants; each implies AVX2.
def FeatureAVXVNNIINT8
    : SubtargetFeature<"avxvnniint8", "HasAVXVNNIINT8", "true",
                       "Enable AVX-VNNI-INT8", [FeatureAVX2]>;
def FeatureAVXVNNIINT16
    : SubtargetFeature<"avxvnniint16", "HasAVXVNNIINT16", "true",
                       "Enable AVX-VNNI-INT16", [FeatureAVX2]>;
// Carry-less multiply and Galois-field arithmetic; both imply SSE2.
def FeaturePCLMUL
    : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
                       "Enable packed carry-less multiplication instructions",
                       [FeatureSSE2]>;
def FeatureGFNI
    : SubtargetFeature<"gfni", "HasGFNI", "true",
                       "Enable Galois Field Arithmetic Instructions",
                       [FeatureSSE2]>;
// The vector form of PCLMUL implies both AVX and the scalar-era PCLMUL.
def FeatureVPCLMULQDQ
    : SubtargetFeature<"vpclmulqdq", "HasVPCLMULQDQ", "true",
                       "Enable vpclmulqdq instructions",
                       [FeatureAVX, FeaturePCLMUL]>;
// Four-operand FMA: sets HasFMA4 and implies AVX and SSE4a.
// The description previously read "fused multiple-add"; corrected to
// "fused multiply-add".
def FeatureFMA4    : SubtargetFeature<"fma4", "HasFMA4", "true",
                                      "Enable four-operand fused multiply-add",
                                      [FeatureAVX, FeatureSSE4A]>;
// XOP implies FMA4.
def FeatureXOP
    : SubtargetFeature<"xop", "HasXOP", "true",
                       "Enable XOP instructions", [FeatureFMA4]>;
def FeatureSSEUnalignedMem
    : SubtargetFeature<"sse-unaligned-mem", "HasSSEUnalignedMem", "true",
                       "Allow unaligned memory operands with SSE instructions (this may require setting a configuration bit in the processor)">;
// AES implies SSE2; the vector form (VAES) implies AVX2 plus AES.
def FeatureAES
    : SubtargetFeature<"aes", "HasAES", "true",
                       "Enable AES instructions", [FeatureSSE2]>;
def FeatureVAES
    : SubtargetFeature<"vaes", "HasVAES", "true",
                       "Promote selected AES instructions to AVX512/AVX registers",
                       [FeatureAVX2, FeatureAES]>;
// Miscellaneous integer / system extensions. Each sets its own boolean flag;
// only the two SHA entries at the end carry an implied feature.
def FeatureTBM
    : SubtargetFeature<"tbm", "HasTBM", "true",
                       "Enable TBM instructions">;
def FeatureLWP
    : SubtargetFeature<"lwp", "HasLWP", "true",
                       "Enable LWP instructions">;
def FeatureMOVBE
    : SubtargetFeature<"movbe", "HasMOVBE", "true",
                       "Support MOVBE instruction">;
// Note the attribute string is spelled "rdrnd", not "rdrand".
def FeatureRDRAND
    : SubtargetFeature<"rdrnd", "HasRDRAND", "true",
                       "Support RDRAND instruction">;
def FeatureFSGSBase
    : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true",
                       "Support FS/GS Base instructions">;
def FeatureLZCNT
    : SubtargetFeature<"lzcnt", "HasLZCNT", "true",
                       "Support LZCNT instruction">;
def FeatureBMI
    : SubtargetFeature<"bmi", "HasBMI", "true",
                       "Support BMI instructions">;
def FeatureBMI2
    : SubtargetFeature<"bmi2", "HasBMI2", "true",
                       "Support BMI2 instructions">;
def FeatureRTM
    : SubtargetFeature<"rtm", "HasRTM", "true",
                       "Support RTM instructions">;
def FeatureADX
    : SubtargetFeature<"adx", "HasADX", "true",
                       "Support ADX instructions">;
// SHA implies SSE2; SHA512 implies AVX2.
def FeatureSHA
    : SubtargetFeature<"sha", "HasSHA", "true",
                       "Enable SHA instructions", [FeatureSSE2]>;
def FeatureSHA512
    : SubtargetFeature<"sha512", "HasSHA512", "true",
                       "Support SHA512 instructions", [FeatureAVX2]>;
250// Processor supports CET SHSTK - Control-Flow Enforcement Technology
251// using Shadow Stack
def FeatureSHSTK
    : SubtargetFeature<"shstk", "HasSHSTK", "true",
                       "Support CET Shadow-Stack instructions">;
// SM3 implies AVX; SM4 implies AVX2.
def FeatureSM3
    : SubtargetFeature<"sm3", "HasSM3", "true",
                       "Support SM3 instructions", [FeatureAVX]>;
def FeatureSM4
    : SubtargetFeature<"sm4", "HasSM4", "true",
                       "Support SM4 instructions", [FeatureAVX2]>;
def FeaturePRFCHW
    : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
                       "Support PRFCHW instructions">;
def FeatureRDSEED
    : SubtargetFeature<"rdseed", "HasRDSEED", "true",
                       "Support RDSEED instruction">;
// Exposed under the attribute string "sahf" although the record and flag are
// named LAHFSAHF64.
def FeatureLAHFSAHF64
    : SubtargetFeature<"sahf", "HasLAHFSAHF64", "true",
                       "Support LAHF and SAHF instructions in 64-bit mode">;
// Monitor/wait, cache-line and trace-write features; none implies another.
def FeatureMWAITX
    : SubtargetFeature<"mwaitx", "HasMWAITX", "true",
                       "Enable MONITORX/MWAITX timer functionality">;
def FeatureCLZERO
    : SubtargetFeature<"clzero", "HasCLZERO", "true",
                       "Enable Cache Line Zero">;
def FeatureCLDEMOTE
    : SubtargetFeature<"cldemote", "HasCLDEMOTE", "true",
                       "Enable Cache Line Demote">;
def FeaturePTWRITE
    : SubtargetFeature<"ptwrite", "HasPTWRITE", "true",
                       "Support ptwrite instruction">;
// AMX family. AMX-TILE is the base; every other AMX feature implies it.
def FeatureAMXTILE
    : SubtargetFeature<"amx-tile", "HasAMXTILE", "true",
                       "Support AMX-TILE instructions">;
def FeatureAMXINT8
    : SubtargetFeature<"amx-int8", "HasAMXINT8", "true",
                       "Support AMX-INT8 instructions", [FeatureAMXTILE]>;
def FeatureAMXBF16
    : SubtargetFeature<"amx-bf16", "HasAMXBF16", "true",
                       "Support AMX-BF16 instructions", [FeatureAMXTILE]>;
def FeatureAMXFP16
    : SubtargetFeature<"amx-fp16", "HasAMXFP16", "true",
                       "Support AMX amx-fp16 instructions", [FeatureAMXTILE]>;
def FeatureAMXCOMPLEX
    : SubtargetFeature<"amx-complex", "HasAMXCOMPLEX", "true",
                       "Support AMX-COMPLEX instructions", [FeatureAMXTILE]>;
def FeatureCMPCCXADD
    : SubtargetFeature<"cmpccxadd", "HasCMPCCXADD", "true",
                       "Support CMPCCXADD instructions">;
// RAO-INT passes an explicit empty implied-feature list.
def FeatureRAOINT
    : SubtargetFeature<"raoint", "HasRAOINT", "true",
                       "Support RAO-INT instructions", []>;
// AVX-NE-CONVERT implies AVX2.
def FeatureAVXNECONVERT
    : SubtargetFeature<"avxneconvert", "HasAVXNECONVERT", "true",
                       "Support AVX-NE-CONVERT instructions", [FeatureAVX2]>;
// System, cache-maintenance and ID-reading features. Each sets one boolean
// flag; none carries an implied-feature list.
def FeatureINVPCID
    : SubtargetFeature<"invpcid", "HasINVPCID", "true",
                       "Invalidate Process-Context Identifier">;
def FeatureSGX
    : SubtargetFeature<"sgx", "HasSGX", "true",
                       "Enable Software Guard Extensions">;
def FeatureCLFLUSHOPT
    : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true",
                       "Flush A Cache Line Optimized">;
def FeatureCLWB
    : SubtargetFeature<"clwb", "HasCLWB", "true",
                       "Cache Line Write Back">;
def FeatureWBNOINVD
    : SubtargetFeature<"wbnoinvd", "HasWBNOINVD", "true",
                       "Write Back No Invalidate">;
def FeatureRDPID
    : SubtargetFeature<"rdpid", "HasRDPID", "true",
                       "Support RDPID instructions">;
def FeatureRDPRU
    : SubtargetFeature<"rdpru", "HasRDPRU", "true",
                       "Support RDPRU instructions">;
def FeatureWAITPKG
    : SubtargetFeature<"waitpkg", "HasWAITPKG", "true",
                       "Wait and pause enhancements">;
def FeatureENQCMD
    : SubtargetFeature<"enqcmd", "HasENQCMD", "true",
                       "Has ENQCMD instructions">;
// Key Locker: the base feature implies SSE2, and the wide variant implies the
// base.
def FeatureKL
    : SubtargetFeature<"kl", "HasKL", "true",
                       "Support Key Locker kl Instructions", [FeatureSSE2]>;
def FeatureWIDEKL
    : SubtargetFeature<"widekl", "HasWIDEKL", "true",
                       "Support Key Locker wide Instructions", [FeatureKL]>;
// The remaining entries in this group are independent boolean features.
def FeatureHRESET
    : SubtargetFeature<"hreset", "HasHRESET", "true",
                       "Has hreset instruction">;
def FeatureSERIALIZE
    : SubtargetFeature<"serialize", "HasSERIALIZE", "true",
                       "Has serialize instruction">;
def FeatureTSXLDTRK
    : SubtargetFeature<"tsxldtrk", "HasTSXLDTRK", "true",
                       "Support TSXLDTRK instructions">;
def FeatureUINTR
    : SubtargetFeature<"uintr", "HasUINTR", "true",
                       "Has UINTR Instructions">;
def FeatureUSERMSR
    : SubtargetFeature<"usermsr", "HasUSERMSR", "true",
                       "Support USERMSR instructions">;
def FeaturePCONFIG
    : SubtargetFeature<"pconfig", "HasPCONFIG", "true",
                       "platform configuration instruction">;
def FeatureMOVDIRI
    : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
                       "Support movdiri instruction (direct store integer)">;
def FeatureMOVDIR64B
    : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true",
                       "Support movdir64b instruction (direct store 64 bytes)">;
// AVX10.1 (256-bit): an umbrella that implies the listed AVX-512 extensions
// plus VAES and VPCLMULQDQ.
def FeatureAVX10_1
    : SubtargetFeature<"avx10.1-256", "HasAVX10_1", "true",
                       "Support AVX10.1 up to 256-bit instruction",
                       [FeatureCDI, FeatureVBMI, FeatureIFMA, FeatureVNNI,
                        FeatureBF16, FeatureVPOPCNTDQ, FeatureVBMI2,
                        FeatureBITALG, FeatureVAES, FeatureVPCLMULQDQ,
                        FeatureFP16]>;
// The 512-bit flavour is the 256-bit one plus EVEX512.
def FeatureAVX10_1_512
    : SubtargetFeature<"avx10.1-512", "HasAVX10_1_512", "true",
                       "Support AVX10.1 up to 512-bit instruction",
                       [FeatureAVX10_1, FeatureEVEX512]>;
// Independent boolean features: extended GPRs, PUSH2/POP2, push-pop
// acceleration, non-destructive destination, conditional cmp/test and
// conditional faulting. None implies another.
def FeatureEGPR
    : SubtargetFeature<"egpr", "HasEGPR", "true",
                       "Support extended general purpose register">;
def FeaturePush2Pop2
    : SubtargetFeature<"push2pop2", "HasPush2Pop2", "true",
                       "Support PUSH2/POP2 instructions">;
def FeaturePPX
    : SubtargetFeature<"ppx", "HasPPX", "true",
                       "Support Push-Pop Acceleration">;
def FeatureNDD
    : SubtargetFeature<"ndd", "HasNDD", "true",
                       "Support non-destructive destination">;
def FeatureCCMP
    : SubtargetFeature<"ccmp", "HasCCMP", "true",
                       "Support conditional cmp & test instructions">;
def FeatureCF
    : SubtargetFeature<"cf", "HasCF", "true",
                       "Support conditional faulting">;
356
357// Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
358// "string operations"). See "REP String Enhancement" in the Intel Software
359// Development Manual. This feature essentially means that REP MOVSB will copy
360// using the largest available size instead of copying bytes one by one, making
361// it at least as fast as REPMOVS{W,D,Q}.
// Attribute "ermsb"; sets HasERMSB.
def FeatureERMSB : SubtargetFeature<"ermsb", "HasERMSB", "true",
                                    "REP MOVS/STOS are fast">;
366
367// Icelake and newer processors have Fast Short REP MOV.
// Attribute "fsrm"; sets HasFSRM.
def FeatureFSRM : SubtargetFeature<"fsrm", "HasFSRM", "true",
                                   "REP MOVSB of short lengths is faster">;
372
// Attribute "soft-float"; sets UseSoftFloat.
def FeatureSoftFloat : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
                                        "Use software floating point features">;
376
377//===----------------------------------------------------------------------===//
378// X86 Subtarget Security Mitigation features
379//===----------------------------------------------------------------------===//
380
381// Lower indirect calls using a special construct called a `retpoline` to
382// mitigate potential Spectre v2 attacks against them.
// Attribute "retpoline-indirect-calls"; sets UseRetpolineIndirectCalls.
def FeatureRetpolineIndirectCalls : SubtargetFeature<
    "retpoline-indirect-calls", "UseRetpolineIndirectCalls", "true",
    "Remove speculation of indirect calls from the generated code">;
387
388// Lower indirect branches and switches either using conditional branch trees
389// or using a special construct called a `retpoline` to mitigate potential
390// Spectre v2 attacks against them.
// Attribute "retpoline-indirect-branches"; sets UseRetpolineIndirectBranches.
def FeatureRetpolineIndirectBranches : SubtargetFeature<
    "retpoline-indirect-branches", "UseRetpolineIndirectBranches", "true",
    "Remove speculation of indirect branches from the generated code">;
395
396// Deprecated umbrella feature for enabling both `retpoline-indirect-calls` and
397// `retpoline-indirect-branches` above.
// Sets DeprecatedUseRetpoline and implies both the indirect-call and the
// indirect-branch retpoline features.
def FeatureRetpoline : SubtargetFeature<
    "retpoline", "DeprecatedUseRetpoline", "true",
    "Remove speculation of indirect branches from the "
    "generated code, either by avoiding them entirely or "
    "lowering them with a speculation blocking construct",
    [FeatureRetpolineIndirectCalls, FeatureRetpolineIndirectBranches]>;
405
406// Rely on external thunks for the emitted retpoline calls. This allows users
407// to provide their own custom thunk definitions in highly specialized
408// environments such as a kernel that does boot-time hot patching.
// Sets UseRetpolineExternalThunk; implies FeatureRetpolineIndirectCalls.
def FeatureRetpolineExternalThunk : SubtargetFeature<
    "retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
    "When lowering an indirect call or branch using a `retpoline`, rely "
    "on the specified user provided thunk rather than emitting one "
    "ourselves. Only has effect when combined with some other retpoline "
    "feature",
    [FeatureRetpolineIndirectCalls]>;
416
417// Mitigate LVI attacks against indirect calls/branches and call returns
// Attribute "lvi-cfi"; sets UseLVIControlFlowIntegrity.
def FeatureLVIControlFlowIntegrity : SubtargetFeature<
    "lvi-cfi", "UseLVIControlFlowIntegrity", "true",
    "Prevent indirect calls/branches from using a memory operand, and "
    "precede all indirect calls/branches from a register with an "
    "LFENCE instruction to serialize control flow. Also decompose RET "
    "instructions into a POP+LFENCE+JMP sequence.">;
425
426// Enable SESES to mitigate speculative execution attacks
// Attribute "seses"; sets UseSpeculativeExecutionSideEffectSuppression and
// implies FeatureLVIControlFlowIntegrity.
def FeatureSpeculativeExecutionSideEffectSuppression : SubtargetFeature<
    "seses", "UseSpeculativeExecutionSideEffectSuppression", "true",
    "Prevent speculative execution side channel timing attacks by "
    "inserting a speculation barrier before memory reads, memory writes, "
    "and conditional branches. Implies LVI Control Flow integrity.",
    [FeatureLVIControlFlowIntegrity]>;
434
435// Mitigate LVI attacks against data loads
// Attribute "lvi-load-hardening"; sets UseLVILoadHardening.
def FeatureLVILoadHardening : SubtargetFeature<
    "lvi-load-hardening", "UseLVILoadHardening", "true",
    "Insert LFENCE instructions to prevent data speculatively injected "
    "into loads from being used maliciously.">;
441
// Attribute "tagged-globals"; sets AllowTaggedGlobals.
def FeatureTaggedGlobals : SubtargetFeature<
    "tagged-globals", "AllowTaggedGlobals", "true",
    "Use an instruction sequence for taking the address of a global "
    "that allows a memory tag in the upper address bits.">;
447
448// Control codegen mitigation against Straight Line Speculation vulnerability.
// Attribute "harden-sls-ret"; sets HardenSlsRet.
def FeatureHardenSlsRet : SubtargetFeature<
    "harden-sls-ret", "HardenSlsRet", "true",
    "Harden against straight line speculation across RET instructions.">;
453
// Attribute "harden-sls-ijmp"; sets HardenSlsIJmp. Counterpart of
// harden-sls-ret for indirect JMP instructions.
def FeatureHardenSlsIJmp : SubtargetFeature<
    "harden-sls-ijmp", "HardenSlsIJmp", "true",
    "Harden against straight line speculation across indirect JMP instructions.">;
458
459//===----------------------------------------------------------------------===//
460// X86 Subtarget Tuning features
461//===----------------------------------------------------------------------===//
// Attribute "prefer-movmsk-over-vtest"; sets PreferMovmskOverVTest.
def TuningPreferMovmskOverVTest
    : SubtargetFeature<"prefer-movmsk-over-vtest", "PreferMovmskOverVTest",
                       "true", "Prefer movmsk over vtest instruction">;
465
// "Instruction X is slow" tunings; each sets an Is<INSN>Slow flag.
def TuningSlowSHLD
    : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
                       "SHLD instruction is slow">;

def TuningSlowPMULLD
    : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
                       "PMULLD instruction is slow (compared to PMULLW/PMULHW and PMULUDQ)">;

def TuningSlowPMADDWD
    : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow", "true",
                       "PMADDWD is slower than PMULLD">;
475
476// FIXME: This should not apply to CPUs that do not have SSE.
def TuningSlowUAMem16
    : SubtargetFeature<"slow-unaligned-mem-16", "IsUnalignedMem16Slow", "true",
                       "Slow unaligned 16-byte memory access">;

// 32-byte counterpart of the tuning above.
def TuningSlowUAMem32
    : SubtargetFeature<"slow-unaligned-mem-32", "IsUnalignedMem32Slow", "true",
                       "Slow unaligned 32-byte memory access">;
484
// Attribute "lea-sp"; sets UseLeaForSP.
def TuningLEAForSP
    : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
                       "Use LEA for adjusting the stack pointer (this is an optimization for Intel Atom processors)">;
487
488// True if 8-bit divisions are significantly faster than
489// 32-bit divisions and should be used when possible.
// Exposed as "idivl-to-divb"; sets HasSlowDivide32.
def TuningSlowDivide32
    : SubtargetFeature<"idivl-to-divb", "HasSlowDivide32", "true",
                       "Use 8-bit divide for positive values less than 256">;
493
494// True if 32-bit divides are significantly faster than
495// 64-bit divisions and should be used when possible.
// Exposed as "idivq-to-divl"; sets HasSlowDivide64.
def TuningSlowDivide64
    : SubtargetFeature<"idivq-to-divl", "HasSlowDivide64", "true",
                       "Use 32-bit divide for positive values less than 2^32">;
499
// Attribute "pad-short-functions"; sets PadShortFunctions.
def TuningPadShortFunctions
    : SubtargetFeature<"pad-short-functions", "PadShortFunctions", "true",
                       "Pad short functions (to prevent a stall when returning too early)">;
503
504// On some processors, instructions that implicitly take two memory operands are
505// slow. In practice, this means that CALL, PUSH, and POP with memory operands
506// should be avoided in favor of a MOV + register CALL/PUSH/POP.
// Exposed as "slow-two-mem-ops"; sets SlowTwoMemOps.
def TuningSlowTwoMemOps
    : SubtargetFeature<"slow-two-mem-ops", "SlowTwoMemOps", "true",
                       "Two memory operand instructions are slow">;
510
// Set when the inputs of an LEA instruction must already be available at
// address generation (AG) time.
def TuningLEAUsesAG
    : SubtargetFeature<"lea-uses-ag", "LeaUsesAG", "true",
                       "LEA instruction needs inputs at AG stage">;
515
// Tuning flag "slow-lea" (subtarget field SlowLEA): LEA is slow for certain
// argument combinations.
def TuningSlowLEA
    : SubtargetFeature<"slow-lea", "SlowLEA", "true",
                       "LEA instruction with certain arguments is slow">;
518
// Set when LEA is slow in either of two shapes: all three source operands
// (base, index and offset) are present, or base and index registers are used
// and the base is EBP, RBP, or R13.
def TuningSlow3OpsLEA
    : SubtargetFeature<
          "slow-3ops-lea", "Slow3OpsLEA", "true",
          "LEA instruction with 3 ops or certain registers is slow">;
524
// Set when INC and DEC are slow because of the way they write the flags.
def TuningSlowIncDec
    : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
                       "INC and DEC instructions are slower than ADD and SUB">;
528
// Tuning flag "false-deps-popcnt" (subtarget field HasPOPCNTFalseDeps):
// POPCNT carries a false dependency on its destination register.
def TuningPOPCNTFalseDeps
    : SubtargetFeature<"false-deps-popcnt", "HasPOPCNTFalseDeps", "true",
                       "POPCNT has a false dependency on dest register">;
532
// Tuning flag "false-deps-lzcnt-tzcnt" (subtarget field HasLZCNTFalseDeps):
// LZCNT and TZCNT carry a false dependency on their destination register.
def TuningLZCNTFalseDeps
    : SubtargetFeature<"false-deps-lzcnt-tzcnt", "HasLZCNTFalseDeps", "true",
                       "LZCNT/TZCNT have a false dependency on dest register">;
536
// Tuning flag "false-deps-mulc" (subtarget field HasMULCFalseDeps):
// VF[C]MULCPH/SH carry a false dependency on their destination register.
def TuningMULCFalseDeps
    : SubtargetFeature<"false-deps-mulc", "HasMULCFalseDeps", "true",
                       "VF[C]MULCPH/SH has a false dependency on dest register">;
540
// Tuning flag "false-deps-perm" (subtarget field HasPERMFalseDeps):
// VPERMD/Q/PS/PD carry a false dependency on their destination register.
def TuningPERMFalseDeps
    : SubtargetFeature<"false-deps-perm", "HasPERMFalseDeps", "true",
                       "VPERMD/Q/PS/PD has a false dependency on dest register">;
544
// Tuning flag "false-deps-range" (subtarget field HasRANGEFalseDeps):
// VRANGEPD/PS/SD/SS carry a false dependency on their destination register.
def TuningRANGEFalseDeps
    : SubtargetFeature<
          "false-deps-range", "HasRANGEFalseDeps", "true",
          "VRANGEPD/PS/SD/SS has a false dependency on dest register">;
548
// Tuning flag "false-deps-getmant" (subtarget field HasGETMANTFalseDeps):
// VGETMANTSS/SD/SH and the memory forms of VGETMANTPS/PD carry a false
// dependency on their destination register.
// The help text previously spelled the packed mnemonic "VGETMANDPS", which is
// not an instruction; it now reads VGETMANTPS.
def TuningGETMANTFalseDeps
    : SubtargetFeature<"false-deps-getmant", "HasGETMANTFalseDeps", "true",
                       "VGETMANTSS/SD/SH and VGETMANTPS/PD(memory version) has a"
                       " false dependency on dest register">;
553
// Tuning flag "false-deps-mullq" (subtarget field HasMULLQFalseDeps):
// VPMULLQ carries a false dependency on its destination register.
def TuningMULLQFalseDeps
    : SubtargetFeature<"false-deps-mullq", "HasMULLQFalseDeps", "true",
                       "VPMULLQ has a false dependency on dest register">;
557
// Tuning flag "sbb-dep-breaking" (subtarget field HasSBBDepBreaking): an SBB
// whose operands are the same register has no dependency on that source.
def TuningSBBDepBreaking
    : SubtargetFeature<"sbb-dep-breaking", "HasSBBDepBreaking", "true",
                       "SBB with same register has no source dependency">;
561
// On recent (port-bound) X86 processors it is preferable to use one shuffle
// with a variable mask instead of several fixed-mask shuffles.
def TuningFastVariableCrossLaneShuffle : SubtargetFeature<
    "fast-variable-crosslane-shuffle", "HasFastVariableCrossLaneShuffle",
    "true",
    "Cross-lane shuffles with variable masks are fast">;
// Marks per-lane shuffles that take a variable mask as fast.
def TuningFastVariablePerLaneShuffle : SubtargetFeature<
    "fast-variable-perlane-shuffle", "HasFastVariablePerLaneShuffle",
    "true",
    "Per-lane shuffles with variable masks are fast">;
571                       "true", "Per-lane shuffles with variable masks are fast">;
572
// Goldmont / Tremont (and Atom cores in general) have no bypass delay.
def TuningNoDomainDelay
    : SubtargetFeature<"no-bypass-delay", "NoDomainDelay", "true",
                       "Has no bypass delay when using the 'wrong' domain">;
577
// Many processors (Intel: Nehalem and later) pay no bypass delay for a mov
// of the wrong type.
def TuningNoDomainDelayMov
    : SubtargetFeature<"no-bypass-delay-mov", "NoDomainDelayMov", "true",
                       "Has no bypass delay when using the 'wrong' mov type">;
583
// Newer processors (Intel: Skylake and later) pay no bypass delay for a blend
// of the wrong type.
def TuningNoDomainDelayBlend
    : SubtargetFeature<"no-bypass-delay-blend", "NoDomainDelayBlend", "true",
                       "Has no bypass delay when using the 'wrong' blend type">;
589
// Newer processors (Intel: Haswell and later) pay no bypass delay for a
// shuffle of the wrong type.
def TuningNoDomainDelayShuffle
    : SubtargetFeature<
          "no-bypass-delay-shuffle", "NoDomainDelayShuffle", "true",
          "Has no bypass delay when using the 'wrong' shuffle type">;
595
// On AVX512 targets (e.g. Skylake Server), lower shuffles to immediate
// shifts/rotates when those can issue on more ports than a regular shuffle.
def TuningPreferShiftShuffle
    : SubtargetFeature<"faster-shift-than-shuffle",
                       "PreferLowerShuffleAsShift", "true",
                       "Shifts are faster (or as fast) as shuffle">;
601
// Tuning flag "tuning-fast-imm-vector-shift" (subtarget field
// FastImmVectorShift): vector shifts sustain 2 per cycle rather than 1.
def TuningFastImmVectorShift
    : SubtargetFeature<
          "tuning-fast-imm-vector-shift", "FastImmVectorShift", "true",
          "Vector shifts are fast (2/cycle) as opposed to slow (1/cycle)">;
605
// Some X86 processors need a vzeroupper after ymm/zmm registers have been
// used and before running code that may execute SSE instructions.
def TuningInsertVZEROUPPER : SubtargetFeature<
    "vzeroupper", "InsertVZEROUPPER", "true",
    "Should insert vzeroupper instructions">;
612
613// TuningFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency
614// than the corresponding NR code. TuningFastVectorFSQRT should be enabled if
615// vector FSQRT has higher throughput than the corresponding NR code.
616// The idea is that throughput bound code is likely to be vectorized, so for
617// vectorized code we should care about the throughput of SQRT operations.
618// But if the code is scalar that probably means that the code has some kind of
619// dependency and we should care more about reducing the latency.
620
// Set when the hardware SQRTSS instruction has latency no worse than RSQRTSS
// plus one Newton-Raphson iteration.
def TuningFastScalarFSQRT : SubtargetFeature<
    "fast-scalar-fsqrt", "HasFastScalarFSQRT", "true",
    "Scalar SQRT is fast (disable Newton-Raphson)">;
// Set when the hardware SQRTPS/VSQRTPS instructions have throughput no worse
// than RSQRTPS/VRSQRTPS plus one Newton-Raphson iteration.
def TuningFastVectorFSQRT : SubtargetFeature<
    "fast-vector-fsqrt", "HasFastVectorFSQRT", "true",
    "Vector SQRT is fast (disable Newton-Raphson)">;
631
// When lzcnt matches the latency/throughput of most simple integer ops it can
// stand in for test/set sequences.
def TuningFastLZCNT
    : SubtargetFeature<"fast-lzcnt", "HasFastLZCNT", "true",
                       "LZCNT instructions are as fast as most simple integer ops">;
638
// Set when the target efficiently decodes NOPs up to 7 bytes long.
def TuningFast7ByteNOP
    : SubtargetFeature<"fast-7bytenop", "HasFast7ByteNOP", "true",
                       "Target can quickly decode up to 7 byte NOPs">;
644
// Set when the target efficiently decodes NOPs up to 11 bytes long.
def TuningFast11ByteNOP
    : SubtargetFeature<"fast-11bytenop", "HasFast11ByteNOP", "true",
                       "Target can quickly decode up to 11 byte NOPs">;
650
// Set when the target efficiently decodes NOPs up to 15 bytes long.
def TuningFast15ByteNOP
    : SubtargetFeature<"fast-15bytenop", "HasFast15ByteNOP", "true",
                       "Target can quickly decode up to 15 byte NOPs">;
656
// From Sandy Bridge onwards, a rotate can be implemented as SHLD with the
// same register on both inputs, which avoids the partial flag update of the
// regular rotate instructions.
def TuningFastSHLDRotate
    : SubtargetFeature<"fast-shld-rotate", "HasFastSHLDRotate", "true",
                       "SHLD can be used as a faster rotate">;
664
// From Bulldozer onwards, CMP/TEST (and only those instructions) can be
// merged with a conditional branch.
def TuningBranchFusion : SubtargetFeature<
    "branchfusion", "HasBranchFusion", "true",
    "CMP/TEST can be fused with conditional branches">;
670
// From Sandy Bridge onwards, many instructions can be fused with a
// conditional branch and travel through the CPU as one operation.
def TuningMacroFusion : SubtargetFeature<
    "macrofusion", "HasMacroFusion", "true",
    "Various instructions can be fused with conditional branches">;
677
// Gather exists since Haswell (the AVX2 set), so it could technically be
// emitted for every AVX2 processor, but its overhead on HSW is high. Skylake
// Client gathers faster than HSW, with performance similar to Skylake Server
// (AVX-512).
def TuningFastGather : SubtargetFeature<
    "fast-gather", "HasFastGather", "true",
    "Indicates if gather is reasonably fast (this is true for Skylake client and all AVX-512 CPUs)">;
685
// Tuning flag "prefer-no-gather". Note the inverted sense: the value
// associated with the PreferGather field here is "false".
def TuningPreferNoGather : SubtargetFeature<
    "prefer-no-gather", "PreferGather", "false",
    "Prefer no gather instructions">;
// Tuning flag "prefer-no-scatter". Note the inverted sense: the value
// associated with the PreferScatter field here is "false".
def TuningPreferNoScatter : SubtargetFeature<
    "prefer-no-scatter", "PreferScatter", "false",
    "Prefer no scatter instructions">;
692
// Tuning flag "prefer-128-bit" (subtarget field Prefer128Bit): favour
// 128-bit AVX instructions.
def TuningPrefer128Bit : SubtargetFeature<
    "prefer-128-bit", "Prefer128Bit", "true",
    "Prefer 128-bit AVX instructions">;
696
// Tuning flag "prefer-256-bit" (subtarget field Prefer256Bit): favour
// 256-bit AVX instructions.
def TuningPrefer256Bit : SubtargetFeature<
    "prefer-256-bit", "Prefer256Bit", "true",
    "Prefer 256-bit AVX instructions">;
700
// Tuning flag "allow-light-256-bit" (subtarget field AllowLight256Bit):
// 256-bit loads/stores may still be generated when 128-bit is preferred.
def TuningAllowLight256Bit : SubtargetFeature<
    "allow-light-256-bit", "AllowLight256Bit", "true",
    "Enable generation of 256-bit load/stores even if we prefer 128-bit">;
704
// Tuning flag "prefer-mask-registers" (subtarget field PreferMaskRegisters):
// favour AVX512 mask registers over PTEST/MOVMSK.
def TuningPreferMaskRegisters : SubtargetFeature<
    "prefer-mask-registers", "PreferMaskRegisters", "true",
    "Prefer AVX512 mask registers over PTEST/MOVMSK">;
708
// Tuning flag "fast-bextr" (subtarget field HasFastBEXTR): BEXTR is a single
// uop with good throughput.
def TuningFastBEXTR
    : SubtargetFeature<
          "fast-bextr", "HasFastBEXTR", "true",
          "Indicates that the BEXTR instruction is implemented as a single uop "
          "with good throughput">;
712
// Fold vector math plus shuffles into horizontal math instructions when the
// CPU's horizontal operations (introduced with SSE3) have better
// latency/throughput than the alternative sequence.
def TuningFastHorizontalOps : SubtargetFeature<
    "fast-hops", "HasFastHorizontalOps", "true",
    "Prefer horizontal vector math instructions (haddp, phsub, etc.) over "
    "normal vector instructions with shuffles">;
721
// Tuning flag "fast-scalar-shift-masks" (subtarget field
// HasFastScalarShiftMasks): for scalars, favour a left/right logical shift
// pair over shift+and.
def TuningFastScalarShiftMasks : SubtargetFeature<
    "fast-scalar-shift-masks", "HasFastScalarShiftMasks", "true",
    "Prefer a left/right scalar logical shift pair over a shift+and pair">;
726
// Tuning flag "fast-vector-shift-masks" (subtarget field
// HasFastVectorShiftMasks): for vectors, favour a left/right logical shift
// pair over shift+and.
def TuningFastVectorShiftMasks : SubtargetFeature<
    "fast-vector-shift-masks", "HasFastVectorShiftMasks", "true",
    "Prefer a left/right vector logical shift pair over a shift+and pair">;
731
// Tuning flag "fast-movbe" (subtarget field HasFastMOVBE): favour movbe over
// a single-use load + bswap or a single-use bswap + store.
def TuningFastMOVBE : SubtargetFeature<
    "fast-movbe", "HasFastMOVBE", "true",
    "Prefer a movbe over a single-use load + bswap / single-use bswap + store">;
735
// Tuning flag "use-slm-arith-costs" (subtarget field UseSLMArithCosts):
// apply the Silvermont-specific arithmetic costs.
def TuningUseSLMArithCosts : SubtargetFeature<
    "use-slm-arith-costs", "UseSLMArithCosts", "true",
    "Use Silvermont specific arithmetic costs">;
739
// Tuning flag "use-glm-div-sqrt-costs" (subtarget field UseGLMDivSqrtCosts):
// apply the Goldmont-specific floating point div/sqrt costs.
def TuningUseGLMDivSqrtCosts : SubtargetFeature<
    "use-glm-div-sqrt-costs", "UseGLMDivSqrtCosts", "true",
    "Use Goldmont specific floating point div/sqrt costs">;
743
744//===----------------------------------------------------------------------===//
745// X86 CPU Families
746// TODO: Remove these - use general tuning features to determine codegen.
747//===----------------------------------------------------------------------===//
748
// Bonnell. CPU-family marker (subtarget field IsAtom); its feature-name
// string is deliberately left empty here.
def ProcIntelAtom
    : SubtargetFeature<"", "IsAtom", "true", "Is Intel Atom processor">;
751
752//===----------------------------------------------------------------------===//
753// Register File Description
754//===----------------------------------------------------------------------===//
755
756include "X86RegisterInfo.td"
757include "X86RegisterBanks.td"
758
759//===----------------------------------------------------------------------===//
760// Instruction Descriptions
761//===----------------------------------------------------------------------===//
762
763include "X86Schedule.td"
764include "X86InstrInfo.td"
765include "X86SchedPredicates.td"
766
// Instruction-info record for this target: a plain instance of the InstrInfo
// class with no field overrides.
def X86InstrInfo : InstrInfo;
768
769//===----------------------------------------------------------------------===//
770// X86 Scheduler Models
771//===----------------------------------------------------------------------===//
772
773include "X86ScheduleAtom.td"
774include "X86SchedSandyBridge.td"
775include "X86SchedHaswell.td"
776include "X86SchedBroadwell.td"
777include "X86ScheduleSLM.td"
778include "X86ScheduleZnver1.td"
779include "X86ScheduleZnver2.td"
780include "X86ScheduleZnver3.td"
781include "X86ScheduleZnver4.td"
782include "X86ScheduleBdVer2.td"
783include "X86ScheduleBtVer2.td"
784include "X86SchedSkylakeClient.td"
785include "X86SchedSkylakeServer.td"
786include "X86SchedIceLake.td"
787include "X86SchedAlderlakeP.td"
788include "X86SchedSapphireRapids.td"
789
790//===----------------------------------------------------------------------===//
791// X86 Processor Feature Lists
792//===----------------------------------------------------------------------===//
793
794def ProcessorFeatures {
795  // x86-64 micro-architecture levels: x86-64 and x86-64-v[234]
796  list<SubtargetFeature> X86_64V1Features = [
797    FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX, FeatureSSE2,
798    FeatureFXSR, FeatureNOPL, FeatureX86_64,
799  ];
800  list<SubtargetFeature> X86_64V1Tuning = [
801    TuningMacroFusion,
802    TuningSlow3OpsLEA,
803    TuningSlowDivide64,
804    TuningSlowIncDec,
805    TuningInsertVZEROUPPER
806  ];
807
808  list<SubtargetFeature> X86_64V2Features = !listconcat(X86_64V1Features, [
809    FeatureCX16, FeatureLAHFSAHF64, FeatureCRC32, FeaturePOPCNT,
810    FeatureSSE42
811  ]);
812  list<SubtargetFeature> X86_64V2Tuning = [
813    TuningMacroFusion,
814    TuningSlow3OpsLEA,
815    TuningSlowDivide64,
816    TuningSlowUAMem32,
817    TuningFastScalarFSQRT,
818    TuningFastSHLDRotate,
819    TuningFast15ByteNOP,
820    TuningPOPCNTFalseDeps,
821    TuningInsertVZEROUPPER
822  ];
823
824  list<SubtargetFeature> X86_64V3Features = !listconcat(X86_64V2Features, [
825    FeatureAVX2, FeatureBMI, FeatureBMI2, FeatureF16C, FeatureFMA, FeatureLZCNT,
826    FeatureMOVBE, FeatureXSAVE
827  ]);
828  list<SubtargetFeature> X86_64V3Tuning = [
829    TuningMacroFusion,
830    TuningSlow3OpsLEA,
831    TuningSlowDivide64,
832    TuningFastScalarFSQRT,
833    TuningFastSHLDRotate,
834    TuningFast15ByteNOP,
835    TuningFastVariableCrossLaneShuffle,
836    TuningFastVariablePerLaneShuffle,
837    TuningPOPCNTFalseDeps,
838    TuningLZCNTFalseDeps,
839    TuningInsertVZEROUPPER,
840    TuningAllowLight256Bit
841  ];
842
843  list<SubtargetFeature> X86_64V4Features = !listconcat(X86_64V3Features, [
844    FeatureEVEX512,
845    FeatureBWI,
846    FeatureCDI,
847    FeatureDQI,
848    FeatureVLX,
849  ]);
850  list<SubtargetFeature> X86_64V4Tuning = [
851    TuningMacroFusion,
852    TuningSlow3OpsLEA,
853    TuningSlowDivide64,
854    TuningFastScalarFSQRT,
855    TuningFastVectorFSQRT,
856    TuningFastSHLDRotate,
857    TuningFast15ByteNOP,
858    TuningFastVariableCrossLaneShuffle,
859    TuningFastVariablePerLaneShuffle,
860    TuningPrefer256Bit,
861    TuningFastGather,
862    TuningPOPCNTFalseDeps,
863    TuningInsertVZEROUPPER,
864    TuningAllowLight256Bit
865  ];
866
867  // Nehalem
868  list<SubtargetFeature> NHMFeatures = X86_64V2Features;
869  list<SubtargetFeature> NHMTuning = [TuningMacroFusion,
870                                      TuningInsertVZEROUPPER,
871                                      TuningNoDomainDelayMov];
872
873  // Westmere
874  list<SubtargetFeature> WSMAdditionalFeatures = [FeaturePCLMUL];
875  list<SubtargetFeature> WSMTuning = NHMTuning;
876  list<SubtargetFeature> WSMFeatures =
877    !listconcat(NHMFeatures, WSMAdditionalFeatures);
878
879  // Sandybridge
880  list<SubtargetFeature> SNBAdditionalFeatures = [FeatureAVX,
881                                                  FeatureXSAVE,
882                                                  FeatureXSAVEOPT];
883  list<SubtargetFeature> SNBTuning = [TuningMacroFusion,
884                                      TuningSlow3OpsLEA,
885                                      TuningSlowDivide64,
886                                      TuningSlowUAMem32,
887                                      TuningFastScalarFSQRT,
888                                      TuningFastSHLDRotate,
889                                      TuningFast15ByteNOP,
890                                      TuningPOPCNTFalseDeps,
891                                      TuningInsertVZEROUPPER,
892                                      TuningNoDomainDelayMov];
893  list<SubtargetFeature> SNBFeatures =
894    !listconcat(WSMFeatures, SNBAdditionalFeatures);
895
896  // Ivybridge
897  list<SubtargetFeature> IVBAdditionalFeatures = [FeatureRDRAND,
898                                                  FeatureF16C,
899                                                  FeatureFSGSBase];
900  list<SubtargetFeature> IVBTuning = SNBTuning;
901  list<SubtargetFeature> IVBFeatures =
902    !listconcat(SNBFeatures, IVBAdditionalFeatures);
903
904  // Haswell
905  list<SubtargetFeature> HSWAdditionalFeatures = [FeatureAVX2,
906                                                  FeatureBMI,
907                                                  FeatureBMI2,
908                                                  FeatureERMSB,
909                                                  FeatureFMA,
910                                                  FeatureINVPCID,
911                                                  FeatureLZCNT,
912                                                  FeatureMOVBE];
913  list<SubtargetFeature> HSWTuning = [TuningMacroFusion,
914                                      TuningSlow3OpsLEA,
915                                      TuningSlowDivide64,
916                                      TuningFastScalarFSQRT,
917                                      TuningFastSHLDRotate,
918                                      TuningFast15ByteNOP,
919                                      TuningFastVariableCrossLaneShuffle,
920                                      TuningFastVariablePerLaneShuffle,
921                                      TuningPOPCNTFalseDeps,
922                                      TuningLZCNTFalseDeps,
923                                      TuningInsertVZEROUPPER,
924                                      TuningAllowLight256Bit,
925                                      TuningNoDomainDelayMov,
926                                      TuningNoDomainDelayShuffle];
927  list<SubtargetFeature> HSWFeatures =
928    !listconcat(IVBFeatures, HSWAdditionalFeatures);
929
930  // Broadwell
931  list<SubtargetFeature> BDWAdditionalFeatures = [FeatureADX,
932                                                  FeatureRDSEED,
933                                                  FeaturePRFCHW];
934  list<SubtargetFeature> BDWTuning = HSWTuning;
935  list<SubtargetFeature> BDWFeatures =
936    !listconcat(HSWFeatures, BDWAdditionalFeatures);
937
938  // Skylake
939  list<SubtargetFeature> SKLAdditionalFeatures = [FeatureAES,
940                                                  FeatureXSAVEC,
941                                                  FeatureXSAVES,
942                                                  FeatureCLFLUSHOPT];
943  list<SubtargetFeature> SKLTuning = [TuningFastGather,
944                                      TuningMacroFusion,
945                                      TuningSlow3OpsLEA,
946                                      TuningSlowDivide64,
947                                      TuningFastScalarFSQRT,
948                                      TuningFastVectorFSQRT,
949                                      TuningFastSHLDRotate,
950                                      TuningFast15ByteNOP,
951                                      TuningFastVariableCrossLaneShuffle,
952                                      TuningFastVariablePerLaneShuffle,
953                                      TuningPOPCNTFalseDeps,
954                                      TuningInsertVZEROUPPER,
955                                      TuningAllowLight256Bit,
956                                      TuningNoDomainDelayMov,
957                                      TuningNoDomainDelayShuffle,
958                                      TuningNoDomainDelayBlend];
959  list<SubtargetFeature> SKLFeatures =
960    !listconcat(BDWFeatures, SKLAdditionalFeatures);
961
962  // Skylake-AVX512
963  list<SubtargetFeature> SKXAdditionalFeatures = [FeatureAES,
964                                                  FeatureXSAVEC,
965                                                  FeatureXSAVES,
966                                                  FeatureCLFLUSHOPT,
967                                                  FeatureAVX512,
968                                                  FeatureEVEX512,
969                                                  FeatureCDI,
970                                                  FeatureDQI,
971                                                  FeatureBWI,
972                                                  FeatureVLX,
973                                                  FeaturePKU,
974                                                  FeatureCLWB];
975  list<SubtargetFeature> SKXTuning = [TuningFastGather,
976                                      TuningMacroFusion,
977                                      TuningSlow3OpsLEA,
978                                      TuningSlowDivide64,
979                                      TuningFastScalarFSQRT,
980                                      TuningFastVectorFSQRT,
981                                      TuningFastSHLDRotate,
982                                      TuningFast15ByteNOP,
983                                      TuningFastVariableCrossLaneShuffle,
984                                      TuningFastVariablePerLaneShuffle,
985                                      TuningPrefer256Bit,
986                                      TuningPOPCNTFalseDeps,
987                                      TuningInsertVZEROUPPER,
988                                      TuningAllowLight256Bit,
989                                      TuningPreferShiftShuffle,
990                                      TuningNoDomainDelayMov,
991                                      TuningNoDomainDelayShuffle,
992                                      TuningNoDomainDelayBlend,
993                                      TuningFastImmVectorShift];
994  list<SubtargetFeature> SKXFeatures =
995    !listconcat(BDWFeatures, SKXAdditionalFeatures);
996
997  // Cascadelake
998  list<SubtargetFeature> CLXAdditionalFeatures = [FeatureVNNI];
999  list<SubtargetFeature> CLXTuning = SKXTuning;
1000  list<SubtargetFeature> CLXFeatures =
1001    !listconcat(SKXFeatures, CLXAdditionalFeatures);
1002
1003  // Cooperlake
1004  list<SubtargetFeature> CPXAdditionalFeatures = [FeatureBF16];
1005  list<SubtargetFeature> CPXTuning = SKXTuning;
1006  list<SubtargetFeature> CPXFeatures =
1007    !listconcat(CLXFeatures, CPXAdditionalFeatures);
1008
1009  // Cannonlake
1010  list<SubtargetFeature> CNLAdditionalFeatures = [FeatureAVX512,
1011                                                  FeatureEVEX512,
1012                                                  FeatureCDI,
1013                                                  FeatureDQI,
1014                                                  FeatureBWI,
1015                                                  FeatureVLX,
1016                                                  FeaturePKU,
1017                                                  FeatureVBMI,
1018                                                  FeatureIFMA,
1019                                                  FeatureSHA];
1020  list<SubtargetFeature> CNLTuning = [TuningFastGather,
1021                                      TuningMacroFusion,
1022                                      TuningSlow3OpsLEA,
1023                                      TuningSlowDivide64,
1024                                      TuningFastScalarFSQRT,
1025                                      TuningFastVectorFSQRT,
1026                                      TuningFastSHLDRotate,
1027                                      TuningFast15ByteNOP,
1028                                      TuningFastVariableCrossLaneShuffle,
1029                                      TuningFastVariablePerLaneShuffle,
1030                                      TuningPrefer256Bit,
1031                                      TuningInsertVZEROUPPER,
1032                                      TuningAllowLight256Bit,
1033                                      TuningNoDomainDelayMov,
1034                                      TuningNoDomainDelayShuffle,
1035                                      TuningNoDomainDelayBlend,
1036                                      TuningFastImmVectorShift];
1037  list<SubtargetFeature> CNLFeatures =
1038    !listconcat(SKLFeatures, CNLAdditionalFeatures);
1039
1040  // Icelake
1041  list<SubtargetFeature> ICLAdditionalFeatures = [FeatureBITALG,
1042                                                  FeatureVAES,
1043                                                  FeatureVBMI2,
1044                                                  FeatureVNNI,
1045                                                  FeatureVPCLMULQDQ,
1046                                                  FeatureVPOPCNTDQ,
1047                                                  FeatureGFNI,
1048                                                  FeatureRDPID,
1049                                                  FeatureFSRM];
1050  list<SubtargetFeature> ICLTuning = [TuningFastGather,
1051                                      TuningMacroFusion,
1052                                      TuningSlowDivide64,
1053                                      TuningFastScalarFSQRT,
1054                                      TuningFastVectorFSQRT,
1055                                      TuningFastSHLDRotate,
1056                                      TuningFast15ByteNOP,
1057                                      TuningFastVariableCrossLaneShuffle,
1058                                      TuningFastVariablePerLaneShuffle,
1059                                      TuningPrefer256Bit,
1060                                      TuningInsertVZEROUPPER,
1061                                      TuningAllowLight256Bit,
1062                                      TuningNoDomainDelayMov,
1063                                      TuningNoDomainDelayShuffle,
1064                                      TuningNoDomainDelayBlend,
1065                                      TuningFastImmVectorShift];
1066  list<SubtargetFeature> ICLFeatures =
1067    !listconcat(CNLFeatures, ICLAdditionalFeatures);
1068
1069  // Icelake Server
1070  list<SubtargetFeature> ICXAdditionalFeatures = [FeaturePCONFIG,
1071                                                  FeatureCLWB,
1072                                                  FeatureWBNOINVD];
1073  list<SubtargetFeature> ICXTuning = ICLTuning;
1074  list<SubtargetFeature> ICXFeatures =
1075    !listconcat(ICLFeatures, ICXAdditionalFeatures);
1076
1077  // Tigerlake
1078  list<SubtargetFeature> TGLAdditionalFeatures = [FeatureVP2INTERSECT,
1079                                                  FeatureCLWB,
1080                                                  FeatureMOVDIRI,
1081                                                  FeatureMOVDIR64B,
1082                                                  FeatureSHSTK];
1083  list<SubtargetFeature> TGLTuning = ICLTuning;
1084  list<SubtargetFeature> TGLFeatures =
1085    !listconcat(ICLFeatures, TGLAdditionalFeatures );
1086
1087  // Sapphirerapids
1088  list<SubtargetFeature> SPRAdditionalFeatures = [FeatureAMXTILE,
1089                                                  FeatureAMXINT8,
1090                                                  FeatureAMXBF16,
1091                                                  FeatureBF16,
1092                                                  FeatureSERIALIZE,
1093                                                  FeatureCLDEMOTE,
1094                                                  FeatureWAITPKG,
1095                                                  FeaturePTWRITE,
1096                                                  FeatureFP16,
1097                                                  FeatureAVXVNNI,
1098                                                  FeatureTSXLDTRK,
1099                                                  FeatureENQCMD,
1100                                                  FeatureSHSTK,
1101                                                  FeatureMOVDIRI,
1102                                                  FeatureMOVDIR64B,
1103                                                  FeatureUINTR];
1104  list<SubtargetFeature> SPRAdditionalTuning = [TuningMULCFalseDeps,
1105                                                TuningPERMFalseDeps,
1106                                                TuningRANGEFalseDeps,
1107                                                TuningGETMANTFalseDeps,
1108                                                TuningMULLQFalseDeps];
1109  list<SubtargetFeature> SPRTuning = !listconcat(ICXTuning, SPRAdditionalTuning);
1110  list<SubtargetFeature> SPRFeatures =
1111    !listconcat(ICXFeatures, SPRAdditionalFeatures);
1112
1113  // Graniterapids
1114  list<SubtargetFeature> GNRAdditionalFeatures = [FeatureAMXFP16,
1115                                                  FeaturePREFETCHI];
1116  list<SubtargetFeature> GNRFeatures =
1117    !listconcat(SPRFeatures, GNRAdditionalFeatures);
1118
1119  // Graniterapids D
1120  list<SubtargetFeature> GNRDAdditionalFeatures = [FeatureAMXCOMPLEX];
1121  list<SubtargetFeature> GNRDFeatures =
1122    !listconcat(GNRFeatures, GNRDAdditionalFeatures);
1123
1124  // Atom
1125  list<SubtargetFeature> AtomFeatures = [FeatureX87,
1126                                         FeatureCX8,
1127                                         FeatureCMOV,
1128                                         FeatureMMX,
1129                                         FeatureSSSE3,
1130                                         FeatureFXSR,
1131                                         FeatureNOPL,
1132                                         FeatureX86_64,
1133                                         FeatureCX16,
1134                                         FeatureMOVBE,
1135                                         FeatureLAHFSAHF64];
1136  list<SubtargetFeature> AtomTuning = [ProcIntelAtom,
1137                                       TuningSlowUAMem16,
1138                                       TuningLEAForSP,
1139                                       TuningSlowDivide32,
1140                                       TuningSlowDivide64,
1141                                       TuningSlowTwoMemOps,
1142                                       TuningLEAUsesAG,
1143                                       TuningPadShortFunctions,
1144                                       TuningInsertVZEROUPPER,
1145                                       TuningNoDomainDelay];
1146
1147  // Silvermont
1148  list<SubtargetFeature> SLMAdditionalFeatures = [FeatureSSE42,
1149                                                  FeatureCRC32,
1150                                                  FeaturePOPCNT,
1151                                                  FeaturePCLMUL,
1152                                                  FeaturePRFCHW,
1153                                                  FeatureRDRAND];
1154  list<SubtargetFeature> SLMTuning = [TuningUseSLMArithCosts,
1155                                      TuningSlowTwoMemOps,
1156                                      TuningSlowLEA,
1157                                      TuningSlowIncDec,
1158                                      TuningSlowDivide64,
1159                                      TuningSlowPMULLD,
1160                                      TuningFast7ByteNOP,
1161                                      TuningFastMOVBE,
1162                                      TuningPOPCNTFalseDeps,
1163                                      TuningInsertVZEROUPPER,
1164                                      TuningNoDomainDelay];
1165  list<SubtargetFeature> SLMFeatures =
1166    !listconcat(AtomFeatures, SLMAdditionalFeatures);
1167
1168  // Goldmont
1169  list<SubtargetFeature> GLMAdditionalFeatures = [FeatureAES,
1170                                                  FeatureSHA,
1171                                                  FeatureRDSEED,
1172                                                  FeatureXSAVE,
1173                                                  FeatureXSAVEOPT,
1174                                                  FeatureXSAVEC,
1175                                                  FeatureXSAVES,
1176                                                  FeatureCLFLUSHOPT,
1177                                                  FeatureFSGSBase];
1178  list<SubtargetFeature> GLMTuning = [TuningUseGLMDivSqrtCosts,
1179                                      TuningSlowTwoMemOps,
1180                                      TuningSlowLEA,
1181                                      TuningSlowIncDec,
1182                                      TuningFastMOVBE,
1183                                      TuningPOPCNTFalseDeps,
1184                                      TuningInsertVZEROUPPER,
1185                                      TuningNoDomainDelay];
1186  list<SubtargetFeature> GLMFeatures =
1187    !listconcat(SLMFeatures, GLMAdditionalFeatures);
1188
1189  // Goldmont Plus
1190  list<SubtargetFeature> GLPAdditionalFeatures = [FeaturePTWRITE,
1191                                                  FeatureRDPID];
1192  list<SubtargetFeature> GLPTuning = [TuningUseGLMDivSqrtCosts,
1193                                      TuningSlowTwoMemOps,
1194                                      TuningSlowLEA,
1195                                      TuningSlowIncDec,
1196                                      TuningFastMOVBE,
1197                                      TuningInsertVZEROUPPER,
1198                                      TuningNoDomainDelay];
1199  list<SubtargetFeature> GLPFeatures =
1200    !listconcat(GLMFeatures, GLPAdditionalFeatures);
1201
1202  // Tremont
1203  list<SubtargetFeature> TRMAdditionalFeatures = [FeatureCLWB,
1204                                                  FeatureGFNI];
1205  list<SubtargetFeature> TRMTuning = GLPTuning;
1206  list<SubtargetFeature> TRMFeatures =
1207    !listconcat(GLPFeatures, TRMAdditionalFeatures);
1208
1209  // Alderlake
1210  list<SubtargetFeature> ADLAdditionalFeatures = [FeatureSERIALIZE,
1211                                                  FeaturePCONFIG,
1212                                                  FeatureSHSTK,
1213                                                  FeatureWIDEKL,
1214                                                  FeatureINVPCID,
1215                                                  FeatureADX,
1216                                                  FeatureFMA,
1217                                                  FeatureVAES,
1218                                                  FeatureVPCLMULQDQ,
1219                                                  FeatureF16C,
1220                                                  FeatureBMI,
1221                                                  FeatureBMI2,
1222                                                  FeatureLZCNT,
1223                                                  FeatureAVXVNNI,
1224                                                  FeaturePKU,
1225                                                  FeatureHRESET,
1226                                                  FeatureCLDEMOTE,
1227                                                  FeatureMOVDIRI,
1228                                                  FeatureMOVDIR64B,
1229                                                  FeatureWAITPKG];
1230  list<SubtargetFeature> ADLAdditionalTuning = [TuningPERMFalseDeps,
1231                                                TuningPreferMovmskOverVTest,
1232                                                TuningFastImmVectorShift];
1233  list<SubtargetFeature> ADLTuning = !listconcat(SKLTuning, ADLAdditionalTuning);
1234  list<SubtargetFeature> ADLFeatures =
1235    !listconcat(TRMFeatures, ADLAdditionalFeatures);
1236
1237  // Gracemont
1238  list<SubtargetFeature> GRTTuning = [TuningMacroFusion,
1239                                      TuningSlow3OpsLEA,
1240                                      TuningSlowDivide32,
1241                                      TuningSlowDivide64,
1242                                      TuningFastScalarFSQRT,
1243                                      TuningFastVectorFSQRT,
1244                                      TuningFast15ByteNOP,
1245                                      TuningFastVariablePerLaneShuffle,
1246                                      TuningPOPCNTFalseDeps,
1247                                      TuningInsertVZEROUPPER];
1248
1249  // Sierraforest
1250  list<SubtargetFeature> SRFAdditionalFeatures = [FeatureCMPCCXADD,
1251                                                  FeatureAVXIFMA,
1252                                                  FeatureAVXNECONVERT,
1253                                                  FeatureENQCMD,
1254                                                  FeatureUINTR,
1255                                                  FeatureAVXVNNIINT8];
1256  list<SubtargetFeature> SRFFeatures =
1257    !listconcat(ADLFeatures, SRFAdditionalFeatures);
1258
1259  // Arrowlake S
1260  list<SubtargetFeature> ARLSAdditionalFeatures = [FeatureAVXVNNIINT16,
1261                                                   FeatureSHA512,
1262                                                   FeatureSM3,
1263                                                   FeatureSM4];
1264  list<SubtargetFeature> ARLSFeatures =
1265    !listconcat(SRFFeatures, ARLSAdditionalFeatures);
1266
1267  // Pantherlake
1268  list<SubtargetFeature> PTLAdditionalFeatures = [FeaturePREFETCHI];
1269  list<SubtargetFeature> PTLFeatures =
1270    !listconcat(ARLSFeatures, PTLAdditionalFeatures);
1271
1272
1273  // Clearwaterforest
1274  list<SubtargetFeature> CWFAdditionalFeatures = [FeaturePREFETCHI,
1275                                                  FeatureUSERMSR];
1276  list<SubtargetFeature> CWFFeatures =
1277    !listconcat(ARLSFeatures, CWFAdditionalFeatures);
1278
1279  // Knights Landing
1280  list<SubtargetFeature> KNLFeatures = [FeatureX87,
1281                                        FeatureCX8,
1282                                        FeatureCMOV,
1283                                        FeatureMMX,
1284                                        FeatureFXSR,
1285                                        FeatureNOPL,
1286                                        FeatureX86_64,
1287                                        FeatureCX16,
1288                                        FeatureCRC32,
1289                                        FeaturePOPCNT,
1290                                        FeaturePCLMUL,
1291                                        FeatureXSAVE,
1292                                        FeatureXSAVEOPT,
1293                                        FeatureLAHFSAHF64,
1294                                        FeatureAES,
1295                                        FeatureRDRAND,
1296                                        FeatureF16C,
1297                                        FeatureFSGSBase,
1298                                        FeatureAVX512,
1299                                        FeatureEVEX512,
1300                                        FeatureERI,
1301                                        FeatureCDI,
1302                                        FeaturePFI,
1303                                        FeaturePREFETCHWT1,
1304                                        FeatureADX,
1305                                        FeatureRDSEED,
1306                                        FeatureMOVBE,
1307                                        FeatureLZCNT,
1308                                        FeatureBMI,
1309                                        FeatureBMI2,
1310                                        FeatureFMA,
1311                                        FeaturePRFCHW];
1312  list<SubtargetFeature> KNLTuning = [TuningSlowDivide64,
1313                                      TuningSlow3OpsLEA,
1314                                      TuningSlowIncDec,
1315                                      TuningSlowTwoMemOps,
1316                                      TuningPreferMaskRegisters,
1317                                      TuningFastGather,
1318                                      TuningFastMOVBE,
1319                                      TuningSlowPMADDWD];
1320  // TODO Add AVX5124FMAPS/AVX5124VNNIW features
1321  list<SubtargetFeature> KNMFeatures =
1322    !listconcat(KNLFeatures, [FeatureVPOPCNTDQ]);
1323
1324  // Barcelona
1325  list<SubtargetFeature> BarcelonaFeatures = [FeatureX87,
1326                                              FeatureCX8,
1327                                              FeatureSSE4A,
1328                                              Feature3DNowA,
1329                                              FeatureFXSR,
1330                                              FeatureNOPL,
1331                                              FeatureCX16,
1332                                              FeaturePRFCHW,
1333                                              FeatureLZCNT,
1334                                              FeaturePOPCNT,
1335                                              FeatureLAHFSAHF64,
1336                                              FeatureCMOV,
1337                                              FeatureX86_64];
1338  list<SubtargetFeature> BarcelonaTuning = [TuningFastScalarShiftMasks,
1339                                            TuningSlowSHLD,
1340                                            TuningSBBDepBreaking,
1341                                            TuningInsertVZEROUPPER];
1342
1343  // Bobcat
1344  list<SubtargetFeature> BtVer1Features = [FeatureX87,
1345                                           FeatureCX8,
1346                                           FeatureCMOV,
1347                                           FeatureMMX,
1348                                           FeatureSSSE3,
1349                                           FeatureSSE4A,
1350                                           FeatureFXSR,
1351                                           FeatureNOPL,
1352                                           FeatureX86_64,
1353                                           FeatureCX16,
1354                                           FeaturePRFCHW,
1355                                           FeatureLZCNT,
1356                                           FeaturePOPCNT,
1357                                           FeatureLAHFSAHF64];
1358  list<SubtargetFeature> BtVer1Tuning = [TuningFast15ByteNOP,
1359                                         TuningFastScalarShiftMasks,
1360                                         TuningFastVectorShiftMasks,
1361                                         TuningSlowSHLD,
1362                                         TuningSBBDepBreaking,
1363                                         TuningInsertVZEROUPPER];
1364
1365  // Jaguar
1366  list<SubtargetFeature> BtVer2AdditionalFeatures = [FeatureAVX,
1367                                                     FeatureAES,
1368                                                     FeatureCRC32,
1369                                                     FeaturePCLMUL,
1370                                                     FeatureBMI,
1371                                                     FeatureF16C,
1372                                                     FeatureMOVBE,
1373                                                     FeatureXSAVE,
1374                                                     FeatureXSAVEOPT];
1375  list<SubtargetFeature> BtVer2Tuning = [TuningFastLZCNT,
1376                                         TuningFastBEXTR,
1377                                         TuningFastHorizontalOps,
1378                                         TuningFast15ByteNOP,
1379                                         TuningFastScalarShiftMasks,
1380                                         TuningFastVectorShiftMasks,
1381                                         TuningFastMOVBE,
1382                                         TuningSBBDepBreaking,
1383                                         TuningSlowSHLD];
1384  list<SubtargetFeature> BtVer2Features =
1385    !listconcat(BtVer1Features, BtVer2AdditionalFeatures);
1386
1387  // Bulldozer
1388  list<SubtargetFeature> BdVer1Features = [FeatureX87,
1389                                           FeatureCX8,
1390                                           FeatureCMOV,
1391                                           FeatureXOP,
1392                                           FeatureX86_64,
1393                                           FeatureCX16,
1394                                           FeatureAES,
1395                                           FeatureCRC32,
1396                                           FeaturePRFCHW,
1397                                           FeaturePCLMUL,
1398                                           FeatureMMX,
1399                                           FeatureFXSR,
1400                                           FeatureNOPL,
1401                                           FeatureLZCNT,
1402                                           FeaturePOPCNT,
1403                                           FeatureXSAVE,
1404                                           FeatureLWP,
1405                                           FeatureLAHFSAHF64];
1406  list<SubtargetFeature> BdVer1Tuning = [TuningSlowSHLD,
1407                                         TuningFast11ByteNOP,
1408                                         TuningFastScalarShiftMasks,
1409                                         TuningBranchFusion,
1410                                         TuningSBBDepBreaking,
1411                                         TuningInsertVZEROUPPER];
1412
1413  // PileDriver
1414  list<SubtargetFeature> BdVer2AdditionalFeatures = [FeatureF16C,
1415                                                     FeatureBMI,
1416                                                     FeatureTBM,
1417                                                     FeatureFMA];
1418  list<SubtargetFeature> BdVer2AdditionalTuning = [TuningFastBEXTR,
1419                                                   TuningFastMOVBE];
1420  list<SubtargetFeature> BdVer2Tuning =
1421    !listconcat(BdVer1Tuning, BdVer2AdditionalTuning);
1422  list<SubtargetFeature> BdVer2Features =
1423    !listconcat(BdVer1Features, BdVer2AdditionalFeatures);
1424
1425  // Steamroller
1426  list<SubtargetFeature> BdVer3AdditionalFeatures = [FeatureXSAVEOPT,
1427                                                     FeatureFSGSBase];
1428  list<SubtargetFeature> BdVer3Tuning = BdVer2Tuning;
1429  list<SubtargetFeature> BdVer3Features =
1430    !listconcat(BdVer2Features, BdVer3AdditionalFeatures);
1431
1432  // Excavator
1433  list<SubtargetFeature> BdVer4AdditionalFeatures = [FeatureAVX2,
1434                                                     FeatureBMI2,
1435                                                     FeatureMOVBE,
1436                                                     FeatureRDRAND,
1437                                                     FeatureMWAITX];
1438  list<SubtargetFeature> BdVer4Tuning = BdVer3Tuning;
1439  list<SubtargetFeature> BdVer4Features =
1440    !listconcat(BdVer3Features, BdVer4AdditionalFeatures);
1441
1442
1443  // AMD Zen Processors common ISAs
1444  list<SubtargetFeature> ZNFeatures = [FeatureADX,
1445                                       FeatureAES,
1446                                       FeatureAVX2,
1447                                       FeatureBMI,
1448                                       FeatureBMI2,
1449                                       FeatureCLFLUSHOPT,
1450                                       FeatureCLZERO,
1451                                       FeatureCMOV,
1452                                       FeatureX86_64,
1453                                       FeatureCX16,
1454                                       FeatureCRC32,
1455                                       FeatureF16C,
1456                                       FeatureFMA,
1457                                       FeatureFSGSBase,
1458                                       FeatureFXSR,
1459                                       FeatureNOPL,
1460                                       FeatureLAHFSAHF64,
1461                                       FeatureLZCNT,
1462                                       FeatureMMX,
1463                                       FeatureMOVBE,
1464                                       FeatureMWAITX,
1465                                       FeaturePCLMUL,
1466                                       FeaturePOPCNT,
1467                                       FeaturePRFCHW,
1468                                       FeatureRDRAND,
1469                                       FeatureRDSEED,
1470                                       FeatureSHA,
1471                                       FeatureSSE4A,
1472                                       FeatureX87,
1473                                       FeatureXSAVE,
1474                                       FeatureXSAVEC,
1475                                       FeatureXSAVEOPT,
1476                                       FeatureXSAVES];
1477  list<SubtargetFeature> ZNTuning = [TuningFastLZCNT,
1478                                     TuningFastBEXTR,
1479                                     TuningFast15ByteNOP,
1480                                     TuningBranchFusion,
1481                                     TuningFastScalarFSQRT,
1482                                     TuningFastVectorFSQRT,
1483                                     TuningFastScalarShiftMasks,
1484                                     TuningFastVariablePerLaneShuffle,
1485                                     TuningFastMOVBE,
1486                                     TuningSlowSHLD,
1487                                     TuningSBBDepBreaking,
1488                                     TuningInsertVZEROUPPER,
1489                                     TuningAllowLight256Bit];
1490  list<SubtargetFeature> ZN2AdditionalFeatures = [FeatureCLWB,
1491                                                  FeatureRDPID,
1492                                                  FeatureRDPRU,
1493                                                  FeatureWBNOINVD];
1494  list<SubtargetFeature> ZN2Tuning = ZNTuning;
1495  list<SubtargetFeature> ZN2Features =
1496    !listconcat(ZNFeatures, ZN2AdditionalFeatures);
1497  list<SubtargetFeature> ZN3AdditionalFeatures = [FeatureFSRM,
1498                                                  FeatureINVPCID,
1499                                                  FeaturePKU,
1500                                                  FeatureVAES,
1501                                                  FeatureVPCLMULQDQ];
1502  list<SubtargetFeature> ZN3AdditionalTuning = [TuningMacroFusion];
1503  list<SubtargetFeature> ZN3Tuning =
1504    !listconcat(ZN2Tuning, ZN3AdditionalTuning);
1505  list<SubtargetFeature> ZN3Features =
1506    !listconcat(ZN2Features, ZN3AdditionalFeatures);
1507  list<SubtargetFeature> ZN4Tuning = ZN3Tuning;
1508  list<SubtargetFeature> ZN4AdditionalFeatures = [FeatureAVX512,
1509                                                  FeatureEVEX512,
1510                                                  FeatureCDI,
1511                                                  FeatureDQI,
1512                                                  FeatureBWI,
1513                                                  FeatureVLX,
1514                                                  FeatureVBMI,
1515                                                  FeatureVBMI2,
1516                                                  FeatureIFMA,
1517                                                  FeatureVNNI,
1518                                                  FeatureBITALG,
1519                                                  FeatureGFNI,
1520                                                  FeatureBF16,
1521                                                  FeatureSHSTK,
1522                                                  FeatureVPOPCNTDQ];
1523  list<SubtargetFeature> ZN4Features =
1524    !listconcat(ZN3Features, ZN4AdditionalFeatures);
1525}
1526
1527//===----------------------------------------------------------------------===//
1528// X86 processors supported.
1529//===----------------------------------------------------------------------===//
1530
// Processor definition that always uses GenericModel as its scheduling
// model. Name, Features and TuneFeatures are forwarded unchanged to
// ProcessorModel.
class Proc<string Name, list<SubtargetFeature> Features,
           list<SubtargetFeature> TuneFeatures>
 : ProcessorModel<Name, GenericModel, Features, TuneFeatures>;
1534
// Processor definition with an explicitly chosen scheduling model. All four
// arguments are forwarded unchanged to ProcessorModel.
class ProcModel<string Name, SchedMachineModel Model,
                list<SubtargetFeature> Features,
                list<SubtargetFeature> TuneFeatures>
 : ProcessorModel<Name, Model, Features, TuneFeatures>;
1539
// NOTE: CMPXCHG8B is here for legacy compatibility so that it is only disabled
// if i386/i486 is specifically requested.
// NOTE: 64Bit is here as "generic" is the default llc CPU. The X86Subtarget
// constructor checks that any CPU used in 64-bit mode has FeatureX86_64
// enabled. It has no effect on code generation.
// NOTE: As a default tuning, "generic" aims to produce code optimized for the
// most common X86 processors. The tunings might be changed over time. It is
// recommended to use "tune-cpu"="x86-64" in function attribute for consistency.
def : ProcModel<"generic", SandyBridgeModel,
                [FeatureX87, FeatureCX8, FeatureX86_64],
                [TuningSlow3OpsLEA,
                 TuningSlowDivide64,
                 TuningMacroFusion,
                 TuningFastScalarFSQRT,
                 TuningFast15ByteNOP,
                 TuningInsertVZEROUPPER]>;
1556
// Earliest 32-bit CPUs. All four share the same tuning list; "i586" and
// "pentium" add CX8 to the X87-only feature list of "i386"/"i486".
def : Proc<"i386",            [FeatureX87],
                              [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"i486",            [FeatureX87],
                              [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"i586",            [FeatureX87, FeatureCX8],
                              [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"pentium",         [FeatureX87, FeatureCX8],
                              [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
// Each foreach emits one identical processor definition per spelling of the
// CPU name.
foreach P = ["pentium-mmx", "pentium_mmx"] in {
  def : Proc<P, [FeatureX87, FeatureCX8, FeatureMMX],
                [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
def : Proc<"i686", [FeatureX87, FeatureCX8, FeatureCMOV],
                   [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
foreach P = ["pentiumpro", "pentium_pro"] in {
  def : Proc<P, [FeatureX87, FeatureCX8, FeatureCMOV, FeatureNOPL],
                [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
// "pentium2" adds MMX and FXSR to the "pentiumpro" feature list; "pentium3"
// additionally enables SSE1. Every name in a foreach list gets the same
// definition.
foreach P = ["pentium2", "pentium_ii"] in {
  def : Proc<P, [FeatureX87, FeatureCX8, FeatureMMX, FeatureCMOV,
                 FeatureFXSR, FeatureNOPL],
                [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
foreach P = ["pentium3", "pentium3m", "pentium_iii_no_xmm_regs", "pentium_iii"] in {
  def : Proc<P, [FeatureX87, FeatureCX8, FeatureMMX,
                 FeatureSSE1, FeatureFXSR, FeatureNOPL, FeatureCMOV],
                [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
1585
// Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
// The intent is to enable it for pentium4 which is the current default
// processor in a vanilla 32-bit clang compilation when no specific
// architecture is specified.  This generally gives a nice performance
// increase on silvermont, with largely neutral behavior on other
// contemporary large core processors.
// pentium-m, pentium4m, prescott and nocona are included as a preventative
// measure to avoid performance surprises, in case clang's default cpu
// changes slightly.

foreach P = ["pentium_m", "pentium-m"] in {
  def : ProcModel<P, GenericPostRAModel,
                  [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE2,
                   FeatureFXSR, FeatureNOPL, FeatureCMOV],
                  [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
1602
// Same scheduling model, feature list and tuning list as the
// "pentium_m"/"pentium-m" definitions.
foreach P = ["pentium4", "pentium4m", "pentium_4"] in {
  def : ProcModel<P, GenericPostRAModel,
                  [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE2,
                   FeatureFXSR, FeatureNOPL, FeatureCMOV],
                  [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
1609
// Intel Quark.
// The feature list holds only CX8; unlike the other 32-bit definitions here
// it does not enable X87.
def : Proc<"lakemont", [FeatureCX8],
                       [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1613
// Intel Core Duo.
// Uses SandyBridgeModel as its scheduling model and enables SSE3.
def : ProcModel<"yonah", SandyBridgeModel,
                [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE3,
                 FeatureFXSR, FeatureNOPL, FeatureCMOV],
                [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1619
// NetBurst.
// Same feature list as "pentium4" except that SSE3 replaces SSE2.
foreach P = ["prescott", "pentium_4_sse3"] in {
  def : ProcModel<P, GenericPostRAModel,
                  [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE3,
                   FeatureFXSR, FeatureNOPL, FeatureCMOV],
                  [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
// First definition in this sequence that enables X86_64 and CX16; otherwise
// the same feature set as "prescott".
def : ProcModel<"nocona", GenericPostRAModel, [
  FeatureX87,
  FeatureCX8,
  FeatureCMOV,
  FeatureMMX,
  FeatureSSE3,
  FeatureFXSR,
  FeatureNOPL,
  FeatureX86_64,
  FeatureCX16
],
[
  TuningSlowUAMem16,
  TuningInsertVZEROUPPER
]>;
1642
// Intel Core 2 Solo/Duo.
// Adds TuningMacroFusion to the tuning list and uses SandyBridgeModel.
foreach P = ["core2", "core_2_duo_ssse3"] in {
  def : ProcModel<P, SandyBridgeModel, [
    FeatureX87,
    FeatureCX8,
    FeatureCMOV,
    FeatureMMX,
    FeatureSSSE3,
    FeatureFXSR,
    FeatureNOPL,
    FeatureX86_64,
    FeatureCX16,
    FeatureLAHFSAHF64
  ],
  [
    TuningMacroFusion,
    TuningSlowUAMem16,
    TuningInsertVZEROUPPER
  ]>;
}
// Same as the "core2" definition except that SSE41 replaces SSSE3.
foreach P = ["penryn", "core_2_duo_sse4_1"] in {
  def : ProcModel<P, SandyBridgeModel, [
    FeatureX87,
    FeatureCX8,
    FeatureCMOV,
    FeatureMMX,
    FeatureSSE41,
    FeatureFXSR,
    FeatureNOPL,
    FeatureX86_64,
    FeatureCX16,
    FeatureLAHFSAHF64
  ],
  [
    TuningMacroFusion,
    TuningSlowUAMem16,
    TuningInsertVZEROUPPER
  ]>;
}
1682
// Atom CPUs.
// From here on, feature and tuning lists come from the ProcessorFeatures
// record rather than being spelled out inline.
foreach P = ["bonnell", "atom"] in {
  def : ProcModel<P, AtomModel, ProcessorFeatures.AtomFeatures,
                  ProcessorFeatures.AtomTuning>;
}
1688
// Silvermont and its alternate names, scheduled with SLMModel.
foreach P = ["silvermont", "slm", "atom_sse4_2"] in {
  def : ProcModel<P, SLMModel, ProcessorFeatures.SLMFeatures,
                  ProcessorFeatures.SLMTuning>;
}
1693
// Goldmont through Tremont; all are scheduled with SLMModel.
// "atom_sse4_2_movbe" combines GLMFeatures with SLMTuning, whereas
// "goldmont" pairs GLMFeatures with GLMTuning.
def : ProcModel<"atom_sse4_2_movbe", SLMModel, ProcessorFeatures.GLMFeatures,
                ProcessorFeatures.SLMTuning>;
def : ProcModel<"goldmont", SLMModel, ProcessorFeatures.GLMFeatures,
                ProcessorFeatures.GLMTuning>;
foreach P = ["goldmont_plus", "goldmont-plus"] in {
  def : ProcModel<P, SLMModel, ProcessorFeatures.GLPFeatures,
                  ProcessorFeatures.GLPTuning>;
}
def : ProcModel<"tremont", SLMModel, ProcessorFeatures.TRMFeatures,
                ProcessorFeatures.TRMTuning>;
// "sierraforest" and "grandridge" share one definition: SRFFeatures with
// TRMTuning, scheduled with AlderlakePModel.
foreach P = ["sierraforest", "grandridge"] in {
  def : ProcModel<P, AlderlakePModel, ProcessorFeatures.SRFFeatures,
                  ProcessorFeatures.TRMTuning>;
}
1708
// "Arrandale" along with corei3 and corei5
foreach P = ["nehalem", "corei7", "core_i7_sse4_2"] in {
  def : ProcModel<P, SandyBridgeModel, ProcessorFeatures.NHMFeatures,
                  ProcessorFeatures.NHMTuning>;
}

// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
foreach P = ["westmere", "core_aes_pclmulqdq"] in {
  def : ProcModel<P, SandyBridgeModel, ProcessorFeatures.WSMFeatures,
                  ProcessorFeatures.WSMTuning>;
}
1720
// Sandy Bridge and Ivy Bridge names; both groups use SandyBridgeModel.
foreach P = ["sandybridge", "corei7-avx", "core_2nd_gen_avx"] in {
  def : ProcModel<P, SandyBridgeModel, ProcessorFeatures.SNBFeatures,
                  ProcessorFeatures.SNBTuning>;
}

foreach P = ["ivybridge", "core-avx-i", "core_3rd_gen_avx"] in {
  def : ProcModel<P, SandyBridgeModel, ProcessorFeatures.IVBFeatures,
                  ProcessorFeatures.IVBTuning>;
}
1730
// Haswell, Broadwell and Skylake client; each has its own scheduling model.
foreach P = ["haswell", "core-avx2", "core_4th_gen_avx", "core_4th_gen_avx_tsx"] in {
  def : ProcModel<P, HaswellModel, ProcessorFeatures.HSWFeatures,
                  ProcessorFeatures.HSWTuning>;
}

foreach P = ["broadwell", "core_5th_gen_avx", "core_5th_gen_avx_tsx"] in {
  def : ProcModel<P, BroadwellModel, ProcessorFeatures.BDWFeatures,
                  ProcessorFeatures.BDWTuning>;
}

def : ProcModel<"skylake", SkylakeClientModel, ProcessorFeatures.SKLFeatures,
                ProcessorFeatures.SKLTuning>;
1743
// FIXME: define KNL scheduler model
// Until then both KNL and KNM are scheduled with HaswellModel. "knm" uses
// its own feature list but shares KNLTuning.
foreach P = ["knl", "mic_avx512"] in {
  def : ProcModel<P, HaswellModel, ProcessorFeatures.KNLFeatures,
                  ProcessorFeatures.KNLTuning>;
}
def : ProcModel<"knm", HaswellModel, ProcessorFeatures.KNMFeatures,
                ProcessorFeatures.KNLTuning>;
1751
// Processors scheduled with SkylakeServerModel. Each one has its own
// feature and tuning lists in ProcessorFeatures.
foreach P = ["skylake-avx512", "skx", "skylake_avx512"] in {
  def : ProcModel<P, SkylakeServerModel, ProcessorFeatures.SKXFeatures,
                  ProcessorFeatures.SKXTuning>;
}

def : ProcModel<"cascadelake", SkylakeServerModel,
                ProcessorFeatures.CLXFeatures, ProcessorFeatures.CLXTuning>;
def : ProcModel<"cooperlake", SkylakeServerModel,
                ProcessorFeatures.CPXFeatures, ProcessorFeatures.CPXTuning>;
def : ProcModel<"cannonlake", SkylakeServerModel,
                ProcessorFeatures.CNLFeatures, ProcessorFeatures.CNLTuning>;
// Processors scheduled with IceLakeModel, followed by "sapphirerapids",
// which has its own SapphireRapidsModel. "rocketlake" reuses the ICL feature
// and tuning lists.
foreach P = ["icelake-client", "icelake_client"] in {
  def : ProcModel<P, IceLakeModel,
                  ProcessorFeatures.ICLFeatures, ProcessorFeatures.ICLTuning>;
}
def : ProcModel<"rocketlake", IceLakeModel,
                ProcessorFeatures.ICLFeatures, ProcessorFeatures.ICLTuning>;
foreach P = ["icelake-server", "icelake_server"] in {
  def : ProcModel<P, IceLakeModel,
                  ProcessorFeatures.ICXFeatures, ProcessorFeatures.ICXTuning>;
}
def : ProcModel<"tigerlake", IceLakeModel,
                ProcessorFeatures.TGLFeatures, ProcessorFeatures.TGLTuning>;
def : ProcModel<"sapphirerapids", SapphireRapidsModel,
                ProcessorFeatures.SPRFeatures, ProcessorFeatures.SPRTuning>;
// Every entry in this group is scheduled with AlderlakePModel. All of them
// use ADLTuning except gracemont, which uses GRTTuning.
1777def : ProcModel<"alderlake", AlderlakePModel,
1778                ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
1779// FIXME: Use Gracemont Schedule Model when it is ready.
// Until then gracemont pairs the ADL feature list with its own tuning list.
1780def : ProcModel<"gracemont", AlderlakePModel,
1781                ProcessorFeatures.ADLFeatures, ProcessorFeatures.GRTTuning>;
// raptorlake and meteorlake are registered with the same arguments as
// alderlake (ADLFeatures + ADLTuning).
1782def : ProcModel<"raptorlake", AlderlakePModel,
1783                ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
1784def : ProcModel<"meteorlake", AlderlakePModel,
1785                ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
// arrowlake takes its ISA features from the SRFFeatures list.
1786def : ProcModel<"arrowlake", AlderlakePModel,
1787                ProcessorFeatures.SRFFeatures, ProcessorFeatures.ADLTuning>;
// Both arrowlake-s spellings and lunarlake share the ARLSFeatures list.
1788foreach P = ["arrowlake-s", "arrowlake_s", "lunarlake"] in {
1789def : ProcModel<P, AlderlakePModel,
1790                ProcessorFeatures.ARLSFeatures, ProcessorFeatures.ADLTuning>;
1791}
// pantherlake and clearwaterforest each have their own feature list
// (PTLFeatures and CWFFeatures).
1792def : ProcModel<"pantherlake", AlderlakePModel,
1793                ProcessorFeatures.PTLFeatures, ProcessorFeatures.ADLTuning>;
1794def : ProcModel<"clearwaterforest", AlderlakePModel,
1795                ProcessorFeatures.CWFFeatures, ProcessorFeatures.ADLTuning>;
// Every entry in this group is scheduled with SapphireRapidsModel and tuned
// with SPRTuning; only the feature list varies.
1796def : ProcModel<"graniterapids", SapphireRapidsModel,
1797                ProcessorFeatures.GNRFeatures, ProcessorFeatures.SPRTuning>;
// emeraldrapids uses the SPR feature list unchanged.
1798def : ProcModel<"emeraldrapids", SapphireRapidsModel,
1799                ProcessorFeatures.SPRFeatures, ProcessorFeatures.SPRTuning>;
// Both graniterapids-d spellings use the GNRDFeatures list.
1800foreach P = ["graniterapids-d", "graniterapids_d"] in {
1801def : ProcModel<P, SapphireRapidsModel,
1802                ProcessorFeatures.GNRDFeatures, ProcessorFeatures.SPRTuning>;
1803}
1804
1805// AMD CPUs.
1806
// The K6 entries use Proc, which here takes only a name, an explicit feature
// list and an explicit tuning list (no scheduling model argument).
// k6 lists FeatureMMX; k6-2 and k6-3 list Feature3DNow in its place.
1807def : Proc<"k6",   [FeatureX87, FeatureCX8, FeatureMMX],
1808                   [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1809def : Proc<"k6-2", [FeatureX87, FeatureCX8, Feature3DNow],
1810                   [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1811def : Proc<"k6-3", [FeatureX87, FeatureCX8, Feature3DNow],
1812                   [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1813
// Athlon names without SSE: X87, CX8, CMOV, 3DNowA and NOPL only.
1814foreach P = ["athlon", "athlon-tbird"] in {
1815  def : Proc<P, [FeatureX87, FeatureCX8, FeatureCMOV, Feature3DNowA,
1816                 FeatureNOPL],
1817                [TuningSlowSHLD, TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1818}
1819
// These Athlon names additionally list FeatureSSE1 and FeatureFXSR; the
// tuning list is identical to the one used for "athlon" above.
1820foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in {
1821  def : Proc<P, [FeatureX87, FeatureCX8, FeatureCMOV,
1822                 FeatureSSE1, Feature3DNowA, FeatureFXSR, FeatureNOPL],
1823                [TuningSlowSHLD, TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1824}
1825
// K8 names: the feature list includes FeatureX86_64 and FeatureSSE2.
1826foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in {
1827  def : Proc<P, [FeatureX87, FeatureCX8, FeatureSSE2, Feature3DNowA,
1828                 FeatureFXSR, FeatureNOPL, FeatureX86_64, FeatureCMOV],
1829                [TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16,
1830                 TuningSBBDepBreaking, TuningInsertVZEROUPPER]>;
1831}
1832
// The "-sse3" variants list FeatureSSE3 instead of FeatureSSE2 and add
// FeatureCX16; the tuning list is the same as for the plain K8 names.
1833foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
1834  def : Proc<P, [FeatureX87, FeatureCX8, FeatureSSE3, Feature3DNowA,
1835                 FeatureFXSR, FeatureNOPL, FeatureCX16, FeatureCMOV,
1836                 FeatureX86_64],
1837                [TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16,
1838                 TuningSBBDepBreaking, TuningInsertVZEROUPPER]>;
1839}
1840
// amdfam10 and barcelona are two names for the same record contents
// (BarcelonaFeatures/BarcelonaTuning), registered via Proc without a
// scheduling model argument.
1841foreach P = ["amdfam10", "barcelona"] in {
1842  def : Proc<P, ProcessorFeatures.BarcelonaFeatures,
1843             ProcessorFeatures.BarcelonaTuning>;
1844}
1845
1846// Bobcat
// Registered via Proc, i.e. without a scheduling model argument.
1847def : Proc<"btver1", ProcessorFeatures.BtVer1Features,
1848           ProcessorFeatures.BtVer1Tuning>;
1849// Jaguar
// Registered via ProcModel with its own scheduling model, BtVer2Model.
1850def : ProcModel<"btver2", BtVer2Model, ProcessorFeatures.BtVer2Features,
1851                ProcessorFeatures.BtVer2Tuning>;
1852
1853// Bulldozer
// bdver1 is scheduled with BdVer2Model, the same model bdver2 uses.
1854def : ProcModel<"bdver1", BdVer2Model, ProcessorFeatures.BdVer1Features,
1855                ProcessorFeatures.BdVer1Tuning>;
1856// Piledriver
1857def : ProcModel<"bdver2", BdVer2Model, ProcessorFeatures.BdVer2Features,
1858                ProcessorFeatures.BdVer2Tuning>;
1859// Steamroller
// bdver3 and bdver4 are registered via Proc, without a scheduling model
// argument.
1860def : Proc<"bdver3", ProcessorFeatures.BdVer3Features,
1861           ProcessorFeatures.BdVer3Tuning>;
1862// Excavator
1863def : Proc<"bdver4", ProcessorFeatures.BdVer4Features,
1864           ProcessorFeatures.BdVer4Tuning>;
1865
// znver1 through znver4: each name has its own scheduling model
// (Znver1Model .. Znver4Model), feature list and tuning list.
1866def : ProcModel<"znver1", Znver1Model, ProcessorFeatures.ZNFeatures,
1867                ProcessorFeatures.ZNTuning>;
1868def : ProcModel<"znver2", Znver2Model, ProcessorFeatures.ZN2Features,
1869                ProcessorFeatures.ZN2Tuning>;
1870def : ProcModel<"znver3", Znver3Model, ProcessorFeatures.ZN3Features,
1871                ProcessorFeatures.ZN3Tuning>;
1872def : ProcModel<"znver4", Znver4Model, ProcessorFeatures.ZN4Features,
1873           ProcessorFeatures.ZN4Tuning>;
1874
// Remaining 32-bit CPU names, all registered via Proc with explicit lists and
// the same two tuning flags (TuningSlowUAMem16, TuningInsertVZEROUPPER).
1875def : Proc<"geode",           [FeatureX87, FeatureCX8, Feature3DNowA],
1876                              [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1877
// Note: the winchip-c6, winchip2 and c3 feature lists do not include
// FeatureCX8, unlike geode above and c3-2 below.
1878def : Proc<"winchip-c6",      [FeatureX87, FeatureMMX],
1879                              [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1880def : Proc<"winchip2",        [FeatureX87, Feature3DNow],
1881                              [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1882def : Proc<"c3",              [FeatureX87, Feature3DNow],
1883                              [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
// c3-2 lists MMX, SSE1, FXSR and CMOV (plus CX8) instead of 3DNow.
1884def : Proc<"c3-2",            [FeatureX87, FeatureCX8, FeatureMMX,
1885                               FeatureSSE1, FeatureFXSR, FeatureCMOV],
1886                              [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1887
1888// We also provide a generic 64-bit specific x86 processor model which tries to
1889// be good for modern chips without enabling instruction set encodings past the
1890// basic SSE2 and 64-bit ones. It disables slow things from any mainstream and
1891// modern 64-bit x86 chip, and enables features that are generally beneficial.
1892//
1893// We currently use the Sandy Bridge model as the default scheduling model as
1894// we use it across Nehalem, Westmere, Sandy Bridge, and Ivy Bridge which
1895// covers a huge swath of x86 processors. If there are specific scheduling
1896// knobs which need to be tuned differently for AMD chips, we might consider
1897// forming a common base for them.
1898def : ProcModel<"x86-64", SandyBridgeModel, ProcessorFeatures.X86_64V1Features,
1899                ProcessorFeatures.X86_64V1Tuning>;
// The "-v2", "-v3" and "-v4" names below each get their own
// X86_64V<n>Features/X86_64V<n>Tuning lists and borrow the scheduling model
// of the CPU named in the comment preceding each record.
1900// Close to Sandybridge.
1901def : ProcModel<"x86-64-v2", SandyBridgeModel, ProcessorFeatures.X86_64V2Features,
1902                ProcessorFeatures.X86_64V2Tuning>;
1903// Close to Haswell.
1904def : ProcModel<"x86-64-v3", HaswellModel, ProcessorFeatures.X86_64V3Features,
1905                ProcessorFeatures.X86_64V3Tuning>;
1906// Close to the AVX-512 level implemented by Xeon Scalable Processors.
// Scheduled with SkylakeServerModel.
1907def : ProcModel<"x86-64-v4", SkylakeServerModel, ProcessorFeatures.X86_64V4Features,
1908                ProcessorFeatures.X86_64V4Tuning>;
1909
1910//===----------------------------------------------------------------------===//
1911// Calling Conventions
1912//===----------------------------------------------------------------------===//
1913
1914include "X86CallingConv.td"
1915
1916
1917//===----------------------------------------------------------------------===//
1918// Assembly Parser
1919//===----------------------------------------------------------------------===//
1920
// Assembly parser variant 0, named "att": "#" is the comment delimiter and
// register names carry a "%" prefix.
1921def ATTAsmParserVariant : AsmParserVariant {
1922  int Variant = 0;
1923
1924  // Variant name.
1925  string Name = "att";
1926
1927  // Discard comments in assembly strings.
1928  string CommentDelimiter = "#";
1929
1930  // Recognize hard coded registers.
1931  string RegisterPrefix = "%";
1932}
1933
// Assembly parser variant 1, named "intel": ";" is the comment delimiter and
// register names have no prefix (RegisterPrefix is the empty string).
1934def IntelAsmParserVariant : AsmParserVariant {
1935  int Variant = 1;
1936
1937  // Variant name.
1938  string Name = "intel";
1939
1940  // Discard comments in assembly strings.
1941  string CommentDelimiter = ";";
1942
1943  // Recognize hard coded registers.
1944  string RegisterPrefix = "";
1945}
1946
1947//===----------------------------------------------------------------------===//
1948// Assembly Printers
1949//===----------------------------------------------------------------------===//
1950
1951// The X86 target supports two different syntaxes for emitting machine code.
1952// This is controlled by the -x86-asm-syntax={att|intel} option.
// The Variant numbers below match the parser variants defined in this file:
// 0 for AT&T syntax and 1 for Intel syntax.
1953def ATTAsmWriter : AsmWriter {
1954  string AsmWriterClassName  = "ATTInstPrinter";
1955  int Variant = 0;
1956}
1957def IntelAsmWriter : AsmWriter {
1958  string AsmWriterClassName  = "IntelInstPrinter";
1959  int Variant = 1;
1960}
1961
// Top-level Target record for X86. It ties together the instruction set
// record (X86InstrInfo), both assembly parser variants and both assembly
// writers defined in this file, and sets AllowRegisterRenaming to 1.
1962def X86 : Target {
1963  // Information about the instructions...
1964  let InstructionSet = X86InstrInfo;
  // The list order follows the Variant numbers: AT&T (0) first, Intel (1) second.
1965  let AssemblyParserVariants = [ATTAsmParserVariant, IntelAsmParserVariant];
1966  let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter];
1967  let AllowRegisterRenaming = 1;
1968}
1969
1970//===----------------------------------------------------------------------===//
1971// Pfm Counters
1972//===----------------------------------------------------------------------===//
1973
1974include "X86PfmCounters.td"
1975