xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86.td (revision 96190b4fef3b4a0cc3ca0606b0c4e3e69a5e6717)
1//===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This is a target description file for the Intel i386 architecture, referred
10// to here as the "X86" architecture.
11//
12//===----------------------------------------------------------------------===//
13
14// Get the target-independent interfaces which we are implementing...
15//
16include "llvm/Target/Target.td"
17
18//===----------------------------------------------------------------------===//
19// X86 Subtarget state
20//
// (independent of any specific ABI or programming model)
// Execution-mode selectors. Each record sets its own boolean subtarget field
// (Is64Bit / Is32Bit / Is16Bit) to "true"; none lists implied features.
def Is64Bit
    : SubtargetFeature<"64bit-mode", "Is64Bit", "true",
                       "64-bit mode (x86_64)">;
def Is32Bit
    : SubtargetFeature<"32bit-mode", "Is32Bit", "true",
                       "32-bit mode (80386)">;
def Is16Bit
    : SubtargetFeature<"16bit-mode", "Is16Bit", "true",
                       "16-bit mode (i8086)">;
28
29//===----------------------------------------------------------------------===//
30// X86 Subtarget ISA features
31//===----------------------------------------------------------------------===//
32
// Stand-alone ISA capability flags. Each record switches one boolean
// subtarget field to "true" and lists no implied features.
def FeatureX87
    : SubtargetFeature<"x87", "HasX87", "true",
                       "Enable X87 float instructions">;

def FeatureNOPL
    : SubtargetFeature<"nopl", "HasNOPL", "true",
                       "Enable NOPL instruction (generally pentium pro+)">;

def FeatureCMOV
    : SubtargetFeature<"cmov", "HasCMOV", "true",
                       "Enable conditional move instructions">;

def FeatureCX8
    : SubtargetFeature<"cx8", "HasCX8", "true",
                       "Support CMPXCHG8B instructions">;

// Separate from the SSE level so CRC32 stays usable in GPR-only functions
// (see the help text).
def FeatureCRC32
    : SubtargetFeature<"crc32", "HasCRC32", "true",
                       "Enable SSE 4.2 CRC32 instruction (used when SSE4.2 is supported but function is GPR only)">;

def FeaturePOPCNT
    : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
                       "Support POPCNT instruction">;

def FeatureFXSR
    : SubtargetFeature<"fxsr", "HasFXSR", "true",
                       "Support fxsave/fxrestore instructions">;
53
// XSAVE family. FeatureXSAVE is the base; the OPT / C / S variants each list
// it as an implied feature.
def FeatureXSAVE
    : SubtargetFeature<"xsave", "HasXSAVE", "true",
                       "Support xsave instructions">;

def FeatureXSAVEOPT
    : SubtargetFeature<"xsaveopt", "HasXSAVEOPT", "true",
                       "Support xsaveopt instructions",
                       [FeatureXSAVE]>;

def FeatureXSAVEC
    : SubtargetFeature<"xsavec", "HasXSAVEC", "true",
                       "Support xsavec instructions",
                       [FeatureXSAVE]>;

def FeatureXSAVES
    : SubtargetFeature<"xsaves", "HasXSAVES", "true",
                       "Support xsaves instructions",
                       [FeatureXSAVE]>;
68
// SSE level ladder. Every record writes a level into the shared X86SSELevel
// field, and each step lists the level directly below it as implied.
def FeatureSSE1
    : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
                       "Enable SSE instructions">;
def FeatureSSE2
    : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
                       "Enable SSE2 instructions",
                       [FeatureSSE1]>;
def FeatureSSE3
    : SubtargetFeature<"sse3", "X86SSELevel", "SSE3",
                       "Enable SSE3 instructions",
                       [FeatureSSE2]>;
def FeatureSSSE3
    : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3",
                       "Enable SSSE3 instructions",
                       [FeatureSSE3]>;
def FeatureSSE41
    : SubtargetFeature<"sse4.1", "X86SSELevel", "SSE41",
                       "Enable SSE 4.1 instructions",
                       [FeatureSSSE3]>;
def FeatureSSE42
    : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42",
                       "Enable SSE 4.2 instructions",
                       [FeatureSSE41]>;
// MMX is deliberately kept out of the SSE ladder: for odd compatibility
// reasons it must be possible to turn MMX off explicitly while SSE and later
// levels stay enabled. MMX and both 3DNow! levels share the X863DNowLevel
// field, each level implying the one before it.
def FeatureMMX
    : SubtargetFeature<"mmx", "X863DNowLevel", "MMX",
                       "Enable MMX instructions">;
def Feature3DNow
    : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
                       "Enable 3DNow! instructions",
                       [FeatureMMX]>;
def Feature3DNowA
    : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
                       "Enable 3DNow! Athlon instructions",
                       [Feature3DNow]>;
// Every x86-64 CPU has SSE2, yet SSE2 is not listed as implied here because
// it can be disabled (e.g. when compiling OS kernels) without leaving 64-bit
// mode. No other feature should imply this bit; it exists to enforce that
// only 64-bit capable CPUs are used in 64-bit mode.
def FeatureX86_64
    : SubtargetFeature<"64bit", "HasX86_64", "true",
                       "Support 64-bit instructions">;

// CMPXCHG16B support; lists FeatureCX8 as implied.
def FeatureCX16
    : SubtargetFeature<"cx16", "HasCX16", "true",
                       "64-bit with cmpxchg16b (this is true for most x86-64 chips, but not the first AMD chips)",
                       [FeatureCX8]>;

// SSE4a has its own boolean field (not an X86SSELevel step) and implies SSE3.
def FeatureSSE4A
    : SubtargetFeature<"sse4a", "HasSSE4A", "true",
                       "Support SSE 4a instructions",
                       [FeatureSSE3]>;
109
// AVX and AVX2 continue the X86SSELevel ladder: AVX implies SSE4.2 and AVX2
// implies AVX.
def FeatureAVX
    : SubtargetFeature<"avx", "X86SSELevel", "AVX",
                       "Enable AVX instructions",
                       [FeatureSSE42]>;
def FeatureAVX2
    : SubtargetFeature<"avx2", "X86SSELevel", "AVX2",
                       "Enable AVX2 instructions",
                       [FeatureAVX]>;
// Three-operand FMA. Sets HasFMA and lists FeatureAVX as implied.
// The help text spells the operation "multiply-add" (a fused multiply
// followed by an add).
def FeatureFMA
    : SubtargetFeature<"fma", "HasFMA", "true",
                       "Enable three-operand fused multiply-add",
                       [FeatureAVX]>;
// Half-precision conversion instructions; implies AVX.
def FeatureF16C
    : SubtargetFeature<"f16c", "HasF16C", "true",
                       "Support 16-bit floating point conversion instructions",
                       [FeatureAVX]>;

// Independent flag for ZMM / 64-bit mask support; no implied features.
def FeatureEVEX512
    : SubtargetFeature<"evex512", "HasEVEX512", "true",
                       "Support ZMM and 64-bit mask instructions">;

// AVX-512 foundation: the top X86SSELevel step defined here. Implies AVX2,
// FMA and F16C.
def FeatureAVX512
    : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512",
                       "Enable AVX-512 instructions",
                       [FeatureAVX2, FeatureFMA, FeatureF16C]>;
// AVX-512 sub-extensions that each imply the AVX-512 foundation feature.
def FeatureERI
    : SubtargetFeature<"avx512er", "HasERI", "true",
                       "Enable AVX-512 Exponential and Reciprocal Instructions",
                       [FeatureAVX512]>;
def FeatureCDI
    : SubtargetFeature<"avx512cd", "HasCDI", "true",
                       "Enable AVX-512 Conflict Detection Instructions",
                       [FeatureAVX512]>;
def FeatureVPOPCNTDQ
    : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ", "true",
                       "Enable AVX-512 Population Count Instructions",
                       [FeatureAVX512]>;
def FeaturePFI
    : SubtargetFeature<"avx512pf", "HasPFI", "true",
                       "Enable AVX-512 PreFetch Instructions",
                       [FeatureAVX512]>;
// Prefetch-hint flags; neither lists implied features.
def FeaturePREFETCHI
    : SubtargetFeature<"prefetchi", "HasPREFETCHI", "true",
                       "Prefetch instruction with T0 or T1 Hint">;
def FeaturePREFETCHWT1
    : SubtargetFeature<"prefetchwt1", "HasPREFETCHWT1", "true",
                       "Prefetch with Intent to Write and T1 Hint">;
// DQ, BW and VL imply the AVX-512 foundation feature directly.
def FeatureDQI
    : SubtargetFeature<"avx512dq", "HasDQI", "true",
                       "Enable AVX-512 Doubleword and Quadword Instructions",
                       [FeatureAVX512]>;
def FeatureBWI
    : SubtargetFeature<"avx512bw", "HasBWI", "true",
                       "Enable AVX-512 Byte and Word Instructions",
                       [FeatureAVX512]>;
def FeatureVLX
    : SubtargetFeature<"avx512vl", "HasVLX", "true",
                       "Enable AVX-512 Vector Length eXtensions",
                       [FeatureAVX512]>;

// The byte-manipulation extensions are layered on BWI rather than on the
// foundation feature.
def FeatureVBMI
    : SubtargetFeature<"avx512vbmi", "HasVBMI", "true",
                       "Enable AVX-512 Vector Byte Manipulation Instructions",
                       [FeatureBWI]>;
def FeatureVBMI2
    : SubtargetFeature<"avx512vbmi2", "HasVBMI2", "true",
                       "Enable AVX-512 further Vector Byte Manipulation Instructions",
                       [FeatureBWI]>;
// AVX-IFMA; implies AVX2.
def FeatureAVXIFMA
    : SubtargetFeature<"avxifma", "HasAVXIFMA", "true",
                       "Enable AVX-IFMA",
                       [FeatureAVX2]>;
// AVX-512 integer FMA. Sets HasIFMA and lists FeatureAVX512 as implied.
// The help text spells the operation "Multiply-Add" (a fused multiply
// followed by an add).
def FeatureIFMA
    : SubtargetFeature<"avx512ifma", "HasIFMA", "true",
                       "Enable AVX-512 Integer Fused Multiply-Add",
                       [FeatureAVX512]>;
// Protection-keys flag; no implied features.
def FeaturePKU
    : SubtargetFeature<"pku", "HasPKU", "true",
                       "Enable protection keys">;
// VNNI comes in an AVX-512 form (implies AVX-512) and an AVX-encoded form
// (implies only AVX2).
def FeatureVNNI
    : SubtargetFeature<"avx512vnni", "HasVNNI", "true",
                       "Enable AVX-512 Vector Neural Network Instructions",
                       [FeatureAVX512]>;
def FeatureAVXVNNI
    : SubtargetFeature<"avxvnni", "HasAVXVNNI", "true",
                       "Support AVX_VNNI encoding",
                       [FeatureAVX2]>;

// BF16 and BITALG are layered on BWI.
def FeatureBF16
    : SubtargetFeature<"avx512bf16", "HasBF16", "true",
                       "Support bfloat16 floating point",
                       [FeatureBWI]>;
def FeatureBITALG
    : SubtargetFeature<"avx512bitalg", "HasBITALG", "true",
                       "Enable AVX-512 Bit Algorithms",
                       [FeatureBWI]>;

def FeatureVP2INTERSECT
    : SubtargetFeature<"avx512vp2intersect", "HasVP2INTERSECT", "true",
                       "Enable AVX-512 vp2intersect",
                       [FeatureAVX512]>;
// FIXME: The FP16 scalar intrinsics use type v8f16, which is supposed to be
// guarded by hasVLX, so VLX is implied by FeatureFP16 for now.
// FIXME: FP16 conversion between f16 and i64 customizes type v8i64, which is
// supposed to be guarded by hasDQI, so DQI is implied by FeatureFP16 for now.
def FeatureFP16
    : SubtargetFeature<"avx512fp16", "HasFP16", "true",
                       "Support 16-bit floating point",
                       [FeatureBWI, FeatureVLX, FeatureDQI]>;
// AVX-VNNI integer variants; both imply AVX2.
def FeatureAVXVNNIINT8
    : SubtargetFeature<"avxvnniint8", "HasAVXVNNIINT8", "true",
                       "Enable AVX-VNNI-INT8",
                       [FeatureAVX2]>;
def FeatureAVXVNNIINT16
    : SubtargetFeature<"avxvnniint16", "HasAVXVNNIINT16", "true",
                       "Enable AVX-VNNI-INT16",
                       [FeatureAVX2]>;
// PCLMUL and GFNI each imply SSE2.
def FeaturePCLMUL
    : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
                       "Enable packed carry-less multiplication instructions",
                       [FeatureSSE2]>;
def FeatureGFNI
    : SubtargetFeature<"gfni", "HasGFNI", "true",
                       "Enable Galois Field Arithmetic Instructions",
                       [FeatureSSE2]>;

// VPCLMULQDQ implies both AVX and PCLMUL.
def FeatureVPCLMULQDQ
    : SubtargetFeature<"vpclmulqdq", "HasVPCLMULQDQ", "true",
                       "Enable vpclmulqdq instructions",
                       [FeatureAVX, FeaturePCLMUL]>;
// Four-operand FMA. Sets HasFMA4 and lists FeatureAVX and FeatureSSE4A as
// implied. The help text spells the operation "multiply-add" (a fused
// multiply followed by an add).
def FeatureFMA4
    : SubtargetFeature<"fma4", "HasFMA4", "true",
                       "Enable four-operand fused multiply-add",
                       [FeatureAVX, FeatureSSE4A]>;
// XOP; implies FMA4.
def FeatureXOP
    : SubtargetFeature<"xop", "HasXOP", "true",
                       "Enable XOP instructions",
                       [FeatureFMA4]>;
// Permits unaligned memory operands on SSE instructions; per the help text
// this may depend on a processor configuration bit.
def FeatureSSEUnalignedMem
    : SubtargetFeature<"sse-unaligned-mem", "HasSSEUnalignedMem", "true",
                       "Allow unaligned memory operands with SSE instructions (this may require setting a configuration bit in the processor)">;
// AES implies SSE2; VAES implies AVX2 together with AES.
def FeatureAES
    : SubtargetFeature<"aes", "HasAES", "true",
                       "Enable AES instructions",
                       [FeatureSSE2]>;
def FeatureVAES
    : SubtargetFeature<"vaes", "HasVAES", "true",
                       "Promote selected AES instructions to AVX512/AVX registers",
                       [FeatureAVX2, FeatureAES]>;
// Independent boolean ISA flags; none of these lists implied features.
def FeatureTBM
    : SubtargetFeature<"tbm", "HasTBM", "true",
                       "Enable TBM instructions">;
def FeatureLWP
    : SubtargetFeature<"lwp", "HasLWP", "true",
                       "Enable LWP instructions">;
def FeatureMOVBE
    : SubtargetFeature<"movbe", "HasMOVBE", "true",
                       "Support MOVBE instruction">;
// Note: the attribute name is spelled "rdrnd", not "rdrand".
def FeatureRDRAND
    : SubtargetFeature<"rdrnd", "HasRDRAND", "true",
                       "Support RDRAND instruction">;
def FeatureFSGSBase
    : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true",
                       "Support FS/GS Base instructions">;
def FeatureLZCNT
    : SubtargetFeature<"lzcnt", "HasLZCNT", "true",
                       "Support LZCNT instruction">;
def FeatureBMI
    : SubtargetFeature<"bmi", "HasBMI", "true",
                       "Support BMI instructions">;
def FeatureBMI2
    : SubtargetFeature<"bmi2", "HasBMI2", "true",
                       "Support BMI2 instructions">;
def FeatureRTM
    : SubtargetFeature<"rtm", "HasRTM", "true",
                       "Support RTM instructions">;
def FeatureADX
    : SubtargetFeature<"adx", "HasADX", "true",
                       "Support ADX instructions">;
// SHA implies SSE2; SHA512 implies AVX2.
def FeatureSHA
    : SubtargetFeature<"sha", "HasSHA", "true",
                       "Enable SHA instructions",
                       [FeatureSSE2]>;
def FeatureSHA512
    : SubtargetFeature<"sha512", "HasSHA512", "true",
                       "Support SHA512 instructions",
                       [FeatureAVX2]>;

// CET SHSTK: the processor supports Control-Flow Enforcement Technology
// using a Shadow Stack.
def FeatureSHSTK
    : SubtargetFeature<"shstk", "HasSHSTK", "true",
                       "Support CET Shadow-Stack instructions">;

// SM3 implies AVX; SM4 implies AVX2.
def FeatureSM3
    : SubtargetFeature<"sm3", "HasSM3", "true",
                       "Support SM3 instructions",
                       [FeatureAVX]>;
def FeatureSM4
    : SubtargetFeature<"sm4", "HasSM4", "true",
                       "Support SM4 instructions",
                       [FeatureAVX2]>;
// Independent boolean ISA flags; none of these lists implied features.
def FeaturePRFCHW
    : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
                       "Support PRFCHW instructions">;
def FeatureRDSEED
    : SubtargetFeature<"rdseed", "HasRDSEED", "true",
                       "Support RDSEED instruction">;
// Note: the attribute name is "sahf" while the field is HasLAHFSAHF64.
def FeatureLAHFSAHF64
    : SubtargetFeature<"sahf", "HasLAHFSAHF64", "true",
                       "Support LAHF and SAHF instructions in 64-bit mode">;
def FeatureMWAITX
    : SubtargetFeature<"mwaitx", "HasMWAITX", "true",
                       "Enable MONITORX/MWAITX timer functionality">;
def FeatureCLZERO
    : SubtargetFeature<"clzero", "HasCLZERO", "true",
                       "Enable Cache Line Zero">;
def FeatureCLDEMOTE
    : SubtargetFeature<"cldemote", "HasCLDEMOTE", "true",
                       "Enable Cache Line Demote">;
def FeaturePTWRITE
    : SubtargetFeature<"ptwrite", "HasPTWRITE", "true",
                       "Support ptwrite instruction">;
// AMX family. AMX-TILE is the base; every other AMX feature lists it as
// implied.
def FeatureAMXTILE
    : SubtargetFeature<"amx-tile", "HasAMXTILE", "true",
                       "Support AMX-TILE instructions">;
def FeatureAMXINT8
    : SubtargetFeature<"amx-int8", "HasAMXINT8", "true",
                       "Support AMX-INT8 instructions",
                       [FeatureAMXTILE]>;
def FeatureAMXBF16
    : SubtargetFeature<"amx-bf16", "HasAMXBF16", "true",
                       "Support AMX-BF16 instructions",
                       [FeatureAMXTILE]>;
def FeatureAMXFP16
    : SubtargetFeature<"amx-fp16", "HasAMXFP16", "true",
                       "Support AMX amx-fp16 instructions",
                       [FeatureAMXTILE]>;
def FeatureAMXCOMPLEX
    : SubtargetFeature<"amx-complex", "HasAMXCOMPLEX", "true",
                       "Support AMX-COMPLEX instructions",
                       [FeatureAMXTILE]>;
def FeatureCMPCCXADD
    : SubtargetFeature<"cmpccxadd", "HasCMPCCXADD", "true",
                       "Support CMPCCXADD instructions">;

// The implied-feature list is spelled out explicitly here, and it is empty.
def FeatureRAOINT
    : SubtargetFeature<"raoint", "HasRAOINT", "true",
                       "Support RAO-INT instructions",
                       []>;

// AVX-NE-CONVERT implies AVX2.
def FeatureAVXNECONVERT
    : SubtargetFeature<"avxneconvert", "HasAVXNECONVERT", "true",
                       "Support AVX-NE-CONVERT instructions",
                       [FeatureAVX2]>;
// Independent boolean ISA flags; none of these lists implied features.
def FeatureINVPCID
    : SubtargetFeature<"invpcid", "HasINVPCID", "true",
                       "Invalidate Process-Context Identifier">;
def FeatureSGX
    : SubtargetFeature<"sgx", "HasSGX", "true",
                       "Enable Software Guard Extensions">;
def FeatureCLFLUSHOPT
    : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true",
                       "Flush A Cache Line Optimized">;
def FeatureCLWB
    : SubtargetFeature<"clwb", "HasCLWB", "true",
                       "Cache Line Write Back">;
def FeatureWBNOINVD
    : SubtargetFeature<"wbnoinvd", "HasWBNOINVD", "true",
                       "Write Back No Invalidate">;
def FeatureRDPID
    : SubtargetFeature<"rdpid", "HasRDPID", "true",
                       "Support RDPID instructions">;
def FeatureRDPRU
    : SubtargetFeature<"rdpru", "HasRDPRU", "true",
                       "Support RDPRU instructions">;
def FeatureWAITPKG
    : SubtargetFeature<"waitpkg", "HasWAITPKG", "true",
                       "Wait and pause enhancements">;
def FeatureENQCMD
    : SubtargetFeature<"enqcmd", "HasENQCMD", "true",
                       "Has ENQCMD instructions">;

// Key Locker: the base feature implies SSE2, and the wide variant implies
// the base feature.
def FeatureKL
    : SubtargetFeature<"kl", "HasKL", "true",
                       "Support Key Locker kl Instructions",
                       [FeatureSSE2]>;
def FeatureWIDEKL
    : SubtargetFeature<"widekl", "HasWIDEKL", "true",
                       "Support Key Locker wide Instructions",
                       [FeatureKL]>;

// The remaining flags in this group list no implied features.
def FeatureHRESET
    : SubtargetFeature<"hreset", "HasHRESET", "true",
                       "Has hreset instruction">;
def FeatureSERIALIZE
    : SubtargetFeature<"serialize", "HasSERIALIZE", "true",
                       "Has serialize instruction">;
def FeatureTSXLDTRK
    : SubtargetFeature<"tsxldtrk", "HasTSXLDTRK", "true",
                       "Support TSXLDTRK instructions">;
def FeatureUINTR
    : SubtargetFeature<"uintr", "HasUINTR", "true",
                       "Has UINTR Instructions">;
def FeatureUSERMSR
    : SubtargetFeature<"usermsr", "HasUSERMSR", "true",
                       "Support USERMSR instructions">;
def FeaturePCONFIG
    : SubtargetFeature<"pconfig", "HasPCONFIG", "true",
                       "platform configuration instruction">;
def FeatureMOVDIRI
    : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
                       "Support movdiri instruction (direct store integer)">;
def FeatureMOVDIR64B
    : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true",
                       "Support movdir64b instruction (direct store 64 bytes)">;
// AVX10.1 at up to 256 bits: an umbrella feature whose implied list pulls in
// the AVX-512 sub-extensions named below.
def FeatureAVX10_1
    : SubtargetFeature<"avx10.1-256", "HasAVX10_1", "true",
                       "Support AVX10.1 up to 256-bit instruction",
                       [FeatureCDI, FeatureVBMI, FeatureIFMA, FeatureVNNI,
                        FeatureBF16, FeatureVPOPCNTDQ, FeatureVBMI2,
                        FeatureBITALG, FeatureVAES, FeatureVPCLMULQDQ,
                        FeatureFP16]>;

// The 512-bit variant is the 256-bit feature plus EVEX512.
def FeatureAVX10_1_512
    : SubtargetFeature<"avx10.1-512", "HasAVX10_1_512", "true",
                       "Support AVX10.1 up to 512-bit instruction",
                       [FeatureAVX10_1, FeatureEVEX512]>;
// Independent boolean ISA flags; none of these lists implied features.
def FeatureEGPR
    : SubtargetFeature<"egpr", "HasEGPR", "true",
                       "Support extended general purpose register">;
def FeaturePush2Pop2
    : SubtargetFeature<"push2pop2", "HasPush2Pop2", "true",
                       "Support PUSH2/POP2 instructions">;
def FeaturePPX
    : SubtargetFeature<"ppx", "HasPPX", "true",
                       "Support Push-Pop Acceleration">;
def FeatureNDD
    : SubtargetFeature<"ndd", "HasNDD", "true",
                       "Support non-destructive destination">;
def FeatureCCMP
    : SubtargetFeature<"ccmp", "HasCCMP", "true",
                       "Support conditional cmp & test instructions">;
def FeatureCF
    : SubtargetFeature<"cf", "HasCF", "true",
                       "Support conditional faulting">;
356
// Enhanced REP MOVSB / STOSB ("string operations"), present on Ivy Bridge and
// newer processors; see "REP String Enhancement" in the Intel Software
// Development Manual. With this feature REP MOVSB copies using the largest
// available size rather than byte by byte, which makes it at least as fast
// as REPMOVS{W,D,Q}.
def FeatureERMSB : SubtargetFeature<"ermsb", "HasERMSB", "true",
                                    "REP MOVS/STOS are fast">;

// Fast Short REP MOV, present on Icelake and newer processors.
def FeatureFSRM : SubtargetFeature<"fsrm", "HasFSRM", "true",
                                   "REP MOVSB of short lengths is faster">;
372
// Sets UseSoftFloat; no implied features.
def FeatureSoftFloat : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
                                        "Use software floating point features">;
376
377//===----------------------------------------------------------------------===//
378// X86 Subtarget Security Mitigation features
379//===----------------------------------------------------------------------===//
380
// Spectre v2 mitigation for indirect calls: they are lowered through a
// special construct called a `retpoline`.
def FeatureRetpolineIndirectCalls : SubtargetFeature<
    "retpoline-indirect-calls", "UseRetpolineIndirectCalls", "true",
    "Remove speculation of indirect calls from the generated code">;

// Spectre v2 mitigation for indirect branches and switches: they are lowered
// either as conditional branch trees or through a `retpoline`.
def FeatureRetpolineIndirectBranches : SubtargetFeature<
    "retpoline-indirect-branches", "UseRetpolineIndirectBranches", "true",
    "Remove speculation of indirect branches from the generated code">;

// Deprecated umbrella switch: it implies both `retpoline-indirect-calls` and
// `retpoline-indirect-branches`.
def FeatureRetpoline : SubtargetFeature<
    "retpoline", "DeprecatedUseRetpoline", "true",
    "Remove speculation of indirect branches from the "
    "generated code, either by avoiding them entirely or "
    "lowering them with a speculation blocking construct",
    [FeatureRetpolineIndirectCalls, FeatureRetpolineIndirectBranches]>;
405
// Use externally supplied thunks for emitted retpoline calls, so that users
// in highly specialized environments (for example a kernel that hot-patches
// at boot time) can provide their own thunk definitions.
// Lists FeatureRetpolineIndirectCalls as implied.
def FeatureRetpolineExternalThunk : SubtargetFeature<
    "retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
    "When lowering an indirect call or branch using a `retpoline`, rely "
    "on the specified user provided thunk rather than emitting one "
    "ourselves. Only has effect when combined with some other retpoline "
    "feature",
    [FeatureRetpolineIndirectCalls]>;
416
// LVI mitigation covering indirect calls/branches and call returns.
def FeatureLVIControlFlowIntegrity : SubtargetFeature<
    "lvi-cfi", "UseLVIControlFlowIntegrity", "true",
    "Prevent indirect calls/branches from using a memory operand, and "
    "precede all indirect calls/branches from a register with an "
    "LFENCE instruction to serialize control flow. Also decompose RET "
    "instructions into a POP+LFENCE+JMP sequence.">;

// SESES: mitigates speculative execution attacks. Lists the LVI control-flow
// integrity feature as implied.
def FeatureSpeculativeExecutionSideEffectSuppression : SubtargetFeature<
    "seses", "UseSpeculativeExecutionSideEffectSuppression", "true",
    "Prevent speculative execution side channel timing attacks by "
    "inserting a speculation barrier before memory reads, memory writes, "
    "and conditional branches. Implies LVI Control Flow integrity.",
    [FeatureLVIControlFlowIntegrity]>;

// LVI mitigation covering data loads.
def FeatureLVILoadHardening : SubtargetFeature<
    "lvi-load-hardening", "UseLVILoadHardening", "true",
    "Insert LFENCE instructions to prevent data speculatively injected "
    "into loads from being used maliciously.">;
441
// Sets AllowTaggedGlobals; no implied features.
def FeatureTaggedGlobals : SubtargetFeature<
    "tagged-globals", "AllowTaggedGlobals", "true",
    "Use an instruction sequence for taking the address of a global "
    "that allows a memory tag in the upper address bits.">;
447
// Codegen controls for mitigating the Straight Line Speculation
// vulnerability: one switch for RET and one for indirect JMP.
def FeatureHardenSlsRet : SubtargetFeature<
    "harden-sls-ret", "HardenSlsRet", "true",
    "Harden against straight line speculation across RET instructions.">;

def FeatureHardenSlsIJmp : SubtargetFeature<
    "harden-sls-ijmp", "HardenSlsIJmp", "true",
    "Harden against straight line speculation across indirect JMP instructions.">;
458
459//===----------------------------------------------------------------------===//
460// X86 Subtarget Tuning features
461//===----------------------------------------------------------------------===//
// Instruction-cost tuning flags. Each one sets a single boolean subtarget
// field and lists no implied features.
def TuningPreferMovmskOverVTest
    : SubtargetFeature<"prefer-movmsk-over-vtest", "PreferMovmskOverVTest",
                       "true",
                       "Prefer movmsk over vtest instruction">;

def TuningSlowSHLD
    : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
                       "SHLD instruction is slow">;

def TuningSlowPMULLD
    : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
                       "PMULLD instruction is slow (compared to PMULLW/PMULHW and PMULUDQ)">;

def TuningSlowPMADDWD
    : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow", "true",
                       "PMADDWD is slower than PMULLD">;
475
// FIXME: This should not apply to CPUs that do not have SSE.
def TuningSlowUAMem16
    : SubtargetFeature<"slow-unaligned-mem-16", "IsUnalignedMem16Slow", "true",
                       "Slow unaligned 16-byte memory access">;

def TuningSlowUAMem32
    : SubtargetFeature<"slow-unaligned-mem-32", "IsUnalignedMem32Slow", "true",
                       "Slow unaligned 32-byte memory access">;
484
// Sets UseLeaForSP; no implied features.
def TuningLEAForSP
    : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
                       "Use LEA for adjusting the stack pointer (this is an optimization for Intel Atom processors)">;
487
// Set when 8-bit divisions are significantly faster than 32-bit divisions
// and should be preferred whenever possible.
def TuningSlowDivide32
    : SubtargetFeature<"idivl-to-divb", "HasSlowDivide32", "true",
                       "Use 8-bit divide for positive values less than 256">;

// Set when 32-bit divisions are significantly faster than 64-bit divisions
// and should be preferred whenever possible.
def TuningSlowDivide64
    : SubtargetFeature<"idivq-to-divl", "HasSlowDivide64", "true",
                       "Use 32-bit divide for positive values less than 2^32">;
499
def TuningPadShortFunctions
    : SubtargetFeature<"pad-short-functions", "PadShortFunctions", "true",
                       "Pad short functions (to prevent a stall when returning too early)">;

// Some processors are slow on instructions that implicitly take two memory
// operands. In practice that means CALL, PUSH and POP with a memory operand
// should be avoided in favor of a MOV followed by the register form of
// CALL/PUSH/POP.
def TuningSlowTwoMemOps
    : SubtargetFeature<"slow-two-mem-ops", "SlowTwoMemOps", "true",
                       "Two memory operand instructions are slow">;
510
511// True if the LEA instruction inputs have to be ready at address generation
512// (AG) time.
// Feature name "lea-uses-ag"; sets the LeaUsesAG subtarget field.
513def TuningLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LeaUsesAG", "true",
514                                   "LEA instruction needs inputs at AG stage">;
515
// Tuning flag "slow-lea": sets the SlowLEA subtarget field to true, recording
// that LEA with certain arguments is slow on the target.
516def TuningSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
517                                   "LEA instruction with certain arguments is slow">;
518
519// True if the LEA instruction has all three source operands: base, index,
520// and offset, or if the LEA instruction uses base and index registers where
521// the base is EBP, RBP, or R13.
// Feature name "slow-3ops-lea"; sets the Slow3OpsLEA subtarget field.
522def TuningSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
523                                   "LEA instruction with 3 ops or certain registers is slow">;
524
525// True if INC and DEC instructions are slow when writing to flags.
// Feature name "slow-incdec"; sets the SlowIncDec subtarget field.
526def TuningSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
527                                   "INC and DEC instructions are slower than ADD and SUB">;
528
// Tuning flag "false-deps-popcnt": sets HasPOPCNTFalseDeps, recording that
// POPCNT has a false dependency on its destination register.
529def TuningPOPCNTFalseDeps : SubtargetFeature<"false-deps-popcnt",
530                                     "HasPOPCNTFalseDeps", "true",
531                                     "POPCNT has a false dependency on dest register">;
532
// Tuning flag "false-deps-lzcnt-tzcnt": sets HasLZCNTFalseDeps, recording that
// LZCNT and TZCNT have a false dependency on their destination register.
533def TuningLZCNTFalseDeps : SubtargetFeature<"false-deps-lzcnt-tzcnt",
534                                     "HasLZCNTFalseDeps", "true",
535                                     "LZCNT/TZCNT have a false dependency on dest register">;
536
// Tuning flag "false-deps-mulc": sets HasMULCFalseDeps, recording that the
// VF[C]MULCPH/SH instructions have a false dependency on the destination
// register.
537def TuningMULCFalseDeps : SubtargetFeature<"false-deps-mulc",
538                               "HasMULCFalseDeps", "true",
539                               "VF[C]MULCPH/SH has a false dependency on dest register">;
540
// Tuning flag "false-deps-perm": sets HasPERMFalseDeps, recording that the
// VPERMD/Q/PS/PD instructions have a false dependency on the destination
// register.
541def TuningPERMFalseDeps : SubtargetFeature<"false-deps-perm",
542                               "HasPERMFalseDeps", "true",
543                               "VPERMD/Q/PS/PD has a false dependency on dest register">;
544
// Tuning flag "false-deps-range": sets HasRANGEFalseDeps, recording that the
// VRANGEPD/PS/SD/SS instructions have a false dependency on the destination
// register.
545def TuningRANGEFalseDeps : SubtargetFeature<"false-deps-range",
546                               "HasRANGEFalseDeps", "true",
547                               "VRANGEPD/PS/SD/SS has a false dependency on dest register">;
548
// Tuning flag "false-deps-getmant": sets HasGETMANTFalseDeps, recording that
// the scalar VGETMANT forms and the memory-operand packed forms have a false
// dependency on the destination register.
// The description previously spelled the packed mnemonic "VGETMANDPS", which
// is not an instruction; the packed forms are VGETMANTPS/VGETMANTPD.
549def TuningGETMANTFalseDeps : SubtargetFeature<"false-deps-getmant",
550                               "HasGETMANTFalseDeps", "true",
551                               "VGETMANTSS/SD/SH and VGETMANTPS/PD(memory version) has a"
552                               " false dependency on dest register">;
553
// Tuning flag "false-deps-mullq": sets HasMULLQFalseDeps, recording that
// VPMULLQ has a false dependency on the destination register.
554def TuningMULLQFalseDeps : SubtargetFeature<"false-deps-mullq",
555                               "HasMULLQFalseDeps", "true",
556                               "VPMULLQ has a false dependency on dest register">;
557
// Tuning flag "sbb-dep-breaking": sets HasSBBDepBreaking, recording that an
// SBB using the same register has no dependency on that source register.
558def TuningSBBDepBreaking : SubtargetFeature<"sbb-dep-breaking",
559                                     "HasSBBDepBreaking", "true",
560                                     "SBB with same register has no source dependency">;
561
562// On recent X86 (port bound) processors, it is preferable to combine to a
563// single shuffle using a variable mask over multiple fixed shuffles.
// Feature name "fast-variable-crosslane-shuffle"; sets the
// HasFastVariableCrossLaneShuffle subtarget field.
564def TuningFastVariableCrossLaneShuffle
565    : SubtargetFeature<"fast-variable-crosslane-shuffle",
566                       "HasFastVariableCrossLaneShuffle",
567                       "true", "Cross-lane shuffles with variable masks are fast">;
// Per-lane counterpart of the cross-lane flag: feature name
// "fast-variable-perlane-shuffle" sets HasFastVariablePerLaneShuffle, recording
// that per-lane shuffles with variable masks are fast.
568def TuningFastVariablePerLaneShuffle
569    : SubtargetFeature<"fast-variable-perlane-shuffle",
570                       "HasFastVariablePerLaneShuffle",
571                       "true", "Per-lane shuffles with variable masks are fast">;
572
573// Goldmont / Tremont (Atom in general) have no bypass delay.
// Feature name "no-bypass-delay"; sets the NoDomainDelay subtarget field.
574def TuningNoDomainDelay : SubtargetFeature<"no-bypass-delay",
575                                   "NoDomainDelay","true",
576                                   "Has no bypass delay when using the 'wrong' domain">;
577
578// Many processors (Nehalem+ on Intel) have no bypass delay when
579// using the wrong mov type.
// Feature name "no-bypass-delay-mov"; sets the NoDomainDelayMov subtarget field.
580def TuningNoDomainDelayMov : SubtargetFeature<"no-bypass-delay-mov",
581                                   "NoDomainDelayMov","true",
582                                   "Has no bypass delay when using the 'wrong' mov type">;
583
584// Newer processors (Skylake+ on Intel) have no bypass delay when
585// using the wrong blend type.
// Feature name "no-bypass-delay-blend"; sets the NoDomainDelayBlend subtarget
// field.
586def TuningNoDomainDelayBlend : SubtargetFeature<"no-bypass-delay-blend",
587                                   "NoDomainDelayBlend","true",
588                                   "Has no bypass delay when using the 'wrong' blend type">;
589
590// Newer processors (Haswell+ on Intel) have no bypass delay when
591// using the wrong shuffle type.
// Feature name "no-bypass-delay-shuffle"; sets the NoDomainDelayShuffle
// subtarget field.
592def TuningNoDomainDelayShuffle : SubtargetFeature<"no-bypass-delay-shuffle",
593                                   "NoDomainDelayShuffle","true",
594                                   "Has no bypass delay when using the 'wrong' shuffle type">;
595
596// Prefer lowering shuffles on AVX512 targets (e.g. Skylake Server) to
597// imm shifts/rotate if they can use more ports than regular shuffles.
// Feature name "faster-shift-than-shuffle"; note the subtarget field it sets
// is named PreferLowerShuffleAsShift, not after the feature string.
598def TuningPreferShiftShuffle : SubtargetFeature<"faster-shift-than-shuffle",
599                                   "PreferLowerShuffleAsShift", "true",
600                                   "Shifts are faster (or as fast) as shuffle">;
601
// Sets the FastImmVectorShift subtarget field, recording that vector shifts
// run at 2/cycle rather than 1/cycle. Unlike the other tuning flags in this
// file, the feature string itself carries a "tuning-" prefix
// ("tuning-fast-imm-vector-shift").
602def TuningFastImmVectorShift : SubtargetFeature<"tuning-fast-imm-vector-shift",
603                                   "FastImmVectorShift", "true",
604                                   "Vector shifts are fast (2/cycle) as opposed to slow (1/cycle)">;
605
606// On some X86 processors, a vzeroupper instruction should be inserted after
607// using ymm/zmm registers before executing code that may use SSE instructions.
// Feature name "vzeroupper"; sets the InsertVZEROUPPER subtarget field.
608def TuningInsertVZEROUPPER
609    : SubtargetFeature<"vzeroupper",
610                       "InsertVZEROUPPER",
611                       "true", "Should insert vzeroupper instructions">;
612
613// TuningFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency
614// than the corresponding NR code. TuningFastVectorFSQRT should be enabled if
615// vector FSQRT has higher throughput than the corresponding NR code.
616// The idea is that throughput bound code is likely to be vectorized, so for
617// vectorized code we should care about the throughput of SQRT operations.
618// But if the code is scalar that probably means that the code has some kind of
619// dependency and we should care more about reducing the latency.
620
621// True if hardware SQRTSS instruction is at least as fast (latency) as
622// RSQRTSS followed by a Newton-Raphson iteration.
// Feature name "fast-scalar-fsqrt"; sets the HasFastScalarFSQRT subtarget field.
623def TuningFastScalarFSQRT
624    : SubtargetFeature<"fast-scalar-fsqrt", "HasFastScalarFSQRT",
625                       "true", "Scalar SQRT is fast (disable Newton-Raphson)">;
626// True if hardware SQRTPS/VSQRTPS instructions are at least as fast
627// (throughput) as RSQRTPS/VRSQRTPS followed by a Newton-Raphson iteration.
// Feature name "fast-vector-fsqrt"; sets the HasFastVectorFSQRT subtarget field.
628def TuningFastVectorFSQRT
629    : SubtargetFeature<"fast-vector-fsqrt", "HasFastVectorFSQRT",
630                       "true", "Vector SQRT is fast (disable Newton-Raphson)">;
631
632// If lzcnt has equivalent latency/throughput to most simple integer ops, it can
633// be used to replace test/set sequences.
// Feature name "fast-lzcnt"; sets the HasFastLZCNT subtarget field.
634def TuningFastLZCNT
635    : SubtargetFeature<
636          "fast-lzcnt", "HasFastLZCNT", "true",
637          "LZCNT instructions are as fast as most simple integer ops">;
638
639// True if the target can efficiently decode NOPs up to 7 bytes in length.
// Feature name "fast-7bytenop"; sets the HasFast7ByteNOP subtarget field.
640def TuningFast7ByteNOP
641    : SubtargetFeature<
642          "fast-7bytenop", "HasFast7ByteNOP", "true",
643          "Target can quickly decode up to 7 byte NOPs">;
644
645// True if the target can efficiently decode NOPs up to 11 bytes in length.
// Feature name "fast-11bytenop"; sets the HasFast11ByteNOP subtarget field.
646def TuningFast11ByteNOP
647    : SubtargetFeature<
648          "fast-11bytenop", "HasFast11ByteNOP", "true",
649          "Target can quickly decode up to 11 byte NOPs">;
650
651// True if the target can efficiently decode NOPs up to 15 bytes in length.
// Feature name "fast-15bytenop"; sets the HasFast15ByteNOP subtarget field.
652def TuningFast15ByteNOP
653    : SubtargetFeature<
654          "fast-15bytenop", "HasFast15ByteNOP", "true",
655          "Target can quickly decode up to 15 byte NOPs">;
656
657// Sandy Bridge and newer processors can use SHLD with the same source on both
658// inputs to implement rotate to avoid the partial flag update of the normal
659// rotate instructions.
// Feature name "fast-shld-rotate"; sets the HasFastSHLDRotate subtarget field.
660def TuningFastSHLDRotate
661    : SubtargetFeature<
662          "fast-shld-rotate", "HasFastSHLDRotate", "true",
663          "SHLD can be used as a faster rotate">;
664
665// Bulldozer and newer processors can merge CMP/TEST (but not other
666// instructions) with conditional branches.
// Feature name "branchfusion"; sets the HasBranchFusion subtarget field.
667def TuningBranchFusion
668    : SubtargetFeature<"branchfusion", "HasBranchFusion", "true",
669                 "CMP/TEST can be fused with conditional branches">;
670
671// Sandy Bridge and newer processors have many instructions that can be
672// fused with conditional branches and pass through the CPU as a single
673// operation.
// Feature name "macrofusion"; sets the HasMacroFusion subtarget field.
674def TuningMacroFusion
675    : SubtargetFeature<"macrofusion", "HasMacroFusion", "true",
676                 "Various instructions can be fused with conditional branches">;
677
678// Gather is available since Haswell (AVX2 set). So technically, we can
679// generate Gathers on all AVX2 processors. But the overhead on HSW is high.
680// Skylake Client processor has faster Gathers than HSW and performance is
681// similar to Skylake Server (AVX-512).
// Feature name "fast-gather"; sets the HasFastGather subtarget field.
682def TuningFastGather
683    : SubtargetFeature<"fast-gather", "HasFastGather", "true",
684                       "Indicates if gather is reasonably fast (this is true for Skylake client and all AVX-512 CPUs)">;
685
// Tuning flag "prefer-no-gather". Note the inverted sense: enabling this
// feature sets the PreferGather subtarget field to "false" rather than setting
// a dedicated field to "true".
686def TuningPreferNoGather
687    : SubtargetFeature<"prefer-no-gather", "PreferGather", "false",
688                       "Prefer no gather instructions">;
// Tuning flag "prefer-no-scatter". Note the inverted sense: enabling this
// feature sets the PreferScatter subtarget field to "false" rather than
// setting a dedicated field to "true".
689def TuningPreferNoScatter
690    : SubtargetFeature<"prefer-no-scatter", "PreferScatter", "false",
691                       "Prefer no scatter instructions">;
692
// Tuning flag "prefer-128-bit": sets the Prefer128Bit subtarget field to true,
// expressing a preference for 128-bit AVX instructions.
693def TuningPrefer128Bit
694    : SubtargetFeature<"prefer-128-bit", "Prefer128Bit", "true",
695                       "Prefer 128-bit AVX instructions">;
696
// Tuning flag "prefer-256-bit": sets the Prefer256Bit subtarget field to true,
// expressing a preference for 256-bit AVX instructions.
697def TuningPrefer256Bit
698    : SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true",
699                       "Prefer 256-bit AVX instructions">;
700
// Tuning flag "allow-light-256-bit": sets the AllowLight256Bit subtarget field
// to true, permitting 256-bit loads/stores even when 128-bit is preferred.
701def TuningAllowLight256Bit
702    : SubtargetFeature<"allow-light-256-bit", "AllowLight256Bit", "true",
703                       "Enable generation of 256-bit load/stores even if we prefer 128-bit">;
704
// Tuning flag "prefer-mask-registers": sets the PreferMaskRegisters subtarget
// field to true, preferring AVX512 mask registers over PTEST/MOVMSK.
705def TuningPreferMaskRegisters
706    : SubtargetFeature<"prefer-mask-registers", "PreferMaskRegisters", "true",
707                       "Prefer AVX512 mask registers over PTEST/MOVMSK">;
708
// Tuning flag "fast-bextr": sets the HasFastBEXTR subtarget field to true,
// recording that BEXTR is a single uop with good throughput. The description
// is split across two adjacent string literals.
709def TuningFastBEXTR : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true",
710          "Indicates that the BEXTR instruction is implemented as a single uop "
711          "with good throughput">;
712
713// Combine vector math operations with shuffles into horizontal math
714// instructions if a CPU implements horizontal operations (introduced with
715// SSE3) with better latency/throughput than the alternative sequence.
// Feature name "fast-hops"; sets the HasFastHorizontalOps subtarget field.
716def TuningFastHorizontalOps
717    : SubtargetFeature<
718        "fast-hops", "HasFastHorizontalOps", "true",
719        "Prefer horizontal vector math instructions (haddp, phsub, etc.) over "
720        "normal vector instructions with shuffles">;
721
// Tuning flag "fast-scalar-shift-masks": sets HasFastScalarShiftMasks,
// preferring a left/right scalar logical shift pair over a shift+and pair.
722def TuningFastScalarShiftMasks
723    : SubtargetFeature<
724        "fast-scalar-shift-masks", "HasFastScalarShiftMasks", "true",
725        "Prefer a left/right scalar logical shift pair over a shift+and pair">;
726
// Tuning flag "fast-vector-shift-masks": sets HasFastVectorShiftMasks,
// preferring a left/right vector logical shift pair over a shift+and pair.
727def TuningFastVectorShiftMasks
728    : SubtargetFeature<
729        "fast-vector-shift-masks", "HasFastVectorShiftMasks", "true",
730        "Prefer a left/right vector logical shift pair over a shift+and pair">;
731
// Tuning flag "fast-movbe": sets the HasFastMOVBE subtarget field to true,
// preferring MOVBE over a single-use load + bswap or single-use bswap + store.
732def TuningFastMOVBE
733    : SubtargetFeature<"fast-movbe", "HasFastMOVBE", "true",
734    "Prefer a movbe over a single-use load + bswap / single-use bswap + store">;
735
// Tuning flag "use-slm-arith-costs": sets the UseSLMArithCosts subtarget field
// to true, selecting Silvermont-specific arithmetic costs.
736def TuningUseSLMArithCosts
737    : SubtargetFeature<"use-slm-arith-costs", "UseSLMArithCosts", "true",
738        "Use Silvermont specific arithmetic costs">;
739
// Tuning flag "use-glm-div-sqrt-costs": sets the UseGLMDivSqrtCosts subtarget
// field to true, selecting Goldmont-specific floating point div/sqrt costs.
740def TuningUseGLMDivSqrtCosts
741    : SubtargetFeature<"use-glm-div-sqrt-costs", "UseGLMDivSqrtCosts", "true",
742        "Use Goldmont specific floating point div/sqrt costs">;
743
744//===----------------------------------------------------------------------===//
745// X86 CPU Families
746// TODO: Remove these - use general tuning features to determine codegen.
747//===----------------------------------------------------------------------===//
748
749// Bonnell
// CPU-family marker rather than a tuning flag: sets the IsAtom subtarget field
// to true. The feature-name string is empty.
// NOTE(review): presumably the empty name keeps it from being selected by
// name and it is only enabled through processor feature lists — confirm.
750def ProcIntelAtom : SubtargetFeature<"", "IsAtom", "true", "Is Intel Atom processor">;
751
752//===----------------------------------------------------------------------===//
753// Register File Description
754//===----------------------------------------------------------------------===//
755
756include "X86RegisterInfo.td"
757include "X86RegisterBanks.td"
758
759//===----------------------------------------------------------------------===//
760// Instruction Descriptions
761//===----------------------------------------------------------------------===//
762
763include "X86Schedule.td"
764include "X86InstrInfo.td"
765include "X86SchedPredicates.td"
766
// Instantiates the target's instruction-info record from the InstrInfo class
// with no field overrides.
767def X86InstrInfo : InstrInfo;
768
769//===----------------------------------------------------------------------===//
770// X86 Scheduler Models
771//===----------------------------------------------------------------------===//
772
773include "X86ScheduleAtom.td"
774include "X86SchedSandyBridge.td"
775include "X86SchedHaswell.td"
776include "X86SchedBroadwell.td"
777include "X86ScheduleSLM.td"
778include "X86ScheduleZnver1.td"
779include "X86ScheduleZnver2.td"
780include "X86ScheduleZnver3.td"
781include "X86ScheduleZnver4.td"
782include "X86ScheduleBdVer2.td"
783include "X86ScheduleBtVer2.td"
784include "X86SchedSkylakeClient.td"
785include "X86SchedSkylakeServer.td"
786include "X86SchedIceLake.td"
787include "X86SchedAlderlakeP.td"
788include "X86SchedSapphireRapids.td"
789
790//===----------------------------------------------------------------------===//
791// X86 Processor Feature Lists
792//===----------------------------------------------------------------------===//
793
794def ProcessorFeatures {
795  // x86-64 micro-architecture levels: x86-64 and x86-64-v[234]
796  list<SubtargetFeature> X86_64V1Features = [
797    FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX, FeatureSSE2,
798    FeatureFXSR, FeatureNOPL, FeatureX86_64,
799  ];
800  list<SubtargetFeature> X86_64V1Tuning = [
801    TuningMacroFusion,
802    TuningSlow3OpsLEA,
803    TuningSlowDivide64,
804    TuningSlowIncDec,
805    TuningInsertVZEROUPPER
806  ];
807
808  list<SubtargetFeature> X86_64V2Features = !listconcat(X86_64V1Features, [
809    FeatureCX16, FeatureLAHFSAHF64, FeatureCRC32, FeaturePOPCNT,
810    FeatureSSE42
811  ]);
812  list<SubtargetFeature> X86_64V2Tuning = [
813    TuningMacroFusion,
814    TuningSlow3OpsLEA,
815    TuningSlowDivide64,
816    TuningSlowUAMem32,
817    TuningFastScalarFSQRT,
818    TuningFastSHLDRotate,
819    TuningFast15ByteNOP,
820    TuningPOPCNTFalseDeps,
821    TuningInsertVZEROUPPER
822  ];
823
824  list<SubtargetFeature> X86_64V3Features = !listconcat(X86_64V2Features, [
825    FeatureAVX2, FeatureBMI, FeatureBMI2, FeatureF16C, FeatureFMA, FeatureLZCNT,
826    FeatureMOVBE, FeatureXSAVE
827  ]);
828  list<SubtargetFeature> X86_64V3Tuning = [
829    TuningMacroFusion,
830    TuningSlow3OpsLEA,
831    TuningSlowDivide64,
832    TuningFastScalarFSQRT,
833    TuningFastSHLDRotate,
834    TuningFast15ByteNOP,
835    TuningFastVariableCrossLaneShuffle,
836    TuningFastVariablePerLaneShuffle,
837    TuningPOPCNTFalseDeps,
838    TuningLZCNTFalseDeps,
839    TuningInsertVZEROUPPER,
840    TuningAllowLight256Bit
841  ];
842
843  list<SubtargetFeature> X86_64V4Features = !listconcat(X86_64V3Features, [
844    FeatureEVEX512,
845    FeatureBWI,
846    FeatureCDI,
847    FeatureDQI,
848    FeatureVLX,
849  ]);
850  list<SubtargetFeature> X86_64V4Tuning = [
851    TuningMacroFusion,
852    TuningSlow3OpsLEA,
853    TuningSlowDivide64,
854    TuningFastScalarFSQRT,
855    TuningFastVectorFSQRT,
856    TuningFastSHLDRotate,
857    TuningFast15ByteNOP,
858    TuningFastVariableCrossLaneShuffle,
859    TuningFastVariablePerLaneShuffle,
860    TuningPrefer256Bit,
861    TuningFastGather,
862    TuningPOPCNTFalseDeps,
863    TuningInsertVZEROUPPER,
864    TuningAllowLight256Bit
865  ];
866
867  // Nehalem
868  list<SubtargetFeature> NHMFeatures = X86_64V2Features;
869  list<SubtargetFeature> NHMTuning = [TuningMacroFusion,
870                                      TuningSlowDivide64,
871                                      TuningInsertVZEROUPPER,
872                                      TuningNoDomainDelayMov];
873
874  // Westmere
875  list<SubtargetFeature> WSMAdditionalFeatures = [FeaturePCLMUL];
876  list<SubtargetFeature> WSMTuning = NHMTuning;
877  list<SubtargetFeature> WSMFeatures =
878    !listconcat(NHMFeatures, WSMAdditionalFeatures);
879
880  // Sandybridge
881  list<SubtargetFeature> SNBAdditionalFeatures = [FeatureAVX,
882                                                  FeatureXSAVE,
883                                                  FeatureXSAVEOPT];
884  list<SubtargetFeature> SNBTuning = [TuningMacroFusion,
885                                      TuningSlow3OpsLEA,
886                                      TuningSlowDivide64,
887                                      TuningSlowUAMem32,
888                                      TuningFastScalarFSQRT,
889                                      TuningFastSHLDRotate,
890                                      TuningFast15ByteNOP,
891                                      TuningPOPCNTFalseDeps,
892                                      TuningInsertVZEROUPPER,
893                                      TuningNoDomainDelayMov];
894  list<SubtargetFeature> SNBFeatures =
895    !listconcat(WSMFeatures, SNBAdditionalFeatures);
896
897  // Ivybridge
898  list<SubtargetFeature> IVBAdditionalFeatures = [FeatureRDRAND,
899                                                  FeatureF16C,
900                                                  FeatureFSGSBase];
901  list<SubtargetFeature> IVBTuning = SNBTuning;
902  list<SubtargetFeature> IVBFeatures =
903    !listconcat(SNBFeatures, IVBAdditionalFeatures);
904
905  // Haswell
906  list<SubtargetFeature> HSWAdditionalFeatures = [FeatureAVX2,
907                                                  FeatureBMI,
908                                                  FeatureBMI2,
909                                                  FeatureERMSB,
910                                                  FeatureFMA,
911                                                  FeatureINVPCID,
912                                                  FeatureLZCNT,
913                                                  FeatureMOVBE];
914  list<SubtargetFeature> HSWTuning = [TuningMacroFusion,
915                                      TuningSlow3OpsLEA,
916                                      TuningSlowDivide64,
917                                      TuningFastScalarFSQRT,
918                                      TuningFastSHLDRotate,
919                                      TuningFast15ByteNOP,
920                                      TuningFastVariableCrossLaneShuffle,
921                                      TuningFastVariablePerLaneShuffle,
922                                      TuningPOPCNTFalseDeps,
923                                      TuningLZCNTFalseDeps,
924                                      TuningInsertVZEROUPPER,
925                                      TuningAllowLight256Bit,
926                                      TuningNoDomainDelayMov,
927                                      TuningNoDomainDelayShuffle];
928  list<SubtargetFeature> HSWFeatures =
929    !listconcat(IVBFeatures, HSWAdditionalFeatures);
930
931  // Broadwell
932  list<SubtargetFeature> BDWAdditionalFeatures = [FeatureADX,
933                                                  FeatureRDSEED,
934                                                  FeaturePRFCHW];
935  list<SubtargetFeature> BDWTuning = HSWTuning;
936  list<SubtargetFeature> BDWFeatures =
937    !listconcat(HSWFeatures, BDWAdditionalFeatures);
938
939  // Skylake
940  list<SubtargetFeature> SKLAdditionalFeatures = [FeatureAES,
941                                                  FeatureXSAVEC,
942                                                  FeatureXSAVES,
943                                                  FeatureCLFLUSHOPT];
944  list<SubtargetFeature> SKLTuning = [TuningFastGather,
945                                      TuningMacroFusion,
946                                      TuningSlow3OpsLEA,
947                                      TuningSlowDivide64,
948                                      TuningFastScalarFSQRT,
949                                      TuningFastVectorFSQRT,
950                                      TuningFastSHLDRotate,
951                                      TuningFast15ByteNOP,
952                                      TuningFastVariableCrossLaneShuffle,
953                                      TuningFastVariablePerLaneShuffle,
954                                      TuningPOPCNTFalseDeps,
955                                      TuningInsertVZEROUPPER,
956                                      TuningAllowLight256Bit,
957                                      TuningNoDomainDelayMov,
958                                      TuningNoDomainDelayShuffle,
959                                      TuningNoDomainDelayBlend];
960  list<SubtargetFeature> SKLFeatures =
961    !listconcat(BDWFeatures, SKLAdditionalFeatures);
962
963  // Skylake-AVX512
964  list<SubtargetFeature> SKXAdditionalFeatures = [FeatureAES,
965                                                  FeatureXSAVEC,
966                                                  FeatureXSAVES,
967                                                  FeatureCLFLUSHOPT,
968                                                  FeatureAVX512,
969                                                  FeatureEVEX512,
970                                                  FeatureCDI,
971                                                  FeatureDQI,
972                                                  FeatureBWI,
973                                                  FeatureVLX,
974                                                  FeaturePKU,
975                                                  FeatureCLWB];
976  list<SubtargetFeature> SKXTuning = [TuningFastGather,
977                                      TuningMacroFusion,
978                                      TuningSlow3OpsLEA,
979                                      TuningSlowDivide64,
980                                      TuningFastScalarFSQRT,
981                                      TuningFastVectorFSQRT,
982                                      TuningFastSHLDRotate,
983                                      TuningFast15ByteNOP,
984                                      TuningFastVariableCrossLaneShuffle,
985                                      TuningFastVariablePerLaneShuffle,
986                                      TuningPrefer256Bit,
987                                      TuningPOPCNTFalseDeps,
988                                      TuningInsertVZEROUPPER,
989                                      TuningAllowLight256Bit,
990                                      TuningPreferShiftShuffle,
991                                      TuningNoDomainDelayMov,
992                                      TuningNoDomainDelayShuffle,
993                                      TuningNoDomainDelayBlend,
994                                      TuningFastImmVectorShift];
995  list<SubtargetFeature> SKXFeatures =
996    !listconcat(BDWFeatures, SKXAdditionalFeatures);
997
998  // Cascadelake
999  list<SubtargetFeature> CLXAdditionalFeatures = [FeatureVNNI];
1000  list<SubtargetFeature> CLXTuning = SKXTuning;
1001  list<SubtargetFeature> CLXFeatures =
1002    !listconcat(SKXFeatures, CLXAdditionalFeatures);
1003
1004  // Cooperlake
1005  list<SubtargetFeature> CPXAdditionalFeatures = [FeatureBF16];
1006  list<SubtargetFeature> CPXTuning = SKXTuning;
1007  list<SubtargetFeature> CPXFeatures =
1008    !listconcat(CLXFeatures, CPXAdditionalFeatures);
1009
1010  // Cannonlake
1011  list<SubtargetFeature> CNLAdditionalFeatures = [FeatureAVX512,
1012                                                  FeatureEVEX512,
1013                                                  FeatureCDI,
1014                                                  FeatureDQI,
1015                                                  FeatureBWI,
1016                                                  FeatureVLX,
1017                                                  FeaturePKU,
1018                                                  FeatureVBMI,
1019                                                  FeatureIFMA,
1020                                                  FeatureSHA];
1021  list<SubtargetFeature> CNLTuning = [TuningFastGather,
1022                                      TuningMacroFusion,
1023                                      TuningSlow3OpsLEA,
1024                                      TuningSlowDivide64,
1025                                      TuningFastScalarFSQRT,
1026                                      TuningFastVectorFSQRT,
1027                                      TuningFastSHLDRotate,
1028                                      TuningFast15ByteNOP,
1029                                      TuningFastVariableCrossLaneShuffle,
1030                                      TuningFastVariablePerLaneShuffle,
1031                                      TuningPrefer256Bit,
1032                                      TuningInsertVZEROUPPER,
1033                                      TuningAllowLight256Bit,
1034                                      TuningNoDomainDelayMov,
1035                                      TuningNoDomainDelayShuffle,
1036                                      TuningNoDomainDelayBlend,
1037                                      TuningFastImmVectorShift];
1038  list<SubtargetFeature> CNLFeatures =
1039    !listconcat(SKLFeatures, CNLAdditionalFeatures);
1040
1041  // Icelake
1042  list<SubtargetFeature> ICLAdditionalFeatures = [FeatureBITALG,
1043                                                  FeatureVAES,
1044                                                  FeatureVBMI2,
1045                                                  FeatureVNNI,
1046                                                  FeatureVPCLMULQDQ,
1047                                                  FeatureVPOPCNTDQ,
1048                                                  FeatureGFNI,
1049                                                  FeatureRDPID,
1050                                                  FeatureFSRM];
1051  list<SubtargetFeature> ICLTuning = [TuningFastGather,
1052                                      TuningMacroFusion,
1053                                      TuningSlowDivide64,
1054                                      TuningFastScalarFSQRT,
1055                                      TuningFastVectorFSQRT,
1056                                      TuningFastSHLDRotate,
1057                                      TuningFast15ByteNOP,
1058                                      TuningFastVariableCrossLaneShuffle,
1059                                      TuningFastVariablePerLaneShuffle,
1060                                      TuningPrefer256Bit,
1061                                      TuningInsertVZEROUPPER,
1062                                      TuningAllowLight256Bit,
1063                                      TuningNoDomainDelayMov,
1064                                      TuningNoDomainDelayShuffle,
1065                                      TuningNoDomainDelayBlend,
1066                                      TuningFastImmVectorShift];
1067  list<SubtargetFeature> ICLFeatures =
1068    !listconcat(CNLFeatures, ICLAdditionalFeatures);
1069
1070  // Icelake Server
1071  list<SubtargetFeature> ICXAdditionalFeatures = [FeaturePCONFIG,
1072                                                  FeatureCLWB,
1073                                                  FeatureWBNOINVD];
1074  list<SubtargetFeature> ICXTuning = ICLTuning;
1075  list<SubtargetFeature> ICXFeatures =
1076    !listconcat(ICLFeatures, ICXAdditionalFeatures);
1077
1078  // Tigerlake
1079  list<SubtargetFeature> TGLAdditionalFeatures = [FeatureVP2INTERSECT,
1080                                                  FeatureCLWB,
1081                                                  FeatureMOVDIRI,
1082                                                  FeatureMOVDIR64B,
1083                                                  FeatureSHSTK];
1084  list<SubtargetFeature> TGLTuning = ICLTuning;
1085  list<SubtargetFeature> TGLFeatures =
1086    !listconcat(ICLFeatures, TGLAdditionalFeatures );
1087
1088  // Sapphirerapids
1089  list<SubtargetFeature> SPRAdditionalFeatures = [FeatureAMXTILE,
1090                                                  FeatureAMXINT8,
1091                                                  FeatureAMXBF16,
1092                                                  FeatureBF16,
1093                                                  FeatureSERIALIZE,
1094                                                  FeatureCLDEMOTE,
1095                                                  FeatureWAITPKG,
1096                                                  FeaturePTWRITE,
1097                                                  FeatureFP16,
1098                                                  FeatureAVXVNNI,
1099                                                  FeatureTSXLDTRK,
1100                                                  FeatureENQCMD,
1101                                                  FeatureSHSTK,
1102                                                  FeatureMOVDIRI,
1103                                                  FeatureMOVDIR64B,
1104                                                  FeatureUINTR];
1105  list<SubtargetFeature> SPRAdditionalTuning = [TuningMULCFalseDeps,
1106                                                TuningPERMFalseDeps,
1107                                                TuningRANGEFalseDeps,
1108                                                TuningGETMANTFalseDeps,
1109                                                TuningMULLQFalseDeps];
1110  list<SubtargetFeature> SPRTuning = !listconcat(ICXTuning, SPRAdditionalTuning);
1111  list<SubtargetFeature> SPRFeatures =
1112    !listconcat(ICXFeatures, SPRAdditionalFeatures);
1113
1114  // Graniterapids
1115  list<SubtargetFeature> GNRAdditionalFeatures = [FeatureAMXFP16,
1116                                                  FeaturePREFETCHI];
1117  list<SubtargetFeature> GNRFeatures =
1118    !listconcat(SPRFeatures, GNRAdditionalFeatures);
1119
1120  // Graniterapids D
1121  list<SubtargetFeature> GNRDAdditionalFeatures = [FeatureAMXCOMPLEX];
1122  list<SubtargetFeature> GNRDFeatures =
1123    !listconcat(GNRFeatures, GNRDAdditionalFeatures);
1124
1125  // Atom
1126  list<SubtargetFeature> AtomFeatures = [FeatureX87,
1127                                         FeatureCX8,
1128                                         FeatureCMOV,
1129                                         FeatureMMX,
1130                                         FeatureSSSE3,
1131                                         FeatureFXSR,
1132                                         FeatureNOPL,
1133                                         FeatureX86_64,
1134                                         FeatureCX16,
1135                                         FeatureMOVBE,
1136                                         FeatureLAHFSAHF64];
1137  list<SubtargetFeature> AtomTuning = [ProcIntelAtom,
1138                                       TuningSlowUAMem16,
1139                                       TuningLEAForSP,
1140                                       TuningSlowDivide32,
1141                                       TuningSlowDivide64,
1142                                       TuningSlowTwoMemOps,
1143                                       TuningLEAUsesAG,
1144                                       TuningPadShortFunctions,
1145                                       TuningInsertVZEROUPPER,
1146                                       TuningNoDomainDelay];
1147
1148  // Silvermont
1149  list<SubtargetFeature> SLMAdditionalFeatures = [FeatureSSE42,
1150                                                  FeatureCRC32,
1151                                                  FeaturePOPCNT,
1152                                                  FeaturePCLMUL,
1153                                                  FeaturePRFCHW,
1154                                                  FeatureRDRAND];
1155  list<SubtargetFeature> SLMTuning = [TuningUseSLMArithCosts,
1156                                      TuningSlowTwoMemOps,
1157                                      TuningSlowLEA,
1158                                      TuningSlowIncDec,
1159                                      TuningSlowDivide64,
1160                                      TuningSlowPMULLD,
1161                                      TuningFast7ByteNOP,
1162                                      TuningFastMOVBE,
1163                                      TuningPOPCNTFalseDeps,
1164                                      TuningInsertVZEROUPPER,
1165                                      TuningNoDomainDelay];
1166  list<SubtargetFeature> SLMFeatures =
1167    !listconcat(AtomFeatures, SLMAdditionalFeatures);
1168
1169  // Goldmont
1170  list<SubtargetFeature> GLMAdditionalFeatures = [FeatureAES,
1171                                                  FeatureSHA,
1172                                                  FeatureRDSEED,
1173                                                  FeatureXSAVE,
1174                                                  FeatureXSAVEOPT,
1175                                                  FeatureXSAVEC,
1176                                                  FeatureXSAVES,
1177                                                  FeatureCLFLUSHOPT,
1178                                                  FeatureFSGSBase];
1179  list<SubtargetFeature> GLMTuning = [TuningUseGLMDivSqrtCosts,
1180                                      TuningSlowTwoMemOps,
1181                                      TuningSlowLEA,
1182                                      TuningSlowIncDec,
1183                                      TuningFastMOVBE,
1184                                      TuningPOPCNTFalseDeps,
1185                                      TuningInsertVZEROUPPER,
1186                                      TuningNoDomainDelay];
1187  list<SubtargetFeature> GLMFeatures =
1188    !listconcat(SLMFeatures, GLMAdditionalFeatures);
1189
1190  // Goldmont Plus
1191  list<SubtargetFeature> GLPAdditionalFeatures = [FeaturePTWRITE,
1192                                                  FeatureRDPID];
1193  list<SubtargetFeature> GLPTuning = [TuningUseGLMDivSqrtCosts,
1194                                      TuningSlowTwoMemOps,
1195                                      TuningSlowLEA,
1196                                      TuningSlowIncDec,
1197                                      TuningFastMOVBE,
1198                                      TuningInsertVZEROUPPER,
1199                                      TuningNoDomainDelay];
1200  list<SubtargetFeature> GLPFeatures =
1201    !listconcat(GLMFeatures, GLPAdditionalFeatures);
1202
1203  // Tremont
1204  list<SubtargetFeature> TRMAdditionalFeatures = [FeatureCLWB,
1205                                                  FeatureGFNI];
1206  list<SubtargetFeature> TRMTuning = GLPTuning;
1207  list<SubtargetFeature> TRMFeatures =
1208    !listconcat(GLPFeatures, TRMAdditionalFeatures);
1209
1210  // Alderlake
1211  list<SubtargetFeature> ADLAdditionalFeatures = [FeatureSERIALIZE,
1212                                                  FeaturePCONFIG,
1213                                                  FeatureSHSTK,
1214                                                  FeatureWIDEKL,
1215                                                  FeatureINVPCID,
1216                                                  FeatureADX,
1217                                                  FeatureFMA,
1218                                                  FeatureVAES,
1219                                                  FeatureVPCLMULQDQ,
1220                                                  FeatureF16C,
1221                                                  FeatureBMI,
1222                                                  FeatureBMI2,
1223                                                  FeatureLZCNT,
1224                                                  FeatureAVXVNNI,
1225                                                  FeaturePKU,
1226                                                  FeatureHRESET,
1227                                                  FeatureCLDEMOTE,
1228                                                  FeatureMOVDIRI,
1229                                                  FeatureMOVDIR64B,
1230                                                  FeatureWAITPKG];
1231  list<SubtargetFeature> ADLAdditionalTuning = [TuningPERMFalseDeps,
1232                                                TuningPreferMovmskOverVTest,
1233                                                TuningFastImmVectorShift];
1234  list<SubtargetFeature> ADLTuning = !listconcat(SKLTuning, ADLAdditionalTuning);
1235  list<SubtargetFeature> ADLFeatures =
1236    !listconcat(TRMFeatures, ADLAdditionalFeatures);
1237
1238  // Gracemont
1239  list<SubtargetFeature> GRTTuning = [TuningMacroFusion,
1240                                      TuningSlow3OpsLEA,
1241                                      TuningSlowDivide32,
1242                                      TuningSlowDivide64,
1243                                      TuningFastScalarFSQRT,
1244                                      TuningFastVectorFSQRT,
1245                                      TuningFast15ByteNOP,
1246                                      TuningFastVariablePerLaneShuffle,
1247                                      TuningPOPCNTFalseDeps,
1248                                      TuningInsertVZEROUPPER];
1249
1250  // Sierraforest
1251  list<SubtargetFeature> SRFAdditionalFeatures = [FeatureCMPCCXADD,
1252                                                  FeatureAVXIFMA,
1253                                                  FeatureAVXNECONVERT,
1254                                                  FeatureENQCMD,
1255                                                  FeatureUINTR,
1256                                                  FeatureAVXVNNIINT8];
1257  list<SubtargetFeature> SRFFeatures =
1258    !listconcat(ADLFeatures, SRFAdditionalFeatures);
1259
1260  // Arrowlake S
1261  list<SubtargetFeature> ARLSAdditionalFeatures = [FeatureAVXVNNIINT16,
1262                                                   FeatureSHA512,
1263                                                   FeatureSM3,
1264                                                   FeatureSM4];
1265  list<SubtargetFeature> ARLSFeatures =
1266    !listconcat(SRFFeatures, ARLSAdditionalFeatures);
1267
1268  // Pantherlake
1269  list<SubtargetFeature> PTLAdditionalFeatures = [FeaturePREFETCHI];
1270  list<SubtargetFeature> PTLFeatures =
1271    !listconcat(ARLSFeatures, PTLAdditionalFeatures);
1272
1273
1274  // Clearwaterforest
1275  list<SubtargetFeature> CWFAdditionalFeatures = [FeaturePREFETCHI,
1276                                                  FeatureUSERMSR];
1277  list<SubtargetFeature> CWFFeatures =
1278    !listconcat(ARLSFeatures, CWFAdditionalFeatures);
1279
1280  // Knights Landing
1281  list<SubtargetFeature> KNLFeatures = [FeatureX87,
1282                                        FeatureCX8,
1283                                        FeatureCMOV,
1284                                        FeatureMMX,
1285                                        FeatureFXSR,
1286                                        FeatureNOPL,
1287                                        FeatureX86_64,
1288                                        FeatureCX16,
1289                                        FeatureCRC32,
1290                                        FeaturePOPCNT,
1291                                        FeaturePCLMUL,
1292                                        FeatureXSAVE,
1293                                        FeatureXSAVEOPT,
1294                                        FeatureLAHFSAHF64,
1295                                        FeatureAES,
1296                                        FeatureRDRAND,
1297                                        FeatureF16C,
1298                                        FeatureFSGSBase,
1299                                        FeatureAVX512,
1300                                        FeatureEVEX512,
1301                                        FeatureERI,
1302                                        FeatureCDI,
1303                                        FeaturePFI,
1304                                        FeaturePREFETCHWT1,
1305                                        FeatureADX,
1306                                        FeatureRDSEED,
1307                                        FeatureMOVBE,
1308                                        FeatureLZCNT,
1309                                        FeatureBMI,
1310                                        FeatureBMI2,
1311                                        FeatureFMA,
1312                                        FeaturePRFCHW];
1313  list<SubtargetFeature> KNLTuning = [TuningSlowDivide64,
1314                                      TuningSlow3OpsLEA,
1315                                      TuningSlowIncDec,
1316                                      TuningSlowTwoMemOps,
1317                                      TuningPreferMaskRegisters,
1318                                      TuningFastGather,
1319                                      TuningFastMOVBE,
1320                                      TuningSlowPMADDWD];
1321  // TODO Add AVX5124FMAPS/AVX5124VNNIW features
1322  list<SubtargetFeature> KNMFeatures =
1323    !listconcat(KNLFeatures, [FeatureVPOPCNTDQ]);
1324
1325  // Barcelona
1326  list<SubtargetFeature> BarcelonaFeatures = [FeatureX87,
1327                                              FeatureCX8,
1328                                              FeatureSSE4A,
1329                                              Feature3DNowA,
1330                                              FeatureFXSR,
1331                                              FeatureNOPL,
1332                                              FeatureCX16,
1333                                              FeaturePRFCHW,
1334                                              FeatureLZCNT,
1335                                              FeaturePOPCNT,
1336                                              FeatureLAHFSAHF64,
1337                                              FeatureCMOV,
1338                                              FeatureX86_64];
1339  list<SubtargetFeature> BarcelonaTuning = [TuningFastScalarShiftMasks,
1340                                            TuningSlowDivide64,
1341                                            TuningSlowSHLD,
1342                                            TuningSBBDepBreaking,
1343                                            TuningInsertVZEROUPPER];
1344
1345  // Bobcat
1346  list<SubtargetFeature> BtVer1Features = [FeatureX87,
1347                                           FeatureCX8,
1348                                           FeatureCMOV,
1349                                           FeatureMMX,
1350                                           FeatureSSSE3,
1351                                           FeatureSSE4A,
1352                                           FeatureFXSR,
1353                                           FeatureNOPL,
1354                                           FeatureX86_64,
1355                                           FeatureCX16,
1356                                           FeaturePRFCHW,
1357                                           FeatureLZCNT,
1358                                           FeaturePOPCNT,
1359                                           FeatureLAHFSAHF64];
1360  list<SubtargetFeature> BtVer1Tuning = [TuningFast15ByteNOP,
1361                                         TuningFastScalarShiftMasks,
1362                                         TuningFastVectorShiftMasks,
1363                                         TuningSlowDivide64,
1364                                         TuningSlowSHLD,
1365                                         TuningSBBDepBreaking,
1366                                         TuningInsertVZEROUPPER];
1367
1368  // Jaguar
1369  list<SubtargetFeature> BtVer2AdditionalFeatures = [FeatureAVX,
1370                                                     FeatureAES,
1371                                                     FeatureCRC32,
1372                                                     FeaturePCLMUL,
1373                                                     FeatureBMI,
1374                                                     FeatureF16C,
1375                                                     FeatureMOVBE,
1376                                                     FeatureXSAVE,
1377                                                     FeatureXSAVEOPT];
1378  list<SubtargetFeature> BtVer2Tuning = [TuningFastLZCNT,
1379                                         TuningFastBEXTR,
1380                                         TuningFastHorizontalOps,
1381                                         TuningFast15ByteNOP,
1382                                         TuningFastScalarShiftMasks,
1383                                         TuningFastVectorShiftMasks,
1384                                         TuningFastMOVBE,
1385                                         TuningSBBDepBreaking,
1386                                         TuningSlowDivide64,
1387                                         TuningSlowSHLD];
1388  list<SubtargetFeature> BtVer2Features =
1389    !listconcat(BtVer1Features, BtVer2AdditionalFeatures);
1390
1391  // Bulldozer
1392  list<SubtargetFeature> BdVer1Features = [FeatureX87,
1393                                           FeatureCX8,
1394                                           FeatureCMOV,
1395                                           FeatureXOP,
1396                                           FeatureX86_64,
1397                                           FeatureCX16,
1398                                           FeatureAES,
1399                                           FeatureCRC32,
1400                                           FeaturePRFCHW,
1401                                           FeaturePCLMUL,
1402                                           FeatureMMX,
1403                                           FeatureFXSR,
1404                                           FeatureNOPL,
1405                                           FeatureLZCNT,
1406                                           FeaturePOPCNT,
1407                                           FeatureXSAVE,
1408                                           FeatureLWP,
1409                                           FeatureLAHFSAHF64];
1410  list<SubtargetFeature> BdVer1Tuning = [TuningSlowSHLD,
1411                                         TuningSlowDivide64,
1412                                         TuningFast11ByteNOP,
1413                                         TuningFastScalarShiftMasks,
1414                                         TuningBranchFusion,
1415                                         TuningSBBDepBreaking,
1416                                         TuningInsertVZEROUPPER];
1417
1418  // PileDriver
1419  list<SubtargetFeature> BdVer2AdditionalFeatures = [FeatureF16C,
1420                                                     FeatureBMI,
1421                                                     FeatureTBM,
1422                                                     FeatureFMA];
1423  list<SubtargetFeature> BdVer2AdditionalTuning = [TuningFastBEXTR,
1424                                                   TuningFastMOVBE];
1425  list<SubtargetFeature> BdVer2Tuning =
1426    !listconcat(BdVer1Tuning, BdVer2AdditionalTuning);
1427  list<SubtargetFeature> BdVer2Features =
1428    !listconcat(BdVer1Features, BdVer2AdditionalFeatures);
1429
1430  // Steamroller
1431  list<SubtargetFeature> BdVer3AdditionalFeatures = [FeatureXSAVEOPT,
1432                                                     FeatureFSGSBase];
1433  list<SubtargetFeature> BdVer3Tuning = BdVer2Tuning;
1434  list<SubtargetFeature> BdVer3Features =
1435    !listconcat(BdVer2Features, BdVer3AdditionalFeatures);
1436
1437  // Excavator
1438  list<SubtargetFeature> BdVer4AdditionalFeatures = [FeatureAVX2,
1439                                                     FeatureBMI2,
1440                                                     FeatureMOVBE,
1441                                                     FeatureRDRAND,
1442                                                     FeatureMWAITX];
1443  list<SubtargetFeature> BdVer4Tuning = BdVer3Tuning;
1444  list<SubtargetFeature> BdVer4Features =
1445    !listconcat(BdVer3Features, BdVer4AdditionalFeatures);
1446
1447
1448  // AMD Zen Processors common ISAs
1449  list<SubtargetFeature> ZNFeatures = [FeatureADX,
1450                                       FeatureAES,
1451                                       FeatureAVX2,
1452                                       FeatureBMI,
1453                                       FeatureBMI2,
1454                                       FeatureCLFLUSHOPT,
1455                                       FeatureCLZERO,
1456                                       FeatureCMOV,
1457                                       FeatureX86_64,
1458                                       FeatureCX16,
1459                                       FeatureCRC32,
1460                                       FeatureF16C,
1461                                       FeatureFMA,
1462                                       FeatureFSGSBase,
1463                                       FeatureFXSR,
1464                                       FeatureNOPL,
1465                                       FeatureLAHFSAHF64,
1466                                       FeatureLZCNT,
1467                                       FeatureMMX,
1468                                       FeatureMOVBE,
1469                                       FeatureMWAITX,
1470                                       FeaturePCLMUL,
1471                                       FeaturePOPCNT,
1472                                       FeaturePRFCHW,
1473                                       FeatureRDRAND,
1474                                       FeatureRDSEED,
1475                                       FeatureSHA,
1476                                       FeatureSSE4A,
1477                                       FeatureX87,
1478                                       FeatureXSAVE,
1479                                       FeatureXSAVEC,
1480                                       FeatureXSAVEOPT,
1481                                       FeatureXSAVES];
1482  list<SubtargetFeature> ZNTuning = [TuningFastLZCNT,
1483                                     TuningFastBEXTR,
1484                                     TuningFast15ByteNOP,
1485                                     TuningBranchFusion,
1486                                     TuningFastScalarFSQRT,
1487                                     TuningFastVectorFSQRT,
1488                                     TuningFastScalarShiftMasks,
1489                                     TuningFastVariablePerLaneShuffle,
1490                                     TuningFastMOVBE,
1491                                     TuningSlowDivide64,
1492                                     TuningSlowSHLD,
1493                                     TuningSBBDepBreaking,
1494                                     TuningInsertVZEROUPPER,
1495                                     TuningAllowLight256Bit];
1496  list<SubtargetFeature> ZN2AdditionalFeatures = [FeatureCLWB,
1497                                                  FeatureRDPID,
1498                                                  FeatureRDPRU,
1499                                                  FeatureWBNOINVD];
1500  list<SubtargetFeature> ZN2Tuning = ZNTuning;
1501  list<SubtargetFeature> ZN2Features =
1502    !listconcat(ZNFeatures, ZN2AdditionalFeatures);
1503  list<SubtargetFeature> ZN3AdditionalFeatures = [FeatureFSRM,
1504                                                  FeatureINVPCID,
1505                                                  FeaturePKU,
1506                                                  FeatureVAES,
1507                                                  FeatureVPCLMULQDQ];
1508  list<SubtargetFeature> ZN3AdditionalTuning = [TuningMacroFusion];
1509  list<SubtargetFeature> ZN3Tuning =
1510    !listconcat(ZN2Tuning, ZN3AdditionalTuning);
1511  list<SubtargetFeature> ZN3Features =
1512    !listconcat(ZN2Features, ZN3AdditionalFeatures);
1513  list<SubtargetFeature> ZN4Tuning = ZN3Tuning;
1514  list<SubtargetFeature> ZN4AdditionalFeatures = [FeatureAVX512,
1515                                                  FeatureEVEX512,
1516                                                  FeatureCDI,
1517                                                  FeatureDQI,
1518                                                  FeatureBWI,
1519                                                  FeatureVLX,
1520                                                  FeatureVBMI,
1521                                                  FeatureVBMI2,
1522                                                  FeatureIFMA,
1523                                                  FeatureVNNI,
1524                                                  FeatureBITALG,
1525                                                  FeatureGFNI,
1526                                                  FeatureBF16,
1527                                                  FeatureSHSTK,
1528                                                  FeatureVPOPCNTDQ];
1529  list<SubtargetFeature> ZN4Features =
1530    !listconcat(ZN3Features, ZN4AdditionalFeatures);
1531}
1532
1533//===----------------------------------------------------------------------===//
1534// X86 processors supported.
1535//===----------------------------------------------------------------------===//
1536
// Processor definition that always uses GenericModel as its scheduling model.
//   Name         - CPU name string.
//   Features     - ISA feature list forwarded to ProcessorModel.
//   TuneFeatures - tuning feature list forwarded to ProcessorModel.
class Proc<string Name,
           list<SubtargetFeature> Features,
           list<SubtargetFeature> TuneFeatures>
    : ProcessorModel<Name, GenericModel, Features, TuneFeatures>;
1540
// Processor definition with an explicitly chosen scheduling model.
//   Name         - CPU name string.
//   Model        - scheduling model forwarded to ProcessorModel.
//   Features     - ISA feature list forwarded to ProcessorModel.
//   TuneFeatures - tuning feature list forwarded to ProcessorModel.
class ProcModel<string Name,
                SchedMachineModel Model,
                list<SubtargetFeature> Features,
                list<SubtargetFeature> TuneFeatures>
    : ProcessorModel<Name, Model, Features, TuneFeatures>;
1545
// NOTE: CMPXCHG8B is here for legacy compatibility so that it is only disabled
// if i386/i486 is specifically requested.
// NOTE: 64Bit is here as "generic" is the default llc CPU. The X86Subtarget
// constructor checks that any CPU used in 64-bit mode has FeatureX86_64
// enabled. It has no effect on code generation.
// NOTE: As a default tuning, "generic" aims to produce code optimized for the
// most common X86 processors. The tunings might be changed over time. It is
// recommended to use "tune-cpu"="x86-64" in function attribute for consistency.
def : ProcModel<"generic", SandyBridgeModel, [
  FeatureX87,
  FeatureCX8,
  FeatureX86_64
],
[
  TuningSlow3OpsLEA,
  TuningSlowDivide64,
  TuningMacroFusion,
  TuningFastScalarFSQRT,
  TuningFast15ByteNOP,
  TuningInsertVZEROUPPER
]>;
1562
// i386/i486 carry only X87; i586/pentium additionally carry CX8. All four
// share the same two tuning flags.
def : Proc<"i386",
           [FeatureX87],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"i486",
           [FeatureX87],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"i586",
           [FeatureX87, FeatureCX8],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"pentium",
           [FeatureX87, FeatureCX8],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
// Both spellings produce an identical processor definition.
foreach P = ["pentium-mmx", "pentium_mmx"] in {
  def : Proc<P,
             [FeatureX87, FeatureCX8, FeatureMMX],
             [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
def : Proc<"i686",
           [FeatureX87, FeatureCX8, FeatureCMOV],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
// Both spellings produce an identical processor definition.
foreach P = ["pentiumpro", "pentium_pro"] in {
  def : Proc<P,
             [FeatureX87, FeatureCX8, FeatureCMOV, FeatureNOPL],
             [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
// Both spellings produce an identical processor definition.
foreach P = ["pentium2", "pentium_ii"] in {
  def : Proc<P,
             [FeatureX87, FeatureCX8, FeatureMMX, FeatureCMOV,
              FeatureFXSR, FeatureNOPL],
             [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
// All four spellings produce an identical processor definition.
foreach P = ["pentium3", "pentium3m",
             "pentium_iii_no_xmm_regs", "pentium_iii"] in {
  def : Proc<P,
             [FeatureX87, FeatureCX8, FeatureMMX,
              FeatureSSE1, FeatureFXSR, FeatureNOPL, FeatureCMOV],
             [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
1591
// Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
// The intent is to enable it for pentium4 which is the current default
// processor in a vanilla 32-bit clang compilation when no specific
// architecture is specified.  This generally gives a nice performance
// increase on silvermont, with largely neutral behavior on other
// contemporary large core processors.
// pentium-m, pentium4m, prescott and nocona are included as a preventative
// measure to avoid performance surprises, in case clang's default cpu
// changes slightly.

foreach P = ["pentium_m", "pentium-m"] in {
  def : ProcModel<P, GenericPostRAModel, [
    FeatureX87,
    FeatureCX8,
    FeatureMMX,
    FeatureSSE2,
    FeatureFXSR,
    FeatureNOPL,
    FeatureCMOV
  ],
  [
    TuningSlowUAMem16,
    TuningInsertVZEROUPPER
  ]>;
}
1608
// All three spellings produce an identical processor definition.
foreach P = ["pentium4", "pentium4m", "pentium_4"] in {
  def : ProcModel<P, GenericPostRAModel, [
    FeatureX87,
    FeatureCX8,
    FeatureMMX,
    FeatureSSE2,
    FeatureFXSR,
    FeatureNOPL,
    FeatureCMOV
  ],
  [
    TuningSlowUAMem16,
    TuningInsertVZEROUPPER
  ]>;
}
1615
// Intel Quark. The feature list contains only CX8 (no X87).
def : Proc<"lakemont",
           [FeatureCX8],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1619
// Intel Core Duo.
def : ProcModel<"yonah", SandyBridgeModel, [
  FeatureX87,
  FeatureCX8,
  FeatureMMX,
  FeatureSSE3,
  FeatureFXSR,
  FeatureNOPL,
  FeatureCMOV
],
[
  TuningSlowUAMem16,
  TuningInsertVZEROUPPER
]>;
1625
// NetBurst.
foreach P = ["prescott", "pentium_4_sse3"] in {
  def : ProcModel<P, GenericPostRAModel, [
    FeatureX87,
    FeatureCX8,
    FeatureMMX,
    FeatureSSE3,
    FeatureFXSR,
    FeatureNOPL,
    FeatureCMOV
  ],
  [
    TuningSlowUAMem16,
    TuningInsertVZEROUPPER
  ]>;
}
// Uses GenericPostRAModel; the feature list includes X86_64 and CX16.
def : ProcModel<"nocona", GenericPostRAModel,
                [FeatureX87,
                 FeatureCX8,
                 FeatureCMOV,
                 FeatureMMX,
                 FeatureSSE3,
                 FeatureFXSR,
                 FeatureNOPL,
                 FeatureX86_64,
                 FeatureCX16],
                [TuningSlowUAMem16,
                 TuningInsertVZEROUPPER]>;
1648
// Intel Core 2 Solo/Duo.
foreach P = ["core2", "core_2_duo_ssse3"] in {
  def : ProcModel<P, SandyBridgeModel,
                  [FeatureX87,
                   FeatureCX8,
                   FeatureCMOV,
                   FeatureMMX,
                   FeatureSSSE3,
                   FeatureFXSR,
                   FeatureNOPL,
                   FeatureX86_64,
                   FeatureCX16,
                   FeatureLAHFSAHF64],
                  [TuningMacroFusion,
                   TuningSlowUAMem16,
                   TuningInsertVZEROUPPER]>;
}
// Same as the core2 definition except that SSE41 replaces SSSE3.
foreach P = ["penryn", "core_2_duo_sse4_1"] in {
  def : ProcModel<P, SandyBridgeModel,
                  [FeatureX87,
                   FeatureCX8,
                   FeatureCMOV,
                   FeatureMMX,
                   FeatureSSE41,
                   FeatureFXSR,
                   FeatureNOPL,
                   FeatureX86_64,
                   FeatureCX16,
                   FeatureLAHFSAHF64],
                  [TuningMacroFusion,
                   TuningSlowUAMem16,
                   TuningInsertVZEROUPPER]>;
}
1688
// Atom CPUs.
foreach P = ["bonnell", "atom"] in {
  def : ProcModel<P, AtomModel,
                  ProcessorFeatures.AtomFeatures,
                  ProcessorFeatures.AtomTuning>;
}
1694
// All three spellings produce an identical processor definition.
foreach P = ["silvermont", "slm", "atom_sse4_2"] in {
  def : ProcModel<P, SLMModel,
                  ProcessorFeatures.SLMFeatures,
                  ProcessorFeatures.SLMTuning>;
}
1699
// Both use GLMFeatures on SLMModel; "atom_sse4_2_movbe" takes SLMTuning while
// "goldmont" takes GLMTuning.
def : ProcModel<"atom_sse4_2_movbe", SLMModel,
                ProcessorFeatures.GLMFeatures,
                ProcessorFeatures.SLMTuning>;
def : ProcModel<"goldmont", SLMModel,
                ProcessorFeatures.GLMFeatures,
                ProcessorFeatures.GLMTuning>;
// Both spellings produce an identical processor definition.
foreach P = ["goldmont_plus", "goldmont-plus"] in {
  def : ProcModel<P, SLMModel,
                  ProcessorFeatures.GLPFeatures,
                  ProcessorFeatures.GLPTuning>;
}
def : ProcModel<"tremont", SLMModel,
                ProcessorFeatures.TRMFeatures,
                ProcessorFeatures.TRMTuning>;
// SRFFeatures on AlderlakePModel, tuned with TRMTuning.
// NOTE(review): pairing TRMTuning with AlderlakePModel looks deliberate but is
// not explained here - confirm.
foreach P = ["sierraforest", "grandridge"] in {
  def : ProcModel<P, AlderlakePModel,
                  ProcessorFeatures.SRFFeatures,
                  ProcessorFeatures.TRMTuning>;
}
1714
// "Arrandale" along with corei3 and corei5
foreach P = ["nehalem", "corei7", "core_i7_sse4_2"] in {
  def : ProcModel<P, SandyBridgeModel,
                  ProcessorFeatures.NHMFeatures,
                  ProcessorFeatures.NHMTuning>;
}
1720
// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
foreach P = ["westmere", "core_aes_pclmulqdq"] in {
  def : ProcModel<P, SandyBridgeModel,
                  ProcessorFeatures.WSMFeatures,
                  ProcessorFeatures.WSMTuning>;
}
1726
// All three spellings produce an identical processor definition.
foreach P = ["sandybridge", "corei7-avx", "core_2nd_gen_avx"] in {
  def : ProcModel<P, SandyBridgeModel,
                  ProcessorFeatures.SNBFeatures,
                  ProcessorFeatures.SNBTuning>;
}
1731
// Scheduled with SandyBridgeModel; all three spellings are identical.
foreach P = ["ivybridge", "core-avx-i", "core_3rd_gen_avx"] in {
  def : ProcModel<P, SandyBridgeModel,
                  ProcessorFeatures.IVBFeatures,
                  ProcessorFeatures.IVBTuning>;
}
1736
// All four spellings produce an identical processor definition.
foreach P = ["haswell", "core-avx2",
             "core_4th_gen_avx", "core_4th_gen_avx_tsx"] in {
  def : ProcModel<P, HaswellModel,
                  ProcessorFeatures.HSWFeatures,
                  ProcessorFeatures.HSWTuning>;
}
1741
// All three spellings produce an identical processor definition.
foreach P = ["broadwell", "core_5th_gen_avx", "core_5th_gen_avx_tsx"] in {
  def : ProcModel<P, BroadwellModel,
                  ProcessorFeatures.BDWFeatures,
                  ProcessorFeatures.BDWTuning>;
}
1746
def : ProcModel<"skylake", SkylakeClientModel,
                ProcessorFeatures.SKLFeatures,
                ProcessorFeatures.SKLTuning>;
1749
// FIXME: define KNL scheduler model
// Until then knl/mic_avx512 and knm are scheduled with HaswellModel; knm
// differs from knl only in its feature list and reuses KNLTuning.
foreach P = ["knl", "mic_avx512"] in {
  def : ProcModel<P, HaswellModel,
                  ProcessorFeatures.KNLFeatures,
                  ProcessorFeatures.KNLTuning>;
}
def : ProcModel<"knm", HaswellModel,
                ProcessorFeatures.KNMFeatures,
                ProcessorFeatures.KNLTuning>;
1757
// All three spellings produce an identical processor definition.
foreach P = ["skylake-avx512", "skx", "skylake_avx512"] in {
  def : ProcModel<P, SkylakeServerModel,
                  ProcessorFeatures.SKXFeatures,
                  ProcessorFeatures.SKXTuning>;
}
1762
// These three are all scheduled with SkylakeServerModel.
def : ProcModel<"cascadelake", SkylakeServerModel,
                ProcessorFeatures.CLXFeatures,
                ProcessorFeatures.CLXTuning>;
def : ProcModel<"cooperlake", SkylakeServerModel,
                ProcessorFeatures.CPXFeatures,
                ProcessorFeatures.CPXTuning>;
def : ProcModel<"cannonlake", SkylakeServerModel,
                ProcessorFeatures.CNLFeatures,
                ProcessorFeatures.CNLTuning>;
// Ice Lake client names, plus "rocketlake", which is defined with exactly the
// same model, feature list and tuning list.
foreach P = ["icelake-client", "icelake_client", "rocketlake"] in
  def : ProcModel<P, IceLakeModel,
                  ProcessorFeatures.ICLFeatures, ProcessorFeatures.ICLTuning>;

// Ice Lake server names.
foreach P = ["icelake-server", "icelake_server"] in
  def : ProcModel<P, IceLakeModel,
                  ProcessorFeatures.ICXFeatures, ProcessorFeatures.ICXTuning>;

// Tiger Lake is scheduled with the Ice Lake model as well.
def : ProcModel<"tigerlake", IceLakeModel,
                ProcessorFeatures.TGLFeatures, ProcessorFeatures.TGLTuning>;
def : ProcModel<"sapphirerapids", SapphireRapidsModel,
                ProcessorFeatures.SPRFeatures,
                ProcessorFeatures.SPRTuning>;

// Everything below in this group is scheduled with the Alder Lake P model.
def : ProcModel<"alderlake", AlderlakePModel,
                ProcessorFeatures.ADLFeatures,
                ProcessorFeatures.ADLTuning>;
// FIXME: Use Gracemont Schedule Model when it is ready.
def : ProcModel<"gracemont", AlderlakePModel,
                ProcessorFeatures.ADLFeatures,
                ProcessorFeatures.GRTTuning>;
// Same feature and tuning lists as "alderlake".
foreach P = ["raptorlake", "meteorlake"] in
  def : ProcModel<P, AlderlakePModel,
                  ProcessorFeatures.ADLFeatures,
                  ProcessorFeatures.ADLTuning>;
def : ProcModel<"arrowlake", AlderlakePModel,
                ProcessorFeatures.SRFFeatures,
                ProcessorFeatures.ADLTuning>;
foreach P = ["arrowlake-s", "arrowlake_s", "lunarlake"] in
  def : ProcModel<P, AlderlakePModel,
                  ProcessorFeatures.ARLSFeatures,
                  ProcessorFeatures.ADLTuning>;
def : ProcModel<"pantherlake", AlderlakePModel,
                ProcessorFeatures.PTLFeatures,
                ProcessorFeatures.ADLTuning>;
def : ProcModel<"clearwaterforest", AlderlakePModel,
                ProcessorFeatures.CWFFeatures,
                ProcessorFeatures.ADLTuning>;
// These CPUs reuse the Sapphire Rapids scheduling model and tuning list.
// "emeraldrapids" additionally reuses the Sapphire Rapids feature list.
def : ProcModel<"graniterapids", SapphireRapidsModel,
                ProcessorFeatures.GNRFeatures,
                ProcessorFeatures.SPRTuning>;
def : ProcModel<"emeraldrapids", SapphireRapidsModel,
                ProcessorFeatures.SPRFeatures,
                ProcessorFeatures.SPRTuning>;
foreach P = ["graniterapids-d", "graniterapids_d"] in
  def : ProcModel<P, SapphireRapidsModel,
                  ProcessorFeatures.GNRDFeatures,
                  ProcessorFeatures.SPRTuning>;
1810
1811// AMD CPUs.
1812
// K6 family. "k6" has MMX; "k6-2" and "k6-3" share one definition that swaps
// MMX for 3DNow!. All three use the same tuning list.
def : Proc<"k6",
           [FeatureX87, FeatureCX8, FeatureMMX],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
foreach P = ["k6-2", "k6-3"] in
  def : Proc<P,
             [FeatureX87, FeatureCX8, Feature3DNow],
             [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1819
// Athlon names without SSE.
foreach P = ["athlon", "athlon-tbird"] in
  def : Proc<P,
             [FeatureX87, FeatureCX8, FeatureCMOV, Feature3DNowA, FeatureNOPL],
             [TuningSlowSHLD, TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

// Athlon names that add SSE1 and FXSR on top of the list above.
foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in
  def : Proc<P,
             [FeatureX87, FeatureCX8, FeatureCMOV, FeatureSSE1, Feature3DNowA,
              FeatureFXSR, FeatureNOPL],
             [TuningSlowSHLD, TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1831
// K8-class names with SSE2 and x86-64.
foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in
  def : Proc<P,
             [FeatureX87, FeatureCX8, FeatureSSE2, Feature3DNowA, FeatureFXSR,
              FeatureNOPL, FeatureX86_64, FeatureCMOV],
             [TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16,
              TuningSBBDepBreaking, TuningInsertVZEROUPPER]>;

// K8-class names with SSE3; these also enable CX16. Same tuning list as above.
foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in
  def : Proc<P,
             [FeatureX87, FeatureCX8, FeatureSSE3, Feature3DNowA, FeatureFXSR,
              FeatureNOPL, FeatureCX16, FeatureCMOV, FeatureX86_64],
             [TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16,
              TuningSBBDepBreaking, TuningInsertVZEROUPPER]>;
1846
// Barcelona, also selectable as "amdfam10". Declared with Proc, so no
// scheduling model is named here.
foreach P = ["amdfam10", "barcelona"] in
  def : Proc<P,
             ProcessorFeatures.BarcelonaFeatures,
             ProcessorFeatures.BarcelonaTuning>;

// Bobcat
def : Proc<"btver1",
           ProcessorFeatures.BtVer1Features,
           ProcessorFeatures.BtVer1Tuning>;
// Jaguar
def : ProcModel<"btver2", BtVer2Model,
                ProcessorFeatures.BtVer2Features,
                ProcessorFeatures.BtVer2Tuning>;
1858
// Bulldozer (scheduled with the bdver2 model)
def : ProcModel<"bdver1", BdVer2Model,
                ProcessorFeatures.BdVer1Features,
                ProcessorFeatures.BdVer1Tuning>;
// Piledriver
def : ProcModel<"bdver2", BdVer2Model,
                ProcessorFeatures.BdVer2Features,
                ProcessorFeatures.BdVer2Tuning>;
// Steamroller
def : Proc<"bdver3",
           ProcessorFeatures.BdVer3Features,
           ProcessorFeatures.BdVer3Tuning>;
// Excavator
def : Proc<"bdver4",
           ProcessorFeatures.BdVer4Features,
           ProcessorFeatures.BdVer4Tuning>;
1871
// znver1 through znver4: each has its own scheduling model, feature list and
// tuning list.
def : ProcModel<"znver1", Znver1Model,
                ProcessorFeatures.ZNFeatures, ProcessorFeatures.ZNTuning>;
def : ProcModel<"znver2", Znver2Model,
                ProcessorFeatures.ZN2Features, ProcessorFeatures.ZN2Tuning>;
def : ProcModel<"znver3", Znver3Model,
                ProcessorFeatures.ZN3Features, ProcessorFeatures.ZN3Tuning>;
def : ProcModel<"znver4", Znver4Model,
                ProcessorFeatures.ZN4Features, ProcessorFeatures.ZN4Tuning>;
1880
// Remaining 32-bit CPUs. Every entry in this group uses the same tuning list.
def : Proc<"geode",
           [FeatureX87, FeatureCX8, Feature3DNowA],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

def : Proc<"winchip-c6",
           [FeatureX87, FeatureMMX],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
// "winchip2" and "c3" are defined identically.
foreach P = ["winchip2", "c3"] in
  def : Proc<P,
             [FeatureX87, Feature3DNow],
             [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"c3-2",
           [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE1, FeatureFXSR,
            FeatureCMOV],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1893
// We also provide a generic 64-bit specific x86 processor model which tries to
// be good for modern chips without enabling instruction set encodings past the
// basic SSE2 and 64-bit ones. It disables slow things from any mainstream and
// modern 64-bit x86 chip, and enables features that are generally beneficial.
//
// The Sandy Bridge model is currently the default scheduling model because it
// is already shared by Nehalem, Westmere, Sandy Bridge, and Ivy Bridge, which
// together cover a huge swath of x86 processors. If specific scheduling knobs
// turn out to need different tuning for AMD chips, we might consider forming
// a common base for them.
def : ProcModel<"x86-64", SandyBridgeModel,
                ProcessorFeatures.X86_64V1Features,
                ProcessorFeatures.X86_64V1Tuning>;
// Close to Sandybridge.
def : ProcModel<"x86-64-v2", SandyBridgeModel,
                ProcessorFeatures.X86_64V2Features,
                ProcessorFeatures.X86_64V2Tuning>;
// Close to Haswell.
def : ProcModel<"x86-64-v3", HaswellModel,
                ProcessorFeatures.X86_64V3Features,
                ProcessorFeatures.X86_64V3Tuning>;
// Close to the AVX-512 level implemented by Xeon Scalable Processors.
def : ProcModel<"x86-64-v4", SkylakeServerModel,
                ProcessorFeatures.X86_64V4Features,
                ProcessorFeatures.X86_64V4Tuning>;
1915
1916//===----------------------------------------------------------------------===//
1917// Calling Conventions
1918//===----------------------------------------------------------------------===//
1919
1920include "X86CallingConv.td"
1921
1922
1923//===----------------------------------------------------------------------===//
1924// Assembly Parser
1925//===----------------------------------------------------------------------===//
1926
// Assembly parser variant 0: the "att" syntax.
// NOTE(review): the fields are overridden with `let`, which assumes
// AsmParserVariant (from Target.td) already declares all four — confirm.
def ATTAsmParserVariant : AsmParserVariant {
  let Variant          = 0;
  let Name             = "att"; // Variant name.
  let CommentDelimiter = "#";   // Discard comments in assembly strings.
  let RegisterPrefix   = "%";   // Recognize hard coded registers.
}
1939
// Assembly parser variant 1: the "intel" syntax. Unlike the "att" variant,
// comments start with ";" and registers carry no prefix.
// NOTE(review): the fields are overridden with `let`, which assumes
// AsmParserVariant (from Target.td) already declares all four — confirm.
def IntelAsmParserVariant : AsmParserVariant {
  let Variant          = 1;
  let Name             = "intel"; // Variant name.
  let CommentDelimiter = ";";     // Discard comments in assembly strings.
  let RegisterPrefix   = "";      // Recognize hard coded registers.
}
1952
1953//===----------------------------------------------------------------------===//
1954// Assembly Printers
1955//===----------------------------------------------------------------------===//
1956
// The X86 target supports two different syntaxes for emitting machine code.
// This is controlled by the -x86-asm-syntax={att|intel} option.
// Assembly writer for variant 0; printing is done by the ATTInstPrinter class.
// NOTE(review): `let` assumes AsmWriter (from Target.td) already declares
// both fields — confirm.
def ATTAsmWriter : AsmWriter {
  let AsmWriterClassName = "ATTInstPrinter";
  let Variant            = 0;
}
// Assembly writer for variant 1; printing is done by the IntelInstPrinter
// class.
// NOTE(review): `let` assumes AsmWriter (from Target.td) already declares
// both fields — confirm.
def IntelAsmWriter : AsmWriter {
  let AsmWriterClassName = "IntelInstPrinter";
  let Variant            = 1;
}
1967
// The X86 target record. It ties together the instruction set description and
// the two assembly syntaxes (parser variants and writers) defined in this file.
def X86 : Target {
  // Information about the instructions...
  let InstructionSet         = X86InstrInfo;

  // Both syntaxes are available for parsing and for printing.
  let AssemblyParserVariants = [ATTAsmParserVariant, IntelAsmParserVariant];
  let AssemblyWriters        = [ATTAsmWriter, IntelAsmWriter];

  let AllowRegisterRenaming  = 1;
}
1975
1976//===----------------------------------------------------------------------===//
1977// Pfm Counters
1978//===----------------------------------------------------------------------===//
1979
1980include "X86PfmCounters.td"
1981