xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86.td (revision cfd6422a5217410fbd66f7a7a8a64d9d85e61229)
//===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This is a target description file for the Intel i386 architecture, referred
// to here as the "X86" architecture.
//
//===----------------------------------------------------------------------===//
13
// Get the target-independent interfaces which we are implementing. The
// SubtargetFeature and InstrInfo classes used below are not defined in this
// file.
//
include "llvm/Target/Target.td"
17
//===----------------------------------------------------------------------===//
// X86 Subtarget state
//

// Operating-mode selectors. Each record maps a feature string (first
// argument) onto a subtarget attribute (second argument) that is assigned the
// given value (third argument) when the feature is enabled; the fourth
// argument is the human-readable description.
def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true",
                                  "64-bit mode (x86_64)">;
def Mode32Bit : SubtargetFeature<"32bit-mode", "In32BitMode", "true",
                                  "32-bit mode (80386)">;
def Mode16Bit : SubtargetFeature<"16bit-mode", "In16BitMode", "true",
                                  "16-bit mode (i8086)">;
28
//===----------------------------------------------------------------------===//
// X86 Subtarget features
//===----------------------------------------------------------------------===//

// Stand-alone instruction-set features; none of these records lists an
// implied feature.
def FeatureX87     : SubtargetFeature<"x87", "HasX87", "true",
                                      "Enable X87 float instructions">;

def FeatureNOPL    : SubtargetFeature<"nopl", "HasNOPL", "true",
                                      "Enable NOPL instruction">;

def FeatureCMOV    : SubtargetFeature<"cmov", "HasCMov", "true",
                                      "Enable conditional move instructions">;

def FeatureCMPXCHG8B : SubtargetFeature<"cx8", "HasCmpxchg8b", "true",
                                        "Support CMPXCHG8B instructions">;

def FeaturePOPCNT   : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
                                       "Support POPCNT instruction">;

def FeatureFXSR    : SubtargetFeature<"fxsr", "HasFXSR", "true",
                                      "Support fxsave/fxrestore instructions">;
50
// XSAVE family. XSAVEOPT, XSAVEC and XSAVES each list FeatureXSAVE as an
// implied feature (fifth argument).
def FeatureXSAVE   : SubtargetFeature<"xsave", "HasXSAVE", "true",
                                       "Support xsave instructions">;

def FeatureXSAVEOPT: SubtargetFeature<"xsaveopt", "HasXSAVEOPT", "true",
                                       "Support xsaveopt instructions",
                                       [FeatureXSAVE]>;

def FeatureXSAVEC  : SubtargetFeature<"xsavec", "HasXSAVEC", "true",
                                       "Support xsavec instructions",
                                       [FeatureXSAVE]>;

def FeatureXSAVES  : SubtargetFeature<"xsaves", "HasXSAVES", "true",
                                       "Support xsaves instructions",
                                       [FeatureXSAVE]>;
65
// SSE levels. All of these write to the single X86SSELevel attribute, and
// every level after SSE1 lists its predecessor as an implied feature.
def FeatureSSE1    : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
                                      "Enable SSE instructions">;
def FeatureSSE2    : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
                                      "Enable SSE2 instructions",
                                      [FeatureSSE1]>;
def FeatureSSE3    : SubtargetFeature<"sse3", "X86SSELevel", "SSE3",
                                      "Enable SSE3 instructions",
                                      [FeatureSSE2]>;
def FeatureSSSE3   : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3",
                                      "Enable SSSE3 instructions",
                                      [FeatureSSE3]>;
def FeatureSSE41   : SubtargetFeature<"sse4.1", "X86SSELevel", "SSE41",
                                      "Enable SSE 4.1 instructions",
                                      [FeatureSSSE3]>;
def FeatureSSE42   : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42",
                                      "Enable SSE 4.2 instructions",
                                      [FeatureSSE41]>;
// The MMX subtarget feature is separate from the rest of the SSE features
// because it's important (for odd compatibility reasons) to be able to
// turn it off explicitly while allowing SSE+ to be on.
// MMX, 3DNow! and 3DNow! Athlon share the X863DNowLevel attribute; each later
// level implies the previous one.
def FeatureMMX     : SubtargetFeature<"mmx", "X863DNowLevel", "MMX",
                                      "Enable MMX instructions">;
def Feature3DNow   : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
                                      "Enable 3DNow! instructions",
                                      [FeatureMMX]>;
def Feature3DNowA  : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
                                      "Enable 3DNow! Athlon instructions",
                                      [Feature3DNow]>;
// All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied
// feature, because SSE2 can be disabled (e.g. for compiling OS kernels)
// without disabling 64-bit mode. Nothing should imply this feature bit. It
// is used to enforce that only 64-bit capable CPUs are used in 64-bit mode.
def Feature64Bit   : SubtargetFeature<"64bit", "HasX86_64", "true",
                                      "Support 64-bit instructions">;
// CMPXCHG16B lists CMPXCHG8B as an implied feature.
def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true",
                                      "64-bit with cmpxchg16b",
                                      [FeatureCMPXCHG8B]>;
// Tuning flags for instructions or access patterns that are slow on some
// processors.
def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
                                       "SHLD instruction is slow">;
def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
                                        "PMULLD instruction is slow">;
def FeatureSlowPMADDWD : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow",
                                          "true",
                                          "PMADDWD is slower than PMULLD">;
// FIXME: This should not apply to CPUs that do not have SSE.
def FeatureSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
                                "IsUAMem16Slow", "true",
                                "Slow unaligned 16-byte memory access">;
def FeatureSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32",
                                "IsUAMem32Slow", "true",
                                "Slow unaligned 32-byte memory access">;
// SSE4A lists SSE3 as an implied feature but sets its own HasSSE4A attribute
// rather than X86SSELevel.
def FeatureSSE4A   : SubtargetFeature<"sse4a", "HasSSE4A", "true",
                                      "Support SSE 4a instructions",
                                      [FeatureSSE3]>;
120
// AVX levels continue the X86SSELevel chain started by the SSE features:
// AVX implies SSE4.2, AVX2 implies AVX, and AVX-512F implies AVX2 plus FMA
// and F16C. FMA and F16C use their own boolean attributes.
def FeatureAVX     : SubtargetFeature<"avx", "X86SSELevel", "AVX",
                                      "Enable AVX instructions",
                                      [FeatureSSE42]>;
def FeatureAVX2    : SubtargetFeature<"avx2", "X86SSELevel", "AVX2",
                                      "Enable AVX2 instructions",
                                      [FeatureAVX]>;
def FeatureFMA     : SubtargetFeature<"fma", "HasFMA", "true",
                                      "Enable three-operand fused multiply-add",
                                      [FeatureAVX]>;
def FeatureF16C    : SubtargetFeature<"f16c", "HasF16C", "true",
                       "Support 16-bit floating point conversion instructions",
                       [FeatureAVX]>;
def FeatureAVX512   : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512F",
                                      "Enable AVX-512 instructions",
                                      [FeatureAVX2, FeatureFMA, FeatureF16C]>;
// AVX-512 extensions. Most imply FeatureAVX512 directly; VBMI, VBMI2, BF16
// and BITALG imply FeatureBWI instead. PREFETCHWT1 and PKU are declared here
// but list no implied features.
def FeatureERI      : SubtargetFeature<"avx512er", "HasERI", "true",
                      "Enable AVX-512 Exponential and Reciprocal Instructions",
                                      [FeatureAVX512]>;
def FeatureCDI      : SubtargetFeature<"avx512cd", "HasCDI", "true",
                      "Enable AVX-512 Conflict Detection Instructions",
                                      [FeatureAVX512]>;
def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ",
                       "true", "Enable AVX-512 Population Count Instructions",
                                      [FeatureAVX512]>;
def FeaturePFI      : SubtargetFeature<"avx512pf", "HasPFI", "true",
                      "Enable AVX-512 PreFetch Instructions",
                                      [FeatureAVX512]>;
def FeaturePREFETCHWT1  : SubtargetFeature<"prefetchwt1", "HasPREFETCHWT1",
                                   "true",
                                   "Prefetch with Intent to Write and T1 Hint">;
def FeatureDQI     : SubtargetFeature<"avx512dq", "HasDQI", "true",
                      "Enable AVX-512 Doubleword and Quadword Instructions",
                                      [FeatureAVX512]>;
def FeatureBWI     : SubtargetFeature<"avx512bw", "HasBWI", "true",
                      "Enable AVX-512 Byte and Word Instructions",
                                      [FeatureAVX512]>;
def FeatureVLX     : SubtargetFeature<"avx512vl", "HasVLX", "true",
                      "Enable AVX-512 Vector Length eXtensions",
                                      [FeatureAVX512]>;
def FeatureVBMI     : SubtargetFeature<"avx512vbmi", "HasVBMI", "true",
                      "Enable AVX-512 Vector Byte Manipulation Instructions",
                                      [FeatureBWI]>;
def FeatureVBMI2    : SubtargetFeature<"avx512vbmi2", "HasVBMI2", "true",
                      "Enable AVX-512 further Vector Byte Manipulation Instructions",
                                      [FeatureBWI]>;
def FeatureIFMA     : SubtargetFeature<"avx512ifma", "HasIFMA", "true",
                      "Enable AVX-512 Integer Fused Multiply-Add",
                                      [FeatureAVX512]>;
def FeaturePKU   : SubtargetFeature<"pku", "HasPKU", "true",
                      "Enable protection keys">;
def FeatureVNNI    : SubtargetFeature<"avx512vnni", "HasVNNI", "true",
                          "Enable AVX-512 Vector Neural Network Instructions",
                                      [FeatureAVX512]>;
def FeatureBF16    : SubtargetFeature<"avx512bf16", "HasBF16", "true",
                           "Support bfloat16 floating point",
                                      [FeatureBWI]>;
def FeatureBITALG  : SubtargetFeature<"avx512bitalg", "HasBITALG", "true",
                       "Enable AVX-512 Bit Algorithms",
                        [FeatureBWI]>;
def FeatureVP2INTERSECT  : SubtargetFeature<"avx512vp2intersect",
                                            "HasVP2INTERSECT", "true",
                                            "Enable AVX-512 vp2intersect",
                                            [FeatureAVX512]>;
// Crypto / carry-less multiply / AMD vector extensions. The fifth argument of
// each record lists the features it implies (e.g. XOP implies FMA4, which in
// turn implies AVX and SSE4A).
def FeaturePCLMUL  : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
                         "Enable packed carry-less multiplication instructions",
                               [FeatureSSE2]>;
def FeatureGFNI    : SubtargetFeature<"gfni", "HasGFNI", "true",
                         "Enable Galois Field Arithmetic Instructions",
                               [FeatureSSE2]>;
def FeatureVPCLMULQDQ : SubtargetFeature<"vpclmulqdq", "HasVPCLMULQDQ", "true",
                                         "Enable vpclmulqdq instructions",
                                         [FeatureAVX, FeaturePCLMUL]>;
def FeatureFMA4    : SubtargetFeature<"fma4", "HasFMA4", "true",
                                      "Enable four-operand fused multiply-add",
                                      [FeatureAVX, FeatureSSE4A]>;
def FeatureXOP     : SubtargetFeature<"xop", "HasXOP", "true",
                                      "Enable XOP instructions",
                                      [FeatureFMA4]>;
def FeatureSSEUnalignedMem : SubtargetFeature<"sse-unaligned-mem",
                                          "HasSSEUnalignedMem", "true",
                      "Allow unaligned memory operands with SSE instructions">;
def FeatureAES     : SubtargetFeature<"aes", "HasAES", "true",
                                      "Enable AES instructions",
                                      [FeatureSSE2]>;
def FeatureVAES    : SubtargetFeature<"vaes", "HasVAES", "true",
                       "Promote selected AES instructions to AVX512/AVX registers",
                        [FeatureAVX, FeatureAES]>;
// Miscellaneous instruction-set extensions. Only FeatureSHA lists an implied
// feature (SSE2); the rest are independent boolean flags.
def FeatureTBM     : SubtargetFeature<"tbm", "HasTBM", "true",
                                      "Enable TBM instructions">;
def FeatureLWP     : SubtargetFeature<"lwp", "HasLWP", "true",
                                      "Enable LWP instructions">;
def FeatureMOVBE   : SubtargetFeature<"movbe", "HasMOVBE", "true",
                                      "Support MOVBE instruction">;
def FeatureRDRAND  : SubtargetFeature<"rdrnd", "HasRDRAND", "true",
                                      "Support RDRAND instruction">;
def FeatureFSGSBase : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true",
                                       "Support FS/GS Base instructions">;
def FeatureLZCNT   : SubtargetFeature<"lzcnt", "HasLZCNT", "true",
                                      "Support LZCNT instruction">;
def FeatureBMI     : SubtargetFeature<"bmi", "HasBMI", "true",
                                      "Support BMI instructions">;
def FeatureBMI2    : SubtargetFeature<"bmi2", "HasBMI2", "true",
                                      "Support BMI2 instructions">;
def FeatureRTM     : SubtargetFeature<"rtm", "HasRTM", "true",
                                      "Support RTM instructions">;
def FeatureADX     : SubtargetFeature<"adx", "HasADX", "true",
                                      "Support ADX instructions">;
def FeatureSHA     : SubtargetFeature<"sha", "HasSHA", "true",
                                      "Enable SHA instructions",
                                      [FeatureSSE2]>;
def FeatureSHSTK   : SubtargetFeature<"shstk", "HasSHSTK", "true",
                       "Support CET Shadow-Stack instructions">;
def FeaturePRFCHW  : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
                                      "Support PRFCHW instructions">;
def FeatureRDSEED  : SubtargetFeature<"rdseed", "HasRDSEED", "true",
                                      "Support RDSEED instruction">;
def FeatureLAHFSAHF : SubtargetFeature<"sahf", "HasLAHFSAHF", "true",
                                       "Support LAHF and SAHF instructions">;
def FeatureMWAITX  : SubtargetFeature<"mwaitx", "HasMWAITX", "true",
                                      "Enable MONITORX/MWAITX timer functionality">;
def FeatureCLZERO  : SubtargetFeature<"clzero", "HasCLZERO", "true",
                                      "Enable Cache Line Zero">;
def FeatureCLDEMOTE  : SubtargetFeature<"cldemote", "HasCLDEMOTE", "true",
                                      "Enable Cache Demote">;
def FeaturePTWRITE  : SubtargetFeature<"ptwrite", "HasPTWRITE", "true",
                                      "Support ptwrite instruction">;
// FIXME: This feature is deprecated in 10.0 and should not be used for
// anything, but removing it would break IR files that may contain it in a
// target-feature attribute.
// Note that, unlike the other boolean features in this file, the value written
// to the attribute when "mpx" is enabled is "false".
def FeatureDeprecatedMPX : SubtargetFeature<"mpx", "DeprecatedHasMPX", "false",
                                      "Deprecated. Support MPX instructions">;
// AMX features. AMX-INT8 and AMX-BF16 both list AMX-TILE as an implied
// feature.
def FeatureAMXTILE     : SubtargetFeature<"amx-tile", "HasAMXTILE", "true",
                                      "Support AMX-TILE instructions">;
def FeatureAMXINT8     : SubtargetFeature<"amx-int8", "HasAMXINT8", "true",
                                      "Support AMX-INT8 instructions",
                                      [FeatureAMXTILE]>;
def FeatureAMXBF16     : SubtargetFeature<"amx-bf16", "HasAMXBF16", "true",
                                      "Support AMX-BF16 instructions",
                                      [FeatureAMXTILE]>;
// A mix of code-generation tuning flags (lea-sp, idiv*-to-div*,
// pad-short-functions) and further instruction-set extensions. None of these
// records lists an implied feature.
def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
                                     "Use LEA for adjusting the stack pointer">;
def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb",
                                     "HasSlowDivide32", "true",
                                     "Use 8-bit divide for positive values less than 256">;
def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divl",
                                     "HasSlowDivide64", "true",
                                     "Use 32-bit divide for positive values less than 2^32">;
def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
                                     "PadShortFunctions", "true",
                                     "Pad short functions">;
def FeatureINVPCID : SubtargetFeature<"invpcid", "HasINVPCID", "true",
                                      "Invalidate Process-Context Identifier">;
def FeatureSGX     : SubtargetFeature<"sgx", "HasSGX", "true",
                                      "Enable Software Guard Extensions">;
def FeatureCLFLUSHOPT : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true",
                                      "Flush A Cache Line Optimized">;
def FeatureCLWB    : SubtargetFeature<"clwb", "HasCLWB", "true",
                                      "Cache Line Write Back">;
def FeatureWBNOINVD    : SubtargetFeature<"wbnoinvd", "HasWBNOINVD", "true",
                                      "Write Back No Invalidate">;
def FeatureRDPID : SubtargetFeature<"rdpid", "HasRDPID", "true",
                                    "Support RDPID instructions">;
def FeatureWAITPKG  : SubtargetFeature<"waitpkg", "HasWAITPKG", "true",
                                      "Wait and pause enhancements">;
def FeatureENQCMD : SubtargetFeature<"enqcmd", "HasENQCMD", "true",
                                     "Has ENQCMD instructions">;
def FeatureSERIALIZE : SubtargetFeature<"serialize", "HasSERIALIZE", "true",
                                        "Has serialize instruction">;
def FeatureTSXLDTRK : SubtargetFeature<"tsxldtrk", "HasTSXLDTRK", "true",
                                       "Support TSXLDTRK instructions">;
// On some processors, instructions that implicitly take two memory operands are
// slow. In practice, this means that CALL, PUSH, and POP with memory operands
// should be avoided in favor of a MOV + register CALL/PUSH/POP.
def FeatureSlowTwoMemOps : SubtargetFeature<"slow-two-mem-ops",
                                     "SlowTwoMemOps", "true",
                                     "Two memory operand instructions are slow">;
// LEA- and INC/DEC-related tuning flags.
def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
                                   "LEA instruction needs inputs at AG stage">;
def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
                                   "LEA instruction with certain arguments is slow">;
def FeatureSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
                                   "LEA instruction with 3 ops or certain registers is slow">;
def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
                                   "INC and DEC instructions are slower than ADD and SUB">;
// Software floating point, false-dependency tuning flags for the bit-count
// instructions, and the PCONFIG extension.
def FeatureSoftFloat
    : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
                       "Use software floating point features">;
def FeaturePOPCNTFalseDeps : SubtargetFeature<"false-deps-popcnt",
                                     "HasPOPCNTFalseDeps", "true",
                                     "POPCNT has a false dependency on dest register">;
def FeatureLZCNTFalseDeps : SubtargetFeature<"false-deps-lzcnt-tzcnt",
                                     "HasLZCNTFalseDeps", "true",
                                     "LZCNT/TZCNT have a false dependency on dest register">;
def FeaturePCONFIG : SubtargetFeature<"pconfig", "HasPCONFIG", "true",
                                      "platform configuration instruction">;
// On recent X86 (port bound) processors, it is preferable to combine to a
// single shuffle using a variable mask over multiple fixed shuffles.
def FeatureFastVariableShuffle
    : SubtargetFeature<"fast-variable-shuffle",
                       "HasFastVariableShuffle",
                       "true", "Shuffles with variable masks are fast">;
// On some X86 processors, a vzeroupper instruction should be inserted after
// using ymm/zmm registers before executing code that may use SSE instructions.
def FeatureInsertVZEROUPPER
    : SubtargetFeature<"vzeroupper",
                       "InsertVZEROUPPER",
                       "true", "Should insert vzeroupper instructions">;
// FeatureFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency
// than the corresponding NR code. FeatureFastVectorFSQRT should be enabled if
// vector FSQRT has higher throughput than the corresponding NR code.
// ("NR" is Newton-Raphson, as spelled out in the description strings below.)
// The idea is that throughput bound code is likely to be vectorized, so for
// vectorized code we should care about the throughput of SQRT operations.
// But if the code is scalar that probably means that the code has some kind of
// dependency and we should care more about reducing the latency.
def FeatureFastScalarFSQRT
    : SubtargetFeature<"fast-scalar-fsqrt", "HasFastScalarFSQRT",
                       "true", "Scalar SQRT is fast (disable Newton-Raphson)">;
def FeatureFastVectorFSQRT
    : SubtargetFeature<"fast-vector-fsqrt", "HasFastVectorFSQRT",
                       "true", "Vector SQRT is fast (disable Newton-Raphson)">;
// If lzcnt has equivalent latency/throughput to most simple integer ops, it can
// be used to replace test/set sequences.
def FeatureFastLZCNT
    : SubtargetFeature<
          "fast-lzcnt", "HasFastLZCNT", "true",
          "LZCNT instructions are as fast as most simple integer ops">;
// Set if the target can efficiently decode NOPs up to 7 bytes in length.
def FeatureFast7ByteNOP
    : SubtargetFeature<
          "fast-7bytenop", "HasFast7ByteNOP", "true",
          "Target can quickly decode up to 7 byte NOPs">;
// Set if the target can efficiently decode NOPs up to 11 bytes in length.
def FeatureFast11ByteNOP
    : SubtargetFeature<
          "fast-11bytenop", "HasFast11ByteNOP", "true",
          "Target can quickly decode up to 11 byte NOPs">;
// Set if the target can efficiently decode NOPs up to 15 bytes in length.
def FeatureFast15ByteNOP
    : SubtargetFeature<
          "fast-15bytenop", "HasFast15ByteNOP", "true",
          "Target can quickly decode up to 15 byte NOPs">;
// Sandy Bridge and newer processors can use SHLD with the same source on both
// inputs to implement rotate to avoid the partial flag update of the normal
// rotate instructions.
def FeatureFastSHLDRotate
    : SubtargetFeature<
          "fast-shld-rotate", "HasFastSHLDRotate", "true",
          "SHLD can be used as a faster rotate">;
369
// Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
// "string operations"). See "REP String Enhancement" in the Intel Software
// Development Manual. This feature essentially means that REP MOVSB will copy
// using the largest available size instead of copying bytes one by one, making
// it at least as fast as REP MOVS{W,D,Q}.
def FeatureERMSB
    : SubtargetFeature<
          "ermsb", "HasERMSB", "true",
          "REP MOVS/STOS are fast">;
379
// Bulldozer and newer processors can merge CMP/TEST (but not other
// instructions) with conditional branches.
def FeatureBranchFusion
    : SubtargetFeature<"branchfusion", "HasBranchFusion", "true",
                 "CMP/TEST can be fused with conditional branches">;

// Sandy Bridge and newer processors have many instructions that can be
// fused with conditional branches and pass through the CPU as a single
// operation.
def FeatureMacroFusion
    : SubtargetFeature<"macrofusion", "HasMacroFusion", "true",
                 "Various instructions can be fused with conditional branches">;
392
// Gather is available since Haswell (AVX2 set). So technically, we can
// generate Gathers on all AVX2 processors. But the overhead on HSW is high.
// Skylake Client processor has faster Gathers than HSW and performance is
// similar to Skylake Server (AVX-512).
def FeatureHasFastGather
    : SubtargetFeature<"fast-gather", "HasFastGather", "true",
                       "Indicates if gather is reasonably fast">;

// Vector-width and mask-register preference flags.
def FeaturePrefer128Bit
    : SubtargetFeature<"prefer-128-bit", "Prefer128Bit", "true",
                       "Prefer 128-bit AVX instructions">;

def FeaturePrefer256Bit
    : SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true",
                       "Prefer 256-bit AVX instructions">;

def FeaturePreferMaskRegisters
    : SubtargetFeature<"prefer-mask-registers", "PreferMaskRegisters", "true",
                       "Prefer AVX512 mask registers over PTEST/MOVMSK">;
412
// Lower indirect calls using a special construct called a `retpoline` to
// mitigate potential Spectre v2 attacks against them.
def FeatureRetpolineIndirectCalls
    : SubtargetFeature<
          "retpoline-indirect-calls", "UseRetpolineIndirectCalls", "true",
          "Remove speculation of indirect calls from the generated code">;

// Lower indirect branches and switches either using conditional branch trees
// or using a special construct called a `retpoline` to mitigate potential
// Spectre v2 attacks against them.
def FeatureRetpolineIndirectBranches
    : SubtargetFeature<
          "retpoline-indirect-branches", "UseRetpolineIndirectBranches", "true",
          "Remove speculation of indirect branches from the generated code">;

// Deprecated umbrella feature for enabling both `retpoline-indirect-calls` and
// `retpoline-indirect-branches` above.
def FeatureRetpoline
    : SubtargetFeature<"retpoline", "DeprecatedUseRetpoline", "true",
                       "Remove speculation of indirect branches from the "
                       "generated code, either by avoiding them entirely or "
                       "lowering them with a speculation blocking construct",
                       [FeatureRetpolineIndirectCalls,
                        FeatureRetpolineIndirectBranches]>;

// Rely on external thunks for the emitted retpoline calls. This allows users
// to provide their own custom thunk definitions in highly specialized
// environments such as a kernel that does boot-time hot patching.
// This record lists FeatureRetpolineIndirectCalls as an implied feature.
def FeatureRetpolineExternalThunk
    : SubtargetFeature<
          "retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
          "When lowering an indirect call or branch using a `retpoline`, rely "
          "on the specified user provided thunk rather than emitting one "
          "ourselves. Only has effect when combined with some other retpoline "
          "feature", [FeatureRetpolineIndirectCalls]>;
448
// Mitigate LVI attacks against indirect calls/branches and call returns.
def FeatureLVIControlFlowIntegrity
    : SubtargetFeature<
          "lvi-cfi", "UseLVIControlFlowIntegrity", "true",
          "Prevent indirect calls/branches from using a memory operand, and "
          "precede all indirect calls/branches from a register with an "
          "LFENCE instruction to serialize control flow. Also decompose RET "
          "instructions into a POP+LFENCE+JMP sequence.">;

// Enable SESES to mitigate speculative execution attacks. Lists
// FeatureLVIControlFlowIntegrity as an implied feature.
def FeatureSpeculativeExecutionSideEffectSuppression
    : SubtargetFeature<
          "seses", "UseSpeculativeExecutionSideEffectSuppression", "true",
          "Prevent speculative execution side channel timing attacks by "
          "inserting a speculation barrier before memory reads, memory writes, "
          "and conditional branches. Implies LVI Control Flow integrity.",
          [FeatureLVIControlFlowIntegrity]>;

// Mitigate LVI attacks against data loads.
def FeatureLVILoadHardening
    : SubtargetFeature<
          "lvi-load-hardening", "UseLVILoadHardening", "true",
          "Insert LFENCE instructions to prevent data speculatively injected "
          "into loads from being used maliciously.">;
473
// Direct Move instructions.
def FeatureMOVDIRI  : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
                                       "Support movdiri instruction">;
def FeatureMOVDIR64B : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true",
                                        "Support movdir64b instruction">;

// Tuning flag for processors with a fast BEXTR.
def FeatureFastBEXTR : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true",
          "Indicates that the BEXTR instruction is implemented as a single uop "
          "with good throughput">;
483
// Combine vector math operations with shuffles into horizontal math
// instructions if a CPU implements horizontal operations (introduced with
// SSE3) with better latency/throughput than the alternative sequence.
def FeatureFastHorizontalOps
    : SubtargetFeature<
        "fast-hops", "HasFastHorizontalOps", "true",
        "Prefer horizontal vector math instructions (haddp, phsub, etc.) over "
        "normal vector instructions with shuffles">;

// Prefer a pair of logical shifts over shift+and, for scalar and vector code
// respectively.
def FeatureFastScalarShiftMasks
    : SubtargetFeature<
        "fast-scalar-shift-masks", "HasFastScalarShiftMasks", "true",
        "Prefer a left/right scalar logical shift pair over a shift+and pair">;

def FeatureFastVectorShiftMasks
    : SubtargetFeature<
        "fast-vector-shift-masks", "HasFastVectorShiftMasks", "true",
        "Prefer a left/right vector logical shift pair over a shift+and pair">;

// Select the Goldmont-specific floating point div/sqrt costs.
def FeatureUseGLMDivSqrtCosts
    : SubtargetFeature<"use-glm-div-sqrt-costs", "UseGLMDivSqrtCosts", "true",
        "Use Goldmont specific floating point div/sqrt costs">;
506
// Merge branches using three-way conditional code.
def FeatureMergeToThreeWayBranch : SubtargetFeature<"merge-to-threeway-branch",
                                        "ThreewayBranchProfitable", "true",
                                        "Merge branches to a three-way "
                                        "conditional branch">;

// Enable use of alias analysis during code generation.
def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true",
                                    "Use alias analysis during codegen">;
516
// Processor-family markers. Both have an empty feature string and empty
// description, and only set the X86ProcFamily attribute.
// Bonnell
def ProcIntelAtom : SubtargetFeature<"", "X86ProcFamily", "IntelAtom", "">;
// Silvermont
def ProcIntelSLM  : SubtargetFeature<"", "X86ProcFamily", "IntelSLM", "">;
521
522//===----------------------------------------------------------------------===//
523// Register File Description
524//===----------------------------------------------------------------------===//
525
526include "X86RegisterInfo.td"
527include "X86RegisterBanks.td"
528
529//===----------------------------------------------------------------------===//
530// Instruction Descriptions
531//===----------------------------------------------------------------------===//
532
533include "X86Schedule.td"
534include "X86InstrInfo.td"
535include "X86SchedPredicates.td"
536
// X86 instance of the InstrInfo class; no fields are overridden.
def X86InstrInfo : InstrInfo {}
538
539//===----------------------------------------------------------------------===//
540// X86 Scheduler Models
541//===----------------------------------------------------------------------===//
542
543include "X86ScheduleAtom.td"
544include "X86SchedSandyBridge.td"
545include "X86SchedHaswell.td"
546include "X86SchedBroadwell.td"
547include "X86ScheduleSLM.td"
548include "X86ScheduleZnver1.td"
549include "X86ScheduleZnver2.td"
550include "X86ScheduleBdVer2.td"
551include "X86ScheduleBtVer2.td"
552include "X86SchedSkylakeClient.td"
553include "X86SchedSkylakeServer.td"
554
555//===----------------------------------------------------------------------===//
556// X86 Processor Feature Lists
557//===----------------------------------------------------------------------===//
558
// Per-microarchitecture feature lists. The processor definitions further
// down refer to these as ProcessorFeatures.<CPU>Features.
//
// Naming pattern followed by most entries:
//   <CPU>AdditionalFeatures  - features added on top of the list this CPU
//                              is derived from.
//   <CPU>InheritableFeatures - the predecessor's inheritable list followed
//                              by the additional features; successors build
//                              on this list.
//   <CPU>SpecificFeatures    - features appended for this CPU only; they are
//                              not part of the inheritable list.
//   <CPU>Features            - inheritable list followed by the specific
//                              list, i.e. the complete list for the CPU.
def ProcessorFeatures {
  // Nehalem
  list<SubtargetFeature> NHMInheritableFeatures = [
    FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX, FeatureSSE42,
    FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureCMPXCHG16B, FeaturePOPCNT,
    FeatureLAHFSAHF, FeatureMacroFusion, FeatureInsertVZEROUPPER
  ];
  list<SubtargetFeature> NHMSpecificFeatures = [];
  list<SubtargetFeature> NHMFeatures = !listconcat(
    NHMInheritableFeatures, NHMSpecificFeatures);

  // Westmere
  list<SubtargetFeature> WSMAdditionalFeatures = [FeaturePCLMUL];
  list<SubtargetFeature> WSMSpecificFeatures = [];
  list<SubtargetFeature> WSMInheritableFeatures = !listconcat(
    NHMInheritableFeatures, WSMAdditionalFeatures);
  list<SubtargetFeature> WSMFeatures = !listconcat(
    WSMInheritableFeatures, WSMSpecificFeatures);

  // Sandybridge
  list<SubtargetFeature> SNBAdditionalFeatures = [
    FeatureAVX, FeatureSlowDivide64, FeatureXSAVE, FeatureXSAVEOPT,
    FeatureSlow3OpsLEA, FeatureFastScalarFSQRT, FeatureFastSHLDRotate,
    FeatureMergeToThreeWayBranch, FeatureFast15ByteNOP
  ];
  list<SubtargetFeature> SNBSpecificFeatures = [
    FeatureSlowUAMem32, FeaturePOPCNTFalseDeps
  ];
  list<SubtargetFeature> SNBInheritableFeatures = !listconcat(
    WSMInheritableFeatures, SNBAdditionalFeatures);
  list<SubtargetFeature> SNBFeatures = !listconcat(
    SNBInheritableFeatures, SNBSpecificFeatures);

  // Ivybridge
  list<SubtargetFeature> IVBAdditionalFeatures = [
    FeatureRDRAND, FeatureF16C, FeatureFSGSBase
  ];
  list<SubtargetFeature> IVBSpecificFeatures = [
    FeatureSlowUAMem32, FeaturePOPCNTFalseDeps
  ];
  list<SubtargetFeature> IVBInheritableFeatures = !listconcat(
    SNBInheritableFeatures, IVBAdditionalFeatures);
  list<SubtargetFeature> IVBFeatures = !listconcat(
    IVBInheritableFeatures, IVBSpecificFeatures);

  // Haswell
  list<SubtargetFeature> HSWAdditionalFeatures = [
    FeatureAVX2, FeatureBMI, FeatureBMI2, FeatureERMSB, FeatureFMA,
    FeatureINVPCID, FeatureLZCNT, FeatureMOVBE, FeatureFastVariableShuffle
  ];
  list<SubtargetFeature> HSWSpecificFeatures = [
    FeaturePOPCNTFalseDeps, FeatureLZCNTFalseDeps
  ];
  list<SubtargetFeature> HSWInheritableFeatures = !listconcat(
    IVBInheritableFeatures, HSWAdditionalFeatures);
  list<SubtargetFeature> HSWFeatures = !listconcat(
    HSWInheritableFeatures, HSWSpecificFeatures);

  // Broadwell
  list<SubtargetFeature> BDWAdditionalFeatures = [
    FeatureADX, FeatureRDSEED, FeaturePRFCHW
  ];
  list<SubtargetFeature> BDWSpecificFeatures = [
    FeaturePOPCNTFalseDeps, FeatureLZCNTFalseDeps
  ];
  list<SubtargetFeature> BDWInheritableFeatures = !listconcat(
    HSWInheritableFeatures, BDWAdditionalFeatures);
  list<SubtargetFeature> BDWFeatures = !listconcat(
    BDWInheritableFeatures, BDWSpecificFeatures);

  // Skylake
  list<SubtargetFeature> SKLAdditionalFeatures = [
    FeatureAES, FeatureXSAVEC, FeatureXSAVES, FeatureCLFLUSHOPT,
    FeatureFastVectorFSQRT
  ];
  list<SubtargetFeature> SKLSpecificFeatures = [
    FeatureHasFastGather, FeaturePOPCNTFalseDeps, FeatureSGX
  ];
  list<SubtargetFeature> SKLInheritableFeatures = !listconcat(
    BDWInheritableFeatures, SKLAdditionalFeatures);
  list<SubtargetFeature> SKLFeatures = !listconcat(
    SKLInheritableFeatures, SKLSpecificFeatures);

  // Skylake-AVX512
  list<SubtargetFeature> SKXAdditionalFeatures = [
    FeatureAVX512, FeaturePrefer256Bit, FeatureCDI, FeatureDQI, FeatureBWI,
    FeatureVLX, FeaturePKU, FeatureCLWB
  ];
  list<SubtargetFeature> SKXSpecificFeatures = [
    FeatureHasFastGather, FeaturePOPCNTFalseDeps
  ];
  list<SubtargetFeature> SKXInheritableFeatures = !listconcat(
    SKLInheritableFeatures, SKXAdditionalFeatures);
  list<SubtargetFeature> SKXFeatures = !listconcat(
    SKXInheritableFeatures, SKXSpecificFeatures);

  // Cascadelake
  list<SubtargetFeature> CLXAdditionalFeatures = [FeatureVNNI];
  list<SubtargetFeature> CLXSpecificFeatures = [
    FeatureHasFastGather, FeaturePOPCNTFalseDeps
  ];
  list<SubtargetFeature> CLXInheritableFeatures = !listconcat(
    SKXInheritableFeatures, CLXAdditionalFeatures);
  list<SubtargetFeature> CLXFeatures = !listconcat(
    CLXInheritableFeatures, CLXSpecificFeatures);

  // Cooperlake
  list<SubtargetFeature> CPXAdditionalFeatures = [FeatureBF16];
  list<SubtargetFeature> CPXSpecificFeatures = [
    FeatureHasFastGather, FeaturePOPCNTFalseDeps
  ];
  list<SubtargetFeature> CPXInheritableFeatures = !listconcat(
    CLXInheritableFeatures, CPXAdditionalFeatures);
  list<SubtargetFeature> CPXFeatures = !listconcat(
    CPXInheritableFeatures, CPXSpecificFeatures);

  // Cannonlake (derived from the Skylake inheritable list, not from SKX)
  list<SubtargetFeature> CNLAdditionalFeatures = [
    FeatureAVX512, FeaturePrefer256Bit, FeatureCDI, FeatureDQI, FeatureBWI,
    FeatureVLX, FeaturePKU, FeatureVBMI, FeatureIFMA, FeatureSHA, FeatureSGX
  ];
  list<SubtargetFeature> CNLSpecificFeatures = [FeatureHasFastGather];
  list<SubtargetFeature> CNLInheritableFeatures = !listconcat(
    SKLInheritableFeatures, CNLAdditionalFeatures);
  list<SubtargetFeature> CNLFeatures = !listconcat(
    CNLInheritableFeatures, CNLSpecificFeatures);

  // Icelake
  list<SubtargetFeature> ICLAdditionalFeatures = [
    FeatureBITALG, FeatureVAES, FeatureVBMI2, FeatureVNNI, FeatureVPCLMULQDQ,
    FeatureVPOPCNTDQ, FeatureGFNI, FeatureCLWB, FeatureRDPID
  ];
  list<SubtargetFeature> ICLSpecificFeatures = [FeatureHasFastGather];
  list<SubtargetFeature> ICLInheritableFeatures = !listconcat(
    CNLInheritableFeatures, ICLAdditionalFeatures);
  list<SubtargetFeature> ICLFeatures = !listconcat(
    ICLInheritableFeatures, ICLSpecificFeatures);

  // Icelake Server (Icelake inheritable list plus server-only features)
  list<SubtargetFeature> ICXSpecificFeatures = [
    FeaturePCONFIG, FeatureWBNOINVD, FeatureHasFastGather
  ];
  list<SubtargetFeature> ICXFeatures = !listconcat(
    ICLInheritableFeatures, ICXSpecificFeatures);

  // Tigerlake
  // NOTE(review): this entry does not follow the pattern of the others.
  // TGLInheritableFeatures is only TGLAdditional followed by TGLSpecific (it
  // does not contain the Icelake inheritable list), and TGLFeatures is built
  // on the complete ICLFeatures list rather than on ICLInheritableFeatures.
  list<SubtargetFeature> TGLAdditionalFeatures = [
    FeatureVP2INTERSECT, FeatureMOVDIRI, FeatureMOVDIR64B, FeatureSHSTK
  ];
  list<SubtargetFeature> TGLSpecificFeatures = [FeatureHasFastGather];
  list<SubtargetFeature> TGLInheritableFeatures = !listconcat(
    TGLAdditionalFeatures, TGLSpecificFeatures);
  list<SubtargetFeature> TGLFeatures = !listconcat(
    ICLFeatures, TGLInheritableFeatures);

  // Atom
  list<SubtargetFeature> AtomInheritableFeatures = [
    FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX, FeatureSSSE3,
    FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureCMPXCHG16B, FeatureMOVBE,
    FeatureSlowTwoMemOps, FeatureLAHFSAHF, FeatureInsertVZEROUPPER
  ];
  list<SubtargetFeature> AtomSpecificFeatures = [
    ProcIntelAtom, FeatureSlowUAMem16, FeatureLEAForSP, FeatureSlowDivide32,
    FeatureSlowDivide64, FeatureLEAUsesAG, FeaturePadShortFunctions
  ];
  list<SubtargetFeature> AtomFeatures = !listconcat(
    AtomInheritableFeatures, AtomSpecificFeatures);

  // Silvermont
  list<SubtargetFeature> SLMAdditionalFeatures = [
    FeatureSSE42, FeaturePOPCNT, FeaturePCLMUL, FeaturePRFCHW, FeatureSlowLEA,
    FeatureSlowIncDec, FeatureRDRAND
  ];
  list<SubtargetFeature> SLMSpecificFeatures = [
    ProcIntelSLM, FeatureSlowDivide64, FeatureSlowPMULLD, FeatureFast7ByteNOP,
    FeaturePOPCNTFalseDeps
  ];
  list<SubtargetFeature> SLMInheritableFeatures = !listconcat(
    AtomInheritableFeatures, SLMAdditionalFeatures);
  list<SubtargetFeature> SLMFeatures = !listconcat(
    SLMInheritableFeatures, SLMSpecificFeatures);

  // Goldmont
  list<SubtargetFeature> GLMAdditionalFeatures = [
    FeatureAES, FeatureSHA, FeatureRDSEED, FeatureXSAVE, FeatureXSAVEOPT,
    FeatureXSAVEC, FeatureXSAVES, FeatureCLFLUSHOPT, FeatureFSGSBase
  ];
  list<SubtargetFeature> GLMSpecificFeatures = [
    FeatureUseGLMDivSqrtCosts, FeaturePOPCNTFalseDeps
  ];
  list<SubtargetFeature> GLMInheritableFeatures = !listconcat(
    SLMInheritableFeatures, GLMAdditionalFeatures);
  list<SubtargetFeature> GLMFeatures = !listconcat(
    GLMInheritableFeatures, GLMSpecificFeatures);

  // Goldmont Plus
  list<SubtargetFeature> GLPAdditionalFeatures = [
    FeaturePTWRITE, FeatureRDPID, FeatureSGX
  ];
  list<SubtargetFeature> GLPSpecificFeatures = [FeatureUseGLMDivSqrtCosts];
  list<SubtargetFeature> GLPInheritableFeatures = !listconcat(
    GLMInheritableFeatures, GLPAdditionalFeatures);
  list<SubtargetFeature> GLPFeatures = !listconcat(
    GLPInheritableFeatures, GLPSpecificFeatures);

  // Tremont
  list<SubtargetFeature> TRMAdditionalFeatures = [FeatureCLWB, FeatureGFNI];
  list<SubtargetFeature> TRMSpecificFeatures = [FeatureUseGLMDivSqrtCosts];
  list<SubtargetFeature> TRMInheritableFeatures = !listconcat(
    GLPInheritableFeatures, TRMAdditionalFeatures);
  list<SubtargetFeature> TRMFeatures = !listconcat(
    TRMInheritableFeatures, TRMSpecificFeatures);

  // Knights Landing (a single flat list; nothing is inherited)
  list<SubtargetFeature> KNLFeatures = [
    FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX, FeatureFXSR,
    FeatureNOPL, Feature64Bit, FeatureCMPXCHG16B, FeaturePOPCNT,
    FeatureSlowDivide64, FeaturePCLMUL, FeatureXSAVE, FeatureXSAVEOPT,
    FeatureLAHFSAHF, FeatureSlow3OpsLEA, FeatureSlowIncDec, FeatureAES,
    FeatureRDRAND, FeatureF16C, FeatureFSGSBase, FeatureAVX512, FeatureERI,
    FeatureCDI, FeaturePFI, FeaturePREFETCHWT1, FeatureADX, FeatureRDSEED,
    FeatureMOVBE, FeatureLZCNT, FeatureBMI, FeatureBMI2, FeatureFMA,
    FeaturePRFCHW, FeaturePreferMaskRegisters, FeatureSlowTwoMemOps,
    FeatureHasFastGather, FeatureSlowPMADDWD
  ];
  // Knights Mill: the Knights Landing list plus VPOPCNTDQ.
  // TODO Add AVX5124FMAPS/AVX5124VNNIW features
  list<SubtargetFeature> KNMFeatures = !listconcat(
    KNLFeatures, [FeatureVPOPCNTDQ]);

  // Barcelona
  list<SubtargetFeature> BarcelonaInheritableFeatures = [
    FeatureX87, FeatureCMPXCHG8B, FeatureSSE4A, Feature3DNowA, FeatureFXSR,
    FeatureNOPL, FeatureCMPXCHG16B, FeaturePRFCHW, FeatureLZCNT,
    FeaturePOPCNT, FeatureSlowSHLD, FeatureLAHFSAHF, FeatureCMOV,
    Feature64Bit, FeatureFastScalarShiftMasks, FeatureInsertVZEROUPPER
  ];
  list<SubtargetFeature> BarcelonaFeatures = BarcelonaInheritableFeatures;

  // Bobcat
  // FeatureInsertVZEROUPPER is kept in the specific list, so it is not part
  // of what Jaguar inherits below.
  list<SubtargetFeature> BtVer1InheritableFeatures = [
    FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX, FeatureSSSE3,
    FeatureSSE4A, FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureCMPXCHG16B,
    FeaturePRFCHW, FeatureLZCNT, FeaturePOPCNT, FeatureSlowSHLD,
    FeatureLAHFSAHF, FeatureFast15ByteNOP, FeatureFastScalarShiftMasks,
    FeatureFastVectorShiftMasks
  ];
  list<SubtargetFeature> BtVer1SpecificFeatures = [FeatureInsertVZEROUPPER];
  list<SubtargetFeature> BtVer1Features = !listconcat(
    BtVer1InheritableFeatures, BtVer1SpecificFeatures);

  // Jaguar
  list<SubtargetFeature> BtVer2AdditionalFeatures = [
    FeatureAVX, FeatureAES, FeaturePCLMUL, FeatureBMI, FeatureF16C,
    FeatureMOVBE, FeatureXSAVE, FeatureXSAVEOPT
  ];
  list<SubtargetFeature> BtVer2SpecificFeatures = [
    FeatureFastLZCNT, FeatureFastBEXTR, FeatureFastHorizontalOps
  ];
  list<SubtargetFeature> BtVer2InheritableFeatures = !listconcat(
    BtVer1InheritableFeatures, BtVer2AdditionalFeatures);
  list<SubtargetFeature> BtVer2Features = !listconcat(
    BtVer2InheritableFeatures, BtVer2SpecificFeatures);

  // Bulldozer
  list<SubtargetFeature> BdVer1InheritableFeatures = [
    FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureXOP, Feature64Bit,
    FeatureCMPXCHG16B, FeatureAES, FeaturePRFCHW, FeaturePCLMUL, FeatureMMX,
    FeatureFXSR, FeatureNOPL, FeatureLZCNT, FeaturePOPCNT, FeatureXSAVE,
    FeatureLWP, FeatureSlowSHLD, FeatureLAHFSAHF, FeatureFast11ByteNOP,
    FeatureFastScalarShiftMasks, FeatureBranchFusion, FeatureInsertVZEROUPPER
  ];
  list<SubtargetFeature> BdVer1Features = BdVer1InheritableFeatures;

  // PileDriver
  list<SubtargetFeature> BdVer2AdditionalFeatures = [
    FeatureF16C, FeatureBMI, FeatureTBM, FeatureFMA, FeatureFastBEXTR
  ];
  list<SubtargetFeature> BdVer2InheritableFeatures = !listconcat(
    BdVer1InheritableFeatures, BdVer2AdditionalFeatures);
  list<SubtargetFeature> BdVer2Features = BdVer2InheritableFeatures;

  // Steamroller
  list<SubtargetFeature> BdVer3AdditionalFeatures = [
    FeatureXSAVEOPT, FeatureFSGSBase
  ];
  list<SubtargetFeature> BdVer3InheritableFeatures = !listconcat(
    BdVer2InheritableFeatures, BdVer3AdditionalFeatures);
  list<SubtargetFeature> BdVer3Features = BdVer3InheritableFeatures;

  // Excavator
  list<SubtargetFeature> BdVer4AdditionalFeatures = [
    FeatureAVX2, FeatureBMI2, FeatureMOVBE, FeatureRDRAND, FeatureMWAITX
  ];
  list<SubtargetFeature> BdVer4InheritableFeatures = !listconcat(
    BdVer3InheritableFeatures, BdVer4AdditionalFeatures);
  list<SubtargetFeature> BdVer4Features = BdVer4InheritableFeatures;


  // AMD Zen Processors common ISAs
  list<SubtargetFeature> ZNFeatures = [
    FeatureADX, FeatureAES, FeatureAVX2, FeatureBMI, FeatureBMI2,
    FeatureCLFLUSHOPT, FeatureCLZERO, FeatureCMOV, Feature64Bit,
    FeatureCMPXCHG16B, FeatureF16C, FeatureFMA, FeatureFSGSBase, FeatureFXSR,
    FeatureNOPL, FeatureFastLZCNT, FeatureLAHFSAHF, FeatureLZCNT,
    FeatureFastBEXTR, FeatureFast15ByteNOP, FeatureBranchFusion,
    FeatureFastScalarShiftMasks, FeatureMMX, FeatureMOVBE, FeatureMWAITX,
    FeaturePCLMUL, FeaturePOPCNT, FeaturePRFCHW, FeatureRDRAND, FeatureRDSEED,
    FeatureSHA, FeatureSSE4A, FeatureSlowSHLD, FeatureInsertVZEROUPPER,
    FeatureX87, FeatureXSAVE, FeatureXSAVEC, FeatureXSAVEOPT, FeatureXSAVES
  ];
  // Zen 2: the common Zen list plus three more features.
  list<SubtargetFeature> ZN2AdditionalFeatures = [
    FeatureCLWB, FeatureRDPID, FeatureWBNOINVD
  ];
  list<SubtargetFeature> ZN2Features = !listconcat(
    ZNFeatures, ZN2AdditionalFeatures);
}
1020
1021//===----------------------------------------------------------------------===//
1022// X86 processors supported.
1023//===----------------------------------------------------------------------===//
1024
// Shorthand for processors that use GenericModel:
// Proc<Name, Features> is ProcessorModel<Name, GenericModel, Features>.
class Proc<string Name,
           list<SubtargetFeature> Features> :
    ProcessorModel<Name, GenericModel, Features> {}
1027
// NOTE: CMPXCHG8B is part of "generic" for legacy compatibility, so that it
// is only disabled if i386/i486 is specifically requested.
def : Proc<"generic", [
  FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureInsertVZEROUPPER
]>;

// i386 and i486 share one list; it is the only one here without CMPXCHG8B.
foreach P = ["i386", "i486"] in {
  def : Proc<P, [
    FeatureX87, FeatureSlowUAMem16, FeatureInsertVZEROUPPER
  ]>;
}

// i586 and pentium share the same list as "generic".
foreach P = ["i586", "pentium"] in {
  def : Proc<P, [
    FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureInsertVZEROUPPER
  ]>;
}

// pentium-mmx: the pentium list plus MMX.
def : Proc<"pentium-mmx", [
  FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureMMX,
  FeatureInsertVZEROUPPER
]>;
1043
// i686: adds CMOV to the i586-level list.
def : Proc<"i686", [
  FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureCMOV,
  FeatureInsertVZEROUPPER
]>;

// pentiumpro: the i686 list plus NOPL.
def : Proc<"pentiumpro", [
  FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureCMOV, FeatureNOPL,
  FeatureInsertVZEROUPPER
]>;
1048
// pentium2: uses GenericModel (via Proc); the list includes MMX and FXSR.
def : Proc<"pentium2", [
  FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureMMX, FeatureCMOV,
  FeatureFXSR, FeatureNOPL, FeatureInsertVZEROUPPER
]>;
1052
// pentium3 and pentium3m get identical definitions (GenericModel, SSE1).
foreach P = ["pentium3", "pentium3m"] in
  def : Proc<P, [
    FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE1,
    FeatureFXSR, FeatureNOPL, FeatureCMOV, FeatureInsertVZEROUPPER
  ]>;
1058
1059// Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
1060// The intent is to enable it for pentium4 which is the current default
1061// processor in a vanilla 32-bit clang compilation when no specific
1062// architecture is specified.  This generally gives a nice performance
1063// increase on silvermont, with largely neutral behavior on other
1064// contemporary large core processors.
1065// pentium-m, pentium4m, prescott and nocona are included as a preventative
1066// measure to avoid performance surprises, in case clang's default cpu
1067// changes slightly.
1068
// pentium-m: SSE2-level feature list on GenericPostRAModel.
def : ProcessorModel<"pentium-m", GenericPostRAModel, [
  FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE2,
  FeatureFXSR, FeatureNOPL, FeatureCMOV, FeatureInsertVZEROUPPER
]>;
1073
// pentium4 and pentium4m get identical definitions: GenericPostRAModel with
// the same feature list as pentium-m.
foreach P = ["pentium4", "pentium4m"] in
  def : ProcessorModel<P, GenericPostRAModel, [
    FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE2,
    FeatureFXSR, FeatureNOPL, FeatureCMOV, FeatureInsertVZEROUPPER
  ]>;
1080
// Intel Quark. The feature list holds FeatureInsertVZEROUPPER only.
def : Proc<"lakemont", [
  FeatureInsertVZEROUPPER
]>;
1083
// Intel Core Duo. SSE3-level feature list on SandyBridgeModel.
def : ProcessorModel<"yonah", SandyBridgeModel, [
  FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE3,
  FeatureFXSR, FeatureNOPL, FeatureCMOV, FeatureInsertVZEROUPPER
]>;
1089
// NetBurst.
// prescott: SSE3-level feature list on GenericPostRAModel.
def : ProcessorModel<"prescott", GenericPostRAModel, [
  FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE3,
  FeatureFXSR, FeatureNOPL, FeatureCMOV, FeatureInsertVZEROUPPER
]>;
// "nocona": SSE3-level feature set that additionally lists Feature64Bit and
// FeatureCMPXCHG16B; scheduled with GenericPostRAModel.
def : ProcessorModel<"nocona", GenericPostRAModel,
                     [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
                      FeatureCMOV, FeatureMMX, FeatureSSE3, FeatureFXSR,
                      FeatureNOPL, Feature64Bit, FeatureCMPXCHG16B,
                      FeatureInsertVZEROUPPER]>;
1108
// Intel Core 2 Solo/Duo.
// "core2": 64-bit, SSSE3-level feature set with LAHF/SAHF and macro fusion;
// scheduled with the Sandy Bridge model.
def : ProcessorModel<"core2", SandyBridgeModel,
                     [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
                      FeatureCMOV, FeatureMMX, FeatureSSSE3, FeatureFXSR,
                      FeatureNOPL, Feature64Bit, FeatureCMPXCHG16B,
                      FeatureLAHFSAHF, FeatureMacroFusion,
                      FeatureInsertVZEROUPPER]>;
// "penryn": 64-bit, SSE4.1-level feature set with LAHF/SAHF and macro fusion;
// scheduled with the Sandy Bridge model.
def : ProcessorModel<"penryn", SandyBridgeModel,
                     [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
                      FeatureCMOV, FeatureMMX, FeatureSSE41, FeatureFXSR,
                      FeatureNOPL, Feature64Bit, FeatureCMPXCHG16B,
                      FeatureLAHFSAHF, FeatureMacroFusion,
                      FeatureInsertVZEROUPPER]>;
1140
// Atom CPUs.
// "bonnell" and "atom" are two names for the same AtomModel / AtomFeatures
// pairing.
foreach CPUName = ["bonnell", "atom"] in {
  def : ProcessorModel<CPUName,
                       AtomModel,
                       ProcessorFeatures.AtomFeatures>;
}
1145
// "silvermont" and its short name "slm" both map to SLMModel with
// SLMFeatures.
foreach CPUName = ["silvermont", "slm"] in {
  def : ProcessorModel<CPUName,
                       SLMModel,
                       ProcessorFeatures.SLMFeatures>;
}
1149
// goldmont, goldmont-plus and tremont each get their own feature list but
// all three are scheduled with SLMModel.
def : ProcessorModel<"goldmont", SLMModel,
                     ProcessorFeatures.GLMFeatures>;
def : ProcessorModel<"goldmont-plus", SLMModel,
                     ProcessorFeatures.GLPFeatures>;
def : ProcessorModel<"tremont", SLMModel,
                     ProcessorFeatures.TRMFeatures>;
1153
// "Arrandale" along with corei3 and corei5.
// "nehalem" and "corei7" share NHMFeatures and the Sandy Bridge model.
foreach CPUName = ["nehalem", "corei7"] in {
  def : ProcessorModel<CPUName,
                       SandyBridgeModel,
                       ProcessorFeatures.NHMFeatures>;
}
1158
// Westmere is the corei3/i5/i7 step between nehalem and sandybridge; it is
// scheduled with the Sandy Bridge model.
def : ProcessorModel<"westmere", SandyBridgeModel, ProcessorFeatures.WSMFeatures>;
1162
// "sandybridge" and "corei7-avx" share SNBFeatures and the Sandy Bridge
// model.
foreach CPUName = ["sandybridge", "corei7-avx"] in {
  def : ProcessorModel<CPUName,
                       SandyBridgeModel,
                       ProcessorFeatures.SNBFeatures>;
}
1166
// "ivybridge" and "core-avx-i" share IVBFeatures; they reuse the Sandy Bridge
// scheduling model.
foreach CPUName = ["ivybridge", "core-avx-i"] in {
  def : ProcessorModel<CPUName,
                       SandyBridgeModel,
                       ProcessorFeatures.IVBFeatures>;
}
1170
// "haswell" and "core-avx2" share HSWFeatures and HaswellModel.
foreach CPUName = ["haswell", "core-avx2"] in {
  def : ProcessorModel<CPUName,
                       HaswellModel,
                       ProcessorFeatures.HSWFeatures>;
}
1174
// broadwell and skylake each have a dedicated scheduling model and feature
// list.
def : ProcessorModel<"broadwell", BroadwellModel, ProcessorFeatures.BDWFeatures>;

def : ProcessorModel<"skylake", SkylakeClientModel, ProcessorFeatures.SKLFeatures>;
1180
// FIXME: define KNL scheduler model. Until one exists, both knl and knm are
// scheduled with HaswellModel.
def : ProcessorModel<"knl", HaswellModel,
                     ProcessorFeatures.KNLFeatures>;
def : ProcessorModel<"knm", HaswellModel,
                     ProcessorFeatures.KNMFeatures>;
1184
// "skylake-avx512" and its short name "skx" share SKXFeatures and
// SkylakeServerModel.
foreach CPUName = ["skylake-avx512", "skx"] in {
  def : ProcessorModel<CPUName,
                       SkylakeServerModel,
                       ProcessorFeatures.SKXFeatures>;
}
1188
// The following CPUs each have their own feature list but all reuse
// SkylakeServerModel for scheduling.
def : ProcessorModel<"cascadelake", SkylakeServerModel, ProcessorFeatures.CLXFeatures>;
def : ProcessorModel<"cooperlake", SkylakeServerModel, ProcessorFeatures.CPXFeatures>;
def : ProcessorModel<"cannonlake", SkylakeServerModel, ProcessorFeatures.CNLFeatures>;
def : ProcessorModel<"icelake-client", SkylakeServerModel, ProcessorFeatures.ICLFeatures>;
def : ProcessorModel<"icelake-server", SkylakeServerModel, ProcessorFeatures.ICXFeatures>;
def : ProcessorModel<"tigerlake", SkylakeServerModel, ProcessorFeatures.TGLFeatures>;
1201
1202// AMD CPUs.
1203
// K6 family. k6 lists FeatureMMX; k6-2 and k6-3 list Feature3DNow in its
// place and are otherwise identical to each other.
def : Proc<"k6", [
  FeatureX87,
  FeatureSlowUAMem16,
  FeatureCMPXCHG8B,
  FeatureMMX,
  FeatureInsertVZEROUPPER
]>;
def : Proc<"k6-2", [
  FeatureX87,
  FeatureSlowUAMem16,
  FeatureCMPXCHG8B,
  Feature3DNow,
  FeatureInsertVZEROUPPER
]>;
def : Proc<"k6-3", [
  FeatureX87,
  FeatureSlowUAMem16,
  FeatureCMPXCHG8B,
  Feature3DNow,
  FeatureInsertVZEROUPPER
]>;
1210
// "athlon" and "athlon-tbird": 3DNowA-level feature set with no SSE feature
// listed.
foreach CPUName = ["athlon", "athlon-tbird"] in {
  def : Proc<CPUName, [
    FeatureX87,
    FeatureSlowUAMem16,
    FeatureCMPXCHG8B,
    FeatureCMOV,
    Feature3DNowA,
    FeatureNOPL,
    FeatureSlowSHLD,
    FeatureInsertVZEROUPPER
  ]>;
}
1216
// "athlon-4", "athlon-xp" and "athlon-mp": the athlon list plus FeatureSSE1
// and FeatureFXSR.
foreach CPUName = ["athlon-4", "athlon-xp", "athlon-mp"] in {
  def : Proc<CPUName, [
    FeatureX87,
    FeatureSlowUAMem16,
    FeatureCMPXCHG8B,
    FeatureCMOV,
    FeatureSSE1,
    Feature3DNowA,
    FeatureFXSR,
    FeatureNOPL,
    FeatureSlowSHLD,
    FeatureInsertVZEROUPPER
  ]>;
}
1222
// K8 family names: 64-bit, SSE2-level feature set. FeatureCMPXCHG16B is not
// in this list (compare the *-sse3 variants).
foreach CPUName = ["k8", "opteron", "athlon64", "athlon-fx"] in {
  def : Proc<CPUName, [
    FeatureX87,
    FeatureSlowUAMem16,
    FeatureCMPXCHG8B,
    FeatureSSE2,
    Feature3DNowA,
    FeatureFXSR,
    FeatureNOPL,
    Feature64Bit,
    FeatureSlowSHLD,
    FeatureCMOV,
    FeatureFastScalarShiftMasks,
    FeatureInsertVZEROUPPER
  ]>;
}
1229
// SSE3 variants of the K8 family: 64-bit, SSE3-level feature set that also
// lists FeatureCMPXCHG16B.
foreach CPUName = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
  def : Proc<CPUName, [
    FeatureX87,
    FeatureSlowUAMem16,
    FeatureCMPXCHG8B,
    FeatureSSE3,
    Feature3DNowA,
    FeatureFXSR,
    FeatureNOPL,
    FeatureCMPXCHG16B,
    FeatureSlowSHLD,
    FeatureCMOV,
    Feature64Bit,
    FeatureFastScalarShiftMasks,
    FeatureInsertVZEROUPPER
  ]>;
}
1236
// "amdfam10" and "barcelona" are two names for the BarcelonaFeatures list.
foreach CPUName = ["amdfam10", "barcelona"] in {
  def : Proc<CPUName,
             ProcessorFeatures.BarcelonaFeatures>;
}
1240
// Bobcat (btver1) is declared through Proc; Jaguar (btver2) has its own
// scheduling model, BtVer2Model.
def : Proc<"btver1",
           ProcessorFeatures.BtVer1Features>;
def : ProcessorModel<"btver2",
                     BtVer2Model,
                     ProcessorFeatures.BtVer2Features>;
1245
// Bulldozer (bdver1) and Piledriver (bdver2) are both scheduled with
// BdVer2Model.
def : ProcessorModel<"bdver1",
                     BdVer2Model,
                     ProcessorFeatures.BdVer1Features>;
def : ProcessorModel<"bdver2",
                     BdVer2Model,
                     ProcessorFeatures.BdVer2Features>;
// Steamroller (bdver3) and Excavator (bdver4) are declared through Proc.
def : Proc<"bdver3",
           ProcessorFeatures.BdVer3Features>;
def : Proc<"bdver4",
           ProcessorFeatures.BdVer4Features>;
1254
// znver1 and znver2 each pair a dedicated scheduling model with their own
// feature list.
def : ProcessorModel<"znver1",
                     Znver1Model,
                     ProcessorFeatures.ZNFeatures>;
def : ProcessorModel<"znver2",
                     Znver2Model,
                     ProcessorFeatures.ZN2Features>;
1257
// "geode": x87, CMPXCHG8B and 3DNowA; no CMOV or SSE feature is listed.
def : Proc<"geode", [
  FeatureX87,
  FeatureSlowUAMem16,
  FeatureCMPXCHG8B,
  Feature3DNowA,
  FeatureInsertVZEROUPPER
]>;
1260
// WinChip and C3 names. winchip-c6 lists FeatureMMX; winchip2 and c3 list
// Feature3DNow instead. None of these three lists FeatureCMPXCHG8B.
def : Proc<"winchip-c6", [
  FeatureX87,
  FeatureSlowUAMem16,
  FeatureMMX,
  FeatureInsertVZEROUPPER
]>;
def : Proc<"winchip2", [
  FeatureX87,
  FeatureSlowUAMem16,
  Feature3DNow,
  FeatureInsertVZEROUPPER
]>;
def : Proc<"c3", [
  FeatureX87,
  FeatureSlowUAMem16,
  Feature3DNow,
  FeatureInsertVZEROUPPER
]>;
// c3-2 moves to an SSE1-level list with CMPXCHG8B, FXSR and CMOV.
def : Proc<"c3-2", [
  FeatureX87,
  FeatureSlowUAMem16,
  FeatureCMPXCHG8B,
  FeatureMMX,
  FeatureSSE1,
  FeatureFXSR,
  FeatureCMOV,
  FeatureInsertVZEROUPPER
]>;
1270
// Generic 64-bit x86 processor model. It aims to be good for modern chips
// without enabling any instruction set encodings beyond the basic SSE2 and
// 64-bit ones: it disables things that are slow on any mainstream, modern
// 64-bit x86 chip and enables features that are generally beneficial.
//
// The Sandy Bridge model is currently the default scheduling model because it
// is already used across Nehalem, Westmere, Sandy Bridge, and Ivy Bridge,
// which covers a huge swath of x86 processors. If specific scheduling knobs
// need to be tuned differently for AMD chips, we might consider forming a
// common base for them.
def : ProcessorModel<"x86-64", SandyBridgeModel,
                     [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX,
                      FeatureSSE2, FeatureFXSR, FeatureNOPL, Feature64Bit,
                      FeatureSlow3OpsLEA, FeatureSlowDivide64,
                      FeatureSlowIncDec, FeatureMacroFusion,
                      FeatureInsertVZEROUPPER]>;
1296
1297//===----------------------------------------------------------------------===//
1298// Calling Conventions
1299//===----------------------------------------------------------------------===//
1300
1301include "X86CallingConv.td"
1302
1303
1304//===----------------------------------------------------------------------===//
1305// Assembly Parser
1306//===----------------------------------------------------------------------===//
1307
// AT&T-syntax assembly parser variant (variant number 0).
def ATTAsmParserVariant : AsmParserVariant {
  // Name by which this variant is identified.
  string Name = "att";

  // Numeric index of this variant.
  int Variant = 0;

  // Prefix that marks a hard-coded register name.
  string RegisterPrefix = "%";

  // Delimiter that starts a comment in assembly strings; comments are
  // discarded.
  string CommentDelimiter = "#";
}
1320
// Intel-syntax assembly parser variant (variant number 1).
def IntelAsmParserVariant : AsmParserVariant {
  // Name by which this variant is identified.
  string Name = "intel";

  // Numeric index of this variant.
  int Variant = 1;

  // Hard-coded register names carry no prefix in this syntax.
  string RegisterPrefix = "";

  // Delimiter that starts a comment in assembly strings; comments are
  // discarded.
  string CommentDelimiter = ";";
}
1333
1334//===----------------------------------------------------------------------===//
1335// Assembly Printers
1336//===----------------------------------------------------------------------===//
1337
// The X86 target supports two different syntaxes for emitting machine code.
// This is controlled by the -x86-asm-syntax={att|intel} option.
// Assembly writer for AT&T syntax: variant 0, printed by ATTInstPrinter.
def ATTAsmWriter : AsmWriter {
  int Variant = 0;
  string AsmWriterClassName = "ATTInstPrinter";
}
// Assembly writer for Intel syntax: variant 1, printed by IntelInstPrinter.
def IntelAsmWriter : AsmWriter {
  int Variant = 1;
  string AsmWriterClassName = "IntelInstPrinter";
}
1348
// Top-level target record: ties together the instruction set, both assembly
// parser variants and both assembly writers.
def X86 : Target {
  // Information about the instructions.
  let InstructionSet = X86InstrInfo;

  // AT&T and Intel parser variants, in that order.
  let AssemblyParserVariants = [
    ATTAsmParserVariant,
    IntelAsmParserVariant
  ];

  // AT&T and Intel writers, in that order.
  let AssemblyWriters = [
    ATTAsmWriter,
    IntelAsmWriter
  ];

  let AllowRegisterRenaming = 1;
}
1356
1357//===----------------------------------------------------------------------===//
1358// Pfm Counters
1359//===----------------------------------------------------------------------===//
1360
1361include "X86PfmCounters.td"
1362