//===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This is a target description file for the Intel i386 architecture, referred
// to here as the "X86" architecture.
//
//===----------------------------------------------------------------------===//

// Get the target-independent interfaces which we are implementing...
//
include "llvm/Target/Target.td"

//===----------------------------------------------------------------------===//
// X86 Subtarget state
//
// Every record in this file instantiates SubtargetFeature, which is declared
// in the Target.td included above. The arguments are, in order: the feature
// name, the subtarget field it sets, the value stored in that field, a help
// description, and an optional list of features it implies.

def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true",
    "64-bit mode (x86_64)">;
def Mode32Bit : SubtargetFeature<"32bit-mode", "In32BitMode", "true",
    "32-bit mode (80386)">;
def Mode16Bit : SubtargetFeature<"16bit-mode", "In16BitMode", "true",
    "16-bit mode (i8086)">;

//===----------------------------------------------------------------------===//
// X86 Subtarget features
//===----------------------------------------------------------------------===//

def FeatureX87 : SubtargetFeature<"x87", "HasX87", "true",
    "Enable X87 float instructions">;

def FeatureNOPL : SubtargetFeature<"nopl", "HasNOPL", "true",
    "Enable NOPL instruction">;

def FeatureCMOV : SubtargetFeature<"cmov", "HasCMov", "true",
    "Enable conditional move instructions">;

def FeatureCMPXCHG8B : SubtargetFeature<"cx8", "HasCmpxchg8b", "true",
    "Support CMPXCHG8B instructions">;

def FeaturePOPCNT : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
    "Support POPCNT instruction">;

def FeatureFXSR : SubtargetFeature<"fxsr", "HasFXSR", "true",
    "Support fxsave/fxrestore instructions">;

// XSAVE family: each variant implies the base XSAVE feature.
def FeatureXSAVE : SubtargetFeature<"xsave", "HasXSAVE", "true",
    "Support xsave instructions">;

def FeatureXSAVEOPT : SubtargetFeature<"xsaveopt", "HasXSAVEOPT", "true",
    "Support xsaveopt instructions", [FeatureXSAVE]>;

def FeatureXSAVEC : SubtargetFeature<"xsavec", "HasXSAVEC", "true",
    "Support xsavec instructions", [FeatureXSAVE]>;

def FeatureXSAVES : SubtargetFeature<"xsaves", "HasXSAVES", "true",
    "Support xsaves instructions", [FeatureXSAVE]>;

// SSE levels all write the single X86SSELevel field, and each level implies
// the one before it.
def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
    "Enable SSE instructions">;
def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
    "Enable SSE2 instructions", [FeatureSSE1]>;
def FeatureSSE3 : SubtargetFeature<"sse3", "X86SSELevel", "SSE3",
    "Enable SSE3 instructions", [FeatureSSE2]>;
def FeatureSSSE3 : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3",
    "Enable SSSE3 instructions", [FeatureSSE3]>;
def FeatureSSE41 : SubtargetFeature<"sse4.1", "X86SSELevel", "SSE41",
    "Enable SSE 4.1 instructions", [FeatureSSSE3]>;
def FeatureSSE42 : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42",
    "Enable SSE 4.2 instructions", [FeatureSSE41]>;
// The MMX subtarget feature is separate from the rest of the SSE features
// because it's important (for odd compatibility reasons) to be able to
// turn it off explicitly while allowing SSE+ to be on.
def FeatureMMX : SubtargetFeature<"mmx", "X863DNowLevel", "MMX",
    "Enable MMX instructions">;
def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
    "Enable 3DNow! instructions", [FeatureMMX]>;
def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
    "Enable 3DNow! Athlon instructions", [Feature3DNow]>;
// All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied
// feature, because SSE2 can be disabled (e.g. for compiling OS kernels)
// without disabling 64-bit mode. Nothing should imply this feature bit. It
// is used to enforce that only 64-bit capable CPUs are used in 64-bit mode.
def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
    "Support 64-bit instructions">;
def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true",
    "64-bit with cmpxchg16b", [FeatureCMPXCHG8B]>;
def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
    "SHLD instruction is slow">;
def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
    "PMULLD instruction is slow">;
def FeatureSlowPMADDWD
    : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow", "true",
                       "PMADDWD is slower than PMULLD">;
// FIXME: This should not apply to CPUs that do not have SSE.
def FeatureSlowUAMem16
    : SubtargetFeature<"slow-unaligned-mem-16", "IsUAMem16Slow", "true",
                       "Slow unaligned 16-byte memory access">;
def FeatureSlowUAMem32
    : SubtargetFeature<"slow-unaligned-mem-32", "IsUAMem32Slow", "true",
                       "Slow unaligned 32-byte memory access">;
def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
    "Support SSE 4a instructions", [FeatureSSE3]>;

// AVX and AVX2 continue the X86SSELevel chain above SSE4.2.
def FeatureAVX : SubtargetFeature<"avx", "X86SSELevel", "AVX",
    "Enable AVX instructions", [FeatureSSE42]>;
def FeatureAVX2 : SubtargetFeature<"avx2", "X86SSELevel", "AVX2",
    "Enable AVX2 instructions", [FeatureAVX]>;
// The description previously read "fused multiple-add"; the operation is a
// fused multiply-add.
def FeatureFMA : SubtargetFeature<"fma", "HasFMA", "true",
    "Enable three-operand fused multiply-add", [FeatureAVX]>;
def FeatureF16C : SubtargetFeature<"f16c", "HasF16C", "true",
    "Support 16-bit floating point conversion instructions", [FeatureAVX]>;
// AVX-512 foundation: the top X86SSELevel defined here. It implies AVX2, FMA
// and F16C.
def FeatureAVX512 : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512F",
    "Enable AVX-512 instructions",
    [FeatureAVX2, FeatureFMA, FeatureF16C]>;
def FeatureERI : SubtargetFeature<"avx512er", "HasERI", "true",
    "Enable AVX-512 Exponential and Reciprocal Instructions",
    [FeatureAVX512]>;
def FeatureCDI : SubtargetFeature<"avx512cd", "HasCDI", "true",
    "Enable AVX-512 Conflict Detection Instructions",
    [FeatureAVX512]>;
def FeatureVPOPCNTDQ
    : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ", "true",
                       "Enable AVX-512 Population Count Instructions",
                       [FeatureAVX512]>;
def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true",
    "Enable AVX-512 PreFetch Instructions",
    [FeatureAVX512]>;
def FeaturePREFETCHWT1
    : SubtargetFeature<"prefetchwt1", "HasPREFETCHWT1", "true",
                       "Prefetch with Intent to Write and T1 Hint">;
def FeatureDQI : SubtargetFeature<"avx512dq", "HasDQI", "true",
    "Enable AVX-512 Doubleword and Quadword Instructions",
    [FeatureAVX512]>;
def FeatureBWI : SubtargetFeature<"avx512bw", "HasBWI", "true",
    "Enable AVX-512 Byte and Word Instructions",
    [FeatureAVX512]>;
def FeatureVLX : SubtargetFeature<"avx512vl", "HasVLX", "true",
    "Enable AVX-512 Vector Length eXtensions",
    [FeatureAVX512]>;
def FeatureVBMI : SubtargetFeature<"avx512vbmi", "HasVBMI", "true",
    "Enable AVX-512 Vector Byte Manipulation Instructions",
    [FeatureBWI]>;
def FeatureVBMI2 : SubtargetFeature<"avx512vbmi2", "HasVBMI2", "true",
    "Enable AVX-512 further Vector Byte Manipulation Instructions",
    [FeatureBWI]>;
// Description typo fixed here as well: "Multiple-Add" -> "Multiply-Add".
def FeatureIFMA : SubtargetFeature<"avx512ifma", "HasIFMA", "true",
    "Enable AVX-512 Integer Fused Multiply-Add",
    [FeatureAVX512]>;
def FeaturePKU : SubtargetFeature<"pku", "HasPKU", "true",
    "Enable protection keys">;
def FeatureVNNI : SubtargetFeature<"avx512vnni", "HasVNNI", "true",
    "Enable AVX-512 Vector Neural Network Instructions",
    [FeatureAVX512]>;
def FeatureAVXVNNI : SubtargetFeature<"avxvnni", "HasAVXVNNI", "true",
    "Support AVX_VNNI encoding",
    [FeatureAVX2]>;
def FeatureBF16 : SubtargetFeature<"avx512bf16", "HasBF16", "true",
    "Support bfloat16 floating point",
    [FeatureBWI]>;
def FeatureBITALG : SubtargetFeature<"avx512bitalg", "HasBITALG", "true",
    "Enable AVX-512 Bit Algorithms",
    [FeatureBWI]>;
def FeatureVP2INTERSECT
    : SubtargetFeature<"avx512vp2intersect", "HasVP2INTERSECT", "true",
                       "Enable AVX-512 vp2intersect",
                       [FeatureAVX512]>;
def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
    "Enable packed carry-less multiplication instructions",
    [FeatureSSE2]>;
def FeatureGFNI : SubtargetFeature<"gfni", "HasGFNI", "true",
    "Enable Galois Field Arithmetic Instructions",
    [FeatureSSE2]>;
def FeatureVPCLMULQDQ : SubtargetFeature<"vpclmulqdq", "HasVPCLMULQDQ", "true",
    "Enable vpclmulqdq instructions",
    [FeatureAVX, FeaturePCLMUL]>;
// Description typo fixed here as well: "multiple-add" -> "multiply-add".
def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true",
    "Enable four-operand fused multiply-add",
    [FeatureAVX, FeatureSSE4A]>;
def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true",
    "Enable XOP instructions",
    [FeatureFMA4]>;
def FeatureSSEUnalignedMem
    : SubtargetFeature<"sse-unaligned-mem", "HasSSEUnalignedMem", "true",
                       "Allow unaligned memory operands with SSE instructions">;
def FeatureAES : SubtargetFeature<"aes", "HasAES", "true",
    "Enable AES instructions",
    [FeatureSSE2]>;
def FeatureVAES : SubtargetFeature<"vaes", "HasVAES", "true",
    "Promote selected AES instructions to AVX512/AVX registers",
    [FeatureAVX, FeatureAES]>;
def FeatureTBM : SubtargetFeature<"tbm", "HasTBM", "true",
    "Enable TBM instructions">;
def FeatureLWP : SubtargetFeature<"lwp", "HasLWP", "true",
    "Enable LWP instructions">;
def FeatureMOVBE : SubtargetFeature<"movbe", "HasMOVBE", "true",
    "Support MOVBE instruction">;
def FeatureRDRAND : SubtargetFeature<"rdrnd", "HasRDRAND", "true",
    "Support RDRAND instruction">;
def FeatureFSGSBase : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true",
    "Support FS/GS Base instructions">;
def FeatureLZCNT : SubtargetFeature<"lzcnt", "HasLZCNT", "true",
    "Support LZCNT instruction">;
def FeatureBMI : SubtargetFeature<"bmi", "HasBMI", "true",
    "Support BMI instructions">;
def FeatureBMI2 : SubtargetFeature<"bmi2", "HasBMI2", "true",
    "Support BMI2 instructions">;
def FeatureRTM : SubtargetFeature<"rtm", "HasRTM", "true",
    "Support RTM instructions">;
def FeatureADX : SubtargetFeature<"adx", "HasADX", "true",
    "Support ADX instructions">;
def FeatureSHA : SubtargetFeature<"sha", "HasSHA", "true",
    "Enable SHA instructions",
    [FeatureSSE2]>;
def FeatureSHSTK : SubtargetFeature<"shstk", "HasSHSTK", "true",
    "Support CET Shadow-Stack instructions">;
def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
    "Support PRFCHW instructions">;
def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true",
    "Support RDSEED instruction">;
def FeatureLAHFSAHF : SubtargetFeature<"sahf", "HasLAHFSAHF64", "true",
    "Support LAHF and SAHF instructions in 64-bit mode">;
def FeatureMWAITX : SubtargetFeature<"mwaitx", "HasMWAITX", "true",
    "Enable MONITORX/MWAITX timer functionality">;
def FeatureCLZERO : SubtargetFeature<"clzero", "HasCLZERO", "true",
    "Enable Cache Line Zero">;
def FeatureCLDEMOTE : SubtargetFeature<"cldemote", "HasCLDEMOTE", "true",
    "Enable Cache Demote">;
def FeaturePTWRITE : SubtargetFeature<"ptwrite", "HasPTWRITE", "true",
    "Support ptwrite instruction">;
// AMX: the INT8 and BF16 extensions both imply the base AMX-TILE feature.
def FeatureAMXTILE : SubtargetFeature<"amx-tile", "HasAMXTILE", "true",
    "Support AMX-TILE instructions">;
def FeatureAMXINT8 : SubtargetFeature<"amx-int8", "HasAMXINT8", "true",
    "Support AMX-INT8 instructions",
    [FeatureAMXTILE]>;
def FeatureAMXBF16 : SubtargetFeature<"amx-bf16", "HasAMXBF16", "true",
    "Support AMX-BF16 instructions",
    [FeatureAMXTILE]>;
def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
    "Use LEA for adjusting the stack pointer">;
def FeatureSlowDivide32
    : SubtargetFeature<"idivl-to-divb", "HasSlowDivide32", "true",
                       "Use 8-bit divide for positive values less than 256">;
def FeatureSlowDivide64
    : SubtargetFeature<"idivq-to-divl", "HasSlowDivide64", "true",
                       "Use 32-bit divide for positive values less than 2^32">;
def FeaturePadShortFunctions
    : SubtargetFeature<"pad-short-functions", "PadShortFunctions", "true",
                       "Pad short functions">;
def FeatureINVPCID : SubtargetFeature<"invpcid", "HasINVPCID", "true",
    "Invalidate Process-Context Identifier">;
def FeatureSGX : SubtargetFeature<"sgx", "HasSGX", "true",
    "Enable Software Guard Extensions">;
def FeatureCLFLUSHOPT : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true",
    "Flush A Cache Line Optimized">;
def FeatureCLWB : SubtargetFeature<"clwb", "HasCLWB", "true",
    "Cache Line Write Back">;
def FeatureWBNOINVD : SubtargetFeature<"wbnoinvd", "HasWBNOINVD", "true",
    "Write Back No Invalidate">;
def FeatureRDPID : SubtargetFeature<"rdpid", "HasRDPID", "true",
    "Support RDPID instructions">;
def FeatureWAITPKG : SubtargetFeature<"waitpkg", "HasWAITPKG", "true",
    "Wait and pause enhancements">;
def FeatureENQCMD : SubtargetFeature<"enqcmd", "HasENQCMD", "true",
    "Has ENQCMD instructions">;
def FeatureKL : SubtargetFeature<"kl", "HasKL", "true",
    "Support Key Locker kl Instructions",
    [FeatureSSE2]>;
def FeatureWIDEKL : SubtargetFeature<"widekl", "HasWIDEKL", "true",
    "Support Key Locker wide Instructions",
    [FeatureKL]>;
def FeatureHRESET : SubtargetFeature<"hreset", "HasHRESET", "true",
    "Has hreset instruction">;
def FeatureSERIALIZE : SubtargetFeature<"serialize", "HasSERIALIZE", "true",
    "Has serialize instruction">;
def FeatureTSXLDTRK : SubtargetFeature<"tsxldtrk", "HasTSXLDTRK", "true",
    "Support TSXLDTRK instructions">;
def FeatureUINTR : SubtargetFeature<"uintr", "HasUINTR", "true",
    "Has UINTR Instructions">;
// On some processors, instructions that implicitly take two memory operands are
// slow. In practice, this means that CALL, PUSH, and POP with memory operands
// should be avoided in favor of a MOV + register CALL/PUSH/POP.
def FeatureSlowTwoMemOps
    : SubtargetFeature<"slow-two-mem-ops", "SlowTwoMemOps", "true",
                       "Two memory operand instructions are slow">;
def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
    "LEA instruction needs inputs at AG stage">;
def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
    "LEA instruction with certain arguments is slow">;
def FeatureSlow3OpsLEA
    : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
                       "LEA instruction with 3 ops or certain registers is slow">;
def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
    "INC and DEC instructions are slower than ADD and SUB">;
def FeatureSoftFloat : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
    "Use software floating point features">;
def FeaturePOPCNTFalseDeps
    : SubtargetFeature<"false-deps-popcnt", "HasPOPCNTFalseDeps", "true",
                       "POPCNT has a false dependency on dest register">;
def FeatureLZCNTFalseDeps
    : SubtargetFeature<"false-deps-lzcnt-tzcnt", "HasLZCNTFalseDeps", "true",
                       "LZCNT/TZCNT have a false dependency on dest register">;
def FeaturePCONFIG : SubtargetFeature<"pconfig", "HasPCONFIG", "true",
    "platform configuration instruction">;
// On recent X86 (port bound) processors, it's preferable to combine to a
// single shuffle using a variable mask over multiple fixed shuffles.
def FeatureFastVariableShuffle
    : SubtargetFeature<"fast-variable-shuffle", "HasFastVariableShuffle", "true",
                       "Shuffles with variable masks are fast">;
// On some X86 processors, a vzeroupper instruction should be inserted after
// using ymm/zmm registers before executing code that may use SSE instructions.
def FeatureInsertVZEROUPPER : SubtargetFeature<
    "vzeroupper", "InsertVZEROUPPER", "true",
    "Should insert vzeroupper instructions">;

// Two independent square-root tuning flags:
//  * FeatureFastScalarFSQRT - enable when scalar FSQRT has shorter latency
//    than the corresponding NR code.
//  * FeatureFastVectorFSQRT - enable when vector FSQRT has higher throughput
//    than the corresponding NR code.
// The reasoning: throughput bound code is likely to be vectorized, so for
// vectorized code the throughput of SQRT operations is what matters. Scalar
// code probably has some kind of dependency, so there reducing latency
// matters more.
def FeatureFastScalarFSQRT : SubtargetFeature<
    "fast-scalar-fsqrt", "HasFastScalarFSQRT", "true",
    "Scalar SQRT is fast (disable Newton-Raphson)">;
def FeatureFastVectorFSQRT : SubtargetFeature<
    "fast-vector-fsqrt", "HasFastVectorFSQRT", "true",
    "Vector SQRT is fast (disable Newton-Raphson)">;

// When lzcnt has latency/throughput equivalent to most simple integer ops, it
// can be used to replace test/set sequences.
def FeatureFastLZCNT : SubtargetFeature<
    "fast-lzcnt", "HasFastLZCNT", "true",
    "LZCNT instructions are as fast as most simple integer ops">;

// If the target can efficiently decode NOPs up to 7 bytes in length.
def FeatureFast7ByteNOP : SubtargetFeature<
    "fast-7bytenop", "HasFast7ByteNOP", "true",
    "Target can quickly decode up to 7 byte NOPs">;

// If the target can efficiently decode NOPs up to 11 bytes in length.
def FeatureFast11ByteNOP : SubtargetFeature<
    "fast-11bytenop", "HasFast11ByteNOP", "true",
    "Target can quickly decode up to 11 byte NOPs">;

// If the target can efficiently decode NOPs up to 15 bytes in length.
def FeatureFast15ByteNOP : SubtargetFeature<
    "fast-15bytenop", "HasFast15ByteNOP", "true",
    "Target can quickly decode up to 15 byte NOPs">;

// On Sandy Bridge and newer processors a rotate can be implemented as SHLD
// with the same source on both inputs, which avoids the partial flag update
// of the normal rotate instructions.
def FeatureFastSHLDRotate : SubtargetFeature<
    "fast-shld-rotate", "HasFastSHLDRotate", "true",
    "SHLD can be used as a faster rotate">;

// Enhanced REP MOVSB and STOSB (aka "string operations"), available on Ivy
// Bridge and newer processors. See "REP String Enhancement" in the Intel
// Software Development Manual. With this feature REP MOVSB essentially copies
// using the largest available size instead of byte by byte, making it at
// least as fast as REPMOVS{W,D,Q}.
def FeatureERMSB : SubtargetFeature<
    "ermsb", "HasERMSB", "true",
    "REP MOVS/STOS are fast">;

// Fast Short REP MOV, available on Icelake and newer processors.
def FeatureFSRM : SubtargetFeature<
    "fsrm", "HasFSRM", "true",
    "REP MOVSB of short lengths is faster">;

// Bulldozer and newer processors can merge CMP/TEST (but not other
// instructions) with conditional branches.
def FeatureBranchFusion : SubtargetFeature<
    "branchfusion", "HasBranchFusion", "true",
    "CMP/TEST can be fused with conditional branches">;

// On Sandy Bridge and newer processors many instructions can be fused with
// conditional branches and pass through the CPU as a single operation.
def FeatureMacroFusion : SubtargetFeature<
    "macrofusion", "HasMacroFusion", "true",
    "Various instructions can be fused with conditional branches">;

// Gather is available since Haswell (AVX2 set), so technically Gathers can be
// generated on all AVX2 processors, but the overhead on HSW is high.
// Skylake Client processor has faster Gathers than HSW and performance is
// similar to Skylake Server (AVX-512).
def FeatureHasFastGather : SubtargetFeature<
    "fast-gather", "HasFastGather", "true",
    "Indicates if gather is reasonably fast">;

def FeaturePrefer128Bit : SubtargetFeature<
    "prefer-128-bit", "Prefer128Bit", "true",
    "Prefer 128-bit AVX instructions">;

def FeaturePrefer256Bit : SubtargetFeature<
    "prefer-256-bit", "Prefer256Bit", "true",
    "Prefer 256-bit AVX instructions">;

def FeaturePreferMaskRegisters : SubtargetFeature<
    "prefer-mask-registers", "PreferMaskRegisters", "true",
    "Prefer AVX512 mask registers over PTEST/MOVMSK">;

// Indirect calls are lowered using a special construct called a `retpoline`
// to mitigate potential Spectre v2 attacks against them.
def FeatureRetpolineIndirectCalls : SubtargetFeature<
    "retpoline-indirect-calls", "UseRetpolineIndirectCalls", "true",
    "Remove speculation of indirect calls from the generated code">;

// Indirect branches and switches are lowered either using conditional branch
// trees or using a `retpoline`, again to mitigate potential Spectre v2
// attacks against them.
def FeatureRetpolineIndirectBranches : SubtargetFeature<
    "retpoline-indirect-branches", "UseRetpolineIndirectBranches", "true",
    "Remove speculation of indirect branches from the generated code">;

// Deprecated umbrella feature: implies both `retpoline-indirect-calls` and
// `retpoline-indirect-branches` defined above.
def FeatureRetpoline : SubtargetFeature<
    "retpoline", "DeprecatedUseRetpoline", "true",
    "Remove speculation of indirect branches from the "
    "generated code, either by avoiding them entirely or "
    "lowering them with a speculation blocking construct",
    [FeatureRetpolineIndirectCalls, FeatureRetpolineIndirectBranches]>;

// Rely on external thunks for the emitted retpoline calls. This allows users
// to provide their own custom thunk definitions in highly specialized
// environments such as a kernel that does boot-time hot patching.
def FeatureRetpolineExternalThunk : SubtargetFeature<
    "retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
    "When lowering an indirect call or branch using a `retpoline`, rely "
    "on the specified user provided thunk rather than emitting one "
    "ourselves. Only has effect when combined with some other retpoline "
    "feature",
    [FeatureRetpolineIndirectCalls]>;

// Mitigate LVI attacks against indirect calls/branches and call returns
def FeatureLVIControlFlowIntegrity : SubtargetFeature<
    "lvi-cfi", "UseLVIControlFlowIntegrity", "true",
    "Prevent indirect calls/branches from using a memory operand, and "
    "precede all indirect calls/branches from a register with an "
    "LFENCE instruction to serialize control flow. Also decompose RET "
    "instructions into a POP+LFENCE+JMP sequence.">;

// Enable SESES to mitigate speculative execution attacks. Implies the LVI
// control-flow-integrity feature defined just above.
def FeatureSpeculativeExecutionSideEffectSuppression : SubtargetFeature<
    "seses", "UseSpeculativeExecutionSideEffectSuppression", "true",
    "Prevent speculative execution side channel timing attacks by "
    "inserting a speculation barrier before memory reads, memory writes, "
    "and conditional branches. Implies LVI Control Flow integrity.",
    [FeatureLVIControlFlowIntegrity]>;

// Mitigate LVI attacks against data loads
def FeatureLVILoadHardening : SubtargetFeature<
    "lvi-load-hardening", "UseLVILoadHardening", "true",
    "Insert LFENCE instructions to prevent data speculatively injected "
    "into loads from being used maliciously.">;

// Direct Move instructions.
def FeatureMOVDIRI : SubtargetFeature<
    "movdiri", "HasMOVDIRI", "true",
    "Support movdiri instruction">;
def FeatureMOVDIR64B : SubtargetFeature<
    "movdir64b", "HasMOVDIR64B", "true",
    "Support movdir64b instruction">;

def FeatureFastBEXTR : SubtargetFeature<
    "fast-bextr", "HasFastBEXTR", "true",
    "Indicates that the BEXTR instruction is implemented as a single uop "
    "with good throughput">;

// When a CPU implements horizontal operations (introduced with SSE3) with
// better latency/throughput than the alternative sequence, combine vector
// math operations with shuffles into horizontal math instructions.
def FeatureFastHorizontalOps : SubtargetFeature<
    "fast-hops", "HasFastHorizontalOps", "true",
    "Prefer horizontal vector math instructions (haddp, phsub, etc.) over "
    "normal vector instructions with shuffles">;

def FeatureFastScalarShiftMasks : SubtargetFeature<
    "fast-scalar-shift-masks", "HasFastScalarShiftMasks", "true",
    "Prefer a left/right scalar logical shift pair over a shift+and pair">;

def FeatureFastVectorShiftMasks : SubtargetFeature<
    "fast-vector-shift-masks", "HasFastVectorShiftMasks", "true",
    "Prefer a left/right vector logical shift pair over a shift+and pair">;

def FeatureUseGLMDivSqrtCosts : SubtargetFeature<
    "use-glm-div-sqrt-costs", "UseGLMDivSqrtCosts", "true",
    "Use Goldmont specific floating point div/sqrt costs">;

// Enable use of alias analysis during code generation.
522def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true", 523 "Use alias analysis during codegen">; 524 525// Bonnell 526def ProcIntelAtom : SubtargetFeature<"", "X86ProcFamily", "IntelAtom", "">; 527// Silvermont 528def ProcIntelSLM : SubtargetFeature<"", "X86ProcFamily", "IntelSLM", "">; 529 530//===----------------------------------------------------------------------===// 531// Register File Description 532//===----------------------------------------------------------------------===// 533 534include "X86RegisterInfo.td" 535include "X86RegisterBanks.td" 536 537//===----------------------------------------------------------------------===// 538// Instruction Descriptions 539//===----------------------------------------------------------------------===// 540 541include "X86Schedule.td" 542include "X86InstrInfo.td" 543include "X86SchedPredicates.td" 544 545def X86InstrInfo : InstrInfo; 546 547//===----------------------------------------------------------------------===// 548// X86 Scheduler Models 549//===----------------------------------------------------------------------===// 550 551include "X86ScheduleAtom.td" 552include "X86SchedSandyBridge.td" 553include "X86SchedHaswell.td" 554include "X86SchedBroadwell.td" 555include "X86ScheduleSLM.td" 556include "X86ScheduleZnver1.td" 557include "X86ScheduleZnver2.td" 558include "X86ScheduleBdVer2.td" 559include "X86ScheduleBtVer2.td" 560include "X86SchedSkylakeClient.td" 561include "X86SchedSkylakeServer.td" 562 563//===----------------------------------------------------------------------===// 564// X86 Processor Feature Lists 565//===----------------------------------------------------------------------===// 566 567def ProcessorFeatures { 568 // x86-64 and x86-64-v[234] 569 list<SubtargetFeature> X86_64V1Features = [ 570 FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX, FeatureSSE2, 571 FeatureFXSR, FeatureNOPL, Feature64Bit 572 ]; 573 list<SubtargetFeature> X86_64V2Features = !listconcat( 574 
X86_64V1Features, 575 [FeatureCMPXCHG16B, FeatureLAHFSAHF, FeaturePOPCNT, FeatureSSE42]); 576 list<SubtargetFeature> X86_64V3Features = !listconcat(X86_64V2Features, [ 577 FeatureAVX2, FeatureBMI, FeatureBMI2, FeatureF16C, FeatureFMA, FeatureLZCNT, 578 FeatureMOVBE, FeatureXSAVE 579 ]); 580 list<SubtargetFeature> X86_64V4Features = !listconcat(X86_64V3Features, [ 581 FeatureBWI, 582 FeatureCDI, 583 FeatureDQI, 584 FeatureVLX, 585 ]); 586 587 // Nehalem 588 list<SubtargetFeature> NHMFeatures = X86_64V2Features; 589 list<SubtargetFeature> NHMTuning = [FeatureMacroFusion, 590 FeatureInsertVZEROUPPER]; 591 592 // Westmere 593 list<SubtargetFeature> WSMAdditionalFeatures = [FeaturePCLMUL]; 594 list<SubtargetFeature> WSMTuning = NHMTuning; 595 list<SubtargetFeature> WSMFeatures = 596 !listconcat(NHMFeatures, WSMAdditionalFeatures); 597 598 // Sandybridge 599 list<SubtargetFeature> SNBAdditionalFeatures = [FeatureAVX, 600 FeatureXSAVE, 601 FeatureXSAVEOPT]; 602 list<SubtargetFeature> SNBTuning = [FeatureMacroFusion, 603 FeatureSlow3OpsLEA, 604 FeatureSlowDivide64, 605 FeatureSlowUAMem32, 606 FeatureFastScalarFSQRT, 607 FeatureFastSHLDRotate, 608 FeatureFast15ByteNOP, 609 FeaturePOPCNTFalseDeps, 610 FeatureInsertVZEROUPPER]; 611 list<SubtargetFeature> SNBFeatures = 612 !listconcat(WSMFeatures, SNBAdditionalFeatures); 613 614 // Ivybridge 615 list<SubtargetFeature> IVBAdditionalFeatures = [FeatureRDRAND, 616 FeatureF16C, 617 FeatureFSGSBase]; 618 list<SubtargetFeature> IVBTuning = SNBTuning; 619 list<SubtargetFeature> IVBFeatures = 620 !listconcat(SNBFeatures, IVBAdditionalFeatures); 621 622 // Haswell 623 list<SubtargetFeature> HSWAdditionalFeatures = [FeatureAVX2, 624 FeatureBMI, 625 FeatureBMI2, 626 FeatureERMSB, 627 FeatureFMA, 628 FeatureINVPCID, 629 FeatureLZCNT, 630 FeatureMOVBE]; 631 list<SubtargetFeature> HSWTuning = [FeatureMacroFusion, 632 FeatureSlow3OpsLEA, 633 FeatureSlowDivide64, 634 FeatureFastScalarFSQRT, 635 FeatureFastSHLDRotate, 636 FeatureFast15ByteNOP, 
FeatureFastVariableShuffle,
    FeaturePOPCNTFalseDeps,
    FeatureLZCNTFalseDeps,
    FeatureInsertVZEROUPPER];
  // Haswell ISA: the Ivy Bridge set extended by HSWAdditionalFeatures.
  list<SubtargetFeature> HSWFeatures =
      !listconcat(IVBFeatures, HSWAdditionalFeatures);

  // Broadwell: Haswell ISA plus the features below; tuning is shared with
  // Haswell.
  list<SubtargetFeature> BDWAdditionalFeatures = [
    FeatureADX, FeatureRDSEED, FeaturePRFCHW
  ];
  list<SubtargetFeature> BDWTuning = HSWTuning;
  list<SubtargetFeature> BDWFeatures =
      !listconcat(HSWFeatures, BDWAdditionalFeatures);

  // Skylake (client): Broadwell ISA plus the features below, with its own
  // tuning list.
  list<SubtargetFeature> SKLAdditionalFeatures = [
    FeatureAES, FeatureXSAVEC, FeatureXSAVES, FeatureCLFLUSHOPT, FeatureSGX
  ];
  list<SubtargetFeature> SKLTuning = [
    FeatureHasFastGather, FeatureMacroFusion, FeatureSlow3OpsLEA,
    FeatureSlowDivide64, FeatureFastScalarFSQRT, FeatureFastVectorFSQRT,
    FeatureFastSHLDRotate, FeatureFast15ByteNOP, FeatureFastVariableShuffle,
    FeaturePOPCNTFalseDeps, FeatureInsertVZEROUPPER
  ];
  list<SubtargetFeature> SKLFeatures =
      !listconcat(BDWFeatures, SKLAdditionalFeatures);

  // Skylake-AVX512: layered directly on Broadwell (not on SKLFeatures), so
  // only the features listed here are added on top of the Broadwell set.
  list<SubtargetFeature> SKXAdditionalFeatures = [
    FeatureAES, FeatureXSAVEC, FeatureXSAVES, FeatureCLFLUSHOPT,
    FeatureAVX512, FeatureCDI, FeatureDQI, FeatureBWI, FeatureVLX,
    FeaturePKU, FeatureCLWB
  ];
  list<SubtargetFeature> SKXTuning = [
    FeatureHasFastGather, FeatureMacroFusion, FeatureSlow3OpsLEA,
    FeatureSlowDivide64, FeatureFastScalarFSQRT, FeatureFastVectorFSQRT,
    FeatureFastSHLDRotate, FeatureFast15ByteNOP, FeatureFastVariableShuffle,
    FeaturePrefer256Bit, FeaturePOPCNTFalseDeps, FeatureInsertVZEROUPPER
  ];
  list<SubtargetFeature> SKXFeatures =
      !listconcat(BDWFeatures, SKXAdditionalFeatures);

  // Cascadelake: Skylake-AVX512 plus VNNI; tuning shared with SKX.
  list<SubtargetFeature> CLXAdditionalFeatures = [FeatureVNNI];
  list<SubtargetFeature> CLXTuning = SKXTuning;
  list<SubtargetFeature> CLXFeatures =
      !listconcat(SKXFeatures, CLXAdditionalFeatures);

  // Cooperlake: Cascadelake plus BF16; tuning shared with SKX.
  list<SubtargetFeature> CPXAdditionalFeatures = [FeatureBF16];
  list<SubtargetFeature> CPXTuning = SKXTuning;
  list<SubtargetFeature> CPXFeatures =
      !listconcat(CLXFeatures, CPXAdditionalFeatures);

  // Cannonlake: layered on the Skylake client set (SKLFeatures).
  list<SubtargetFeature> CNLAdditionalFeatures = [
    FeatureAVX512, FeatureCDI, FeatureDQI, FeatureBWI, FeatureVLX,
    FeaturePKU, FeatureVBMI, FeatureIFMA, FeatureSHA
  ];
  list<SubtargetFeature> CNLTuning = [
    FeatureHasFastGather, FeatureMacroFusion, FeatureSlow3OpsLEA,
    FeatureSlowDivide64, FeatureFastScalarFSQRT, FeatureFastVectorFSQRT,
    FeatureFastSHLDRotate, FeatureFast15ByteNOP, FeatureFastVariableShuffle,
    FeaturePrefer256Bit, FeatureInsertVZEROUPPER
  ];
  list<SubtargetFeature> CNLFeatures =
      !listconcat(SKLFeatures, CNLAdditionalFeatures);

  // Icelake (client): Cannonlake plus the features below; tuning shared with
  // Cannonlake.
  list<SubtargetFeature> ICLAdditionalFeatures = [
    FeatureBITALG, FeatureVAES, FeatureVBMI2, FeatureVNNI,
    FeatureVPCLMULQDQ, FeatureVPOPCNTDQ, FeatureGFNI, FeatureCLWB,
    FeatureRDPID, FeatureFSRM
  ];
  list<SubtargetFeature> ICLTuning = CNLTuning;
  list<SubtargetFeature> ICLFeatures =
      !listconcat(CNLFeatures, ICLAdditionalFeatures);

  // Icelake Server: Icelake client plus PCONFIG and WBNOINVD.
  list<SubtargetFeature> ICXAdditionalFeatures = [
    FeaturePCONFIG, FeatureWBNOINVD
  ];
  list<SubtargetFeature> ICXTuning = CNLTuning;
  list<SubtargetFeature> ICXFeatures =
      !listconcat(ICLFeatures, ICXAdditionalFeatures);

  // Tigerlake: layered on Icelake client (ICLFeatures).
  list<SubtargetFeature> TGLAdditionalFeatures = [
    FeatureVP2INTERSECT, FeatureMOVDIRI, FeatureMOVDIR64B, FeatureSHSTK
  ];
  list<SubtargetFeature> TGLTuning = CNLTuning;
  list<SubtargetFeature> TGLFeatures =
      !listconcat(ICLFeatures, TGLAdditionalFeatures);

  // Sapphirerapids: layered on Icelake Server (ICXFeatures), which is why the
  // Tigerlake additions are repeated in this list.
  list<SubtargetFeature> SPRAdditionalFeatures = [
    FeatureAMXTILE, FeatureAMXINT8, FeatureAMXBF16, FeatureBF16,
    FeatureSERIALIZE, FeatureCLDEMOTE, FeatureWAITPKG, FeaturePTWRITE,
    FeatureAVXVNNI, FeatureTSXLDTRK, FeatureENQCMD, FeatureSHSTK,
    FeatureVP2INTERSECT, FeatureMOVDIRI, FeatureMOVDIR64B, FeatureUINTR
  ];
  list<SubtargetFeature> SPRTuning = ICXTuning;
  list<SubtargetFeature> SPRFeatures =
      !listconcat(ICXFeatures, SPRAdditionalFeatures);

  // Alderlake: layered on the Skylake client set, with Skylake tuning.
  list<SubtargetFeature> ADLAdditionalFeatures = [
    FeatureAVXVNNI, FeatureCLDEMOTE, FeatureHRESET, FeaturePTWRITE,
    FeatureSERIALIZE, FeatureWAITPKG
  ];
  list<SubtargetFeature> ADLTuning = SKLTuning;
  list<SubtargetFeature> ADLFeatures =
      !listconcat(SKLFeatures, ADLAdditionalFeatures);

  // Atom: base of the Silvermont/Goldmont/Tremont chain below.
  list<SubtargetFeature> AtomFeatures = [
    FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX, FeatureSSSE3,
    FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureCMPXCHG16B, FeatureMOVBE,
    FeatureLAHFSAHF
  ];
  list<SubtargetFeature> AtomTuning = [
    ProcIntelAtom, FeatureSlowUAMem16, FeatureLEAForSP, FeatureSlowDivide32,
    FeatureSlowDivide64, FeatureSlowTwoMemOps, FeatureLEAUsesAG,
    FeaturePadShortFunctions, FeatureInsertVZEROUPPER
  ];

  // Silvermont: Atom ISA plus the features below, with its own tuning list.
  list<SubtargetFeature> SLMAdditionalFeatures = [
    FeatureSSE42, FeaturePOPCNT, FeaturePCLMUL, FeaturePRFCHW, FeatureRDRAND
  ];
  list<SubtargetFeature> SLMTuning = [
    ProcIntelSLM, FeatureSlowTwoMemOps, FeatureSlowLEA, FeatureSlowIncDec,
    FeatureSlowDivide64, FeatureSlowPMULLD, FeatureFast7ByteNOP,
    FeaturePOPCNTFalseDeps, FeatureInsertVZEROUPPER
  ];
  list<SubtargetFeature> SLMFeatures =
      !listconcat(AtomFeatures, SLMAdditionalFeatures);

  // Goldmont: Silvermont ISA plus the features below.
  list<SubtargetFeature> GLMAdditionalFeatures = [
    FeatureAES, FeatureSHA, FeatureRDSEED, FeatureXSAVE, FeatureXSAVEOPT,
    FeatureXSAVEC, FeatureXSAVES, FeatureCLFLUSHOPT, FeatureFSGSBase
  ];
  list<SubtargetFeature> GLMTuning = [
    FeatureUseGLMDivSqrtCosts, FeatureSlowTwoMemOps, FeatureSlowLEA,
    FeatureSlowIncDec, FeaturePOPCNTFalseDeps, FeatureInsertVZEROUPPER
  ];
  list<SubtargetFeature> GLMFeatures =
      !listconcat(SLMFeatures, GLMAdditionalFeatures);

  // Goldmont Plus: Goldmont ISA plus the features below; the tuning list is
  // Goldmont's without FeaturePOPCNTFalseDeps.
  list<SubtargetFeature> GLPAdditionalFeatures = [
    FeaturePTWRITE, FeatureRDPID, FeatureSGX
  ];
  list<SubtargetFeature> GLPTuning = [
    FeatureUseGLMDivSqrtCosts, FeatureSlowTwoMemOps, FeatureSlowLEA,
    FeatureSlowIncDec, FeatureInsertVZEROUPPER
  ];
  list<SubtargetFeature> GLPFeatures =
      !listconcat(GLMFeatures, GLPAdditionalFeatures);

  // Tremont: Goldmont Plus ISA plus CLWB and GFNI; tuning shared with
  // Goldmont Plus.
  list<SubtargetFeature> TRMAdditionalFeatures = [FeatureCLWB, FeatureGFNI];
  list<SubtargetFeature> TRMTuning = GLPTuning;
  list<SubtargetFeature> TRMFeatures =
      !listconcat(GLPFeatures, TRMAdditionalFeatures);

  // Knights Landing: a stand-alone list, not derived from another CPU here.
  list<SubtargetFeature> KNLFeatures = [
    FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX, FeatureFXSR,
    FeatureNOPL, Feature64Bit, FeatureCMPXCHG16B, FeaturePOPCNT,
    FeaturePCLMUL, FeatureXSAVE, FeatureXSAVEOPT, FeatureLAHFSAHF,
    FeatureAES, FeatureRDRAND, FeatureF16C, FeatureFSGSBase, FeatureAVX512,
    FeatureERI, FeatureCDI, FeaturePFI, FeaturePREFETCHWT1, FeatureADX,
    FeatureRDSEED, FeatureMOVBE, FeatureLZCNT, FeatureBMI, FeatureBMI2,
    FeatureFMA, FeaturePRFCHW
  ];
  list<SubtargetFeature> KNLTuning = [
    FeatureSlowDivide64, FeatureSlow3OpsLEA, FeatureSlowIncDec,
    FeatureSlowTwoMemOps, FeaturePreferMaskRegisters, FeatureHasFastGather,
    FeatureSlowPMADDWD
  ];
  // Knights Mill: Knights Landing plus VPOPCNTDQ.
  // TODO: Add AVX5124FMAPS/AVX5124VNNIW features.
  list<SubtargetFeature> KNMFeatures =
      !listconcat(KNLFeatures, [FeatureVPOPCNTDQ]);

  // Barcelona
  list<SubtargetFeature> BarcelonaFeatures = [FeatureX87,
FeatureCMPXCHG8B, FeatureSSE4A, Feature3DNowA, FeatureFXSR, FeatureNOPL,
    FeatureCMPXCHG16B, FeaturePRFCHW, FeatureLZCNT, FeaturePOPCNT,
    FeatureLAHFSAHF, FeatureCMOV, Feature64Bit];
  list<SubtargetFeature> BarcelonaTuning = [
    FeatureFastScalarShiftMasks, FeatureSlowSHLD, FeatureInsertVZEROUPPER
  ];

  // Bobcat
  list<SubtargetFeature> BtVer1Features = [
    FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX, FeatureSSSE3,
    FeatureSSE4A, FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureCMPXCHG16B,
    FeaturePRFCHW, FeatureLZCNT, FeaturePOPCNT, FeatureLAHFSAHF
  ];
  list<SubtargetFeature> BtVer1Tuning = [
    FeatureFast15ByteNOP, FeatureFastScalarShiftMasks,
    FeatureFastVectorShiftMasks, FeatureSlowSHLD, FeatureInsertVZEROUPPER
  ];

  // Jaguar: Bobcat ISA plus the features below. Note that the tuning list
  // does not contain FeatureInsertVZEROUPPER, unlike Bobcat's.
  list<SubtargetFeature> BtVer2AdditionalFeatures = [
    FeatureAVX, FeatureAES, FeaturePCLMUL, FeatureBMI, FeatureF16C,
    FeatureMOVBE, FeatureXSAVE, FeatureXSAVEOPT
  ];
  list<SubtargetFeature> BtVer2Tuning = [
    FeatureFastLZCNT, FeatureFastBEXTR, FeatureFastHorizontalOps,
    FeatureFast15ByteNOP, FeatureFastScalarShiftMasks,
    FeatureFastVectorShiftMasks, FeatureSlowSHLD
  ];
  list<SubtargetFeature> BtVer2Features =
      !listconcat(BtVer1Features, BtVer2AdditionalFeatures);

  // Bulldozer
  list<SubtargetFeature> BdVer1Features = [
    FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureXOP, Feature64Bit,
    FeatureCMPXCHG16B, FeatureAES, FeaturePRFCHW, FeaturePCLMUL, FeatureMMX,
    FeatureFXSR, FeatureNOPL, FeatureLZCNT, FeaturePOPCNT, FeatureXSAVE,
    FeatureLWP, FeatureLAHFSAHF
  ];
  list<SubtargetFeature> BdVer1Tuning = [
    FeatureSlowSHLD, FeatureFast11ByteNOP, FeatureFastScalarShiftMasks,
    FeatureBranchFusion, FeatureInsertVZEROUPPER
  ];

  // PileDriver: Bulldozer ISA plus F16C, BMI, TBM and FMA.
  //
  // FeatureFastBEXTR is a tuning flag (BtVer2Tuning and ZNTuning in this class
  // list it as such), so it is kept out of the ISA list and added to the
  // tuning list instead. Keeping it in BdVer2AdditionalFeatures would tie it
  // to the ISA feature set of bdver2 and its descendants rather than to the
  // tuning set. BdVer3Tuning and BdVer4Tuning pick it up through the
  // BdVer2Tuning chain below.
  list<SubtargetFeature> BdVer2AdditionalFeatures = [
    FeatureF16C, FeatureBMI, FeatureTBM, FeatureFMA
  ];
  list<SubtargetFeature> BdVer2AdditionalTuning = [FeatureFastBEXTR];
  list<SubtargetFeature> BdVer2Tuning =
      !listconcat(BdVer1Tuning, BdVer2AdditionalTuning);
  list<SubtargetFeature> BdVer2Features =
      !listconcat(BdVer1Features, BdVer2AdditionalFeatures);

  // Steamroller: PileDriver ISA plus XSAVEOPT and FSGSBase.
  list<SubtargetFeature> BdVer3AdditionalFeatures = [
    FeatureXSAVEOPT, FeatureFSGSBase
  ];
  list<SubtargetFeature> BdVer3Tuning = BdVer2Tuning;
  list<SubtargetFeature> BdVer3Features =
      !listconcat(BdVer2Features, BdVer3AdditionalFeatures);

  // Excavator: Steamroller ISA plus the features below.
  list<SubtargetFeature> BdVer4AdditionalFeatures = [
    FeatureAVX2, FeatureBMI2, FeatureMOVBE, FeatureRDRAND, FeatureMWAITX
  ];
  list<SubtargetFeature> BdVer4Tuning = BdVer3Tuning;
  list<SubtargetFeature> BdVer4Features =
      !listconcat(BdVer3Features, BdVer4AdditionalFeatures);


  // AMD Zen Processors common ISAs
  list<SubtargetFeature> ZNFeatures = [
    FeatureADX, FeatureAES, FeatureAVX2, FeatureBMI, FeatureBMI2,
    FeatureCLFLUSHOPT, FeatureCLZERO, FeatureCMOV, Feature64Bit,
    FeatureCMPXCHG16B, FeatureF16C, FeatureFMA, FeatureFSGSBase,
    FeatureFXSR, FeatureNOPL, FeatureLAHFSAHF, FeatureLZCNT, FeatureMMX,
    FeatureMOVBE, FeatureMWAITX, FeaturePCLMUL, FeaturePOPCNT,
    FeaturePRFCHW, FeatureRDRAND, FeatureRDSEED, FeatureSHA, FeatureSSE4A,
    FeatureX87, FeatureXSAVE, FeatureXSAVEC, FeatureXSAVEOPT, FeatureXSAVES
  ];
  list<SubtargetFeature> ZNTuning = [
    FeatureFastLZCNT, FeatureFastBEXTR, FeatureFast15ByteNOP,
    FeatureBranchFusion, FeatureFastScalarShiftMasks, FeatureSlowSHLD,
    FeatureInsertVZEROUPPER
  ];
  // Zen 2: common Zen ISA plus the features below; tuning shared with Zen.
  list<SubtargetFeature> ZN2AdditionalFeatures = [
    FeatureCLWB, FeatureRDPID, FeatureWBNOINVD
  ];
  list<SubtargetFeature> ZN2Tuning = ZNTuning;
list<SubtargetFeature> ZN2Features =
      !listconcat(ZNFeatures, ZN2AdditionalFeatures);
  // Zen 3: Zen 2 ISA plus the features below; tuning shared with Zen.
  list<SubtargetFeature> ZN3AdditionalFeatures = [
    FeatureFSRM, FeatureINVPCID, FeaturePKU, FeatureVAES, FeatureVPCLMULQDQ
  ];
  list<SubtargetFeature> ZN3Tuning = ZNTuning;
  list<SubtargetFeature> ZN3Features =
      !listconcat(ZN2Features, ZN3AdditionalFeatures);
}

//===----------------------------------------------------------------------===//
// X86 processors supported.
//===----------------------------------------------------------------------===//

// A processor that uses GenericModel as its scheduling model.
class Proc<string Name,
           list<SubtargetFeature> Features,
           list<SubtargetFeature> TuneFeatures>
    : ProcessorModel<Name, GenericModel, Features, TuneFeatures>;

// A processor with an explicitly chosen scheduling model.
class ProcModel<string Name,
                SchedMachineModel Model,
                list<SubtargetFeature> Features,
                list<SubtargetFeature> TuneFeatures>
    : ProcessorModel<Name, Model, Features, TuneFeatures>;

// NOTE: CMPXCHG8B is here for legacy compatibility so that it is only disabled
// if i386/i486 is specifically requested.
// NOTE: 64Bit is here as "generic" is the default llc CPU. The X86Subtarget
// constructor checks that any CPU used in 64-bit mode has Feature64Bit enabled.
// It has no effect on code generation.
def : ProcModel<"generic", SandyBridgeModel,
                [FeatureX87, FeatureCMPXCHG8B, Feature64Bit],
                [FeatureSlow3OpsLEA, FeatureSlowDivide64, FeatureSlowIncDec,
                 FeatureMacroFusion, FeatureInsertVZEROUPPER]>;

// i386 and i486 share one definition: x87 only, no CMPXCHG8B.
foreach P = ["i386", "i486"] in {
  def : Proc<P, [FeatureX87],
             [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
}

// i586 and pentium share one definition: x87 plus CMPXCHG8B.
foreach P = ["i586", "pentium"] in {
  def : Proc<P, [FeatureX87, FeatureCMPXCHG8B],
             [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
}

def : Proc<"pentium-mmx",
           [FeatureX87, FeatureCMPXCHG8B, FeatureMMX],
           [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;

def : Proc<"i686",
           [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV],
           [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
def : Proc<"pentiumpro",
           [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureNOPL],
           [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;

def : Proc<"pentium2",
           [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureCMOV,
            FeatureFXSR, FeatureNOPL],
           [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;

foreach P = ["pentium3", "pentium3m"] in {
  def : Proc<P,
             [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE1,
              FeatureFXSR, FeatureNOPL, FeatureCMOV],
             [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
}

// Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
// The intent is to enable it for pentium4 which is the current default
// processor in a vanilla 32-bit clang compilation when no specific
// architecture is specified.  This generally gives a nice performance
// increase on silvermont, with largely neutral behavior on other
// contemporary large core processors.
// pentium-m, pentium4m, prescott and nocona are included as a preventative
// measure to avoid performance surprises, in case clang's default cpu
// changes slightly.

// pentium-m, pentium4 and pentium4m are described identically.
foreach P = ["pentium-m", "pentium4", "pentium4m"] in {
  def : ProcModel<P, GenericPostRAModel,
                  [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE2,
                   FeatureFXSR, FeatureNOPL, FeatureCMOV],
                  [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
}

// Intel Quark.
def : Proc<"lakemont",
           [FeatureCMPXCHG8B],
           [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;

// Intel Core Duo.
def : ProcModel<"yonah", SandyBridgeModel,
                [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE3,
                 FeatureFXSR, FeatureNOPL, FeatureCMOV],
                [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;

// NetBurst.
def : ProcModel<"prescott", GenericPostRAModel,
                [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE3,
                 FeatureFXSR, FeatureNOPL, FeatureCMOV],
                [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
def : ProcModel<"nocona", GenericPostRAModel,
                [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX,
                 FeatureSSE3, FeatureFXSR, FeatureNOPL, Feature64Bit,
                 FeatureCMPXCHG16B],
                [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;

// Intel Core 2 Solo/Duo.
def : ProcModel<"core2", SandyBridgeModel,
                [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX,
                 FeatureSSSE3, FeatureFXSR, FeatureNOPL, Feature64Bit,
                 FeatureCMPXCHG16B, FeatureLAHFSAHF],
                [FeatureMacroFusion, FeatureSlowUAMem16,
                 FeatureInsertVZEROUPPER]>;
// Same as core2 except for the SSE level (SSE4.1 instead of SSSE3).
def : ProcModel<"penryn", SandyBridgeModel,
                [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX,
                 FeatureSSE41, FeatureFXSR, FeatureNOPL, Feature64Bit,
                 FeatureCMPXCHG16B, FeatureLAHFSAHF],
                [FeatureMacroFusion, FeatureSlowUAMem16,
                 FeatureInsertVZEROUPPER]>;

// Atom CPUs.
foreach P = ["bonnell", "atom"] in {
  def : ProcModel<P, AtomModel,
                  ProcessorFeatures.AtomFeatures,
                  ProcessorFeatures.AtomTuning>;
}

foreach P = ["silvermont", "slm"] in {
  def : ProcModel<P, SLMModel,
                  ProcessorFeatures.SLMFeatures,
                  ProcessorFeatures.SLMTuning>;
}

// Goldmont, Goldmont Plus and Tremont all use the Silvermont scheduling
// model.
def : ProcModel<"goldmont", SLMModel,
                ProcessorFeatures.GLMFeatures,
                ProcessorFeatures.GLMTuning>;
def : ProcModel<"goldmont-plus", SLMModel,
                ProcessorFeatures.GLPFeatures,
                ProcessorFeatures.GLPTuning>;
def : ProcModel<"tremont", SLMModel,
                ProcessorFeatures.TRMFeatures,
                ProcessorFeatures.TRMTuning>;

// "Arrandale" along with corei3 and corei5
foreach P = ["nehalem", "corei7"] in {
  def : ProcModel<P, SandyBridgeModel,
                  ProcessorFeatures.NHMFeatures,
                  ProcessorFeatures.NHMTuning>;
}

// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
def : ProcModel<"westmere", SandyBridgeModel,
                ProcessorFeatures.WSMFeatures,
                ProcessorFeatures.WSMTuning>;

foreach P = ["sandybridge", "corei7-avx"] in {
  def : ProcModel<P, SandyBridgeModel,
                  ProcessorFeatures.SNBFeatures,
                  ProcessorFeatures.SNBTuning>;
}

foreach P = ["ivybridge", "core-avx-i"] in {
  def : ProcModel<P, SandyBridgeModel,
                  ProcessorFeatures.IVBFeatures,
                  ProcessorFeatures.IVBTuning>;
}

foreach P = ["haswell", "core-avx2"] in {
  def : ProcModel<P, HaswellModel,
                  ProcessorFeatures.HSWFeatures,
                  ProcessorFeatures.HSWTuning>;
}

def : ProcModel<"broadwell", BroadwellModel,
                ProcessorFeatures.BDWFeatures,
                ProcessorFeatures.BDWTuning>;

def : ProcModel<"skylake", SkylakeClientModel,
                ProcessorFeatures.SKLFeatures,
                ProcessorFeatures.SKLTuning>;

// FIXME: define KNL scheduler model
// Both Knights CPUs use the Haswell scheduling model and the KNL tuning list.
def : ProcModel<"knl", HaswellModel,
                ProcessorFeatures.KNLFeatures,
                ProcessorFeatures.KNLTuning>;
def : ProcModel<"knm", HaswellModel,
                ProcessorFeatures.KNMFeatures,
                ProcessorFeatures.KNLTuning>;

foreach P = ["skylake-avx512", "skx"] in {
  def : ProcModel<P, SkylakeServerModel,
                  ProcessorFeatures.SKXFeatures,
                  ProcessorFeatures.SKXTuning>;
}

// The following CPUs all use a Skylake scheduling model (server, or client
// for alderlake).
def : ProcModel<"cascadelake", SkylakeServerModel,
                ProcessorFeatures.CLXFeatures,
                ProcessorFeatures.CLXTuning>;
def : ProcModel<"cooperlake", SkylakeServerModel,
                ProcessorFeatures.CPXFeatures,
                ProcessorFeatures.CPXTuning>;
def : ProcModel<"cannonlake", SkylakeServerModel,
                ProcessorFeatures.CNLFeatures,
                ProcessorFeatures.CNLTuning>;
def : ProcModel<"icelake-client", SkylakeServerModel,
                ProcessorFeatures.ICLFeatures,
                ProcessorFeatures.ICLTuning>;
def : ProcModel<"icelake-server", SkylakeServerModel,
                ProcessorFeatures.ICXFeatures,
                ProcessorFeatures.ICXTuning>;
def : ProcModel<"tigerlake", SkylakeServerModel,
                ProcessorFeatures.TGLFeatures,
                ProcessorFeatures.TGLTuning>;
def : ProcModel<"sapphirerapids", SkylakeServerModel,
                ProcessorFeatures.SPRFeatures,
                ProcessorFeatures.SPRTuning>;
def : ProcModel<"alderlake", SkylakeClientModel,
                ProcessorFeatures.ADLFeatures,
                ProcessorFeatures.ADLTuning>;

// AMD CPUs.
def : Proc<"k6",
           [FeatureX87, FeatureCMPXCHG8B, FeatureMMX],
           [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
// k6-2 and k6-3 are described identically.
foreach P = ["k6-2", "k6-3"] in {
  def : Proc<P,
             [FeatureX87, FeatureCMPXCHG8B, Feature3DNow],
             [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
}

foreach P = ["athlon", "athlon-tbird"] in {
  def : Proc<P,
             [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, Feature3DNowA,
              FeatureNOPL],
             [FeatureSlowSHLD, FeatureSlowUAMem16,
              FeatureInsertVZEROUPPER]>;
}

foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in {
  def : Proc<P,
             [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureSSE1,
              Feature3DNowA, FeatureFXSR, FeatureNOPL],
             [FeatureSlowSHLD, FeatureSlowUAMem16,
              FeatureInsertVZEROUPPER]>;
}

foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in {
  def : Proc<P,
             [FeatureX87, FeatureCMPXCHG8B, FeatureSSE2, Feature3DNowA,
              FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureCMOV],
             [FeatureFastScalarShiftMasks, FeatureSlowSHLD,
              FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
}

// The SSE3 variants additionally enable CMPXCHG16B.
foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
  def : Proc<P,
             [FeatureX87, FeatureCMPXCHG8B, FeatureSSE3, Feature3DNowA,
              FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B, FeatureCMOV,
              Feature64Bit],
             [FeatureFastScalarShiftMasks, FeatureSlowSHLD,
              FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
}

foreach P = ["amdfam10", "barcelona"] in {
  def : Proc<P,
             ProcessorFeatures.BarcelonaFeatures,
             ProcessorFeatures.BarcelonaTuning>;
}

// Bobcat
def : Proc<"btver1",
           ProcessorFeatures.BtVer1Features,
           ProcessorFeatures.BtVer1Tuning>;
// Jaguar
def : ProcModel<"btver2", BtVer2Model,
                ProcessorFeatures.BtVer2Features,
                ProcessorFeatures.BtVer2Tuning>;

// Bulldozer (uses the BdVer2 scheduling model)
def : ProcModel<"bdver1", BdVer2Model,
                ProcessorFeatures.BdVer1Features,
                ProcessorFeatures.BdVer1Tuning>;
// Piledriver
def : ProcModel<"bdver2", BdVer2Model,
                ProcessorFeatures.BdVer2Features,
                ProcessorFeatures.BdVer2Tuning>;
// Steamroller
def : Proc<"bdver3",
           ProcessorFeatures.BdVer3Features,
           ProcessorFeatures.BdVer3Tuning>;
// Excavator
def : Proc<"bdver4",
           ProcessorFeatures.BdVer4Features,
           ProcessorFeatures.BdVer4Tuning>;

// Zen family. znver3 uses the Znver2 scheduling model.
def : ProcModel<"znver1", Znver1Model,
                ProcessorFeatures.ZNFeatures,
                ProcessorFeatures.ZNTuning>;
def : ProcModel<"znver2", Znver2Model,
                ProcessorFeatures.ZN2Features,
                ProcessorFeatures.ZN2Tuning>;
def : ProcModel<"znver3", Znver2Model,
                ProcessorFeatures.ZN3Features,
                ProcessorFeatures.ZN3Tuning>;

def : Proc<"geode",
           [FeatureX87, FeatureCMPXCHG8B, Feature3DNowA],
           [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;

def : Proc<"winchip-c6",
           [FeatureX87, FeatureMMX],
           [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
// winchip2 and c3 are described identically.
foreach P = ["winchip2", "c3"] in {
  def : Proc<P,
             [FeatureX87, Feature3DNow],
             [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
}
def : Proc<"c3-2",
           [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE1,
            FeatureFXSR, FeatureCMOV],
           [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;

// We also provide a generic 64-bit specific x86 processor model which tries to
// be good for modern chips without enabling instruction set encodings past the
// basic SSE2 and 64-bit ones. It disables slow things from any mainstream and
// modern 64-bit x86 chip, and enables features that are generally beneficial.
//
// We currently use the Sandy Bridge model as the default scheduling model as
// we use it across Nehalem, Westmere, Sandy Bridge, and Ivy Bridge which
// covers a huge swath of x86 processors.
// If there are specific scheduling
// knobs which need to be tuned differently for AMD chips, we might consider
// forming a common base for them.
def : ProcModel<"x86-64", SandyBridgeModel,
                ProcessorFeatures.X86_64V1Features,
                [FeatureSlow3OpsLEA, FeatureSlowDivide64, FeatureSlowIncDec,
                 FeatureMacroFusion, FeatureInsertVZEROUPPER]>;

// x86-64 micro-architecture levels.
def : ProcModel<"x86-64-v2", SandyBridgeModel,
                ProcessorFeatures.X86_64V2Features,
                ProcessorFeatures.SNBTuning>;
// Close to Haswell.
def : ProcModel<"x86-64-v3", HaswellModel,
                ProcessorFeatures.X86_64V3Features,
                ProcessorFeatures.HSWTuning>;
// Close to the AVX-512 level implemented by Xeon Scalable Processors.
def : ProcModel<"x86-64-v4", HaswellModel,
                ProcessorFeatures.X86_64V4Features,
                ProcessorFeatures.SKXTuning>;

//===----------------------------------------------------------------------===//
// Calling Conventions
//===----------------------------------------------------------------------===//

include "X86CallingConv.td"


//===----------------------------------------------------------------------===//
// Assembly Parser
//===----------------------------------------------------------------------===//

// AT&T syntax: parser variant 0.
def ATTAsmParserVariant : AsmParserVariant {
  int Variant = 0;

  // Name of this variant.
  string Name = "att";

  // Comment delimiter used when discarding comments in assembly strings.
  string CommentDelimiter = "#";

  // Prefix that marks hard coded registers.
  string RegisterPrefix = "%";
}

// Intel syntax: parser variant 1.
def IntelAsmParserVariant : AsmParserVariant {
  int Variant = 1;

  // Name of this variant.
  string Name = "intel";

  // Comment delimiter used when discarding comments in assembly strings.
  string CommentDelimiter = ";";

  // Hard coded registers carry no prefix in this variant.
  string RegisterPrefix = "";
}

//===----------------------------------------------------------------------===//
// Assembly Printers
//===----------------------------------------------------------------------===//

// The X86 target supports two different syntaxes for emitting machine code.
// This is controlled by the -x86-asm-syntax={att|intel} option.
def ATTAsmWriter : AsmWriter {
  string AsmWriterClassName = "ATTInstPrinter";
  int Variant = 0;
}
def IntelAsmWriter : AsmWriter {
  string AsmWriterClassName = "IntelInstPrinter";
  int Variant = 1;
}

// Top-level target record tying together the instruction set, both assembly
// parser variants and both assembly writers defined above.
def X86 : Target {
  // Information about the instructions...
  let InstructionSet = X86InstrInfo;
  let AssemblyParserVariants = [ATTAsmParserVariant, IntelAsmParserVariant];
  let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter];
  let AllowRegisterRenaming = 1;
}

//===----------------------------------------------------------------------===//
// Pfm Counters
//===----------------------------------------------------------------------===//

include "X86PfmCounters.td"