//===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This is a target description file for the Intel i386 architecture, referred
// to here as the "X86" architecture.
//
//===----------------------------------------------------------------------===//

// Get the target-independent interfaces which we are implementing...
//
include "llvm/Target/Target.td"

//===----------------------------------------------------------------------===//
// X86 Subtarget state
//
// One record per processor operating mode. Each one sets its own In*BitMode
// field to "true".
// NOTE(review): argument order is assumed to follow SubtargetFeature in
// llvm/Target/Target.td (name, field, value, description, implied features).

def Mode64Bit : SubtargetFeature<
    "64bit-mode", "In64BitMode", "true",
    "64-bit mode (x86_64)">;
def Mode32Bit : SubtargetFeature<
    "32bit-mode", "In32BitMode", "true",
    "32-bit mode (80386)">;
def Mode16Bit : SubtargetFeature<
    "16bit-mode", "In16BitMode", "true",
    "16-bit mode (i8086)">;

//===----------------------------------------------------------------------===//
// X86 Subtarget features
//===----------------------------------------------------------------------===//

// Stand-alone boolean features: each sets a dedicated Has* field.

def FeatureX87 : SubtargetFeature<
    "x87", "HasX87", "true",
    "Enable X87 float instructions">;

def FeatureNOPL : SubtargetFeature<
    "nopl", "HasNOPL", "true",
    "Enable NOPL instruction">;

def FeatureCMOV : SubtargetFeature<
    "cmov", "HasCMov", "true",
    "Enable conditional move instructions">;

def FeatureCMPXCHG8B : SubtargetFeature<
    "cx8", "HasCmpxchg8b", "true",
    "Support CMPXCHG8B instructions">;

def FeaturePOPCNT : SubtargetFeature<
    "popcnt", "HasPOPCNT", "true",
    "Support POPCNT instruction">;

def FeatureFXSR : SubtargetFeature<
    "fxsr", "HasFXSR", "true",
    "Support fxsave/fxrestore instructions">;

// The xsave family. The four records are independent of one another (none of
// them lists an implied feature).

def FeatureXSAVE : SubtargetFeature<
    "xsave", "HasXSAVE", "true",
    "Support xsave instructions">;

def FeatureXSAVEOPT : SubtargetFeature<
    "xsaveopt", "HasXSAVEOPT", "true",
    "Support xsaveopt instructions">;

def FeatureXSAVEC : SubtargetFeature<
    "xsavec", "HasXSAVEC", "true",
    "Support xsavec instructions">;

def FeatureXSAVES : SubtargetFeature<
    "xsaves", "HasXSAVES", "true",
    "Support xsaves instructions">;

// SSE levels. All of these write the shared X86SSELevel field, and every level
// after SSE1 lists the level immediately below it as an implied feature.

def FeatureSSE1 : SubtargetFeature<
    "sse", "X86SSELevel", "SSE1",
    "Enable SSE instructions">;
def FeatureSSE2 : SubtargetFeature<
    "sse2", "X86SSELevel", "SSE2",
    "Enable SSE2 instructions",
    [FeatureSSE1]>;
def FeatureSSE3 : SubtargetFeature<
    "sse3", "X86SSELevel", "SSE3",
    "Enable SSE3 instructions",
    [FeatureSSE2]>;
def FeatureSSSE3 : SubtargetFeature<
    "ssse3", "X86SSELevel", "SSSE3",
    "Enable SSSE3 instructions",
    [FeatureSSE3]>;
def FeatureSSE41 : SubtargetFeature<
    "sse4.1", "X86SSELevel", "SSE41",
    "Enable SSE 4.1 instructions",
    [FeatureSSSE3]>;
def FeatureSSE42 : SubtargetFeature<
    "sse4.2", "X86SSELevel", "SSE42",
    "Enable SSE 4.2 instructions",
    [FeatureSSE41]>;

// MMX is deliberately kept out of the SSE implication chain: for (odd)
// compatibility reasons it must be possible to switch MMX off explicitly
// while SSE and later levels stay enabled. MMX and the 3DNow! levels share
// the X863DNowLevel field instead.
def FeatureMMX : SubtargetFeature<
    "mmx", "X863DNowLevel", "MMX",
    "Enable MMX instructions">;
def Feature3DNow : SubtargetFeature<
    "3dnow", "X863DNowLevel", "ThreeDNow",
    "Enable 3DNow! instructions",
    [FeatureMMX]>;
def Feature3DNowA : SubtargetFeature<
    "3dnowa", "X863DNowLevel", "ThreeDNowA",
    "Enable 3DNow! Athlon instructions",
    [Feature3DNow]>;
// All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied
// feature, because SSE2 can be disabled (e.g. for compiling OS kernels)
// without disabling 64-bit mode. Nothing should imply this feature bit. It
// is used to enforce that only 64-bit capable CPUs are used in 64-bit mode.
def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
                                    "Support 64-bit instructions">;
def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true",
                                         "64-bit with cmpxchg16b",
                                         [FeatureCMPXCHG8B]>;

// Tuning flags for instructions that are slow on some processors.
def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
                                       "SHLD instruction is slow">;
def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
                                         "PMULLD instruction is slow">;
def FeatureSlowPMADDWD : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow",
                                          "true",
                                          "PMADDWD is slower than PMULLD">;
// FIXME: This should not apply to CPUs that do not have SSE.
def FeatureSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
                                "IsUAMem16Slow", "true",
                                "Slow unaligned 16-byte memory access">;
def FeatureSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32",
                                "IsUAMem32Slow", "true",
                                "Slow unaligned 32-byte memory access">;
def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
                                    "Support SSE 4a instructions",
                                    [FeatureSSE3]>;

// AVX levels continue the X86SSELevel chain: AVX implies SSE4.2, AVX2 implies
// AVX, and AVX-512F implies AVX2 together with FMA and F16C.
def FeatureAVX : SubtargetFeature<"avx", "X86SSELevel", "AVX",
                                  "Enable AVX instructions",
                                  [FeatureSSE42]>;
def FeatureAVX2 : SubtargetFeature<"avx2", "X86SSELevel", "AVX2",
                                   "Enable AVX2 instructions",
                                   [FeatureAVX]>;
def FeatureFMA : SubtargetFeature<"fma", "HasFMA", "true",
                                  "Enable three-operand fused multiply-add",
                                  [FeatureAVX]>;
def FeatureF16C : SubtargetFeature<"f16c", "HasF16C", "true",
                       "Support 16-bit floating point conversion instructions",
                       [FeatureAVX]>;
def FeatureAVX512 : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512F",
                                     "Enable AVX-512 instructions",
                                     [FeatureAVX2, FeatureFMA, FeatureF16C]>;

// AVX-512 extensions. Each one sets its own Has* field and implies either the
// AVX-512F base feature or, for the byte/word based ones, FeatureBWI.
def FeatureERI : SubtargetFeature<"avx512er", "HasERI", "true",
                      "Enable AVX-512 Exponential and Reciprocal Instructions",
                      [FeatureAVX512]>;
def FeatureCDI : SubtargetFeature<"avx512cd", "HasCDI", "true",
                      "Enable AVX-512 Conflict Detection Instructions",
                      [FeatureAVX512]>;
def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ",
                      "true", "Enable AVX-512 Population Count Instructions",
                      [FeatureAVX512]>;
def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true",
                      "Enable AVX-512 PreFetch Instructions",
                      [FeatureAVX512]>;
def FeaturePREFETCHWT1 : SubtargetFeature<"prefetchwt1", "HasPREFETCHWT1",
                                   "true",
                                   "Prefetch with Intent to Write and T1 Hint">;
def FeatureDQI : SubtargetFeature<"avx512dq", "HasDQI", "true",
                      "Enable AVX-512 Doubleword and Quadword Instructions",
                      [FeatureAVX512]>;
def FeatureBWI : SubtargetFeature<"avx512bw", "HasBWI", "true",
                      "Enable AVX-512 Byte and Word Instructions",
                      [FeatureAVX512]>;
def FeatureVLX : SubtargetFeature<"avx512vl", "HasVLX", "true",
                      "Enable AVX-512 Vector Length eXtensions",
                      [FeatureAVX512]>;
def FeatureVBMI : SubtargetFeature<"avx512vbmi", "HasVBMI", "true",
                      "Enable AVX-512 Vector Byte Manipulation Instructions",
                      [FeatureBWI]>;
def FeatureVBMI2 : SubtargetFeature<"avx512vbmi2", "HasVBMI2", "true",
                      "Enable AVX-512 further Vector Byte Manipulation Instructions",
                      [FeatureBWI]>;
def FeatureIFMA : SubtargetFeature<"avx512ifma", "HasIFMA", "true",
                      "Enable AVX-512 Integer Fused Multiply-Add",
                      [FeatureAVX512]>;
def FeaturePKU : SubtargetFeature<"pku", "HasPKU", "true",
                      "Enable protection keys">;
def FeatureVNNI : SubtargetFeature<"avx512vnni", "HasVNNI", "true",
                      "Enable AVX-512 Vector Neural Network Instructions",
                      [FeatureAVX512]>;
def FeatureBF16 : SubtargetFeature<"avx512bf16", "HasBF16", "true",
                      "Support bfloat16 floating point",
                      [FeatureBWI]>;
def FeatureBITALG : SubtargetFeature<"avx512bitalg", "HasBITALG", "true",
                      "Enable AVX-512 Bit Algorithms",
                      [FeatureBWI]>;
def FeatureVP2INTERSECT : SubtargetFeature<"avx512vp2intersect",
                                           "HasVP2INTERSECT", "true",
                                           "Enable AVX-512 vp2intersect",
                                           [FeatureAVX512]>;

// Further ISA extensions; the implied-feature list (where present) names the
// vector level or companion extension each one depends on.
def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
                         "Enable packed carry-less multiplication instructions",
                         [FeatureSSE2]>;
def FeatureGFNI : SubtargetFeature<"gfni", "HasGFNI", "true",
                         "Enable Galois Field Arithmetic Instructions",
                         [FeatureSSE2]>;
def FeatureVPCLMULQDQ : SubtargetFeature<"vpclmulqdq", "HasVPCLMULQDQ", "true",
                                         "Enable vpclmulqdq instructions",
                                         [FeatureAVX, FeaturePCLMUL]>;
def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true",
                                   "Enable four-operand fused multiply-add",
                                   [FeatureAVX, FeatureSSE4A]>;
def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true",
                                  "Enable XOP instructions",
                                  [FeatureFMA4]>;
def FeatureSSEUnalignedMem : SubtargetFeature<"sse-unaligned-mem",
                                          "HasSSEUnalignedMem", "true",
                      "Allow unaligned memory operands with SSE instructions">;
def FeatureAES : SubtargetFeature<"aes", "HasAES", "true",
                                  "Enable AES instructions",
                                  [FeatureSSE2]>;
def FeatureVAES : SubtargetFeature<"vaes", "HasVAES", "true",
                       "Promote selected AES instructions to AVX512/AVX registers",
                       [FeatureAVX, FeatureAES]>;
def FeatureTBM : SubtargetFeature<"tbm", "HasTBM", "true",
                                  "Enable TBM instructions">;
def FeatureLWP : SubtargetFeature<"lwp", "HasLWP", "true",
                                  "Enable LWP instructions">;
def FeatureMOVBE : SubtargetFeature<"movbe", "HasMOVBE", "true",
                                    "Support MOVBE instruction">;
def FeatureRDRAND : SubtargetFeature<"rdrnd", "HasRDRAND", "true",
                                     "Support RDRAND instruction">;
def FeatureFSGSBase : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true",
                                       "Support FS/GS Base instructions">;
def FeatureLZCNT : SubtargetFeature<"lzcnt", "HasLZCNT", "true",
                                    "Support LZCNT instruction">;
def FeatureBMI : SubtargetFeature<"bmi", "HasBMI", "true",
                                  "Support BMI instructions">;
def FeatureBMI2 : SubtargetFeature<"bmi2", "HasBMI2", "true",
                                   "Support BMI2 instructions">;
def FeatureRTM : SubtargetFeature<"rtm", "HasRTM", "true",
                                  "Support RTM instructions">;
def FeatureADX : SubtargetFeature<"adx", "HasADX", "true",
                                  "Support ADX instructions">;
def FeatureSHA : SubtargetFeature<"sha", "HasSHA", "true",
                                  "Enable SHA instructions",
                                  [FeatureSSE2]>;
def FeatureSHSTK : SubtargetFeature<"shstk", "HasSHSTK", "true",
                       "Support CET Shadow-Stack instructions">;
def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
                                     "Support PRFCHW instructions">;
def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true",
                                     "Support RDSEED instruction">;
def FeatureLAHFSAHF : SubtargetFeature<"sahf", "HasLAHFSAHF", "true",
                                       "Support LAHF and SAHF instructions">;
def FeatureMWAITX : SubtargetFeature<"mwaitx", "HasMWAITX", "true",
                                     "Enable MONITORX/MWAITX timer functionality">;
def FeatureCLZERO : SubtargetFeature<"clzero", "HasCLZERO", "true",
                                     "Enable Cache Line Zero">;
def FeatureCLDEMOTE : SubtargetFeature<"cldemote", "HasCLDEMOTE", "true",
                                       "Enable Cache Demote">;
def FeaturePTWRITE : SubtargetFeature<"ptwrite", "HasPTWRITE", "true",
                                      "Support ptwrite instruction">;
// FIXME: This feature is deprecated in 10.0 and should not be used for
// anything, but removing it would break IR files that may contain it in a
// target-feature attribute.
// Note that, unlike every other record here, the value written is "false".
def FeatureDeprecatedMPX : SubtargetFeature<"mpx", "DeprecatedHasMPX", "false",
                                      "Deprecated. Support MPX instructions">;

// Code generation preferences.
def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
                                     "Use LEA for adjusting the stack pointer">;
def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb",
                                     "HasSlowDivide32", "true",
                        "Use 8-bit divide for positive values less than 256">;
def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divl",
                                     "HasSlowDivide64", "true",
                        "Use 32-bit divide for positive values less than 2^32">;
def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
                                     "PadShortFunctions", "true",
                                     "Pad short functions">;

// More ISA extensions without implied features.
def FeatureINVPCID : SubtargetFeature<"invpcid", "HasINVPCID", "true",
                                      "Invalidate Process-Context Identifier">;
def FeatureSGX : SubtargetFeature<"sgx", "HasSGX", "true",
                                  "Enable Software Guard Extensions">;
def FeatureCLFLUSHOPT : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true",
                                         "Flush A Cache Line Optimized">;
def FeatureCLWB : SubtargetFeature<"clwb", "HasCLWB", "true",
                                   "Cache Line Write Back">;
def FeatureWBNOINVD : SubtargetFeature<"wbnoinvd", "HasWBNOINVD", "true",
                                       "Write Back No Invalidate">;
def FeatureRDPID : SubtargetFeature<"rdpid", "HasRDPID", "true",
                                    "Support RDPID instructions">;
def FeatureWAITPKG : SubtargetFeature<"waitpkg", "HasWAITPKG", "true",
                                      "Wait and pause enhancements">;
def FeatureENQCMD : SubtargetFeature<"enqcmd", "HasENQCMD", "true",
                                     "Has ENQCMD instructions">;
// On some processors, instructions that implicitly take two memory operands are
// slow. In practice, this means that CALL, PUSH, and POP with memory operands
// should be avoided in favor of a MOV + register CALL/PUSH/POP.
// Tuning flags describing instructions that are slow on some processors.
def FeatureSlowTwoMemOps : SubtargetFeature<
    "slow-two-mem-ops", "SlowTwoMemOps", "true",
    "Two memory operand instructions are slow">;
def FeatureLEAUsesAG : SubtargetFeature<
    "lea-uses-ag", "LEAUsesAG", "true",
    "LEA instruction needs inputs at AG stage">;
def FeatureSlowLEA : SubtargetFeature<
    "slow-lea", "SlowLEA", "true",
    "LEA instruction with certain arguments is slow">;
def FeatureSlow3OpsLEA : SubtargetFeature<
    "slow-3ops-lea", "Slow3OpsLEA", "true",
    "LEA instruction with 3 ops or certain registers is slow">;
def FeatureSlowIncDec : SubtargetFeature<
    "slow-incdec", "SlowIncDec", "true",
    "INC and DEC instructions are slower than ADD and SUB">;

def FeatureSoftFloat : SubtargetFeature<
    "soft-float", "UseSoftFloat", "true",
    "Use software floating point features">;

// False-dependency flags for the bit-counting instructions.
def FeaturePOPCNTFalseDeps : SubtargetFeature<
    "false-deps-popcnt", "HasPOPCNTFalseDeps", "true",
    "POPCNT has a false dependency on dest register">;
def FeatureLZCNTFalseDeps : SubtargetFeature<
    "false-deps-lzcnt-tzcnt", "HasLZCNTFalseDeps", "true",
    "LZCNT/TZCNT have a false dependency on dest register">;

def FeaturePCONFIG : SubtargetFeature<
    "pconfig", "HasPCONFIG", "true",
    "platform configuration instruction">;

// On recent (port bound) X86 processors it is preferable to combine to a
// single shuffle using a variable mask rather than use multiple fixed
// shuffles.
def FeatureFastVariableShuffle : SubtargetFeature<
    "fast-variable-shuffle", "HasFastVariableShuffle", "true",
    "Shuffles with variable masks are fast">;
// On some X86 processors, a vzeroupper instruction should be inserted after
// using ymm/zmm registers before executing code that may use SSE instructions.
def FeatureInsertVZEROUPPER : SubtargetFeature<
    "vzeroupper", "InsertVZEROUPPER", "true",
    "Should insert vzeroupper instructions">;

// Scalar vs. vector square root.
//  - FeatureFastScalarFSQRT: enable when scalar FSQRT has shorter latency than
//    the corresponding NR (Newton-Raphson) code.
//  - FeatureFastVectorFSQRT: enable when vector FSQRT has higher throughput
//    than the corresponding NR code.
// The idea is that throughput bound code is likely to be vectorized, so for
// vectorized code the throughput of SQRT operations is what matters. Scalar
// code, on the other hand, probably has some kind of dependency, so reducing
// latency matters more there.
def FeatureFastScalarFSQRT : SubtargetFeature<
    "fast-scalar-fsqrt", "HasFastScalarFSQRT", "true",
    "Scalar SQRT is fast (disable Newton-Raphson)">;
def FeatureFastVectorFSQRT : SubtargetFeature<
    "fast-vector-fsqrt", "HasFastVectorFSQRT", "true",
    "Vector SQRT is fast (disable Newton-Raphson)">;

// When lzcnt has latency/throughput equivalent to most simple integer ops, it
// can be used to replace test/set sequences.
def FeatureFastLZCNT : SubtargetFeature<
    "fast-lzcnt", "HasFastLZCNT", "true",
    "LZCNT instructions are as fast as most simple integer ops">;

// Set if the target can efficiently decode NOPs up to 11 bytes in length.
def FeatureFast11ByteNOP : SubtargetFeature<
    "fast-11bytenop", "HasFast11ByteNOP", "true",
    "Target can quickly decode up to 11 byte NOPs">;

// Set if the target can efficiently decode NOPs up to 15 bytes in length.
def FeatureFast15ByteNOP : SubtargetFeature<
    "fast-15bytenop", "HasFast15ByteNOP", "true",
    "Target can quickly decode up to 15 byte NOPs">;
// Sandy Bridge and newer processors can use SHLD with the same source on both
// inputs to implement rotate to avoid the partial flag update of the normal
// rotate instructions.
def FeatureFastSHLDRotate : SubtargetFeature<
    "fast-shld-rotate", "HasFastSHLDRotate", "true",
    "SHLD can be used as a faster rotate">;

// Enhanced REP MOVSB and STOSB (aka "string operations"), available on Ivy
// Bridge and newer processors; see "REP String Enhancement" in the Intel
// Software Development Manual. In essence, REP MOVSB copies using the largest
// available size rather than byte by byte, which makes it at least as fast as
// REPMOVS{W,D,Q}.
def FeatureERMSB : SubtargetFeature<
    "ermsb", "HasERMSB", "true",
    "REP MOVS/STOS are fast">;

// Bulldozer and newer processors can merge CMP/TEST (but no other
// instructions) with conditional branches.
def FeatureBranchFusion : SubtargetFeature<
    "branchfusion", "HasBranchFusion", "true",
    "CMP/TEST can be fused with conditional branches">;

// On Sandy Bridge and newer processors many instructions can be fused with
// conditional branches and then pass through the CPU as a single operation.
def FeatureMacroFusion : SubtargetFeature<
    "macrofusion", "HasMacroFusion", "true",
    "Various instructions can be fused with conditional branches">;

// Gather is available since Haswell (AVX2 set). So technically, we can
// generate Gathers on all AVX2 processors. But the overhead on HSW is high.
// Skylake Client processor has faster Gathers than HSW and performance is
// similar to Skylake Server (AVX-512).
def FeatureHasFastGather : SubtargetFeature<
    "fast-gather", "HasFastGather", "true",
    "Indicates if gather is reasonably fast">;

// Vector width and mask register preferences.
def FeaturePrefer128Bit : SubtargetFeature<
    "prefer-128-bit", "Prefer128Bit", "true",
    "Prefer 128-bit AVX instructions">;

def FeaturePrefer256Bit : SubtargetFeature<
    "prefer-256-bit", "Prefer256Bit", "true",
    "Prefer 256-bit AVX instructions">;

def FeaturePreferMaskRegisters : SubtargetFeature<
    "prefer-mask-registers", "PreferMaskRegisters", "true",
    "Prefer AVX512 mask registers over PTEST/MOVMSK">;

// Spectre v2 mitigation for indirect calls: lower them through a special
// construct called a `retpoline`.
def FeatureRetpolineIndirectCalls : SubtargetFeature<
    "retpoline-indirect-calls", "UseRetpolineIndirectCalls", "true",
    "Remove speculation of indirect calls from the generated code">;

// Spectre v2 mitigation for indirect branches and switches: lower them either
// as conditional branch trees or through a `retpoline`.
def FeatureRetpolineIndirectBranches : SubtargetFeature<
    "retpoline-indirect-branches", "UseRetpolineIndirectBranches", "true",
    "Remove speculation of indirect branches from the generated code">;

// Deprecated umbrella feature: implies both `retpoline-indirect-calls` and
// `retpoline-indirect-branches`.
def FeatureRetpoline : SubtargetFeature<
    "retpoline", "DeprecatedUseRetpoline", "true",
    "Remove speculation of indirect branches from the "
    "generated code, either by avoiding them entirely or "
    "lowering them with a speculation blocking construct",
    [FeatureRetpolineIndirectCalls,
     FeatureRetpolineIndirectBranches]>;

// Use externally supplied thunks for the emitted retpoline calls. This lets
// users provide custom thunk definitions in highly specialized environments,
// such as a kernel that does boot-time hot patching.
def FeatureRetpolineExternalThunk : SubtargetFeature<
    "retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
    "When lowering an indirect call or branch using a `retpoline`, rely "
    "on the specified user provided thunk rather than emitting one "
    "ourselves. Only has effect when combined with some other retpoline "
    "feature",
    [FeatureRetpolineIndirectCalls]>;

// LVI mitigation for indirect calls/branches and call returns.
def FeatureLVIControlFlowIntegrity : SubtargetFeature<
    "lvi-cfi", "UseLVIControlFlowIntegrity", "true",
    "Prevent indirect calls/branches from using a memory operand, and "
    "precede all indirect calls/branches from a register with an "
    "LFENCE instruction to serialize control flow. Also decompose RET "
    "instructions into a POP+LFENCE+JMP sequence.">;

// LVI mitigation for data loads.
def FeatureLVILoadHardening : SubtargetFeature<
    "lvi-load-hardening", "UseLVILoadHardening", "true",
    "Insert LFENCE instructions to prevent data speculatively injected "
    "into loads from being used maliciously.">;

// Direct Move instructions.
def FeatureMOVDIRI : SubtargetFeature<
    "movdiri", "HasMOVDIRI", "true",
    "Support movdiri instruction">;
def FeatureMOVDIR64B : SubtargetFeature<
    "movdir64b", "HasMOVDIR64B", "true",
    "Support movdir64b instruction">;

def FeatureFastBEXTR : SubtargetFeature<
    "fast-bextr", "HasFastBEXTR", "true",
    "Indicates that the BEXTR instruction is implemented as a single uop "
    "with good throughput">;

// Combine vector math operations with shuffles into horizontal math
// instructions if a CPU implements horizontal operations (introduced with
// SSE3) with better latency/throughput than the alternative sequence.
def FeatureFastHorizontalOps : SubtargetFeature<
    "fast-hops", "HasFastHorizontalOps", "true",
    "Prefer horizontal vector math instructions (haddp, phsub, etc.) over "
    "normal vector instructions with shuffles">;

// Shift-pair vs. shift+and preferences, one flag for scalar code and one for
// vector code.
def FeatureFastScalarShiftMasks : SubtargetFeature<
    "fast-scalar-shift-masks", "HasFastScalarShiftMasks", "true",
    "Prefer a left/right scalar logical shift pair over a shift+and pair">;

def FeatureFastVectorShiftMasks : SubtargetFeature<
    "fast-vector-shift-masks", "HasFastVectorShiftMasks", "true",
    "Prefer a left/right vector logical shift pair over a shift+and pair">;

def FeatureUseGLMDivSqrtCosts : SubtargetFeature<
    "use-glm-div-sqrt-costs", "UseGLMDivSqrtCosts", "true",
    "Use Goldmont specific floating point div/sqrt costs">;

// Allow branches to be merged into three-way conditional code.
def FeatureMergeToThreeWayBranch : SubtargetFeature<
    "merge-to-threeway-branch", "ThreewayBranchProfitable", "true",
    "Merge branches to a three-way "
    "conditional branch">;

// Enable use of alias analysis during code generation.
485def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true", 486 "Use alias analysis during codegen">; 487 488// Bonnell 489def ProcIntelAtom : SubtargetFeature<"", "X86ProcFamily", "IntelAtom", "">; 490// Silvermont 491def ProcIntelSLM : SubtargetFeature<"", "X86ProcFamily", "IntelSLM", "">; 492 493//===----------------------------------------------------------------------===// 494// Register File Description 495//===----------------------------------------------------------------------===// 496 497include "X86RegisterInfo.td" 498include "X86RegisterBanks.td" 499 500//===----------------------------------------------------------------------===// 501// Instruction Descriptions 502//===----------------------------------------------------------------------===// 503 504include "X86Schedule.td" 505include "X86InstrInfo.td" 506include "X86SchedPredicates.td" 507 508def X86InstrInfo : InstrInfo; 509 510//===----------------------------------------------------------------------===// 511// X86 Scheduler Models 512//===----------------------------------------------------------------------===// 513 514include "X86ScheduleAtom.td" 515include "X86SchedSandyBridge.td" 516include "X86SchedHaswell.td" 517include "X86SchedBroadwell.td" 518include "X86ScheduleSLM.td" 519include "X86ScheduleZnver1.td" 520include "X86ScheduleZnver2.td" 521include "X86ScheduleBdVer2.td" 522include "X86ScheduleBtVer2.td" 523include "X86SchedSkylakeClient.td" 524include "X86SchedSkylakeServer.td" 525 526//===----------------------------------------------------------------------===// 527// X86 Processor Feature Lists 528//===----------------------------------------------------------------------===// 529 530def ProcessorFeatures { 531 // Nehalem 532 list<SubtargetFeature> NHMInheritableFeatures = [FeatureX87, 533 FeatureCMPXCHG8B, 534 FeatureCMOV, 535 FeatureMMX, 536 FeatureSSE42, 537 FeatureFXSR, 538 FeatureNOPL, 539 Feature64Bit, 540 FeatureCMPXCHG16B, 541 FeaturePOPCNT, 542 FeatureLAHFSAHF, 543 
FeatureMacroFusion, 544 FeatureInsertVZEROUPPER]; 545 list<SubtargetFeature> NHMSpecificFeatures = []; 546 list<SubtargetFeature> NHMFeatures = 547 !listconcat(NHMInheritableFeatures, NHMSpecificFeatures); 548 549 // Westmere 550 list<SubtargetFeature> WSMAdditionalFeatures = [FeaturePCLMUL]; 551 list<SubtargetFeature> WSMSpecificFeatures = []; 552 list<SubtargetFeature> WSMInheritableFeatures = 553 !listconcat(NHMInheritableFeatures, WSMAdditionalFeatures); 554 list<SubtargetFeature> WSMFeatures = 555 !listconcat(WSMInheritableFeatures, WSMSpecificFeatures); 556 557 // Sandybridge 558 list<SubtargetFeature> SNBAdditionalFeatures = [FeatureAVX, 559 FeatureSlowDivide64, 560 FeatureXSAVE, 561 FeatureXSAVEOPT, 562 FeatureSlow3OpsLEA, 563 FeatureFastScalarFSQRT, 564 FeatureFastSHLDRotate, 565 FeatureMergeToThreeWayBranch]; 566 list<SubtargetFeature> SNBSpecificFeatures = [FeatureSlowUAMem32, 567 FeaturePOPCNTFalseDeps]; 568 list<SubtargetFeature> SNBInheritableFeatures = 569 !listconcat(WSMInheritableFeatures, SNBAdditionalFeatures); 570 list<SubtargetFeature> SNBFeatures = 571 !listconcat(SNBInheritableFeatures, SNBSpecificFeatures); 572 573 // Ivybridge 574 list<SubtargetFeature> IVBAdditionalFeatures = [FeatureRDRAND, 575 FeatureF16C, 576 FeatureFSGSBase]; 577 list<SubtargetFeature> IVBSpecificFeatures = [FeatureSlowUAMem32, 578 FeaturePOPCNTFalseDeps]; 579 list<SubtargetFeature> IVBInheritableFeatures = 580 !listconcat(SNBInheritableFeatures, IVBAdditionalFeatures); 581 list<SubtargetFeature> IVBFeatures = 582 !listconcat(IVBInheritableFeatures, IVBSpecificFeatures); 583 584 // Haswell 585 list<SubtargetFeature> HSWAdditionalFeatures = [FeatureAVX2, 586 FeatureBMI, 587 FeatureBMI2, 588 FeatureERMSB, 589 FeatureFMA, 590 FeatureINVPCID, 591 FeatureLZCNT, 592 FeatureMOVBE, 593 FeatureFastVariableShuffle]; 594 list<SubtargetFeature> HSWSpecificFeatures = [FeaturePOPCNTFalseDeps, 595 FeatureLZCNTFalseDeps]; 596 list<SubtargetFeature> HSWInheritableFeatures = 597 
!listconcat(IVBInheritableFeatures, HSWAdditionalFeatures); 598 list<SubtargetFeature> HSWFeatures = 599 !listconcat(HSWInheritableFeatures, HSWSpecificFeatures); 600 601 // Broadwell 602 list<SubtargetFeature> BDWAdditionalFeatures = [FeatureADX, 603 FeatureRDSEED, 604 FeaturePRFCHW]; 605 list<SubtargetFeature> BDWSpecificFeatures = [FeaturePOPCNTFalseDeps, 606 FeatureLZCNTFalseDeps]; 607 list<SubtargetFeature> BDWInheritableFeatures = 608 !listconcat(HSWInheritableFeatures, BDWAdditionalFeatures); 609 list<SubtargetFeature> BDWFeatures = 610 !listconcat(BDWInheritableFeatures, BDWSpecificFeatures); 611 612 // Skylake 613 list<SubtargetFeature> SKLAdditionalFeatures = [FeatureAES, 614 FeatureXSAVEC, 615 FeatureXSAVES, 616 FeatureCLFLUSHOPT, 617 FeatureFastVectorFSQRT]; 618 list<SubtargetFeature> SKLSpecificFeatures = [FeatureHasFastGather, 619 FeaturePOPCNTFalseDeps, 620 FeatureSGX]; 621 list<SubtargetFeature> SKLInheritableFeatures = 622 !listconcat(BDWInheritableFeatures, SKLAdditionalFeatures); 623 list<SubtargetFeature> SKLFeatures = 624 !listconcat(SKLInheritableFeatures, SKLSpecificFeatures); 625 626 // Skylake-AVX512 627 list<SubtargetFeature> SKXAdditionalFeatures = [FeatureAVX512, 628 FeaturePrefer256Bit, 629 FeatureCDI, 630 FeatureDQI, 631 FeatureBWI, 632 FeatureVLX, 633 FeaturePKU, 634 FeatureCLWB]; 635 list<SubtargetFeature> SKXSpecificFeatures = [FeatureHasFastGather, 636 FeaturePOPCNTFalseDeps]; 637 list<SubtargetFeature> SKXInheritableFeatures = 638 !listconcat(SKLInheritableFeatures, SKXAdditionalFeatures); 639 list<SubtargetFeature> SKXFeatures = 640 !listconcat(SKXInheritableFeatures, SKXSpecificFeatures); 641 642 // Cascadelake 643 list<SubtargetFeature> CLXAdditionalFeatures = [FeatureVNNI]; 644 list<SubtargetFeature> CLXSpecificFeatures = [FeatureHasFastGather, 645 FeaturePOPCNTFalseDeps]; 646 list<SubtargetFeature> CLXInheritableFeatures = 647 !listconcat(SKXInheritableFeatures, CLXAdditionalFeatures); 648 list<SubtargetFeature> CLXFeatures = 
!listconcat(CLXInheritableFeatures, CLXSpecificFeatures);

  // Cooperlake
  list<SubtargetFeature> CPXAdditionalFeatures = [FeatureBF16];
  list<SubtargetFeature> CPXSpecificFeatures = [
    FeatureHasFastGather,
    FeaturePOPCNTFalseDeps
  ];
  list<SubtargetFeature> CPXInheritableFeatures =
    !listconcat(CLXInheritableFeatures, CPXAdditionalFeatures);
  list<SubtargetFeature> CPXFeatures =
    !listconcat(CPXInheritableFeatures, CPXSpecificFeatures);

  // Cannonlake
  list<SubtargetFeature> CNLAdditionalFeatures = [
    FeatureAVX512,
    FeaturePrefer256Bit,
    FeatureCDI,
    FeatureDQI,
    FeatureBWI,
    FeatureVLX,
    FeaturePKU,
    FeatureVBMI,
    FeatureIFMA,
    FeatureSHA,
    FeatureSGX
  ];
  list<SubtargetFeature> CNLSpecificFeatures = [FeatureHasFastGather];
  list<SubtargetFeature> CNLInheritableFeatures =
    !listconcat(SKLInheritableFeatures, CNLAdditionalFeatures);
  list<SubtargetFeature> CNLFeatures =
    !listconcat(CNLInheritableFeatures, CNLSpecificFeatures);

  // Icelake
  list<SubtargetFeature> ICLAdditionalFeatures = [
    FeatureBITALG,
    FeatureVAES,
    FeatureVBMI2,
    FeatureVNNI,
    FeatureVPCLMULQDQ,
    FeatureVPOPCNTDQ,
    FeatureGFNI,
    FeatureCLWB,
    FeatureRDPID
  ];
  list<SubtargetFeature> ICLSpecificFeatures = [FeatureHasFastGather];
  list<SubtargetFeature> ICLInheritableFeatures =
    !listconcat(CNLInheritableFeatures, ICLAdditionalFeatures);
  list<SubtargetFeature> ICLFeatures =
    !listconcat(ICLInheritableFeatures, ICLSpecificFeatures);

  // Icelake Server
  list<SubtargetFeature> ICXSpecificFeatures = [
    FeaturePCONFIG,
    FeatureWBNOINVD,
    FeatureHasFastGather
  ];
  list<SubtargetFeature> ICXFeatures =
    !listconcat(ICLInheritableFeatures, ICXSpecificFeatures);

  // Tigerlake
  // Follows the same Additional/Inheritable/Specific split as the CPUs above:
  // the inheritable list extends Icelake's inheritable list with the new
  // features, and the CPU-specific entries are appended only when forming
  // TGLFeatures. The resulting TGLFeatures set is ICLFeatures plus
  // TGLAdditionalFeatures, without listing FeatureHasFastGather twice.
  list<SubtargetFeature> TGLAdditionalFeatures = [
    FeatureVP2INTERSECT,
    FeatureMOVDIRI,
    FeatureMOVDIR64B,
    FeatureSHSTK
  ];
  list<SubtargetFeature> TGLSpecificFeatures = [FeatureHasFastGather];
  list<SubtargetFeature> TGLInheritableFeatures =
    !listconcat(ICLInheritableFeatures, TGLAdditionalFeatures);
  list<SubtargetFeature> TGLFeatures =
    !listconcat(TGLInheritableFeatures, TGLSpecificFeatures);

  // Atom
  list<SubtargetFeature> AtomInheritableFeatures = [
    FeatureX87,
    FeatureCMPXCHG8B,
    FeatureCMOV,
    FeatureMMX,
    FeatureSSSE3,
    FeatureFXSR,
    FeatureNOPL,
    Feature64Bit,
    FeatureCMPXCHG16B,
    FeatureMOVBE,
    FeatureSlowTwoMemOps,
    FeatureLAHFSAHF,
    FeatureInsertVZEROUPPER
  ];
  list<SubtargetFeature> AtomSpecificFeatures = [
    ProcIntelAtom,
    FeatureSlowUAMem16,
    FeatureLEAForSP,
    FeatureSlowDivide32,
    FeatureSlowDivide64,
    FeatureLEAUsesAG,
    FeaturePadShortFunctions
  ];
  list<SubtargetFeature> AtomFeatures =
    !listconcat(AtomInheritableFeatures, AtomSpecificFeatures);

  // Silvermont
  list<SubtargetFeature> SLMAdditionalFeatures = [
    FeatureSSE42,
    FeaturePOPCNT,
    FeaturePCLMUL,
    FeaturePRFCHW,
    FeatureSlowLEA,
    FeatureSlowIncDec,
    FeatureRDRAND
  ];
  list<SubtargetFeature> SLMSpecificFeatures = [
    ProcIntelSLM,
    FeatureSlowDivide64,
    FeatureSlowPMULLD,
    FeaturePOPCNTFalseDeps
  ];
  list<SubtargetFeature> SLMInheritableFeatures =
    !listconcat(AtomInheritableFeatures, SLMAdditionalFeatures);
  list<SubtargetFeature> SLMFeatures =
    !listconcat(SLMInheritableFeatures, SLMSpecificFeatures);

  // Goldmont
  list<SubtargetFeature> GLMAdditionalFeatures = [
    FeatureAES,
    FeatureSHA,
    FeatureRDSEED,
    FeatureXSAVE,
    FeatureXSAVEOPT,
    FeatureXSAVEC,
    FeatureXSAVES,
    FeatureCLFLUSHOPT,
    FeatureFSGSBase
  ];
  list<SubtargetFeature> GLMSpecificFeatures = [
    FeatureUseGLMDivSqrtCosts,
    FeaturePOPCNTFalseDeps
  ];
  list<SubtargetFeature> GLMInheritableFeatures =
    !listconcat(SLMInheritableFeatures, GLMAdditionalFeatures);
  list<SubtargetFeature> GLMFeatures =
    !listconcat(GLMInheritableFeatures, GLMSpecificFeatures);

  // Goldmont Plus
  list<SubtargetFeature> GLPAdditionalFeatures = [
    FeaturePTWRITE,
    FeatureRDPID,
    FeatureSGX
  ];
  list<SubtargetFeature> GLPSpecificFeatures = [FeatureUseGLMDivSqrtCosts];
  list<SubtargetFeature> GLPInheritableFeatures =
    !listconcat(GLMInheritableFeatures, GLPAdditionalFeatures);
  list<SubtargetFeature> GLPFeatures =
    !listconcat(GLPInheritableFeatures, GLPSpecificFeatures);

  // Tremont
  list<SubtargetFeature> TRMAdditionalFeatures = [
    FeatureCLDEMOTE,
    FeatureGFNI,
    FeatureMOVDIRI,
    FeatureMOVDIR64B,
    FeatureWAITPKG
  ];
  list<SubtargetFeature> TRMSpecificFeatures = [FeatureUseGLMDivSqrtCosts];
  list<SubtargetFeature> TRMFeatures =
    !listconcat(GLPInheritableFeatures, TRMAdditionalFeatures,
                TRMSpecificFeatures);

  // Knights Landing
  list<SubtargetFeature> KNLFeatures = [
    FeatureX87,
    FeatureCMPXCHG8B,
    FeatureCMOV,
    FeatureMMX,
    FeatureFXSR,
    FeatureNOPL,
    Feature64Bit,
    FeatureCMPXCHG16B,
    FeaturePOPCNT,
    FeatureSlowDivide64,
    FeaturePCLMUL,
    FeatureXSAVE,
    FeatureXSAVEOPT,
    FeatureLAHFSAHF,
    FeatureSlow3OpsLEA,
    FeatureSlowIncDec,
    FeatureAES,
    FeatureRDRAND,
    FeatureF16C,
    FeatureFSGSBase,
    FeatureAVX512,
    FeatureERI,
    FeatureCDI,
    FeaturePFI,
    FeaturePREFETCHWT1,
    FeatureADX,
    FeatureRDSEED,
    FeatureMOVBE,
    FeatureLZCNT,
    FeatureBMI,
    FeatureBMI2,
    FeatureFMA,
    FeaturePRFCHW,
    FeaturePreferMaskRegisters,
    FeatureSlowTwoMemOps,
    FeatureHasFastGather,
    FeatureSlowPMADDWD
  ];
  // TODO Add AVX5124FMAPS/AVX5124VNNIW features
  list<SubtargetFeature> KNMFeatures =
    !listconcat(KNLFeatures, [FeatureVPOPCNTDQ]);

  // Barcelona
  list<SubtargetFeature> BarcelonaInheritableFeatures = [
    FeatureX87,
    FeatureCMPXCHG8B,
    FeatureSSE4A,
    Feature3DNowA,
    FeatureFXSR,
    FeatureNOPL,
    FeatureCMPXCHG16B,
    FeatureLZCNT,
    FeaturePOPCNT,
    FeatureSlowSHLD,
    FeatureLAHFSAHF,
    FeatureCMOV,
    Feature64Bit,
    FeatureFastScalarShiftMasks,
    FeatureInsertVZEROUPPER
  ];
  list<SubtargetFeature> BarcelonaFeatures = BarcelonaInheritableFeatures;

  // Bobcat
  list<SubtargetFeature> BtVer1InheritableFeatures = [
    FeatureX87,
    FeatureCMPXCHG8B,
    FeatureCMOV,
    FeatureMMX,
    FeatureSSSE3,
    FeatureSSE4A,
    FeatureFXSR,
    FeatureNOPL,
    Feature64Bit,
    FeatureCMPXCHG16B,
    FeaturePRFCHW,
    FeatureLZCNT,
    FeaturePOPCNT,
    FeatureSlowSHLD,
    FeatureLAHFSAHF,
    FeatureFast15ByteNOP,
    FeatureFastScalarShiftMasks,
    FeatureFastVectorShiftMasks
  ];
  list<SubtargetFeature> BtVer1SpecificFeatures = [FeatureInsertVZEROUPPER];
  list<SubtargetFeature> BtVer1Features =
    !listconcat(BtVer1InheritableFeatures, BtVer1SpecificFeatures);

  // Jaguar
  list<SubtargetFeature> BtVer2AdditionalFeatures = [
    FeatureAVX,
    FeatureAES,
    FeaturePCLMUL,
    FeatureBMI,
    FeatureF16C,
    FeatureMOVBE,
    FeatureXSAVE,
    FeatureXSAVEOPT
  ];
  list<SubtargetFeature> BtVer2SpecificFeatures = [
    FeatureFastLZCNT,
    FeatureFastBEXTR,
    FeatureFastHorizontalOps
  ];
  list<SubtargetFeature> BtVer2InheritableFeatures =
    !listconcat(BtVer1InheritableFeatures, BtVer2AdditionalFeatures);
  list<SubtargetFeature> BtVer2Features =
    !listconcat(BtVer2InheritableFeatures, BtVer2SpecificFeatures);

  // Bulldozer
  list<SubtargetFeature> BdVer1InheritableFeatures = [
    FeatureX87,
    FeatureCMPXCHG8B,
    FeatureCMOV,
    FeatureXOP,
    Feature64Bit,
    FeatureCMPXCHG16B,
    FeatureAES,
    FeaturePRFCHW,
    FeaturePCLMUL,
    FeatureMMX,
    FeatureFXSR,
    FeatureNOPL,
    FeatureLZCNT,
    FeaturePOPCNT,
    FeatureXSAVE,
    FeatureLWP,
    FeatureSlowSHLD,
    FeatureLAHFSAHF,
    FeatureFast11ByteNOP,
    FeatureFastScalarShiftMasks,
    FeatureBranchFusion,
    FeatureInsertVZEROUPPER
  ];
  list<SubtargetFeature> BdVer1Features = BdVer1InheritableFeatures;

  // Piledriver
  list<SubtargetFeature> BdVer2AdditionalFeatures = [
    FeatureF16C,
    FeatureBMI,
    FeatureTBM,
    FeatureFMA,
    FeatureFastBEXTR
  ];
  list<SubtargetFeature> BdVer2InheritableFeatures =
    !listconcat(BdVer1InheritableFeatures, BdVer2AdditionalFeatures);
  list<SubtargetFeature> BdVer2Features = BdVer2InheritableFeatures;

  // Steamroller
  list<SubtargetFeature> BdVer3AdditionalFeatures = [
    FeatureXSAVEOPT,
    FeatureFSGSBase
  ];
  list<SubtargetFeature> BdVer3InheritableFeatures =
    !listconcat(BdVer2InheritableFeatures, BdVer3AdditionalFeatures);
  list<SubtargetFeature> BdVer3Features = BdVer3InheritableFeatures;

  // Excavator
  list<SubtargetFeature> BdVer4AdditionalFeatures = [
    FeatureAVX2,
    FeatureBMI2,
    FeatureMWAITX
  ];
  list<SubtargetFeature> BdVer4InheritableFeatures =
    !listconcat(BdVer3InheritableFeatures, BdVer4AdditionalFeatures);
  list<SubtargetFeature> BdVer4Features = BdVer4InheritableFeatures;


  // AMD Zen Processors common ISAs
  list<SubtargetFeature> ZNFeatures = [
    FeatureADX,
    FeatureAES,
    FeatureAVX2,
    FeatureBMI,
    FeatureBMI2,
    FeatureCLFLUSHOPT,
    FeatureCLZERO,
    FeatureCMOV,
    Feature64Bit,
    FeatureCMPXCHG16B,
    FeatureF16C,
    FeatureFMA,
    FeatureFSGSBase,
    FeatureFXSR,
    FeatureNOPL,
    FeatureFastLZCNT,
    FeatureLAHFSAHF,
    FeatureLZCNT,
    FeatureFastBEXTR,
    FeatureFast15ByteNOP,
    FeatureBranchFusion,
    FeatureFastScalarShiftMasks,
    FeatureMMX,
    FeatureMOVBE,
    FeatureMWAITX,
    FeaturePCLMUL,
    FeaturePOPCNT,
    FeaturePRFCHW,
    FeatureRDRAND,
    FeatureRDSEED,
    FeatureSHA,
    FeatureSSE4A,
    FeatureSlowSHLD,
    FeatureInsertVZEROUPPER,
    FeatureX87,
    FeatureXSAVE,
    FeatureXSAVEC,
    FeatureXSAVEOPT,
    FeatureXSAVES
  ];
  // Zen 2 is the common Zen list plus the entries below.
  list<SubtargetFeature> ZN2AdditionalFeatures = [
    FeatureCLWB,
    FeatureRDPID,
    FeatureWBNOINVD
  ];
  list<SubtargetFeature> ZN2Features =
    !listconcat(ZNFeatures, ZN2AdditionalFeatures);
}

//===----------------------------------------------------------------------===//
// X86 processors supported.
//===----------------------------------------------------------------------===//

// Shorthand for a ProcessorModel that uses GenericModel as its scheduling
// model; only the processor name and its feature list need to be supplied.
class Proc<string Name, list<SubtargetFeature> Features>
 : ProcessorModel<Name, GenericModel, Features>;

// NOTE: CMPXCHG8B is here for legacy compatibility so that it is only disabled
// if i386/i486 is specifically requested.
def : Proc<"generic",         [FeatureX87, FeatureSlowUAMem16,
                               FeatureCMPXCHG8B, FeatureInsertVZEROUPPER]>;
def : Proc<"i386",            [FeatureX87, FeatureSlowUAMem16,
                               FeatureInsertVZEROUPPER]>;
def : Proc<"i486",            [FeatureX87, FeatureSlowUAMem16,
                               FeatureInsertVZEROUPPER]>;
def : Proc<"i586",            [FeatureX87, FeatureSlowUAMem16,
                               FeatureCMPXCHG8B, FeatureInsertVZEROUPPER]>;
def : Proc<"pentium",         [FeatureX87, FeatureSlowUAMem16,
                               FeatureCMPXCHG8B, FeatureInsertVZEROUPPER]>;
def : Proc<"pentium-mmx",     [FeatureX87, FeatureSlowUAMem16,
                               FeatureCMPXCHG8B, FeatureMMX,
                               FeatureInsertVZEROUPPER]>;

def : Proc<"i686", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
                    FeatureCMOV, FeatureInsertVZEROUPPER]>;
def : Proc<"pentiumpro", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
                          FeatureCMOV, FeatureNOPL, FeatureInsertVZEROUPPER]>;

def : Proc<"pentium2",        [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
                               FeatureMMX, FeatureCMOV, FeatureFXSR,
                               FeatureNOPL, FeatureInsertVZEROUPPER]>;

foreach P = ["pentium3", "pentium3m"] in {
  def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureMMX,
                 FeatureSSE1, FeatureFXSR, FeatureNOPL, FeatureCMOV,
                 FeatureInsertVZEROUPPER]>;
}

// Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
// The goal is to have it enabled for pentium4, which is currently the default
// processor in a vanilla 32-bit clang compilation when no specific
// architecture is specified. This generally gives a nice performance increase
// on silvermont, with largely neutral behavior on other contemporary large
// core processors.
// pentium-m, pentium4m, prescott and nocona are covered too, as a preventative
// measure to avoid performance surprises in case clang's default cpu changes
// slightly.

// pentium-m, pentium4 and pentium4m use the same scheduling model and the
// same feature list, so they are emitted from a single loop.
foreach P = ["pentium-m", "pentium4", "pentium4m"] in
  def : ProcessorModel<P, GenericPostRAModel, [
    FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE2,
    FeatureFXSR, FeatureNOPL, FeatureCMOV, FeatureInsertVZEROUPPER
  ]>;

// Intel Quark.
def : Proc<"lakemont", [FeatureInsertVZEROUPPER]>;

// Intel Core Duo.
def : ProcessorModel<"yonah", SandyBridgeModel, [
  FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE3,
  FeatureFXSR, FeatureNOPL, FeatureCMOV, FeatureInsertVZEROUPPER
]>;

// NetBurst.
def : ProcessorModel<"prescott", GenericPostRAModel, [
  FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE3,
  FeatureFXSR, FeatureNOPL, FeatureCMOV, FeatureInsertVZEROUPPER
]>;
def : ProcessorModel<"nocona", GenericPostRAModel, [
  FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX,
  FeatureSSE3, FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureCMPXCHG16B,
  FeatureInsertVZEROUPPER
]>;

// Intel Core 2 Solo/Duo.
def : ProcessorModel<"core2", SandyBridgeModel, [
  FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX,
  FeatureSSSE3, FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureCMPXCHG16B,
  FeatureLAHFSAHF, FeatureMacroFusion, FeatureInsertVZEROUPPER
]>;
// Same list as core2 except that the SSE level is SSE4.1 instead of SSSE3.
def : ProcessorModel<"penryn", SandyBridgeModel, [
  FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX,
  FeatureSSE41, FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureCMPXCHG16B,
  FeatureLAHFSAHF, FeatureMacroFusion, FeatureInsertVZEROUPPER
]>;

// Atom CPUs.
foreach P = ["bonnell", "atom"] in
  def : ProcessorModel<P, AtomModel, ProcessorFeatures.AtomFeatures>;

foreach P = ["silvermont", "slm"] in
  def : ProcessorModel<P, SLMModel, ProcessorFeatures.SLMFeatures>;

// The Goldmont family and Tremont are scheduled with the Silvermont model.
def : ProcessorModel<"goldmont",      SLMModel,
                     ProcessorFeatures.GLMFeatures>;
def : ProcessorModel<"goldmont-plus", SLMModel,
                     ProcessorFeatures.GLPFeatures>;
def : ProcessorModel<"tremont",       SLMModel,
                     ProcessorFeatures.TRMFeatures>;

// "Arrandale" along with corei3 and corei5
foreach P = ["nehalem", "corei7"] in
  def : ProcessorModel<P, SandyBridgeModel, ProcessorFeatures.NHMFeatures>;

// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
def : ProcessorModel<"westmere", SandyBridgeModel,
                     ProcessorFeatures.WSMFeatures>;

foreach P = ["sandybridge", "corei7-avx"] in
  def : ProcessorModel<P, SandyBridgeModel, ProcessorFeatures.SNBFeatures>;

foreach P = ["ivybridge", "core-avx-i"] in
  def : ProcessorModel<P, SandyBridgeModel, ProcessorFeatures.IVBFeatures>;

foreach P = ["haswell", "core-avx2"] in
  def : ProcessorModel<P, HaswellModel, ProcessorFeatures.HSWFeatures>;

def : ProcessorModel<"broadwell", BroadwellModel,
                     ProcessorFeatures.BDWFeatures>;

def : ProcessorModel<"skylake", SkylakeClientModel,
                     ProcessorFeatures.SKLFeatures>;

// FIXME: define KNL scheduler model
def : ProcessorModel<"knl", HaswellModel, ProcessorFeatures.KNLFeatures>;
def : ProcessorModel<"knm", HaswellModel, ProcessorFeatures.KNMFeatures>;

foreach P = ["skylake-avx512", "skx"] in
  def : ProcessorModel<P, SkylakeServerModel, ProcessorFeatures.SKXFeatures>;

// The remaining AVX-512 capable cores all use the Skylake server model.
def : ProcessorModel<"cascadelake",    SkylakeServerModel,
                     ProcessorFeatures.CLXFeatures>;
def : ProcessorModel<"cooperlake",     SkylakeServerModel,
                     ProcessorFeatures.CPXFeatures>;
def : ProcessorModel<"cannonlake",     SkylakeServerModel,
                     ProcessorFeatures.CNLFeatures>;
def : ProcessorModel<"icelake-client", SkylakeServerModel,
                     ProcessorFeatures.ICLFeatures>;
def : ProcessorModel<"icelake-server", SkylakeServerModel,
                     ProcessorFeatures.ICXFeatures>;
def : ProcessorModel<"tigerlake",      SkylakeServerModel,
                     ProcessorFeatures.TGLFeatures>;

// AMD CPUs.

def : Proc<"k6", [
  FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureMMX,
  FeatureInsertVZEROUPPER
]>;
// k6-2 and k6-3 are described by the same feature list.
foreach P = ["k6-2", "k6-3"] in
  def : Proc<P, [
    FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, Feature3DNow,
    FeatureInsertVZEROUPPER
  ]>;

foreach P = ["athlon", "athlon-tbird"] in
  def : Proc<P, [
    FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureCMOV,
    Feature3DNowA, FeatureNOPL, FeatureSlowSHLD, FeatureInsertVZEROUPPER
  ]>;

foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in
  def : Proc<P, [
    FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureCMOV,
    FeatureSSE1, Feature3DNowA, FeatureFXSR, FeatureNOPL, FeatureSlowSHLD,
    FeatureInsertVZEROUPPER
  ]>;

foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in
  def : Proc<P, [
    FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureSSE2,
    Feature3DNowA, FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureSlowSHLD,
    FeatureCMOV, FeatureFastScalarShiftMasks, FeatureInsertVZEROUPPER
  ]>;

foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in
  def : Proc<P, [
    FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureSSE3,
    Feature3DNowA, FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B,
    FeatureSlowSHLD, FeatureCMOV, Feature64Bit, FeatureFastScalarShiftMasks,
    FeatureInsertVZEROUPPER
  ]>;

foreach P = ["amdfam10", "barcelona"] in
  def : Proc<P, ProcessorFeatures.BarcelonaFeatures>;

// Bobcat
def : Proc<"btver1", ProcessorFeatures.BtVer1Features>;
// Jaguar
def : ProcessorModel<"btver2", BtVer2Model, ProcessorFeatures.BtVer2Features>;

// Bulldozer
def : ProcessorModel<"bdver1", BdVer2Model, ProcessorFeatures.BdVer1Features>;
// Piledriver
def : ProcessorModel<"bdver2", BdVer2Model, ProcessorFeatures.BdVer2Features>;
// Steamroller
def : Proc<"bdver3", ProcessorFeatures.BdVer3Features>;
// Excavator
def : Proc<"bdver4", ProcessorFeatures.BdVer4Features>;

// Zen
def : ProcessorModel<"znver1", Znver1Model, ProcessorFeatures.ZNFeatures>;
def : ProcessorModel<"znver2", Znver2Model, ProcessorFeatures.ZN2Features>;

def : Proc<"geode", [
  FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, Feature3DNowA,
  FeatureInsertVZEROUPPER
]>;

def : Proc<"winchip-c6", [
  FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureInsertVZEROUPPER
]>;
// winchip2 and c3 are described by the same feature list.
foreach P = ["winchip2", "c3"] in
  def : Proc<P, [
    FeatureX87, FeatureSlowUAMem16, Feature3DNow, FeatureInsertVZEROUPPER
  ]>;
def : Proc<"c3-2", [
  FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE1,
  FeatureFXSR, FeatureCMOV, FeatureInsertVZEROUPPER
]>;

// We also provide a generic 64-bit specific x86 processor model which tries to
// be good for modern chips without enabling instruction set encodings past the
// basic SSE2 and 64-bit ones. It disables slow things from any mainstream and
// modern 64-bit x86 chip, and enables features that are generally beneficial.
//
// We currently use the Sandy Bridge model as the default scheduling model as
// we use it across Nehalem, Westmere, Sandy Bridge, and Ivy Bridge which
// covers a huge swath of x86 processors. If there are specific scheduling
// knobs which need to be tuned differently for AMD chips, we might consider
// forming a common base for them.
def : ProcessorModel<"x86-64", SandyBridgeModel, [
  FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX, FeatureSSE2,
  FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureSlow3OpsLEA,
  FeatureSlowIncDec, FeatureMacroFusion, FeatureInsertVZEROUPPER
]>;

//===----------------------------------------------------------------------===//
// Calling Conventions
//===----------------------------------------------------------------------===//

include "X86CallingConv.td"


//===----------------------------------------------------------------------===//
// Assembly Parser
//===----------------------------------------------------------------------===//

// AT&T syntax: parser variant 0.
def ATTAsmParserVariant : AsmParserVariant {
  let Variant = 0;
  let Name = "att";             // Name of the variant.
  let CommentDelimiter = "#";   // Comments in assembly strings are discarded.
  let RegisterPrefix = "%";     // Prefix of hard coded registers.
}

// Intel syntax: parser variant 1.
def IntelAsmParserVariant : AsmParserVariant {
  let Variant = 1;
  let Name = "intel";           // Name of the variant.
  let CommentDelimiter = ";";   // Comments in assembly strings are discarded.
  let RegisterPrefix = "";      // Hard coded registers carry no prefix.
}

//===----------------------------------------------------------------------===//
// Assembly Printers
//===----------------------------------------------------------------------===//

// The X86 target supports two different syntaxes for emitting machine code.
// This is controlled by the -x86-asm-syntax={att|intel} option.
// Writer for variant 0, implemented by the ATTInstPrinter class.
def ATTAsmWriter : AsmWriter {
  let AsmWriterClassName = "ATTInstPrinter";
  let Variant = 0;
}
// Writer for variant 1, implemented by the IntelInstPrinter class.
def IntelAsmWriter : AsmWriter {
  let AsmWriterClassName = "IntelInstPrinter";
  let Variant = 1;
}

// Top-level target record: ties together the instruction set, both assembly
// parser variants and both assembly writers defined above.
def X86 : Target {
  // Information about the instructions...
  let InstructionSet = X86InstrInfo;
  let AssemblyParserVariants = [
    ATTAsmParserVariant,
    IntelAsmParserVariant
  ];
  let AssemblyWriters = [
    ATTAsmWriter,
    IntelAsmWriter
  ];
  let AllowRegisterRenaming = 1;
}

//===----------------------------------------------------------------------===//
// Pfm Counters
//===----------------------------------------------------------------------===//

include "X86PfmCounters.td"