1//===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// 9// This is a target description file for the Intel i386 architecture, referred 10// to here as the "X86" architecture. 11// 12//===----------------------------------------------------------------------===// 13 14// Get the target-independent interfaces which we are implementing... 15// 16include "llvm/Target/Target.td" 17 18//===----------------------------------------------------------------------===// 19// X86 Subtarget state 20// 21 22def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true", 23 "64-bit mode (x86_64)">; 24def Mode32Bit : SubtargetFeature<"32bit-mode", "In32BitMode", "true", 25 "32-bit mode (80386)">; 26def Mode16Bit : SubtargetFeature<"16bit-mode", "In16BitMode", "true", 27 "16-bit mode (i8086)">; 28 29//===----------------------------------------------------------------------===// 30// X86 Subtarget features 31//===----------------------------------------------------------------------===// 32 33def FeatureX87 : SubtargetFeature<"x87","HasX87", "true", 34 "Enable X87 float instructions">; 35 36def FeatureNOPL : SubtargetFeature<"nopl", "HasNOPL", "true", 37 "Enable NOPL instruction">; 38 39def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true", 40 "Enable conditional move instructions">; 41 42def FeatureCMPXCHG8B : SubtargetFeature<"cx8", "HasCmpxchg8b", "true", 43 "Support CMPXCHG8B instructions">; 44 45def FeaturePOPCNT : SubtargetFeature<"popcnt", "HasPOPCNT", "true", 46 "Support POPCNT instruction">; 47 48def FeatureFXSR : SubtargetFeature<"fxsr", "HasFXSR", "true", 49 "Support fxsave/fxrestore instructions">; 50 51def FeatureXSAVE : SubtargetFeature<"xsave", "HasXSAVE", "true", 52 "Support xsave instructions">; 53 54def FeatureXSAVEOPT: SubtargetFeature<"xsaveopt", "HasXSAVEOPT", "true", 55 "Support xsaveopt instructions">; 56 57def FeatureXSAVEC : SubtargetFeature<"xsavec", "HasXSAVEC", "true", 58 "Support xsavec instructions">; 59 60def FeatureXSAVES : SubtargetFeature<"xsaves", "HasXSAVES", "true", 61 "Support xsaves instructions">; 62 63def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1", 64 "Enable SSE instructions">; 65def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2", 66 "Enable SSE2 instructions", 67 [FeatureSSE1]>; 68def FeatureSSE3 : SubtargetFeature<"sse3", "X86SSELevel", "SSE3", 69 "Enable SSE3 instructions", 70 [FeatureSSE2]>; 71def FeatureSSSE3 : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3", 72 "Enable SSSE3 instructions", 73 [FeatureSSE3]>; 74def FeatureSSE41 : SubtargetFeature<"sse4.1", "X86SSELevel", "SSE41", 75 "Enable SSE 4.1 instructions", 76 [FeatureSSSE3]>; 77def FeatureSSE42 : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42", 78 "Enable SSE 4.2 instructions", 79 [FeatureSSE41]>; 80// The MMX subtarget feature is separate from the rest of the SSE features 81// because it's important (for odd compatibility reasons) to be able to 82// turn it off explicitly while allowing SSE+ to be on. 83def FeatureMMX : SubtargetFeature<"mmx","X863DNowLevel", "MMX", 84 "Enable MMX instructions">; 85def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow", 86 "Enable 3DNow! instructions", 87 [FeatureMMX]>; 88def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA", 89 "Enable 3DNow! Athlon instructions", 90 [Feature3DNow]>; 91// All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied 92// feature, because SSE2 can be disabled (e.g. for compiling OS kernels) 93// without disabling 64-bit mode. Nothing should imply this feature bit. It 94// is used to enforce that only 64-bit capable CPUs are used in 64-bit mode. 95def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true", 96 "Support 64-bit instructions">; 97def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true", 98 "64-bit with cmpxchg16b", 99 [FeatureCMPXCHG8B]>; 100def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true", 101 "SHLD instruction is slow">; 102def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true", 103 "PMULLD instruction is slow">; 104def FeatureSlowPMADDWD : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow", 105 "true", 106 "PMADDWD is slower than PMULLD">; 107// FIXME: This should not apply to CPUs that do not have SSE. 108def FeatureSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16", 109 "IsUAMem16Slow", "true", 110 "Slow unaligned 16-byte memory access">; 111def FeatureSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32", 112 "IsUAMem32Slow", "true", 113 "Slow unaligned 32-byte memory access">; 114def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true", 115 "Support SSE 4a instructions", 116 [FeatureSSE3]>; 117 118def FeatureAVX : SubtargetFeature<"avx", "X86SSELevel", "AVX", 119 "Enable AVX instructions", 120 [FeatureSSE42]>; 121def FeatureAVX2 : SubtargetFeature<"avx2", "X86SSELevel", "AVX2", 122 "Enable AVX2 instructions", 123 [FeatureAVX]>; 124def FeatureFMA : SubtargetFeature<"fma", "HasFMA", "true", 125 "Enable three-operand fused multiple-add", 126 [FeatureAVX]>; 127def FeatureF16C : SubtargetFeature<"f16c", "HasF16C", "true", 128 "Support 16-bit floating point conversion instructions", 129 [FeatureAVX]>; 130def FeatureAVX512 : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512F", 131 "Enable AVX-512 instructions", 132 [FeatureAVX2, FeatureFMA, FeatureF16C]>; 133def FeatureERI : SubtargetFeature<"avx512er", "HasERI", "true", 134 "Enable AVX-512 Exponential and Reciprocal Instructions", 135 [FeatureAVX512]>; 136def FeatureCDI : SubtargetFeature<"avx512cd", "HasCDI", "true", 137 "Enable AVX-512 Conflict Detection Instructions", 138 [FeatureAVX512]>; 139def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ", 140 "true", "Enable AVX-512 Population Count Instructions", 141 [FeatureAVX512]>; 142def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true", 143 "Enable AVX-512 PreFetch Instructions", 144 [FeatureAVX512]>; 145def FeaturePREFETCHWT1 : SubtargetFeature<"prefetchwt1", "HasPREFETCHWT1", 146 "true", 147 "Prefetch with Intent to Write and T1 Hint">; 148def FeatureDQI : SubtargetFeature<"avx512dq", "HasDQI", "true", 149 "Enable AVX-512 Doubleword and Quadword Instructions", 150 [FeatureAVX512]>; 151def FeatureBWI : SubtargetFeature<"avx512bw", "HasBWI", "true", 152 "Enable AVX-512 Byte and Word Instructions", 153 [FeatureAVX512]>; 154def FeatureVLX : SubtargetFeature<"avx512vl", "HasVLX", "true", 155 "Enable AVX-512 Vector Length eXtensions", 156 [FeatureAVX512]>; 157def FeatureVBMI : SubtargetFeature<"avx512vbmi", "HasVBMI", "true", 158 "Enable AVX-512 Vector Byte Manipulation Instructions", 159 [FeatureBWI]>; 160def FeatureVBMI2 : SubtargetFeature<"avx512vbmi2", "HasVBMI2", "true", 161 "Enable AVX-512 further Vector Byte Manipulation Instructions", 162 [FeatureBWI]>; 163def FeatureIFMA : SubtargetFeature<"avx512ifma", "HasIFMA", "true", 164 "Enable AVX-512 Integer Fused Multiple-Add", 165 [FeatureAVX512]>; 166def FeaturePKU : SubtargetFeature<"pku", "HasPKU", "true", 167 "Enable protection keys">; 168def FeatureVNNI : SubtargetFeature<"avx512vnni", "HasVNNI", "true", 169 "Enable AVX-512 Vector Neural Network Instructions", 170 [FeatureAVX512]>; 171def FeatureBF16 : SubtargetFeature<"avx512bf16", "HasBF16", "true", 172 "Support bfloat16 floating point", 173 [FeatureBWI]>; 174def FeatureBITALG : SubtargetFeature<"avx512bitalg", "HasBITALG", "true", 175 "Enable AVX-512 Bit Algorithms", 176 [FeatureBWI]>; 177def FeatureVP2INTERSECT : SubtargetFeature<"avx512vp2intersect", 178 "HasVP2INTERSECT", "true", 179 "Enable AVX-512 vp2intersect", 180 [FeatureAVX512]>; 181def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true", 182 "Enable packed carry-less multiplication instructions", 183 [FeatureSSE2]>; 184def FeatureGFNI : SubtargetFeature<"gfni", "HasGFNI", "true", 185 "Enable Galois Field Arithmetic Instructions", 186 [FeatureSSE2]>; 187def FeatureVPCLMULQDQ : SubtargetFeature<"vpclmulqdq", "HasVPCLMULQDQ", "true", 188 "Enable vpclmulqdq instructions", 189 [FeatureAVX, FeaturePCLMUL]>; 190def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true", 191 "Enable four-operand fused multiple-add", 192 [FeatureAVX, FeatureSSE4A]>; 193def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true", 194 "Enable XOP instructions", 195 [FeatureFMA4]>; 196def FeatureSSEUnalignedMem : SubtargetFeature<"sse-unaligned-mem", 197 "HasSSEUnalignedMem", "true", 198 "Allow unaligned memory operands with SSE instructions">; 199def FeatureAES : SubtargetFeature<"aes", "HasAES", "true", 200 "Enable AES instructions", 201 [FeatureSSE2]>; 202def FeatureVAES : SubtargetFeature<"vaes", "HasVAES", "true", 203 "Promote selected AES instructions to AVX512/AVX registers", 204 [FeatureAVX, FeatureAES]>; 205def FeatureTBM : SubtargetFeature<"tbm", "HasTBM", "true", 206 "Enable TBM instructions">; 207def FeatureLWP : SubtargetFeature<"lwp", "HasLWP", "true", 208 "Enable LWP instructions">; 209def FeatureMOVBE : SubtargetFeature<"movbe", "HasMOVBE", "true", 210 "Support MOVBE instruction">; 211def FeatureRDRAND : SubtargetFeature<"rdrnd", "HasRDRAND", "true", 212 "Support RDRAND instruction">; 213def FeatureFSGSBase : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true", 214 "Support FS/GS Base instructions">; 215def FeatureLZCNT : SubtargetFeature<"lzcnt", "HasLZCNT", "true", 216 "Support LZCNT instruction">; 217def FeatureBMI : SubtargetFeature<"bmi", "HasBMI", "true", 218 "Support BMI instructions">; 219def FeatureBMI2 : SubtargetFeature<"bmi2", "HasBMI2", "true", 220 "Support BMI2 instructions">; 221def FeatureRTM : SubtargetFeature<"rtm", "HasRTM", "true", 222 "Support RTM instructions">; 223def FeatureADX : SubtargetFeature<"adx", "HasADX", "true", 224 "Support ADX instructions">; 225def FeatureSHA : SubtargetFeature<"sha", "HasSHA", "true", 226 "Enable SHA instructions", 227 [FeatureSSE2]>; 228def FeatureSHSTK : SubtargetFeature<"shstk", "HasSHSTK", "true", 229 "Support CET Shadow-Stack instructions">; 230def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true", 231 "Support PRFCHW instructions">; 232def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true", 233 "Support RDSEED instruction">; 234def FeatureLAHFSAHF : SubtargetFeature<"sahf", "HasLAHFSAHF", "true", 235 "Support LAHF and SAHF instructions">; 236def FeatureMWAITX : SubtargetFeature<"mwaitx", "HasMWAITX", "true", 237 "Enable MONITORX/MWAITX timer functionality">; 238def FeatureCLZERO : SubtargetFeature<"clzero", "HasCLZERO", "true", 239 "Enable Cache Line Zero">; 240def FeatureCLDEMOTE : SubtargetFeature<"cldemote", "HasCLDEMOTE", "true", 241 "Enable Cache Demote">; 242def FeaturePTWRITE : SubtargetFeature<"ptwrite", "HasPTWRITE", "true", 243 "Support ptwrite instruction">; 244// FIXME: This feature is deprecated in 10.0 and should not be used for 245// anything, but removing it would break IR files that may contain it in a 246// target-feature attribute. 247def FeatureDeprecatedMPX : SubtargetFeature<"mpx", "DeprecatedHasMPX", "false", 248 "Deprecated. Support MPX instructions">; 249def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true", 250 "Use LEA for adjusting the stack pointer">; 251def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb", 252 "HasSlowDivide32", "true", 253 "Use 8-bit divide for positive values less than 256">; 254def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divl", 255 "HasSlowDivide64", "true", 256 "Use 32-bit divide for positive values less than 2^32">; 257def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions", 258 "PadShortFunctions", "true", 259 "Pad short functions">; 260def FeatureINVPCID : SubtargetFeature<"invpcid", "HasINVPCID", "true", 261 "Invalidate Process-Context Identifier">; 262def FeatureSGX : SubtargetFeature<"sgx", "HasSGX", "true", 263 "Enable Software Guard Extensions">; 264def FeatureCLFLUSHOPT : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true", 265 "Flush A Cache Line Optimized">; 266def FeatureCLWB : SubtargetFeature<"clwb", "HasCLWB", "true", 267 "Cache Line Write Back">; 268def FeatureWBNOINVD : SubtargetFeature<"wbnoinvd", "HasWBNOINVD", "true", 269 "Write Back No Invalidate">; 270def FeatureRDPID : SubtargetFeature<"rdpid", "HasRDPID", "true", 271 "Support RDPID instructions">; 272def FeatureWAITPKG : SubtargetFeature<"waitpkg", "HasWAITPKG", "true", 273 "Wait and pause enhancements">; 274def FeatureENQCMD : SubtargetFeature<"enqcmd", "HasENQCMD", "true", 275 "Has ENQCMD instructions">; 276// On some processors, instructions that implicitly take two memory operands are 277// slow. In practice, this means that CALL, PUSH, and POP with memory operands 278// should be avoided in favor of a MOV + register CALL/PUSH/POP. 279def FeatureSlowTwoMemOps : SubtargetFeature<"slow-two-mem-ops", 280 "SlowTwoMemOps", "true", 281 "Two memory operand instructions are slow">; 282def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true", 283 "LEA instruction needs inputs at AG stage">; 284def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true", 285 "LEA instruction with certain arguments is slow">; 286def FeatureSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true", 287 "LEA instruction with 3 ops or certain registers is slow">; 288def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true", 289 "INC and DEC instructions are slower than ADD and SUB">; 290def FeatureSoftFloat 291 : SubtargetFeature<"soft-float", "UseSoftFloat", "true", 292 "Use software floating point features">; 293def FeaturePOPCNTFalseDeps : SubtargetFeature<"false-deps-popcnt", 294 "HasPOPCNTFalseDeps", "true", 295 "POPCNT has a false dependency on dest register">; 296def FeatureLZCNTFalseDeps : SubtargetFeature<"false-deps-lzcnt-tzcnt", 297 "HasLZCNTFalseDeps", "true", 298 "LZCNT/TZCNT have a false dependency on dest register">; 299def FeaturePCONFIG : SubtargetFeature<"pconfig", "HasPCONFIG", "true", 300 "platform configuration instruction">; 301// On recent X86 (port bound) processors, its preferable to combine to a single shuffle 302// using a variable mask over multiple fixed shuffles. 303def FeatureFastVariableShuffle 304 : SubtargetFeature<"fast-variable-shuffle", 305 "HasFastVariableShuffle", 306 "true", "Shuffles with variable masks are fast">; 307// On some X86 processors, a vzeroupper instruction should be inserted after 308// using ymm/zmm registers before executing code that may use SSE instructions. 309def FeatureInsertVZEROUPPER 310 : SubtargetFeature<"vzeroupper", 311 "InsertVZEROUPPER", 312 "true", "Should insert vzeroupper instructions">; 313// FeatureFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency 314// than the corresponding NR code. FeatureFastVectorFSQRT should be enabled if 315// vector FSQRT has higher throughput than the corresponding NR code. 316// The idea is that throughput bound code is likely to be vectorized, so for 317// vectorized code we should care about the throughput of SQRT operations. 318// But if the code is scalar that probably means that the code has some kind of 319// dependency and we should care more about reducing the latency. 320def FeatureFastScalarFSQRT 321 : SubtargetFeature<"fast-scalar-fsqrt", "HasFastScalarFSQRT", 322 "true", "Scalar SQRT is fast (disable Newton-Raphson)">; 323def FeatureFastVectorFSQRT 324 : SubtargetFeature<"fast-vector-fsqrt", "HasFastVectorFSQRT", 325 "true", "Vector SQRT is fast (disable Newton-Raphson)">; 326// If lzcnt has equivalent latency/throughput to most simple integer ops, it can 327// be used to replace test/set sequences. 328def FeatureFastLZCNT 329 : SubtargetFeature< 330 "fast-lzcnt", "HasFastLZCNT", "true", 331 "LZCNT instructions are as fast as most simple integer ops">; 332// If the target can efficiently decode NOPs upto 11-bytes in length. 333def FeatureFast11ByteNOP 334 : SubtargetFeature< 335 "fast-11bytenop", "HasFast11ByteNOP", "true", 336 "Target can quickly decode up to 11 byte NOPs">; 337// If the target can efficiently decode NOPs upto 15-bytes in length. 338def FeatureFast15ByteNOP 339 : SubtargetFeature< 340 "fast-15bytenop", "HasFast15ByteNOP", "true", 341 "Target can quickly decode up to 15 byte NOPs">; 342// Sandy Bridge and newer processors can use SHLD with the same source on both 343// inputs to implement rotate to avoid the partial flag update of the normal 344// rotate instructions. 345def FeatureFastSHLDRotate 346 : SubtargetFeature< 347 "fast-shld-rotate", "HasFastSHLDRotate", "true", 348 "SHLD can be used as a faster rotate">; 349 350// Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka 351// "string operations"). See "REP String Enhancement" in the Intel Software 352// Development Manual. This feature essentially means that REP MOVSB will copy 353// using the largest available size instead of copying bytes one by one, making 354// it at least as fast as REPMOVS{W,D,Q}. 355def FeatureERMSB 356 : SubtargetFeature< 357 "ermsb", "HasERMSB", "true", 358 "REP MOVS/STOS are fast">; 359 360// Bulldozer and newer processors can merge CMP/TEST (but not other 361// instructions) with conditional branches. 362def FeatureBranchFusion 363 : SubtargetFeature<"branchfusion", "HasBranchFusion", "true", 364 "CMP/TEST can be fused with conditional branches">; 365 366// Sandy Bridge and newer processors have many instructions that can be 367// fused with conditional branches and pass through the CPU as a single 368// operation. 369def FeatureMacroFusion 370 : SubtargetFeature<"macrofusion", "HasMacroFusion", "true", 371 "Various instructions can be fused with conditional branches">; 372 373// Gather is available since Haswell (AVX2 set). So technically, we can 374// generate Gathers on all AVX2 processors. But the overhead on HSW is high. 375// Skylake Client processor has faster Gathers than HSW and performance is 376// similar to Skylake Server (AVX-512). 377def FeatureHasFastGather 378 : SubtargetFeature<"fast-gather", "HasFastGather", "true", 379 "Indicates if gather is reasonably fast">; 380 381def FeaturePrefer128Bit 382 : SubtargetFeature<"prefer-128-bit", "Prefer128Bit", "true", 383 "Prefer 128-bit AVX instructions">; 384 385def FeaturePrefer256Bit 386 : SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true", 387 "Prefer 256-bit AVX instructions">; 388 389def FeaturePreferMaskRegisters 390 : SubtargetFeature<"prefer-mask-registers", "PreferMaskRegisters", "true", 391 "Prefer AVX512 mask registers over PTEST/MOVMSK">; 392 393// Lower indirect calls using a special construct called a `retpoline` to 394// mitigate potential Spectre v2 attacks against them. 395def FeatureRetpolineIndirectCalls 396 : SubtargetFeature< 397 "retpoline-indirect-calls", "UseRetpolineIndirectCalls", "true", 398 "Remove speculation of indirect calls from the generated code">; 399 400// Lower indirect branches and switches either using conditional branch trees 401// or using a special construct called a `retpoline` to mitigate potential 402// Spectre v2 attacks against them. 403def FeatureRetpolineIndirectBranches 404 : SubtargetFeature< 405 "retpoline-indirect-branches", "UseRetpolineIndirectBranches", "true", 406 "Remove speculation of indirect branches from the generated code">; 407 408// Deprecated umbrella feature for enabling both `retpoline-indirect-calls` and 409// `retpoline-indirect-branches` above. 410def FeatureRetpoline 411 : SubtargetFeature<"retpoline", "DeprecatedUseRetpoline", "true", 412 "Remove speculation of indirect branches from the " 413 "generated code, either by avoiding them entirely or " 414 "lowering them with a speculation blocking construct", 415 [FeatureRetpolineIndirectCalls, 416 FeatureRetpolineIndirectBranches]>; 417 418// Rely on external thunks for the emitted retpoline calls. This allows users 419// to provide their own custom thunk definitions in highly specialized 420// environments such as a kernel that does boot-time hot patching. 421def FeatureRetpolineExternalThunk 422 : SubtargetFeature< 423 "retpoline-external-thunk", "UseRetpolineExternalThunk", "true", 424 "When lowering an indirect call or branch using a `retpoline`, rely " 425 "on the specified user provided thunk rather than emitting one " 426 "ourselves. Only has effect when combined with some other retpoline " 427 "feature", [FeatureRetpolineIndirectCalls]>; 428 429// Direct Move instructions. 430def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true", 431 "Support movdiri instruction">; 432def FeatureMOVDIR64B : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true", 433 "Support movdir64b instruction">; 434 435def FeatureFastBEXTR : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true", 436 "Indicates that the BEXTR instruction is implemented as a single uop " 437 "with good throughput">; 438 439// Combine vector math operations with shuffles into horizontal math 440// instructions if a CPU implements horizontal operations (introduced with 441// SSE3) with better latency/throughput than the alternative sequence. 442def FeatureFastHorizontalOps 443 : SubtargetFeature< 444 "fast-hops", "HasFastHorizontalOps", "true", 445 "Prefer horizontal vector math instructions (haddp, phsub, etc.) over " 446 "normal vector instructions with shuffles">; 447 448def FeatureFastScalarShiftMasks 449 : SubtargetFeature< 450 "fast-scalar-shift-masks", "HasFastScalarShiftMasks", "true", 451 "Prefer a left/right scalar logical shift pair over a shift+and pair">; 452 453def FeatureFastVectorShiftMasks 454 : SubtargetFeature< 455 "fast-vector-shift-masks", "HasFastVectorShiftMasks", "true", 456 "Prefer a left/right vector logical shift pair over a shift+and pair">; 457 458def FeatureUseGLMDivSqrtCosts 459 : SubtargetFeature<"use-glm-div-sqrt-costs", "UseGLMDivSqrtCosts", "true", 460 "Use Goldmont specific floating point div/sqrt costs">; 461 462// Merge branches using three-way conditional code. 463def FeatureMergeToThreeWayBranch : SubtargetFeature<"merge-to-threeway-branch", 464 "ThreewayBranchProfitable", "true", 465 "Merge branches to a three-way " 466 "conditional branch">; 467 468// Enable use of alias analysis during code generation. 469def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true", 470 "Use alias analysis during codegen">; 471 472// Bonnell 473def ProcIntelAtom : SubtargetFeature<"", "X86ProcFamily", "IntelAtom", "">; 474// Silvermont 475def ProcIntelSLM : SubtargetFeature<"", "X86ProcFamily", "IntelSLM", "">; 476 477//===----------------------------------------------------------------------===// 478// Register File Description 479//===----------------------------------------------------------------------===// 480 481include "X86RegisterInfo.td" 482include "X86RegisterBanks.td" 483 484//===----------------------------------------------------------------------===// 485// Instruction Descriptions 486//===----------------------------------------------------------------------===// 487 488include "X86Schedule.td" 489include "X86InstrInfo.td" 490include "X86SchedPredicates.td" 491 492def X86InstrInfo : InstrInfo; 493 494//===----------------------------------------------------------------------===// 495// X86 Scheduler Models 496//===----------------------------------------------------------------------===// 497 498include "X86ScheduleAtom.td" 499include "X86SchedSandyBridge.td" 500include "X86SchedHaswell.td" 501include "X86SchedBroadwell.td" 502include "X86ScheduleSLM.td" 503include "X86ScheduleZnver1.td" 504include "X86ScheduleZnver2.td" 505include "X86ScheduleBdVer2.td" 506include "X86ScheduleBtVer2.td" 507include "X86SchedSkylakeClient.td" 508include "X86SchedSkylakeServer.td" 509 510//===----------------------------------------------------------------------===// 511// X86 Processor Feature Lists 512//===----------------------------------------------------------------------===// 513 514def ProcessorFeatures { 515 // Nehalem 516 list<SubtargetFeature> NHMInheritableFeatures = [FeatureX87, 517 FeatureCMPXCHG8B, 518 FeatureCMOV, 519 FeatureMMX, 520 FeatureSSE42, 521 FeatureFXSR, 522 FeatureNOPL, 523 Feature64Bit, 524 FeatureCMPXCHG16B, 525 FeaturePOPCNT, 526 FeatureLAHFSAHF, 527 FeatureMacroFusion, 528 FeatureInsertVZEROUPPER]; 529 list<SubtargetFeature> NHMSpecificFeatures = []; 530 list<SubtargetFeature> NHMFeatures = 531 !listconcat(NHMInheritableFeatures, NHMSpecificFeatures); 532 533 // Westmere 534 list<SubtargetFeature> WSMAdditionalFeatures = [FeaturePCLMUL]; 535 list<SubtargetFeature> WSMSpecificFeatures = []; 536 list<SubtargetFeature> WSMInheritableFeatures = 537 !listconcat(NHMInheritableFeatures, WSMAdditionalFeatures); 538 list<SubtargetFeature> WSMFeatures = 539 !listconcat(WSMInheritableFeatures, WSMSpecificFeatures); 540 541 // Sandybridge 542 list<SubtargetFeature> SNBAdditionalFeatures = [FeatureAVX, 543 FeatureSlowDivide64, 544 FeatureXSAVE, 545 FeatureXSAVEOPT, 546 FeatureSlow3OpsLEA, 547 FeatureFastScalarFSQRT, 548 FeatureFastSHLDRotate, 549 FeatureMergeToThreeWayBranch]; 550 list<SubtargetFeature> SNBSpecificFeatures = [FeatureSlowUAMem32, 551 FeaturePOPCNTFalseDeps]; 552 list<SubtargetFeature> SNBInheritableFeatures = 553 !listconcat(WSMInheritableFeatures, SNBAdditionalFeatures); 554 list<SubtargetFeature> SNBFeatures = 555 !listconcat(SNBInheritableFeatures, SNBSpecificFeatures); 556 557 // Ivybridge 558 list<SubtargetFeature> IVBAdditionalFeatures = [FeatureRDRAND, 559 FeatureF16C, 560 FeatureFSGSBase]; 561 list<SubtargetFeature> IVBSpecificFeatures = [FeatureSlowUAMem32, 562 FeaturePOPCNTFalseDeps]; 563 list<SubtargetFeature> IVBInheritableFeatures = 564 !listconcat(SNBInheritableFeatures, IVBAdditionalFeatures); 565 list<SubtargetFeature> IVBFeatures = 566 !listconcat(IVBInheritableFeatures, IVBSpecificFeatures); 567 568 // Haswell 569 list<SubtargetFeature> HSWAdditionalFeatures = [FeatureAVX2, 570 FeatureBMI, 571 FeatureBMI2, 572 FeatureERMSB, 573 FeatureFMA, 574 FeatureINVPCID, 575 FeatureLZCNT, 576 FeatureMOVBE, 577 FeatureFastVariableShuffle]; 578 list<SubtargetFeature> HSWSpecificFeatures = [FeaturePOPCNTFalseDeps, 579 FeatureLZCNTFalseDeps]; 580 list<SubtargetFeature> HSWInheritableFeatures = 581 !listconcat(IVBInheritableFeatures, HSWAdditionalFeatures); 582 list<SubtargetFeature> HSWFeatures = 583 !listconcat(HSWInheritableFeatures, HSWSpecificFeatures); 584 585 // Broadwell 586 list<SubtargetFeature> BDWAdditionalFeatures = [FeatureADX, 587 FeatureRDSEED, 588 FeaturePRFCHW]; 589 list<SubtargetFeature> BDWSpecificFeatures = [FeaturePOPCNTFalseDeps, 590 FeatureLZCNTFalseDeps]; 591 list<SubtargetFeature> BDWInheritableFeatures = 592 !listconcat(HSWInheritableFeatures, BDWAdditionalFeatures); 593 list<SubtargetFeature> BDWFeatures = 594 !listconcat(BDWInheritableFeatures, BDWSpecificFeatures); 595 596 // Skylake 597 list<SubtargetFeature> SKLAdditionalFeatures = [FeatureAES, 598 FeatureXSAVEC, 599 FeatureXSAVES, 600 FeatureCLFLUSHOPT, 601 FeatureFastVectorFSQRT]; 602 list<SubtargetFeature> SKLSpecificFeatures = [FeatureHasFastGather, 603 FeaturePOPCNTFalseDeps, 604 FeatureSGX]; 605 list<SubtargetFeature> SKLInheritableFeatures = 606 !listconcat(BDWInheritableFeatures, SKLAdditionalFeatures); 607 list<SubtargetFeature> SKLFeatures = 608 !listconcat(SKLInheritableFeatures, SKLSpecificFeatures); 609 610 // Skylake-AVX512 611 list<SubtargetFeature> SKXAdditionalFeatures = [FeatureAVX512, 612 FeaturePrefer256Bit, 613 FeatureCDI, 614 FeatureDQI, 615 FeatureBWI, 616 FeatureVLX, 617 FeaturePKU, 618 FeatureCLWB]; 619 list<SubtargetFeature> SKXSpecificFeatures = [FeatureHasFastGather, 620 FeaturePOPCNTFalseDeps]; 621 list<SubtargetFeature> SKXInheritableFeatures = 622 !listconcat(SKLInheritableFeatures, SKXAdditionalFeatures); 623 list<SubtargetFeature> SKXFeatures = 624 !listconcat(SKXInheritableFeatures, SKXSpecificFeatures); 625 626 // Cascadelake 627 list<SubtargetFeature> CLXAdditionalFeatures = [FeatureVNNI]; 628 list<SubtargetFeature> CLXSpecificFeatures = [FeatureHasFastGather, 629 FeaturePOPCNTFalseDeps]; 630 list<SubtargetFeature> CLXInheritableFeatures = 631 !listconcat(SKXInheritableFeatures, CLXAdditionalFeatures); 632 list<SubtargetFeature> CLXFeatures = 633 !listconcat(CLXInheritableFeatures, CLXSpecificFeatures); 634 635 // Cooperlake 636 list<SubtargetFeature> CPXAdditionalFeatures = [FeatureBF16]; 637 list<SubtargetFeature> CPXSpecificFeatures = [FeatureHasFastGather, 638 FeaturePOPCNTFalseDeps]; 639 list<SubtargetFeature> CPXInheritableFeatures = 640 !listconcat(CLXInheritableFeatures, CPXAdditionalFeatures); 641 list<SubtargetFeature> CPXFeatures = 642 !listconcat(CPXInheritableFeatures, CPXSpecificFeatures); 643 644 // Cannonlake 645 list<SubtargetFeature> CNLAdditionalFeatures = [FeatureAVX512, 646 FeaturePrefer256Bit, 647 FeatureCDI, 648 FeatureDQI, 649 FeatureBWI, 650 FeatureVLX, 651 FeaturePKU, 652 FeatureVBMI, 653 FeatureIFMA, 654 FeatureSHA, 655 FeatureSGX]; 656 list<SubtargetFeature> CNLSpecificFeatures = [FeatureHasFastGather]; 657 list<SubtargetFeature> CNLInheritableFeatures = 658 !listconcat(SKLInheritableFeatures, CNLAdditionalFeatures); 659 list<SubtargetFeature> CNLFeatures = 660 !listconcat(CNLInheritableFeatures, CNLSpecificFeatures); 661 662 // Icelake 663 list<SubtargetFeature> ICLAdditionalFeatures = [FeatureBITALG, 664 FeatureVAES, 665 FeatureVBMI2, 666 FeatureVNNI, 667 FeatureVPCLMULQDQ, 668 FeatureVPOPCNTDQ, 669 FeatureGFNI, 670 FeatureCLWB, 671 FeatureRDPID]; 672 list<SubtargetFeature> ICLSpecificFeatures = [FeatureHasFastGather]; 673 list<SubtargetFeature> ICLInheritableFeatures = 674 !listconcat(CNLInheritableFeatures, ICLAdditionalFeatures); 675 list<SubtargetFeature> ICLFeatures = 676 !listconcat(ICLInheritableFeatures, ICLSpecificFeatures); 677 678 // Icelake Server 679 list<SubtargetFeature> ICXSpecificFeatures = [FeaturePCONFIG, 680 FeatureWBNOINVD, 681 FeatureHasFastGather]; 682 list<SubtargetFeature> ICXFeatures = 683 !listconcat(ICLInheritableFeatures, ICXSpecificFeatures); 684 685 //Tigerlake 686 list<SubtargetFeature> TGLAdditionalFeatures = [FeatureVP2INTERSECT, 687 FeatureMOVDIRI, 688 FeatureMOVDIR64B, 689 FeatureSHSTK]; 690 list<SubtargetFeature> TGLSpecificFeatures = [FeatureHasFastGather]; 691 list<SubtargetFeature> TGLInheritableFeatures = 692 !listconcat(TGLAdditionalFeatures ,TGLSpecificFeatures); 693 list<SubtargetFeature> TGLFeatures = 694 !listconcat(ICLFeatures, TGLInheritableFeatures ); 695 696 // Atom 697 list<SubtargetFeature> AtomInheritableFeatures = [FeatureX87, 698 FeatureCMPXCHG8B, 699 FeatureCMOV, 700 FeatureMMX, 701 FeatureSSSE3, 702 FeatureFXSR, 703 FeatureNOPL, 704 Feature64Bit, 705 FeatureCMPXCHG16B, 706 FeatureMOVBE, 707 FeatureSlowTwoMemOps, 708 FeatureLAHFSAHF, 709 FeatureInsertVZEROUPPER]; 710 list<SubtargetFeature> AtomSpecificFeatures = [ProcIntelAtom, 711 FeatureSlowUAMem16, 712 FeatureLEAForSP, 713 FeatureSlowDivide32, 714 FeatureSlowDivide64, 715 FeatureLEAUsesAG, 716 FeaturePadShortFunctions]; 717 list<SubtargetFeature> AtomFeatures = 718 !listconcat(AtomInheritableFeatures, AtomSpecificFeatures); 719 720 // Silvermont 721 list<SubtargetFeature> SLMAdditionalFeatures = [FeatureSSE42, 722 FeaturePOPCNT, 723 FeaturePCLMUL, 724 FeaturePRFCHW, 725 FeatureSlowLEA, 726 FeatureSlowIncDec, 727 FeatureRDRAND]; 728 list<SubtargetFeature> SLMSpecificFeatures = [ProcIntelSLM, 729 FeatureSlowDivide64, 730 FeatureSlowPMULLD, 731 FeaturePOPCNTFalseDeps]; 732 list<SubtargetFeature> SLMInheritableFeatures = 733 !listconcat(AtomInheritableFeatures, SLMAdditionalFeatures); 734 list<SubtargetFeature> SLMFeatures = 735 !listconcat(SLMInheritableFeatures, SLMSpecificFeatures); 736 737 // Goldmont 738 list<SubtargetFeature> GLMAdditionalFeatures = [FeatureAES, 739 FeatureSHA, 740 FeatureRDSEED, 741 FeatureXSAVE, 742 FeatureXSAVEOPT, 743 FeatureXSAVEC, 744 FeatureXSAVES, 745 FeatureCLFLUSHOPT, 746 FeatureFSGSBase]; 747 list<SubtargetFeature> GLMSpecificFeatures = [FeatureUseGLMDivSqrtCosts, 748 FeaturePOPCNTFalseDeps]; 749 list<SubtargetFeature> GLMInheritableFeatures = 750 !listconcat(SLMInheritableFeatures, GLMAdditionalFeatures); 751 list<SubtargetFeature> GLMFeatures = 752 !listconcat(GLMInheritableFeatures, GLMSpecificFeatures); 753 754 // Goldmont Plus 755 list<SubtargetFeature> GLPAdditionalFeatures = [FeaturePTWRITE, 756 FeatureRDPID, 757 FeatureSGX]; 758 list<SubtargetFeature> GLPSpecificFeatures = [FeatureUseGLMDivSqrtCosts]; 759 list<SubtargetFeature> GLPInheritableFeatures = 760 !listconcat(GLMInheritableFeatures, GLPAdditionalFeatures); 761 list<SubtargetFeature> GLPFeatures = 762 !listconcat(GLPInheritableFeatures, GLPSpecificFeatures); 763 764 // Tremont 765 list<SubtargetFeature> TRMAdditionalFeatures = [FeatureCLDEMOTE, 766 FeatureGFNI, 767 FeatureMOVDIRI, 768 FeatureMOVDIR64B, 769 FeatureWAITPKG]; 770 list<SubtargetFeature> TRMSpecificFeatures = [FeatureUseGLMDivSqrtCosts]; 771 list<SubtargetFeature> TRMFeatures = 772 !listconcat(GLPInheritableFeatures, TRMAdditionalFeatures, 773 TRMSpecificFeatures); 774 775 // Knights Landing 776 list<SubtargetFeature> KNLFeatures = [FeatureX87, 777 FeatureCMPXCHG8B, 778 FeatureCMOV, 779 FeatureMMX, 780 FeatureFXSR, 781 FeatureNOPL, 782 Feature64Bit, 783 FeatureCMPXCHG16B, 784 FeaturePOPCNT, 785 FeatureSlowDivide64, 786 FeaturePCLMUL, 787 FeatureXSAVE, 788 FeatureXSAVEOPT, 789 FeatureLAHFSAHF, 790 FeatureSlow3OpsLEA, 791 FeatureSlowIncDec, 792 FeatureAES, 793 FeatureRDRAND, 794 FeatureF16C, 795 FeatureFSGSBase, 796 FeatureAVX512, 797 FeatureERI, 798 FeatureCDI, 799 FeaturePFI, 800 FeaturePREFETCHWT1, 801 FeatureADX, 802 FeatureRDSEED, 803 FeatureMOVBE, 804 FeatureLZCNT, 805 FeatureBMI, 806 FeatureBMI2, 807 FeatureFMA, 808 FeaturePRFCHW, 809 FeaturePreferMaskRegisters, 810 FeatureSlowTwoMemOps, 811 FeatureHasFastGather, 812 FeatureSlowPMADDWD]; 813 // TODO Add AVX5124FMAPS/AVX5124VNNIW features 814 list<SubtargetFeature> KNMFeatures = 815 !listconcat(KNLFeatures, [FeatureVPOPCNTDQ]); 816 817 // Barcelona 818 list<SubtargetFeature> BarcelonaInheritableFeatures = [FeatureX87, 819 FeatureCMPXCHG8B, 820 FeatureSSE4A, 821 Feature3DNowA, 822 FeatureFXSR, 823 FeatureNOPL, 824 FeatureCMPXCHG16B, 825 FeatureLZCNT, 826 FeaturePOPCNT, 827 FeatureSlowSHLD, 828 FeatureLAHFSAHF, 829 FeatureCMOV, 830 Feature64Bit, 831 FeatureFastScalarShiftMasks, 832 FeatureInsertVZEROUPPER]; 833 list<SubtargetFeature> BarcelonaFeatures = BarcelonaInheritableFeatures; 834 835 // Bobcat 836 list<SubtargetFeature> BtVer1InheritableFeatures = [FeatureX87, 837 FeatureCMPXCHG8B, 838 FeatureCMOV, 839 FeatureMMX, 840 FeatureSSSE3, 841 FeatureSSE4A, 842 FeatureFXSR, 843 FeatureNOPL, 844 Feature64Bit, 845 FeatureCMPXCHG16B, 846 FeaturePRFCHW, 847 FeatureLZCNT, 848 FeaturePOPCNT, 849 FeatureSlowSHLD, 850 FeatureLAHFSAHF, 851 FeatureFast15ByteNOP, 852 FeatureFastScalarShiftMasks, 853 FeatureFastVectorShiftMasks]; 854 list<SubtargetFeature> BtVer1SpecificFeatures = [FeatureInsertVZEROUPPER]; 855 list<SubtargetFeature> BtVer1Features = 856 !listconcat(BtVer1InheritableFeatures, BtVer1SpecificFeatures); 857 858 // Jaguar 859 list<SubtargetFeature> BtVer2AdditionalFeatures = [FeatureAVX, 860 FeatureAES, 861 FeaturePCLMUL, 862 FeatureBMI, 863 FeatureF16C, 864 FeatureMOVBE, 865 FeatureXSAVE, 866 FeatureXSAVEOPT]; 867 list<SubtargetFeature> BtVer2SpecificFeatures = [FeatureFastLZCNT, 868 FeatureFastBEXTR, 869 FeatureFastHorizontalOps]; 870 list<SubtargetFeature> BtVer2InheritableFeatures = 871 !listconcat(BtVer1InheritableFeatures, BtVer2AdditionalFeatures); 872 list<SubtargetFeature> BtVer2Features = 873 !listconcat(BtVer2InheritableFeatures, BtVer2SpecificFeatures); 874 875 // Bulldozer 876 list<SubtargetFeature> BdVer1InheritableFeatures = [FeatureX87, 877 FeatureCMPXCHG8B, 878 FeatureCMOV, 879 FeatureXOP, 880 Feature64Bit, 881 FeatureCMPXCHG16B, 882 FeatureAES, 883 FeaturePRFCHW, 884 FeaturePCLMUL, 885 FeatureMMX, 886 FeatureFXSR, 887 FeatureNOPL, 888 FeatureLZCNT, 889 FeaturePOPCNT, 890 FeatureXSAVE, 891 FeatureLWP, 892 FeatureSlowSHLD, 893 FeatureLAHFSAHF, 894 FeatureFast11ByteNOP, 895 FeatureFastScalarShiftMasks, 896 FeatureBranchFusion, 897 FeatureInsertVZEROUPPER]; 898 list<SubtargetFeature> BdVer1Features = BdVer1InheritableFeatures; 899 900 // PileDriver 901 list<SubtargetFeature> BdVer2AdditionalFeatures = [FeatureF16C, 902 FeatureBMI, 903 FeatureTBM, 904 FeatureFMA, 905 FeatureFastBEXTR]; 906 list<SubtargetFeature> BdVer2InheritableFeatures = 907 !listconcat(BdVer1InheritableFeatures, BdVer2AdditionalFeatures); 908 list<SubtargetFeature> BdVer2Features = BdVer2InheritableFeatures; 909 910 // Steamroller 911 list<SubtargetFeature> BdVer3AdditionalFeatures = [FeatureXSAVEOPT, 912 FeatureFSGSBase]; 913 list<SubtargetFeature> BdVer3InheritableFeatures = 914 !listconcat(BdVer2InheritableFeatures, BdVer3AdditionalFeatures); 915 list<SubtargetFeature> BdVer3Features = BdVer3InheritableFeatures; 916 917 // Excavator 918 list<SubtargetFeature> BdVer4AdditionalFeatures = [FeatureAVX2, 919 FeatureBMI2, 920 FeatureMWAITX]; 921 list<SubtargetFeature> BdVer4InheritableFeatures = 922 !listconcat(BdVer3InheritableFeatures, BdVer4AdditionalFeatures); 923 list<SubtargetFeature> BdVer4Features = BdVer4InheritableFeatures; 924 925 926 // AMD Zen Processors common ISAs 927 list<SubtargetFeature> ZNFeatures = [FeatureADX, 928 FeatureAES, 929 FeatureAVX2, 930 FeatureBMI, 931 FeatureBMI2, 932 FeatureCLFLUSHOPT, 933 FeatureCLZERO, 934 FeatureCMOV, 935 Feature64Bit, 936 FeatureCMPXCHG16B, 937 FeatureF16C, 938 FeatureFMA, 939 FeatureFSGSBase, 940 FeatureFXSR, 941 FeatureNOPL, 942 FeatureFastLZCNT, 943 FeatureLAHFSAHF, 944 FeatureLZCNT, 945 FeatureFastBEXTR, 946 FeatureFast15ByteNOP, 947 FeatureBranchFusion, 948 FeatureFastScalarShiftMasks, 949 FeatureMMX, 950 FeatureMOVBE, 951 FeatureMWAITX, 952 FeaturePCLMUL, 953 FeaturePOPCNT, 954 FeaturePRFCHW, 955 FeatureRDRAND, 956 FeatureRDSEED, 957 FeatureSHA, 958 FeatureSSE4A, 959 FeatureSlowSHLD, 960 FeatureInsertVZEROUPPER, 961 FeatureX87, 962 FeatureXSAVE, 963 FeatureXSAVEC, 964 FeatureXSAVEOPT, 965 FeatureXSAVES]; 966 list<SubtargetFeature> ZN2AdditionalFeatures = [FeatureCLWB, 967 FeatureRDPID, 968 FeatureWBNOINVD]; 969 list<SubtargetFeature> ZN2Features = 970 !listconcat(ZNFeatures, ZN2AdditionalFeatures); 971} 972 973//===----------------------------------------------------------------------===// 974// X86 processors supported. 975//===----------------------------------------------------------------------===// 976 977class Proc<string Name, list<SubtargetFeature> Features> 978 : ProcessorModel<Name, GenericModel, Features>; 979 980// NOTE: CMPXCHG8B is here for legacy compatbility so that it is only disabled 981// if i386/i486 is specifically requested. 982def : Proc<"generic", [FeatureX87, FeatureSlowUAMem16, 983 FeatureCMPXCHG8B, FeatureInsertVZEROUPPER]>; 984def : Proc<"i386", [FeatureX87, FeatureSlowUAMem16, 985 FeatureInsertVZEROUPPER]>; 986def : Proc<"i486", [FeatureX87, FeatureSlowUAMem16, 987 FeatureInsertVZEROUPPER]>; 988def : Proc<"i586", [FeatureX87, FeatureSlowUAMem16, 989 FeatureCMPXCHG8B, FeatureInsertVZEROUPPER]>; 990def : Proc<"pentium", [FeatureX87, FeatureSlowUAMem16, 991 FeatureCMPXCHG8B, FeatureInsertVZEROUPPER]>; 992def : Proc<"pentium-mmx", [FeatureX87, FeatureSlowUAMem16, 993 FeatureCMPXCHG8B, FeatureMMX, 994 FeatureInsertVZEROUPPER]>; 995 996def : Proc<"i686", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, 997 FeatureCMOV, FeatureInsertVZEROUPPER]>; 998def : Proc<"pentiumpro", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, 999 FeatureCMOV, FeatureNOPL, FeatureInsertVZEROUPPER]>; 1000 1001def : Proc<"pentium2", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, 1002 FeatureMMX, FeatureCMOV, FeatureFXSR, 1003 FeatureNOPL, FeatureInsertVZEROUPPER]>; 1004 1005foreach P = ["pentium3", "pentium3m"] in { 1006 def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,FeatureMMX, 1007 FeatureSSE1, FeatureFXSR, FeatureNOPL, FeatureCMOV, 1008 FeatureInsertVZEROUPPER]>; 1009} 1010 1011// Enable the PostRAScheduler for SSE2 and SSE3 class cpus. 1012// The intent is to enable it for pentium4 which is the current default 1013// processor in a vanilla 32-bit clang compilation when no specific 1014// architecture is specified. This generally gives a nice performance 1015// increase on silvermont, with largely neutral behavior on other 1016// contemporary large core processors. 1017// pentium-m, pentium4m, prescott and nocona are included as a preventative 1018// measure to avoid performance surprises, in case clang's default cpu 1019// changes slightly. 1020 1021def : ProcessorModel<"pentium-m", GenericPostRAModel, 1022 [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, 1023 FeatureMMX, FeatureSSE2, FeatureFXSR, FeatureNOPL, 1024 FeatureCMOV, FeatureInsertVZEROUPPER]>; 1025 1026foreach P = ["pentium4", "pentium4m"] in { 1027 def : ProcessorModel<P, GenericPostRAModel, 1028 [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, 1029 FeatureMMX, FeatureSSE2, FeatureFXSR, FeatureNOPL, 1030 FeatureCMOV, FeatureInsertVZEROUPPER]>; 1031} 1032 1033// Intel Quark. 1034def : Proc<"lakemont", [FeatureInsertVZEROUPPER]>; 1035 1036// Intel Core Duo. 1037def : ProcessorModel<"yonah", SandyBridgeModel, 1038 [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, 1039 FeatureMMX, FeatureSSE3, FeatureFXSR, FeatureNOPL, 1040 FeatureCMOV, FeatureInsertVZEROUPPER]>; 1041 1042// NetBurst. 1043def : ProcessorModel<"prescott", GenericPostRAModel, 1044 [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, 1045 FeatureMMX, FeatureSSE3, FeatureFXSR, FeatureNOPL, 1046 FeatureCMOV, FeatureInsertVZEROUPPER]>; 1047def : ProcessorModel<"nocona", GenericPostRAModel, [ 1048 FeatureX87, 1049 FeatureSlowUAMem16, 1050 FeatureCMPXCHG8B, 1051 FeatureCMOV, 1052 FeatureMMX, 1053 FeatureSSE3, 1054 FeatureFXSR, 1055 FeatureNOPL, 1056 Feature64Bit, 1057 FeatureCMPXCHG16B, 1058 FeatureInsertVZEROUPPER 1059]>; 1060 1061// Intel Core 2 Solo/Duo. 1062def : ProcessorModel<"core2", SandyBridgeModel, [ 1063 FeatureX87, 1064 FeatureSlowUAMem16, 1065 FeatureCMPXCHG8B, 1066 FeatureCMOV, 1067 FeatureMMX, 1068 FeatureSSSE3, 1069 FeatureFXSR, 1070 FeatureNOPL, 1071 Feature64Bit, 1072 FeatureCMPXCHG16B, 1073 FeatureLAHFSAHF, 1074 FeatureMacroFusion, 1075 FeatureInsertVZEROUPPER 1076]>; 1077def : ProcessorModel<"penryn", SandyBridgeModel, [ 1078 FeatureX87, 1079 FeatureSlowUAMem16, 1080 FeatureCMPXCHG8B, 1081 FeatureCMOV, 1082 FeatureMMX, 1083 FeatureSSE41, 1084 FeatureFXSR, 1085 FeatureNOPL, 1086 Feature64Bit, 1087 FeatureCMPXCHG16B, 1088 FeatureLAHFSAHF, 1089 FeatureMacroFusion, 1090 FeatureInsertVZEROUPPER 1091]>; 1092 1093// Atom CPUs. 1094foreach P = ["bonnell", "atom"] in { 1095 def : ProcessorModel<P, AtomModel, ProcessorFeatures.AtomFeatures>; 1096} 1097 1098foreach P = ["silvermont", "slm"] in { 1099 def : ProcessorModel<P, SLMModel, ProcessorFeatures.SLMFeatures>; 1100} 1101 1102def : ProcessorModel<"goldmont", SLMModel, ProcessorFeatures.GLMFeatures>; 1103def : ProcessorModel<"goldmont-plus", SLMModel, ProcessorFeatures.GLPFeatures>; 1104def : ProcessorModel<"tremont", SLMModel, ProcessorFeatures.TRMFeatures>; 1105 1106// "Arrandale" along with corei3 and corei5 1107foreach P = ["nehalem", "corei7"] in { 1108 def : ProcessorModel<P, SandyBridgeModel, ProcessorFeatures.NHMFeatures>; 1109} 1110 1111// Westmere is the corei3/i5/i7 path from nehalem to sandybridge 1112def : ProcessorModel<"westmere", SandyBridgeModel, 1113 ProcessorFeatures.WSMFeatures>; 1114 1115foreach P = ["sandybridge", "corei7-avx"] in { 1116 def : ProcessorModel<P, SandyBridgeModel, ProcessorFeatures.SNBFeatures>; 1117} 1118 1119foreach P = ["ivybridge", "core-avx-i"] in { 1120 def : ProcessorModel<P, SandyBridgeModel, ProcessorFeatures.IVBFeatures>; 1121} 1122 1123foreach P = ["haswell", "core-avx2"] in { 1124 def : ProcessorModel<P, HaswellModel, ProcessorFeatures.HSWFeatures>; 1125} 1126 1127def : ProcessorModel<"broadwell", BroadwellModel, 1128 ProcessorFeatures.BDWFeatures>; 1129 1130def : ProcessorModel<"skylake", SkylakeClientModel, 1131 ProcessorFeatures.SKLFeatures>; 1132 1133// FIXME: define KNL scheduler model 1134def : ProcessorModel<"knl", HaswellModel, ProcessorFeatures.KNLFeatures>; 1135def : ProcessorModel<"knm", HaswellModel, ProcessorFeatures.KNMFeatures>; 1136 1137foreach P = ["skylake-avx512", "skx"] in { 1138 def : ProcessorModel<P, SkylakeServerModel, ProcessorFeatures.SKXFeatures>; 1139} 1140 1141def : ProcessorModel<"cascadelake", SkylakeServerModel, 1142 ProcessorFeatures.CLXFeatures>; 1143def : ProcessorModel<"cooperlake", SkylakeServerModel, 1144 ProcessorFeatures.CPXFeatures>; 1145def : ProcessorModel<"cannonlake", SkylakeServerModel, 1146 ProcessorFeatures.CNLFeatures>; 1147def : ProcessorModel<"icelake-client", SkylakeServerModel, 1148 ProcessorFeatures.ICLFeatures>; 1149def : ProcessorModel<"icelake-server", SkylakeServerModel, 1150 ProcessorFeatures.ICXFeatures>; 1151def : ProcessorModel<"tigerlake", SkylakeServerModel, 1152 ProcessorFeatures.TGLFeatures>; 1153 1154// AMD CPUs. 1155 1156def : Proc<"k6", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, 1157 FeatureMMX, FeatureInsertVZEROUPPER]>; 1158def : Proc<"k6-2", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, 1159 Feature3DNow, FeatureInsertVZEROUPPER]>; 1160def : Proc<"k6-3", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, 1161 Feature3DNow, FeatureInsertVZEROUPPER]>; 1162 1163foreach P = ["athlon", "athlon-tbird"] in { 1164 def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureCMOV, 1165 Feature3DNowA, FeatureNOPL, FeatureSlowSHLD, 1166 FeatureInsertVZEROUPPER]>; 1167} 1168 1169foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in { 1170 def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureCMOV, 1171 FeatureSSE1, Feature3DNowA, FeatureFXSR, FeatureNOPL, 1172 FeatureSlowSHLD, FeatureInsertVZEROUPPER]>; 1173} 1174 1175foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in { 1176 def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, 1177 FeatureSSE2, Feature3DNowA, FeatureFXSR, FeatureNOPL, 1178 Feature64Bit, FeatureSlowSHLD, FeatureCMOV, 1179 FeatureFastScalarShiftMasks, FeatureInsertVZEROUPPER]>; 1180} 1181 1182foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in { 1183 def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureSSE3, 1184 Feature3DNowA, FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B, 1185 FeatureSlowSHLD, FeatureCMOV, Feature64Bit, 1186 FeatureFastScalarShiftMasks, FeatureInsertVZEROUPPER]>; 1187} 1188 1189foreach P = ["amdfam10", "barcelona"] in { 1190 def : Proc<P, ProcessorFeatures.BarcelonaFeatures>; 1191} 1192 1193// Bobcat 1194def : Proc<"btver1", ProcessorFeatures.BtVer1Features>; 1195// Jaguar 1196def : ProcessorModel<"btver2", BtVer2Model, ProcessorFeatures.BtVer2Features>; 1197 1198// Bulldozer 1199def : ProcessorModel<"bdver1", BdVer2Model, ProcessorFeatures.BdVer1Features>; 1200// Piledriver 1201def : ProcessorModel<"bdver2", BdVer2Model, ProcessorFeatures.BdVer2Features>; 1202// Steamroller 1203def : Proc<"bdver3", ProcessorFeatures.BdVer3Features>; 1204// Excavator 1205def : Proc<"bdver4", ProcessorFeatures.BdVer4Features>; 1206 1207def : ProcessorModel<"znver1", Znver1Model, ProcessorFeatures.ZNFeatures>; 1208def : ProcessorModel<"znver2", Znver2Model, ProcessorFeatures.ZN2Features>; 1209 1210def : Proc<"geode", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, 1211 Feature3DNowA, FeatureInsertVZEROUPPER]>; 1212 1213def : Proc<"winchip-c6", [FeatureX87, FeatureSlowUAMem16, FeatureMMX, 1214 FeatureInsertVZEROUPPER]>; 1215def : Proc<"winchip2", [FeatureX87, FeatureSlowUAMem16, Feature3DNow, 1216 FeatureInsertVZEROUPPER]>; 1217def : Proc<"c3", [FeatureX87, FeatureSlowUAMem16, Feature3DNow, 1218 FeatureInsertVZEROUPPER]>; 1219def : Proc<"c3-2", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, 1220 FeatureMMX, FeatureSSE1, FeatureFXSR, 1221 FeatureCMOV, FeatureInsertVZEROUPPER]>; 1222 1223// We also provide a generic 64-bit specific x86 processor model which tries to 1224// be good for modern chips without enabling instruction set encodings past the 1225// basic SSE2 and 64-bit ones. It disables slow things from any mainstream and 1226// modern 64-bit x86 chip, and enables features that are generally beneficial. 1227// 1228// We currently use the Sandy Bridge model as the default scheduling model as 1229// we use it across Nehalem, Westmere, Sandy Bridge, and Ivy Bridge which 1230// covers a huge swath of x86 processors. If there are specific scheduling 1231// knobs which need to be tuned differently for AMD chips, we might consider 1232// forming a common base for them. 1233def : ProcessorModel<"x86-64", SandyBridgeModel, [ 1234 FeatureX87, 1235 FeatureCMPXCHG8B, 1236 FeatureCMOV, 1237 FeatureMMX, 1238 FeatureSSE2, 1239 FeatureFXSR, 1240 FeatureNOPL, 1241 Feature64Bit, 1242 FeatureSlow3OpsLEA, 1243 FeatureSlowIncDec, 1244 FeatureMacroFusion, 1245 FeatureInsertVZEROUPPER 1246]>; 1247 1248//===----------------------------------------------------------------------===// 1249// Calling Conventions 1250//===----------------------------------------------------------------------===// 1251 1252include "X86CallingConv.td" 1253 1254 1255//===----------------------------------------------------------------------===// 1256// Assembly Parser 1257//===----------------------------------------------------------------------===// 1258 1259def ATTAsmParserVariant : AsmParserVariant { 1260 int Variant = 0; 1261 1262 // Variant name. 1263 string Name = "att"; 1264 1265 // Discard comments in assembly strings. 1266 string CommentDelimiter = "#"; 1267 1268 // Recognize hard coded registers. 1269 string RegisterPrefix = "%"; 1270} 1271 1272def IntelAsmParserVariant : AsmParserVariant { 1273 int Variant = 1; 1274 1275 // Variant name. 1276 string Name = "intel"; 1277 1278 // Discard comments in assembly strings. 1279 string CommentDelimiter = ";"; 1280 1281 // Recognize hard coded registers. 1282 string RegisterPrefix = ""; 1283} 1284 1285//===----------------------------------------------------------------------===// 1286// Assembly Printers 1287//===----------------------------------------------------------------------===// 1288 1289// The X86 target supports two different syntaxes for emitting machine code. 1290// This is controlled by the -x86-asm-syntax={att|intel} 1291def ATTAsmWriter : AsmWriter { 1292 string AsmWriterClassName = "ATTInstPrinter"; 1293 int Variant = 0; 1294} 1295def IntelAsmWriter : AsmWriter { 1296 string AsmWriterClassName = "IntelInstPrinter"; 1297 int Variant = 1; 1298} 1299 1300def X86 : Target { 1301 // Information about the instructions... 1302 let InstructionSet = X86InstrInfo; 1303 let AssemblyParserVariants = [ATTAsmParserVariant, IntelAsmParserVariant]; 1304 let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter]; 1305 let AllowRegisterRenaming = 1; 1306} 1307 1308//===----------------------------------------------------------------------===// 1309// Pfm Counters 1310//===----------------------------------------------------------------------===// 1311 1312include "X86PfmCounters.td" 1313