//===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This is a target description file for the Intel i386 architecture, referred
// to here as the "X86" architecture.
//
//===----------------------------------------------------------------------===//

// Get the target-independent interfaces which we are implementing...
//
include "llvm/Target/Target.td"

//===----------------------------------------------------------------------===//
// X86 Subtarget state
//
// NOTE(review): SubtargetFeature is declared in Target.td; its arguments look
// like <feature name, subtarget field, value, description, implied features>.
// Confirm against Target.td before relying on this.
//

def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true",
                                  "64-bit mode (x86_64)">;
def Mode32Bit : SubtargetFeature<"32bit-mode", "In32BitMode", "true",
                                  "32-bit mode (80386)">;
def Mode16Bit : SubtargetFeature<"16bit-mode", "In16BitMode", "true",
                                  "16-bit mode (i8086)">;

//===----------------------------------------------------------------------===//
// X86 Subtarget features
//===----------------------------------------------------------------------===//

def FeatureX87     : SubtargetFeature<"x87","HasX87", "true",
                                      "Enable X87 float instructions">;

def FeatureNOPL    : SubtargetFeature<"nopl", "HasNOPL", "true",
                                      "Enable NOPL instruction">;

def FeatureCMOV    : SubtargetFeature<"cmov","HasCMov", "true",
                                      "Enable conditional move instructions">;

def FeatureCMPXCHG8B : SubtargetFeature<"cx8", "HasCmpxchg8b", "true",
                                        "Support CMPXCHG8B instructions">;

def FeaturePOPCNT  : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
                                      "Support POPCNT instruction">;

def FeatureFXSR    : SubtargetFeature<"fxsr", "HasFXSR", "true",
                                      "Support fxsave/fxrestore instructions">;

// The xsaveopt/xsavec/xsaves variants each list the base xsave feature as
// implied.
def FeatureXSAVE   : SubtargetFeature<"xsave", "HasXSAVE", "true",
                                      "Support xsave instructions">;

def FeatureXSAVEOPT: SubtargetFeature<"xsaveopt", "HasXSAVEOPT", "true",
                                      "Support xsaveopt instructions",
                                      [FeatureXSAVE]>;

def FeatureXSAVEC  : SubtargetFeature<"xsavec", "HasXSAVEC", "true",
                                      "Support xsavec instructions",
                                      [FeatureXSAVE]>;

def FeatureXSAVES  : SubtargetFeature<"xsaves", "HasXSAVES", "true",
                                      "Support xsaves instructions",
                                      [FeatureXSAVE]>;

// The SSE levels form a chain: each level lists the previous one as implied.
def FeatureSSE1    : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
                                      "Enable SSE instructions">;
def FeatureSSE2    : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
                                      "Enable SSE2 instructions",
                                      [FeatureSSE1]>;
def FeatureSSE3    : SubtargetFeature<"sse3", "X86SSELevel", "SSE3",
                                      "Enable SSE3 instructions",
                                      [FeatureSSE2]>;
def FeatureSSSE3   : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3",
                                      "Enable SSSE3 instructions",
                                      [FeatureSSE3]>;
def FeatureSSE41   : SubtargetFeature<"sse4.1", "X86SSELevel", "SSE41",
                                      "Enable SSE 4.1 instructions",
                                      [FeatureSSSE3]>;
def FeatureSSE42   : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42",
                                      "Enable SSE 4.2 instructions",
                                      [FeatureSSE41]>;
// The MMX subtarget feature is separate from the rest of the SSE features
// because it's important (for odd compatibility reasons) to be able to
// turn it off explicitly while allowing SSE+ to be on.
def FeatureMMX     : SubtargetFeature<"mmx","X863DNowLevel", "MMX",
                                      "Enable MMX instructions">;
def Feature3DNow   : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
                                      "Enable 3DNow! instructions",
                                      [FeatureMMX]>;
def Feature3DNowA  : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
                                      "Enable 3DNow! Athlon instructions",
                                      [Feature3DNow]>;
// All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied
// feature, because SSE2 can be disabled (e.g. for compiling OS kernels)
// without disabling 64-bit mode. Nothing should imply this feature bit. It
// is used to enforce that only 64-bit capable CPUs are used in 64-bit mode.
def Feature64Bit   : SubtargetFeature<"64bit", "HasX86_64", "true",
                                      "Support 64-bit instructions">;
def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true",
                                         "64-bit with cmpxchg16b",
                                         [FeatureCMPXCHG8B]>;
def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
                                       "SHLD instruction is slow">;
def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
                                        "PMULLD instruction is slow">;
def FeatureSlowPMADDWD : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow",
                                          "true",
                                          "PMADDWD is slower than PMULLD">;
// FIXME: This should not apply to CPUs that do not have SSE.
def FeatureSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
                                "IsUAMem16Slow", "true",
                                "Slow unaligned 16-byte memory access">;
def FeatureSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32",
                                "IsUAMem32Slow", "true",
                                "Slow unaligned 32-byte memory access">;
def FeatureSSE4A   : SubtargetFeature<"sse4a", "HasSSE4A", "true",
                                      "Support SSE 4a instructions",
                                      [FeatureSSE3]>;

// AVX continues the X86SSELevel chain on top of SSE4.2.
def FeatureAVX     : SubtargetFeature<"avx", "X86SSELevel", "AVX",
                                      "Enable AVX instructions",
                                      [FeatureSSE42]>;
def FeatureAVX2    : SubtargetFeature<"avx2", "X86SSELevel", "AVX2",
                                      "Enable AVX2 instructions",
                                      [FeatureAVX]>;
def FeatureFMA     : SubtargetFeature<"fma", "HasFMA", "true",
                                      "Enable three-operand fused multiply-add",
                                      [FeatureAVX]>;
def FeatureF16C    : SubtargetFeature<"f16c", "HasF16C", "true",
                       "Support 16-bit floating point conversion instructions",
                       [FeatureAVX]>;
def FeatureAVX512   : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512F",
                                      "Enable AVX-512 instructions",
                                      [FeatureAVX2, FeatureFMA, FeatureF16C]>;
def FeatureERI      : SubtargetFeature<"avx512er", "HasERI", "true",
                      "Enable AVX-512 Exponential and Reciprocal Instructions",
                                      [FeatureAVX512]>;
def FeatureCDI      : SubtargetFeature<"avx512cd", "HasCDI", "true",
                      "Enable AVX-512 Conflict Detection Instructions",
                                      [FeatureAVX512]>;
def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ",
                       "true", "Enable AVX-512 Population Count Instructions",
                                      [FeatureAVX512]>;
def FeaturePFI      : SubtargetFeature<"avx512pf", "HasPFI", "true",
                      "Enable AVX-512 PreFetch Instructions",
                                      [FeatureAVX512]>;
def FeaturePREFETCHWT1  : SubtargetFeature<"prefetchwt1", "HasPREFETCHWT1",
                                   "true",
                                   "Prefetch with Intent to Write and T1 Hint">;
def FeatureDQI     : SubtargetFeature<"avx512dq", "HasDQI", "true",
                      "Enable AVX-512 Doubleword and Quadword Instructions",
                                      [FeatureAVX512]>;
def FeatureBWI     : SubtargetFeature<"avx512bw", "HasBWI", "true",
                      "Enable AVX-512 Byte and Word Instructions",
                                      [FeatureAVX512]>;
def FeatureVLX     : SubtargetFeature<"avx512vl", "HasVLX", "true",
                      "Enable AVX-512 Vector Length eXtensions",
                                      [FeatureAVX512]>;
def FeatureVBMI     : SubtargetFeature<"avx512vbmi", "HasVBMI", "true",
                      "Enable AVX-512 Vector Byte Manipulation Instructions",
                                      [FeatureBWI]>;
def FeatureVBMI2    : SubtargetFeature<"avx512vbmi2", "HasVBMI2", "true",
                      "Enable AVX-512 further Vector Byte Manipulation Instructions",
                                      [FeatureBWI]>;
def FeatureIFMA     : SubtargetFeature<"avx512ifma", "HasIFMA", "true",
                      "Enable AVX-512 Integer Fused Multiply-Add",
                                      [FeatureAVX512]>;
def FeaturePKU   : SubtargetFeature<"pku", "HasPKU", "true",
                      "Enable protection keys">;
def FeatureVNNI    : SubtargetFeature<"avx512vnni", "HasVNNI", "true",
                          "Enable AVX-512 Vector Neural Network Instructions",
                                      [FeatureAVX512]>;
def FeatureBF16    : SubtargetFeature<"avx512bf16", "HasBF16", "true",
                           "Support bfloat16 floating point",
                                      [FeatureBWI]>;
def FeatureBITALG  : SubtargetFeature<"avx512bitalg", "HasBITALG", "true",
                       "Enable AVX-512 Bit Algorithms",
                        [FeatureBWI]>;
def FeatureVP2INTERSECT  : SubtargetFeature<"avx512vp2intersect",
                                            "HasVP2INTERSECT", "true",
                                            "Enable AVX-512 vp2intersect",
                                            [FeatureAVX512]>;
def FeaturePCLMUL  : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
                         "Enable packed carry-less multiplication instructions",
                               [FeatureSSE2]>;
def FeatureGFNI    : SubtargetFeature<"gfni", "HasGFNI", "true",
                         "Enable Galois Field Arithmetic Instructions",
                               [FeatureSSE2]>;
def FeatureVPCLMULQDQ : SubtargetFeature<"vpclmulqdq", "HasVPCLMULQDQ", "true",
                                         "Enable vpclmulqdq instructions",
                                         [FeatureAVX, FeaturePCLMUL]>;
def FeatureFMA4    : SubtargetFeature<"fma4", "HasFMA4", "true",
                                      "Enable four-operand fused multiply-add",
                                      [FeatureAVX, FeatureSSE4A]>;
def FeatureXOP     : SubtargetFeature<"xop", "HasXOP", "true",
                                      "Enable XOP instructions",
                                      [FeatureFMA4]>;
def FeatureSSEUnalignedMem : SubtargetFeature<"sse-unaligned-mem",
                                          "HasSSEUnalignedMem", "true",
                      "Allow unaligned memory operands with SSE instructions">;
def FeatureAES     : SubtargetFeature<"aes", "HasAES", "true",
                                      "Enable AES instructions",
                                      [FeatureSSE2]>;
def FeatureVAES    : SubtargetFeature<"vaes", "HasVAES", "true",
                       "Promote selected AES instructions to AVX512/AVX registers",
                        [FeatureAVX, FeatureAES]>;
def FeatureTBM     : SubtargetFeature<"tbm", "HasTBM", "true",
                                      "Enable TBM instructions">;
def FeatureLWP     : SubtargetFeature<"lwp", "HasLWP", "true",
                                      "Enable LWP instructions">;
def FeatureMOVBE   : SubtargetFeature<"movbe", "HasMOVBE", "true",
                                      "Support MOVBE instruction">;
def FeatureRDRAND  : SubtargetFeature<"rdrnd", "HasRDRAND", "true",
                                      "Support RDRAND instruction">;
def FeatureFSGSBase : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true",
                                       "Support FS/GS Base instructions">;
def FeatureLZCNT   : SubtargetFeature<"lzcnt", "HasLZCNT", "true",
                                      "Support LZCNT instruction">;
def FeatureBMI     : SubtargetFeature<"bmi", "HasBMI", "true",
                                      "Support BMI instructions">;
def FeatureBMI2    : SubtargetFeature<"bmi2", "HasBMI2", "true",
                                      "Support BMI2 instructions">;
def FeatureRTM     : SubtargetFeature<"rtm", "HasRTM", "true",
                                      "Support RTM instructions">;
def FeatureADX     : SubtargetFeature<"adx", "HasADX", "true",
                                      "Support ADX instructions">;
def FeatureSHA     : SubtargetFeature<"sha", "HasSHA", "true",
                                      "Enable SHA instructions",
                                      [FeatureSSE2]>;
def FeatureSHSTK   : SubtargetFeature<"shstk", "HasSHSTK", "true",
                       "Support CET Shadow-Stack instructions">;
def FeaturePRFCHW  : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
                                      "Support PRFCHW instructions">;
def FeatureRDSEED  : SubtargetFeature<"rdseed", "HasRDSEED", "true",
                                      "Support RDSEED instruction">;
def FeatureLAHFSAHF : SubtargetFeature<"sahf", "HasLAHFSAHF", "true",
                                       "Support LAHF and SAHF instructions">;
def FeatureMWAITX  : SubtargetFeature<"mwaitx", "HasMWAITX", "true",
                                      "Enable MONITORX/MWAITX timer functionality">;
def FeatureCLZERO  : SubtargetFeature<"clzero", "HasCLZERO", "true",
                                      "Enable Cache Line Zero">;
def FeatureCLDEMOTE  : SubtargetFeature<"cldemote", "HasCLDEMOTE", "true",
                                      "Enable Cache Demote">;
def FeaturePTWRITE  : SubtargetFeature<"ptwrite", "HasPTWRITE", "true",
                                      "Support ptwrite instruction">;
// FIXME: This feature is deprecated in 10.0 and should not be used for
// anything, but removing it would break IR files that may contain it in a
// target-feature attribute.
def FeatureDeprecatedMPX : SubtargetFeature<"mpx", "DeprecatedHasMPX", "false",
                                      "Deprecated. Support MPX instructions">;
// The AMX int8/bf16 extensions each list amx-tile as implied.
def FeatureAMXTILE     : SubtargetFeature<"amx-tile", "HasAMXTILE", "true",
                                      "Support AMX-TILE instructions">;
def FeatureAMXINT8     : SubtargetFeature<"amx-int8", "HasAMXINT8", "true",
                                      "Support AMX-INT8 instructions",
                                      [FeatureAMXTILE]>;
def FeatureAMXBF16     : SubtargetFeature<"amx-bf16", "HasAMXBF16", "true",
                                      "Support AMX-BF16 instructions",
                                      [FeatureAMXTILE]>;
def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
                                     "Use LEA for adjusting the stack pointer">;
def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb",
                                     "HasSlowDivide32", "true",
                                     "Use 8-bit divide for positive values less than 256">;
def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divl",
                                     "HasSlowDivide64", "true",
                                     "Use 32-bit divide for positive values less than 2^32">;
def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
                                     "PadShortFunctions", "true",
                                     "Pad short functions">;
def FeatureINVPCID : SubtargetFeature<"invpcid", "HasINVPCID", "true",
                                      "Invalidate Process-Context Identifier">;
def FeatureSGX     : SubtargetFeature<"sgx", "HasSGX", "true",
                                      "Enable Software Guard Extensions">;
def FeatureCLFLUSHOPT : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true",
                                      "Flush A Cache Line Optimized">;
def FeatureCLWB    : SubtargetFeature<"clwb", "HasCLWB", "true",
                                      "Cache Line Write Back">;
def FeatureWBNOINVD    : SubtargetFeature<"wbnoinvd", "HasWBNOINVD", "true",
                                      "Write Back No Invalidate">;
def FeatureRDPID : SubtargetFeature<"rdpid", "HasRDPID", "true",
                                    "Support RDPID instructions">;
def FeatureWAITPKG  : SubtargetFeature<"waitpkg", "HasWAITPKG", "true",
                                      "Wait and pause enhancements">;
def FeatureENQCMD : SubtargetFeature<"enqcmd", "HasENQCMD", "true",
                                     "Has ENQCMD instructions">;
def FeatureSERIALIZE : SubtargetFeature<"serialize", "HasSERIALIZE", "true",
                                        "Has serialize instruction">;
def FeatureTSXLDTRK : SubtargetFeature<"tsxldtrk", "HasTSXLDTRK", "true",
                                       "Support TSXLDTRK instructions">;
// On some processors, instructions that implicitly take two memory operands are
// slow. In practice, this means that CALL, PUSH, and POP with memory operands
// should be avoided in favor of a MOV + register CALL/PUSH/POP.
def FeatureSlowTwoMemOps : SubtargetFeature<"slow-two-mem-ops",
                                     "SlowTwoMemOps", "true",
                                     "Two memory operand instructions are slow">;
def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
                                   "LEA instruction needs inputs at AG stage">;
def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
                                   "LEA instruction with certain arguments is slow">;
def FeatureSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
                                   "LEA instruction with 3 ops or certain registers is slow">;
def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
                                   "INC and DEC instructions are slower than ADD and SUB">;
def FeatureSoftFloat
    : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
                       "Use software floating point features">;
def FeaturePOPCNTFalseDeps : SubtargetFeature<"false-deps-popcnt",
                                     "HasPOPCNTFalseDeps", "true",
                                     "POPCNT has a false dependency on dest register">;
def FeatureLZCNTFalseDeps : SubtargetFeature<"false-deps-lzcnt-tzcnt",
                                     "HasLZCNTFalseDeps", "true",
                                     "LZCNT/TZCNT have a false dependency on dest register">;
def FeaturePCONFIG : SubtargetFeature<"pconfig", "HasPCONFIG", "true",
                                      "platform configuration instruction">;
// On recent X86 (port bound) processors, it's preferable to combine to a single
// shuffle using a variable mask over multiple fixed shuffles.
def FeatureFastVariableShuffle : SubtargetFeature<
    "fast-variable-shuffle", "HasFastVariableShuffle", "true",
    "Shuffles with variable masks are fast">;

// Some X86 processors need a vzeroupper between code that used ymm/zmm
// registers and code that may execute SSE instructions.
def FeatureInsertVZEROUPPER : SubtargetFeature<
    "vzeroupper", "InsertVZEROUPPER", "true",
    "Should insert vzeroupper instructions">;

// Enable FeatureFastScalarFSQRT when scalar FSQRT has lower latency than the
// equivalent Newton-Raphson (NR) sequence, and FeatureFastVectorFSQRT when
// vector FSQRT has higher throughput than the equivalent NR sequence.
// Rationale: throughput-bound code tends to be vectorized, so SQRT throughput
// is what matters there; scalar code usually carries a dependency chain, so
// latency is the better thing to minimize.
def FeatureFastScalarFSQRT : SubtargetFeature<
    "fast-scalar-fsqrt", "HasFastScalarFSQRT", "true",
    "Scalar SQRT is fast (disable Newton-Raphson)">;
def FeatureFastVectorFSQRT : SubtargetFeature<
    "fast-vector-fsqrt", "HasFastVectorFSQRT", "true",
    "Vector SQRT is fast (disable Newton-Raphson)">;

// When lzcnt is as cheap (latency/throughput) as most simple integer ops, it
// can stand in for test/set sequences.
def FeatureFastLZCNT : SubtargetFeature<
    "fast-lzcnt", "HasFastLZCNT", "true",
    "LZCNT instructions are as fast as most simple integer ops">;

// Set when the target efficiently decodes NOPs up to 7 bytes long.
def FeatureFast7ByteNOP : SubtargetFeature<
    "fast-7bytenop", "HasFast7ByteNOP", "true",
    "Target can quickly decode up to 7 byte NOPs">;

// Set when the target efficiently decodes NOPs up to 11 bytes long.
def FeatureFast11ByteNOP : SubtargetFeature<"fast-11bytenop",
                                            "HasFast11ByteNOP", "true",
    "Target can quickly decode up to 11 byte NOPs">;

// Set when the target efficiently decodes NOPs up to 15 bytes long.
def FeatureFast15ByteNOP : SubtargetFeature<"fast-15bytenop",
                                            "HasFast15ByteNOP", "true",
    "Target can quickly decode up to 15 byte NOPs">;

// On Sandy Bridge and later, SHLD with the same register as both sources can
// implement a rotate, sidestepping the partial flag update that the regular
// rotate instructions incur.
def FeatureFastSHLDRotate : SubtargetFeature<"fast-shld-rotate",
                                             "HasFastSHLDRotate", "true",
    "SHLD can be used as a faster rotate">;

// Ivy Bridge and later implement enhanced REP MOVSB and STOSB ("string
// operations"); see "REP String Enhancement" in the Intel Software Development
// Manual. In effect REP MOVSB copies with the widest available size rather
// than byte by byte, so it is at least as fast as REPMOVS{W,D,Q}.
def FeatureERMSB : SubtargetFeature<"ermsb", "HasERMSB", "true",
    "REP MOVS/STOS are fast">;

// Bulldozer and later can fuse CMP/TEST (and no other instructions) with a
// following conditional branch.
def FeatureBranchFusion : SubtargetFeature<"branchfusion",
                                           "HasBranchFusion", "true",
    "CMP/TEST can be fused with conditional branches">;

// Sandy Bridge and later can fuse many instructions with a following
// conditional branch so the pair flows through the CPU as one operation.
def FeatureMacroFusion : SubtargetFeature<"macrofusion",
                                          "HasMacroFusion", "true",
    "Various instructions can be fused with conditional branches">;

// Gather is available since Haswell (AVX2 set). So technically, we can
// generate Gathers on all AVX2 processors. But the overhead on HSW is high.
// Skylake Client processor has faster Gathers than HSW and performance is
// similar to Skylake Server (AVX-512).
def FeatureHasFastGather : SubtargetFeature<
    "fast-gather", "HasFastGather", "true",
    "Indicates if gather is reasonably fast">;

def FeaturePrefer128Bit : SubtargetFeature<
    "prefer-128-bit", "Prefer128Bit", "true",
    "Prefer 128-bit AVX instructions">;

def FeaturePrefer256Bit : SubtargetFeature<
    "prefer-256-bit", "Prefer256Bit", "true",
    "Prefer 256-bit AVX instructions">;

def FeaturePreferMaskRegisters : SubtargetFeature<
    "prefer-mask-registers", "PreferMaskRegisters", "true",
    "Prefer AVX512 mask registers over PTEST/MOVMSK">;

// Indirect calls are lowered through a `retpoline` construct to mitigate
// potential Spectre v2 attacks against them.
def FeatureRetpolineIndirectCalls : SubtargetFeature<
    "retpoline-indirect-calls", "UseRetpolineIndirectCalls", "true",
    "Remove speculation of indirect calls from the generated code">;

// Indirect branches and switches are lowered either to conditional branch
// trees or through a `retpoline` construct, to mitigate potential Spectre v2
// attacks against them.
def FeatureRetpolineIndirectBranches : SubtargetFeature<
    "retpoline-indirect-branches", "UseRetpolineIndirectBranches", "true",
    "Remove speculation of indirect branches from the generated code">;

// Deprecated umbrella feature: turns on both `retpoline-indirect-calls` and
// `retpoline-indirect-branches` defined above.
def FeatureRetpoline : SubtargetFeature<
    "retpoline", "DeprecatedUseRetpoline", "true",
    "Remove speculation of indirect branches from the "
    "generated code, either by avoiding them entirely or "
    "lowering them with a speculation blocking construct",
    [FeatureRetpolineIndirectCalls, FeatureRetpolineIndirectBranches]>;

// Use externally supplied thunks for emitted retpoline calls, so that users in
// highly specialized environments (for example a kernel that hot-patches at
// boot time) can provide their own thunk definitions.
def FeatureRetpolineExternalThunk : SubtargetFeature<
    "retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
    "When lowering an indirect call or branch using a `retpoline`, rely "
    "on the specified user provided thunk rather than emitting one "
    "ourselves. Only has effect when combined with some other retpoline "
    "feature",
    [FeatureRetpolineIndirectCalls]>;

// LVI mitigation for indirect calls/branches and call returns.
def FeatureLVIControlFlowIntegrity : SubtargetFeature<
    "lvi-cfi", "UseLVIControlFlowIntegrity", "true",
    "Prevent indirect calls/branches from using a memory operand, and "
    "precede all indirect calls/branches from a register with an "
    "LFENCE instruction to serialize control flow. Also decompose RET "
    "instructions into a POP+LFENCE+JMP sequence.">;

// SESES: mitigation for speculative execution attacks; implies lvi-cfi.
def FeatureSpeculativeExecutionSideEffectSuppression : SubtargetFeature<
    "seses", "UseSpeculativeExecutionSideEffectSuppression", "true",
    "Prevent speculative execution side channel timing attacks by "
    "inserting a speculation barrier before memory reads, memory writes, "
    "and conditional branches. Implies LVI Control Flow integrity.",
    [FeatureLVIControlFlowIntegrity]>;

// LVI mitigation for data loads.
def FeatureLVILoadHardening : SubtargetFeature<
    "lvi-load-hardening", "UseLVILoadHardening", "true",
    "Insert LFENCE instructions to prevent data speculatively injected "
    "into loads from being used maliciously.">;

// Direct Move instructions.
def FeatureMOVDIRI
    : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
                       "Support movdiri instruction">;
def FeatureMOVDIR64B
    : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true",
                       "Support movdir64b instruction">;

def FeatureFastBEXTR
    : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true",
                       "Indicates that the BEXTR instruction is implemented as a single uop "
                       "with good throughput">;

// When a CPU implements horizontal operations (introduced with SSE3) with
// better latency/throughput than the alternative sequence, fold vector math
// plus shuffles into horizontal math instructions.
def FeatureFastHorizontalOps
    : SubtargetFeature<"fast-hops", "HasFastHorizontalOps", "true",
                       "Prefer horizontal vector math instructions (haddp, phsub, etc.) over "
                       "normal vector instructions with shuffles">;

def FeatureFastScalarShiftMasks
    : SubtargetFeature<"fast-scalar-shift-masks", "HasFastScalarShiftMasks",
                       "true",
                       "Prefer a left/right scalar logical shift pair over a shift+and pair">;

def FeatureFastVectorShiftMasks
    : SubtargetFeature<"fast-vector-shift-masks", "HasFastVectorShiftMasks",
                       "true",
                       "Prefer a left/right vector logical shift pair over a shift+and pair">;

def FeatureUseGLMDivSqrtCosts
    : SubtargetFeature<"use-glm-div-sqrt-costs", "UseGLMDivSqrtCosts", "true",
                       "Use Goldmont specific floating point div/sqrt costs">;

// Combine branches into a single three-way conditional branch.
def FeatureMergeToThreeWayBranch
    : SubtargetFeature<"merge-to-threeway-branch", "ThreewayBranchProfitable",
                       "true",
                       "Merge branches to a three-way "
                       "conditional branch">;

// Enable use of alias analysis during code generation.
514def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true", 515 "Use alias analysis during codegen">; 516 517// Bonnell 518def ProcIntelAtom : SubtargetFeature<"", "X86ProcFamily", "IntelAtom", "">; 519// Silvermont 520def ProcIntelSLM : SubtargetFeature<"", "X86ProcFamily", "IntelSLM", "">; 521 522//===----------------------------------------------------------------------===// 523// Register File Description 524//===----------------------------------------------------------------------===// 525 526include "X86RegisterInfo.td" 527include "X86RegisterBanks.td" 528 529//===----------------------------------------------------------------------===// 530// Instruction Descriptions 531//===----------------------------------------------------------------------===// 532 533include "X86Schedule.td" 534include "X86InstrInfo.td" 535include "X86SchedPredicates.td" 536 537def X86InstrInfo : InstrInfo; 538 539//===----------------------------------------------------------------------===// 540// X86 Scheduler Models 541//===----------------------------------------------------------------------===// 542 543include "X86ScheduleAtom.td" 544include "X86SchedSandyBridge.td" 545include "X86SchedHaswell.td" 546include "X86SchedBroadwell.td" 547include "X86ScheduleSLM.td" 548include "X86ScheduleZnver1.td" 549include "X86ScheduleZnver2.td" 550include "X86ScheduleBdVer2.td" 551include "X86ScheduleBtVer2.td" 552include "X86SchedSkylakeClient.td" 553include "X86SchedSkylakeServer.td" 554 555//===----------------------------------------------------------------------===// 556// X86 Processor Feature Lists 557//===----------------------------------------------------------------------===// 558 559def ProcessorFeatures { 560 // Nehalem 561 list<SubtargetFeature> NHMInheritableFeatures = [FeatureX87, 562 FeatureCMPXCHG8B, 563 FeatureCMOV, 564 FeatureMMX, 565 FeatureSSE42, 566 FeatureFXSR, 567 FeatureNOPL, 568 Feature64Bit, 569 FeatureCMPXCHG16B, 570 FeaturePOPCNT, 571 FeatureLAHFSAHF, 572 
FeatureMacroFusion, 573 FeatureInsertVZEROUPPER]; 574 list<SubtargetFeature> NHMSpecificFeatures = []; 575 list<SubtargetFeature> NHMFeatures = 576 !listconcat(NHMInheritableFeatures, NHMSpecificFeatures); 577 578 // Westmere 579 list<SubtargetFeature> WSMAdditionalFeatures = [FeaturePCLMUL]; 580 list<SubtargetFeature> WSMSpecificFeatures = []; 581 list<SubtargetFeature> WSMInheritableFeatures = 582 !listconcat(NHMInheritableFeatures, WSMAdditionalFeatures); 583 list<SubtargetFeature> WSMFeatures = 584 !listconcat(WSMInheritableFeatures, WSMSpecificFeatures); 585 586 // Sandybridge 587 list<SubtargetFeature> SNBAdditionalFeatures = [FeatureAVX, 588 FeatureSlowDivide64, 589 FeatureXSAVE, 590 FeatureXSAVEOPT, 591 FeatureSlow3OpsLEA, 592 FeatureFastScalarFSQRT, 593 FeatureFastSHLDRotate, 594 FeatureMergeToThreeWayBranch, 595 FeatureFast15ByteNOP]; 596 list<SubtargetFeature> SNBSpecificFeatures = [FeatureSlowUAMem32, 597 FeaturePOPCNTFalseDeps]; 598 list<SubtargetFeature> SNBInheritableFeatures = 599 !listconcat(WSMInheritableFeatures, SNBAdditionalFeatures); 600 list<SubtargetFeature> SNBFeatures = 601 !listconcat(SNBInheritableFeatures, SNBSpecificFeatures); 602 603 // Ivybridge 604 list<SubtargetFeature> IVBAdditionalFeatures = [FeatureRDRAND, 605 FeatureF16C, 606 FeatureFSGSBase]; 607 list<SubtargetFeature> IVBSpecificFeatures = [FeatureSlowUAMem32, 608 FeaturePOPCNTFalseDeps]; 609 list<SubtargetFeature> IVBInheritableFeatures = 610 !listconcat(SNBInheritableFeatures, IVBAdditionalFeatures); 611 list<SubtargetFeature> IVBFeatures = 612 !listconcat(IVBInheritableFeatures, IVBSpecificFeatures); 613 614 // Haswell 615 list<SubtargetFeature> HSWAdditionalFeatures = [FeatureAVX2, 616 FeatureBMI, 617 FeatureBMI2, 618 FeatureERMSB, 619 FeatureFMA, 620 FeatureINVPCID, 621 FeatureLZCNT, 622 FeatureMOVBE, 623 FeatureFastVariableShuffle]; 624 list<SubtargetFeature> HSWSpecificFeatures = [FeaturePOPCNTFalseDeps, 625 FeatureLZCNTFalseDeps]; 626 list<SubtargetFeature> 
HSWInheritableFeatures = 627 !listconcat(IVBInheritableFeatures, HSWAdditionalFeatures); 628 list<SubtargetFeature> HSWFeatures = 629 !listconcat(HSWInheritableFeatures, HSWSpecificFeatures); 630 631 // Broadwell 632 list<SubtargetFeature> BDWAdditionalFeatures = [FeatureADX, 633 FeatureRDSEED, 634 FeaturePRFCHW]; 635 list<SubtargetFeature> BDWSpecificFeatures = [FeaturePOPCNTFalseDeps, 636 FeatureLZCNTFalseDeps]; 637 list<SubtargetFeature> BDWInheritableFeatures = 638 !listconcat(HSWInheritableFeatures, BDWAdditionalFeatures); 639 list<SubtargetFeature> BDWFeatures = 640 !listconcat(BDWInheritableFeatures, BDWSpecificFeatures); 641 642 // Skylake 643 list<SubtargetFeature> SKLAdditionalFeatures = [FeatureAES, 644 FeatureXSAVEC, 645 FeatureXSAVES, 646 FeatureCLFLUSHOPT, 647 FeatureFastVectorFSQRT]; 648 list<SubtargetFeature> SKLSpecificFeatures = [FeatureHasFastGather, 649 FeaturePOPCNTFalseDeps, 650 FeatureSGX]; 651 list<SubtargetFeature> SKLInheritableFeatures = 652 !listconcat(BDWInheritableFeatures, SKLAdditionalFeatures); 653 list<SubtargetFeature> SKLFeatures = 654 !listconcat(SKLInheritableFeatures, SKLSpecificFeatures); 655 656 // Skylake-AVX512 657 list<SubtargetFeature> SKXAdditionalFeatures = [FeatureAVX512, 658 FeaturePrefer256Bit, 659 FeatureCDI, 660 FeatureDQI, 661 FeatureBWI, 662 FeatureVLX, 663 FeaturePKU, 664 FeatureCLWB]; 665 list<SubtargetFeature> SKXSpecificFeatures = [FeatureHasFastGather, 666 FeaturePOPCNTFalseDeps]; 667 list<SubtargetFeature> SKXInheritableFeatures = 668 !listconcat(SKLInheritableFeatures, SKXAdditionalFeatures); 669 list<SubtargetFeature> SKXFeatures = 670 !listconcat(SKXInheritableFeatures, SKXSpecificFeatures); 671 672 // Cascadelake 673 list<SubtargetFeature> CLXAdditionalFeatures = [FeatureVNNI]; 674 list<SubtargetFeature> CLXSpecificFeatures = [FeatureHasFastGather, 675 FeaturePOPCNTFalseDeps]; 676 list<SubtargetFeature> CLXInheritableFeatures = 677 !listconcat(SKXInheritableFeatures, CLXAdditionalFeatures); 678 
list<SubtargetFeature> CLXFeatures = 679 !listconcat(CLXInheritableFeatures, CLXSpecificFeatures); 680 681 // Cooperlake 682 list<SubtargetFeature> CPXAdditionalFeatures = [FeatureBF16]; 683 list<SubtargetFeature> CPXSpecificFeatures = [FeatureHasFastGather, 684 FeaturePOPCNTFalseDeps]; 685 list<SubtargetFeature> CPXInheritableFeatures = 686 !listconcat(CLXInheritableFeatures, CPXAdditionalFeatures); 687 list<SubtargetFeature> CPXFeatures = 688 !listconcat(CPXInheritableFeatures, CPXSpecificFeatures); 689 690 // Cannonlake 691 list<SubtargetFeature> CNLAdditionalFeatures = [FeatureAVX512, 692 FeaturePrefer256Bit, 693 FeatureCDI, 694 FeatureDQI, 695 FeatureBWI, 696 FeatureVLX, 697 FeaturePKU, 698 FeatureVBMI, 699 FeatureIFMA, 700 FeatureSHA, 701 FeatureSGX]; 702 list<SubtargetFeature> CNLSpecificFeatures = [FeatureHasFastGather]; 703 list<SubtargetFeature> CNLInheritableFeatures = 704 !listconcat(SKLInheritableFeatures, CNLAdditionalFeatures); 705 list<SubtargetFeature> CNLFeatures = 706 !listconcat(CNLInheritableFeatures, CNLSpecificFeatures); 707 708 // Icelake 709 list<SubtargetFeature> ICLAdditionalFeatures = [FeatureBITALG, 710 FeatureVAES, 711 FeatureVBMI2, 712 FeatureVNNI, 713 FeatureVPCLMULQDQ, 714 FeatureVPOPCNTDQ, 715 FeatureGFNI, 716 FeatureCLWB, 717 FeatureRDPID]; 718 list<SubtargetFeature> ICLSpecificFeatures = [FeatureHasFastGather]; 719 list<SubtargetFeature> ICLInheritableFeatures = 720 !listconcat(CNLInheritableFeatures, ICLAdditionalFeatures); 721 list<SubtargetFeature> ICLFeatures = 722 !listconcat(ICLInheritableFeatures, ICLSpecificFeatures); 723 724 // Icelake Server 725 list<SubtargetFeature> ICXSpecificFeatures = [FeaturePCONFIG, 726 FeatureWBNOINVD, 727 FeatureHasFastGather]; 728 list<SubtargetFeature> ICXFeatures = 729 !listconcat(ICLInheritableFeatures, ICXSpecificFeatures); 730 731 //Tigerlake 732 list<SubtargetFeature> TGLAdditionalFeatures = [FeatureVP2INTERSECT, 733 FeatureMOVDIRI, 734 FeatureMOVDIR64B, 735 FeatureSHSTK]; 736 
list<SubtargetFeature> TGLSpecificFeatures = [FeatureHasFastGather];
  // Tigerlake follows the same layering as every other family in this record:
  // the inheritable set is the predecessor's (Icelake's) inheritable set plus
  // the Tigerlake additions, and the CPU-specific features are appended only
  // when forming the final list. Building the inheritable set from
  // Additional + Specific instead would put the non-inheritable feature into
  // it while omitting everything inherited from Icelake, and concatenating
  // the complete ICLFeatures list would list FeatureHasFastGather twice.
  // The set of features in TGLFeatures is the same either way.
  list<SubtargetFeature> TGLInheritableFeatures =
    !listconcat(ICLInheritableFeatures, TGLAdditionalFeatures);
  list<SubtargetFeature> TGLFeatures =
    !listconcat(TGLInheritableFeatures, TGLSpecificFeatures);

  // Atom
  list<SubtargetFeature> AtomInheritableFeatures = [
    FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX, FeatureSSSE3,
    FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureCMPXCHG16B, FeatureMOVBE,
    FeatureSlowTwoMemOps, FeatureLAHFSAHF, FeatureInsertVZEROUPPER
  ];
  list<SubtargetFeature> AtomSpecificFeatures = [
    ProcIntelAtom, FeatureSlowUAMem16, FeatureLEAForSP, FeatureSlowDivide32,
    FeatureSlowDivide64, FeatureLEAUsesAG, FeaturePadShortFunctions
  ];
  list<SubtargetFeature> AtomFeatures =
    !listconcat(AtomInheritableFeatures, AtomSpecificFeatures);

  // Silvermont (extends Atom's inheritable set)
  list<SubtargetFeature> SLMAdditionalFeatures = [
    FeatureSSE42, FeaturePOPCNT, FeaturePCLMUL, FeaturePRFCHW, FeatureSlowLEA,
    FeatureSlowIncDec, FeatureRDRAND
  ];
  list<SubtargetFeature> SLMSpecificFeatures = [
    ProcIntelSLM, FeatureSlowDivide64, FeatureSlowPMULLD, FeatureFast7ByteNOP,
    FeaturePOPCNTFalseDeps
  ];
  list<SubtargetFeature> SLMInheritableFeatures =
    !listconcat(AtomInheritableFeatures, SLMAdditionalFeatures);
  list<SubtargetFeature> SLMFeatures =
    !listconcat(SLMInheritableFeatures, SLMSpecificFeatures);

  // Goldmont (extends Silvermont's inheritable set)
  list<SubtargetFeature> GLMAdditionalFeatures = [
    FeatureAES, FeatureSHA, FeatureRDSEED, FeatureXSAVE, FeatureXSAVEOPT,
    FeatureXSAVEC, FeatureXSAVES, FeatureCLFLUSHOPT, FeatureFSGSBase
  ];
  list<SubtargetFeature> GLMSpecificFeatures = [
    FeatureUseGLMDivSqrtCosts, FeaturePOPCNTFalseDeps
  ];
  list<SubtargetFeature> GLMInheritableFeatures =
    !listconcat(SLMInheritableFeatures, GLMAdditionalFeatures);
list<SubtargetFeature> GLMFeatures =
    !listconcat(GLMInheritableFeatures, GLMSpecificFeatures);

  // Goldmont Plus (extends Goldmont's inheritable set)
  list<SubtargetFeature> GLPAdditionalFeatures = [
    FeaturePTWRITE, FeatureRDPID, FeatureSGX
  ];
  list<SubtargetFeature> GLPSpecificFeatures = [FeatureUseGLMDivSqrtCosts];
  list<SubtargetFeature> GLPInheritableFeatures =
    !listconcat(GLMInheritableFeatures, GLPAdditionalFeatures);
  list<SubtargetFeature> GLPFeatures =
    !listconcat(GLPInheritableFeatures, GLPSpecificFeatures);

  // Tremont (extends Goldmont Plus's inheritable set)
  list<SubtargetFeature> TRMAdditionalFeatures = [FeatureCLWB, FeatureGFNI];
  list<SubtargetFeature> TRMSpecificFeatures = [FeatureUseGLMDivSqrtCosts];
  list<SubtargetFeature> TRMInheritableFeatures =
    !listconcat(GLPInheritableFeatures, TRMAdditionalFeatures);
  list<SubtargetFeature> TRMFeatures =
    !listconcat(TRMInheritableFeatures, TRMSpecificFeatures);

  // Knights Landing (a flat list; not derived from another CPU's set)
  list<SubtargetFeature> KNLFeatures = [
    FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX, FeatureFXSR,
    FeatureNOPL, Feature64Bit, FeatureCMPXCHG16B, FeaturePOPCNT,
    FeatureSlowDivide64, FeaturePCLMUL, FeatureXSAVE, FeatureXSAVEOPT,
    FeatureLAHFSAHF, FeatureSlow3OpsLEA, FeatureSlowIncDec, FeatureAES,
    FeatureRDRAND, FeatureF16C, FeatureFSGSBase, FeatureAVX512, FeatureERI,
    FeatureCDI, FeaturePFI, FeaturePREFETCHWT1, FeatureADX, FeatureRDSEED,
    FeatureMOVBE, FeatureLZCNT, FeatureBMI, FeatureBMI2, FeatureFMA,
    FeaturePRFCHW, FeaturePreferMaskRegisters, FeatureSlowTwoMemOps,
    FeatureHasFastGather, FeatureSlowPMADDWD
  ];
  // TODO Add AVX5124FMAPS/AVX5124VNNIW features
  list<SubtargetFeature> KNMFeatures =
    !listconcat(KNLFeatures, [FeatureVPOPCNTDQ]);

  // Barcelona
  list<SubtargetFeature> BarcelonaInheritableFeatures = [
    FeatureX87, FeatureCMPXCHG8B, FeatureSSE4A, Feature3DNowA, FeatureFXSR,
    FeatureNOPL, FeatureCMPXCHG16B, FeaturePRFCHW, FeatureLZCNT,
    FeaturePOPCNT, FeatureSlowSHLD, FeatureLAHFSAHF, FeatureCMOV,
    Feature64Bit, FeatureFastScalarShiftMasks, FeatureInsertVZEROUPPER
  ];
  list<SubtargetFeature> BarcelonaFeatures = BarcelonaInheritableFeatures;

  // Bobcat
  list<SubtargetFeature> BtVer1InheritableFeatures = [
    FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX, FeatureSSSE3,
    FeatureSSE4A, FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureCMPXCHG16B,
    FeaturePRFCHW, FeatureLZCNT, FeaturePOPCNT, FeatureSlowSHLD,
    FeatureLAHFSAHF, FeatureFast15ByteNOP, FeatureFastScalarShiftMasks,
    FeatureFastVectorShiftMasks
  ];
  list<SubtargetFeature> BtVer1SpecificFeatures = [FeatureInsertVZEROUPPER];
  list<SubtargetFeature> BtVer1Features =
    !listconcat(BtVer1InheritableFeatures, BtVer1SpecificFeatures);

  // Jaguar (extends Bobcat's inheritable set; FeatureInsertVZEROUPPER is in
  // Bobcat's specific list and is therefore not picked up here)
  list<SubtargetFeature> BtVer2AdditionalFeatures = [
    FeatureAVX, FeatureAES, FeaturePCLMUL, FeatureBMI, FeatureF16C,
    FeatureMOVBE, FeatureXSAVE, FeatureXSAVEOPT
  ];
  list<SubtargetFeature> BtVer2SpecificFeatures = [
    FeatureFastLZCNT, FeatureFastBEXTR, FeatureFastHorizontalOps
  ];
  list<SubtargetFeature> BtVer2InheritableFeatures =
    !listconcat(BtVer1InheritableFeatures, BtVer2AdditionalFeatures);
  list<SubtargetFeature> BtVer2Features =
    !listconcat(BtVer2InheritableFeatures, BtVer2SpecificFeatures);

  // Bulldozer
  list<SubtargetFeature> BdVer1InheritableFeatures = [
    FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureXOP, Feature64Bit,
    FeatureCMPXCHG16B, FeatureAES, FeaturePRFCHW, FeaturePCLMUL, FeatureMMX,
    FeatureFXSR, FeatureNOPL, FeatureLZCNT, FeaturePOPCNT, FeatureXSAVE,
    FeatureLWP, FeatureSlowSHLD, FeatureLAHFSAHF, FeatureFast11ByteNOP,
    FeatureFastScalarShiftMasks, FeatureBranchFusion, FeatureInsertVZEROUPPER
  ];
  list<SubtargetFeature> BdVer1Features = BdVer1InheritableFeatures;

  // PileDriver (extends Bulldozer)
  list<SubtargetFeature> BdVer2AdditionalFeatures = [
    FeatureF16C, FeatureBMI, FeatureTBM, FeatureFMA, FeatureFastBEXTR
  ];
  list<SubtargetFeature> BdVer2InheritableFeatures =
    !listconcat(BdVer1InheritableFeatures, BdVer2AdditionalFeatures);
  list<SubtargetFeature> BdVer2Features = BdVer2InheritableFeatures;

  // Steamroller (extends PileDriver)
  list<SubtargetFeature> BdVer3AdditionalFeatures = [
    FeatureXSAVEOPT, FeatureFSGSBase
  ];
  list<SubtargetFeature> BdVer3InheritableFeatures =
    !listconcat(BdVer2InheritableFeatures, BdVer3AdditionalFeatures);
  list<SubtargetFeature> BdVer3Features = BdVer3InheritableFeatures;

  // Excavator (extends Steamroller)
  list<SubtargetFeature> BdVer4AdditionalFeatures = [
    FeatureAVX2, FeatureBMI2, FeatureMOVBE, FeatureRDRAND, FeatureMWAITX
  ];
  list<SubtargetFeature> BdVer4InheritableFeatures =
    !listconcat(BdVer3InheritableFeatures, BdVer4AdditionalFeatures);
  list<SubtargetFeature> BdVer4Features = BdVer4InheritableFeatures;


  // AMD Zen Processors common ISAs
  list<SubtargetFeature> ZNFeatures = [
    FeatureADX, FeatureAES, FeatureAVX2, FeatureBMI, FeatureBMI2,
    FeatureCLFLUSHOPT, FeatureCLZERO, FeatureCMOV, Feature64Bit,
    FeatureCMPXCHG16B, FeatureF16C, FeatureFMA, FeatureFSGSBase, FeatureFXSR,
    FeatureNOPL, FeatureFastLZCNT, FeatureLAHFSAHF, FeatureLZCNT,
    FeatureFastBEXTR, FeatureFast15ByteNOP, FeatureBranchFusion,
    FeatureFastScalarShiftMasks, FeatureMMX, FeatureMOVBE, FeatureMWAITX,
    FeaturePCLMUL, FeaturePOPCNT, FeaturePRFCHW, FeatureRDRAND, FeatureRDSEED,
    FeatureSHA, FeatureSSE4A, FeatureSlowSHLD, FeatureInsertVZEROUPPER,
    FeatureX87, FeatureXSAVE, FeatureXSAVEC, FeatureXSAVEOPT, FeatureXSAVES
  ];
  // Zen 2 additions on top of the common Zen list.
  list<SubtargetFeature> ZN2AdditionalFeatures = [
    FeatureCLWB, FeatureRDPID, FeatureWBNOINVD
  ];
  list<SubtargetFeature> ZN2Features =
    !listconcat(ZNFeatures, ZN2AdditionalFeatures);
}

//===----------------------------------------------------------------------===//
// X86 processors supported.
//===----------------------------------------------------------------------===//

// Shorthand for a processor that uses GenericModel as its scheduling model.
class Proc<string Name, list<SubtargetFeature> Features>
  : ProcessorModel<Name, GenericModel, Features>;

// NOTE: CMPXCHG8B is here for legacy compatibility so that it is only disabled
// if i386/i486 is specifically requested.
def : Proc<"generic", [
  FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureInsertVZEROUPPER
]>;

foreach P = ["i386", "i486"] in {
  def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
}

foreach P = ["i586", "pentium"] in {
  def : Proc<P, [
    FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureInsertVZEROUPPER
  ]>;
}

def : Proc<"pentium-mmx", [
  FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureMMX,
  FeatureInsertVZEROUPPER
]>;

def : Proc<"i686", [
  FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureCMOV,
  FeatureInsertVZEROUPPER
]>;
def : Proc<"pentiumpro", [
  FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureCMOV, FeatureNOPL,
  FeatureInsertVZEROUPPER
]>;

def : Proc<"pentium2", [
  FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureMMX, FeatureCMOV,
  FeatureFXSR, FeatureNOPL, FeatureInsertVZEROUPPER
]>;

foreach P = ["pentium3", "pentium3m"] in {
  def : Proc<P, [
    FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE1,
    FeatureFXSR, FeatureNOPL, FeatureCMOV, FeatureInsertVZEROUPPER
  ]>;
}

// Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
// The intent is to enable it for pentium4 which is the current default
// processor in a vanilla 32-bit clang compilation when no specific
// architecture is specified.  This generally gives a nice performance
// increase on silvermont, with largely neutral behavior on other
// contemporary large core processors.
// pentium-m, pentium4m, prescott and nocona are included as a preventative
// measure to avoid performance surprises, in case clang's default cpu
// changes slightly.

foreach P = ["pentium-m", "pentium4", "pentium4m"] in {
  def : ProcessorModel<P, GenericPostRAModel, [
    FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE2,
    FeatureFXSR, FeatureNOPL, FeatureCMOV, FeatureInsertVZEROUPPER
  ]>;
}

// Intel Quark.
def : Proc<"lakemont", [FeatureInsertVZEROUPPER]>;

// Intel Core Duo.
def : ProcessorModel<"yonah", SandyBridgeModel, [
  FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE3,
  FeatureFXSR, FeatureNOPL, FeatureCMOV, FeatureInsertVZEROUPPER
]>;

// NetBurst.
def : ProcessorModel<"prescott", GenericPostRAModel, [
  FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE3,
  FeatureFXSR, FeatureNOPL, FeatureCMOV, FeatureInsertVZEROUPPER
]>;
def : ProcessorModel<"nocona", GenericPostRAModel, [
  FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX,
  FeatureSSE3, FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureCMPXCHG16B,
  FeatureInsertVZEROUPPER
]>;

// Intel Core 2 Solo/Duo.
def : ProcessorModel<"core2", SandyBridgeModel, [
  FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX,
  FeatureSSSE3, FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureCMPXCHG16B,
  FeatureLAHFSAHF, FeatureMacroFusion, FeatureInsertVZEROUPPER
]>;
// Same list as core2 except that FeatureSSSE3 is replaced by FeatureSSE41.
def : ProcessorModel<"penryn", SandyBridgeModel, [
  FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX,
  FeatureSSE41, FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureCMPXCHG16B,
  FeatureLAHFSAHF, FeatureMacroFusion, FeatureInsertVZEROUPPER
]>;

// Atom CPUs.
foreach P = ["bonnell", "atom"] in {
  def : ProcessorModel<P, AtomModel, ProcessorFeatures.AtomFeatures>;
}

foreach P = ["silvermont", "slm"] in {
  def : ProcessorModel<P, SLMModel, ProcessorFeatures.SLMFeatures>;
}

// Goldmont, Goldmont Plus and Tremont use the Silvermont scheduling model.
def : ProcessorModel<"goldmont", SLMModel, ProcessorFeatures.GLMFeatures>;
def : ProcessorModel<"goldmont-plus", SLMModel,
                     ProcessorFeatures.GLPFeatures>;
def : ProcessorModel<"tremont", SLMModel, ProcessorFeatures.TRMFeatures>;

// "Arrandale" along with corei3 and corei5
foreach P = ["nehalem", "corei7"] in {
  def : ProcessorModel<P, SandyBridgeModel, ProcessorFeatures.NHMFeatures>;
}

// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
def : ProcessorModel<"westmere", SandyBridgeModel,
                     ProcessorFeatures.WSMFeatures>;

foreach P = ["sandybridge", "corei7-avx"] in {
  def : ProcessorModel<P, SandyBridgeModel, ProcessorFeatures.SNBFeatures>;
}

foreach P = ["ivybridge", "core-avx-i"] in {
  def : ProcessorModel<P, SandyBridgeModel, ProcessorFeatures.IVBFeatures>;
}

foreach P = ["haswell", "core-avx2"] in {
  def : ProcessorModel<P, HaswellModel, ProcessorFeatures.HSWFeatures>;
}

def : ProcessorModel<"broadwell", BroadwellModel,
                     ProcessorFeatures.BDWFeatures>;

def : ProcessorModel<"skylake", SkylakeClientModel,
                     ProcessorFeatures.SKLFeatures>;

// FIXME: define KNL scheduler model
def : ProcessorModel<"knl", HaswellModel, ProcessorFeatures.KNLFeatures>;
def : ProcessorModel<"knm", HaswellModel, ProcessorFeatures.KNMFeatures>;

foreach P = ["skylake-avx512", "skx"] in {
  def : ProcessorModel<P, SkylakeServerModel, ProcessorFeatures.SKXFeatures>;
}

// The remaining Intel CPUs below all use the Skylake server scheduling model.
def : ProcessorModel<"cascadelake", SkylakeServerModel,
                     ProcessorFeatures.CLXFeatures>;
def : ProcessorModel<"cooperlake", SkylakeServerModel,
                     ProcessorFeatures.CPXFeatures>;
def : ProcessorModel<"cannonlake", SkylakeServerModel,
                     ProcessorFeatures.CNLFeatures>;
def : ProcessorModel<"icelake-client", SkylakeServerModel,
                     ProcessorFeatures.ICLFeatures>;
def : ProcessorModel<"icelake-server", SkylakeServerModel,
                     ProcessorFeatures.ICXFeatures>;
def : ProcessorModel<"tigerlake", SkylakeServerModel,
                     ProcessorFeatures.TGLFeatures>;

// AMD CPUs.

def : Proc<"k6", [
  FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureMMX,
  FeatureInsertVZEROUPPER
]>;
foreach P = ["k6-2", "k6-3"] in {
  def : Proc<P, [
    FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, Feature3DNow,
    FeatureInsertVZEROUPPER
  ]>;
}

foreach P = ["athlon", "athlon-tbird"] in {
  def : Proc<P, [
    FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureCMOV,
    Feature3DNowA, FeatureNOPL, FeatureSlowSHLD, FeatureInsertVZEROUPPER
  ]>;
}

foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in {
  def : Proc<P, [
    FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureCMOV,
    FeatureSSE1, Feature3DNowA, FeatureFXSR, FeatureNOPL, FeatureSlowSHLD,
    FeatureInsertVZEROUPPER
  ]>;
}

foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in {
  def : Proc<P, [
    FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureSSE2,
    Feature3DNowA, FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureSlowSHLD,
    FeatureCMOV, FeatureFastScalarShiftMasks, FeatureInsertVZEROUPPER
  ]>;
}

// Unlike the plain k8 group above, this list also carries FeatureCMPXCHG16B.
foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
  def : Proc<P, [
    FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureSSE3,
    Feature3DNowA, FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B,
    FeatureSlowSHLD, FeatureCMOV, Feature64Bit, FeatureFastScalarShiftMasks,
    FeatureInsertVZEROUPPER
  ]>;
}

foreach P = ["amdfam10", "barcelona"] in {
  def : Proc<P, ProcessorFeatures.BarcelonaFeatures>;
}

// Bobcat
def : Proc<"btver1", ProcessorFeatures.BtVer1Features>;
// Jaguar
def : ProcessorModel<"btver2", BtVer2Model, ProcessorFeatures.BtVer2Features>;

// Bulldozer (scheduled with the BdVer2 model)
def : ProcessorModel<"bdver1", BdVer2Model, ProcessorFeatures.BdVer1Features>;
// Piledriver
def : ProcessorModel<"bdver2", BdVer2Model, ProcessorFeatures.BdVer2Features>;
// Steamroller
def : Proc<"bdver3", ProcessorFeatures.BdVer3Features>;
// Excavator
def : Proc<"bdver4", ProcessorFeatures.BdVer4Features>;

// Zen
def : ProcessorModel<"znver1", Znver1Model, ProcessorFeatures.ZNFeatures>;
def : ProcessorModel<"znver2", Znver2Model, ProcessorFeatures.ZN2Features>;

def : Proc<"geode", [
  FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, Feature3DNowA,
  FeatureInsertVZEROUPPER
]>;

def : Proc<"winchip-c6", [
  FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureInsertVZEROUPPER
]>;
foreach P = ["winchip2", "c3"] in {
  def : Proc<P, [
    FeatureX87, FeatureSlowUAMem16, Feature3DNow, FeatureInsertVZEROUPPER
  ]>;
}
def : Proc<"c3-2", [
  FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE1,
  FeatureFXSR, FeatureCMOV, FeatureInsertVZEROUPPER
]>;

// We also provide a generic 64-bit specific x86 processor model which tries to
// be good for modern chips without enabling instruction set encodings past the
// basic SSE2 and 64-bit ones. It disables slow things from any mainstream and
// modern 64-bit x86 chip, and enables features that are generally beneficial.
//
// We currently use the Sandy Bridge model as the default scheduling model as
// we use it across Nehalem, Westmere, Sandy Bridge, and Ivy Bridge which
// covers a huge swath of x86 processors. If there are specific scheduling
// knobs which need to be tuned differently for AMD chips, we might consider
// forming a common base for them.
def : ProcessorModel<"x86-64", SandyBridgeModel, [
  FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX, FeatureSSE2,
  FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureSlow3OpsLEA,
  FeatureSlowDivide64, FeatureSlowIncDec, FeatureMacroFusion,
  FeatureInsertVZEROUPPER
]>;

//===----------------------------------------------------------------------===//
// Calling Conventions
//===----------------------------------------------------------------------===//

include "X86CallingConv.td"


//===----------------------------------------------------------------------===//
// Assembly Parser
//===----------------------------------------------------------------------===//

// AT&T syntax: parser variant 0.
def ATTAsmParserVariant : AsmParserVariant {
  int Variant = 0;

  // Name of this variant.
  string Name = "att";

  // Text from this delimiter onwards is dropped from assembly strings.
  string CommentDelimiter = "#";

  // Prefix that marks a hard coded register name.
  string RegisterPrefix = "%";
}

// Intel syntax: parser variant 1.
def IntelAsmParserVariant : AsmParserVariant {
  int Variant = 1;

  // Name of this variant.
  string Name = "intel";

  // Text from this delimiter onwards is dropped from assembly strings.
  string CommentDelimiter = ";";

  // Hard coded register names carry no prefix in this variant.
  string RegisterPrefix = "";
}

//===----------------------------------------------------------------------===//
// Assembly Printers
//===----------------------------------------------------------------------===//

// The X86 target supports two different syntaxes for emitting machine code.
// This is controlled by the -x86-asm-syntax={att|intel} option.

// Writer for variant 0 (AT&T), implemented by the ATTInstPrinter class.
def ATTAsmWriter : AsmWriter {
  string AsmWriterClassName = "ATTInstPrinter";
  int Variant = 0;
}

// Writer for variant 1 (Intel), implemented by the IntelInstPrinter class.
def IntelAsmWriter : AsmWriter {
  string AsmWriterClassName = "IntelInstPrinter";
  int Variant = 1;
}

// Top-level target record: ties together the instruction set, the two
// assembly parser variants and the two assembly writers defined above.
def X86 : Target {
  // Information about the instructions...
  let InstructionSet = X86InstrInfo;
  let AssemblyParserVariants = [ATTAsmParserVariant, IntelAsmParserVariant];
  let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter];
  let AllowRegisterRenaming = 1;
}

//===----------------------------------------------------------------------===//
// Pfm Counters
//===----------------------------------------------------------------------===//

include "X86PfmCounters.td"