//===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This is a target description file for the Intel i386 architecture, referred
// to here as the "X86" architecture.
//
//===----------------------------------------------------------------------===//

// Get the target-independent interfaces which we are implementing...
//
include "llvm/Target/Target.td"

// NOTE(review): SubtargetFeature is declared in Target.td (not shown in this
// file). The arguments used below appear to be <command-line name, subtarget
// field, value assigned to that field, help description, optional list of
// implied features> -- confirm against Target.td.

//===----------------------------------------------------------------------===//
// X86 Subtarget state
//

def Mode64Bit
    : SubtargetFeature<"64bit-mode", "In64BitMode", "true",
          "64-bit mode (x86_64)">;

def Mode32Bit
    : SubtargetFeature<"32bit-mode", "In32BitMode", "true",
          "32-bit mode (80386)">;

def Mode16Bit
    : SubtargetFeature<"16bit-mode", "In16BitMode", "true",
          "16-bit mode (i8086)">;

//===----------------------------------------------------------------------===//
// X86 Subtarget ISA features
//===----------------------------------------------------------------------===//

def FeatureX87
    : SubtargetFeature<"x87", "HasX87", "true",
          "Enable X87 float instructions">;

def FeatureNOPL
    : SubtargetFeature<"nopl", "HasNOPL", "true",
          "Enable NOPL instruction">;

def FeatureCMOV
    : SubtargetFeature<"cmov", "HasCMov", "true",
          "Enable conditional move instructions">;

def FeatureCMPXCHG8B
    : SubtargetFeature<"cx8", "HasCmpxchg8b", "true",
          "Support CMPXCHG8B instructions">;

def FeatureCRC32
    : SubtargetFeature<"crc32", "HasCRC32", "true",
          "Enable SSE 4.2 CRC32 instruction">;

def FeaturePOPCNT
    : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
          "Support POPCNT instruction">;

def FeatureFXSR
    : SubtargetFeature<"fxsr", "HasFXSR", "true",
          "Support fxsave/fxrestore instructions">;

def FeatureXSAVE
    : SubtargetFeature<"xsave", "HasXSAVE", "true",
          "Support xsave instructions">;

// The xsave variants below all list FeatureXSAVE as an implied feature.
def FeatureXSAVEOPT
    : SubtargetFeature<"xsaveopt", "HasXSAVEOPT", "true",
          "Support xsaveopt instructions",
          [FeatureXSAVE]>;

def FeatureXSAVEC
    : SubtargetFeature<"xsavec", "HasXSAVEC", "true",
          "Support xsavec instructions",
          [FeatureXSAVE]>;

def FeatureXSAVES
    : SubtargetFeature<"xsaves", "HasXSAVES", "true",
          "Support xsaves instructions",
          [FeatureXSAVE]>;

// The SSE levels form a chain: every level lists the previous level as its
// implied feature, and all of them set the same X86SSELevel field.
def FeatureSSE1
    : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
          "Enable SSE instructions">;

def FeatureSSE2
    : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
          "Enable SSE2 instructions",
          [FeatureSSE1]>;

def FeatureSSE3
    : SubtargetFeature<"sse3", "X86SSELevel", "SSE3",
          "Enable SSE3 instructions",
          [FeatureSSE2]>;

def FeatureSSSE3
    : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3",
          "Enable SSSE3 instructions",
          [FeatureSSE3]>;

def FeatureSSE41
    : SubtargetFeature<"sse4.1", "X86SSELevel", "SSE41",
          "Enable SSE 4.1 instructions",
          [FeatureSSSE3]>;

def FeatureSSE42
    : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42",
          "Enable SSE 4.2 instructions",
          [FeatureSSE41]>;

// The MMX subtarget feature is separate from the rest of the SSE features
// because it's important (for odd compatibility reasons) to be able to
// turn it off explicitly while allowing SSE+ to be on.
def FeatureMMX
    : SubtargetFeature<"mmx", "X863DNowLevel", "MMX",
          "Enable MMX instructions">;

def Feature3DNow
    : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
          "Enable 3DNow! instructions",
          [FeatureMMX]>;

def Feature3DNowA
    : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
          "Enable 3DNow! Athlon instructions",
          [Feature3DNow]>;

// All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied
// feature, because SSE2 can be disabled (e.g. for compiling OS kernels)
// without disabling 64-bit mode. Nothing should imply this feature bit. It
// is used to enforce that only 64-bit capable CPUs are used in 64-bit mode.
def Feature64Bit
    : SubtargetFeature<"64bit", "HasX86_64", "true",
          "Support 64-bit instructions">;

def FeatureCMPXCHG16B
    : SubtargetFeature<"cx16", "HasCmpxchg16b", "true",
          "64-bit with cmpxchg16b",
          [FeatureCMPXCHG8B]>;

def FeatureSSE4A
    : SubtargetFeature<"sse4a", "HasSSE4A", "true",
          "Support SSE 4a instructions",
          [FeatureSSE3]>;

// AVX and AVX2 continue the X86SSELevel chain on top of SSE 4.2.
def FeatureAVX
    : SubtargetFeature<"avx", "X86SSELevel", "AVX",
          "Enable AVX instructions",
          [FeatureSSE42]>;

def FeatureAVX2
    : SubtargetFeature<"avx2", "X86SSELevel", "AVX2",
          "Enable AVX2 instructions",
          [FeatureAVX]>;

// Help text corrected: FMA is "fused multiply-add" (was "multiple-add").
def FeatureFMA
    : SubtargetFeature<"fma", "HasFMA", "true",
          "Enable three-operand fused multiply-add",
          [FeatureAVX]>;

def FeatureF16C
    : SubtargetFeature<"f16c", "HasF16C", "true",
          "Support 16-bit floating point conversion instructions",
          [FeatureAVX]>;

// AVX-512 foundation; the AVX-512 extensions below imply it either directly
// or through FeatureBWI.
def FeatureAVX512
    : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512F",
          "Enable AVX-512 instructions",
          [FeatureAVX2, FeatureFMA, FeatureF16C]>;

def FeatureERI
    : SubtargetFeature<"avx512er", "HasERI", "true",
          "Enable AVX-512 Exponential and Reciprocal Instructions",
          [FeatureAVX512]>;

def FeatureCDI
    : SubtargetFeature<"avx512cd", "HasCDI", "true",
          "Enable AVX-512 Conflict Detection Instructions",
          [FeatureAVX512]>;

def FeatureVPOPCNTDQ
    : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ", "true",
          "Enable AVX-512 Population Count Instructions",
          [FeatureAVX512]>;

def FeaturePFI
    : SubtargetFeature<"avx512pf", "HasPFI", "true",
          "Enable AVX-512 PreFetch Instructions",
          [FeatureAVX512]>;

def FeaturePREFETCHWT1
    : SubtargetFeature<"prefetchwt1", "HasPREFETCHWT1", "true",
          "Prefetch with Intent to Write and T1 Hint">;

def FeatureDQI
    : SubtargetFeature<"avx512dq", "HasDQI", "true",
          "Enable AVX-512 Doubleword and Quadword Instructions",
          [FeatureAVX512]>;

def FeatureBWI
    : SubtargetFeature<"avx512bw", "HasBWI", "true",
          "Enable AVX-512 Byte and Word Instructions",
          [FeatureAVX512]>;

def FeatureVLX
    : SubtargetFeature<"avx512vl", "HasVLX", "true",
          "Enable AVX-512 Vector Length eXtensions",
          [FeatureAVX512]>;

def FeatureVBMI
    : SubtargetFeature<"avx512vbmi", "HasVBMI", "true",
          "Enable AVX-512 Vector Byte Manipulation Instructions",
          [FeatureBWI]>;

def FeatureVBMI2
    : SubtargetFeature<"avx512vbmi2", "HasVBMI2", "true",
          "Enable AVX-512 further Vector Byte Manipulation Instructions",
          [FeatureBWI]>;

// Help text corrected: "Multiply-Add" (was "Multiple-Add").
def FeatureIFMA
    : SubtargetFeature<"avx512ifma", "HasIFMA", "true",
          "Enable AVX-512 Integer Fused Multiply-Add",
          [FeatureAVX512]>;

def FeaturePKU
    : SubtargetFeature<"pku", "HasPKU", "true",
          "Enable protection keys">;

def FeatureVNNI
    : SubtargetFeature<"avx512vnni", "HasVNNI", "true",
          "Enable AVX-512 Vector Neural Network Instructions",
          [FeatureAVX512]>;

def FeatureAVXVNNI
    : SubtargetFeature<"avxvnni", "HasAVXVNNI", "true",
          "Support AVX_VNNI encoding",
          [FeatureAVX2]>;

def FeatureBF16
    : SubtargetFeature<"avx512bf16", "HasBF16", "true",
          "Support bfloat16 floating point",
          [FeatureBWI]>;

def FeatureBITALG
    : SubtargetFeature<"avx512bitalg", "HasBITALG", "true",
          "Enable AVX-512 Bit Algorithms",
          [FeatureBWI]>;

def FeatureVP2INTERSECT
    : SubtargetFeature<"avx512vp2intersect", "HasVP2INTERSECT", "true",
          "Enable AVX-512 vp2intersect",
          [FeatureAVX512]>;

// FIXME: FP16 scalar intrinsics use the type v8f16, which is supposed to be
// guarded under condition hasVLX. So we imply it in FeatureFP16 currently.
// FIXME: FP16 conversion between f16 and i64 customize type v8i64, which is
// supposed to be guarded under condition hasDQI. So we imply it in FeatureFP16
// currently.
// FP16 lists BWI, VLX and DQI as implied features.
def FeatureFP16
    : SubtargetFeature<"avx512fp16", "HasFP16", "true",
          "Support 16-bit floating point",
          [FeatureBWI, FeatureVLX, FeatureDQI]>;

def FeaturePCLMUL
    : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
          "Enable packed carry-less multiplication instructions",
          [FeatureSSE2]>;

def FeatureGFNI
    : SubtargetFeature<"gfni", "HasGFNI", "true",
          "Enable Galois Field Arithmetic Instructions",
          [FeatureSSE2]>;

def FeatureVPCLMULQDQ
    : SubtargetFeature<"vpclmulqdq", "HasVPCLMULQDQ", "true",
          "Enable vpclmulqdq instructions",
          [FeatureAVX, FeaturePCLMUL]>;

// Help text corrected: FMA4 is "fused multiply-add" (was "multiple-add").
def FeatureFMA4
    : SubtargetFeature<"fma4", "HasFMA4", "true",
          "Enable four-operand fused multiply-add",
          [FeatureAVX, FeatureSSE4A]>;

def FeatureXOP
    : SubtargetFeature<"xop", "HasXOP", "true",
          "Enable XOP instructions",
          [FeatureFMA4]>;

def FeatureSSEUnalignedMem
    : SubtargetFeature<"sse-unaligned-mem", "HasSSEUnalignedMem", "true",
          "Allow unaligned memory operands with SSE instructions">;

def FeatureAES
    : SubtargetFeature<"aes", "HasAES", "true",
          "Enable AES instructions",
          [FeatureSSE2]>;

def FeatureVAES
    : SubtargetFeature<"vaes", "HasVAES", "true",
          "Promote selected AES instructions to AVX512/AVX registers",
          [FeatureAVX, FeatureAES]>;

def FeatureTBM
    : SubtargetFeature<"tbm", "HasTBM", "true",
          "Enable TBM instructions">;

def FeatureLWP
    : SubtargetFeature<"lwp", "HasLWP", "true",
          "Enable LWP instructions">;

def FeatureMOVBE
    : SubtargetFeature<"movbe", "HasMOVBE", "true",
          "Support MOVBE instruction">;

// Note that the command-line name is "rdrnd", not "rdrand".
def FeatureRDRAND
    : SubtargetFeature<"rdrnd", "HasRDRAND", "true",
          "Support RDRAND instruction">;

def FeatureFSGSBase
    : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true",
          "Support FS/GS Base instructions">;

def FeatureLZCNT
    : SubtargetFeature<"lzcnt", "HasLZCNT", "true",
          "Support LZCNT instruction">;

def FeatureBMI
    : SubtargetFeature<"bmi", "HasBMI", "true",
          "Support BMI instructions">;
def FeatureBMI2
    : SubtargetFeature<"bmi2", "HasBMI2", "true",
          "Support BMI2 instructions">;

def FeatureRTM
    : SubtargetFeature<"rtm", "HasRTM", "true",
          "Support RTM instructions">;

def FeatureADX
    : SubtargetFeature<"adx", "HasADX", "true",
          "Support ADX instructions">;

def FeatureSHA
    : SubtargetFeature<"sha", "HasSHA", "true",
          "Enable SHA instructions",
          [FeatureSSE2]>;

def FeatureSHSTK
    : SubtargetFeature<"shstk", "HasSHSTK", "true",
          "Support CET Shadow-Stack instructions">;

def FeaturePRFCHW
    : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
          "Support PRFCHW instructions">;

def FeatureRDSEED
    : SubtargetFeature<"rdseed", "HasRDSEED", "true",
          "Support RDSEED instruction">;

// The command-line name is "sahf" and the subtarget field is HasLAHFSAHF64.
def FeatureLAHFSAHF
    : SubtargetFeature<"sahf", "HasLAHFSAHF64", "true",
          "Support LAHF and SAHF instructions in 64-bit mode">;

def FeatureMWAITX
    : SubtargetFeature<"mwaitx", "HasMWAITX", "true",
          "Enable MONITORX/MWAITX timer functionality">;

def FeatureCLZERO
    : SubtargetFeature<"clzero", "HasCLZERO", "true",
          "Enable Cache Line Zero">;

def FeatureCLDEMOTE
    : SubtargetFeature<"cldemote", "HasCLDEMOTE", "true",
          "Enable Cache Demote">;

def FeaturePTWRITE
    : SubtargetFeature<"ptwrite", "HasPTWRITE", "true",
          "Support ptwrite instruction">;

// The AMX-INT8 and AMX-BF16 features both list AMX-TILE as implied.
def FeatureAMXTILE
    : SubtargetFeature<"amx-tile", "HasAMXTILE", "true",
          "Support AMX-TILE instructions">;

def FeatureAMXINT8
    : SubtargetFeature<"amx-int8", "HasAMXINT8", "true",
          "Support AMX-INT8 instructions",
          [FeatureAMXTILE]>;

def FeatureAMXBF16
    : SubtargetFeature<"amx-bf16", "HasAMXBF16", "true",
          "Support AMX-BF16 instructions",
          [FeatureAMXTILE]>;

def FeatureINVPCID
    : SubtargetFeature<"invpcid", "HasINVPCID", "true",
          "Invalidate Process-Context Identifier">;

def FeatureSGX
    : SubtargetFeature<"sgx", "HasSGX", "true",
          "Enable Software Guard Extensions">;

def FeatureCLFLUSHOPT
    : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true",
          "Flush A Cache Line Optimized">;

def FeatureCLWB
    : SubtargetFeature<"clwb", "HasCLWB", "true",
          "Cache Line Write Back">;

def FeatureWBNOINVD
    : SubtargetFeature<"wbnoinvd", "HasWBNOINVD", "true",
          "Write Back No Invalidate">;

def FeatureRDPID
    : SubtargetFeature<"rdpid", "HasRDPID", "true",
          "Support RDPID instructions">;

def FeatureWAITPKG
    : SubtargetFeature<"waitpkg", "HasWAITPKG", "true",
          "Wait and pause enhancements">;

def FeatureENQCMD
    : SubtargetFeature<"enqcmd", "HasENQCMD", "true",
          "Has ENQCMD instructions">;

// Wide Key Locker lists the base Key Locker feature as implied, which in turn
// lists SSE2.
def FeatureKL
    : SubtargetFeature<"kl", "HasKL", "true",
          "Support Key Locker kl Instructions",
          [FeatureSSE2]>;

def FeatureWIDEKL
    : SubtargetFeature<"widekl", "HasWIDEKL", "true",
          "Support Key Locker wide Instructions",
          [FeatureKL]>;

def FeatureHRESET
    : SubtargetFeature<"hreset", "HasHRESET", "true",
          "Has hreset instruction">;

def FeatureSERIALIZE
    : SubtargetFeature<"serialize", "HasSERIALIZE", "true",
          "Has serialize instruction">;

def FeatureTSXLDTRK
    : SubtargetFeature<"tsxldtrk", "HasTSXLDTRK", "true",
          "Support TSXLDTRK instructions">;

def FeatureUINTR
    : SubtargetFeature<"uintr", "HasUINTR", "true",
          "Has UINTR Instructions">;

def FeaturePCONFIG
    : SubtargetFeature<"pconfig", "HasPCONFIG", "true",
          "platform configuration instruction">;

def FeatureMOVDIRI
    : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
          "Support movdiri instruction">;

def FeatureMOVDIR64B
    : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true",
          "Support movdir64b instruction">;

// Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
// "string operations"). See "REP String Enhancement" in the Intel Software
// Development Manual. This feature essentially means that REP MOVSB will copy
// using the largest available size instead of copying bytes one by one, making
// it at least as fast as REPMOVS{W,D,Q}.
def FeatureERMSB
    : SubtargetFeature<"ermsb", "HasERMSB", "true",
          "REP MOVS/STOS are fast">;

// Icelake and newer processors have Fast Short REP MOV.
def FeatureFSRM
    : SubtargetFeature<"fsrm", "HasFSRM", "true",
          "REP MOVSB of short lengths is faster">;

def FeatureSoftFloat
    : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
          "Use software floating point features">;

//===----------------------------------------------------------------------===//
// X86 Subtarget Security Mitigation features
//===----------------------------------------------------------------------===//

// Lower indirect calls using a special construct called a `retpoline` to
// mitigate potential Spectre v2 attacks against them.
def FeatureRetpolineIndirectCalls
    : SubtargetFeature<"retpoline-indirect-calls",
          "UseRetpolineIndirectCalls", "true",
          "Remove speculation of indirect calls from the generated code">;

// Lower indirect branches and switches either using conditional branch trees
// or using a special construct called a `retpoline` to mitigate potential
// Spectre v2 attacks against them.
def FeatureRetpolineIndirectBranches
    : SubtargetFeature<"retpoline-indirect-branches",
          "UseRetpolineIndirectBranches", "true",
          "Remove speculation of indirect branches from the generated code">;

// Deprecated umbrella feature for enabling both `retpoline-indirect-calls` and
// `retpoline-indirect-branches` above.
def FeatureRetpoline
    : SubtargetFeature<"retpoline", "DeprecatedUseRetpoline", "true",
          "Remove speculation of indirect branches from the "
          "generated code, either by avoiding them entirely or "
          "lowering them with a speculation blocking construct",
          [FeatureRetpolineIndirectCalls,
           FeatureRetpolineIndirectBranches]>;

// Rely on external thunks for the emitted retpoline calls. This allows users
// to provide their own custom thunk definitions in highly specialized
// environments such as a kernel that does boot-time hot patching.
def FeatureRetpolineExternalThunk
    : SubtargetFeature<"retpoline-external-thunk",
          "UseRetpolineExternalThunk", "true",
          "When lowering an indirect call or branch using a `retpoline`, rely "
          "on the specified user provided thunk rather than emitting one "
          "ourselves. Only has effect when combined with some other retpoline "
          "feature",
          [FeatureRetpolineIndirectCalls]>;

// Mitigate LVI attacks against indirect calls/branches and call returns
def FeatureLVIControlFlowIntegrity
    : SubtargetFeature<"lvi-cfi", "UseLVIControlFlowIntegrity", "true",
          "Prevent indirect calls/branches from using a memory operand, and "
          "precede all indirect calls/branches from a register with an "
          "LFENCE instruction to serialize control flow. Also decompose RET "
          "instructions into a POP+LFENCE+JMP sequence.">;

// Enable SESES to mitigate speculative execution attacks
def FeatureSpeculativeExecutionSideEffectSuppression
    : SubtargetFeature<"seses",
          "UseSpeculativeExecutionSideEffectSuppression", "true",
          "Prevent speculative execution side channel timing attacks by "
          "inserting a speculation barrier before memory reads, memory writes, "
          "and conditional branches. Implies LVI Control Flow integrity.",
          [FeatureLVIControlFlowIntegrity]>;

// Mitigate LVI attacks against data loads
def FeatureLVILoadHardening
    : SubtargetFeature<"lvi-load-hardening", "UseLVILoadHardening", "true",
          "Insert LFENCE instructions to prevent data speculatively injected "
          "into loads from being used maliciously.">;

def FeatureTaggedGlobals
    : SubtargetFeature<"tagged-globals", "AllowTaggedGlobals", "true",
          "Use an instruction sequence for taking the address of a global "
          "that allows a memory tag in the upper address bits.">;

//===----------------------------------------------------------------------===//
// X86 Subtarget Tuning features
//===----------------------------------------------------------------------===//

def TuningSlowSHLD
    : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
          "SHLD instruction is slow">;

def TuningSlowPMULLD
    : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
          "PMULLD instruction is slow">;

def TuningSlowPMADDWD
    : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow", "true",
          "PMADDWD is slower than PMULLD">;

// FIXME: This should not apply to CPUs that do not have SSE.
def TuningSlowUAMem16
    : SubtargetFeature<"slow-unaligned-mem-16", "IsUAMem16Slow", "true",
          "Slow unaligned 16-byte memory access">;

def TuningSlowUAMem32
    : SubtargetFeature<"slow-unaligned-mem-32", "IsUAMem32Slow", "true",
          "Slow unaligned 32-byte memory access">;

def TuningLEAForSP
    : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
          "Use LEA for adjusting the stack pointer">;

// The two slow-divide tunings are exposed under the names of the narrowing
// they enable ("idivl-to-divb" and "idivq-to-divl").
def TuningSlowDivide32
    : SubtargetFeature<"idivl-to-divb", "HasSlowDivide32", "true",
          "Use 8-bit divide for positive values less than 256">;

def TuningSlowDivide64
    : SubtargetFeature<"idivq-to-divl", "HasSlowDivide64", "true",
          "Use 32-bit divide for positive values less than 2^32">;

def TuningPadShortFunctions
    : SubtargetFeature<"pad-short-functions", "PadShortFunctions", "true",
          "Pad short functions">;

// On some processors, instructions that implicitly take two memory operands are
// slow. In practice, this means that CALL, PUSH, and POP with memory operands
// should be avoided in favor of a MOV + register CALL/PUSH/POP.
def TuningSlowTwoMemOps
    : SubtargetFeature<"slow-two-mem-ops", "SlowTwoMemOps", "true",
          "Two memory operand instructions are slow">;

def TuningLEAUsesAG
    : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
          "LEA instruction needs inputs at AG stage">;

def TuningSlowLEA
    : SubtargetFeature<"slow-lea", "SlowLEA", "true",
          "LEA instruction with certain arguments is slow">;

def TuningSlow3OpsLEA
    : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
          "LEA instruction with 3 ops or certain registers is slow">;

def TuningSlowIncDec
    : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
          "INC and DEC instructions are slower than ADD and SUB">;

def TuningPOPCNTFalseDeps
    : SubtargetFeature<"false-deps-popcnt", "HasPOPCNTFalseDeps", "true",
          "POPCNT has a false dependency on dest register">;

def TuningLZCNTFalseDeps
    : SubtargetFeature<"false-deps-lzcnt-tzcnt", "HasLZCNTFalseDeps", "true",
          "LZCNT/TZCNT have a false dependency on dest register">;

// On recent X86 (port bound) processors, it's preferable to combine to a
// single shuffle using a variable mask over multiple fixed shuffles.
def TuningFastVariableCrossLaneShuffle
    : SubtargetFeature<"fast-variable-crosslane-shuffle",
          "HasFastVariableCrossLaneShuffle", "true",
          "Cross-lane shuffles with variable masks are fast">;

def TuningFastVariablePerLaneShuffle
    : SubtargetFeature<"fast-variable-perlane-shuffle",
          "HasFastVariablePerLaneShuffle", "true",
          "Per-lane shuffles with variable masks are fast">;

// On some X86 processors, a vzeroupper instruction should be inserted after
// using ymm/zmm registers before executing code that may use SSE instructions.
def TuningInsertVZEROUPPER
    : SubtargetFeature<"vzeroupper", "InsertVZEROUPPER", "true",
          "Should insert vzeroupper instructions">;

// TuningFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency
// than the corresponding NR code. TuningFastVectorFSQRT should be enabled if
// vector FSQRT has higher throughput than the corresponding NR code.
// The idea is that throughput bound code is likely to be vectorized, so for
// vectorized code we should care about the throughput of SQRT operations.
// But if the code is scalar that probably means that the code has some kind of
// dependency and we should care more about reducing the latency.
def TuningFastScalarFSQRT
    : SubtargetFeature<"fast-scalar-fsqrt", "HasFastScalarFSQRT", "true",
          "Scalar SQRT is fast (disable Newton-Raphson)">;

def TuningFastVectorFSQRT
    : SubtargetFeature<"fast-vector-fsqrt", "HasFastVectorFSQRT", "true",
          "Vector SQRT is fast (disable Newton-Raphson)">;

// If lzcnt has equivalent latency/throughput to most simple integer ops, it can
// be used to replace test/set sequences.
def TuningFastLZCNT
    : SubtargetFeature<"fast-lzcnt", "HasFastLZCNT", "true",
          "LZCNT instructions are as fast as most simple integer ops">;

// If the target can efficiently decode NOPs up to 7-bytes in length.
def TuningFast7ByteNOP
    : SubtargetFeature<"fast-7bytenop", "HasFast7ByteNOP", "true",
          "Target can quickly decode up to 7 byte NOPs">;

// If the target can efficiently decode NOPs up to 11-bytes in length.
def TuningFast11ByteNOP
    : SubtargetFeature<"fast-11bytenop", "HasFast11ByteNOP", "true",
          "Target can quickly decode up to 11 byte NOPs">;

// If the target can efficiently decode NOPs up to 15-bytes in length.
def TuningFast15ByteNOP
    : SubtargetFeature<"fast-15bytenop", "HasFast15ByteNOP", "true",
          "Target can quickly decode up to 15 byte NOPs">;

// Sandy Bridge and newer processors can use SHLD with the same source on both
// inputs to implement rotate to avoid the partial flag update of the normal
// rotate instructions.
def TuningFastSHLDRotate
    : SubtargetFeature<"fast-shld-rotate", "HasFastSHLDRotate", "true",
          "SHLD can be used as a faster rotate">;

// Bulldozer and newer processors can merge CMP/TEST (but not other
// instructions) with conditional branches.
def TuningBranchFusion
    : SubtargetFeature<"branchfusion", "HasBranchFusion", "true",
          "CMP/TEST can be fused with conditional branches">;

// Sandy Bridge and newer processors have many instructions that can be
// fused with conditional branches and pass through the CPU as a single
// operation.
def TuningMacroFusion
    : SubtargetFeature<"macrofusion", "HasMacroFusion", "true",
          "Various instructions can be fused with conditional branches">;

// Gather is available since Haswell (AVX2 set). So technically, we can
// generate Gathers on all AVX2 processors. But the overhead on HSW is high.
// Skylake Client processor has faster Gathers than HSW and performance is
// similar to Skylake Server (AVX-512).
def TuningFastGather
    : SubtargetFeature<"fast-gather", "HasFastGather", "true",
          "Indicates if gather is reasonably fast">;

def TuningPrefer128Bit
    : SubtargetFeature<"prefer-128-bit", "Prefer128Bit", "true",
          "Prefer 128-bit AVX instructions">;

def TuningPrefer256Bit
    : SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true",
          "Prefer 256-bit AVX instructions">;

def TuningPreferMaskRegisters
    : SubtargetFeature<"prefer-mask-registers", "PreferMaskRegisters", "true",
          "Prefer AVX512 mask registers over PTEST/MOVMSK">;

def TuningFastBEXTR
    : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true",
          "Indicates that the BEXTR instruction is implemented as a single uop "
          "with good throughput">;

// Combine vector math operations with shuffles into horizontal math
// instructions if a CPU implements horizontal operations (introduced with
// SSE3) with better latency/throughput than the alternative sequence.
def TuningFastHorizontalOps
    : SubtargetFeature<"fast-hops", "HasFastHorizontalOps", "true",
          "Prefer horizontal vector math instructions (haddp, phsub, etc.) over "
          "normal vector instructions with shuffles">;

def TuningFastScalarShiftMasks
    : SubtargetFeature<"fast-scalar-shift-masks",
          "HasFastScalarShiftMasks", "true",
          "Prefer a left/right scalar logical shift pair over a shift+and pair">;

def TuningFastVectorShiftMasks
    : SubtargetFeature<"fast-vector-shift-masks",
          "HasFastVectorShiftMasks", "true",
          "Prefer a left/right vector logical shift pair over a shift+and pair">;

def TuningFastMOVBE
    : SubtargetFeature<"fast-movbe", "HasFastMOVBE", "true",
          "Prefer a movbe over a single-use load + bswap / single-use bswap + store">;

def TuningUseSLMArithCosts
    : SubtargetFeature<"use-slm-arith-costs", "UseSLMArithCosts", "true",
          "Use Silvermont specific arithmetic costs">;

def TuningUseGLMDivSqrtCosts
    : SubtargetFeature<"use-glm-div-sqrt-costs", "UseGLMDivSqrtCosts", "true",
          "Use Goldmont specific floating point div/sqrt costs">;

// Enable use of alias analysis during code generation.
def FeatureUseAA
    : SubtargetFeature<"use-aa", "UseAA", "true",
          "Use alias analysis during codegen">;

//===----------------------------------------------------------------------===//
// X86 CPU Families
// TODO: Remove these - use general tuning features to determine codegen.
588//===----------------------------------------------------------------------===// 589 590// Bonnell 591def ProcIntelAtom : SubtargetFeature<"", "X86ProcFamily", "IntelAtom", "">; 592 593//===----------------------------------------------------------------------===// 594// Register File Description 595//===----------------------------------------------------------------------===// 596 597include "X86RegisterInfo.td" 598include "X86RegisterBanks.td" 599 600//===----------------------------------------------------------------------===// 601// Instruction Descriptions 602//===----------------------------------------------------------------------===// 603 604include "X86Schedule.td" 605include "X86InstrInfo.td" 606include "X86SchedPredicates.td" 607 608def X86InstrInfo : InstrInfo; 609 610//===----------------------------------------------------------------------===// 611// X86 Scheduler Models 612//===----------------------------------------------------------------------===// 613 614include "X86ScheduleAtom.td" 615include "X86SchedSandyBridge.td" 616include "X86SchedHaswell.td" 617include "X86SchedBroadwell.td" 618include "X86ScheduleSLM.td" 619include "X86ScheduleZnver1.td" 620include "X86ScheduleZnver2.td" 621include "X86ScheduleZnver3.td" 622include "X86ScheduleBdVer2.td" 623include "X86ScheduleBtVer2.td" 624include "X86SchedSkylakeClient.td" 625include "X86SchedSkylakeServer.td" 626include "X86SchedIceLake.td" 627 628//===----------------------------------------------------------------------===// 629// X86 Processor Feature Lists 630//===----------------------------------------------------------------------===// 631 632def ProcessorFeatures { 633 // x86-64 and x86-64-v[234] 634 list<SubtargetFeature> X86_64V1Features = [ 635 FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX, FeatureSSE2, 636 FeatureFXSR, FeatureNOPL, Feature64Bit 637 ]; 638 list<SubtargetFeature> X86_64V2Features = !listconcat(X86_64V1Features, [ 639 FeatureCMPXCHG16B, FeatureLAHFSAHF, 
FeatureCRC32, FeaturePOPCNT, 640 FeatureSSE42 641 ]); 642 list<SubtargetFeature> X86_64V3Features = !listconcat(X86_64V2Features, [ 643 FeatureAVX2, FeatureBMI, FeatureBMI2, FeatureF16C, FeatureFMA, FeatureLZCNT, 644 FeatureMOVBE, FeatureXSAVE 645 ]); 646 list<SubtargetFeature> X86_64V4Features = !listconcat(X86_64V3Features, [ 647 FeatureBWI, 648 FeatureCDI, 649 FeatureDQI, 650 FeatureVLX, 651 ]); 652 653 // Nehalem 654 list<SubtargetFeature> NHMFeatures = X86_64V2Features; 655 list<SubtargetFeature> NHMTuning = [TuningMacroFusion, 656 TuningInsertVZEROUPPER]; 657 658 // Westmere 659 list<SubtargetFeature> WSMAdditionalFeatures = [FeaturePCLMUL]; 660 list<SubtargetFeature> WSMTuning = NHMTuning; 661 list<SubtargetFeature> WSMFeatures = 662 !listconcat(NHMFeatures, WSMAdditionalFeatures); 663 664 // Sandybridge 665 list<SubtargetFeature> SNBAdditionalFeatures = [FeatureAVX, 666 FeatureXSAVE, 667 FeatureXSAVEOPT]; 668 list<SubtargetFeature> SNBTuning = [TuningMacroFusion, 669 TuningSlow3OpsLEA, 670 TuningSlowDivide64, 671 TuningSlowUAMem32, 672 TuningFastScalarFSQRT, 673 TuningFastSHLDRotate, 674 TuningFast15ByteNOP, 675 TuningPOPCNTFalseDeps, 676 TuningInsertVZEROUPPER]; 677 list<SubtargetFeature> SNBFeatures = 678 !listconcat(WSMFeatures, SNBAdditionalFeatures); 679 680 // Ivybridge 681 list<SubtargetFeature> IVBAdditionalFeatures = [FeatureRDRAND, 682 FeatureF16C, 683 FeatureFSGSBase]; 684 list<SubtargetFeature> IVBTuning = SNBTuning; 685 list<SubtargetFeature> IVBFeatures = 686 !listconcat(SNBFeatures, IVBAdditionalFeatures); 687 688 // Haswell 689 list<SubtargetFeature> HSWAdditionalFeatures = [FeatureAVX2, 690 FeatureBMI, 691 FeatureBMI2, 692 FeatureERMSB, 693 FeatureFMA, 694 FeatureINVPCID, 695 FeatureLZCNT, 696 FeatureMOVBE]; 697 list<SubtargetFeature> HSWTuning = [TuningMacroFusion, 698 TuningSlow3OpsLEA, 699 TuningSlowDivide64, 700 TuningFastScalarFSQRT, 701 TuningFastSHLDRotate, 702 TuningFast15ByteNOP, 703 TuningFastVariableCrossLaneShuffle, 704 
TuningFastVariablePerLaneShuffle, 705 TuningPOPCNTFalseDeps, 706 TuningLZCNTFalseDeps, 707 TuningInsertVZEROUPPER]; 708 list<SubtargetFeature> HSWFeatures = 709 !listconcat(IVBFeatures, HSWAdditionalFeatures); 710 711 // Broadwell 712 list<SubtargetFeature> BDWAdditionalFeatures = [FeatureADX, 713 FeatureRDSEED, 714 FeaturePRFCHW]; 715 list<SubtargetFeature> BDWTuning = HSWTuning; 716 list<SubtargetFeature> BDWFeatures = 717 !listconcat(HSWFeatures, BDWAdditionalFeatures); 718 719 // Skylake 720 list<SubtargetFeature> SKLAdditionalFeatures = [FeatureAES, 721 FeatureXSAVEC, 722 FeatureXSAVES, 723 FeatureCLFLUSHOPT]; 724 list<SubtargetFeature> SKLTuning = [TuningFastGather, 725 TuningMacroFusion, 726 TuningSlow3OpsLEA, 727 TuningSlowDivide64, 728 TuningFastScalarFSQRT, 729 TuningFastVectorFSQRT, 730 TuningFastSHLDRotate, 731 TuningFast15ByteNOP, 732 TuningFastVariableCrossLaneShuffle, 733 TuningFastVariablePerLaneShuffle, 734 TuningPOPCNTFalseDeps, 735 TuningInsertVZEROUPPER]; 736 list<SubtargetFeature> SKLFeatures = 737 !listconcat(BDWFeatures, SKLAdditionalFeatures); 738 739 // Skylake-AVX512 740 list<SubtargetFeature> SKXAdditionalFeatures = [FeatureAES, 741 FeatureXSAVEC, 742 FeatureXSAVES, 743 FeatureCLFLUSHOPT, 744 FeatureAVX512, 745 FeatureCDI, 746 FeatureDQI, 747 FeatureBWI, 748 FeatureVLX, 749 FeaturePKU, 750 FeatureCLWB]; 751 list<SubtargetFeature> SKXTuning = [TuningFastGather, 752 TuningMacroFusion, 753 TuningSlow3OpsLEA, 754 TuningSlowDivide64, 755 TuningFastScalarFSQRT, 756 TuningFastVectorFSQRT, 757 TuningFastSHLDRotate, 758 TuningFast15ByteNOP, 759 TuningFastVariableCrossLaneShuffle, 760 TuningFastVariablePerLaneShuffle, 761 TuningPrefer256Bit, 762 TuningPOPCNTFalseDeps, 763 TuningInsertVZEROUPPER]; 764 list<SubtargetFeature> SKXFeatures = 765 !listconcat(BDWFeatures, SKXAdditionalFeatures); 766 767 // Cascadelake 768 list<SubtargetFeature> CLXAdditionalFeatures = [FeatureVNNI]; 769 list<SubtargetFeature> CLXTuning = SKXTuning; 770 
list<SubtargetFeature> CLXFeatures = 771 !listconcat(SKXFeatures, CLXAdditionalFeatures); 772 773 // Cooperlake 774 list<SubtargetFeature> CPXAdditionalFeatures = [FeatureBF16]; 775 list<SubtargetFeature> CPXTuning = SKXTuning; 776 list<SubtargetFeature> CPXFeatures = 777 !listconcat(CLXFeatures, CPXAdditionalFeatures); 778 779 // Cannonlake 780 list<SubtargetFeature> CNLAdditionalFeatures = [FeatureAVX512, 781 FeatureCDI, 782 FeatureDQI, 783 FeatureBWI, 784 FeatureVLX, 785 FeaturePKU, 786 FeatureVBMI, 787 FeatureIFMA, 788 FeatureSHA]; 789 list<SubtargetFeature> CNLTuning = [TuningFastGather, 790 TuningMacroFusion, 791 TuningSlow3OpsLEA, 792 TuningSlowDivide64, 793 TuningFastScalarFSQRT, 794 TuningFastVectorFSQRT, 795 TuningFastSHLDRotate, 796 TuningFast15ByteNOP, 797 TuningFastVariableCrossLaneShuffle, 798 TuningFastVariablePerLaneShuffle, 799 TuningPrefer256Bit, 800 TuningInsertVZEROUPPER]; 801 list<SubtargetFeature> CNLFeatures = 802 !listconcat(SKLFeatures, CNLAdditionalFeatures); 803 804 // Icelake 805 list<SubtargetFeature> ICLAdditionalFeatures = [FeatureBITALG, 806 FeatureVAES, 807 FeatureVBMI2, 808 FeatureVNNI, 809 FeatureVPCLMULQDQ, 810 FeatureVPOPCNTDQ, 811 FeatureGFNI, 812 FeatureRDPID, 813 FeatureFSRM]; 814 list<SubtargetFeature> ICLTuning = [TuningFastGather, 815 TuningMacroFusion, 816 TuningSlow3OpsLEA, 817 TuningSlowDivide64, 818 TuningFastScalarFSQRT, 819 TuningFastVectorFSQRT, 820 TuningFastSHLDRotate, 821 TuningFast15ByteNOP, 822 TuningFastVariableCrossLaneShuffle, 823 TuningFastVariablePerLaneShuffle, 824 TuningPrefer256Bit, 825 TuningInsertVZEROUPPER]; 826 list<SubtargetFeature> ICLFeatures = 827 !listconcat(CNLFeatures, ICLAdditionalFeatures); 828 829 // Icelake Server 830 list<SubtargetFeature> ICXAdditionalFeatures = [FeaturePCONFIG, 831 FeatureCLWB, 832 FeatureWBNOINVD]; 833 list<SubtargetFeature> ICXTuning = ICLTuning; 834 list<SubtargetFeature> ICXFeatures = 835 !listconcat(ICLFeatures, ICXAdditionalFeatures); 836 837 // Tigerlake 838 
list<SubtargetFeature> TGLAdditionalFeatures = [FeatureVP2INTERSECT, 839 FeatureCLWB, 840 FeatureMOVDIRI, 841 FeatureMOVDIR64B, 842 FeatureSHSTK]; 843 list<SubtargetFeature> TGLTuning = ICLTuning; 844 list<SubtargetFeature> TGLFeatures = 845 !listconcat(ICLFeatures, TGLAdditionalFeatures ); 846 847 // Sapphirerapids 848 list<SubtargetFeature> SPRAdditionalFeatures = [FeatureAMXTILE, 849 FeatureAMXINT8, 850 FeatureAMXBF16, 851 FeatureBF16, 852 FeatureSERIALIZE, 853 FeatureCLDEMOTE, 854 FeatureWAITPKG, 855 FeaturePTWRITE, 856 FeatureFP16, 857 FeatureAVXVNNI, 858 FeatureTSXLDTRK, 859 FeatureENQCMD, 860 FeatureSHSTK, 861 FeatureVP2INTERSECT, 862 FeatureMOVDIRI, 863 FeatureMOVDIR64B, 864 FeatureUINTR]; 865 list<SubtargetFeature> SPRTuning = ICXTuning; 866 list<SubtargetFeature> SPRFeatures = 867 !listconcat(ICXFeatures, SPRAdditionalFeatures); 868 869 // Atom 870 list<SubtargetFeature> AtomFeatures = [FeatureX87, 871 FeatureCMPXCHG8B, 872 FeatureCMOV, 873 FeatureMMX, 874 FeatureSSSE3, 875 FeatureFXSR, 876 FeatureNOPL, 877 Feature64Bit, 878 FeatureCMPXCHG16B, 879 FeatureMOVBE, 880 FeatureLAHFSAHF]; 881 list<SubtargetFeature> AtomTuning = [ProcIntelAtom, 882 TuningSlowUAMem16, 883 TuningLEAForSP, 884 TuningSlowDivide32, 885 TuningSlowDivide64, 886 TuningSlowTwoMemOps, 887 TuningLEAUsesAG, 888 TuningPadShortFunctions, 889 TuningInsertVZEROUPPER]; 890 891 // Silvermont 892 list<SubtargetFeature> SLMAdditionalFeatures = [FeatureSSE42, 893 FeatureCRC32, 894 FeaturePOPCNT, 895 FeaturePCLMUL, 896 FeaturePRFCHW, 897 FeatureRDRAND]; 898 list<SubtargetFeature> SLMTuning = [TuningUseSLMArithCosts, 899 TuningSlowTwoMemOps, 900 TuningSlowLEA, 901 TuningSlowIncDec, 902 TuningSlowDivide64, 903 TuningSlowPMULLD, 904 TuningFast7ByteNOP, 905 TuningFastMOVBE, 906 TuningPOPCNTFalseDeps, 907 TuningInsertVZEROUPPER]; 908 list<SubtargetFeature> SLMFeatures = 909 !listconcat(AtomFeatures, SLMAdditionalFeatures); 910 911 // Goldmont 912 list<SubtargetFeature> GLMAdditionalFeatures = [FeatureAES, 
913 FeatureSHA, 914 FeatureRDSEED, 915 FeatureXSAVE, 916 FeatureXSAVEOPT, 917 FeatureXSAVEC, 918 FeatureXSAVES, 919 FeatureCLFLUSHOPT, 920 FeatureFSGSBase]; 921 list<SubtargetFeature> GLMTuning = [TuningUseGLMDivSqrtCosts, 922 TuningSlowTwoMemOps, 923 TuningSlowLEA, 924 TuningSlowIncDec, 925 TuningFastMOVBE, 926 TuningPOPCNTFalseDeps, 927 TuningInsertVZEROUPPER]; 928 list<SubtargetFeature> GLMFeatures = 929 !listconcat(SLMFeatures, GLMAdditionalFeatures); 930 931 // Goldmont Plus 932 list<SubtargetFeature> GLPAdditionalFeatures = [FeaturePTWRITE, 933 FeatureRDPID]; 934 list<SubtargetFeature> GLPTuning = [TuningUseGLMDivSqrtCosts, 935 TuningSlowTwoMemOps, 936 TuningSlowLEA, 937 TuningSlowIncDec, 938 TuningFastMOVBE, 939 TuningInsertVZEROUPPER]; 940 list<SubtargetFeature> GLPFeatures = 941 !listconcat(GLMFeatures, GLPAdditionalFeatures); 942 943 // Tremont 944 list<SubtargetFeature> TRMAdditionalFeatures = [FeatureCLWB, 945 FeatureGFNI]; 946 list<SubtargetFeature> TRMTuning = GLPTuning; 947 list<SubtargetFeature> TRMFeatures = 948 !listconcat(GLPFeatures, TRMAdditionalFeatures); 949 950 // Alderlake 951 list<SubtargetFeature> ADLAdditionalFeatures = [FeatureSERIALIZE, 952 FeaturePCONFIG, 953 FeatureSHSTK, 954 FeatureWIDEKL, 955 FeatureINVPCID, 956 FeatureADX, 957 FeatureFMA, 958 FeatureVAES, 959 FeatureVPCLMULQDQ, 960 FeatureF16C, 961 FeatureBMI, 962 FeatureBMI2, 963 FeatureLZCNT, 964 FeatureAVXVNNI, 965 FeaturePKU, 966 FeatureHRESET, 967 FeatureCLDEMOTE, 968 FeatureMOVDIRI, 969 FeatureMOVDIR64B, 970 FeatureWAITPKG]; 971 list<SubtargetFeature> ADLTuning = SKLTuning; 972 list<SubtargetFeature> ADLFeatures = 973 !listconcat(TRMFeatures, ADLAdditionalFeatures); 974 975 // Knights Landing 976 list<SubtargetFeature> KNLFeatures = [FeatureX87, 977 FeatureCMPXCHG8B, 978 FeatureCMOV, 979 FeatureMMX, 980 FeatureFXSR, 981 FeatureNOPL, 982 Feature64Bit, 983 FeatureCMPXCHG16B, 984 FeatureCRC32, 985 FeaturePOPCNT, 986 FeaturePCLMUL, 987 FeatureXSAVE, 988 FeatureXSAVEOPT, 989 
FeatureLAHFSAHF, 990 FeatureAES, 991 FeatureRDRAND, 992 FeatureF16C, 993 FeatureFSGSBase, 994 FeatureAVX512, 995 FeatureERI, 996 FeatureCDI, 997 FeaturePFI, 998 FeaturePREFETCHWT1, 999 FeatureADX, 1000 FeatureRDSEED, 1001 FeatureMOVBE, 1002 FeatureLZCNT, 1003 FeatureBMI, 1004 FeatureBMI2, 1005 FeatureFMA, 1006 FeaturePRFCHW]; 1007 list<SubtargetFeature> KNLTuning = [TuningSlowDivide64, 1008 TuningSlow3OpsLEA, 1009 TuningSlowIncDec, 1010 TuningSlowTwoMemOps, 1011 TuningPreferMaskRegisters, 1012 TuningFastGather, 1013 TuningFastMOVBE, 1014 TuningSlowPMADDWD]; 1015 // TODO Add AVX5124FMAPS/AVX5124VNNIW features 1016 list<SubtargetFeature> KNMFeatures = 1017 !listconcat(KNLFeatures, [FeatureVPOPCNTDQ]); 1018 1019 // Barcelona 1020 list<SubtargetFeature> BarcelonaFeatures = [FeatureX87, 1021 FeatureCMPXCHG8B, 1022 FeatureSSE4A, 1023 Feature3DNowA, 1024 FeatureFXSR, 1025 FeatureNOPL, 1026 FeatureCMPXCHG16B, 1027 FeaturePRFCHW, 1028 FeatureLZCNT, 1029 FeaturePOPCNT, 1030 FeatureLAHFSAHF, 1031 FeatureCMOV, 1032 Feature64Bit]; 1033 list<SubtargetFeature> BarcelonaTuning = [TuningFastScalarShiftMasks, 1034 TuningSlowSHLD, 1035 TuningInsertVZEROUPPER]; 1036 1037 // Bobcat 1038 list<SubtargetFeature> BtVer1Features = [FeatureX87, 1039 FeatureCMPXCHG8B, 1040 FeatureCMOV, 1041 FeatureMMX, 1042 FeatureSSSE3, 1043 FeatureSSE4A, 1044 FeatureFXSR, 1045 FeatureNOPL, 1046 Feature64Bit, 1047 FeatureCMPXCHG16B, 1048 FeaturePRFCHW, 1049 FeatureLZCNT, 1050 FeaturePOPCNT, 1051 FeatureLAHFSAHF]; 1052 list<SubtargetFeature> BtVer1Tuning = [TuningFast15ByteNOP, 1053 TuningFastScalarShiftMasks, 1054 TuningFastVectorShiftMasks, 1055 TuningSlowSHLD, 1056 TuningInsertVZEROUPPER]; 1057 1058 // Jaguar 1059 list<SubtargetFeature> BtVer2AdditionalFeatures = [FeatureAVX, 1060 FeatureAES, 1061 FeatureCRC32, 1062 FeaturePCLMUL, 1063 FeatureBMI, 1064 FeatureF16C, 1065 FeatureMOVBE, 1066 FeatureXSAVE, 1067 FeatureXSAVEOPT]; 1068 list<SubtargetFeature> BtVer2Tuning = [TuningFastLZCNT, 1069 TuningFastBEXTR, 
1070 TuningFastHorizontalOps, 1071 TuningFast15ByteNOP, 1072 TuningFastScalarShiftMasks, 1073 TuningFastVectorShiftMasks, 1074 TuningFastMOVBE, 1075 TuningSlowSHLD]; 1076 list<SubtargetFeature> BtVer2Features = 1077 !listconcat(BtVer1Features, BtVer2AdditionalFeatures); 1078 1079 // Bulldozer 1080 list<SubtargetFeature> BdVer1Features = [FeatureX87, 1081 FeatureCMPXCHG8B, 1082 FeatureCMOV, 1083 FeatureXOP, 1084 Feature64Bit, 1085 FeatureCMPXCHG16B, 1086 FeatureAES, 1087 FeatureCRC32, 1088 FeaturePRFCHW, 1089 FeaturePCLMUL, 1090 FeatureMMX, 1091 FeatureFXSR, 1092 FeatureNOPL, 1093 FeatureLZCNT, 1094 FeaturePOPCNT, 1095 FeatureXSAVE, 1096 FeatureLWP, 1097 FeatureLAHFSAHF]; 1098 list<SubtargetFeature> BdVer1Tuning = [TuningSlowSHLD, 1099 TuningFast11ByteNOP, 1100 TuningFastScalarShiftMasks, 1101 TuningBranchFusion, 1102 TuningInsertVZEROUPPER]; 1103 1104 // PileDriver 1105 list<SubtargetFeature> BdVer2AdditionalFeatures = [FeatureF16C, 1106 FeatureBMI, 1107 FeatureTBM, 1108 FeatureFMA]; 1109 list<SubtargetFeature> BdVer2AdditionalTuning = [TuningFastBEXTR, 1110 TuningFastMOVBE]; 1111 list<SubtargetFeature> BdVer2Tuning = 1112 !listconcat(BdVer1Tuning, BdVer2AdditionalTuning); 1113 list<SubtargetFeature> BdVer2Features = 1114 !listconcat(BdVer1Features, BdVer2AdditionalFeatures); 1115 1116 // Steamroller 1117 list<SubtargetFeature> BdVer3AdditionalFeatures = [FeatureXSAVEOPT, 1118 FeatureFSGSBase]; 1119 list<SubtargetFeature> BdVer3Tuning = BdVer2Tuning; 1120 list<SubtargetFeature> BdVer3Features = 1121 !listconcat(BdVer2Features, BdVer3AdditionalFeatures); 1122 1123 // Excavator 1124 list<SubtargetFeature> BdVer4AdditionalFeatures = [FeatureAVX2, 1125 FeatureBMI2, 1126 FeatureMOVBE, 1127 FeatureRDRAND, 1128 FeatureMWAITX]; 1129 list<SubtargetFeature> BdVer4Tuning = BdVer3Tuning; 1130 list<SubtargetFeature> BdVer4Features = 1131 !listconcat(BdVer3Features, BdVer4AdditionalFeatures); 1132 1133 1134 // AMD Zen Processors common ISAs 1135 list<SubtargetFeature> ZNFeatures 
= [FeatureADX, 1136 FeatureAES, 1137 FeatureAVX2, 1138 FeatureBMI, 1139 FeatureBMI2, 1140 FeatureCLFLUSHOPT, 1141 FeatureCLZERO, 1142 FeatureCMOV, 1143 Feature64Bit, 1144 FeatureCMPXCHG16B, 1145 FeatureCRC32, 1146 FeatureF16C, 1147 FeatureFMA, 1148 FeatureFSGSBase, 1149 FeatureFXSR, 1150 FeatureNOPL, 1151 FeatureLAHFSAHF, 1152 FeatureLZCNT, 1153 FeatureMMX, 1154 FeatureMOVBE, 1155 FeatureMWAITX, 1156 FeaturePCLMUL, 1157 FeaturePOPCNT, 1158 FeaturePRFCHW, 1159 FeatureRDRAND, 1160 FeatureRDSEED, 1161 FeatureSHA, 1162 FeatureSSE4A, 1163 FeatureX87, 1164 FeatureXSAVE, 1165 FeatureXSAVEC, 1166 FeatureXSAVEOPT, 1167 FeatureXSAVES]; 1168 list<SubtargetFeature> ZNTuning = [TuningFastLZCNT, 1169 TuningFastBEXTR, 1170 TuningFast15ByteNOP, 1171 TuningBranchFusion, 1172 TuningFastScalarFSQRT, 1173 TuningFastVectorFSQRT, 1174 TuningFastScalarShiftMasks, 1175 TuningFastMOVBE, 1176 TuningSlowSHLD, 1177 TuningInsertVZEROUPPER]; 1178 list<SubtargetFeature> ZN2AdditionalFeatures = [FeatureCLWB, 1179 FeatureRDPID, 1180 FeatureWBNOINVD]; 1181 list<SubtargetFeature> ZN2Tuning = ZNTuning; 1182 list<SubtargetFeature> ZN2Features = 1183 !listconcat(ZNFeatures, ZN2AdditionalFeatures); 1184 list<SubtargetFeature> ZN3AdditionalFeatures = [FeatureFSRM, 1185 FeatureINVPCID, 1186 FeaturePKU, 1187 FeatureVAES, 1188 FeatureVPCLMULQDQ]; 1189 list<SubtargetFeature> ZN3AdditionalTuning = 1190 [TuningMacroFusion, 1191 TuningFastVariablePerLaneShuffle]; 1192 list<SubtargetFeature> ZN3Tuning = 1193 !listconcat(ZNTuning, ZN3AdditionalTuning); 1194 list<SubtargetFeature> ZN3Features = 1195 !listconcat(ZN2Features, ZN3AdditionalFeatures); 1196} 1197 1198//===----------------------------------------------------------------------===// 1199// X86 processors supported. 
//===----------------------------------------------------------------------===//

// Proc - Processor entry that always uses GenericModel as its scheduling
// model. Name, Features and TuneFeatures are forwarded unchanged to
// ProcessorModel.
class Proc<string Name,
           list<SubtargetFeature> Features,
           list<SubtargetFeature> TuneFeatures>
    : ProcessorModel<Name, GenericModel, Features, TuneFeatures>;

// ProcModel - Processor entry whose scheduling model is passed explicitly as
// Model; the remaining arguments are forwarded unchanged to ProcessorModel.
class ProcModel<string Name,
                SchedMachineModel Model,
                list<SubtargetFeature> Features,
                list<SubtargetFeature> TuneFeatures>
    : ProcessorModel<Name, Model, Features, TuneFeatures>;

// NOTE: CMPXCHG8B is listed for legacy compatibility, so that it is only
// disabled when i386/i486 is specifically requested.
// NOTE: 64Bit is listed because "generic" is the default llc CPU. The
// X86Subtarget constructor checks that any CPU used in 64-bit mode has
// Feature64Bit enabled. It has no effect on code generation.
def : ProcModel<"generic", SandyBridgeModel,
                [FeatureX87, FeatureCMPXCHG8B, Feature64Bit],
                [TuningSlow3OpsLEA, TuningSlowDivide64, TuningSlowIncDec,
                 TuningMacroFusion, TuningInsertVZEROUPPER]>;

// "i386" and "i486" share one definition: x87 only, without CMPXCHG8B.
foreach P = ["i386", "i486"] in {
  def : Proc<P,
             [FeatureX87],
             [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}

// "i586" and "pentium" share one definition.
foreach P = ["i586", "pentium"] in {
  def : Proc<P,
             [FeatureX87, FeatureCMPXCHG8B],
             [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}

def : Proc<"pentium-mmx",
           [FeatureX87, FeatureCMPXCHG8B, FeatureMMX],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

def : Proc<"i686",
           [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

def : Proc<"pentiumpro",
           [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureNOPL],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

def : Proc<"pentium2",
           [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureCMOV,
            FeatureFXSR, FeatureNOPL],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
// "pentium3" and "pentium3m" share one definition.
foreach P = ["pentium3", "pentium3m"] in {
  def : Proc<P,
             [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE1,
              FeatureFXSR, FeatureNOPL, FeatureCMOV],
             [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}

// Enable the PostRAScheduler for SSE2 and SSE3 class CPUs.
// The intent is to enable it for pentium4, which is the current default
// processor in a vanilla 32-bit clang compilation when no specific
// architecture is specified. This generally gives a nice performance
// increase on silvermont, with largely neutral behavior on other
// contemporary large core processors.
// pentium-m, pentium4m, prescott and nocona are included as a preventative
// measure to avoid performance surprises, in case clang's default CPU
// changes slightly.

// "pentium-m", "pentium4" and "pentium4m" are defined identically.
foreach P = ["pentium-m", "pentium4", "pentium4m"] in {
  def : ProcModel<P, GenericPostRAModel,
                  [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE2,
                   FeatureFXSR, FeatureNOPL, FeatureCMOV],
                  [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}

// Intel Quark.
def : Proc<"lakemont",
           [FeatureCMPXCHG8B],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

// Intel Core Duo.
def : ProcModel<"yonah", SandyBridgeModel,
                [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE3,
                 FeatureFXSR, FeatureNOPL, FeatureCMOV],
                [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

// NetBurst.
def : ProcModel<"prescott", GenericPostRAModel,
                [FeatureX87,
                 FeatureCMPXCHG8B,
                 FeatureMMX,
                 FeatureSSE3,
                 FeatureFXSR,
                 FeatureNOPL,
                 FeatureCMOV],
                [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

// Same SSE level as "prescott", with Feature64Bit and CMPXCHG16B added.
def : ProcModel<"nocona", GenericPostRAModel,
                [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX,
                 FeatureSSE3, FeatureFXSR, FeatureNOPL, Feature64Bit,
                 FeatureCMPXCHG16B],
                [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

// Intel Core 2 Solo/Duo.
def : ProcModel<"core2", SandyBridgeModel,
                [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX,
                 FeatureSSSE3, FeatureFXSR, FeatureNOPL, Feature64Bit,
                 FeatureCMPXCHG16B, FeatureLAHFSAHF],
                [TuningMacroFusion, TuningSlowUAMem16,
                 TuningInsertVZEROUPPER]>;

// Differs from "core2" only in its SSE level (SSE4.1 instead of SSSE3).
def : ProcModel<"penryn", SandyBridgeModel,
                [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX,
                 FeatureSSE41, FeatureFXSR, FeatureNOPL, Feature64Bit,
                 FeatureCMPXCHG16B, FeatureLAHFSAHF],
                [TuningMacroFusion, TuningSlowUAMem16,
                 TuningInsertVZEROUPPER]>;

// Atom CPUs.
foreach P = ["bonnell", "atom"] in {
  def : ProcModel<P, AtomModel,
                  ProcessorFeatures.AtomFeatures,
                  ProcessorFeatures.AtomTuning>;
}

foreach P = ["silvermont", "slm"] in {
  def : ProcModel<P, SLMModel,
                  ProcessorFeatures.SLMFeatures,
                  ProcessorFeatures.SLMTuning>;
}

// Goldmont, Goldmont Plus and Tremont all use the SLM scheduling model here.
def : ProcModel<"goldmont", SLMModel,
                ProcessorFeatures.GLMFeatures,
                ProcessorFeatures.GLMTuning>;
def : ProcModel<"goldmont-plus", SLMModel,
                ProcessorFeatures.GLPFeatures,
                ProcessorFeatures.GLPTuning>;
def : ProcModel<"tremont", SLMModel,
                ProcessorFeatures.TRMFeatures,
                ProcessorFeatures.TRMTuning>;

// "Arrandale" along with corei3 and corei5
foreach P = ["nehalem", "corei7"] in {
  def : ProcModel<P, SandyBridgeModel,
                  ProcessorFeatures.NHMFeatures,
                  ProcessorFeatures.NHMTuning>;
}

// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
def : ProcModel<"westmere", SandyBridgeModel,
                ProcessorFeatures.WSMFeatures,
                ProcessorFeatures.WSMTuning>;

foreach P = ["sandybridge", "corei7-avx"] in {
  def : ProcModel<P, SandyBridgeModel,
                  ProcessorFeatures.SNBFeatures,
                  ProcessorFeatures.SNBTuning>;
}

foreach P = ["ivybridge", "core-avx-i"] in {
  def : ProcModel<P, SandyBridgeModel,
                  ProcessorFeatures.IVBFeatures,
                  ProcessorFeatures.IVBTuning>;
}

foreach P = ["haswell", "core-avx2"] in {
  def : ProcModel<P, HaswellModel,
                  ProcessorFeatures.HSWFeatures,
                  ProcessorFeatures.HSWTuning>;
}

def : ProcModel<"broadwell", BroadwellModel,
                ProcessorFeatures.BDWFeatures,
                ProcessorFeatures.BDWTuning>;

def : ProcModel<"skylake", SkylakeClientModel,
                ProcessorFeatures.SKLFeatures,
                ProcessorFeatures.SKLTuning>;

// FIXME: define KNL scheduler model
// Until then "knl" and "knm" use HaswellModel; "knm" also reuses KNLTuning.
def : ProcModel<"knl", HaswellModel,
                ProcessorFeatures.KNLFeatures,
                ProcessorFeatures.KNLTuning>;
def : ProcModel<"knm", HaswellModel,
                ProcessorFeatures.KNMFeatures,
                ProcessorFeatures.KNLTuning>;

foreach P = ["skylake-avx512", "skx"] in {
  def : ProcModel<P, SkylakeServerModel,
                  ProcessorFeatures.SKXFeatures,
                  ProcessorFeatures.SKXTuning>;
}

def : ProcModel<"cascadelake", SkylakeServerModel,
                ProcessorFeatures.CLXFeatures,
                ProcessorFeatures.CLXTuning>;
def : ProcModel<"cooperlake", SkylakeServerModel,
                ProcessorFeatures.CPXFeatures,
                ProcessorFeatures.CPXTuning>;
def : ProcModel<"cannonlake", SkylakeServerModel,
                ProcessorFeatures.CNLFeatures,
                ProcessorFeatures.CNLTuning>;

// "icelake-client" and "rocketlake" are defined identically.
foreach P = ["icelake-client", "rocketlake"] in {
  def : ProcModel<P, IceLakeModel,
                  ProcessorFeatures.ICLFeatures,
                  ProcessorFeatures.ICLTuning>;
}

def : ProcModel<"icelake-server", IceLakeModel,
                ProcessorFeatures.ICXFeatures,
                ProcessorFeatures.ICXTuning>;
def : ProcModel<"tigerlake", IceLakeModel,
                ProcessorFeatures.TGLFeatures,
                ProcessorFeatures.TGLTuning>;
def : ProcModel<"sapphirerapids", SkylakeServerModel,
                ProcessorFeatures.SPRFeatures,
                ProcessorFeatures.SPRTuning>;
def : ProcModel<"alderlake", SkylakeClientModel,
                ProcessorFeatures.ADLFeatures,
                ProcessorFeatures.ADLTuning>;

// AMD CPUs.

def : Proc<"k6",
           [FeatureX87, FeatureCMPXCHG8B, FeatureMMX],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

// "k6-2" and "k6-3" are defined identically.
foreach P = ["k6-2", "k6-3"] in {
  def : Proc<P,
             [FeatureX87, FeatureCMPXCHG8B, Feature3DNow],
             [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}

foreach P = ["athlon", "athlon-tbird"] in {
  def : Proc<P,
             [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, Feature3DNowA,
              FeatureNOPL],
             [TuningSlowSHLD, TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}

foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in {
  def : Proc<P,
             [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureSSE1,
              Feature3DNowA, FeatureFXSR, FeatureNOPL],
             [TuningSlowSHLD, TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}

foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in {
  def : Proc<P,
             [FeatureX87, FeatureCMPXCHG8B, FeatureSSE2, Feature3DNowA,
              FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureCMOV],
             [TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16,
              TuningInsertVZEROUPPER]>;
}

foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
  def : Proc<P,
             [FeatureX87, FeatureCMPXCHG8B, FeatureSSE3, Feature3DNowA,
              FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B, FeatureCMOV,
              Feature64Bit],
             [TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16,
              TuningInsertVZEROUPPER]>;
}

foreach P = ["amdfam10", "barcelona"] in {
  def : Proc<P,
             ProcessorFeatures.BarcelonaFeatures,
             ProcessorFeatures.BarcelonaTuning>;
}

// Bobcat
def : Proc<"btver1",
           ProcessorFeatures.BtVer1Features,
           ProcessorFeatures.BtVer1Tuning>;
// Jaguar
def : ProcModel<"btver2", BtVer2Model,
                ProcessorFeatures.BtVer2Features,
                ProcessorFeatures.BtVer2Tuning>;

// Bulldozer (uses the BdVer2 scheduling model)
def : ProcModel<"bdver1", BdVer2Model,
                ProcessorFeatures.BdVer1Features,
                ProcessorFeatures.BdVer1Tuning>;
// Piledriver
def : ProcModel<"bdver2", BdVer2Model,
                ProcessorFeatures.BdVer2Features,
                ProcessorFeatures.BdVer2Tuning>;
// Steamroller
def : Proc<"bdver3",
           ProcessorFeatures.BdVer3Features,
           ProcessorFeatures.BdVer3Tuning>;
// Excavator
def : Proc<"bdver4",
           ProcessorFeatures.BdVer4Features,
           ProcessorFeatures.BdVer4Tuning>;

// Zen family: each entry has its own scheduling model.
def : ProcModel<"znver1", Znver1Model,
                ProcessorFeatures.ZNFeatures,
                ProcessorFeatures.ZNTuning>;
def : ProcModel<"znver2", Znver2Model,
                ProcessorFeatures.ZN2Features,
                ProcessorFeatures.ZN2Tuning>;
def : ProcModel<"znver3", Znver3Model,
                ProcessorFeatures.ZN3Features,
                ProcessorFeatures.ZN3Tuning>;

def : Proc<"geode",
           [FeatureX87, FeatureCMPXCHG8B, Feature3DNowA],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

def : Proc<"winchip-c6",
           [FeatureX87, FeatureMMX],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

// "winchip2" and "c3" are defined identically.
foreach P = ["winchip2", "c3"] in {
  def : Proc<P,
             [FeatureX87, Feature3DNow],
             [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}

def : Proc<"c3-2",
           [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE1,
            FeatureFXSR, FeatureCMOV],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

// We also provide a generic 64-bit specific x86 processor model which tries to
// be good for modern chips without enabling instruction set encodings past the
// basic SSE2 and 64-bit ones. It disables things that are slow on any
// mainstream, modern 64-bit x86 chip, and enables features that are generally
// beneficial.
//
// We currently use the Sandy Bridge model as the default scheduling model as
// we use it across Nehalem, Westmere, Sandy Bridge, and Ivy Bridge, which
// covers a huge swath of x86 processors. If there are specific scheduling
// knobs which need to be tuned differently for AMD chips, we might consider
// forming a common base for them.
def : ProcModel<"x86-64", SandyBridgeModel,
                ProcessorFeatures.X86_64V1Features,
                [TuningSlow3OpsLEA, TuningSlowDivide64, TuningSlowIncDec,
                 TuningMacroFusion, TuningInsertVZEROUPPER]>;

// x86-64 micro-architecture levels.
def : ProcModel<"x86-64-v2", SandyBridgeModel,
                ProcessorFeatures.X86_64V2Features,
                ProcessorFeatures.SNBTuning>;
// Close to Haswell.
def : ProcModel<"x86-64-v3", HaswellModel,
                ProcessorFeatures.X86_64V3Features,
                ProcessorFeatures.HSWTuning>;
// Close to the AVX-512 level implemented by Xeon Scalable Processors.
def : ProcModel<"x86-64-v4", SkylakeServerModel,
                ProcessorFeatures.X86_64V4Features,
                ProcessorFeatures.SKXTuning>;

//===----------------------------------------------------------------------===//
// Calling Conventions
//===----------------------------------------------------------------------===//

include "X86CallingConv.td"


//===----------------------------------------------------------------------===//
// Assembly Parser
//===----------------------------------------------------------------------===//

// Parser variant 0: AT&T syntax.
def ATTAsmParserVariant : AsmParserVariant {
  int Variant = 0;

  // Name of this variant.
  string Name = "att";

  // Comment delimiter; comments in assembly strings are discarded.
  string CommentDelimiter = "#";

  // Prefix used to recognize hard coded registers.
  string RegisterPrefix = "%";
}

// Parser variant 1: Intel syntax.
def IntelAsmParserVariant : AsmParserVariant {
  int Variant = 1;

  // Name of this variant.
  string Name = "intel";

  // Comment delimiter; comments in assembly strings are discarded.
  string CommentDelimiter = ";";

  // Hard coded registers carry no prefix in this variant.
  string RegisterPrefix = "";
}

//===----------------------------------------------------------------------===//
// Assembly Printers
//===----------------------------------------------------------------------===//

// The X86 target supports two different syntaxes for emitting machine code.
// The choice is controlled by the -x86-asm-syntax={att|intel} option.
def ATTAsmWriter : AsmWriter {
  string AsmWriterClassName = "ATTInstPrinter";
  int Variant = 0;
}
def IntelAsmWriter : AsmWriter {
  string AsmWriterClassName = "IntelInstPrinter";
  int Variant = 1;
}

// Top-level target record: ties together the instruction set, both assembly
// parser variants and both assembly writers defined above.
def X86 : Target {
  // Information about the instructions...
  let InstructionSet = X86InstrInfo;
  let AssemblyParserVariants = [ATTAsmParserVariant, IntelAsmParserVariant];
  let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter];
  let AllowRegisterRenaming = 1;
}

//===----------------------------------------------------------------------===//
// Pfm Counters
//===----------------------------------------------------------------------===//

include "X86PfmCounters.td"