//===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This is a target description file for the Intel i386 architecture, referred
// to here as the "X86" architecture.
//
//===----------------------------------------------------------------------===//

// Get the target-independent interfaces which we are implementing...
//
include "llvm/Target/Target.td"

//===----------------------------------------------------------------------===//
// X86 Subtarget state
//
// disregarding specific ABI / programming model
def Is64Bit
    : SubtargetFeature<"64bit-mode", "Is64Bit", "true",
                       "64-bit mode (x86_64)">;
def Is32Bit
    : SubtargetFeature<"32bit-mode", "Is32Bit", "true",
                       "32-bit mode (80386)">;
def Is16Bit
    : SubtargetFeature<"16bit-mode", "Is16Bit", "true",
                       "16-bit mode (i8086)">;

//===----------------------------------------------------------------------===//
// X86 Subtarget ISA features
//===----------------------------------------------------------------------===//

// Every record below passes its arguments to SubtargetFeature positionally
// (see that class in llvm/Target/Target.td): the feature name string, the
// subtarget attribute it sets, the value assigned to that attribute, a help
// description, and an optional list of features that this feature implies.

def FeatureX87
    : SubtargetFeature<"x87", "HasX87", "true",
                       "Enable X87 float instructions">;

def FeatureNOPL
    : SubtargetFeature<"nopl", "HasNOPL", "true",
                       "Enable NOPL instruction (generally pentium pro+)">;

def FeatureCMOV
    : SubtargetFeature<"cmov", "HasCMOV", "true",
                       "Enable conditional move instructions">;

def FeatureCX8
    : SubtargetFeature<"cx8", "HasCX8", "true",
                       "Support CMPXCHG8B instructions">;

def FeatureCRC32
    : SubtargetFeature<"crc32", "HasCRC32", "true",
                       "Enable SSE 4.2 CRC32 instruction (used when SSE4.2 is supported but function is GPR only)">;

def FeaturePOPCNT
    : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
                       "Support POPCNT instruction">;

def FeatureFXSR
    : SubtargetFeature<"fxsr", "HasFXSR", "true",
                       "Support fxsave/fxrestore instructions">;

def FeatureXSAVE
    : SubtargetFeature<"xsave", "HasXSAVE", "true",
                       "Support xsave instructions">;

def FeatureXSAVEOPT
    : SubtargetFeature<"xsaveopt", "HasXSAVEOPT", "true",
                       "Support xsaveopt instructions",
                       [FeatureXSAVE]>;

def FeatureXSAVEC
    : SubtargetFeature<"xsavec", "HasXSAVEC", "true",
                       "Support xsavec instructions",
                       [FeatureXSAVE]>;

def FeatureXSAVES
    : SubtargetFeature<"xsaves", "HasXSAVES", "true",
                       "Support xsaves instructions",
                       [FeatureXSAVE]>;

// The SSE levels form a chain: each level implies the one before it.
def FeatureSSE1
    : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
                       "Enable SSE instructions">;
def FeatureSSE2
    : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
                       "Enable SSE2 instructions",
                       [FeatureSSE1]>;
def FeatureSSE3
    : SubtargetFeature<"sse3", "X86SSELevel", "SSE3",
                       "Enable SSE3 instructions",
                       [FeatureSSE2]>;
def FeatureSSSE3
    : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3",
                       "Enable SSSE3 instructions",
                       [FeatureSSE3]>;
def FeatureSSE41
    : SubtargetFeature<"sse4.1", "X86SSELevel", "SSE41",
                       "Enable SSE 4.1 instructions",
                       [FeatureSSSE3]>;
def FeatureSSE42
    : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42",
                       "Enable SSE 4.2 instructions",
                       [FeatureSSE41]>;
// The MMX subtarget feature is separate from the rest of the SSE features
// because it's important (for odd compatibility reasons) to be able to
// turn it off explicitly while allowing SSE+ to be on.
def FeatureMMX
    : SubtargetFeature<"mmx", "X863DNowLevel", "MMX",
                       "Enable MMX instructions">;
def Feature3DNow
    : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
                       "Enable 3DNow! instructions",
                       [FeatureMMX]>;
def Feature3DNowA
    : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
                       "Enable 3DNow! Athlon instructions",
                       [Feature3DNow]>;
// All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied
// feature, because SSE2 can be disabled (e.g. for compiling OS kernels)
// without disabling 64-bit mode. Nothing should imply this feature bit. It
// is used to enforce that only 64-bit capable CPUs are used in 64-bit mode.
def FeatureX86_64
    : SubtargetFeature<"64bit", "HasX86_64", "true",
                       "Support 64-bit instructions">;
def FeatureCX16
    : SubtargetFeature<"cx16", "HasCX16", "true",
                       "64-bit with cmpxchg16b (this is true for most x86-64 chips, but not the first AMD chips)",
                       [FeatureCX8]>;
def FeatureSSE4A
    : SubtargetFeature<"sse4a", "HasSSE4A", "true",
                       "Support SSE 4a instructions",
                       [FeatureSSE3]>;

def FeatureAVX
    : SubtargetFeature<"avx", "X86SSELevel", "AVX",
                       "Enable AVX instructions",
                       [FeatureSSE42]>;
def FeatureAVX2
    : SubtargetFeature<"avx2", "X86SSELevel", "AVX2",
                       "Enable AVX2 instructions",
                       [FeatureAVX]>;
// Help text fixed: the operation is a fused multiply-add, not "multiple-add".
def FeatureFMA
    : SubtargetFeature<"fma", "HasFMA", "true",
                       "Enable three-operand fused multiply-add",
                       [FeatureAVX]>;
def FeatureF16C
    : SubtargetFeature<"f16c", "HasF16C", "true",
                       "Support 16-bit floating point conversion instructions",
                       [FeatureAVX]>;
def FeatureAVX512
    : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512",
                       "Enable AVX-512 instructions",
                       [FeatureAVX2, FeatureFMA, FeatureF16C]>;
def FeatureERI
    : SubtargetFeature<"avx512er", "HasERI", "true",
                       "Enable AVX-512 Exponential and Reciprocal Instructions",
                       [FeatureAVX512]>;
def FeatureCDI
    : SubtargetFeature<"avx512cd", "HasCDI", "true",
                       "Enable AVX-512 Conflict Detection Instructions",
                       [FeatureAVX512]>;
def FeatureVPOPCNTDQ
    : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ", "true",
                       "Enable AVX-512 Population Count Instructions",
                       [FeatureAVX512]>;
def FeaturePFI
    : SubtargetFeature<"avx512pf", "HasPFI", "true",
                       "Enable AVX-512 PreFetch Instructions",
                       [FeatureAVX512]>;
def FeaturePREFETCHI
    : SubtargetFeature<"prefetchi", "HasPREFETCHI", "true",
                       "Prefetch instruction with T0 or T1 Hint">;
def FeaturePREFETCHWT1
    : SubtargetFeature<"prefetchwt1", "HasPREFETCHWT1", "true",
                       "Prefetch with Intent to Write and T1 Hint">;
def FeatureDQI
    : SubtargetFeature<"avx512dq", "HasDQI", "true",
                       "Enable AVX-512 Doubleword and Quadword Instructions",
                       [FeatureAVX512]>;
def FeatureBWI
    : SubtargetFeature<"avx512bw", "HasBWI", "true",
                       "Enable AVX-512 Byte and Word Instructions",
                       [FeatureAVX512]>;
def FeatureVLX
    : SubtargetFeature<"avx512vl", "HasVLX", "true",
                       "Enable AVX-512 Vector Length eXtensions",
                       [FeatureAVX512]>;
def FeatureVBMI
    : SubtargetFeature<"avx512vbmi", "HasVBMI", "true",
                       "Enable AVX-512 Vector Byte Manipulation Instructions",
                       [FeatureBWI]>;
def FeatureVBMI2
    : SubtargetFeature<"avx512vbmi2", "HasVBMI2", "true",
                       "Enable AVX-512 further Vector Byte Manipulation Instructions",
                       [FeatureBWI]>;
def FeatureAVXIFMA
    : SubtargetFeature<"avxifma", "HasAVXIFMA", "true",
                       "Enable AVX-IFMA",
                       [FeatureAVX2]>;
// Help text fixed: "Multiply-Add", not "Multiple-Add".
def FeatureIFMA
    : SubtargetFeature<"avx512ifma", "HasIFMA", "true",
                       "Enable AVX-512 Integer Fused Multiply-Add",
                       [FeatureAVX512]>;
def FeaturePKU
    : SubtargetFeature<"pku", "HasPKU", "true",
                       "Enable protection keys">;
def FeatureVNNI
    : SubtargetFeature<"avx512vnni", "HasVNNI", "true",
                       "Enable AVX-512 Vector Neural Network Instructions",
                       [FeatureAVX512]>;
def FeatureAVXVNNI
    : SubtargetFeature<"avxvnni", "HasAVXVNNI", "true",
                       "Support AVX_VNNI encoding",
                       [FeatureAVX2]>;
def FeatureBF16
    : SubtargetFeature<"avx512bf16", "HasBF16", "true",
                       "Support bfloat16 floating point",
                       [FeatureBWI]>;
def FeatureBITALG
    : SubtargetFeature<"avx512bitalg", "HasBITALG", "true",
                       "Enable AVX-512 Bit Algorithms",
                       [FeatureBWI]>;
def FeatureVP2INTERSECT
    : SubtargetFeature<"avx512vp2intersect", "HasVP2INTERSECT", "true",
                       "Enable AVX-512 vp2intersect",
                       [FeatureAVX512]>;
// FIXME: FP16 scalar intrinsics use the type v8f16, which is supposed to be
// guarded under condition hasVLX. So we imply it in FeatureFP16 currently.
// FIXME: FP16 conversion between f16 and i64 customize type v8i64, which is
// supposed to be guarded under condition hasDQI. So we imply it in FeatureFP16
// currently.
def FeatureFP16
    : SubtargetFeature<"avx512fp16", "HasFP16", "true",
                       "Support 16-bit floating point",
                       [FeatureBWI, FeatureVLX, FeatureDQI]>;
def FeatureAVXVNNIINT8
    : SubtargetFeature<"avxvnniint8", "HasAVXVNNIINT8", "true",
                       "Enable AVX-VNNI-INT8",
                       [FeatureAVX2]>;
def FeatureAVXVNNIINT16
    : SubtargetFeature<"avxvnniint16", "HasAVXVNNIINT16", "true",
                       "Enable AVX-VNNI-INT16",
                       [FeatureAVX2]>;
def FeaturePCLMUL
    : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
                       "Enable packed carry-less multiplication instructions",
                       [FeatureSSE2]>;
def FeatureGFNI
    : SubtargetFeature<"gfni", "HasGFNI", "true",
                       "Enable Galois Field Arithmetic Instructions",
                       [FeatureSSE2]>;
def FeatureVPCLMULQDQ
    : SubtargetFeature<"vpclmulqdq", "HasVPCLMULQDQ", "true",
                       "Enable vpclmulqdq instructions",
                       [FeatureAVX, FeaturePCLMUL]>;
// Help text fixed: the operation is a fused multiply-add, not "multiple-add".
def FeatureFMA4
    : SubtargetFeature<"fma4", "HasFMA4", "true",
                       "Enable four-operand fused multiply-add",
                       [FeatureAVX, FeatureSSE4A]>;
def FeatureXOP
    : SubtargetFeature<"xop", "HasXOP", "true",
                       "Enable XOP instructions",
                       [FeatureFMA4]>;
def FeatureSSEUnalignedMem
    : SubtargetFeature<"sse-unaligned-mem", "HasSSEUnalignedMem", "true",
                       "Allow unaligned memory operands with SSE instructions (this may require setting a configuration bit in the processor)">;
def FeatureAES
    : SubtargetFeature<"aes", "HasAES", "true",
                       "Enable AES instructions",
                       [FeatureSSE2]>;
def FeatureVAES
    : SubtargetFeature<"vaes", "HasVAES", "true",
                       "Promote selected AES instructions to AVX512/AVX registers",
                       [FeatureAVX, FeatureAES]>;
def FeatureTBM
    : SubtargetFeature<"tbm", "HasTBM", "true",
                       "Enable TBM instructions">;
def FeatureLWP
    : SubtargetFeature<"lwp", "HasLWP", "true",
                       "Enable LWP instructions">;
def FeatureMOVBE
    : SubtargetFeature<"movbe", "HasMOVBE", "true",
                       "Support MOVBE instruction">;
def FeatureRDRAND
    : SubtargetFeature<"rdrnd", "HasRDRAND", "true",
                       "Support RDRAND instruction">;
def FeatureFSGSBase
    : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true",
                       "Support FS/GS Base instructions">;
def FeatureLZCNT
    : SubtargetFeature<"lzcnt", "HasLZCNT", "true",
                       "Support LZCNT instruction">;
def FeatureBMI
    : SubtargetFeature<"bmi", "HasBMI", "true",
                       "Support BMI instructions">;
def FeatureBMI2
    : SubtargetFeature<"bmi2", "HasBMI2", "true",
                       "Support BMI2 instructions">;
def FeatureRTM
    : SubtargetFeature<"rtm", "HasRTM", "true",
                       "Support RTM instructions">;
def FeatureADX
    : SubtargetFeature<"adx", "HasADX", "true",
                       "Support ADX instructions">;
def FeatureSHA
    : SubtargetFeature<"sha", "HasSHA", "true",
                       "Enable SHA instructions",
                       [FeatureSSE2]>;
def FeatureSHA512
    : SubtargetFeature<"sha512", "HasSHA512", "true",
                       "Support SHA512 instructions",
                       [FeatureAVX]>;
// Processor supports CET SHSTK - Control-Flow Enforcement Technology
// using Shadow Stack
def FeatureSHSTK
    : SubtargetFeature<"shstk", "HasSHSTK", "true",
                       "Support CET Shadow-Stack instructions">;
def FeatureSM3
    : SubtargetFeature<"sm3", "HasSM3", "true",
                       "Support SM3 instructions",
                       [FeatureAVX]>;
def FeatureSM4
    : SubtargetFeature<"sm4", "HasSM4", "true",
                       "Support SM4 instructions",
                       [FeatureAVX]>;
def FeaturePRFCHW
    : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
                       "Support PRFCHW instructions">;
def FeatureRDSEED
    : SubtargetFeature<"rdseed", "HasRDSEED", "true",
                       "Support RDSEED instruction">;
def FeatureLAHFSAHF64
    : SubtargetFeature<"sahf", "HasLAHFSAHF64", "true",
                       "Support LAHF and SAHF instructions in 64-bit mode">;
def FeatureMWAITX
    : SubtargetFeature<"mwaitx", "HasMWAITX", "true",
                       "Enable MONITORX/MWAITX timer functionality">;
def FeatureCLZERO
    : SubtargetFeature<"clzero", "HasCLZERO", "true",
                       "Enable Cache Line Zero">;
def FeatureCLDEMOTE
    : SubtargetFeature<"cldemote", "HasCLDEMOTE", "true",
                       "Enable Cache Line Demote">;
def FeaturePTWRITE
    : SubtargetFeature<"ptwrite", "HasPTWRITE", "true",
                       "Support ptwrite instruction">;
def FeatureAMXTILE
    : SubtargetFeature<"amx-tile", "HasAMXTILE", "true",
                       "Support AMX-TILE instructions">;
def FeatureAMXINT8
    : SubtargetFeature<"amx-int8", "HasAMXINT8", "true",
                       "Support AMX-INT8 instructions",
                       [FeatureAMXTILE]>;
def FeatureAMXBF16
    : SubtargetFeature<"amx-bf16", "HasAMXBF16", "true",
                       "Support AMX-BF16 instructions",
                       [FeatureAMXTILE]>;
def FeatureAMXFP16
    : SubtargetFeature<"amx-fp16", "HasAMXFP16", "true",
                       "Support AMX amx-fp16 instructions",
                       [FeatureAMXTILE]>;
def FeatureAMXCOMPLEX
    : SubtargetFeature<"amx-complex", "HasAMXCOMPLEX", "true",
                       "Support AMX-COMPLEX instructions",
                       [FeatureAMXTILE]>;
def FeatureCMPCCXADD
    : SubtargetFeature<"cmpccxadd", "HasCMPCCXADD", "true",
                       "Support CMPCCXADD instructions">;
// No implied features; the explicit empty list was dropped to match the other
// records that imply nothing.
def FeatureRAOINT
    : SubtargetFeature<"raoint", "HasRAOINT", "true",
                       "Support RAO-INT instructions">;
def FeatureAVXNECONVERT
    : SubtargetFeature<"avxneconvert", "HasAVXNECONVERT", "true",
                       "Support AVX-NE-CONVERT instructions",
                       [FeatureAVX2]>;
def FeatureINVPCID
    : SubtargetFeature<"invpcid", "HasINVPCID", "true",
                       "Invalidate Process-Context Identifier">;
def FeatureSGX
    : SubtargetFeature<"sgx", "HasSGX", "true",
                       "Enable Software Guard Extensions">;
def FeatureCLFLUSHOPT
    : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true",
                       "Flush A Cache Line Optimized">;
def FeatureCLWB
    : SubtargetFeature<"clwb", "HasCLWB", "true",
                       "Cache Line Write Back">;
def FeatureWBNOINVD
    : SubtargetFeature<"wbnoinvd", "HasWBNOINVD", "true",
                       "Write Back No Invalidate">;
def FeatureRDPID
    : SubtargetFeature<"rdpid", "HasRDPID", "true",
                       "Support RDPID instructions">;
def FeatureRDPRU
    : SubtargetFeature<"rdpru", "HasRDPRU", "true",
                       "Support RDPRU instructions">;
def FeatureWAITPKG
    : SubtargetFeature<"waitpkg", "HasWAITPKG", "true",
                       "Wait and pause enhancements">;
def FeatureENQCMD
    : SubtargetFeature<"enqcmd", "HasENQCMD", "true",
                       "Has ENQCMD instructions">;
def FeatureKL
    : SubtargetFeature<"kl", "HasKL", "true",
                       "Support Key Locker kl Instructions",
                       [FeatureSSE2]>;
def FeatureWIDEKL
    : SubtargetFeature<"widekl", "HasWIDEKL", "true",
                       "Support Key Locker wide Instructions",
                       [FeatureKL]>;
def FeatureHRESET
    : SubtargetFeature<"hreset", "HasHRESET", "true",
                       "Has hreset instruction">;
def FeatureSERIALIZE
    : SubtargetFeature<"serialize", "HasSERIALIZE", "true",
                       "Has serialize instruction">;
def FeatureTSXLDTRK
    : SubtargetFeature<"tsxldtrk", "HasTSXLDTRK", "true",
                       "Support TSXLDTRK instructions">;
def FeatureUINTR
    : SubtargetFeature<"uintr", "HasUINTR", "true",
                       "Has UINTR Instructions">;
def FeaturePCONFIG
    : SubtargetFeature<"pconfig", "HasPCONFIG", "true",
                       "platform configuration instruction">;
def FeatureMOVDIRI
    : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
                       "Support movdiri instruction (direct store integer)">;
def FeatureMOVDIR64B
    : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true",
                       "Support movdir64b instruction (direct store 64 bytes)">;

// Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
// "string operations"). See "REP String Enhancement" in the Intel Software
// Development Manual. This feature essentially means that REP MOVSB will copy
// using the largest available size instead of copying bytes one by one, making
// it at least as fast as REPMOVS{W,D,Q}.
def FeatureERMSB
    : SubtargetFeature<"ermsb", "HasERMSB", "true",
                       "REP MOVS/STOS are fast">;

// Icelake and newer processors have Fast Short REP MOV.
def FeatureFSRM
    : SubtargetFeature<"fsrm", "HasFSRM", "true",
                       "REP MOVSB of short lengths is faster">;

def FeatureSoftFloat
    : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
                       "Use software floating point features">;

//===----------------------------------------------------------------------===//
// X86 Subtarget Security Mitigation features
//===----------------------------------------------------------------------===//

// Lower indirect calls using a special construct called a `retpoline` to
// mitigate potential Spectre v2 attacks against them.
def FeatureRetpolineIndirectCalls
    : SubtargetFeature<"retpoline-indirect-calls",
                       "UseRetpolineIndirectCalls", "true",
                       "Remove speculation of indirect calls from the generated code">;

// Lower indirect branches and switches either using conditional branch trees
// or using a special construct called a `retpoline` to mitigate potential
// Spectre v2 attacks against them.
def FeatureRetpolineIndirectBranches
    : SubtargetFeature<"retpoline-indirect-branches",
                       "UseRetpolineIndirectBranches", "true",
                       "Remove speculation of indirect branches from the generated code">;

// Deprecated umbrella feature for enabling both `retpoline-indirect-calls` and
// `retpoline-indirect-branches` above.
def FeatureRetpoline
    : SubtargetFeature<"retpoline", "DeprecatedUseRetpoline", "true",
                       "Remove speculation of indirect branches from the "
                       "generated code, either by avoiding them entirely or "
                       "lowering them with a speculation blocking construct",
                       [FeatureRetpolineIndirectCalls,
                        FeatureRetpolineIndirectBranches]>;

// Use externally supplied thunks for the emitted retpoline calls, so that
// users in highly specialized environments (for example a kernel that does
// boot-time hot patching) can provide their own thunk definitions.
def FeatureRetpolineExternalThunk
    : SubtargetFeature<"retpoline-external-thunk",
                       "UseRetpolineExternalThunk", "true",
                       "When lowering an indirect call or branch using a `retpoline`, rely "
                       "on the specified user provided thunk rather than emitting one "
                       "ourselves. Only has effect when combined with some other retpoline "
                       "feature",
                       [FeatureRetpolineIndirectCalls]>;

// LVI mitigation covering indirect calls/branches and call returns.
def FeatureLVIControlFlowIntegrity
    : SubtargetFeature<"lvi-cfi", "UseLVIControlFlowIntegrity", "true",
                       "Prevent indirect calls/branches from using a memory operand, and "
                       "precede all indirect calls/branches from a register with an "
                       "LFENCE instruction to serialize control flow. Also decompose RET "
                       "instructions into a POP+LFENCE+JMP sequence.">;

// SESES: mitigation for speculative execution attacks. This feature implies
// the LVI control-flow mitigation defined above.
def FeatureSpeculativeExecutionSideEffectSuppression
    : SubtargetFeature<"seses",
                       "UseSpeculativeExecutionSideEffectSuppression", "true",
                       "Prevent speculative execution side channel timing attacks by "
                       "inserting a speculation barrier before memory reads, memory writes, "
                       "and conditional branches. Implies LVI Control Flow integrity.",
                       [FeatureLVIControlFlowIntegrity]>;

// LVI mitigation covering data loads.
def FeatureLVILoadHardening
    : SubtargetFeature<"lvi-load-hardening", "UseLVILoadHardening", "true",
                       "Insert LFENCE instructions to prevent data speculatively injected "
                       "into loads from being used maliciously.">;

def FeatureTaggedGlobals
    : SubtargetFeature<"tagged-globals", "AllowTaggedGlobals", "true",
                       "Use an instruction sequence for taking the address of a global "
                       "that allows a memory tag in the upper address bits.">;

// Codegen hardening against the Straight Line Speculation vulnerability.
def FeatureHardenSlsRet
    : SubtargetFeature<"harden-sls-ret", "HardenSlsRet", "true",
                       "Harden against straight line speculation across RET instructions.">;

def FeatureHardenSlsIJmp
    : SubtargetFeature<"harden-sls-ijmp", "HardenSlsIJmp", "true",
                       "Harden against straight line speculation across indirect JMP instructions.">;

//===----------------------------------------------------------------------===//
// X86 Subtarget Tuning features
//===----------------------------------------------------------------------===//
def TuningPreferMovmskOverVTest
    : SubtargetFeature<"prefer-movmsk-over-vtest",
                       "PreferMovmskOverVTest", "true",
                       "Prefer movmsk over vtest instruction">;

def TuningSlowSHLD
    : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
                       "SHLD instruction is slow">;

def TuningSlowPMULLD
    : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
                       "PMULLD instruction is slow (compared to PMULLW/PMULHW and PMULUDQ)">;

def TuningSlowPMADDWD
    : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow", "true",
                       "PMADDWD is slower than PMULLD">;

// FIXME: This should not apply to CPUs that do not have SSE.
def TuningSlowUAMem16
    : SubtargetFeature<"slow-unaligned-mem-16", "IsUnalignedMem16Slow", "true",
                       "Slow unaligned 16-byte memory access">;

def TuningSlowUAMem32
    : SubtargetFeature<"slow-unaligned-mem-32", "IsUnalignedMem32Slow", "true",
                       "Slow unaligned 32-byte memory access">;

def TuningLEAForSP
    : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
                       "Use LEA for adjusting the stack pointer (this is an optimization for Intel Atom processors)">;

// Set when 8-bit divisions are significantly faster than 32-bit divisions,
// so the 8-bit form should be used whenever possible.
def TuningSlowDivide32
    : SubtargetFeature<"idivl-to-divb", "HasSlowDivide32", "true",
                       "Use 8-bit divide for positive values less than 256">;

// Set when 32-bit divides are significantly faster than 64-bit divisions,
// so the 32-bit form should be used whenever possible.
def TuningSlowDivide64
    : SubtargetFeature<"idivq-to-divl", "HasSlowDivide64", "true",
                       "Use 32-bit divide for positive values less than 2^32">;

def TuningPadShortFunctions
    : SubtargetFeature<"pad-short-functions", "PadShortFunctions", "true",
                       "Pad short functions (to prevent a stall when returning too early)">;

// Some processors are slow on instructions that implicitly take two memory
// operands. In practice: avoid CALL, PUSH, and POP with memory operands and
// use a MOV followed by a register CALL/PUSH/POP instead.
def TuningSlowTwoMemOps
    : SubtargetFeature<"slow-two-mem-ops", "SlowTwoMemOps", "true",
                       "Two memory operand instructions are slow">;

// True if the LEA instruction inputs have to be ready at address generation
// (AG) time.
def TuningLEAUsesAG
    : SubtargetFeature<"lea-uses-ag", "LeaUsesAG", "true",
                       "LEA instruction needs inputs at AG stage">;

def TuningSlowLEA
    : SubtargetFeature<"slow-lea", "SlowLEA", "true",
                       "LEA instruction with certain arguments is slow">;

// True if the LEA instruction has all three source operands: base, index,
// and offset, or if the LEA instruction uses base and index registers where
// the base is EBP, RBP, or R13.
def TuningSlow3OpsLEA
    : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
                       "LEA instruction with 3 ops or certain registers is slow">;

// True if INC and DEC instructions are slow when writing to flags
def TuningSlowIncDec
    : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
                       "INC and DEC instructions are slower than ADD and SUB">;

// False-dependency tunings: each one marks an instruction group whose result
// has a false dependency on the destination register.
def TuningPOPCNTFalseDeps
    : SubtargetFeature<"false-deps-popcnt", "HasPOPCNTFalseDeps", "true",
                       "POPCNT has a false dependency on dest register">;

def TuningLZCNTFalseDeps
    : SubtargetFeature<"false-deps-lzcnt-tzcnt", "HasLZCNTFalseDeps", "true",
                       "LZCNT/TZCNT have a false dependency on dest register">;

def TuningMULCFalseDeps
    : SubtargetFeature<"false-deps-mulc", "HasMULCFalseDeps", "true",
                       "VF[C]MULCPH/SH has a false dependency on dest register">;

def TuningPERMFalseDeps
    : SubtargetFeature<"false-deps-perm", "HasPERMFalseDeps", "true",
                       "VPERMD/Q/PS/PD has a false dependency on dest register">;

def TuningRANGEFalseDeps
    : SubtargetFeature<"false-deps-range", "HasRANGEFalseDeps", "true",
                       "VRANGEPD/PS/SD/SS has a false dependency on dest register">;

// Help text fixed: the packed mnemonic is VGETMANTPS/PD (was "VGETMANDPS").
def TuningGETMANTFalseDeps
    : SubtargetFeature<"false-deps-getmant", "HasGETMANTFalseDeps", "true",
                       "VGETMANTSS/SD/SH and VGETMANTPS/PD(memory version) has a"
                       " false dependency on dest register">;

def TuningMULLQFalseDeps
    : SubtargetFeature<"false-deps-mullq", "HasMULLQFalseDeps", "true",
                       "VPMULLQ has a false dependency on dest register">;

def TuningSBBDepBreaking
    : SubtargetFeature<"sbb-dep-breaking", "HasSBBDepBreaking", "true",
                       "SBB with same register has no source dependency">;

// On recent X86 (port bound) processors, it's preferable to combine to a
// single shuffle using a variable mask over multiple fixed shuffles.
def TuningFastVariableCrossLaneShuffle
    : SubtargetFeature<"fast-variable-crosslane-shuffle",
                       "HasFastVariableCrossLaneShuffle", "true",
                       "Cross-lane shuffles with variable masks are fast">;
def TuningFastVariablePerLaneShuffle
    : SubtargetFeature<"fast-variable-perlane-shuffle",
                       "HasFastVariablePerLaneShuffle", "true",
                       "Per-lane shuffles with variable masks are fast">;

// Goldmont / Tremont (atom in general) has no bypass delay
def TuningNoDomainDelay
    : SubtargetFeature<"no-bypass-delay", "NoDomainDelay", "true",
                       "Has no bypass delay when using the 'wrong' domain">;

// Many processors (Nehalem+ on Intel) have no bypass delay when
// using the wrong mov type.
def TuningNoDomainDelayMov
    : SubtargetFeature<"no-bypass-delay-mov", "NoDomainDelayMov", "true",
                       "Has no bypass delay when using the 'wrong' mov type">;

// Newer processors (Skylake+ on Intel) have no bypass delay when
// using the wrong blend type.
def TuningNoDomainDelayBlend
    : SubtargetFeature<"no-bypass-delay-blend", "NoDomainDelayBlend", "true",
                       "Has no bypass delay when using the 'wrong' blend type">;

// Newer processors (Haswell+ on Intel) have no bypass delay when
// using the wrong shuffle type.
def TuningNoDomainDelayShuffle
    : SubtargetFeature<"no-bypass-delay-shuffle",
                       "NoDomainDelayShuffle", "true",
                       "Has no bypass delay when using the 'wrong' shuffle type">;

// Prefer lowering shuffles on AVX512 targets (e.g. Skylake Server) to
// imm shifts/rotate if they can use more ports than regular shuffles.
def TuningPreferShiftShuffle
    : SubtargetFeature<"faster-shift-than-shuffle",
                       "PreferLowerShuffleAsShift", "true",
                       "Shifts are faster (or as fast) as shuffle">;

def TuningFastImmVectorShift
    : SubtargetFeature<"tuning-fast-imm-vector-shift",
                       "FastImmVectorShift", "true",
                       "Vector shifts are fast (2/cycle) as opposed to slow (1/cycle)">;

// Some X86 processors want a vzeroupper instruction inserted after ymm/zmm
// registers have been used and before code that may use SSE instructions
// is executed.
def TuningInsertVZEROUPPER
    : SubtargetFeature<"vzeroupper", "InsertVZEROUPPER", "true",
                       "Should insert vzeroupper instructions">;

// Enable TuningFastScalarFSQRT when scalar FSQRT has shorter latency than
// the corresponding NR code, and TuningFastVectorFSQRT when vector FSQRT has
// higher throughput than the corresponding NR code.
// Rationale: throughput bound code is likely to be vectorized, so vectorized
// code should care about the throughput of SQRT operations. Scalar code
// probably has some kind of dependency, so reducing latency matters more
// there.

// True if hardware SQRTSS instruction is at least as fast (latency) as
// RSQRTSS followed by a Newton-Raphson iteration.
def TuningFastScalarFSQRT
    : SubtargetFeature<"fast-scalar-fsqrt", "HasFastScalarFSQRT", "true",
                       "Scalar SQRT is fast (disable Newton-Raphson)">;
// True if hardware SQRTPS/VSQRTPS instructions are at least as fast
// (throughput) as RSQRTPS/VRSQRTPS followed by a Newton-Raphson iteration.
def TuningFastVectorFSQRT
    : SubtargetFeature<"fast-vector-fsqrt", "HasFastVectorFSQRT", "true",
                       "Vector SQRT is fast (disable Newton-Raphson)">;

// If lzcnt has equivalent latency/throughput to most simple integer ops, it can
// be used to replace test/set sequences.
def TuningFastLZCNT
    : SubtargetFeature<"fast-lzcnt", "HasFastLZCNT", "true",
                       "LZCNT instructions are as fast as most simple integer ops">;

// Set when the target efficiently decodes NOPs of up to 7 bytes in length.
def TuningFast7ByteNOP
    : SubtargetFeature<"fast-7bytenop", "HasFast7ByteNOP", "true",
                       "Target can quickly decode up to 7 byte NOPs">;

// Set when the target efficiently decodes NOPs of up to 11 bytes in length.
def TuningFast11ByteNOP
    : SubtargetFeature<"fast-11bytenop", "HasFast11ByteNOP", "true",
                       "Target can quickly decode up to 11 byte NOPs">;

// Set when the target efficiently decodes NOPs of up to 15 bytes in length.
def TuningFast15ByteNOP
    : SubtargetFeature<"fast-15bytenop", "HasFast15ByteNOP", "true",
                       "Target can quickly decode up to 15 byte NOPs">;

// On Sandy Bridge and newer processors, SHLD with the same source on both
// inputs can implement a rotate, which avoids the partial flag update of the
// normal rotate instructions.
def TuningFastSHLDRotate
    : SubtargetFeature<"fast-shld-rotate", "HasFastSHLDRotate", "true",
                       "SHLD can be used as a faster rotate">;

// On Bulldozer and newer processors, CMP/TEST (but no other instructions)
// can be merged with conditional branches.
def TuningBranchFusion
    : SubtargetFeature<"branchfusion", "HasBranchFusion", "true",
                       "CMP/TEST can be fused with conditional branches">;

// On Sandy Bridge and newer processors, many instructions can be fused with
// conditional branches and then pass through the CPU as a single operation.
def TuningMacroFusion
    : SubtargetFeature<"macrofusion", "HasMacroFusion", "true",
                       "Various instructions can be fused with conditional branches">;

// Gather is available since Haswell (AVX2 set). So technically, we can
// generate Gathers on all AVX2 processors. But the overhead on HSW is high.
// Skylake Client processor has faster Gathers than HSW and performance is
// similar to Skylake Server (AVX-512).
def TuningFastGather
    : SubtargetFeature<"fast-gather", "HasFastGather", "true",
                       "Indicates if gather is reasonably fast (this is true for Skylake client and all AVX-512 CPUs)">;

// Note: the two "prefer-no-*" tunings below store "false" into PreferGather /
// PreferScatter rather than "true" into a negated attribute.
def TuningPreferNoGather
    : SubtargetFeature<"prefer-no-gather", "PreferGather", "false",
                       "Prefer no gather instructions">;
def TuningPreferNoScatter
    : SubtargetFeature<"prefer-no-scatter", "PreferScatter", "false",
                       "Prefer no scatter instructions">;

def TuningPrefer128Bit
    : SubtargetFeature<"prefer-128-bit", "Prefer128Bit", "true",
                       "Prefer 128-bit AVX instructions">;

def TuningPrefer256Bit
    : SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true",
                       "Prefer 256-bit AVX instructions">;

def TuningAllowLight256Bit
    : SubtargetFeature<"allow-light-256-bit", "AllowLight256Bit", "true",
                       "Enable generation of 256-bit load/stores even if we prefer 128-bit">;

def TuningPreferMaskRegisters
    : SubtargetFeature<"prefer-mask-registers", "PreferMaskRegisters", "true",
                       "Prefer AVX512 mask registers over PTEST/MOVMSK">;

def TuningFastBEXTR
    : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true",
                       "Indicates that the BEXTR instruction is implemented as a single uop "
                       "with good throughput">;

// When a CPU implements horizontal operations (introduced with SSE3) with
// better latency/throughput than the alternative sequence, combine vector
// math operations with shuffles into horizontal math instructions.
def TuningFastHorizontalOps
    : SubtargetFeature<"fast-hops", "HasFastHorizontalOps", "true",
                       "Prefer horizontal vector math instructions (haddp, phsub, etc.) over "
                       "normal vector instructions with shuffles">;

def TuningFastScalarShiftMasks
    : SubtargetFeature<"fast-scalar-shift-masks",
                       "HasFastScalarShiftMasks", "true",
                       "Prefer a left/right scalar logical shift pair over a shift+and pair">;

def TuningFastVectorShiftMasks
    : SubtargetFeature<"fast-vector-shift-masks",
                       "HasFastVectorShiftMasks", "true",
                       "Prefer a left/right vector logical shift pair over a shift+and pair">;

def TuningFastMOVBE
    : SubtargetFeature<"fast-movbe", "HasFastMOVBE", "true",
                       "Prefer a movbe over a single-use load + bswap / single-use bswap + store">;

def TuningUseSLMArithCosts
    : SubtargetFeature<"use-slm-arith-costs", "UseSLMArithCosts", "true",
                       "Use Silvermont specific arithmetic costs">;

def TuningUseGLMDivSqrtCosts
    : SubtargetFeature<"use-glm-div-sqrt-costs", "UseGLMDivSqrtCosts", "true",
                       "Use Goldmont specific floating point div/sqrt costs">;

//===----------------------------------------------------------------------===//
// X86 CPU Families
// TODO: Remove these - use general tuning features to determine codegen.
//===----------------------------------------------------------------------===//

// Bonnell
def ProcIntelAtom : SubtargetFeature<
    "", "IsAtom", "true", "Is Intel Atom processor">;

//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//

include "X86RegisterInfo.td"
include "X86RegisterBanks.td"

//===----------------------------------------------------------------------===//
// Instruction Descriptions
//===----------------------------------------------------------------------===//

include "X86Schedule.td"
include "X86InstrInfo.td"
include "X86SchedPredicates.td"

def X86InstrInfo : InstrInfo;

//===----------------------------------------------------------------------===//
// X86 Scheduler Models
//===----------------------------------------------------------------------===//

include "X86ScheduleAtom.td"
include "X86SchedSandyBridge.td"
include "X86SchedHaswell.td"
include "X86SchedBroadwell.td"
include "X86ScheduleSLM.td"
include "X86ScheduleZnver1.td"
include "X86ScheduleZnver2.td"
include "X86ScheduleZnver3.td"
include "X86ScheduleZnver4.td"
include "X86ScheduleBdVer2.td"
include "X86ScheduleBtVer2.td"
include "X86SchedSkylakeClient.td"
include "X86SchedSkylakeServer.td"
include "X86SchedIceLake.td"
include "X86SchedAlderlakeP.td"
include "X86SchedSapphireRapids.td"

//===----------------------------------------------------------------------===//
// X86 Processor Feature Lists
//===----------------------------------------------------------------------===//

// Named ISA-feature and tuning lists shared by the processor definitions.
// Most CPUs follow the same pattern: <CPU>AdditionalFeatures holds what the
// CPU adds, <CPU>Features concatenates that onto a predecessor's list, and
// <CPU>Tuning is either a fresh list or an alias of a predecessor's tuning.
def ProcessorFeatures {
  // x86-64 micro-architecture levels: x86-64 and x86-64-v[234]
  list<SubtargetFeature> X86_64V1Features = [
      FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX, FeatureSSE2,
      FeatureFXSR, FeatureNOPL, FeatureX86_64];
  list<SubtargetFeature> X86_64V1Tuning = [
      TuningMacroFusion, TuningSlow3OpsLEA, TuningSlowDivide64,
      TuningSlowIncDec, TuningInsertVZEROUPPER];

  list<SubtargetFeature> X86_64V2Features = !listconcat(X86_64V1Features, [
      FeatureCX16, FeatureLAHFSAHF64, FeatureCRC32, FeaturePOPCNT,
      FeatureSSE42]);
  list<SubtargetFeature> X86_64V2Tuning = [
      TuningMacroFusion, TuningSlow3OpsLEA, TuningSlowDivide64,
      TuningSlowUAMem32, TuningFastScalarFSQRT, TuningFastSHLDRotate,
      TuningFast15ByteNOP, TuningPOPCNTFalseDeps, TuningInsertVZEROUPPER];

  list<SubtargetFeature> X86_64V3Features = !listconcat(X86_64V2Features, [
      FeatureAVX2, FeatureBMI, FeatureBMI2, FeatureF16C, FeatureFMA,
      FeatureLZCNT, FeatureMOVBE, FeatureXSAVE]);
  list<SubtargetFeature> X86_64V3Tuning = [
      TuningMacroFusion, TuningSlow3OpsLEA, TuningSlowDivide64,
      TuningFastScalarFSQRT, TuningFastSHLDRotate, TuningFast15ByteNOP,
      TuningFastVariableCrossLaneShuffle, TuningFastVariablePerLaneShuffle,
      TuningPOPCNTFalseDeps, TuningLZCNTFalseDeps, TuningInsertVZEROUPPER,
      TuningAllowLight256Bit];

  list<SubtargetFeature> X86_64V4Features = !listconcat(X86_64V3Features, [
      FeatureBWI, FeatureCDI, FeatureDQI, FeatureVLX]);
  list<SubtargetFeature> X86_64V4Tuning = [
      TuningMacroFusion, TuningSlow3OpsLEA, TuningSlowDivide64,
      TuningFastScalarFSQRT, TuningFastVectorFSQRT, TuningFastSHLDRotate,
      TuningFast15ByteNOP, TuningFastVariableCrossLaneShuffle,
      TuningFastVariablePerLaneShuffle, TuningPrefer256Bit, TuningFastGather,
      TuningPOPCNTFalseDeps, TuningInsertVZEROUPPER, TuningAllowLight256Bit];

  // Nehalem
  list<SubtargetFeature> NHMFeatures = X86_64V2Features;
  list<SubtargetFeature> NHMTuning = [
      TuningMacroFusion, TuningInsertVZEROUPPER, TuningNoDomainDelayMov];

  // Westmere
  list<SubtargetFeature> WSMAdditionalFeatures = [FeaturePCLMUL];
  list<SubtargetFeature> WSMTuning = NHMTuning;
  list<SubtargetFeature> WSMFeatures =
      !listconcat(NHMFeatures, WSMAdditionalFeatures);

  // Sandybridge
  list<SubtargetFeature> SNBAdditionalFeatures = [
      FeatureAVX, FeatureXSAVE, FeatureXSAVEOPT];
  list<SubtargetFeature> SNBTuning = [
      TuningMacroFusion, TuningSlow3OpsLEA, TuningSlowDivide64,
      TuningSlowUAMem32, TuningFastScalarFSQRT, TuningFastSHLDRotate,
      TuningFast15ByteNOP, TuningPOPCNTFalseDeps, TuningInsertVZEROUPPER,
      TuningNoDomainDelayMov];
  list<SubtargetFeature> SNBFeatures =
      !listconcat(WSMFeatures, SNBAdditionalFeatures);

  // Ivybridge
  list<SubtargetFeature> IVBAdditionalFeatures = [
      FeatureRDRAND, FeatureF16C, FeatureFSGSBase];
  list<SubtargetFeature> IVBTuning = SNBTuning;
  list<SubtargetFeature> IVBFeatures =
      !listconcat(SNBFeatures, IVBAdditionalFeatures);

  // Haswell
  list<SubtargetFeature> HSWAdditionalFeatures = [
      FeatureAVX2, FeatureBMI, FeatureBMI2, FeatureERMSB, FeatureFMA,
      FeatureINVPCID, FeatureLZCNT, FeatureMOVBE];
  list<SubtargetFeature> HSWTuning = [
      TuningMacroFusion, TuningSlow3OpsLEA, TuningSlowDivide64,
      TuningFastScalarFSQRT, TuningFastSHLDRotate, TuningFast15ByteNOP,
      TuningFastVariableCrossLaneShuffle, TuningFastVariablePerLaneShuffle,
      TuningPOPCNTFalseDeps, TuningLZCNTFalseDeps, TuningInsertVZEROUPPER,
      TuningAllowLight256Bit, TuningNoDomainDelayMov,
      TuningNoDomainDelayShuffle];
  list<SubtargetFeature> HSWFeatures =
      !listconcat(IVBFeatures, HSWAdditionalFeatures);

  // Broadwell
  list<SubtargetFeature> BDWAdditionalFeatures = [
      FeatureADX, FeatureRDSEED, FeaturePRFCHW];
  list<SubtargetFeature> BDWTuning = HSWTuning;
  list<SubtargetFeature> BDWFeatures =
      !listconcat(HSWFeatures, BDWAdditionalFeatures);

  // Skylake
  list<SubtargetFeature> SKLAdditionalFeatures = [
      FeatureAES, FeatureXSAVEC, FeatureXSAVES, FeatureCLFLUSHOPT];
  list<SubtargetFeature> SKLTuning = [
      TuningFastGather, TuningMacroFusion, TuningSlow3OpsLEA,
      TuningSlowDivide64, TuningFastScalarFSQRT, TuningFastVectorFSQRT,
      TuningFastSHLDRotate, TuningFast15ByteNOP,
      TuningFastVariableCrossLaneShuffle, TuningFastVariablePerLaneShuffle,
      TuningPOPCNTFalseDeps, TuningInsertVZEROUPPER, TuningAllowLight256Bit,
      TuningNoDomainDelayMov, TuningNoDomainDelayShuffle,
      TuningNoDomainDelayBlend];
  list<SubtargetFeature> SKLFeatures =
      !listconcat(BDWFeatures, SKLAdditionalFeatures);

  // Skylake-AVX512 (built on the Broadwell list, not on the Skylake one)
  list<SubtargetFeature> SKXAdditionalFeatures = [
      FeatureAES, FeatureXSAVEC, FeatureXSAVES, FeatureCLFLUSHOPT,
      FeatureAVX512, FeatureCDI, FeatureDQI, FeatureBWI, FeatureVLX,
      FeaturePKU, FeatureCLWB];
  list<SubtargetFeature> SKXTuning = [
      TuningFastGather, TuningMacroFusion, TuningSlow3OpsLEA,
      TuningSlowDivide64, TuningFastScalarFSQRT, TuningFastVectorFSQRT,
      TuningFastSHLDRotate, TuningFast15ByteNOP,
      TuningFastVariableCrossLaneShuffle, TuningFastVariablePerLaneShuffle,
      TuningPrefer256Bit, TuningPOPCNTFalseDeps, TuningInsertVZEROUPPER,
      TuningAllowLight256Bit, TuningPreferShiftShuffle,
      TuningNoDomainDelayMov, TuningNoDomainDelayShuffle,
      TuningNoDomainDelayBlend, TuningFastImmVectorShift];
  list<SubtargetFeature> SKXFeatures =
      !listconcat(BDWFeatures, SKXAdditionalFeatures);

  // Cascadelake
  list<SubtargetFeature> CLXAdditionalFeatures = [FeatureVNNI];
  list<SubtargetFeature> CLXTuning = SKXTuning;
  list<SubtargetFeature> CLXFeatures =
      !listconcat(SKXFeatures, CLXAdditionalFeatures);

  // Cooperlake
  list<SubtargetFeature> CPXAdditionalFeatures = [FeatureBF16];
  list<SubtargetFeature> CPXTuning = SKXTuning;
  list<SubtargetFeature> CPXFeatures =
      !listconcat(CLXFeatures, CPXAdditionalFeatures);

  // Cannonlake
  list<SubtargetFeature> CNLAdditionalFeatures = [
      FeatureAVX512, FeatureCDI, FeatureDQI, FeatureBWI, FeatureVLX,
      FeaturePKU, FeatureVBMI, FeatureIFMA, FeatureSHA];
  list<SubtargetFeature> CNLTuning = [
      TuningFastGather, TuningMacroFusion, TuningSlow3OpsLEA,
      TuningSlowDivide64, TuningFastScalarFSQRT, TuningFastVectorFSQRT,
      TuningFastSHLDRotate, TuningFast15ByteNOP,
      TuningFastVariableCrossLaneShuffle, TuningFastVariablePerLaneShuffle,
      TuningPrefer256Bit, TuningInsertVZEROUPPER, TuningAllowLight256Bit,
      TuningNoDomainDelayMov, TuningNoDomainDelayShuffle,
      TuningNoDomainDelayBlend, TuningFastImmVectorShift];
  list<SubtargetFeature> CNLFeatures =
      !listconcat(SKLFeatures, CNLAdditionalFeatures);

  // Icelake
  list<SubtargetFeature> ICLAdditionalFeatures = [
      FeatureBITALG, FeatureVAES, FeatureVBMI2, FeatureVNNI,
      FeatureVPCLMULQDQ, FeatureVPOPCNTDQ, FeatureGFNI, FeatureRDPID,
      FeatureFSRM];
  list<SubtargetFeature> ICLTuning = [
      TuningFastGather, TuningMacroFusion, TuningSlowDivide64,
      TuningFastScalarFSQRT, TuningFastVectorFSQRT, TuningFastSHLDRotate,
      TuningFast15ByteNOP, TuningFastVariableCrossLaneShuffle,
      TuningFastVariablePerLaneShuffle, TuningPrefer256Bit,
      TuningInsertVZEROUPPER, TuningAllowLight256Bit, TuningNoDomainDelayMov,
      TuningNoDomainDelayShuffle, TuningNoDomainDelayBlend,
      TuningFastImmVectorShift];
  list<SubtargetFeature> ICLFeatures =
      !listconcat(CNLFeatures, ICLAdditionalFeatures);

  // Icelake Server
  list<SubtargetFeature> ICXAdditionalFeatures = [
      FeaturePCONFIG, FeatureCLWB, FeatureWBNOINVD];
  list<SubtargetFeature> ICXTuning = ICLTuning;
  list<SubtargetFeature> ICXFeatures =
      !listconcat(ICLFeatures, ICXAdditionalFeatures);

  // Tigerlake
  list<SubtargetFeature> TGLAdditionalFeatures = [
      FeatureVP2INTERSECT, FeatureCLWB, FeatureMOVDIRI, FeatureMOVDIR64B,
      FeatureSHSTK];
  list<SubtargetFeature> TGLTuning = ICLTuning;
  list<SubtargetFeature> TGLFeatures =
      !listconcat(ICLFeatures, TGLAdditionalFeatures);

  // Sapphirerapids
  list<SubtargetFeature> SPRAdditionalFeatures = [
      FeatureAMXTILE, FeatureAMXINT8, FeatureAMXBF16, FeatureBF16,
      FeatureSERIALIZE, FeatureCLDEMOTE, FeatureWAITPKG, FeaturePTWRITE,
      FeatureFP16, FeatureAVXVNNI, FeatureTSXLDTRK, FeatureENQCMD,
      FeatureSHSTK, FeatureMOVDIRI, FeatureMOVDIR64B, FeatureUINTR];
  list<SubtargetFeature> SPRAdditionalTuning = [
      TuningMULCFalseDeps, TuningPERMFalseDeps, TuningRANGEFalseDeps,
      TuningGETMANTFalseDeps, TuningMULLQFalseDeps];
  list<SubtargetFeature> SPRTuning =
      !listconcat(ICXTuning, SPRAdditionalTuning);
  list<SubtargetFeature> SPRFeatures =
      !listconcat(ICXFeatures, SPRAdditionalFeatures);

  // Graniterapids
  list<SubtargetFeature> GNRAdditionalFeatures = [
      FeatureAMXFP16, FeaturePREFETCHI];
  list<SubtargetFeature> GNRFeatures =
      !listconcat(SPRFeatures, GNRAdditionalFeatures);

  // Graniterapids D
  list<SubtargetFeature> GNRDAdditionalFeatures = [FeatureAMXCOMPLEX];
  list<SubtargetFeature> GNRDFeatures =
      !listconcat(GNRFeatures, GNRDAdditionalFeatures);

  // Atom
  list<SubtargetFeature> AtomFeatures = [
      FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX, FeatureSSSE3,
      FeatureFXSR, FeatureNOPL, FeatureX86_64, FeatureCX16, FeatureMOVBE,
      FeatureLAHFSAHF64];
  list<SubtargetFeature> AtomTuning = [
      ProcIntelAtom, TuningSlowUAMem16, TuningLEAForSP, TuningSlowDivide32,
      TuningSlowDivide64, TuningSlowTwoMemOps, TuningLEAUsesAG,
      TuningPadShortFunctions, TuningInsertVZEROUPPER, TuningNoDomainDelay];

  // Silvermont
  list<SubtargetFeature> SLMAdditionalFeatures = [
      FeatureSSE42, FeatureCRC32, FeaturePOPCNT, FeaturePCLMUL,
      FeaturePRFCHW, FeatureRDRAND];
  list<SubtargetFeature> SLMTuning = [
      TuningUseSLMArithCosts, TuningSlowTwoMemOps, TuningSlowLEA,
      TuningSlowIncDec, TuningSlowDivide64, TuningSlowPMULLD,
      TuningFast7ByteNOP, TuningFastMOVBE, TuningPOPCNTFalseDeps,
      TuningInsertVZEROUPPER, TuningNoDomainDelay];
  list<SubtargetFeature> SLMFeatures =
      !listconcat(AtomFeatures, SLMAdditionalFeatures);

  // Goldmont
  list<SubtargetFeature> GLMAdditionalFeatures = [
      FeatureAES, FeatureSHA, FeatureRDSEED, FeatureXSAVE, FeatureXSAVEOPT,
      FeatureXSAVEC, FeatureXSAVES, FeatureCLFLUSHOPT, FeatureFSGSBase];
  list<SubtargetFeature> GLMTuning = [
      TuningUseGLMDivSqrtCosts, TuningSlowTwoMemOps, TuningSlowLEA,
      TuningSlowIncDec, TuningFastMOVBE, TuningPOPCNTFalseDeps,
      TuningInsertVZEROUPPER, TuningNoDomainDelay];
  list<SubtargetFeature> GLMFeatures =
      !listconcat(SLMFeatures, GLMAdditionalFeatures);

  // Goldmont Plus
  list<SubtargetFeature> GLPAdditionalFeatures = [
      FeaturePTWRITE, FeatureRDPID];
  list<SubtargetFeature> GLPTuning = [
      TuningUseGLMDivSqrtCosts, TuningSlowTwoMemOps, TuningSlowLEA,
      TuningSlowIncDec, TuningFastMOVBE, TuningInsertVZEROUPPER,
      TuningNoDomainDelay];
  list<SubtargetFeature> GLPFeatures =
      !listconcat(GLMFeatures, GLPAdditionalFeatures);

  // Tremont
  list<SubtargetFeature> TRMAdditionalFeatures = [FeatureCLWB, FeatureGFNI];
  list<SubtargetFeature> TRMTuning = GLPTuning;
  list<SubtargetFeature> TRMFeatures =
      !listconcat(GLPFeatures, TRMAdditionalFeatures);

  // Alderlake (features extend Tremont; tuning extends Skylake)
  list<SubtargetFeature> ADLAdditionalFeatures = [
      FeatureSERIALIZE, FeaturePCONFIG, FeatureSHSTK, FeatureWIDEKL,
      FeatureINVPCID, FeatureADX, FeatureFMA, FeatureVAES,
      FeatureVPCLMULQDQ, FeatureF16C, FeatureBMI, FeatureBMI2, FeatureLZCNT,
      FeatureAVXVNNI, FeaturePKU, FeatureHRESET, FeatureCLDEMOTE,
      FeatureMOVDIRI, FeatureMOVDIR64B, FeatureWAITPKG];
  list<SubtargetFeature> ADLAdditionalTuning = [
      TuningPERMFalseDeps, TuningPreferMovmskOverVTest,
      TuningFastImmVectorShift];
  list<SubtargetFeature> ADLTuning =
      !listconcat(SKLTuning, ADLAdditionalTuning);
  list<SubtargetFeature> ADLFeatures =
      !listconcat(TRMFeatures, ADLAdditionalFeatures);

  // Sierraforest
  list<SubtargetFeature> SRFAdditionalFeatures = [
      FeatureCMPCCXADD, FeatureAVXIFMA, FeatureAVXNECONVERT, FeatureENQCMD,
      FeatureUINTR, FeatureAVXVNNIINT8];
  list<SubtargetFeature> SRFFeatures =
      !listconcat(ADLFeatures, SRFAdditionalFeatures);

  // Grandridge
  list<SubtargetFeature> GRRAdditionalFeatures = [FeatureRAOINT];
  list<SubtargetFeature> GRRFeatures =
      !listconcat(SRFFeatures, GRRAdditionalFeatures);

  // Knights Landing
  list<SubtargetFeature> KNLFeatures = [
      FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX, FeatureFXSR,
      FeatureNOPL, FeatureX86_64, FeatureCX16, FeatureCRC32, FeaturePOPCNT,
      FeaturePCLMUL, FeatureXSAVE, FeatureXSAVEOPT, FeatureLAHFSAHF64,
      FeatureAES, FeatureRDRAND, FeatureF16C, FeatureFSGSBase, FeatureAVX512,
      FeatureERI, FeatureCDI, FeaturePFI, FeaturePREFETCHWT1, FeatureADX,
      FeatureRDSEED, FeatureMOVBE, FeatureLZCNT, FeatureBMI, FeatureBMI2,
      FeatureFMA, FeaturePRFCHW];
  list<SubtargetFeature> KNLTuning = [
      TuningSlowDivide64, TuningSlow3OpsLEA, TuningSlowIncDec,
      TuningSlowTwoMemOps, TuningPreferMaskRegisters, TuningFastGather,
      TuningFastMOVBE, TuningSlowPMADDWD];
  // TODO Add AVX5124FMAPS/AVX5124VNNIW features
  list<SubtargetFeature> KNMFeatures =
      !listconcat(KNLFeatures, [FeatureVPOPCNTDQ]);

  // Barcelona
  list<SubtargetFeature> BarcelonaFeatures = [
      FeatureX87, FeatureCX8, FeatureSSE4A, Feature3DNowA, FeatureFXSR,
      FeatureNOPL, FeatureCX16, FeaturePRFCHW, FeatureLZCNT, FeaturePOPCNT,
      FeatureLAHFSAHF64, FeatureCMOV, FeatureX86_64];
  list<SubtargetFeature> BarcelonaTuning = [
      TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSBBDepBreaking,
      TuningInsertVZEROUPPER];

  // Bobcat
  list<SubtargetFeature> BtVer1Features = [
      FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX, FeatureSSSE3,
      FeatureSSE4A, FeatureFXSR, FeatureNOPL, FeatureX86_64, FeatureCX16,
      FeaturePRFCHW, FeatureLZCNT, FeaturePOPCNT, FeatureLAHFSAHF64];
  list<SubtargetFeature> BtVer1Tuning = [
      TuningFast15ByteNOP, TuningFastScalarShiftMasks,
      TuningFastVectorShiftMasks, TuningSlowSHLD, TuningSBBDepBreaking,
      TuningInsertVZEROUPPER];

  // Jaguar
  list<SubtargetFeature> BtVer2AdditionalFeatures = [
      FeatureAVX, FeatureAES, FeatureCRC32, FeaturePCLMUL, FeatureBMI,
      FeatureF16C, FeatureMOVBE, FeatureXSAVE, FeatureXSAVEOPT];
  list<SubtargetFeature> BtVer2Tuning = [
      TuningFastLZCNT, TuningFastBEXTR, TuningFastHorizontalOps,
      TuningFast15ByteNOP, TuningFastScalarShiftMasks,
      TuningFastVectorShiftMasks, TuningFastMOVBE, TuningSBBDepBreaking,
      TuningSlowSHLD];
  list<SubtargetFeature> BtVer2Features =
      !listconcat(BtVer1Features, BtVer2AdditionalFeatures);

  // Bulldozer
  list<SubtargetFeature> BdVer1Features = [
      FeatureX87, FeatureCX8, FeatureCMOV, FeatureXOP, FeatureX86_64,
      FeatureCX16, FeatureAES, FeatureCRC32, FeaturePRFCHW, FeaturePCLMUL,
      FeatureMMX, FeatureFXSR, FeatureNOPL, FeatureLZCNT, FeaturePOPCNT,
      FeatureXSAVE, FeatureLWP, FeatureLAHFSAHF64];
  list<SubtargetFeature> BdVer1Tuning = [
      TuningSlowSHLD, TuningFast11ByteNOP, TuningFastScalarShiftMasks,
      TuningBranchFusion, TuningSBBDepBreaking, TuningInsertVZEROUPPER];

  // PileDriver
  list<SubtargetFeature> BdVer2AdditionalFeatures = [
      FeatureF16C, FeatureBMI, FeatureTBM, FeatureFMA];
  list<SubtargetFeature> BdVer2AdditionalTuning = [
      TuningFastBEXTR, TuningFastMOVBE];
  list<SubtargetFeature> BdVer2Tuning =
      !listconcat(BdVer1Tuning, BdVer2AdditionalTuning);
  list<SubtargetFeature> BdVer2Features =
      !listconcat(BdVer1Features, BdVer2AdditionalFeatures);

  // Steamroller
  list<SubtargetFeature> BdVer3AdditionalFeatures = [
      FeatureXSAVEOPT, FeatureFSGSBase];
  list<SubtargetFeature> BdVer3Tuning = BdVer2Tuning;
  list<SubtargetFeature> BdVer3Features =
      !listconcat(BdVer2Features, BdVer3AdditionalFeatures);

  // Excavator
  list<SubtargetFeature> BdVer4AdditionalFeatures = [
      FeatureAVX2, FeatureBMI2, FeatureMOVBE, FeatureRDRAND, FeatureMWAITX];
  list<SubtargetFeature> BdVer4Tuning = BdVer3Tuning;
  list<SubtargetFeature> BdVer4Features =
      !listconcat(BdVer3Features, BdVer4AdditionalFeatures);


  // AMD Zen Processors common ISAs
  list<SubtargetFeature> ZNFeatures = [
      FeatureADX, FeatureAES, FeatureAVX2, FeatureBMI, FeatureBMI2,
      FeatureCLFLUSHOPT, FeatureCLZERO, FeatureCMOV, FeatureX86_64,
      FeatureCX16, FeatureCRC32, FeatureF16C, FeatureFMA, FeatureFSGSBase,
      FeatureFXSR, FeatureNOPL, FeatureLAHFSAHF64, FeatureLZCNT, FeatureMMX,
      FeatureMOVBE, FeatureMWAITX, FeaturePCLMUL, FeaturePOPCNT,
      FeaturePRFCHW, FeatureRDRAND, FeatureRDSEED, FeatureSHA, FeatureSSE4A,
      FeatureX87, FeatureXSAVE, FeatureXSAVEC, FeatureXSAVEOPT,
      FeatureXSAVES];
  list<SubtargetFeature> ZNTuning = [
      TuningFastLZCNT, TuningFastBEXTR, TuningFast15ByteNOP,
      TuningBranchFusion, TuningFastScalarFSQRT, TuningFastVectorFSQRT,
      TuningFastScalarShiftMasks, TuningFastVariablePerLaneShuffle,
      TuningFastMOVBE, TuningSlowSHLD, TuningSBBDepBreaking,
      TuningInsertVZEROUPPER, TuningAllowLight256Bit];

  // Zen 2
  list<SubtargetFeature> ZN2AdditionalFeatures = [
      FeatureCLWB, FeatureRDPID, FeatureRDPRU, FeatureWBNOINVD];
  list<SubtargetFeature> ZN2Tuning = ZNTuning;
  list<SubtargetFeature> ZN2Features =
      !listconcat(ZNFeatures, ZN2AdditionalFeatures);

  // Zen 3
  list<SubtargetFeature> ZN3AdditionalFeatures = [
      FeatureFSRM, FeatureINVPCID, FeaturePKU, FeatureVAES,
      FeatureVPCLMULQDQ];
  list<SubtargetFeature> ZN3AdditionalTuning = [TuningMacroFusion];
  list<SubtargetFeature> ZN3Tuning =
      !listconcat(ZN2Tuning, ZN3AdditionalTuning);
  list<SubtargetFeature> ZN3Features =
      !listconcat(ZN2Features, ZN3AdditionalFeatures);

  // Zen 4
  list<SubtargetFeature> ZN4Tuning = ZN3Tuning;
  list<SubtargetFeature> ZN4AdditionalFeatures = [
      FeatureAVX512, FeatureCDI, FeatureDQI, FeatureBWI, FeatureVLX,
      FeatureVBMI, FeatureVBMI2, FeatureIFMA, FeatureVNNI, FeatureBITALG,
      FeatureGFNI, FeatureBF16, FeatureSHSTK, FeatureVPOPCNTDQ];
  list<SubtargetFeature> ZN4Features =
      !listconcat(ZN3Features, ZN4AdditionalFeatures);
}

//===----------------------------------------------------------------------===//
// X86 processors supported.
//===----------------------------------------------------------------------===//

// A processor that uses GenericModel as its scheduling model.
class Proc<string Name,
           list<SubtargetFeature> Features,
           list<SubtargetFeature> TuneFeatures>
    : ProcessorModel<Name, GenericModel, Features, TuneFeatures>;

// A processor with an explicitly chosen scheduling model.
class ProcModel<string Name,
                SchedMachineModel Model,
                list<SubtargetFeature> Features,
                list<SubtargetFeature> TuneFeatures>
    : ProcessorModel<Name, Model, Features, TuneFeatures>;

// NOTE: CMPXCHG8B is here for legacy compatibility so that it is only disabled
// if i386/i486 is specifically requested.
// NOTE: 64Bit is here as "generic" is the default llc CPU. The X86Subtarget
// constructor checks that any CPU used in 64-bit mode has FeatureX86_64
// enabled. It has no effect on code generation.
// NOTE: As a default tuning, "generic" aims to produce code optimized for the
// most common X86 processors. The tunings might be changed over time. It is
// recommended to use "tune-cpu"="x86-64" in function attribute for consistency.
def : ProcModel<"generic", SandyBridgeModel,
                [FeatureX87, FeatureCX8, FeatureX86_64],
                [TuningSlow3OpsLEA, TuningSlowDivide64, TuningMacroFusion,
                 TuningFastScalarFSQRT, TuningFast15ByteNOP,
                 TuningInsertVZEROUPPER]>;

// i386/i486: x87 only (no CMPXCHG8B).
foreach P = ["i386", "i486"] in {
  def : Proc<P, [FeatureX87],
             [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
// i586/pentium: adds CMPXCHG8B.
foreach P = ["i586", "pentium"] in {
  def : Proc<P, [FeatureX87, FeatureCX8],
             [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
foreach P = ["pentium-mmx", "pentium_mmx"] in {
  def : Proc<P, [FeatureX87, FeatureCX8, FeatureMMX],
             [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
def : Proc<"i686", [FeatureX87, FeatureCX8, FeatureCMOV],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
foreach P = ["pentiumpro", "pentium_pro"] in {
  def : Proc<P, [FeatureX87, FeatureCX8, FeatureCMOV, FeatureNOPL],
             [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
foreach P = ["pentium2", "pentium_ii"] in {
  def : Proc<P,
             [FeatureX87, FeatureCX8, FeatureMMX, FeatureCMOV, FeatureFXSR,
              FeatureNOPL],
             [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
foreach P = ["pentium3", "pentium3m", "pentium_iii_no_xmm_regs", "pentium_iii"] in {
  def : Proc<P,
             [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE1, FeatureFXSR,
              FeatureNOPL, FeatureCMOV],
             [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}

// Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
// The intent is to enable it for pentium4 which is the current default
// processor in a vanilla 32-bit clang compilation when no specific
// architecture is specified.  This generally gives a nice performance
// increase on silvermont, with largely neutral behavior on other
// contemporary large core processors.
// pentium-m, pentium4m, prescott and nocona are included as a preventative
// measure to avoid performance surprises, in case clang's default cpu
// changes slightly.

foreach P = ["pentium_m", "pentium-m"] in {
  def : ProcModel<P, GenericPostRAModel,
                  [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE2,
                   FeatureFXSR, FeatureNOPL, FeatureCMOV],
                  [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}

foreach P = ["pentium4", "pentium4m", "pentium_4"] in {
  def : ProcModel<P, GenericPostRAModel,
                  [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE2,
                   FeatureFXSR, FeatureNOPL, FeatureCMOV],
                  [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}

// Intel Quark.
def : Proc<"lakemont", [FeatureCX8],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

// Intel Core Duo.
def : ProcModel<"yonah", SandyBridgeModel,
                [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE3,
                 FeatureFXSR, FeatureNOPL, FeatureCMOV],
                [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

// NetBurst.
foreach P = ["prescott", "pentium_4_sse3"] in {
  def : ProcModel<P, GenericPostRAModel,
                  [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE3,
                   FeatureFXSR, FeatureNOPL, FeatureCMOV],
                  [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
def : ProcModel<"nocona", GenericPostRAModel,
                [FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX, FeatureSSE3,
                 FeatureFXSR, FeatureNOPL, FeatureX86_64, FeatureCX16],
                [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

// Intel Core 2 Solo/Duo.
foreach P = ["core2", "core_2_duo_ssse3"] in {
  def : ProcModel<P, SandyBridgeModel,
                  [FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX,
                   FeatureSSSE3, FeatureFXSR, FeatureNOPL, FeatureX86_64,
                   FeatureCX16, FeatureLAHFSAHF64],
                  [TuningMacroFusion, TuningSlowUAMem16,
                   TuningInsertVZEROUPPER]>;
}
foreach P = ["penryn", "core_2_duo_sse4_1"] in {
  def : ProcModel<P, SandyBridgeModel,
                  [FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX,
                   FeatureSSE41, FeatureFXSR, FeatureNOPL, FeatureX86_64,
                   FeatureCX16, FeatureLAHFSAHF64],
                  [TuningMacroFusion, TuningSlowUAMem16,
                   TuningInsertVZEROUPPER]>;
}

// Atom CPUs.
foreach P = ["bonnell", "atom"] in {
  def : ProcModel<P, AtomModel,
                  ProcessorFeatures.AtomFeatures,
                  ProcessorFeatures.AtomTuning>;
}

foreach P = ["silvermont", "slm", "atom_sse4_2"] in {
  def : ProcModel<P, SLMModel,
                  ProcessorFeatures.SLMFeatures,
                  ProcessorFeatures.SLMTuning>;
}

// Goldmont feature set paired with the Silvermont tuning list.
def : ProcModel<"atom_sse4_2_movbe", SLMModel,
                ProcessorFeatures.GLMFeatures,
                ProcessorFeatures.SLMTuning>;
def : ProcModel<"goldmont", SLMModel,
                ProcessorFeatures.GLMFeatures,
                ProcessorFeatures.GLMTuning>;
foreach P = ["goldmont_plus", "goldmont-plus"] in {
  def : ProcModel<P, SLMModel,
                  ProcessorFeatures.GLPFeatures,
                  ProcessorFeatures.GLPTuning>;
}
def : ProcModel<"tremont", SLMModel,
                ProcessorFeatures.TRMFeatures,
                ProcessorFeatures.TRMTuning>;
// These two use the Alderlake-P scheduling model with the Tremont tuning.
def : ProcModel<"sierraforest", AlderlakePModel,
                ProcessorFeatures.SRFFeatures,
                ProcessorFeatures.TRMTuning>;
def : ProcModel<"grandridge", AlderlakePModel,
                ProcessorFeatures.GRRFeatures,
                ProcessorFeatures.TRMTuning>;

// "Arrandale" along with corei3 and corei5
foreach P = ["nehalem", "corei7", "core_i7_sse4_2"] in {
  def : ProcModel<P, SandyBridgeModel,
                  ProcessorFeatures.NHMFeatures,
                  ProcessorFeatures.NHMTuning>;
}

// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
foreach P = ["westmere", "core_aes_pclmulqdq"] in {
  def : ProcModel<P, SandyBridgeModel,
                  ProcessorFeatures.WSMFeatures,
                  ProcessorFeatures.WSMTuning>;
}

foreach P = ["sandybridge", "corei7-avx", "core_2nd_gen_avx"] in {
  def : ProcModel<P, SandyBridgeModel,
                  ProcessorFeatures.SNBFeatures,
                  ProcessorFeatures.SNBTuning>;
}

foreach P = ["ivybridge", "core-avx-i", "core_3rd_gen_avx"] in {
  def : ProcModel<P, SandyBridgeModel,
                  ProcessorFeatures.IVBFeatures,
                  ProcessorFeatures.IVBTuning>;
}

foreach P = ["haswell", "core-avx2", "core_4th_gen_avx", "core_4th_gen_avx_tsx"] in {
  def : ProcModel<P, HaswellModel,
                  ProcessorFeatures.HSWFeatures,
                  ProcessorFeatures.HSWTuning>;
}

foreach P = ["broadwell", "core_5th_gen_avx", "core_5th_gen_avx_tsx"] in {
  def : ProcModel<P, BroadwellModel,
                  ProcessorFeatures.BDWFeatures,
                  ProcessorFeatures.BDWTuning>;
}

def : ProcModel<"skylake", SkylakeClientModel,
                ProcessorFeatures.SKLFeatures,
                ProcessorFeatures.SKLTuning>;

// FIXME: define KNL scheduler model
foreach P = ["knl", "mic_avx512"] in {
  def : ProcModel<P, HaswellModel,
                  ProcessorFeatures.KNLFeatures,
                  ProcessorFeatures.KNLTuning>;
}
def : ProcModel<"knm", HaswellModel,
                ProcessorFeatures.KNMFeatures,
                ProcessorFeatures.KNLTuning>;

foreach P = ["skylake-avx512", "skx", "skylake_avx512"] in {
  def : ProcModel<P, SkylakeServerModel,
                  ProcessorFeatures.SKXFeatures,
                  ProcessorFeatures.SKXTuning>;
}

def : ProcModel<"cascadelake", SkylakeServerModel,
                ProcessorFeatures.CLXFeatures,
                ProcessorFeatures.CLXTuning>;
def : ProcModel<"cooperlake", SkylakeServerModel,
                ProcessorFeatures.CPXFeatures,
                ProcessorFeatures.CPXTuning>;
def : ProcModel<"cannonlake", SkylakeServerModel,
                ProcessorFeatures.CNLFeatures,
                ProcessorFeatures.CNLTuning>;
foreach P = ["icelake-client", "icelake_client"] in {
  def : ProcModel<P, IceLakeModel,
                  ProcessorFeatures.ICLFeatures,
                  ProcessorFeatures.ICLTuning>;
}
def : ProcModel<"rocketlake", IceLakeModel,
                ProcessorFeatures.ICLFeatures,
                ProcessorFeatures.ICLTuning>;
foreach P = ["icelake-server", "icelake_server"] in {
  def : ProcModel<P, IceLakeModel,
                  ProcessorFeatures.ICXFeatures,
                  ProcessorFeatures.ICXTuning>;
}
def : ProcModel<"tigerlake", IceLakeModel,
                ProcessorFeatures.TGLFeatures,
                ProcessorFeatures.TGLTuning>;
def : ProcModel<"sapphirerapids", SapphireRapidsModel,
                ProcessorFeatures.SPRFeatures,
                ProcessorFeatures.SPRTuning>;
def : ProcModel<"alderlake", AlderlakePModel,
                ProcessorFeatures.ADLFeatures,
                ProcessorFeatures.ADLTuning>;
def : ProcModel<"raptorlake", AlderlakePModel,
                ProcessorFeatures.ADLFeatures,
                ProcessorFeatures.ADLTuning>;
def : ProcModel<"meteorlake", AlderlakePModel,
                ProcessorFeatures.ADLFeatures,
                ProcessorFeatures.ADLTuning>;
def : ProcModel<"graniterapids", SapphireRapidsModel,
                ProcessorFeatures.GNRFeatures,
                ProcessorFeatures.SPRTuning>;
def : ProcModel<"emeraldrapids", SapphireRapidsModel,
                ProcessorFeatures.SPRFeatures,
                ProcessorFeatures.SPRTuning>;
foreach P = ["graniterapids-d", "graniterapids_d"] in {
  def : ProcModel<P, SapphireRapidsModel,
                  ProcessorFeatures.GNRDFeatures,
                  ProcessorFeatures.SPRTuning>;
}

// AMD CPUs.
// K6 family. These are plain Proc definitions: feature list first, tuning
// list second, no scheduling model argument.
def : Proc<"k6",
           [FeatureX87, FeatureCX8, FeatureMMX],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
// k6-2 and k6-3 take identical lists (3DNow! in place of plain MMX).
foreach CPUName = ["k6-2", "k6-3"] in {
  def : Proc<CPUName,
             [FeatureX87, FeatureCX8, Feature3DNow],
             [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}

foreach CPUName = ["athlon", "athlon-tbird"] in {
  def : Proc<CPUName,
             [FeatureX87, FeatureCX8, FeatureCMOV, Feature3DNowA,
              FeatureNOPL],
             [TuningSlowSHLD, TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}

// Same tuning as the athlons above, with SSE and FXSR added to the features.
foreach CPUName = ["athlon-4", "athlon-xp", "athlon-mp"] in {
  def : Proc<CPUName,
             [FeatureX87, FeatureCX8, FeatureCMOV,
              FeatureSSE1, Feature3DNowA, FeatureFXSR, FeatureNOPL],
             [TuningSlowSHLD, TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}

// K8: the first AMD entries here that enable FeatureX86_64.
foreach CPUName = ["k8", "opteron", "athlon64", "athlon-fx"] in {
  def : Proc<CPUName,
             [FeatureX87, FeatureCX8, FeatureSSE2, Feature3DNowA,
              FeatureFXSR, FeatureNOPL, FeatureX86_64, FeatureCMOV],
             [TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16,
              TuningSBBDepBreaking, TuningInsertVZEROUPPER]>;
}

// K8 revisions with SSE3; they also enable FeatureCX16.
foreach CPUName = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
  def : Proc<CPUName,
             [FeatureX87, FeatureCX8, FeatureSSE3, Feature3DNowA,
              FeatureFXSR, FeatureNOPL, FeatureCX16, FeatureCMOV,
              FeatureX86_64],
             [TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16,
              TuningSBBDepBreaking, TuningInsertVZEROUPPER]>;
}

foreach CPUName = ["amdfam10", "barcelona"] in {
  def : Proc<CPUName,
             ProcessorFeatures.BarcelonaFeatures,
             ProcessorFeatures.BarcelonaTuning>;
}

// Bobcat
def : Proc<"btver1",
           ProcessorFeatures.BtVer1Features,
           ProcessorFeatures.BtVer1Tuning>;
// Jaguar
def : ProcModel<"btver2", BtVer2Model,
                ProcessorFeatures.BtVer2Features,
                ProcessorFeatures.BtVer2Tuning>;

// Bulldozer (scheduled with the bdver2 model)
def : ProcModel<"bdver1", BdVer2Model,
                ProcessorFeatures.BdVer1Features,
                ProcessorFeatures.BdVer1Tuning>;
// Piledriver
def : ProcModel<"bdver2", BdVer2Model,
                ProcessorFeatures.BdVer2Features,
                ProcessorFeatures.BdVer2Tuning>;
// Steamroller
def : Proc<"bdver3",
           ProcessorFeatures.BdVer3Features,
           ProcessorFeatures.BdVer3Tuning>;
// Excavator
def : Proc<"bdver4",
           ProcessorFeatures.BdVer4Features,
           ProcessorFeatures.BdVer4Tuning>;

// Each znverN entry has its own scheduling model, feature list and tuning
// list.
def : ProcModel<"znver1", Znver1Model,
                ProcessorFeatures.ZNFeatures,
                ProcessorFeatures.ZNTuning>;
def : ProcModel<"znver2", Znver2Model,
                ProcessorFeatures.ZN2Features,
                ProcessorFeatures.ZN2Tuning>;
def : ProcModel<"znver3", Znver3Model,
                ProcessorFeatures.ZN3Features,
                ProcessorFeatures.ZN3Tuning>;
def : ProcModel<"znver4", Znver4Model,
                ProcessorFeatures.ZN4Features,
                ProcessorFeatures.ZN4Tuning>;

def : Proc<"geode",
           [FeatureX87, FeatureCX8, Feature3DNowA],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

def : Proc<"winchip-c6",
           [FeatureX87, FeatureMMX],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
// winchip2 and c3 take identical feature and tuning lists.
foreach CPUName = ["winchip2", "c3"] in {
  def : Proc<CPUName,
             [FeatureX87, Feature3DNow],
             [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
def : Proc<"c3-2",
           [FeatureX87, FeatureCX8, FeatureMMX,
            FeatureSSE1, FeatureFXSR, FeatureCMOV],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

// A generic, 64-bit-only x86 processor model is provided as well. It aims to
// work well on modern chips without enabling any instruction set encodings
// beyond basic SSE2 and the 64-bit ones: things that are slow on any
// mainstream, modern 64-bit x86 chip are disabled, and features that are
// generally beneficial are enabled.
//
// The Sandy Bridge model is currently the default scheduling model because it
// is already used across Nehalem, Westmere, Sandy Bridge, and Ivy Bridge,
// which covers a huge swath of x86 processors. If specific scheduling knobs
// turn out to need different tuning for AMD chips, we might consider forming
// a common base for them.
def : ProcModel<"x86-64", SandyBridgeModel,
                ProcessorFeatures.X86_64V1Features,
                ProcessorFeatures.X86_64V1Tuning>;
// Close to Sandybridge.
def : ProcModel<"x86-64-v2", SandyBridgeModel,
                ProcessorFeatures.X86_64V2Features,
                ProcessorFeatures.X86_64V2Tuning>;
// Close to Haswell.
def : ProcModel<"x86-64-v3", HaswellModel,
                ProcessorFeatures.X86_64V3Features,
                ProcessorFeatures.X86_64V3Tuning>;
// Close to the AVX-512 level implemented by Xeon Scalable Processors.
def : ProcModel<"x86-64-v4", SkylakeServerModel,
                ProcessorFeatures.X86_64V4Features,
                ProcessorFeatures.X86_64V4Tuning>;

//===----------------------------------------------------------------------===//
// Calling Conventions
//===----------------------------------------------------------------------===//

include "X86CallingConv.td"


//===----------------------------------------------------------------------===//
// Assembly Parser
//===----------------------------------------------------------------------===//

// AT&T syntax: parser variant 0.
def ATTAsmParserVariant : AsmParserVariant {
  int Variant = 0;

  // Name of this variant.
  string Name = "att";

  // Text from this delimiter onward in an assembly string is discarded as a
  // comment.
  string CommentDelimiter = "#";

  // Prefix used to recognize hard-coded registers.
  string RegisterPrefix = "%";
}

// Intel syntax: parser variant 1.
def IntelAsmParserVariant : AsmParserVariant {
  int Variant = 1;

  // Name of this variant.
  string Name = "intel";

  // Text from this delimiter onward in an assembly string is discarded as a
  // comment.
  string CommentDelimiter = ";";

  // Hard-coded registers are recognized without any prefix.
  string RegisterPrefix = "";
}

//===----------------------------------------------------------------------===//
// Assembly Printers
//===----------------------------------------------------------------------===//

// The X86 target can emit machine code in two different syntaxes. Which one
// is used is controlled by the -x86-asm-syntax={att|intel} option. The
// Variant numbers below match the parser variants defined above.
def ATTAsmWriter : AsmWriter {
  string AsmWriterClassName = "ATTInstPrinter";
  int Variant = 0;
}
def IntelAsmWriter : AsmWriter {
  string AsmWriterClassName = "IntelInstPrinter";
  int Variant = 1;
}

// Top-level target record: ties together the instruction set, both assembly
// parser variants and both assembly writers.
def X86 : Target {
  // Information about the instructions...
  let InstructionSet = X86InstrInfo;
  let AssemblyParserVariants = [ATTAsmParserVariant, IntelAsmParserVariant];
  let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter];
  let AllowRegisterRenaming = 1;
}

//===----------------------------------------------------------------------===//
// Pfm Counters
//===----------------------------------------------------------------------===//

include "X86PfmCounters.td"