//===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This is a target description file for the Intel i386 architecture, referred
// to here as the "X86" architecture.
//
//===----------------------------------------------------------------------===//

// Get the target-independent interfaces which we are implementing...
//
include "llvm/Target/Target.td"

// NOTE(review): every def below instantiates SubtargetFeature, which is
// declared in the Target.td include above. The arguments are used here as
// <attribute name, subtarget field, value for that field, help text
// [, list of implied features]> -- confirm against Target.td.

//===----------------------------------------------------------------------===//
// X86 Subtarget state
//
// disregarding specific ABI / programming model
def Is64Bit : SubtargetFeature<"64bit-mode", "Is64Bit", "true",
    "64-bit mode (x86_64)">;
def Is32Bit : SubtargetFeature<"32bit-mode", "Is32Bit", "true",
    "32-bit mode (80386)">;
def Is16Bit : SubtargetFeature<"16bit-mode", "Is16Bit", "true",
    "16-bit mode (i8086)">;

//===----------------------------------------------------------------------===//
// X86 Subtarget ISA features
//===----------------------------------------------------------------------===//

def FeatureX87 : SubtargetFeature<"x87", "HasX87", "true",
    "Enable X87 float instructions">;

def FeatureNOPL : SubtargetFeature<"nopl", "HasNOPL", "true",
    "Enable NOPL instruction (generally pentium pro+)">;

def FeatureCMOV : SubtargetFeature<"cmov", "HasCMOV", "true",
    "Enable conditional move instructions">;

def FeatureCX8 : SubtargetFeature<"cx8", "HasCX8", "true",
    "Support CMPXCHG8B instructions">;

def FeatureCRC32 : SubtargetFeature<"crc32", "HasCRC32", "true",
    "Enable SSE 4.2 CRC32 instruction (used when SSE4.2 is supported but function is GPR only)">;

def FeaturePOPCNT : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
    "Support POPCNT instruction">;

// The restore mnemonic is FXRSTOR, so the help text spells it "fxrstor".
def FeatureFXSR : SubtargetFeature<"fxsr", "HasFXSR", "true",
    "Support fxsave/fxrstor instructions">;

// XSAVE family: every variant below implies the base xsave feature.
def FeatureXSAVE : SubtargetFeature<"xsave", "HasXSAVE", "true",
    "Support xsave instructions">;

def FeatureXSAVEOPT : SubtargetFeature<"xsaveopt", "HasXSAVEOPT", "true",
    "Support xsaveopt instructions",
    [FeatureXSAVE]>;

def FeatureXSAVEC : SubtargetFeature<"xsavec", "HasXSAVEC", "true",
    "Support xsavec instructions",
    [FeatureXSAVE]>;

def FeatureXSAVES : SubtargetFeature<"xsaves", "HasXSAVES", "true",
    "Support xsaves instructions",
    [FeatureXSAVE]>;

// SSE levels: each one sets X86SSELevel and implies the level before it.
def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
    "Enable SSE instructions">;
def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
    "Enable SSE2 instructions",
    [FeatureSSE1]>;
def FeatureSSE3 : SubtargetFeature<"sse3", "X86SSELevel", "SSE3",
    "Enable SSE3 instructions",
    [FeatureSSE2]>;
def FeatureSSSE3 : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3",
    "Enable SSSE3 instructions",
    [FeatureSSE3]>;
def FeatureSSE41 : SubtargetFeature<"sse4.1", "X86SSELevel", "SSE41",
    "Enable SSE 4.1 instructions",
    [FeatureSSSE3]>;
def FeatureSSE42 : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42",
    "Enable SSE 4.2 instructions",
    [FeatureSSE41]>;
// The MMX subtarget feature is separate from the rest of the SSE features
// because it's important (for odd compatibility reasons) to be able to
// turn it off explicitly while allowing SSE+ to be on.
def FeatureMMX : SubtargetFeature<"mmx", "X863DNowLevel", "MMX",
    "Enable MMX instructions">;
def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
    "Enable 3DNow! instructions",
    [FeatureMMX]>;
def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
    "Enable 3DNow! Athlon instructions",
    [Feature3DNow]>;
// All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied
// feature, because SSE2 can be disabled (e.g. for compiling OS kernels)
// without disabling 64-bit mode. Nothing should imply this feature bit. It
// is used to enforce that only 64-bit capable CPUs are used in 64-bit mode.
def FeatureX86_64 : SubtargetFeature<"64bit", "HasX86_64", "true",
    "Support 64-bit instructions">;
def FeatureCX16 : SubtargetFeature<"cx16", "HasCX16", "true",
    "64-bit with cmpxchg16b (this is true for most x86-64 chips, but not the first AMD chips)",
    [FeatureCX8]>;
def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
    "Support SSE 4a instructions",
    [FeatureSSE3]>;

// AVX levels continue the X86SSELevel chain started by the SSE features.
def FeatureAVX : SubtargetFeature<"avx", "X86SSELevel", "AVX",
    "Enable AVX instructions",
    [FeatureSSE42]>;
def FeatureAVX2 : SubtargetFeature<"avx2", "X86SSELevel", "AVX2",
    "Enable AVX2 instructions",
    [FeatureAVX]>;
def FeatureFMA : SubtargetFeature<"fma", "HasFMA", "true",
    "Enable three-operand fused multiply-add",
    [FeatureAVX]>;
def FeatureF16C : SubtargetFeature<"f16c", "HasF16C", "true",
    "Support 16-bit floating point conversion instructions",
    [FeatureAVX]>;
def FeatureEVEX512 : SubtargetFeature<"evex512", "HasEVEX512", "true",
    "Support ZMM and 64-bit mask instructions">;
def FeatureAVX512 : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512",
    "Enable AVX-512 instructions",
    [FeatureAVX2, FeatureFMA, FeatureF16C]>;
def FeatureERI : SubtargetFeature<"avx512er", "HasERI", "true",
    "Enable AVX-512 Exponential and Reciprocal Instructions",
    [FeatureAVX512]>;
def FeatureCDI : SubtargetFeature<"avx512cd", "HasCDI", "true",
    "Enable AVX-512 Conflict Detection Instructions",
    [FeatureAVX512]>;
def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ", "true",
    "Enable AVX-512 Population Count Instructions",
    [FeatureAVX512]>;
def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true",
    "Enable AVX-512 PreFetch Instructions",
    [FeatureAVX512]>;
def FeaturePREFETCHI : SubtargetFeature<"prefetchi", "HasPREFETCHI", "true",
    "Prefetch instruction with T0 or T1 Hint">;
def FeaturePREFETCHWT1 : SubtargetFeature<"prefetchwt1", "HasPREFETCHWT1", "true",
    "Prefetch with Intent to Write and T1 Hint">;
def FeatureDQI : SubtargetFeature<"avx512dq", "HasDQI", "true",
    "Enable AVX-512 Doubleword and Quadword Instructions",
    [FeatureAVX512]>;
def FeatureBWI : SubtargetFeature<"avx512bw", "HasBWI", "true",
    "Enable AVX-512 Byte and Word Instructions",
    [FeatureAVX512]>;
def FeatureVLX : SubtargetFeature<"avx512vl", "HasVLX", "true",
    "Enable AVX-512 Vector Length eXtensions",
    [FeatureAVX512]>;
def FeatureVBMI : SubtargetFeature<"avx512vbmi", "HasVBMI", "true",
    "Enable AVX-512 Vector Byte Manipulation Instructions",
    [FeatureBWI]>;
def FeatureVBMI2 : SubtargetFeature<"avx512vbmi2", "HasVBMI2", "true",
    "Enable AVX-512 further Vector Byte Manipulation Instructions",
    [FeatureBWI]>;
def FeatureAVXIFMA : SubtargetFeature<"avxifma", "HasAVXIFMA", "true",
    "Enable AVX-IFMA",
    [FeatureAVX2]>;
def FeatureIFMA : SubtargetFeature<"avx512ifma", "HasIFMA", "true",
    "Enable AVX-512 Integer Fused Multiply-Add",
    [FeatureAVX512]>;
def FeaturePKU : SubtargetFeature<"pku", "HasPKU", "true",
    "Enable protection keys">;
def FeatureVNNI : SubtargetFeature<"avx512vnni", "HasVNNI", "true",
    "Enable AVX-512 Vector Neural Network Instructions",
    [FeatureAVX512]>;
def FeatureAVXVNNI : SubtargetFeature<"avxvnni", "HasAVXVNNI", "true",
    "Support AVX_VNNI encoding",
    [FeatureAVX2]>;
def FeatureBF16 : SubtargetFeature<"avx512bf16", "HasBF16", "true",
    "Support bfloat16 floating point",
    [FeatureBWI]>;
def FeatureBITALG : SubtargetFeature<"avx512bitalg", "HasBITALG", "true",
    "Enable AVX-512 Bit Algorithms",
    [FeatureBWI]>;
def FeatureVP2INTERSECT : SubtargetFeature<
    "avx512vp2intersect", "HasVP2INTERSECT", "true",
    "Enable AVX-512 vp2intersect",
    [FeatureAVX512]>;
// FIXME: FP16 scalar intrinsics use the type v8f16, which is supposed to be
// guarded under condition hasVLX. So we imply it in FeatureFP16 currently.
// FIXME: FP16 conversion between f16 and i64 customize type v8i64, which is
// supposed to be guarded under condition hasDQI. So we imply it in FeatureFP16
// currently.
def FeatureFP16 : SubtargetFeature<"avx512fp16", "HasFP16", "true",
    "Support 16-bit floating point",
    [FeatureBWI, FeatureVLX, FeatureDQI]>;
def FeatureAVXVNNIINT8 : SubtargetFeature<
    "avxvnniint8", "HasAVXVNNIINT8", "true",
    "Enable AVX-VNNI-INT8",
    [FeatureAVX2]>;
def FeatureAVXVNNIINT16 : SubtargetFeature<
    "avxvnniint16", "HasAVXVNNIINT16", "true",
    "Enable AVX-VNNI-INT16",
    [FeatureAVX2]>;
def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
    "Enable packed carry-less multiplication instructions",
    [FeatureSSE2]>;
def FeatureGFNI : SubtargetFeature<"gfni", "HasGFNI", "true",
    "Enable Galois Field Arithmetic Instructions",
    [FeatureSSE2]>;
def FeatureVPCLMULQDQ : SubtargetFeature<"vpclmulqdq", "HasVPCLMULQDQ", "true",
    "Enable vpclmulqdq instructions",
    [FeatureAVX, FeaturePCLMUL]>;
def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true",
    "Enable four-operand fused multiply-add",
    [FeatureAVX, FeatureSSE4A]>;
def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true",
    "Enable XOP instructions",
    [FeatureFMA4]>;
def FeatureSSEUnalignedMem : SubtargetFeature<
    "sse-unaligned-mem", "HasSSEUnalignedMem", "true",
    "Allow unaligned memory operands with SSE instructions (this may require setting a configuration bit in the processor)">;
def FeatureAES : SubtargetFeature<"aes", "HasAES", "true",
    "Enable AES instructions",
    [FeatureSSE2]>;
def FeatureVAES : SubtargetFeature<"vaes", "HasVAES", "true",
    "Promote selected AES instructions to AVX512/AVX registers",
    [FeatureAVX2, FeatureAES]>;
def FeatureTBM : SubtargetFeature<"tbm", "HasTBM", "true",
    "Enable TBM instructions">;
def FeatureLWP : SubtargetFeature<"lwp", "HasLWP", "true",
    "Enable LWP instructions">;
def FeatureMOVBE : SubtargetFeature<"movbe", "HasMOVBE", "true",
    "Support MOVBE instruction">;
def FeatureRDRAND : SubtargetFeature<"rdrnd", "HasRDRAND", "true",
    "Support RDRAND instruction">;
def FeatureFSGSBase : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true",
    "Support FS/GS Base instructions">;
def FeatureLZCNT : SubtargetFeature<"lzcnt", "HasLZCNT", "true",
    "Support LZCNT instruction">;
def FeatureBMI : SubtargetFeature<"bmi", "HasBMI", "true",
    "Support BMI instructions">;
def FeatureBMI2 : SubtargetFeature<"bmi2", "HasBMI2", "true",
    "Support BMI2 instructions">;
def FeatureRTM : SubtargetFeature<"rtm", "HasRTM", "true",
    "Support RTM instructions">;
def FeatureADX : SubtargetFeature<"adx", "HasADX", "true",
    "Support ADX instructions">;
def FeatureSHA : SubtargetFeature<"sha", "HasSHA", "true",
    "Enable SHA instructions",
    [FeatureSSE2]>;
def FeatureSHA512 : SubtargetFeature<"sha512", "HasSHA512", "true",
    "Support SHA512 instructions",
    [FeatureAVX2]>;
// Processor supports CET SHSTK - Control-Flow Enforcement Technology
// using Shadow Stack
def FeatureSHSTK : SubtargetFeature<"shstk", "HasSHSTK", "true",
    "Support CET Shadow-Stack instructions">;
def FeatureSM3 : SubtargetFeature<"sm3", "HasSM3", "true",
    "Support SM3 instructions",
    [FeatureAVX]>;
def FeatureSM4 : SubtargetFeature<"sm4", "HasSM4", "true",
    "Support SM4 instructions",
    [FeatureAVX2]>;
def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
    "Support PRFCHW instructions">;
def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true",
    "Support RDSEED instruction">;
def FeatureLAHFSAHF64 : SubtargetFeature<"sahf", "HasLAHFSAHF64", "true",
    "Support LAHF and SAHF instructions in 64-bit mode">;
def FeatureMWAITX : SubtargetFeature<"mwaitx", "HasMWAITX", "true",
    "Enable MONITORX/MWAITX timer functionality">;
def FeatureCLZERO : SubtargetFeature<"clzero", "HasCLZERO", "true",
    "Enable Cache Line Zero">;
def FeatureCLDEMOTE : SubtargetFeature<"cldemote", "HasCLDEMOTE", "true",
    "Enable Cache Line Demote">;
def FeaturePTWRITE : SubtargetFeature<"ptwrite", "HasPTWRITE", "true",
    "Support ptwrite instruction">;
// AMX family: every extension below implies the base amx-tile feature.
def FeatureAMXTILE : SubtargetFeature<"amx-tile", "HasAMXTILE", "true",
    "Support AMX-TILE instructions">;
def FeatureAMXINT8 : SubtargetFeature<"amx-int8", "HasAMXINT8", "true",
    "Support AMX-INT8 instructions",
    [FeatureAMXTILE]>;
def FeatureAMXBF16 : SubtargetFeature<"amx-bf16", "HasAMXBF16", "true",
    "Support AMX-BF16 instructions",
    [FeatureAMXTILE]>;
def FeatureAMXFP16 : SubtargetFeature<"amx-fp16", "HasAMXFP16", "true",
    "Support AMX-FP16 instructions",
    [FeatureAMXTILE]>;
def FeatureAMXCOMPLEX : SubtargetFeature<"amx-complex", "HasAMXCOMPLEX", "true",
    "Support AMX-COMPLEX instructions",
    [FeatureAMXTILE]>;
def FeatureCMPCCXADD : SubtargetFeature<"cmpccxadd", "HasCMPCCXADD", "true",
    "Support CMPCCXADD instructions">;
def FeatureRAOINT : SubtargetFeature<"raoint", "HasRAOINT", "true",
    "Support RAO-INT instructions">;
def FeatureAVXNECONVERT : SubtargetFeature<
    "avxneconvert", "HasAVXNECONVERT", "true",
    "Support AVX-NE-CONVERT instructions",
    [FeatureAVX2]>;
def FeatureINVPCID : SubtargetFeature<"invpcid", "HasINVPCID", "true",
    "Invalidate Process-Context Identifier">;
def FeatureSGX : SubtargetFeature<"sgx", "HasSGX", "true",
    "Enable Software Guard Extensions">;
def FeatureCLFLUSHOPT : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true",
    "Flush A Cache Line Optimized">;
def FeatureCLWB : SubtargetFeature<"clwb", "HasCLWB", "true",
    "Cache Line Write Back">;
def FeatureWBNOINVD : SubtargetFeature<"wbnoinvd", "HasWBNOINVD", "true",
    "Write Back No Invalidate">;
def FeatureRDPID : SubtargetFeature<"rdpid", "HasRDPID", "true",
    "Support RDPID instructions">;
def FeatureRDPRU : SubtargetFeature<"rdpru", "HasRDPRU", "true",
    "Support RDPRU instructions">;
def FeatureWAITPKG : SubtargetFeature<"waitpkg", "HasWAITPKG", "true",
    "Wait and pause enhancements">;
def FeatureENQCMD : SubtargetFeature<"enqcmd", "HasENQCMD", "true",
    "Has ENQCMD instructions">;
def FeatureKL : SubtargetFeature<"kl", "HasKL", "true",
    "Support Key Locker kl Instructions",
    [FeatureSSE2]>;
def FeatureWIDEKL : SubtargetFeature<"widekl", "HasWIDEKL", "true",
    "Support Key Locker wide Instructions",
    [FeatureKL]>;
def FeatureHRESET : SubtargetFeature<"hreset", "HasHRESET", "true",
    "Has hreset instruction">;
def FeatureSERIALIZE : SubtargetFeature<"serialize", "HasSERIALIZE", "true",
    "Has serialize instruction">;
def FeatureTSXLDTRK : SubtargetFeature<"tsxldtrk", "HasTSXLDTRK", "true",
    "Support TSXLDTRK instructions">;
def FeatureUINTR : SubtargetFeature<"uintr", "HasUINTR", "true",
    "Has UINTR Instructions">;
def FeatureUSERMSR : SubtargetFeature<"usermsr", "HasUSERMSR", "true",
    "Support USERMSR instructions">;
def FeaturePCONFIG : SubtargetFeature<"pconfig", "HasPCONFIG", "true",
    "platform configuration instruction">;
def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
    "Support movdiri instruction (direct store integer)">;
def FeatureMOVDIR64B : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true",
    "Support movdir64b instruction (direct store 64 bytes)">;
def FeatureAVX10_1 : SubtargetFeature<"avx10.1-256", "HasAVX10_1", "true",
    "Support AVX10.1 up to 256-bit instruction",
    [FeatureCDI, FeatureVBMI, FeatureIFMA, FeatureVNNI, FeatureBF16,
     FeatureVPOPCNTDQ, FeatureVBMI2, FeatureBITALG, FeatureVAES,
     FeatureVPCLMULQDQ, FeatureFP16]>;
def FeatureAVX10_1_512 : SubtargetFeature<
    "avx10.1-512", "HasAVX10_1_512", "true",
    "Support AVX10.1 up to 512-bit instruction",
    [FeatureAVX10_1, FeatureEVEX512]>;
def FeatureEGPR : SubtargetFeature<"egpr", "HasEGPR", "true",
    "Support extended general purpose register">;
def FeaturePush2Pop2 : SubtargetFeature<"push2pop2", "HasPush2Pop2", "true",
    "Support PUSH2/POP2 instructions">;
def FeaturePPX : SubtargetFeature<"ppx", "HasPPX", "true",
    "Support Push-Pop Acceleration">;
def FeatureNDD : SubtargetFeature<"ndd", "HasNDD", "true",
    "Support non-destructive destination">;
def FeatureCCMP : SubtargetFeature<"ccmp", "HasCCMP", "true",
    "Support conditional cmp & test instructions">;
def FeatureCF : SubtargetFeature<"cf", "HasCF", "true",
    "Support conditional faulting">;

// Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
// "string operations"). See "REP String Enhancement" in the Intel Software
// Development Manual. This feature essentially means that REP MOVSB will copy
// using the largest available size instead of copying bytes one by one, making
// it at least as fast as REPMOVS{W,D,Q}.
def FeatureERMSB : SubtargetFeature<"ermsb", "HasERMSB", "true",
    "REP MOVS/STOS are fast">;

// Icelake and newer processors have Fast Short REP MOV.
def FeatureFSRM : SubtargetFeature<"fsrm", "HasFSRM", "true",
    "REP MOVSB of short lengths is faster">;

def FeatureSoftFloat : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
    "Use software floating point features">;

//===----------------------------------------------------------------------===//
// X86 Subtarget Security Mitigation features
//===----------------------------------------------------------------------===//

// Mitigate potential Spectre v2 attacks against indirect calls by lowering
// them with a special construct called a `retpoline`.
def FeatureRetpolineIndirectCalls : SubtargetFeature<
    "retpoline-indirect-calls", "UseRetpolineIndirectCalls", "true",
    "Remove speculation of indirect calls from the generated code">;

// Mitigate potential Spectre v2 attacks against indirect branches and
// switches by lowering them either as conditional branch trees or with a
// `retpoline`.
def FeatureRetpolineIndirectBranches : SubtargetFeature<
    "retpoline-indirect-branches", "UseRetpolineIndirectBranches", "true",
    "Remove speculation of indirect branches from the generated code">;

// Deprecated umbrella feature: implies both `retpoline-indirect-calls` and
// `retpoline-indirect-branches` above.
def FeatureRetpoline : SubtargetFeature<
    "retpoline", "DeprecatedUseRetpoline", "true",
    "Remove speculation of indirect branches from the "
    "generated code, either by avoiding them entirely or "
    "lowering them with a speculation blocking construct",
    [FeatureRetpolineIndirectCalls, FeatureRetpolineIndirectBranches]>;

// Rely on external thunks for the emitted retpoline calls. This allows users
// to provide their own custom thunk definitions in highly specialized
// environments such as a kernel that does boot-time hot patching.
// Implies retpoline-indirect-calls; the help text notes it only has an effect
// together with some other retpoline feature.
def FeatureRetpolineExternalThunk : SubtargetFeature<
    "retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
    "When lowering an indirect call or branch using a `retpoline`, rely "
    "on the specified user provided thunk rather than emitting one "
    "ourselves. Only has effect when combined with some other retpoline "
    "feature",
    [FeatureRetpolineIndirectCalls]>;

// LVI mitigation for indirect calls/branches and call returns.
def FeatureLVIControlFlowIntegrity : SubtargetFeature<
    "lvi-cfi", "UseLVIControlFlowIntegrity", "true",
    "Prevent indirect calls/branches from using a memory operand, and "
    "precede all indirect calls/branches from a register with an "
    "LFENCE instruction to serialize control flow. Also decompose RET "
    "instructions into a POP+LFENCE+JMP sequence.">;

// SESES mitigation for speculative execution attacks; implies lvi-cfi.
def FeatureSpeculativeExecutionSideEffectSuppression : SubtargetFeature<
    "seses", "UseSpeculativeExecutionSideEffectSuppression", "true",
    "Prevent speculative execution side channel timing attacks by "
    "inserting a speculation barrier before memory reads, memory writes, "
    "and conditional branches. Implies LVI Control Flow integrity.",
    [FeatureLVIControlFlowIntegrity]>;

// LVI mitigation for data loads.
def FeatureLVILoadHardening : SubtargetFeature<
    "lvi-load-hardening", "UseLVILoadHardening", "true",
    "Insert LFENCE instructions to prevent data speculatively injected "
    "into loads from being used maliciously.">;

def FeatureTaggedGlobals : SubtargetFeature<
    "tagged-globals", "AllowTaggedGlobals", "true",
    "Use an instruction sequence for taking the address of a global "
    "that allows a memory tag in the upper address bits.">;

// Control codegen mitigation against Straight Line Speculation vulnerability.
def FeatureHardenSlsRet : SubtargetFeature<
    "harden-sls-ret", "HardenSlsRet", "true",
    "Harden against straight line speculation across RET instructions.">;

def FeatureHardenSlsIJmp : SubtargetFeature<
    "harden-sls-ijmp", "HardenSlsIJmp", "true",
    "Harden against straight line speculation across indirect JMP instructions.">;

//===----------------------------------------------------------------------===//
// X86 Subtarget Tuning features
//===----------------------------------------------------------------------===//
def TuningPreferMovmskOverVTest : SubtargetFeature<
    "prefer-movmsk-over-vtest", "PreferMovmskOverVTest", "true",
    "Prefer movmsk over vtest instruction">;

def TuningSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
    "SHLD instruction is slow">;

def TuningSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
    "PMULLD instruction is slow (compared to PMULLW/PMULHW and PMULUDQ)">;

def TuningSlowPMADDWD : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow", "true",
    "PMADDWD is slower than PMULLD">;

// FIXME: This should not apply to CPUs that do not have SSE.
def TuningSlowUAMem16 : SubtargetFeature<
    "slow-unaligned-mem-16", "IsUnalignedMem16Slow", "true",
    "Slow unaligned 16-byte memory access">;

def TuningSlowUAMem32 : SubtargetFeature<
    "slow-unaligned-mem-32", "IsUnalignedMem32Slow", "true",
    "Slow unaligned 32-byte memory access">;

def TuningLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
    "Use LEA for adjusting the stack pointer (this is an optimization for Intel Atom processors)">;

// True if 8-bit divisions are significantly faster than
// 32-bit divisions and should be used when possible.
def TuningSlowDivide32 : SubtargetFeature<
    "idivl-to-divb", "HasSlowDivide32", "true",
    "Use 8-bit divide for positive values less than 256">;

// True if 32-bit divides are significantly faster than
// 64-bit divisions and should be used when possible.
def TuningSlowDivide64 : SubtargetFeature<
    "idivq-to-divl", "HasSlowDivide64", "true",
    "Use 32-bit divide for positive values less than 2^32">;

def TuningPadShortFunctions : SubtargetFeature<
    "pad-short-functions", "PadShortFunctions", "true",
    "Pad short functions (to prevent a stall when returning too early)">;

// Some processors are slow on instructions that implicitly take two memory
// operands. In practice that means CALL, PUSH, and POP with memory operands
// should be avoided in favor of a MOV + register CALL/PUSH/POP.
def TuningSlowTwoMemOps : SubtargetFeature<
    "slow-two-mem-ops", "SlowTwoMemOps", "true",
    "Two memory operand instructions are slow">;

// True if the LEA instruction inputs have to be ready at address generation
// (AG) time.
def TuningLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LeaUsesAG", "true",
    "LEA instruction needs inputs at AG stage">;

def TuningSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
    "LEA instruction with certain arguments is slow">;

// True if the LEA instruction has all three source operands: base, index,
// and offset or if the LEA instruction uses base and index registers where
// the base is EBP, RBP, or R13
def TuningSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
    "LEA instruction with 3 ops or certain registers is slow">;

// True if INC and DEC instructions are slow when writing to flags
def TuningSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
    "INC and DEC instructions are slower than ADD and SUB">;

// False-dependency tunings: per the help text, each names instructions that
// have a false dependency on their destination register.
def TuningPOPCNTFalseDeps : SubtargetFeature<
    "false-deps-popcnt", "HasPOPCNTFalseDeps", "true",
    "POPCNT has a false dependency on dest register">;

def TuningLZCNTFalseDeps : SubtargetFeature<
    "false-deps-lzcnt-tzcnt", "HasLZCNTFalseDeps", "true",
    "LZCNT/TZCNT have a false dependency on dest register">;

def TuningMULCFalseDeps : SubtargetFeature<
    "false-deps-mulc", "HasMULCFalseDeps", "true",
    "VF[C]MULCPH/SH has a false dependency on dest register">;

def TuningPERMFalseDeps : SubtargetFeature<
    "false-deps-perm", "HasPERMFalseDeps", "true",
    "VPERMD/Q/PS/PD has a false dependency on dest register">;

def TuningRANGEFalseDeps : SubtargetFeature<
    "false-deps-range", "HasRANGEFalseDeps", "true",
    "VRANGEPD/PS/SD/SS has a false dependency on dest register">;

// The packed mnemonics are VGETMANTPS/PD, matching the scalar VGETMANTSS/SD/SH
// named in the same help text.
def TuningGETMANTFalseDeps : SubtargetFeature<
    "false-deps-getmant", "HasGETMANTFalseDeps", "true",
    "VGETMANTSS/SD/SH and VGETMANTPS/PD(memory version) has a"
    " false dependency on dest register">;

def TuningMULLQFalseDeps : SubtargetFeature<
    "false-deps-mullq", "HasMULLQFalseDeps", "true",
    "VPMULLQ has a false dependency on dest register">;

def TuningSBBDepBreaking : SubtargetFeature<
    "sbb-dep-breaking", "HasSBBDepBreaking", "true",
    "SBB with same register has no source dependency">;

// On recent X86 (port bound) processors, it's preferable to combine to a
// single shuffle using a variable mask over multiple fixed shuffles.
def TuningFastVariableCrossLaneShuffle : SubtargetFeature<
    "fast-variable-crosslane-shuffle", "HasFastVariableCrossLaneShuffle", "true",
    "Cross-lane shuffles with variable masks are fast">;
def TuningFastVariablePerLaneShuffle : SubtargetFeature<
    "fast-variable-perlane-shuffle", "HasFastVariablePerLaneShuffle", "true",
    "Per-lane shuffles with variable masks are fast">;

// Goldmont / Tremont (atom in general) has no bypass delay
def TuningNoDomainDelay : SubtargetFeature<
    "no-bypass-delay", "NoDomainDelay", "true",
    "Has no bypass delay when using the 'wrong' domain">;

// Many processors (Nehalem+ on Intel) have no bypass delay when
// using the wrong mov type.
def TuningNoDomainDelayMov : SubtargetFeature<
    "no-bypass-delay-mov", "NoDomainDelayMov", "true",
    "Has no bypass delay when using the 'wrong' mov type">;

// Newer processors (Skylake+ on Intel) have no bypass delay when
// using the wrong blend type.
def TuningNoDomainDelayBlend : SubtargetFeature<
    "no-bypass-delay-blend", "NoDomainDelayBlend", "true",
    "Has no bypass delay when using the 'wrong' blend type">;

// Newer processors (Haswell+ on Intel) have no bypass delay when
// using the wrong shuffle type.
def TuningNoDomainDelayShuffle : SubtargetFeature<
    "no-bypass-delay-shuffle", "NoDomainDelayShuffle", "true",
    "Has no bypass delay when using the 'wrong' shuffle type">;

// Prefer lowering shuffles on AVX512 targets (e.g. Skylake Server) to
// imm shifts/rotate if they can use more ports than regular shuffles.
def TuningPreferShiftShuffle : SubtargetFeature<
    "faster-shift-than-shuffle", "PreferLowerShuffleAsShift", "true",
    "Shifts are faster (or as fast) as shuffle">;

def TuningFastImmVectorShift : SubtargetFeature<
    "tuning-fast-imm-vector-shift", "FastImmVectorShift", "true",
    "Vector shifts are fast (2/cycle) as opposed to slow (1/cycle)">;

// Some X86 processors need a vzeroupper instruction inserted after ymm/zmm
// registers have been used and before code that may use SSE instructions runs.
def TuningInsertVZEROUPPER : SubtargetFeature<
    "vzeroupper", "InsertVZEROUPPER", "true",
    "Should insert vzeroupper instructions">;

// TuningFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency
// than the corresponding NR code. TuningFastVectorFSQRT should be enabled if
// vector FSQRT has higher throughput than the corresponding NR code.
// The idea is that throughput bound code is likely to be vectorized, so for
// vectorized code we should care about the throughput of SQRT operations.
// But if the code is scalar that probably means that the code has some kind of
// dependency and we should care more about reducing the latency.

// True if hardware SQRTSS instruction is at least as fast (latency) as
// RSQRTSS followed by a Newton-Raphson iteration.
def TuningFastScalarFSQRT : SubtargetFeature<
    "fast-scalar-fsqrt", "HasFastScalarFSQRT", "true",
    "Scalar SQRT is fast (disable Newton-Raphson)">;
// True if hardware SQRTPS/VSQRTPS instructions are at least as fast
// (throughput) as RSQRTPS/VRSQRTPS followed by a Newton-Raphson iteration.
def TuningFastVectorFSQRT : SubtargetFeature<
    "fast-vector-fsqrt", "HasFastVectorFSQRT", "true",
    "Vector SQRT is fast (disable Newton-Raphson)">;

// If lzcnt has equivalent latency/throughput to most simple integer ops, it can
// be used to replace test/set sequences.
def TuningFastLZCNT : SubtargetFeature<"fast-lzcnt", "HasFastLZCNT", "true",
    "LZCNT instructions are as fast as most simple integer ops">;

// If the target can efficiently decode NOPs up to 7 bytes in length.
def TuningFast7ByteNOP : SubtargetFeature<
    "fast-7bytenop", "HasFast7ByteNOP", "true",
    "Target can quickly decode up to 7 byte NOPs">;

// If the target can efficiently decode NOPs up to 11 bytes in length.
def TuningFast11ByteNOP : SubtargetFeature<
    "fast-11bytenop", "HasFast11ByteNOP", "true",
    "Target can quickly decode up to 11 byte NOPs">;

// If the target can efficiently decode NOPs up to 15 bytes in length.
def TuningFast15ByteNOP : SubtargetFeature<
    "fast-15bytenop", "HasFast15ByteNOP", "true",
    "Target can quickly decode up to 15 byte NOPs">;

// Sandy Bridge and newer processors can implement a rotate as SHLD with the
// same source on both inputs, which avoids the partial flag update of the
// normal rotate instructions.
def TuningFastSHLDRotate : SubtargetFeature<
    "fast-shld-rotate", "HasFastSHLDRotate", "true",
    "SHLD can be used as a faster rotate">;

// Bulldozer and newer processors can merge CMP/TEST (but not other
// instructions) with conditional branches.
def TuningBranchFusion : SubtargetFeature<
    "branchfusion", "HasBranchFusion", "true",
    "CMP/TEST can be fused with conditional branches">;

// Sandy Bridge and newer processors have many instructions that can be
// fused with conditional branches and pass through the CPU as a single
// operation.
def TuningMacroFusion : SubtargetFeature<
    "macrofusion", "HasMacroFusion", "true",
    "Various instructions can be fused with conditional branches">;

// Gather is available since Haswell (AVX2 set). So technically, we can
// generate Gathers on all AVX2 processors. But the overhead on HSW is high.
// Skylake Client processor has faster Gathers than HSW and performance is
// similar to Skylake Server (AVX-512).
def TuningFastGather
    : SubtargetFeature<"fast-gather", "HasFastGather", "true",
                       "Indicates if gather is reasonably fast (this is true for Skylake client and all AVX-512 CPUs)">;

// Unlike the neighbouring tunings, the next two pass "false" as the value for
// their PreferGather / PreferScatter fields.
def TuningPreferNoGather
    : SubtargetFeature<"prefer-no-gather", "PreferGather", "false",
                       "Prefer no gather instructions">;
def TuningPreferNoScatter
    : SubtargetFeature<"prefer-no-scatter", "PreferScatter", "false",
                       "Prefer no scatter instructions">;

// Preferred vector width tunings.
def TuningPrefer128Bit
    : SubtargetFeature<"prefer-128-bit", "Prefer128Bit", "true",
                       "Prefer 128-bit AVX instructions">;

def TuningPrefer256Bit
    : SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true",
                       "Prefer 256-bit AVX instructions">;

def TuningAllowLight256Bit
    : SubtargetFeature<"allow-light-256-bit", "AllowLight256Bit", "true",
                       "Enable generation of 256-bit load/stores even if we prefer 128-bit">;

def TuningPreferMaskRegisters
    : SubtargetFeature<"prefer-mask-registers", "PreferMaskRegisters", "true",
                       "Prefer AVX512 mask registers over PTEST/MOVMSK">;

def TuningFastBEXTR
    : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true",
                       "Indicates that the BEXTR instruction is implemented as a single uop "
                       "with good throughput">;

// Fold vector math plus shuffles into horizontal math instructions when the
// CPU implements horizontal operations (introduced with SSE3) with better
// latency/throughput than the alternative sequence.
def TuningFastHorizontalOps
    : SubtargetFeature<"fast-hops", "HasFastHorizontalOps", "true",
                       "Prefer horizontal vector math instructions (haddp, phsub, etc.) over "
                       "normal vector instructions with shuffles">;

def TuningFastScalarShiftMasks
    : SubtargetFeature<"fast-scalar-shift-masks", "HasFastScalarShiftMasks",
                       "true",
                       "Prefer a left/right scalar logical shift pair over a shift+and pair">;

def TuningFastVectorShiftMasks
    : SubtargetFeature<"fast-vector-shift-masks", "HasFastVectorShiftMasks",
                       "true",
                       "Prefer a left/right vector logical shift pair over a shift+and pair">;

def TuningFastMOVBE
    : SubtargetFeature<"fast-movbe", "HasFastMOVBE", "true",
                       "Prefer a movbe over a single-use load + bswap / single-use bswap + store">;

// Cost-table selectors.
def TuningUseSLMArithCosts
    : SubtargetFeature<"use-slm-arith-costs", "UseSLMArithCosts", "true",
                       "Use Silvermont specific arithmetic costs">;

def TuningUseGLMDivSqrtCosts
    : SubtargetFeature<"use-glm-div-sqrt-costs", "UseGLMDivSqrtCosts", "true",
                       "Use Goldmont specific floating point div/sqrt costs">;

//===----------------------------------------------------------------------===//
// X86 CPU Families
// TODO: Remove these - use general tuning features to determine codegen.
//===----------------------------------------------------------------------===//

// Bonnell. Registered with an empty feature-name string.
def ProcIntelAtom
    : SubtargetFeature<"", "IsAtom", "true", "Is Intel Atom processor">;

//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//

include "X86RegisterInfo.td"
include "X86RegisterBanks.td"

//===----------------------------------------------------------------------===//
// Instruction Descriptions
//===----------------------------------------------------------------------===//

include "X86Schedule.td"
include "X86InstrInfo.td"
include "X86SchedPredicates.td"

def X86InstrInfo : InstrInfo;

//===----------------------------------------------------------------------===//
// X86 Scheduler Models
//===----------------------------------------------------------------------===//

include "X86ScheduleAtom.td"
include "X86SchedSandyBridge.td"
include "X86SchedHaswell.td"
include "X86SchedBroadwell.td"
include "X86ScheduleSLM.td"
include "X86ScheduleZnver1.td"
include "X86ScheduleZnver2.td"
include "X86ScheduleZnver3.td"
include "X86ScheduleZnver4.td"
include "X86ScheduleBdVer2.td"
include "X86ScheduleBtVer2.td"
include "X86SchedSkylakeClient.td"
include "X86SchedSkylakeServer.td"
include "X86SchedIceLake.td"
include "X86SchedAlderlakeP.td"
include "X86SchedSapphireRapids.td"

//===----------------------------------------------------------------------===//
// X86 Processor Feature Lists
//===----------------------------------------------------------------------===//

// Named feature/tuning lists shared by the processor definitions.
//
// Naming pattern used throughout this record:
//   <CPU>AdditionalFeatures - features this CPU adds on top of its base list.
//   <CPU>Features           - the complete list, usually built with
//                             !listconcat(<base>Features, <CPU>Additional...).
//   <CPU>Tuning             - the tuning list, either spelled out in full or
//                             aliased/concatenated from another CPU's list.
// Fields refer to earlier fields, so the declaration order is significant.
def ProcessorFeatures {
  // x86-64 micro-architecture levels: x86-64 and x86-64-v[234]
  list<SubtargetFeature> X86_64V1Features = [
    FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX, FeatureSSE2, FeatureFXSR,
    FeatureNOPL, FeatureX86_64
  ];
  list<SubtargetFeature> X86_64V1Tuning = [
    TuningMacroFusion, TuningSlow3OpsLEA, TuningSlowDivide64,
    TuningSlowIncDec, TuningInsertVZEROUPPER
  ];

  list<SubtargetFeature> X86_64V2Features = !listconcat(X86_64V1Features, [
    FeatureCX16, FeatureLAHFSAHF64, FeatureCRC32, FeaturePOPCNT, FeatureSSE42
  ]);
  list<SubtargetFeature> X86_64V2Tuning = [
    TuningMacroFusion, TuningSlow3OpsLEA, TuningSlowDivide64,
    TuningSlowUAMem32, TuningFastScalarFSQRT, TuningFastSHLDRotate,
    TuningFast15ByteNOP, TuningPOPCNTFalseDeps, TuningInsertVZEROUPPER
  ];

  list<SubtargetFeature> X86_64V3Features = !listconcat(X86_64V2Features, [
    FeatureAVX2, FeatureBMI, FeatureBMI2, FeatureF16C, FeatureFMA,
    FeatureLZCNT, FeatureMOVBE, FeatureXSAVE
  ]);
  list<SubtargetFeature> X86_64V3Tuning = [
    TuningMacroFusion, TuningSlow3OpsLEA, TuningSlowDivide64,
    TuningFastScalarFSQRT, TuningFastSHLDRotate, TuningFast15ByteNOP,
    TuningFastVariableCrossLaneShuffle, TuningFastVariablePerLaneShuffle,
    TuningPOPCNTFalseDeps, TuningLZCNTFalseDeps, TuningInsertVZEROUPPER,
    TuningAllowLight256Bit
  ];

  list<SubtargetFeature> X86_64V4Features = !listconcat(X86_64V3Features, [
    FeatureEVEX512, FeatureBWI, FeatureCDI, FeatureDQI, FeatureVLX
  ]);
  list<SubtargetFeature> X86_64V4Tuning = [
    TuningMacroFusion, TuningSlow3OpsLEA, TuningSlowDivide64,
    TuningFastScalarFSQRT, TuningFastVectorFSQRT, TuningFastSHLDRotate,
    TuningFast15ByteNOP, TuningFastVariableCrossLaneShuffle,
    TuningFastVariablePerLaneShuffle, TuningPrefer256Bit, TuningFastGather,
    TuningPOPCNTFalseDeps, TuningInsertVZEROUPPER, TuningAllowLight256Bit
  ];

  // Nehalem
  list<SubtargetFeature> NHMFeatures = X86_64V2Features;
  list<SubtargetFeature> NHMTuning = [
    TuningMacroFusion, TuningSlowDivide64, TuningInsertVZEROUPPER,
    TuningNoDomainDelayMov
  ];

  // Westmere
  list<SubtargetFeature> WSMAdditionalFeatures = [FeaturePCLMUL];
  list<SubtargetFeature> WSMTuning = NHMTuning;
  list<SubtargetFeature> WSMFeatures =
    !listconcat(NHMFeatures, WSMAdditionalFeatures);

  // Sandybridge
  list<SubtargetFeature> SNBAdditionalFeatures = [
    FeatureAVX, FeatureXSAVE, FeatureXSAVEOPT
  ];
  list<SubtargetFeature> SNBTuning = [
    TuningMacroFusion, TuningSlow3OpsLEA, TuningSlowDivide64,
    TuningSlowUAMem32, TuningFastScalarFSQRT, TuningFastSHLDRotate,
    TuningFast15ByteNOP, TuningPOPCNTFalseDeps, TuningInsertVZEROUPPER,
    TuningNoDomainDelayMov
  ];
  list<SubtargetFeature> SNBFeatures =
    !listconcat(WSMFeatures, SNBAdditionalFeatures);

  // Ivybridge
  list<SubtargetFeature> IVBAdditionalFeatures = [
    FeatureRDRAND, FeatureF16C, FeatureFSGSBase
  ];
  list<SubtargetFeature> IVBTuning = SNBTuning;
  list<SubtargetFeature> IVBFeatures =
    !listconcat(SNBFeatures, IVBAdditionalFeatures);

  // Haswell
  list<SubtargetFeature> HSWAdditionalFeatures = [
    FeatureAVX2, FeatureBMI, FeatureBMI2, FeatureERMSB, FeatureFMA,
    FeatureINVPCID, FeatureLZCNT, FeatureMOVBE
  ];
  list<SubtargetFeature> HSWTuning = [
    TuningMacroFusion, TuningSlow3OpsLEA, TuningSlowDivide64,
    TuningFastScalarFSQRT, TuningFastSHLDRotate, TuningFast15ByteNOP,
    TuningFastVariableCrossLaneShuffle, TuningFastVariablePerLaneShuffle,
    TuningPOPCNTFalseDeps, TuningLZCNTFalseDeps, TuningInsertVZEROUPPER,
    TuningAllowLight256Bit, TuningNoDomainDelayMov,
    TuningNoDomainDelayShuffle
  ];
  list<SubtargetFeature> HSWFeatures =
    !listconcat(IVBFeatures, HSWAdditionalFeatures);

  // Broadwell
  list<SubtargetFeature> BDWAdditionalFeatures = [
    FeatureADX, FeatureRDSEED, FeaturePRFCHW
  ];
  list<SubtargetFeature> BDWTuning = HSWTuning;
  list<SubtargetFeature> BDWFeatures =
    !listconcat(HSWFeatures, BDWAdditionalFeatures);

  // Skylake
  list<SubtargetFeature> SKLAdditionalFeatures = [
    FeatureAES, FeatureXSAVEC, FeatureXSAVES, FeatureCLFLUSHOPT
  ];
  list<SubtargetFeature> SKLTuning = [
    TuningFastGather, TuningMacroFusion, TuningSlow3OpsLEA,
    TuningSlowDivide64, TuningFastScalarFSQRT, TuningFastVectorFSQRT,
    TuningFastSHLDRotate, TuningFast15ByteNOP,
    TuningFastVariableCrossLaneShuffle, TuningFastVariablePerLaneShuffle,
    TuningPOPCNTFalseDeps, TuningInsertVZEROUPPER, TuningAllowLight256Bit,
    TuningNoDomainDelayMov, TuningNoDomainDelayShuffle,
    TuningNoDomainDelayBlend
  ];
  list<SubtargetFeature> SKLFeatures =
    !listconcat(BDWFeatures, SKLAdditionalFeatures);

  // Skylake-AVX512 (built on the Broadwell list, not on SKLFeatures)
  list<SubtargetFeature> SKXAdditionalFeatures = [
    FeatureAES, FeatureXSAVEC, FeatureXSAVES, FeatureCLFLUSHOPT,
    FeatureAVX512, FeatureEVEX512, FeatureCDI, FeatureDQI, FeatureBWI,
    FeatureVLX, FeaturePKU, FeatureCLWB
  ];
  list<SubtargetFeature> SKXTuning = [
    TuningFastGather, TuningMacroFusion, TuningSlow3OpsLEA,
    TuningSlowDivide64, TuningFastScalarFSQRT, TuningFastVectorFSQRT,
    TuningFastSHLDRotate, TuningFast15ByteNOP,
    TuningFastVariableCrossLaneShuffle, TuningFastVariablePerLaneShuffle,
    TuningPrefer256Bit, TuningPOPCNTFalseDeps, TuningInsertVZEROUPPER,
    TuningAllowLight256Bit, TuningPreferShiftShuffle, TuningNoDomainDelayMov,
    TuningNoDomainDelayShuffle, TuningNoDomainDelayBlend,
    TuningFastImmVectorShift
  ];
  list<SubtargetFeature> SKXFeatures =
    !listconcat(BDWFeatures, SKXAdditionalFeatures);

  // Cascadelake
  list<SubtargetFeature> CLXAdditionalFeatures = [FeatureVNNI];
  list<SubtargetFeature> CLXTuning = SKXTuning;
  list<SubtargetFeature> CLXFeatures =
    !listconcat(SKXFeatures, CLXAdditionalFeatures);

  // Cooperlake
  list<SubtargetFeature> CPXAdditionalFeatures = [FeatureBF16];
  list<SubtargetFeature> CPXTuning = SKXTuning;
  list<SubtargetFeature> CPXFeatures =
    !listconcat(CLXFeatures, CPXAdditionalFeatures);

  // Cannonlake
  list<SubtargetFeature> CNLAdditionalFeatures = [
    FeatureAVX512, FeatureEVEX512, FeatureCDI, FeatureDQI, FeatureBWI,
    FeatureVLX, FeaturePKU, FeatureVBMI, FeatureIFMA, FeatureSHA
  ];
  list<SubtargetFeature> CNLTuning = [
    TuningFastGather, TuningMacroFusion, TuningSlow3OpsLEA,
    TuningSlowDivide64, TuningFastScalarFSQRT, TuningFastVectorFSQRT,
    TuningFastSHLDRotate, TuningFast15ByteNOP,
    TuningFastVariableCrossLaneShuffle, TuningFastVariablePerLaneShuffle,
    TuningPrefer256Bit, TuningInsertVZEROUPPER, TuningAllowLight256Bit,
    TuningNoDomainDelayMov, TuningNoDomainDelayShuffle,
    TuningNoDomainDelayBlend, TuningFastImmVectorShift
  ];
  list<SubtargetFeature> CNLFeatures =
    !listconcat(SKLFeatures, CNLAdditionalFeatures);

  // Icelake
  list<SubtargetFeature> ICLAdditionalFeatures = [
    FeatureBITALG, FeatureVAES, FeatureVBMI2, FeatureVNNI, FeatureVPCLMULQDQ,
    FeatureVPOPCNTDQ, FeatureGFNI, FeatureRDPID, FeatureFSRM
  ];
  list<SubtargetFeature> ICLTuning = [
    TuningFastGather, TuningMacroFusion, TuningSlowDivide64,
    TuningFastScalarFSQRT, TuningFastVectorFSQRT, TuningFastSHLDRotate,
    TuningFast15ByteNOP, TuningFastVariableCrossLaneShuffle,
    TuningFastVariablePerLaneShuffle, TuningPrefer256Bit,
    TuningInsertVZEROUPPER, TuningAllowLight256Bit, TuningNoDomainDelayMov,
    TuningNoDomainDelayShuffle, TuningNoDomainDelayBlend,
    TuningFastImmVectorShift
  ];
  list<SubtargetFeature> ICLFeatures =
    !listconcat(CNLFeatures, ICLAdditionalFeatures);

  // Icelake Server
  list<SubtargetFeature> ICXAdditionalFeatures = [
    FeaturePCONFIG, FeatureCLWB, FeatureWBNOINVD
  ];
  list<SubtargetFeature> ICXTuning = ICLTuning;
  list<SubtargetFeature> ICXFeatures =
    !listconcat(ICLFeatures, ICXAdditionalFeatures);

  // Tigerlake
  list<SubtargetFeature> TGLAdditionalFeatures = [
    FeatureVP2INTERSECT, FeatureCLWB, FeatureMOVDIRI, FeatureMOVDIR64B,
    FeatureSHSTK
  ];
  list<SubtargetFeature> TGLTuning = ICLTuning;
  list<SubtargetFeature> TGLFeatures =
    !listconcat(ICLFeatures, TGLAdditionalFeatures);

  // Sapphirerapids
  list<SubtargetFeature> SPRAdditionalFeatures = [
    FeatureAMXTILE, FeatureAMXINT8, FeatureAMXBF16, FeatureBF16,
    FeatureSERIALIZE, FeatureCLDEMOTE, FeatureWAITPKG, FeaturePTWRITE,
    FeatureFP16, FeatureAVXVNNI, FeatureTSXLDTRK, FeatureENQCMD,
    FeatureSHSTK, FeatureMOVDIRI, FeatureMOVDIR64B, FeatureUINTR
  ];
  list<SubtargetFeature> SPRAdditionalTuning = [
    TuningMULCFalseDeps, TuningPERMFalseDeps, TuningRANGEFalseDeps,
    TuningGETMANTFalseDeps, TuningMULLQFalseDeps
  ];
  list<SubtargetFeature> SPRTuning =
    !listconcat(ICXTuning, SPRAdditionalTuning);
  list<SubtargetFeature> SPRFeatures =
    !listconcat(ICXFeatures, SPRAdditionalFeatures);

  // Graniterapids
  list<SubtargetFeature> GNRAdditionalFeatures = [
    FeatureAMXFP16, FeaturePREFETCHI
  ];
  list<SubtargetFeature> GNRFeatures =
    !listconcat(SPRFeatures, GNRAdditionalFeatures);

  // Graniterapids D
  list<SubtargetFeature> GNRDAdditionalFeatures = [FeatureAMXCOMPLEX];
  list<SubtargetFeature> GNRDFeatures =
    !listconcat(GNRFeatures, GNRDAdditionalFeatures);

  // Atom
  list<SubtargetFeature> AtomFeatures = [
    FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX, FeatureSSSE3,
    FeatureFXSR, FeatureNOPL, FeatureX86_64, FeatureCX16, FeatureMOVBE,
    FeatureLAHFSAHF64
  ];
  list<SubtargetFeature> AtomTuning = [
    ProcIntelAtom, TuningSlowUAMem16, TuningLEAForSP, TuningSlowDivide32,
    TuningSlowDivide64, TuningSlowTwoMemOps, TuningLEAUsesAG,
    TuningPadShortFunctions, TuningInsertVZEROUPPER, TuningNoDomainDelay
  ];

  // Silvermont
  list<SubtargetFeature> SLMAdditionalFeatures = [
    FeatureSSE42, FeatureCRC32, FeaturePOPCNT, FeaturePCLMUL, FeaturePRFCHW,
    FeatureRDRAND
  ];
  list<SubtargetFeature> SLMTuning = [
    TuningUseSLMArithCosts, TuningSlowTwoMemOps, TuningSlowLEA,
    TuningSlowIncDec, TuningSlowDivide64, TuningSlowPMULLD,
    TuningFast7ByteNOP, TuningFastMOVBE, TuningPOPCNTFalseDeps,
    TuningInsertVZEROUPPER, TuningNoDomainDelay
  ];
  list<SubtargetFeature> SLMFeatures =
    !listconcat(AtomFeatures, SLMAdditionalFeatures);

  // Goldmont
  list<SubtargetFeature> GLMAdditionalFeatures = [
    FeatureAES, FeatureSHA, FeatureRDSEED, FeatureXSAVE, FeatureXSAVEOPT,
    FeatureXSAVEC, FeatureXSAVES, FeatureCLFLUSHOPT, FeatureFSGSBase
  ];
  list<SubtargetFeature> GLMTuning = [
    TuningUseGLMDivSqrtCosts, TuningSlowTwoMemOps, TuningSlowLEA,
    TuningSlowIncDec, TuningFastMOVBE, TuningPOPCNTFalseDeps,
    TuningInsertVZEROUPPER, TuningNoDomainDelay
  ];
  list<SubtargetFeature> GLMFeatures =
    !listconcat(SLMFeatures, GLMAdditionalFeatures);

  // Goldmont Plus
  list<SubtargetFeature> GLPAdditionalFeatures = [
    FeaturePTWRITE, FeatureRDPID
  ];
  list<SubtargetFeature> GLPTuning = [
    TuningUseGLMDivSqrtCosts, TuningSlowTwoMemOps, TuningSlowLEA,
    TuningSlowIncDec, TuningFastMOVBE, TuningInsertVZEROUPPER,
    TuningNoDomainDelay
  ];
  list<SubtargetFeature> GLPFeatures =
    !listconcat(GLMFeatures, GLPAdditionalFeatures);

  // Tremont
  list<SubtargetFeature> TRMAdditionalFeatures = [FeatureCLWB, FeatureGFNI];
  list<SubtargetFeature> TRMTuning = GLPTuning;
  list<SubtargetFeature> TRMFeatures =
    !listconcat(GLPFeatures, TRMAdditionalFeatures);

  // Alderlake (features extend Tremont; tuning extends Skylake)
  list<SubtargetFeature> ADLAdditionalFeatures = [
    FeatureSERIALIZE, FeaturePCONFIG, FeatureSHSTK, FeatureWIDEKL,
    FeatureINVPCID, FeatureADX, FeatureFMA, FeatureVAES, FeatureVPCLMULQDQ,
    FeatureF16C, FeatureBMI, FeatureBMI2, FeatureLZCNT, FeatureAVXVNNI,
    FeaturePKU, FeatureHRESET, FeatureCLDEMOTE, FeatureMOVDIRI,
    FeatureMOVDIR64B, FeatureWAITPKG
  ];
  list<SubtargetFeature> ADLAdditionalTuning = [
    TuningPERMFalseDeps, TuningPreferMovmskOverVTest,
    TuningFastImmVectorShift
  ];
  list<SubtargetFeature> ADLTuning =
    !listconcat(SKLTuning, ADLAdditionalTuning);
  list<SubtargetFeature> ADLFeatures =
    !listconcat(TRMFeatures, ADLAdditionalFeatures);

  // Gracemont
  list<SubtargetFeature> GRTTuning = [
    TuningMacroFusion, TuningSlow3OpsLEA, TuningSlowDivide32,
    TuningSlowDivide64, TuningFastScalarFSQRT, TuningFastVectorFSQRT,
    TuningFast15ByteNOP, TuningFastVariablePerLaneShuffle,
    TuningPOPCNTFalseDeps, TuningInsertVZEROUPPER
  ];

  // Sierraforest
  list<SubtargetFeature> SRFAdditionalFeatures = [
    FeatureCMPCCXADD, FeatureAVXIFMA, FeatureAVXNECONVERT, FeatureENQCMD,
    FeatureUINTR, FeatureAVXVNNIINT8
  ];
  list<SubtargetFeature> SRFFeatures =
    !listconcat(ADLFeatures, SRFAdditionalFeatures);

  // Arrowlake S
  list<SubtargetFeature> ARLSAdditionalFeatures = [
    FeatureAVXVNNIINT16, FeatureSHA512, FeatureSM3, FeatureSM4
  ];
  list<SubtargetFeature> ARLSFeatures =
    !listconcat(SRFFeatures, ARLSAdditionalFeatures);

  // Pantherlake
  list<SubtargetFeature> PTLAdditionalFeatures = [FeaturePREFETCHI];
  list<SubtargetFeature> PTLFeatures =
    !listconcat(ARLSFeatures, PTLAdditionalFeatures);

  // Clearwaterforest
  list<SubtargetFeature> CWFAdditionalFeatures = [
    FeaturePREFETCHI, FeatureUSERMSR
  ];
  list<SubtargetFeature> CWFFeatures =
    !listconcat(ARLSFeatures, CWFAdditionalFeatures);

  // Knights Landing
  list<SubtargetFeature> KNLFeatures = [
    FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX, FeatureFXSR,
    FeatureNOPL, FeatureX86_64, FeatureCX16, FeatureCRC32, FeaturePOPCNT,
    FeaturePCLMUL, FeatureXSAVE, FeatureXSAVEOPT, FeatureLAHFSAHF64,
    FeatureAES, FeatureRDRAND, FeatureF16C, FeatureFSGSBase, FeatureAVX512,
    FeatureEVEX512, FeatureERI, FeatureCDI, FeaturePFI, FeaturePREFETCHWT1,
    FeatureADX, FeatureRDSEED, FeatureMOVBE, FeatureLZCNT, FeatureBMI,
    FeatureBMI2, FeatureFMA, FeaturePRFCHW
  ];
  list<SubtargetFeature> KNLTuning = [
    TuningSlowDivide64, TuningSlow3OpsLEA, TuningSlowIncDec,
    TuningSlowTwoMemOps, TuningPreferMaskRegisters, TuningFastGather,
    TuningFastMOVBE, TuningSlowPMADDWD
  ];
  // TODO Add AVX5124FMAPS/AVX5124VNNIW features
  list<SubtargetFeature> KNMFeatures =
    !listconcat(KNLFeatures, [FeatureVPOPCNTDQ]);

  // Barcelona
  list<SubtargetFeature> BarcelonaFeatures = [
    FeatureX87, FeatureCX8, FeatureSSE4A, Feature3DNowA, FeatureFXSR,
    FeatureNOPL, FeatureCX16, FeaturePRFCHW, FeatureLZCNT, FeaturePOPCNT,
    FeatureLAHFSAHF64, FeatureCMOV, FeatureX86_64
  ];
  list<SubtargetFeature> BarcelonaTuning = [
    TuningFastScalarShiftMasks, TuningSlowDivide64, TuningSlowSHLD,
    TuningSBBDepBreaking, TuningInsertVZEROUPPER
  ];

  // Bobcat
  list<SubtargetFeature> BtVer1Features = [
    FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX, FeatureSSSE3,
    FeatureSSE4A, FeatureFXSR, FeatureNOPL, FeatureX86_64, FeatureCX16,
    FeaturePRFCHW, FeatureLZCNT, FeaturePOPCNT, FeatureLAHFSAHF64
  ];
  list<SubtargetFeature> BtVer1Tuning = [
    TuningFast15ByteNOP, TuningFastScalarShiftMasks,
    TuningFastVectorShiftMasks, TuningSlowDivide64, TuningSlowSHLD,
    TuningSBBDepBreaking, TuningInsertVZEROUPPER
  ];

  // Jaguar
  list<SubtargetFeature> BtVer2AdditionalFeatures = [
    FeatureAVX, FeatureAES, FeatureCRC32, FeaturePCLMUL, FeatureBMI,
    FeatureF16C, FeatureMOVBE, FeatureXSAVE, FeatureXSAVEOPT
  ];
  list<SubtargetFeature> BtVer2Tuning = [
    TuningFastLZCNT, TuningFastBEXTR, TuningFastHorizontalOps,
    TuningFast15ByteNOP, TuningFastScalarShiftMasks,
    TuningFastVectorShiftMasks, TuningFastMOVBE, TuningSBBDepBreaking,
    TuningSlowDivide64, TuningSlowSHLD
  ];
  list<SubtargetFeature> BtVer2Features =
    !listconcat(BtVer1Features, BtVer2AdditionalFeatures);

  // Bulldozer
  list<SubtargetFeature> BdVer1Features = [
    FeatureX87, FeatureCX8, FeatureCMOV, FeatureXOP, FeatureX86_64,
    FeatureCX16, FeatureAES, FeatureCRC32, FeaturePRFCHW, FeaturePCLMUL,
    FeatureMMX, FeatureFXSR, FeatureNOPL, FeatureLZCNT, FeaturePOPCNT,
    FeatureXSAVE, FeatureLWP, FeatureLAHFSAHF64
  ];
  list<SubtargetFeature> BdVer1Tuning = [
    TuningSlowSHLD, TuningSlowDivide64, TuningFast11ByteNOP,
    TuningFastScalarShiftMasks, TuningBranchFusion, TuningSBBDepBreaking,
    TuningInsertVZEROUPPER
  ];

  // PileDriver
  list<SubtargetFeature> BdVer2AdditionalFeatures = [
    FeatureF16C, FeatureBMI, FeatureTBM, FeatureFMA
  ];
  list<SubtargetFeature> BdVer2AdditionalTuning = [
    TuningFastBEXTR, TuningFastMOVBE
  ];
  list<SubtargetFeature> BdVer2Tuning =
    !listconcat(BdVer1Tuning, BdVer2AdditionalTuning);
  list<SubtargetFeature> BdVer2Features =
    !listconcat(BdVer1Features, BdVer2AdditionalFeatures);

  // Steamroller
  list<SubtargetFeature> BdVer3AdditionalFeatures = [
    FeatureXSAVEOPT, FeatureFSGSBase
  ];
  list<SubtargetFeature> BdVer3Tuning = BdVer2Tuning;
  list<SubtargetFeature> BdVer3Features =
    !listconcat(BdVer2Features, BdVer3AdditionalFeatures);

  // Excavator
  list<SubtargetFeature> BdVer4AdditionalFeatures = [
    FeatureAVX2, FeatureBMI2, FeatureMOVBE, FeatureRDRAND, FeatureMWAITX
  ];
  list<SubtargetFeature> BdVer4Tuning = BdVer3Tuning;
  list<SubtargetFeature> BdVer4Features =
    !listconcat(BdVer3Features, BdVer4AdditionalFeatures);


  // AMD Zen Processors common ISAs
  list<SubtargetFeature> ZNFeatures = [
    FeatureADX, FeatureAES, FeatureAVX2, FeatureBMI, FeatureBMI2,
    FeatureCLFLUSHOPT, FeatureCLZERO, FeatureCMOV, FeatureX86_64,
    FeatureCX16, FeatureCRC32, FeatureF16C, FeatureFMA, FeatureFSGSBase,
    FeatureFXSR, FeatureNOPL, FeatureLAHFSAHF64, FeatureLZCNT, FeatureMMX,
    FeatureMOVBE, FeatureMWAITX, FeaturePCLMUL, FeaturePOPCNT, FeaturePRFCHW,
    FeatureRDRAND, FeatureRDSEED, FeatureSHA, FeatureSSE4A, FeatureX87,
    FeatureXSAVE, FeatureXSAVEC, FeatureXSAVEOPT, FeatureXSAVES
  ];
  list<SubtargetFeature> ZNTuning = [
    TuningFastLZCNT, TuningFastBEXTR, TuningFast15ByteNOP,
    TuningBranchFusion, TuningFastScalarFSQRT, TuningFastVectorFSQRT,
    TuningFastScalarShiftMasks, TuningFastVariablePerLaneShuffle,
    TuningFastMOVBE, TuningSlowDivide64, TuningSlowSHLD,
    TuningSBBDepBreaking, TuningInsertVZEROUPPER, TuningAllowLight256Bit
  ];
  list<SubtargetFeature> ZN2AdditionalFeatures = [
    FeatureCLWB, FeatureRDPID, FeatureRDPRU, FeatureWBNOINVD
  ];
  list<SubtargetFeature> ZN2Tuning = ZNTuning;
  list<SubtargetFeature> ZN2Features =
    !listconcat(ZNFeatures, ZN2AdditionalFeatures);
  list<SubtargetFeature> ZN3AdditionalFeatures = [
    FeatureFSRM, FeatureINVPCID, FeaturePKU, FeatureVAES, FeatureVPCLMULQDQ
  ];
  list<SubtargetFeature> ZN3AdditionalTuning = [TuningMacroFusion];
  list<SubtargetFeature> ZN3Tuning =
    !listconcat(ZN2Tuning, ZN3AdditionalTuning);
  list<SubtargetFeature> ZN3Features =
    !listconcat(ZN2Features, ZN3AdditionalFeatures);
  list<SubtargetFeature> ZN4Tuning = ZN3Tuning;
  list<SubtargetFeature> ZN4AdditionalFeatures = [
    FeatureAVX512, FeatureEVEX512, FeatureCDI, FeatureDQI, FeatureBWI,
    FeatureVLX, FeatureVBMI, FeatureVBMI2, FeatureIFMA, FeatureVNNI,
    FeatureBITALG, FeatureGFNI, FeatureBF16, FeatureSHSTK, FeatureVPOPCNTDQ
  ];
  list<SubtargetFeature> ZN4Features =
    !listconcat(ZN3Features, ZN4AdditionalFeatures);
}

//===----------------------------------------------------------------------===//
// X86 processors supported.
//===----------------------------------------------------------------------===//

// Processor that uses GenericModel as its scheduling model.
class Proc<string Name,
           list<SubtargetFeature> Features,
           list<SubtargetFeature> TuneFeatures>
    : ProcessorModel<Name, GenericModel, Features, TuneFeatures>;

// Processor with an explicitly supplied scheduling model.
class ProcModel<string Name,
                SchedMachineModel Model,
                list<SubtargetFeature> Features,
                list<SubtargetFeature> TuneFeatures>
    : ProcessorModel<Name, Model, Features, TuneFeatures>;

// NOTE: CMPXCHG8B is here for legacy compatibility so that it is only disabled
//       if i386/i486 is specifically requested.
// NOTE: 64Bit is here as "generic" is the default llc CPU. The X86Subtarget
//       constructor checks that any CPU used in 64-bit mode has FeatureX86_64
//       enabled.
//       It has no effect on code generation.
// NOTE: As a default tuning, "generic" aims to produce code optimized for the
//       most common X86 processors. The tunings might be changed over time. It
//       is recommended to use "tune-cpu"="x86-64" in function attribute for
//       consistency.
def : ProcModel<"generic", SandyBridgeModel,
                [FeatureX87, FeatureCX8, FeatureX86_64],
                [TuningSlow3OpsLEA, TuningSlowDivide64, TuningMacroFusion,
                 TuningFastScalarFSQRT, TuningFast15ByteNOP,
                 TuningInsertVZEROUPPER]>;

// Legacy 32-bit processors. Each entry below uses the same two tunings and
// differs only in its feature list.
def : Proc<"i386",
           [FeatureX87],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"i486",
           [FeatureX87],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"i586",
           [FeatureX87, FeatureCX8],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"pentium",
           [FeatureX87, FeatureCX8],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
foreach P = ["pentium-mmx", "pentium_mmx"] in {
  def : Proc<P,
             [FeatureX87, FeatureCX8, FeatureMMX],
             [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
def : Proc<"i686",
           [FeatureX87, FeatureCX8, FeatureCMOV],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
foreach P = ["pentiumpro", "pentium_pro"] in {
  def : Proc<P,
             [FeatureX87, FeatureCX8, FeatureCMOV, FeatureNOPL],
             [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
foreach P = ["pentium2", "pentium_ii"] in {
  def : Proc<P,
             [FeatureX87, FeatureCX8, FeatureMMX, FeatureCMOV, FeatureFXSR,
              FeatureNOPL],
             [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
foreach P = ["pentium3", "pentium3m", "pentium_iii_no_xmm_regs",
             "pentium_iii"] in {
  def : Proc<P,
             [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE1, FeatureFXSR,
              FeatureNOPL, FeatureCMOV],
             [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}

// Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
// The intent is to enable it for pentium4 which is the current default
// processor in a vanilla 32-bit clang compilation when no specific
// architecture is specified. This generally gives a nice performance
// increase on silvermont, with largely neutral behavior on other
// contemporary large core processors.
// pentium-m, pentium4m, prescott and nocona are included as a preventative
// measure to avoid performance surprises, in case clang's default cpu
// changes slightly.

foreach P = ["pentium_m", "pentium-m"] in {
  def : ProcModel<P, GenericPostRAModel,
                  [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE2,
                   FeatureFXSR, FeatureNOPL, FeatureCMOV],
                  [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}

foreach P = ["pentium4", "pentium4m", "pentium_4"] in {
  def : ProcModel<P, GenericPostRAModel,
                  [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE2,
                   FeatureFXSR, FeatureNOPL, FeatureCMOV],
                  [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}

// Intel Quark.
def : Proc<"lakemont",
           [FeatureCX8],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

// Intel Core Duo.
def : ProcModel<"yonah", SandyBridgeModel,
                [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE3,
                 FeatureFXSR, FeatureNOPL, FeatureCMOV],
                [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

// NetBurst.
foreach P = ["prescott", "pentium_4_sse3"] in {
  def : ProcModel<P, GenericPostRAModel,
                  [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE3,
                   FeatureFXSR, FeatureNOPL, FeatureCMOV],
                  [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
def : ProcModel<"nocona", GenericPostRAModel,
                [FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX,
                 FeatureSSE3, FeatureFXSR, FeatureNOPL, FeatureX86_64,
                 FeatureCX16],
                [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

// Intel Core 2 Solo/Duo.
foreach P = ["core2", "core_2_duo_ssse3"] in {
  def : ProcModel<P, SandyBridgeModel,
                  [FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX,
                   FeatureSSSE3, FeatureFXSR, FeatureNOPL, FeatureX86_64,
                   FeatureCX16, FeatureLAHFSAHF64],
                  [TuningMacroFusion, TuningSlowUAMem16,
                   TuningInsertVZEROUPPER]>;
}
foreach P = ["penryn", "core_2_duo_sse4_1"] in {
  def : ProcModel<P, SandyBridgeModel,
                  [FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX,
                   FeatureSSE41, FeatureFXSR, FeatureNOPL, FeatureX86_64,
                   FeatureCX16, FeatureLAHFSAHF64],
                  [TuningMacroFusion, TuningSlowUAMem16,
                   TuningInsertVZEROUPPER]>;
}

// Atom CPUs.
foreach P = ["bonnell", "atom"] in {
  def : ProcModel<P, AtomModel,
                  ProcessorFeatures.AtomFeatures,
                  ProcessorFeatures.AtomTuning>;
}

foreach P = ["silvermont", "slm", "atom_sse4_2"] in {
  def : ProcModel<P, SLMModel,
                  ProcessorFeatures.SLMFeatures,
                  ProcessorFeatures.SLMTuning>;
}

// Pairs the Goldmont feature list with the Silvermont tuning list.
def : ProcModel<"atom_sse4_2_movbe", SLMModel,
                ProcessorFeatures.GLMFeatures,
                ProcessorFeatures.SLMTuning>;
def : ProcModel<"goldmont", SLMModel,
                ProcessorFeatures.GLMFeatures,
                ProcessorFeatures.GLMTuning>;
foreach P = ["goldmont_plus", "goldmont-plus"] in {
  def : ProcModel<P, SLMModel,
                  ProcessorFeatures.GLPFeatures,
                  ProcessorFeatures.GLPTuning>;
}
def : ProcModel<"tremont", SLMModel,
                ProcessorFeatures.TRMFeatures,
                ProcessorFeatures.TRMTuning>;
// Pairs the Sierraforest feature list with the Tremont tuning list.
foreach P = ["sierraforest", "grandridge"] in {
  def : ProcModel<P, AlderlakePModel,
                  ProcessorFeatures.SRFFeatures,
                  ProcessorFeatures.TRMTuning>;
}

// "Arrandale" along with corei3 and corei5
foreach P = ["nehalem", "corei7", "core_i7_sse4_2"] in {
  def : ProcModel<P, SandyBridgeModel,
                  ProcessorFeatures.NHMFeatures,
                  ProcessorFeatures.NHMTuning>;
}

// Westmere is the corei3/i5/i7 path from nehalem to sandybridge.
foreach CPUName = ["westmere", "core_aes_pclmulqdq"] in
  def : ProcModel<CPUName,
                  SandyBridgeModel,
                  ProcessorFeatures.WSMFeatures,
                  ProcessorFeatures.WSMTuning>;

foreach CPUName = ["sandybridge", "corei7-avx", "core_2nd_gen_avx"] in
  def : ProcModel<CPUName,
                  SandyBridgeModel,
                  ProcessorFeatures.SNBFeatures,
                  ProcessorFeatures.SNBTuning>;

foreach CPUName = ["ivybridge", "core-avx-i", "core_3rd_gen_avx"] in
  def : ProcModel<CPUName,
                  SandyBridgeModel,
                  ProcessorFeatures.IVBFeatures,
                  ProcessorFeatures.IVBTuning>;

foreach CPUName = ["haswell", "core-avx2", "core_4th_gen_avx",
                   "core_4th_gen_avx_tsx"] in
  def : ProcModel<CPUName,
                  HaswellModel,
                  ProcessorFeatures.HSWFeatures,
                  ProcessorFeatures.HSWTuning>;

foreach CPUName = ["broadwell", "core_5th_gen_avx",
                   "core_5th_gen_avx_tsx"] in
  def : ProcModel<CPUName,
                  BroadwellModel,
                  ProcessorFeatures.BDWFeatures,
                  ProcessorFeatures.BDWTuning>;

def : ProcModel<"skylake",
                SkylakeClientModel,
                ProcessorFeatures.SKLFeatures,
                ProcessorFeatures.SKLTuning>;

// FIXME: define KNL scheduler model
// (until then both KNL and KNM are scheduled with the Haswell model, and
// "knm" shares the KNL tuning list).
foreach CPUName = ["knl", "mic_avx512"] in
  def : ProcModel<CPUName,
                  HaswellModel,
                  ProcessorFeatures.KNLFeatures,
                  ProcessorFeatures.KNLTuning>;
def : ProcModel<"knm",
                HaswellModel,
                ProcessorFeatures.KNMFeatures,
                ProcessorFeatures.KNLTuning>;

foreach CPUName = ["skylake-avx512", "skx", "skylake_avx512"] in
  def : ProcModel<CPUName,
                  SkylakeServerModel,
                  ProcessorFeatures.SKXFeatures,
                  ProcessorFeatures.SKXTuning>;

def : ProcModel<"cascadelake",
                SkylakeServerModel,
                ProcessorFeatures.CLXFeatures,
                ProcessorFeatures.CLXTuning>;
def : ProcModel<"cooperlake",
                SkylakeServerModel,
                ProcessorFeatures.CPXFeatures,
                ProcessorFeatures.CPXTuning>;
def : ProcModel<"cannonlake",
                SkylakeServerModel,
                ProcessorFeatures.CNLFeatures,
                ProcessorFeatures.CNLTuning>;
foreach CPUName = ["icelake-client", "icelake_client"] in
  def : ProcModel<CPUName,
                  IceLakeModel,
                  ProcessorFeatures.ICLFeatures,
                  ProcessorFeatures.ICLTuning>;
// "rocketlake" is registered with the same lists as icelake-client.
def : ProcModel<"rocketlake",
                IceLakeModel,
                ProcessorFeatures.ICLFeatures,
                ProcessorFeatures.ICLTuning>;
foreach CPUName = ["icelake-server", "icelake_server"] in
  def : ProcModel<CPUName,
                  IceLakeModel,
                  ProcessorFeatures.ICXFeatures,
                  ProcessorFeatures.ICXTuning>;
def : ProcModel<"tigerlake",
                IceLakeModel,
                ProcessorFeatures.TGLFeatures,
                ProcessorFeatures.TGLTuning>;
def : ProcModel<"sapphirerapids",
                SapphireRapidsModel,
                ProcessorFeatures.SPRFeatures,
                ProcessorFeatures.SPRTuning>;
def : ProcModel<"alderlake",
                AlderlakePModel,
                ProcessorFeatures.ADLFeatures,
                ProcessorFeatures.ADLTuning>;
// FIXME: Use Gracemont Schedule Model when it is ready.
def : ProcModel<"gracemont",
                AlderlakePModel,
                ProcessorFeatures.ADLFeatures,
                ProcessorFeatures.GRTTuning>;
def : ProcModel<"raptorlake",
                AlderlakePModel,
                ProcessorFeatures.ADLFeatures,
                ProcessorFeatures.ADLTuning>;
def : ProcModel<"meteorlake",
                AlderlakePModel,
                ProcessorFeatures.ADLFeatures,
                ProcessorFeatures.ADLTuning>;
def : ProcModel<"arrowlake",
                AlderlakePModel,
                ProcessorFeatures.SRFFeatures,
                ProcessorFeatures.ADLTuning>;
foreach CPUName = ["arrowlake-s", "arrowlake_s", "lunarlake"] in
  def : ProcModel<CPUName,
                  AlderlakePModel,
                  ProcessorFeatures.ARLSFeatures,
                  ProcessorFeatures.ADLTuning>;
def : ProcModel<"pantherlake",
                AlderlakePModel,
                ProcessorFeatures.PTLFeatures,
                ProcessorFeatures.ADLTuning>;
def : ProcModel<"clearwaterforest",
                AlderlakePModel,
                ProcessorFeatures.CWFFeatures,
                ProcessorFeatures.ADLTuning>;
def : ProcModel<"graniterapids",
                SapphireRapidsModel,
                ProcessorFeatures.GNRFeatures,
                ProcessorFeatures.SPRTuning>;
def : ProcModel<"emeraldrapids",
                SapphireRapidsModel,
                ProcessorFeatures.SPRFeatures,
                ProcessorFeatures.SPRTuning>;
foreach CPUName = ["graniterapids-d", "graniterapids_d"] in
  def : ProcModel<CPUName,
                  SapphireRapidsModel,
                  ProcessorFeatures.GNRDFeatures,
                  ProcessorFeatures.SPRTuning>;

// AMD CPUs.

def : Proc<"k6",
           [FeatureX87, FeatureCX8, FeatureMMX],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"k6-2",
           [FeatureX87, FeatureCX8, Feature3DNow],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"k6-3",
           [FeatureX87, FeatureCX8, Feature3DNow],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

foreach CPUName = ["athlon", "athlon-tbird"] in
  def : Proc<CPUName,
             [FeatureX87, FeatureCX8, FeatureCMOV, Feature3DNowA,
              FeatureNOPL],
             [TuningSlowSHLD, TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

foreach CPUName = ["athlon-4", "athlon-xp", "athlon-mp"] in
  def : Proc<CPUName,
             [FeatureX87, FeatureCX8, FeatureCMOV, FeatureSSE1,
              Feature3DNowA, FeatureFXSR, FeatureNOPL],
             [TuningSlowSHLD, TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

foreach CPUName = ["k8", "opteron", "athlon64", "athlon-fx"] in
  def : Proc<CPUName,
             [FeatureX87, FeatureCX8, FeatureSSE2, Feature3DNowA,
              FeatureFXSR, FeatureNOPL, FeatureX86_64, FeatureCMOV],
             [TuningFastScalarShiftMasks, TuningSlowSHLD,
              TuningSlowUAMem16, TuningSBBDepBreaking,
              TuningInsertVZEROUPPER]>;

// Same tuning list as the k8 group; the feature list uses FeatureSSE3 and
// additionally lists FeatureCX16.
foreach CPUName = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in
  def : Proc<CPUName,
             [FeatureX87, FeatureCX8, FeatureSSE3, Feature3DNowA,
              FeatureFXSR, FeatureNOPL, FeatureCX16, FeatureCMOV,
              FeatureX86_64],
             [TuningFastScalarShiftMasks, TuningSlowSHLD,
              TuningSlowUAMem16, TuningSBBDepBreaking,
              TuningInsertVZEROUPPER]>;

foreach CPUName = ["amdfam10", "barcelona"] in
  def : Proc<CPUName,
             ProcessorFeatures.BarcelonaFeatures,
             ProcessorFeatures.BarcelonaTuning>;

// Bobcat
def : Proc<"btver1",
           ProcessorFeatures.BtVer1Features,
           ProcessorFeatures.BtVer1Tuning>;
// Jaguar
def : ProcModel<"btver2",
                BtVer2Model,
                ProcessorFeatures.BtVer2Features,
                ProcessorFeatures.BtVer2Tuning>;

// Bulldozer (scheduled with the BdVer2 model)
def : ProcModel<"bdver1",
                BdVer2Model,
                ProcessorFeatures.BdVer1Features,
                ProcessorFeatures.BdVer1Tuning>;
// Piledriver
def : ProcModel<"bdver2",
                BdVer2Model,
                ProcessorFeatures.BdVer2Features,
                ProcessorFeatures.BdVer2Tuning>;
// Steamroller
def : Proc<"bdver3",
           ProcessorFeatures.BdVer3Features,
           ProcessorFeatures.BdVer3Tuning>;
// Excavator
def : Proc<"bdver4",
           ProcessorFeatures.BdVer4Features,
           ProcessorFeatures.BdVer4Tuning>;

def : ProcModel<"znver1",
                Znver1Model,
                ProcessorFeatures.ZNFeatures,
                ProcessorFeatures.ZNTuning>;
def : ProcModel<"znver2",
                Znver2Model,
                ProcessorFeatures.ZN2Features,
                ProcessorFeatures.ZN2Tuning>;
def : ProcModel<"znver3",
                Znver3Model,
                ProcessorFeatures.ZN3Features,
                ProcessorFeatures.ZN3Tuning>;
def : ProcModel<"znver4",
                Znver4Model,
                ProcessorFeatures.ZN4Features,
                ProcessorFeatures.ZN4Tuning>;

def : Proc<"geode",
           [FeatureX87, FeatureCX8, Feature3DNowA],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

def : Proc<"winchip-c6",
           [FeatureX87, FeatureMMX],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"winchip2",
           [FeatureX87, Feature3DNow],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"c3",
           [FeatureX87, Feature3DNow],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"c3-2",
           [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE1, FeatureFXSR,
            FeatureCMOV],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

// A generic, 64-bit-specific x86 processor model is also provided. It aims to
// be good for modern chips without enabling instruction set encodings beyond
// basic SSE2 and the 64-bit ones. It disables things that are slow on any
// mainstream, modern 64-bit x86 chip and enables features that are generally
// beneficial.
//
// The Sandy Bridge model is currently the default scheduling model because it
// is already used across Nehalem, Westmere, Sandy Bridge, and Ivy Bridge,
// which covers a huge swath of x86 processors. If specific scheduling knobs
// need to be tuned differently for AMD chips, forming a common base for them
// might be considered.
def : ProcModel<"x86-64",
                SandyBridgeModel,
                ProcessorFeatures.X86_64V1Features,
                ProcessorFeatures.X86_64V1Tuning>;
// Close to Sandybridge.
def : ProcModel<"x86-64-v2",
                SandyBridgeModel,
                ProcessorFeatures.X86_64V2Features,
                ProcessorFeatures.X86_64V2Tuning>;
// Close to Haswell.
def : ProcModel<"x86-64-v3",
                HaswellModel,
                ProcessorFeatures.X86_64V3Features,
                ProcessorFeatures.X86_64V3Tuning>;
// Close to the AVX-512 level implemented by Xeon Scalable Processors.
def : ProcModel<"x86-64-v4",
                SkylakeServerModel,
                ProcessorFeatures.X86_64V4Features,
                ProcessorFeatures.X86_64V4Tuning>;

//===----------------------------------------------------------------------===//
// Calling Conventions
//===----------------------------------------------------------------------===//

include "X86CallingConv.td"


//===----------------------------------------------------------------------===//
// Assembly Parser
//===----------------------------------------------------------------------===//

// AT&T-syntax parser variant (variant index 0).
def ATTAsmParserVariant : AsmParserVariant {
  int Variant = 0;

  // Name of this variant.
  string Name = "att";

  // Delimiter that starts a comment in assembly strings; comments are
  // discarded.
  string CommentDelimiter = "#";

  // Prefix used to recognize hard-coded registers.
  string RegisterPrefix = "%";
}

// Intel-syntax parser variant (variant index 1).
def IntelAsmParserVariant : AsmParserVariant {
  int Variant = 1;

  // Name of this variant.
  string Name = "intel";

  // Delimiter that starts a comment in assembly strings; comments are
  // discarded.
  string CommentDelimiter = ";";

  // Prefix used to recognize hard-coded registers (empty for this variant).
  string RegisterPrefix = "";
}

//===----------------------------------------------------------------------===//
// Assembly Printers
//===----------------------------------------------------------------------===//

// The X86 target supports two different syntaxes for emitting machine code.
// The choice is controlled by the -x86-asm-syntax={att|intel} option.
def ATTAsmWriter : AsmWriter {
  string AsmWriterClassName = "ATTInstPrinter";
  int Variant = 0;
}
def IntelAsmWriter : AsmWriter {
  string AsmWriterClassName = "IntelInstPrinter";
  int Variant = 1;
}

// Top-level target record tying together the instruction set, both assembly
// parser variants and both assembly writers.
def X86 : Target {
  // Information about the instructions...
  let InstructionSet = X86InstrInfo;
  let AssemblyParserVariants = [ATTAsmParserVariant, IntelAsmParserVariant];
  let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter];
  let AllowRegisterRenaming = 1;
}

//===----------------------------------------------------------------------===//
// Pfm Counters
//===----------------------------------------------------------------------===//

include "X86PfmCounters.td"