//===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This is a target description file for the Intel i386 architecture, referred
// to here as the "X86" architecture.
//
//===----------------------------------------------------------------------===//

// Get the target-independent interfaces which we are implementing...
//
include "llvm/Target/Target.td"

//===----------------------------------------------------------------------===//
// X86 Subtarget state
//
// Mode selectors, disregarding specific ABI / programming model.
def Is64Bit : SubtargetFeature<
    "64bit-mode", "Is64Bit", "true",
    "64-bit mode (x86_64)">;

def Is32Bit : SubtargetFeature<
    "32bit-mode", "Is32Bit", "true",
    "32-bit mode (80386)">;

def Is16Bit : SubtargetFeature<
    "16bit-mode", "Is16Bit", "true",
    "16-bit mode (i8086)">;

//===----------------------------------------------------------------------===//
// X86 Subtarget ISA features
//===----------------------------------------------------------------------===//

def FeatureX87 : SubtargetFeature<
    "x87", "HasX87", "true",
    "Enable X87 float instructions">;

def FeatureNOPL : SubtargetFeature<
    "nopl", "HasNOPL", "true",
    "Enable NOPL instruction (generally pentium pro+)">;

def FeatureCMOV : SubtargetFeature<
    "cmov", "HasCMOV", "true",
    "Enable conditional move instructions">;

def FeatureCX8 : SubtargetFeature<
    "cx8", "HasCX8", "true",
    "Support CMPXCHG8B instructions">;

def FeatureCRC32 : SubtargetFeature<
    "crc32", "HasCRC32", "true",
    "Enable SSE 4.2 CRC32 instruction (used when SSE4.2 is supported but function is GPR only)">;

def FeaturePOPCNT : SubtargetFeature<
    "popcnt", "HasPOPCNT", "true",
    "Support POPCNT instruction">;

def FeatureFXSR : SubtargetFeature<
    "fxsr", "HasFXSR", "true",
    "Support fxsave/fxrestore instructions">;

// XSAVE family: every extended variant implies the base xsave feature.
def FeatureXSAVE : SubtargetFeature<
    "xsave", "HasXSAVE", "true",
    "Support xsave instructions">;

def FeatureXSAVEOPT : SubtargetFeature<
    "xsaveopt", "HasXSAVEOPT", "true",
    "Support xsaveopt instructions",
    [FeatureXSAVE]>;

def FeatureXSAVEC : SubtargetFeature<
    "xsavec", "HasXSAVEC", "true",
    "Support xsavec instructions",
    [FeatureXSAVE]>;

def FeatureXSAVES : SubtargetFeature<
    "xsaves", "HasXSAVES", "true",
    "Support xsaves instructions",
    [FeatureXSAVE]>;

// SSE levels all write the X86SSELevel field; each level implies the one
// before it.
def FeatureSSE1 : SubtargetFeature<
    "sse", "X86SSELevel", "SSE1",
    "Enable SSE instructions">;

def FeatureSSE2 : SubtargetFeature<
    "sse2", "X86SSELevel", "SSE2",
    "Enable SSE2 instructions",
    [FeatureSSE1]>;

def FeatureSSE3 : SubtargetFeature<
    "sse3", "X86SSELevel", "SSE3",
    "Enable SSE3 instructions",
    [FeatureSSE2]>;

def FeatureSSSE3 : SubtargetFeature<
    "ssse3", "X86SSELevel", "SSSE3",
    "Enable SSSE3 instructions",
    [FeatureSSE3]>;

def FeatureSSE41 : SubtargetFeature<
    "sse4.1", "X86SSELevel", "SSE41",
    "Enable SSE 4.1 instructions",
    [FeatureSSSE3]>;

def FeatureSSE42 : SubtargetFeature<
    "sse4.2", "X86SSELevel", "SSE42",
    "Enable SSE 4.2 instructions",
    [FeatureSSE41]>;

// The MMX subtarget feature is separate from the rest of the SSE features
// because it's important (for odd compatibility reasons) to be able to
// turn it off explicitly while allowing SSE+ to be on.
def FeatureMMX : SubtargetFeature<
    "mmx", "X863DNowLevel", "MMX",
    "Enable MMX instructions">;

def Feature3DNow : SubtargetFeature<
    "3dnow", "X863DNowLevel", "ThreeDNow",
    "Enable 3DNow! instructions",
    [FeatureMMX]>;

def Feature3DNowA : SubtargetFeature<
    "3dnowa", "X863DNowLevel", "ThreeDNowA",
    "Enable 3DNow! Athlon instructions",
    [Feature3DNow]>;

// All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied
// feature, because SSE2 can be disabled (e.g. for compiling OS kernels)
// without disabling 64-bit mode. Nothing should imply this feature bit. It
// is used to enforce that only 64-bit capable CPUs are used in 64-bit mode.
def FeatureX86_64 : SubtargetFeature<
    "64bit", "HasX86_64", "true",
    "Support 64-bit instructions">;

def FeatureCX16 : SubtargetFeature<
    "cx16", "HasCX16", "true",
    "64-bit with cmpxchg16b (this is true for most x86-64 chips, but not the first AMD chips)",
    [FeatureCX8]>;

def FeatureSSE4A : SubtargetFeature<
    "sse4a", "HasSSE4A", "true",
    "Support SSE 4a instructions",
    [FeatureSSE3]>;

// AVX levels continue the X86SSELevel chain on top of SSE 4.2.
def FeatureAVX : SubtargetFeature<
    "avx", "X86SSELevel", "AVX",
    "Enable AVX instructions",
    [FeatureSSE42]>;

def FeatureAVX2 : SubtargetFeature<
    "avx2", "X86SSELevel", "AVX2",
    "Enable AVX2 instructions",
    [FeatureAVX]>;

def FeatureFMA : SubtargetFeature<
    "fma", "HasFMA", "true",
    "Enable three-operand fused multiple-add",
    [FeatureAVX]>;

def FeatureF16C : SubtargetFeature<
    "f16c", "HasF16C", "true",
    "Support 16-bit floating point conversion instructions",
    [FeatureAVX]>;

def FeatureEVEX512 : SubtargetFeature<
    "evex512", "HasEVEX512", "true",
    "Support ZMM and 64-bit mask instructions">;

def FeatureAVX512 : SubtargetFeature<
    "avx512f", "X86SSELevel", "AVX512",
    "Enable AVX-512 instructions",
    [FeatureAVX2, FeatureFMA, FeatureF16C]>;

def FeatureERI : SubtargetFeature<
    "avx512er", "HasERI", "true",
    "Enable AVX-512 Exponential and Reciprocal Instructions",
    [FeatureAVX512]>;

def FeatureCDI : SubtargetFeature<
    "avx512cd", "HasCDI", "true",
    "Enable AVX-512 Conflict Detection Instructions",
    [FeatureAVX512]>;

def FeatureVPOPCNTDQ : SubtargetFeature<
    "avx512vpopcntdq", "HasVPOPCNTDQ", "true",
    "Enable AVX-512 Population Count Instructions",
    [FeatureAVX512]>;

def FeaturePFI : SubtargetFeature<
    "avx512pf", "HasPFI", "true",
    "Enable AVX-512 PreFetch Instructions",
    [FeatureAVX512]>;

def FeaturePREFETCHI : SubtargetFeature<
    "prefetchi", "HasPREFETCHI", "true",
    "Prefetch instruction with T0 or T1 Hint">;

def FeaturePREFETCHWT1 : SubtargetFeature<
    "prefetchwt1", "HasPREFETCHWT1", "true",
    "Prefetch with Intent to Write and T1 Hint">;

def FeatureDQI : SubtargetFeature<
    "avx512dq", "HasDQI", "true",
    "Enable AVX-512 Doubleword and Quadword Instructions",
    [FeatureAVX512]>;

def FeatureBWI : SubtargetFeature<
    "avx512bw", "HasBWI", "true",
    "Enable AVX-512 Byte and Word Instructions",
    [FeatureAVX512]>;

def FeatureVLX : SubtargetFeature<
    "avx512vl", "HasVLX", "true",
    "Enable AVX-512 Vector Length eXtensions",
    [FeatureAVX512]>;

def FeatureVBMI : SubtargetFeature<
    "avx512vbmi", "HasVBMI", "true",
    "Enable AVX-512 Vector Byte Manipulation Instructions",
    [FeatureBWI]>;

def FeatureVBMI2 : SubtargetFeature<
    "avx512vbmi2", "HasVBMI2", "true",
    "Enable AVX-512 further Vector Byte Manipulation Instructions",
    [FeatureBWI]>;

def FeatureAVXIFMA : SubtargetFeature<
    "avxifma", "HasAVXIFMA", "true",
    "Enable AVX-IFMA",
    [FeatureAVX2]>;

def FeatureIFMA : SubtargetFeature<
    "avx512ifma", "HasIFMA", "true",
    "Enable AVX-512 Integer Fused Multiple-Add",
    [FeatureAVX512]>;

def FeaturePKU : SubtargetFeature<
    "pku", "HasPKU", "true",
    "Enable protection keys">;

def FeatureVNNI : SubtargetFeature<
    "avx512vnni", "HasVNNI", "true",
    "Enable AVX-512 Vector Neural Network Instructions",
    [FeatureAVX512]>;

def FeatureAVXVNNI : SubtargetFeature<
    "avxvnni", "HasAVXVNNI", "true",
    "Support AVX_VNNI encoding",
    [FeatureAVX2]>;

def FeatureBF16 : SubtargetFeature<
    "avx512bf16", "HasBF16", "true",
    "Support bfloat16 floating point",
    [FeatureBWI]>;

def FeatureBITALG : SubtargetFeature<
    "avx512bitalg", "HasBITALG", "true",
    "Enable AVX-512 Bit Algorithms",
    [FeatureBWI]>;

def FeatureVP2INTERSECT : SubtargetFeature<
    "avx512vp2intersect", "HasVP2INTERSECT", "true",
    "Enable AVX-512 vp2intersect",
    [FeatureAVX512]>;

// FIXME: FP16 scalar intrinsics use the type v8f16, which is supposed to be
// guarded under condition hasVLX. So we imply it in FeatureFP16 currently.
// FIXME: FP16 conversion between f16 and i64 customize type v8i64, which is
// supposed to be guarded under condition hasDQI. So we imply it in FeatureFP16
// currently.
def FeatureFP16 : SubtargetFeature<
    "avx512fp16", "HasFP16", "true",
    "Support 16-bit floating point",
    [FeatureBWI, FeatureVLX, FeatureDQI]>;

def FeatureAVXVNNIINT8 : SubtargetFeature<
    "avxvnniint8", "HasAVXVNNIINT8", "true",
    "Enable AVX-VNNI-INT8",
    [FeatureAVX2]>;

def FeatureAVXVNNIINT16 : SubtargetFeature<
    "avxvnniint16", "HasAVXVNNIINT16", "true",
    "Enable AVX-VNNI-INT16",
    [FeatureAVX2]>;

def FeaturePCLMUL : SubtargetFeature<
    "pclmul", "HasPCLMUL", "true",
    "Enable packed carry-less multiplication instructions",
    [FeatureSSE2]>;

def FeatureGFNI : SubtargetFeature<
    "gfni", "HasGFNI", "true",
    "Enable Galois Field Arithmetic Instructions",
    [FeatureSSE2]>;

def FeatureVPCLMULQDQ : SubtargetFeature<
    "vpclmulqdq", "HasVPCLMULQDQ", "true",
    "Enable vpclmulqdq instructions",
    [FeatureAVX, FeaturePCLMUL]>;

def FeatureFMA4 : SubtargetFeature<
    "fma4", "HasFMA4", "true",
    "Enable four-operand fused multiple-add",
    [FeatureAVX, FeatureSSE4A]>;

def FeatureXOP : SubtargetFeature<
    "xop", "HasXOP", "true",
    "Enable XOP instructions",
    [FeatureFMA4]>;

def FeatureSSEUnalignedMem : SubtargetFeature<
    "sse-unaligned-mem", "HasSSEUnalignedMem", "true",
    "Allow unaligned memory operands with SSE instructions (this may require setting a configuration bit in the processor)">;

def FeatureAES : SubtargetFeature<
    "aes", "HasAES", "true",
    "Enable AES instructions",
    [FeatureSSE2]>;

def FeatureVAES : SubtargetFeature<
    "vaes", "HasVAES", "true",
    "Promote selected AES instructions to AVX512/AVX registers",
    [FeatureAVX2, FeatureAES]>;

def FeatureTBM : SubtargetFeature<
    "tbm", "HasTBM", "true",
    "Enable TBM instructions">;

def FeatureLWP : SubtargetFeature<
    "lwp", "HasLWP", "true",
    "Enable LWP instructions">;

def FeatureMOVBE : SubtargetFeature<
    "movbe", "HasMOVBE", "true",
    "Support MOVBE instruction">;

def FeatureRDRAND : SubtargetFeature<
    "rdrnd", "HasRDRAND", "true",
    "Support RDRAND instruction">;

def FeatureFSGSBase : SubtargetFeature<
    "fsgsbase", "HasFSGSBase", "true",
    "Support FS/GS Base instructions">;

def FeatureLZCNT : SubtargetFeature<
    "lzcnt", "HasLZCNT", "true",
    "Support LZCNT instruction">;

def FeatureBMI : SubtargetFeature<
    "bmi", "HasBMI", "true",
    "Support BMI instructions">;

def FeatureBMI2 : SubtargetFeature<
    "bmi2", "HasBMI2", "true",
    "Support BMI2 instructions">;

def FeatureRTM : SubtargetFeature<
    "rtm", "HasRTM", "true",
    "Support RTM instructions">;

def FeatureADX : SubtargetFeature<
    "adx", "HasADX", "true",
    "Support ADX instructions">;

def FeatureSHA : SubtargetFeature<
    "sha", "HasSHA", "true",
    "Enable SHA instructions",
    [FeatureSSE2]>;

def FeatureSHA512 : SubtargetFeature<
    "sha512", "HasSHA512", "true",
    "Support SHA512 instructions",
    [FeatureAVX2]>;

// Processor supports CET SHSTK - Control-Flow Enforcement Technology
// using Shadow Stack
def FeatureSHSTK : SubtargetFeature<
    "shstk", "HasSHSTK", "true",
    "Support CET Shadow-Stack instructions">;

def FeatureSM3 : SubtargetFeature<
    "sm3", "HasSM3", "true",
    "Support SM3 instructions",
    [FeatureAVX]>;

def FeatureSM4 : SubtargetFeature<
    "sm4", "HasSM4", "true",
    "Support SM4 instructions",
    [FeatureAVX2]>;

def FeaturePRFCHW : SubtargetFeature<
    "prfchw", "HasPRFCHW", "true",
    "Support PRFCHW instructions">;

def FeatureRDSEED : SubtargetFeature<
    "rdseed", "HasRDSEED", "true",
    "Support RDSEED instruction">;

def FeatureLAHFSAHF64 : SubtargetFeature<
    "sahf", "HasLAHFSAHF64", "true",
    "Support LAHF and SAHF instructions in 64-bit mode">;

def FeatureMWAITX : SubtargetFeature<
    "mwaitx", "HasMWAITX", "true",
    "Enable MONITORX/MWAITX timer functionality">;

def FeatureCLZERO : SubtargetFeature<
    "clzero", "HasCLZERO", "true",
    "Enable Cache Line Zero">;

def FeatureCLDEMOTE : SubtargetFeature<
    "cldemote", "HasCLDEMOTE", "true",
    "Enable Cache Line Demote">;

def FeaturePTWRITE : SubtargetFeature<
    "ptwrite", "HasPTWRITE", "true",
    "Support ptwrite instruction">;

// AMX family: every extension implies the base amx-tile feature.
def FeatureAMXTILE : SubtargetFeature<
    "amx-tile", "HasAMXTILE", "true",
    "Support AMX-TILE instructions">;

def FeatureAMXINT8 : SubtargetFeature<
    "amx-int8", "HasAMXINT8", "true",
    "Support AMX-INT8 instructions",
    [FeatureAMXTILE]>;

def FeatureAMXBF16 : SubtargetFeature<
    "amx-bf16", "HasAMXBF16", "true",
    "Support AMX-BF16 instructions",
    [FeatureAMXTILE]>;

def FeatureAMXFP16 : SubtargetFeature<
    "amx-fp16", "HasAMXFP16", "true",
    "Support AMX amx-fp16 instructions",
    [FeatureAMXTILE]>;

def FeatureAMXCOMPLEX : SubtargetFeature<
    "amx-complex", "HasAMXCOMPLEX", "true",
    "Support AMX-COMPLEX instructions",
    [FeatureAMXTILE]>;

def FeatureCMPCCXADD : SubtargetFeature<
    "cmpccxadd", "HasCMPCCXADD", "true",
    "Support CMPCCXADD instructions">;

def FeatureRAOINT : SubtargetFeature<
    "raoint", "HasRAOINT", "true",
    "Support RAO-INT instructions",
    []>;

def FeatureAVXNECONVERT : SubtargetFeature<
    "avxneconvert", "HasAVXNECONVERT", "true",
    "Support AVX-NE-CONVERT instructions",
    [FeatureAVX2]>;

def FeatureINVPCID : SubtargetFeature<
    "invpcid", "HasINVPCID", "true",
    "Invalidate Process-Context Identifier">;

def FeatureSGX : SubtargetFeature<
    "sgx", "HasSGX", "true",
    "Enable Software Guard Extensions">;

def FeatureCLFLUSHOPT : SubtargetFeature<
    "clflushopt", "HasCLFLUSHOPT", "true",
    "Flush A Cache Line Optimized">;

def FeatureCLWB : SubtargetFeature<
    "clwb", "HasCLWB", "true",
    "Cache Line Write Back">;

def FeatureWBNOINVD : SubtargetFeature<
    "wbnoinvd", "HasWBNOINVD", "true",
    "Write Back No Invalidate">;

def FeatureRDPID : SubtargetFeature<
    "rdpid", "HasRDPID", "true",
    "Support RDPID instructions">;

def FeatureRDPRU : SubtargetFeature<
    "rdpru", "HasRDPRU", "true",
    "Support RDPRU instructions">;

def FeatureWAITPKG : SubtargetFeature<
    "waitpkg", "HasWAITPKG", "true",
    "Wait and pause enhancements">;

def FeatureENQCMD : SubtargetFeature<
    "enqcmd", "HasENQCMD", "true",
    "Has ENQCMD instructions">;

def FeatureKL : SubtargetFeature<
    "kl", "HasKL", "true",
    "Support Key Locker kl Instructions",
    [FeatureSSE2]>;

def FeatureWIDEKL : SubtargetFeature<
    "widekl", "HasWIDEKL", "true",
    "Support Key Locker wide Instructions",
    [FeatureKL]>;

def FeatureHRESET : SubtargetFeature<
    "hreset", "HasHRESET", "true",
    "Has hreset instruction">;

def FeatureSERIALIZE : SubtargetFeature<
    "serialize", "HasSERIALIZE", "true",
    "Has serialize instruction">;

def FeatureTSXLDTRK : SubtargetFeature<
    "tsxldtrk", "HasTSXLDTRK", "true",
    "Support TSXLDTRK instructions">;

def FeatureUINTR : SubtargetFeature<
    "uintr", "HasUINTR", "true",
    "Has UINTR Instructions">;

def FeatureUSERMSR : SubtargetFeature<
    "usermsr", "HasUSERMSR", "true",
    "Support USERMSR instructions">;

def FeaturePCONFIG : SubtargetFeature<
    "pconfig", "HasPCONFIG", "true",
    "platform configuration instruction">;

def FeatureMOVDIRI : SubtargetFeature<
    "movdiri", "HasMOVDIRI", "true",
    "Support movdiri instruction (direct store integer)">;

def FeatureMOVDIR64B : SubtargetFeature<
    "movdir64b", "HasMOVDIR64B", "true",
    "Support movdir64b instruction (direct store 64 bytes)">;

// AVX10.1 is expressed as an umbrella over existing AVX-512 era features; the
// 512-bit variant adds evex512 on top of the 256-bit one.
def FeatureAVX10_1 : SubtargetFeature<
    "avx10.1-256", "HasAVX10_1", "true",
    "Support AVX10.1 up to 256-bit instruction",
    [FeatureCDI, FeatureVBMI, FeatureIFMA, FeatureVNNI,
     FeatureBF16, FeatureVPOPCNTDQ, FeatureVBMI2, FeatureBITALG,
     FeatureVAES, FeatureVPCLMULQDQ, FeatureFP16]>;

def FeatureAVX10_1_512 : SubtargetFeature<
    "avx10.1-512", "HasAVX10_1_512", "true",
    "Support AVX10.1 up to 512-bit instruction",
    [FeatureAVX10_1, FeatureEVEX512]>;

def FeatureEGPR : SubtargetFeature<
    "egpr", "HasEGPR", "true",
    "Support extended general purpose register">;

def FeaturePush2Pop2 : SubtargetFeature<
    "push2pop2", "HasPush2Pop2", "true",
    "Support PUSH2/POP2 instructions">;

def FeaturePPX : SubtargetFeature<
    "ppx", "HasPPX", "true",
    "Support Push-Pop Acceleration">;

def FeatureNDD : SubtargetFeature<
    "ndd", "HasNDD", "true",
    "Support non-destructive destination">;

def FeatureCCMP : SubtargetFeature<
    "ccmp", "HasCCMP", "true",
    "Support conditional cmp & test instructions">;

def FeatureCF : SubtargetFeature<
    "cf", "HasCF", "true",
    "Support conditional faulting">;

// Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
// "string operations"). See "REP String Enhancement" in the Intel Software
// Development Manual. This feature essentially means that REP MOVSB will copy
// using the largest available size instead of copying bytes one by one, making
// it at least as fast as REPMOVS{W,D,Q}.
def FeatureERMSB : SubtargetFeature<
    "ermsb", "HasERMSB", "true",
    "REP MOVS/STOS are fast">;

// Icelake and newer processors have Fast Short REP MOV.
def FeatureFSRM : SubtargetFeature<
    "fsrm", "HasFSRM", "true",
    "REP MOVSB of short lengths is faster">;

def FeatureSoftFloat : SubtargetFeature<
    "soft-float", "UseSoftFloat", "true",
    "Use software floating point features">;

//===----------------------------------------------------------------------===//
// X86 Subtarget Security Mitigation features
//===----------------------------------------------------------------------===//

// Lower indirect calls using a special construct called a `retpoline` to
// mitigate potential Spectre v2 attacks against them.
def FeatureRetpolineIndirectCalls : SubtargetFeature<
    "retpoline-indirect-calls", "UseRetpolineIndirectCalls", "true",
    "Remove speculation of indirect calls from the generated code">;

// Lower indirect branches and switches either using conditional branch trees
// or using a special construct called a `retpoline` to mitigate potential
// Spectre v2 attacks against them.
def FeatureRetpolineIndirectBranches : SubtargetFeature<
    "retpoline-indirect-branches", "UseRetpolineIndirectBranches", "true",
    "Remove speculation of indirect branches from the generated code">;

// Deprecated umbrella feature for enabling both `retpoline-indirect-calls` and
// `retpoline-indirect-branches` above.
def FeatureRetpoline : SubtargetFeature<
    "retpoline", "DeprecatedUseRetpoline", "true",
    "Remove speculation of indirect branches from the "
    "generated code, either by avoiding them entirely or "
    "lowering them with a speculation blocking construct",
    [FeatureRetpolineIndirectCalls,
     FeatureRetpolineIndirectBranches]>;

// Rely on external thunks for the emitted retpoline calls. This allows users
// to provide their own custom thunk definitions in highly specialized
// environments such as a kernel that does boot-time hot patching.
def FeatureRetpolineExternalThunk : SubtargetFeature<
    "retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
    "When lowering an indirect call or branch using a `retpoline`, rely "
    "on the specified user provided thunk rather than emitting one "
    "ourselves. Only has effect when combined with some other retpoline "
    "feature",
    [FeatureRetpolineIndirectCalls]>;

// Mitigate LVI attacks against indirect calls/branches and call returns.
def FeatureLVIControlFlowIntegrity : SubtargetFeature<
    "lvi-cfi", "UseLVIControlFlowIntegrity", "true",
    "Prevent indirect calls/branches from using a memory operand, and "
    "precede all indirect calls/branches from a register with an "
    "LFENCE instruction to serialize control flow. Also decompose RET "
    "instructions into a POP+LFENCE+JMP sequence.">;

// Enable SESES to mitigate speculative execution attacks. Implies lvi-cfi.
def FeatureSpeculativeExecutionSideEffectSuppression : SubtargetFeature<
    "seses", "UseSpeculativeExecutionSideEffectSuppression", "true",
    "Prevent speculative execution side channel timing attacks by "
    "inserting a speculation barrier before memory reads, memory writes, "
    "and conditional branches. Implies LVI Control Flow integrity.",
    [FeatureLVIControlFlowIntegrity]>;

// Mitigate LVI attacks against data loads.
def FeatureLVILoadHardening : SubtargetFeature<
    "lvi-load-hardening", "UseLVILoadHardening", "true",
    "Insert LFENCE instructions to prevent data speculatively injected "
    "into loads from being used maliciously.">;

def FeatureTaggedGlobals : SubtargetFeature<
    "tagged-globals", "AllowTaggedGlobals", "true",
    "Use an instruction sequence for taking the address of a global "
    "that allows a memory tag in the upper address bits.">;

// Control codegen mitigation against Straight Line Speculation vulnerability.
def FeatureHardenSlsRet : SubtargetFeature<
    "harden-sls-ret", "HardenSlsRet", "true",
    "Harden against straight line speculation across RET instructions.">;

def FeatureHardenSlsIJmp : SubtargetFeature<
    "harden-sls-ijmp", "HardenSlsIJmp", "true",
    "Harden against straight line speculation across indirect JMP instructions.">;

//===----------------------------------------------------------------------===//
// X86 Subtarget Tuning features
//===----------------------------------------------------------------------===//
def TuningPreferMovmskOverVTest : SubtargetFeature<
    "prefer-movmsk-over-vtest", "PreferMovmskOverVTest", "true",
    "Prefer movmsk over vtest instruction">;

def TuningSlowSHLD : SubtargetFeature<
    "slow-shld", "IsSHLDSlow", "true",
    "SHLD instruction is slow">;

def TuningSlowPMULLD : SubtargetFeature<
    "slow-pmulld", "IsPMULLDSlow", "true",
    "PMULLD instruction is slow (compared to PMULLW/PMULHW and PMULUDQ)">;

def TuningSlowPMADDWD : SubtargetFeature<
    "slow-pmaddwd", "IsPMADDWDSlow", "true",
    "PMADDWD is slower than PMULLD">;

// FIXME: This should not apply to CPUs that do not have SSE.
def TuningSlowUAMem16 : SubtargetFeature<
    "slow-unaligned-mem-16", "IsUnalignedMem16Slow", "true",
    "Slow unaligned 16-byte memory access">;

def TuningSlowUAMem32 : SubtargetFeature<
    "slow-unaligned-mem-32", "IsUnalignedMem32Slow", "true",
    "Slow unaligned 32-byte memory access">;

def TuningLEAForSP : SubtargetFeature<
    "lea-sp", "UseLeaForSP", "true",
    "Use LEA for adjusting the stack pointer (this is an optimization for Intel Atom processors)">;

// True if 8-bit divisions are significantly faster than
// 32-bit divisions and should be used when possible.
def TuningSlowDivide32 : SubtargetFeature<
    "idivl-to-divb", "HasSlowDivide32", "true",
    "Use 8-bit divide for positive values less than 256">;

// True if 32-bit divides are significantly faster than
// 64-bit divisions and should be used when possible.
def TuningSlowDivide64 : SubtargetFeature<
    "idivq-to-divl", "HasSlowDivide64", "true",
    "Use 32-bit divide for positive values less than 2^32">;

def TuningPadShortFunctions : SubtargetFeature<
    "pad-short-functions", "PadShortFunctions", "true",
    "Pad short functions (to prevent a stall when returning too early)">;

// On some processors, instructions that implicitly take two memory operands are
// slow. In practice, this means that CALL, PUSH, and POP with memory operands
// should be avoided in favor of a MOV + register CALL/PUSH/POP.
def TuningSlowTwoMemOps : SubtargetFeature<
    "slow-two-mem-ops", "SlowTwoMemOps", "true",
    "Two memory operand instructions are slow">;

// True if the LEA instruction inputs have to be ready at address generation
// (AG) time.
def TuningLEAUsesAG : SubtargetFeature<
    "lea-uses-ag", "LeaUsesAG", "true",
    "LEA instruction needs inputs at AG stage">;

def TuningSlowLEA : SubtargetFeature<
    "slow-lea", "SlowLEA", "true",
    "LEA instruction with certain arguments is slow">;

// True if the LEA instruction has all three source operands: base, index,
// and offset or if the LEA instruction uses base and index registers where
// the base is EBP, RBP, or R13.
def TuningSlow3OpsLEA : SubtargetFeature<
    "slow-3ops-lea", "Slow3OpsLEA", "true",
    "LEA instruction with 3 ops or certain registers is slow">;

// True if INC and DEC instructions are slow when writing to flags.
def TuningSlowIncDec : SubtargetFeature<
    "slow-incdec", "SlowIncDec", "true",
    "INC and DEC instructions are slower than ADD and SUB">;

// False-dependency tunings: the named instructions carry a false dependency
// on their destination register.
def TuningPOPCNTFalseDeps : SubtargetFeature<
    "false-deps-popcnt", "HasPOPCNTFalseDeps", "true",
    "POPCNT has a false dependency on dest register">;

def TuningLZCNTFalseDeps : SubtargetFeature<
    "false-deps-lzcnt-tzcnt", "HasLZCNTFalseDeps", "true",
    "LZCNT/TZCNT have a false dependency on dest register">;

def TuningMULCFalseDeps : SubtargetFeature<
    "false-deps-mulc", "HasMULCFalseDeps", "true",
    "VF[C]MULCPH/SH has a false dependency on dest register">;

def TuningPERMFalseDeps : SubtargetFeature<
    "false-deps-perm", "HasPERMFalseDeps", "true",
    "VPERMD/Q/PS/PD has a false dependency on dest register">;

def TuningRANGEFalseDeps : SubtargetFeature<
    "false-deps-range", "HasRANGEFalseDeps", "true",
    "VRANGEPD/PS/SD/SS has a false dependency on dest register">;

def TuningGETMANTFalseDeps : SubtargetFeature<
    "false-deps-getmant", "HasGETMANTFalseDeps", "true",
    "VGETMANTSS/SD/SH and VGETMANDPS/PD(memory version) has a"
    " false dependency on dest register">;

def TuningMULLQFalseDeps : SubtargetFeature<
    "false-deps-mullq", "HasMULLQFalseDeps", "true",
    "VPMULLQ has a false dependency on dest register">;

def TuningSBBDepBreaking : SubtargetFeature<
    "sbb-dep-breaking", "HasSBBDepBreaking", "true",
    "SBB with same register has no source dependency">;

// On recent X86 (port bound) processors, it's preferable to combine to a
// single shuffle using a variable mask over multiple fixed shuffles.
def TuningFastVariableCrossLaneShuffle : SubtargetFeature<
    "fast-variable-crosslane-shuffle", "HasFastVariableCrossLaneShuffle", "true",
    "Cross-lane shuffles with variable masks are fast">;

def TuningFastVariablePerLaneShuffle : SubtargetFeature<
    "fast-variable-perlane-shuffle", "HasFastVariablePerLaneShuffle", "true",
    "Per-lane shuffles with variable masks are fast">;

// Goldmont / Tremont (atom in general) has no bypass delay.
def TuningNoDomainDelay : SubtargetFeature<
    "no-bypass-delay", "NoDomainDelay", "true",
    "Has no bypass delay when using the 'wrong' domain">;

// Many processors (Nehalem+ on Intel) have no bypass delay when
// using the wrong mov type.
def TuningNoDomainDelayMov : SubtargetFeature<
    "no-bypass-delay-mov", "NoDomainDelayMov", "true",
    "Has no bypass delay when using the 'wrong' mov type">;

// Newer processors (Skylake+ on Intel) have no bypass delay when
// using the wrong blend type.
def TuningNoDomainDelayBlend : SubtargetFeature<
    "no-bypass-delay-blend", "NoDomainDelayBlend", "true",
    "Has no bypass delay when using the 'wrong' blend type">;

// Newer processors (Haswell+ on Intel) have no bypass delay when
// using the wrong shuffle type.
def TuningNoDomainDelayShuffle : SubtargetFeature<
    "no-bypass-delay-shuffle", "NoDomainDelayShuffle", "true",
    "Has no bypass delay when using the 'wrong' shuffle type">;

// Prefer lowering shuffles on AVX512 targets (e.g. Skylake Server) to
// imm shifts/rotate if they can use more ports than regular shuffles.
def TuningPreferShiftShuffle : SubtargetFeature<
    "faster-shift-than-shuffle", "PreferLowerShuffleAsShift", "true",
    "Shifts are faster (or as fast) as shuffle">;

def TuningFastImmVectorShift : SubtargetFeature<
    "tuning-fast-imm-vector-shift", "FastImmVectorShift", "true",
    "Vector shifts are fast (2/cycle) as opposed to slow (1/cycle)">;

// On some X86 processors, a vzeroupper instruction should be inserted after
// using ymm/zmm registers before executing code that may use SSE instructions.
def TuningInsertVZEROUPPER : SubtargetFeature<
    "vzeroupper", "InsertVZEROUPPER", "true",
    "Should insert vzeroupper instructions">;

// TuningFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency
// than the corresponding NR code. TuningFastVectorFSQRT should be enabled if
// vector FSQRT has higher throughput than the corresponding NR code.
// The idea is that throughput bound code is likely to be vectorized, so for
// vectorized code we should care about the throughput of SQRT operations.
// But if the code is scalar that probably means that the code has some kind of
// dependency and we should care more about reducing the latency.

// True if hardware SQRTSS instruction is at least as fast (latency) as
// RSQRTSS followed by a Newton-Raphson iteration.
def TuningFastScalarFSQRT : SubtargetFeature<
    "fast-scalar-fsqrt", "HasFastScalarFSQRT", "true",
    "Scalar SQRT is fast (disable Newton-Raphson)">;

// True if hardware SQRTPS/VSQRTPS instructions are at least as fast
// (throughput) as RSQRTPS/VRSQRTPS followed by a Newton-Raphson iteration.
def TuningFastVectorFSQRT : SubtargetFeature<
    "fast-vector-fsqrt", "HasFastVectorFSQRT", "true",
    "Vector SQRT is fast (disable Newton-Raphson)">;

// If lzcnt has equivalent latency/throughput to most simple integer ops, it can
// be used to replace test/set sequences.
def TuningFastLZCNT : SubtargetFeature<
    "fast-lzcnt", "HasFastLZCNT", "true",
    "LZCNT instructions are as fast as most simple integer ops">;

// If the target can efficiently decode NOPs up to 7 bytes in length.
def TuningFast7ByteNOP : SubtargetFeature<
    "fast-7bytenop", "HasFast7ByteNOP", "true",
    "Target can quickly decode up to 7 byte NOPs">;

// If the target can efficiently decode NOPs up to 11 bytes in length.
def TuningFast11ByteNOP : SubtargetFeature<
    "fast-11bytenop", "HasFast11ByteNOP", "true",
    "Target can quickly decode up to 11 byte NOPs">;

// If the target can efficiently decode NOPs up to 15 bytes in length.
def TuningFast15ByteNOP : SubtargetFeature<
    "fast-15bytenop", "HasFast15ByteNOP", "true",
    "Target can quickly decode up to 15 byte NOPs">;

// Sandy Bridge and newer processors can use SHLD with the same source on both
// inputs to implement rotate to avoid the partial flag update of the normal
// rotate instructions.
def TuningFastSHLDRotate : SubtargetFeature<
    "fast-shld-rotate", "HasFastSHLDRotate", "true",
    "SHLD can be used as a faster rotate">;

// Bulldozer and newer processors can merge CMP/TEST (but not other
// instructions) with conditional branches.
def TuningBranchFusion : SubtargetFeature<
    "branchfusion", "HasBranchFusion", "true",
    "CMP/TEST can be fused with conditional branches">;

// Sandy Bridge and newer processors have many instructions that can be
// fused with conditional branches and pass through the CPU as a single
// operation.
def TuningMacroFusion : SubtargetFeature<
    "macrofusion", "HasMacroFusion", "true",
    "Various instructions can be fused with conditional branches">;

// Gather is available since Haswell (AVX2 set). So technically, we can
// generate Gathers on all AVX2 processors. But the overhead on HSW is high.
// Skylake Client processor has faster Gathers than HSW and performance is
// similar to Skylake Server (AVX-512).
def TuningFastGather
    : SubtargetFeature<
          "fast-gather", "HasFastGather", "true",
          "Indicates if gather is reasonably fast (this is true for Skylake client and all AVX-512 CPUs)">;

// Note the polarity of the next two: enabling the feature stores "false" into
// PreferGather / PreferScatter.
def TuningPreferNoGather
    : SubtargetFeature<
          "prefer-no-gather", "PreferGather", "false",
          "Prefer no gather instructions">;
def TuningPreferNoScatter
    : SubtargetFeature<
          "prefer-no-scatter", "PreferScatter", "false",
          "Prefer no scatter instructions">;

// Vector width preferences.
def TuningPrefer128Bit
    : SubtargetFeature<
          "prefer-128-bit", "Prefer128Bit", "true",
          "Prefer 128-bit AVX instructions">;

def TuningPrefer256Bit
    : SubtargetFeature<
          "prefer-256-bit", "Prefer256Bit", "true",
          "Prefer 256-bit AVX instructions">;

def TuningAllowLight256Bit
    : SubtargetFeature<
          "allow-light-256-bit", "AllowLight256Bit", "true",
          "Enable generation of 256-bit load/stores even if we prefer 128-bit">;

def TuningPreferMaskRegisters
    : SubtargetFeature<
          "prefer-mask-registers", "PreferMaskRegisters", "true",
          "Prefer AVX512 mask registers over PTEST/MOVMSK">;

def TuningFastBEXTR
    : SubtargetFeature<
          "fast-bextr", "HasFastBEXTR", "true",
          "Indicates that the BEXTR instruction is implemented as a single uop "
          "with good throughput">;

// Fold vector math operations plus shuffles into horizontal math instructions
// when the CPU implements horizontal operations (introduced with SSE3) with
// better latency/throughput than the alternative sequence.
def TuningFastHorizontalOps
    : SubtargetFeature<
          "fast-hops", "HasFastHorizontalOps", "true",
          "Prefer horizontal vector math instructions (haddp, phsub, etc.) over "
          "normal vector instructions with shuffles">;

def TuningFastScalarShiftMasks
    : SubtargetFeature<
          "fast-scalar-shift-masks", "HasFastScalarShiftMasks", "true",
          "Prefer a left/right scalar logical shift pair over a shift+and pair">;

def TuningFastVectorShiftMasks
    : SubtargetFeature<
          "fast-vector-shift-masks", "HasFastVectorShiftMasks", "true",
          "Prefer a left/right vector logical shift pair over a shift+and pair">;

def TuningFastMOVBE
    : SubtargetFeature<
          "fast-movbe", "HasFastMOVBE", "true",
          "Prefer a movbe over a single-use load + bswap / single-use bswap + store">;

// Microarchitecture-specific cost selection.
def TuningUseSLMArithCosts
    : SubtargetFeature<
          "use-slm-arith-costs", "UseSLMArithCosts", "true",
          "Use Silvermont specific arithmetic costs">;

def TuningUseGLMDivSqrtCosts
    : SubtargetFeature<
          "use-glm-div-sqrt-costs", "UseGLMDivSqrtCosts", "true",
          "Use Goldmont specific floating point div/sqrt costs">;

//===----------------------------------------------------------------------===//
// X86 CPU Families
// TODO: Remove these - use general tuning features to determine codegen.
747//===----------------------------------------------------------------------===// 748 749// Bonnell 750def ProcIntelAtom : SubtargetFeature<"", "IsAtom", "true", "Is Intel Atom processor">; 751 752//===----------------------------------------------------------------------===// 753// Register File Description 754//===----------------------------------------------------------------------===// 755 756include "X86RegisterInfo.td" 757include "X86RegisterBanks.td" 758 759//===----------------------------------------------------------------------===// 760// Instruction Descriptions 761//===----------------------------------------------------------------------===// 762 763include "X86Schedule.td" 764include "X86InstrInfo.td" 765include "X86SchedPredicates.td" 766 767def X86InstrInfo : InstrInfo; 768 769//===----------------------------------------------------------------------===// 770// X86 Scheduler Models 771//===----------------------------------------------------------------------===// 772 773include "X86ScheduleAtom.td" 774include "X86SchedSandyBridge.td" 775include "X86SchedHaswell.td" 776include "X86SchedBroadwell.td" 777include "X86ScheduleSLM.td" 778include "X86ScheduleZnver1.td" 779include "X86ScheduleZnver2.td" 780include "X86ScheduleZnver3.td" 781include "X86ScheduleZnver4.td" 782include "X86ScheduleBdVer2.td" 783include "X86ScheduleBtVer2.td" 784include "X86SchedSkylakeClient.td" 785include "X86SchedSkylakeServer.td" 786include "X86SchedIceLake.td" 787include "X86SchedAlderlakeP.td" 788include "X86SchedSapphireRapids.td" 789 790//===----------------------------------------------------------------------===// 791// X86 Processor Feature Lists 792//===----------------------------------------------------------------------===// 793 794def ProcessorFeatures { 795 // x86-64 micro-architecture levels: x86-64 and x86-64-v[234] 796 list<SubtargetFeature> X86_64V1Features = [ 797 FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX, FeatureSSE2, 798 FeatureFXSR, 
FeatureNOPL, FeatureX86_64, 799 ]; 800 list<SubtargetFeature> X86_64V1Tuning = [ 801 TuningMacroFusion, 802 TuningSlow3OpsLEA, 803 TuningSlowDivide64, 804 TuningSlowIncDec, 805 TuningInsertVZEROUPPER 806 ]; 807 808 list<SubtargetFeature> X86_64V2Features = !listconcat(X86_64V1Features, [ 809 FeatureCX16, FeatureLAHFSAHF64, FeatureCRC32, FeaturePOPCNT, 810 FeatureSSE42 811 ]); 812 list<SubtargetFeature> X86_64V2Tuning = [ 813 TuningMacroFusion, 814 TuningSlow3OpsLEA, 815 TuningSlowDivide64, 816 TuningSlowUAMem32, 817 TuningFastScalarFSQRT, 818 TuningFastSHLDRotate, 819 TuningFast15ByteNOP, 820 TuningPOPCNTFalseDeps, 821 TuningInsertVZEROUPPER 822 ]; 823 824 list<SubtargetFeature> X86_64V3Features = !listconcat(X86_64V2Features, [ 825 FeatureAVX2, FeatureBMI, FeatureBMI2, FeatureF16C, FeatureFMA, FeatureLZCNT, 826 FeatureMOVBE, FeatureXSAVE 827 ]); 828 list<SubtargetFeature> X86_64V3Tuning = [ 829 TuningMacroFusion, 830 TuningSlow3OpsLEA, 831 TuningSlowDivide64, 832 TuningFastScalarFSQRT, 833 TuningFastSHLDRotate, 834 TuningFast15ByteNOP, 835 TuningFastVariableCrossLaneShuffle, 836 TuningFastVariablePerLaneShuffle, 837 TuningPOPCNTFalseDeps, 838 TuningLZCNTFalseDeps, 839 TuningInsertVZEROUPPER, 840 TuningAllowLight256Bit 841 ]; 842 843 list<SubtargetFeature> X86_64V4Features = !listconcat(X86_64V3Features, [ 844 FeatureEVEX512, 845 FeatureBWI, 846 FeatureCDI, 847 FeatureDQI, 848 FeatureVLX, 849 ]); 850 list<SubtargetFeature> X86_64V4Tuning = [ 851 TuningMacroFusion, 852 TuningSlow3OpsLEA, 853 TuningSlowDivide64, 854 TuningFastScalarFSQRT, 855 TuningFastVectorFSQRT, 856 TuningFastSHLDRotate, 857 TuningFast15ByteNOP, 858 TuningFastVariableCrossLaneShuffle, 859 TuningFastVariablePerLaneShuffle, 860 TuningPrefer256Bit, 861 TuningFastGather, 862 TuningPOPCNTFalseDeps, 863 TuningInsertVZEROUPPER, 864 TuningAllowLight256Bit 865 ]; 866 867 // Nehalem 868 list<SubtargetFeature> NHMFeatures = X86_64V2Features; 869 list<SubtargetFeature> NHMTuning = [TuningMacroFusion, 870 
TuningInsertVZEROUPPER, 871 TuningNoDomainDelayMov]; 872 873 // Westmere 874 list<SubtargetFeature> WSMAdditionalFeatures = [FeaturePCLMUL]; 875 list<SubtargetFeature> WSMTuning = NHMTuning; 876 list<SubtargetFeature> WSMFeatures = 877 !listconcat(NHMFeatures, WSMAdditionalFeatures); 878 879 // Sandybridge 880 list<SubtargetFeature> SNBAdditionalFeatures = [FeatureAVX, 881 FeatureXSAVE, 882 FeatureXSAVEOPT]; 883 list<SubtargetFeature> SNBTuning = [TuningMacroFusion, 884 TuningSlow3OpsLEA, 885 TuningSlowDivide64, 886 TuningSlowUAMem32, 887 TuningFastScalarFSQRT, 888 TuningFastSHLDRotate, 889 TuningFast15ByteNOP, 890 TuningPOPCNTFalseDeps, 891 TuningInsertVZEROUPPER, 892 TuningNoDomainDelayMov]; 893 list<SubtargetFeature> SNBFeatures = 894 !listconcat(WSMFeatures, SNBAdditionalFeatures); 895 896 // Ivybridge 897 list<SubtargetFeature> IVBAdditionalFeatures = [FeatureRDRAND, 898 FeatureF16C, 899 FeatureFSGSBase]; 900 list<SubtargetFeature> IVBTuning = SNBTuning; 901 list<SubtargetFeature> IVBFeatures = 902 !listconcat(SNBFeatures, IVBAdditionalFeatures); 903 904 // Haswell 905 list<SubtargetFeature> HSWAdditionalFeatures = [FeatureAVX2, 906 FeatureBMI, 907 FeatureBMI2, 908 FeatureERMSB, 909 FeatureFMA, 910 FeatureINVPCID, 911 FeatureLZCNT, 912 FeatureMOVBE]; 913 list<SubtargetFeature> HSWTuning = [TuningMacroFusion, 914 TuningSlow3OpsLEA, 915 TuningSlowDivide64, 916 TuningFastScalarFSQRT, 917 TuningFastSHLDRotate, 918 TuningFast15ByteNOP, 919 TuningFastVariableCrossLaneShuffle, 920 TuningFastVariablePerLaneShuffle, 921 TuningPOPCNTFalseDeps, 922 TuningLZCNTFalseDeps, 923 TuningInsertVZEROUPPER, 924 TuningAllowLight256Bit, 925 TuningNoDomainDelayMov, 926 TuningNoDomainDelayShuffle]; 927 list<SubtargetFeature> HSWFeatures = 928 !listconcat(IVBFeatures, HSWAdditionalFeatures); 929 930 // Broadwell 931 list<SubtargetFeature> BDWAdditionalFeatures = [FeatureADX, 932 FeatureRDSEED, 933 FeaturePRFCHW]; 934 list<SubtargetFeature> BDWTuning = HSWTuning; 935 
list<SubtargetFeature> BDWFeatures = 936 !listconcat(HSWFeatures, BDWAdditionalFeatures); 937 938 // Skylake 939 list<SubtargetFeature> SKLAdditionalFeatures = [FeatureAES, 940 FeatureXSAVEC, 941 FeatureXSAVES, 942 FeatureCLFLUSHOPT]; 943 list<SubtargetFeature> SKLTuning = [TuningFastGather, 944 TuningMacroFusion, 945 TuningSlow3OpsLEA, 946 TuningSlowDivide64, 947 TuningFastScalarFSQRT, 948 TuningFastVectorFSQRT, 949 TuningFastSHLDRotate, 950 TuningFast15ByteNOP, 951 TuningFastVariableCrossLaneShuffle, 952 TuningFastVariablePerLaneShuffle, 953 TuningPOPCNTFalseDeps, 954 TuningInsertVZEROUPPER, 955 TuningAllowLight256Bit, 956 TuningNoDomainDelayMov, 957 TuningNoDomainDelayShuffle, 958 TuningNoDomainDelayBlend]; 959 list<SubtargetFeature> SKLFeatures = 960 !listconcat(BDWFeatures, SKLAdditionalFeatures); 961 962 // Skylake-AVX512 963 list<SubtargetFeature> SKXAdditionalFeatures = [FeatureAES, 964 FeatureXSAVEC, 965 FeatureXSAVES, 966 FeatureCLFLUSHOPT, 967 FeatureAVX512, 968 FeatureEVEX512, 969 FeatureCDI, 970 FeatureDQI, 971 FeatureBWI, 972 FeatureVLX, 973 FeaturePKU, 974 FeatureCLWB]; 975 list<SubtargetFeature> SKXTuning = [TuningFastGather, 976 TuningMacroFusion, 977 TuningSlow3OpsLEA, 978 TuningSlowDivide64, 979 TuningFastScalarFSQRT, 980 TuningFastVectorFSQRT, 981 TuningFastSHLDRotate, 982 TuningFast15ByteNOP, 983 TuningFastVariableCrossLaneShuffle, 984 TuningFastVariablePerLaneShuffle, 985 TuningPrefer256Bit, 986 TuningPOPCNTFalseDeps, 987 TuningInsertVZEROUPPER, 988 TuningAllowLight256Bit, 989 TuningPreferShiftShuffle, 990 TuningNoDomainDelayMov, 991 TuningNoDomainDelayShuffle, 992 TuningNoDomainDelayBlend, 993 TuningFastImmVectorShift]; 994 list<SubtargetFeature> SKXFeatures = 995 !listconcat(BDWFeatures, SKXAdditionalFeatures); 996 997 // Cascadelake 998 list<SubtargetFeature> CLXAdditionalFeatures = [FeatureVNNI]; 999 list<SubtargetFeature> CLXTuning = SKXTuning; 1000 list<SubtargetFeature> CLXFeatures = 1001 !listconcat(SKXFeatures, CLXAdditionalFeatures); 
1002 1003 // Cooperlake 1004 list<SubtargetFeature> CPXAdditionalFeatures = [FeatureBF16]; 1005 list<SubtargetFeature> CPXTuning = SKXTuning; 1006 list<SubtargetFeature> CPXFeatures = 1007 !listconcat(CLXFeatures, CPXAdditionalFeatures); 1008 1009 // Cannonlake 1010 list<SubtargetFeature> CNLAdditionalFeatures = [FeatureAVX512, 1011 FeatureEVEX512, 1012 FeatureCDI, 1013 FeatureDQI, 1014 FeatureBWI, 1015 FeatureVLX, 1016 FeaturePKU, 1017 FeatureVBMI, 1018 FeatureIFMA, 1019 FeatureSHA]; 1020 list<SubtargetFeature> CNLTuning = [TuningFastGather, 1021 TuningMacroFusion, 1022 TuningSlow3OpsLEA, 1023 TuningSlowDivide64, 1024 TuningFastScalarFSQRT, 1025 TuningFastVectorFSQRT, 1026 TuningFastSHLDRotate, 1027 TuningFast15ByteNOP, 1028 TuningFastVariableCrossLaneShuffle, 1029 TuningFastVariablePerLaneShuffle, 1030 TuningPrefer256Bit, 1031 TuningInsertVZEROUPPER, 1032 TuningAllowLight256Bit, 1033 TuningNoDomainDelayMov, 1034 TuningNoDomainDelayShuffle, 1035 TuningNoDomainDelayBlend, 1036 TuningFastImmVectorShift]; 1037 list<SubtargetFeature> CNLFeatures = 1038 !listconcat(SKLFeatures, CNLAdditionalFeatures); 1039 1040 // Icelake 1041 list<SubtargetFeature> ICLAdditionalFeatures = [FeatureBITALG, 1042 FeatureVAES, 1043 FeatureVBMI2, 1044 FeatureVNNI, 1045 FeatureVPCLMULQDQ, 1046 FeatureVPOPCNTDQ, 1047 FeatureGFNI, 1048 FeatureRDPID, 1049 FeatureFSRM]; 1050 list<SubtargetFeature> ICLTuning = [TuningFastGather, 1051 TuningMacroFusion, 1052 TuningSlowDivide64, 1053 TuningFastScalarFSQRT, 1054 TuningFastVectorFSQRT, 1055 TuningFastSHLDRotate, 1056 TuningFast15ByteNOP, 1057 TuningFastVariableCrossLaneShuffle, 1058 TuningFastVariablePerLaneShuffle, 1059 TuningPrefer256Bit, 1060 TuningInsertVZEROUPPER, 1061 TuningAllowLight256Bit, 1062 TuningNoDomainDelayMov, 1063 TuningNoDomainDelayShuffle, 1064 TuningNoDomainDelayBlend, 1065 TuningFastImmVectorShift]; 1066 list<SubtargetFeature> ICLFeatures = 1067 !listconcat(CNLFeatures, ICLAdditionalFeatures); 1068 1069 // Icelake Server 1070 
list<SubtargetFeature> ICXAdditionalFeatures = [FeaturePCONFIG, 1071 FeatureCLWB, 1072 FeatureWBNOINVD]; 1073 list<SubtargetFeature> ICXTuning = ICLTuning; 1074 list<SubtargetFeature> ICXFeatures = 1075 !listconcat(ICLFeatures, ICXAdditionalFeatures); 1076 1077 // Tigerlake 1078 list<SubtargetFeature> TGLAdditionalFeatures = [FeatureVP2INTERSECT, 1079 FeatureCLWB, 1080 FeatureMOVDIRI, 1081 FeatureMOVDIR64B, 1082 FeatureSHSTK]; 1083 list<SubtargetFeature> TGLTuning = ICLTuning; 1084 list<SubtargetFeature> TGLFeatures = 1085 !listconcat(ICLFeatures, TGLAdditionalFeatures ); 1086 1087 // Sapphirerapids 1088 list<SubtargetFeature> SPRAdditionalFeatures = [FeatureAMXTILE, 1089 FeatureAMXINT8, 1090 FeatureAMXBF16, 1091 FeatureBF16, 1092 FeatureSERIALIZE, 1093 FeatureCLDEMOTE, 1094 FeatureWAITPKG, 1095 FeaturePTWRITE, 1096 FeatureFP16, 1097 FeatureAVXVNNI, 1098 FeatureTSXLDTRK, 1099 FeatureENQCMD, 1100 FeatureSHSTK, 1101 FeatureMOVDIRI, 1102 FeatureMOVDIR64B, 1103 FeatureUINTR]; 1104 list<SubtargetFeature> SPRAdditionalTuning = [TuningMULCFalseDeps, 1105 TuningPERMFalseDeps, 1106 TuningRANGEFalseDeps, 1107 TuningGETMANTFalseDeps, 1108 TuningMULLQFalseDeps]; 1109 list<SubtargetFeature> SPRTuning = !listconcat(ICXTuning, SPRAdditionalTuning); 1110 list<SubtargetFeature> SPRFeatures = 1111 !listconcat(ICXFeatures, SPRAdditionalFeatures); 1112 1113 // Graniterapids 1114 list<SubtargetFeature> GNRAdditionalFeatures = [FeatureAMXFP16, 1115 FeaturePREFETCHI]; 1116 list<SubtargetFeature> GNRFeatures = 1117 !listconcat(SPRFeatures, GNRAdditionalFeatures); 1118 1119 // Graniterapids D 1120 list<SubtargetFeature> GNRDAdditionalFeatures = [FeatureAMXCOMPLEX]; 1121 list<SubtargetFeature> GNRDFeatures = 1122 !listconcat(GNRFeatures, GNRDAdditionalFeatures); 1123 1124 // Atom 1125 list<SubtargetFeature> AtomFeatures = [FeatureX87, 1126 FeatureCX8, 1127 FeatureCMOV, 1128 FeatureMMX, 1129 FeatureSSSE3, 1130 FeatureFXSR, 1131 FeatureNOPL, 1132 FeatureX86_64, 1133 FeatureCX16, 1134 
FeatureMOVBE, 1135 FeatureLAHFSAHF64]; 1136 list<SubtargetFeature> AtomTuning = [ProcIntelAtom, 1137 TuningSlowUAMem16, 1138 TuningLEAForSP, 1139 TuningSlowDivide32, 1140 TuningSlowDivide64, 1141 TuningSlowTwoMemOps, 1142 TuningLEAUsesAG, 1143 TuningPadShortFunctions, 1144 TuningInsertVZEROUPPER, 1145 TuningNoDomainDelay]; 1146 1147 // Silvermont 1148 list<SubtargetFeature> SLMAdditionalFeatures = [FeatureSSE42, 1149 FeatureCRC32, 1150 FeaturePOPCNT, 1151 FeaturePCLMUL, 1152 FeaturePRFCHW, 1153 FeatureRDRAND]; 1154 list<SubtargetFeature> SLMTuning = [TuningUseSLMArithCosts, 1155 TuningSlowTwoMemOps, 1156 TuningSlowLEA, 1157 TuningSlowIncDec, 1158 TuningSlowDivide64, 1159 TuningSlowPMULLD, 1160 TuningFast7ByteNOP, 1161 TuningFastMOVBE, 1162 TuningPOPCNTFalseDeps, 1163 TuningInsertVZEROUPPER, 1164 TuningNoDomainDelay]; 1165 list<SubtargetFeature> SLMFeatures = 1166 !listconcat(AtomFeatures, SLMAdditionalFeatures); 1167 1168 // Goldmont 1169 list<SubtargetFeature> GLMAdditionalFeatures = [FeatureAES, 1170 FeatureSHA, 1171 FeatureRDSEED, 1172 FeatureXSAVE, 1173 FeatureXSAVEOPT, 1174 FeatureXSAVEC, 1175 FeatureXSAVES, 1176 FeatureCLFLUSHOPT, 1177 FeatureFSGSBase]; 1178 list<SubtargetFeature> GLMTuning = [TuningUseGLMDivSqrtCosts, 1179 TuningSlowTwoMemOps, 1180 TuningSlowLEA, 1181 TuningSlowIncDec, 1182 TuningFastMOVBE, 1183 TuningPOPCNTFalseDeps, 1184 TuningInsertVZEROUPPER, 1185 TuningNoDomainDelay]; 1186 list<SubtargetFeature> GLMFeatures = 1187 !listconcat(SLMFeatures, GLMAdditionalFeatures); 1188 1189 // Goldmont Plus 1190 list<SubtargetFeature> GLPAdditionalFeatures = [FeaturePTWRITE, 1191 FeatureRDPID]; 1192 list<SubtargetFeature> GLPTuning = [TuningUseGLMDivSqrtCosts, 1193 TuningSlowTwoMemOps, 1194 TuningSlowLEA, 1195 TuningSlowIncDec, 1196 TuningFastMOVBE, 1197 TuningInsertVZEROUPPER, 1198 TuningNoDomainDelay]; 1199 list<SubtargetFeature> GLPFeatures = 1200 !listconcat(GLMFeatures, GLPAdditionalFeatures); 1201 1202 // Tremont 1203 list<SubtargetFeature> 
TRMAdditionalFeatures = [FeatureCLWB, 1204 FeatureGFNI]; 1205 list<SubtargetFeature> TRMTuning = GLPTuning; 1206 list<SubtargetFeature> TRMFeatures = 1207 !listconcat(GLPFeatures, TRMAdditionalFeatures); 1208 1209 // Alderlake 1210 list<SubtargetFeature> ADLAdditionalFeatures = [FeatureSERIALIZE, 1211 FeaturePCONFIG, 1212 FeatureSHSTK, 1213 FeatureWIDEKL, 1214 FeatureINVPCID, 1215 FeatureADX, 1216 FeatureFMA, 1217 FeatureVAES, 1218 FeatureVPCLMULQDQ, 1219 FeatureF16C, 1220 FeatureBMI, 1221 FeatureBMI2, 1222 FeatureLZCNT, 1223 FeatureAVXVNNI, 1224 FeaturePKU, 1225 FeatureHRESET, 1226 FeatureCLDEMOTE, 1227 FeatureMOVDIRI, 1228 FeatureMOVDIR64B, 1229 FeatureWAITPKG]; 1230 list<SubtargetFeature> ADLAdditionalTuning = [TuningPERMFalseDeps, 1231 TuningPreferMovmskOverVTest, 1232 TuningFastImmVectorShift]; 1233 list<SubtargetFeature> ADLTuning = !listconcat(SKLTuning, ADLAdditionalTuning); 1234 list<SubtargetFeature> ADLFeatures = 1235 !listconcat(TRMFeatures, ADLAdditionalFeatures); 1236 1237 // Gracemont 1238 list<SubtargetFeature> GRTTuning = [TuningMacroFusion, 1239 TuningSlow3OpsLEA, 1240 TuningSlowDivide32, 1241 TuningSlowDivide64, 1242 TuningFastScalarFSQRT, 1243 TuningFastVectorFSQRT, 1244 TuningFast15ByteNOP, 1245 TuningFastVariablePerLaneShuffle, 1246 TuningPOPCNTFalseDeps, 1247 TuningInsertVZEROUPPER]; 1248 1249 // Sierraforest 1250 list<SubtargetFeature> SRFAdditionalFeatures = [FeatureCMPCCXADD, 1251 FeatureAVXIFMA, 1252 FeatureAVXNECONVERT, 1253 FeatureENQCMD, 1254 FeatureUINTR, 1255 FeatureAVXVNNIINT8]; 1256 list<SubtargetFeature> SRFFeatures = 1257 !listconcat(ADLFeatures, SRFAdditionalFeatures); 1258 1259 // Arrowlake S 1260 list<SubtargetFeature> ARLSAdditionalFeatures = [FeatureAVXVNNIINT16, 1261 FeatureSHA512, 1262 FeatureSM3, 1263 FeatureSM4]; 1264 list<SubtargetFeature> ARLSFeatures = 1265 !listconcat(SRFFeatures, ARLSAdditionalFeatures); 1266 1267 // Pantherlake 1268 list<SubtargetFeature> PTLAdditionalFeatures = [FeaturePREFETCHI]; 1269 
list<SubtargetFeature> PTLFeatures = 1270 !listconcat(ARLSFeatures, PTLAdditionalFeatures); 1271 1272 1273 // Clearwaterforest 1274 list<SubtargetFeature> CWFAdditionalFeatures = [FeaturePREFETCHI, 1275 FeatureUSERMSR]; 1276 list<SubtargetFeature> CWFFeatures = 1277 !listconcat(ARLSFeatures, CWFAdditionalFeatures); 1278 1279 // Knights Landing 1280 list<SubtargetFeature> KNLFeatures = [FeatureX87, 1281 FeatureCX8, 1282 FeatureCMOV, 1283 FeatureMMX, 1284 FeatureFXSR, 1285 FeatureNOPL, 1286 FeatureX86_64, 1287 FeatureCX16, 1288 FeatureCRC32, 1289 FeaturePOPCNT, 1290 FeaturePCLMUL, 1291 FeatureXSAVE, 1292 FeatureXSAVEOPT, 1293 FeatureLAHFSAHF64, 1294 FeatureAES, 1295 FeatureRDRAND, 1296 FeatureF16C, 1297 FeatureFSGSBase, 1298 FeatureAVX512, 1299 FeatureEVEX512, 1300 FeatureERI, 1301 FeatureCDI, 1302 FeaturePFI, 1303 FeaturePREFETCHWT1, 1304 FeatureADX, 1305 FeatureRDSEED, 1306 FeatureMOVBE, 1307 FeatureLZCNT, 1308 FeatureBMI, 1309 FeatureBMI2, 1310 FeatureFMA, 1311 FeaturePRFCHW]; 1312 list<SubtargetFeature> KNLTuning = [TuningSlowDivide64, 1313 TuningSlow3OpsLEA, 1314 TuningSlowIncDec, 1315 TuningSlowTwoMemOps, 1316 TuningPreferMaskRegisters, 1317 TuningFastGather, 1318 TuningFastMOVBE, 1319 TuningSlowPMADDWD]; 1320 // TODO Add AVX5124FMAPS/AVX5124VNNIW features 1321 list<SubtargetFeature> KNMFeatures = 1322 !listconcat(KNLFeatures, [FeatureVPOPCNTDQ]); 1323 1324 // Barcelona 1325 list<SubtargetFeature> BarcelonaFeatures = [FeatureX87, 1326 FeatureCX8, 1327 FeatureSSE4A, 1328 Feature3DNowA, 1329 FeatureFXSR, 1330 FeatureNOPL, 1331 FeatureCX16, 1332 FeaturePRFCHW, 1333 FeatureLZCNT, 1334 FeaturePOPCNT, 1335 FeatureLAHFSAHF64, 1336 FeatureCMOV, 1337 FeatureX86_64]; 1338 list<SubtargetFeature> BarcelonaTuning = [TuningFastScalarShiftMasks, 1339 TuningSlowSHLD, 1340 TuningSBBDepBreaking, 1341 TuningInsertVZEROUPPER]; 1342 1343 // Bobcat 1344 list<SubtargetFeature> BtVer1Features = [FeatureX87, 1345 FeatureCX8, 1346 FeatureCMOV, 1347 FeatureMMX, 1348 FeatureSSSE3, 1349 
FeatureSSE4A, 1350 FeatureFXSR, 1351 FeatureNOPL, 1352 FeatureX86_64, 1353 FeatureCX16, 1354 FeaturePRFCHW, 1355 FeatureLZCNT, 1356 FeaturePOPCNT, 1357 FeatureLAHFSAHF64]; 1358 list<SubtargetFeature> BtVer1Tuning = [TuningFast15ByteNOP, 1359 TuningFastScalarShiftMasks, 1360 TuningFastVectorShiftMasks, 1361 TuningSlowSHLD, 1362 TuningSBBDepBreaking, 1363 TuningInsertVZEROUPPER]; 1364 1365 // Jaguar 1366 list<SubtargetFeature> BtVer2AdditionalFeatures = [FeatureAVX, 1367 FeatureAES, 1368 FeatureCRC32, 1369 FeaturePCLMUL, 1370 FeatureBMI, 1371 FeatureF16C, 1372 FeatureMOVBE, 1373 FeatureXSAVE, 1374 FeatureXSAVEOPT]; 1375 list<SubtargetFeature> BtVer2Tuning = [TuningFastLZCNT, 1376 TuningFastBEXTR, 1377 TuningFastHorizontalOps, 1378 TuningFast15ByteNOP, 1379 TuningFastScalarShiftMasks, 1380 TuningFastVectorShiftMasks, 1381 TuningFastMOVBE, 1382 TuningSBBDepBreaking, 1383 TuningSlowSHLD]; 1384 list<SubtargetFeature> BtVer2Features = 1385 !listconcat(BtVer1Features, BtVer2AdditionalFeatures); 1386 1387 // Bulldozer 1388 list<SubtargetFeature> BdVer1Features = [FeatureX87, 1389 FeatureCX8, 1390 FeatureCMOV, 1391 FeatureXOP, 1392 FeatureX86_64, 1393 FeatureCX16, 1394 FeatureAES, 1395 FeatureCRC32, 1396 FeaturePRFCHW, 1397 FeaturePCLMUL, 1398 FeatureMMX, 1399 FeatureFXSR, 1400 FeatureNOPL, 1401 FeatureLZCNT, 1402 FeaturePOPCNT, 1403 FeatureXSAVE, 1404 FeatureLWP, 1405 FeatureLAHFSAHF64]; 1406 list<SubtargetFeature> BdVer1Tuning = [TuningSlowSHLD, 1407 TuningFast11ByteNOP, 1408 TuningFastScalarShiftMasks, 1409 TuningBranchFusion, 1410 TuningSBBDepBreaking, 1411 TuningInsertVZEROUPPER]; 1412 1413 // PileDriver 1414 list<SubtargetFeature> BdVer2AdditionalFeatures = [FeatureF16C, 1415 FeatureBMI, 1416 FeatureTBM, 1417 FeatureFMA]; 1418 list<SubtargetFeature> BdVer2AdditionalTuning = [TuningFastBEXTR, 1419 TuningFastMOVBE]; 1420 list<SubtargetFeature> BdVer2Tuning = 1421 !listconcat(BdVer1Tuning, BdVer2AdditionalTuning); 1422 list<SubtargetFeature> BdVer2Features = 1423 
!listconcat(BdVer1Features, BdVer2AdditionalFeatures); 1424 1425 // Steamroller 1426 list<SubtargetFeature> BdVer3AdditionalFeatures = [FeatureXSAVEOPT, 1427 FeatureFSGSBase]; 1428 list<SubtargetFeature> BdVer3Tuning = BdVer2Tuning; 1429 list<SubtargetFeature> BdVer3Features = 1430 !listconcat(BdVer2Features, BdVer3AdditionalFeatures); 1431 1432 // Excavator 1433 list<SubtargetFeature> BdVer4AdditionalFeatures = [FeatureAVX2, 1434 FeatureBMI2, 1435 FeatureMOVBE, 1436 FeatureRDRAND, 1437 FeatureMWAITX]; 1438 list<SubtargetFeature> BdVer4Tuning = BdVer3Tuning; 1439 list<SubtargetFeature> BdVer4Features = 1440 !listconcat(BdVer3Features, BdVer4AdditionalFeatures); 1441 1442 1443 // AMD Zen Processors common ISAs 1444 list<SubtargetFeature> ZNFeatures = [FeatureADX, 1445 FeatureAES, 1446 FeatureAVX2, 1447 FeatureBMI, 1448 FeatureBMI2, 1449 FeatureCLFLUSHOPT, 1450 FeatureCLZERO, 1451 FeatureCMOV, 1452 FeatureX86_64, 1453 FeatureCX16, 1454 FeatureCRC32, 1455 FeatureF16C, 1456 FeatureFMA, 1457 FeatureFSGSBase, 1458 FeatureFXSR, 1459 FeatureNOPL, 1460 FeatureLAHFSAHF64, 1461 FeatureLZCNT, 1462 FeatureMMX, 1463 FeatureMOVBE, 1464 FeatureMWAITX, 1465 FeaturePCLMUL, 1466 FeaturePOPCNT, 1467 FeaturePRFCHW, 1468 FeatureRDRAND, 1469 FeatureRDSEED, 1470 FeatureSHA, 1471 FeatureSSE4A, 1472 FeatureX87, 1473 FeatureXSAVE, 1474 FeatureXSAVEC, 1475 FeatureXSAVEOPT, 1476 FeatureXSAVES]; 1477 list<SubtargetFeature> ZNTuning = [TuningFastLZCNT, 1478 TuningFastBEXTR, 1479 TuningFast15ByteNOP, 1480 TuningBranchFusion, 1481 TuningFastScalarFSQRT, 1482 TuningFastVectorFSQRT, 1483 TuningFastScalarShiftMasks, 1484 TuningFastVariablePerLaneShuffle, 1485 TuningFastMOVBE, 1486 TuningSlowSHLD, 1487 TuningSBBDepBreaking, 1488 TuningInsertVZEROUPPER, 1489 TuningAllowLight256Bit]; 1490 list<SubtargetFeature> ZN2AdditionalFeatures = [FeatureCLWB, 1491 FeatureRDPID, 1492 FeatureRDPRU, 1493 FeatureWBNOINVD]; 1494 list<SubtargetFeature> ZN2Tuning = ZNTuning; 1495 list<SubtargetFeature> ZN2Features = 1496 
!listconcat(ZNFeatures, ZN2AdditionalFeatures); 1497 list<SubtargetFeature> ZN3AdditionalFeatures = [FeatureFSRM, 1498 FeatureINVPCID, 1499 FeaturePKU, 1500 FeatureVAES, 1501 FeatureVPCLMULQDQ]; 1502 list<SubtargetFeature> ZN3AdditionalTuning = [TuningMacroFusion]; 1503 list<SubtargetFeature> ZN3Tuning = 1504 !listconcat(ZN2Tuning, ZN3AdditionalTuning); 1505 list<SubtargetFeature> ZN3Features = 1506 !listconcat(ZN2Features, ZN3AdditionalFeatures); 1507 list<SubtargetFeature> ZN4Tuning = ZN3Tuning; 1508 list<SubtargetFeature> ZN4AdditionalFeatures = [FeatureAVX512, 1509 FeatureEVEX512, 1510 FeatureCDI, 1511 FeatureDQI, 1512 FeatureBWI, 1513 FeatureVLX, 1514 FeatureVBMI, 1515 FeatureVBMI2, 1516 FeatureIFMA, 1517 FeatureVNNI, 1518 FeatureBITALG, 1519 FeatureGFNI, 1520 FeatureBF16, 1521 FeatureSHSTK, 1522 FeatureVPOPCNTDQ]; 1523 list<SubtargetFeature> ZN4Features = 1524 !listconcat(ZN3Features, ZN4AdditionalFeatures); 1525} 1526 1527//===----------------------------------------------------------------------===// 1528// X86 processors supported. 1529//===----------------------------------------------------------------------===// 1530 1531class Proc<string Name, list<SubtargetFeature> Features, 1532 list<SubtargetFeature> TuneFeatures> 1533 : ProcessorModel<Name, GenericModel, Features, TuneFeatures>; 1534 1535class ProcModel<string Name, SchedMachineModel Model, 1536 list<SubtargetFeature> Features, 1537 list<SubtargetFeature> TuneFeatures> 1538 : ProcessorModel<Name, Model, Features, TuneFeatures>; 1539 1540// NOTE: CMPXCHG8B is here for legacy compatibility so that it is only disabled 1541// if i386/i486 is specifically requested. 1542// NOTE: 64Bit is here as "generic" is the default llc CPU. The X86Subtarget 1543// constructor checks that any CPU used in 64-bit mode has FeatureX86_64 1544// enabled. It has no effect on code generation. 1545// NOTE: As a default tuning, "generic" aims to produce code optimized for the 1546// most common X86 processors. 
// The tunings might be changed over time. It is
// recommended to use "tune-cpu"="x86-64" in function attribute for consistency.
def : ProcModel<"generic", SandyBridgeModel,
                [FeatureX87, FeatureCX8, FeatureX86_64], [
                  TuningSlow3OpsLEA,
                  TuningSlowDivide64,
                  TuningMacroFusion,
                  TuningFastScalarFSQRT,
                  TuningFast15ByteNOP,
                  TuningInsertVZEROUPPER
                ]>;

// Early 32-bit processors. Every entry below shares the same tuning list; only
// the ISA feature list grows from one group to the next.
foreach P = ["i386", "i486"] in {
  def : Proc<P, [FeatureX87],
             [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
foreach P = ["i586", "pentium"] in {
  def : Proc<P, [FeatureX87, FeatureCX8],
             [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
foreach P = ["pentium-mmx", "pentium_mmx"] in {
  def : Proc<P, [FeatureX87, FeatureCX8, FeatureMMX],
             [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
def : Proc<"i686", [FeatureX87, FeatureCX8, FeatureCMOV],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
foreach P = ["pentiumpro", "pentium_pro"] in {
  def : Proc<P, [FeatureX87, FeatureCX8, FeatureCMOV, FeatureNOPL],
             [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
foreach P = ["pentium2", "pentium_ii"] in {
  def : Proc<P,
             [FeatureX87, FeatureCX8, FeatureMMX, FeatureCMOV, FeatureFXSR,
              FeatureNOPL],
             [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
foreach P = ["pentium3", "pentium3m", "pentium_iii_no_xmm_regs", "pentium_iii"] in {
  def : Proc<P,
             [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE1, FeatureFXSR,
              FeatureNOPL, FeatureCMOV],
             [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}

// Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
// The intent is to enable it for pentium4 which is the current default
// processor in a vanilla 32-bit clang compilation when no specific
// architecture is specified.
// This generally gives a nice performance
// increase on silvermont, with largely neutral behavior on other
// contemporary large core processors.
// pentium-m, pentium4m, prescott and nocona are included as a preventative
// measure to avoid performance surprises, in case clang's default cpu
// changes slightly.

// Pentium M and Pentium 4 names share one model, feature list and tuning list.
foreach P = ["pentium_m", "pentium-m", "pentium4", "pentium4m", "pentium_4"] in {
  def : ProcModel<P, GenericPostRAModel, [
    FeatureX87,
    FeatureCX8,
    FeatureMMX,
    FeatureSSE2,
    FeatureFXSR,
    FeatureNOPL,
    FeatureCMOV
  ], [
    TuningSlowUAMem16,
    TuningInsertVZEROUPPER
  ]>;
}

// Intel Quark.
def : Proc<"lakemont", [FeatureCX8],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

// Intel Core Duo.
def : ProcModel<"yonah", SandyBridgeModel, [
  FeatureX87,
  FeatureCX8,
  FeatureMMX,
  FeatureSSE3,
  FeatureFXSR,
  FeatureNOPL,
  FeatureCMOV
], [
  TuningSlowUAMem16,
  TuningInsertVZEROUPPER
]>;

// NetBurst.
foreach P = ["prescott", "pentium_4_sse3"] in {
  def : ProcModel<P, GenericPostRAModel, [
    FeatureX87,
    FeatureCX8,
    FeatureMMX,
    FeatureSSE3,
    FeatureFXSR,
    FeatureNOPL,
    FeatureCMOV
  ], [
    TuningSlowUAMem16,
    TuningInsertVZEROUPPER
  ]>;
}
def : ProcModel<"nocona", GenericPostRAModel,
                [FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX, FeatureSSE3,
                 FeatureFXSR, FeatureNOPL, FeatureX86_64, FeatureCX16],
                [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

// Intel Core 2 Solo/Duo.
foreach P = ["core2", "core_2_duo_ssse3"] in {
  def : ProcModel<P, SandyBridgeModel,
                  [FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX,
                   FeatureSSSE3, FeatureFXSR, FeatureNOPL, FeatureX86_64,
                   FeatureCX16, FeatureLAHFSAHF64],
                  [TuningMacroFusion, TuningSlowUAMem16,
                   TuningInsertVZEROUPPER]>;
}
// Same as core2 except that the SSE level is SSE4.1 instead of SSSE3.
foreach P = ["penryn", "core_2_duo_sse4_1"] in {
  def : ProcModel<P, SandyBridgeModel,
                  [FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX,
                   FeatureSSE41, FeatureFXSR, FeatureNOPL, FeatureX86_64,
                   FeatureCX16, FeatureLAHFSAHF64],
                  [TuningMacroFusion, TuningSlowUAMem16,
                   TuningInsertVZEROUPPER]>;
}

// Atom CPUs.
foreach P = ["bonnell", "atom"] in {
  def : ProcModel<P, AtomModel,
                  ProcessorFeatures.AtomFeatures,
                  ProcessorFeatures.AtomTuning>;
}

foreach P = ["silvermont", "slm", "atom_sse4_2"] in {
  def : ProcModel<P, SLMModel,
                  ProcessorFeatures.SLMFeatures,
                  ProcessorFeatures.SLMTuning>;
}

// atom_sse4_2_movbe pairs the Goldmont feature list with the Silvermont
// tuning list.
def : ProcModel<"atom_sse4_2_movbe", SLMModel,
                ProcessorFeatures.GLMFeatures,
                ProcessorFeatures.SLMTuning>;
def : ProcModel<"goldmont", SLMModel,
                ProcessorFeatures.GLMFeatures,
                ProcessorFeatures.GLMTuning>;
foreach P = ["goldmont_plus", "goldmont-plus"] in {
  def : ProcModel<P, SLMModel,
                  ProcessorFeatures.GLPFeatures,
                  ProcessorFeatures.GLPTuning>;
}
def : ProcModel<"tremont", SLMModel,
                ProcessorFeatures.TRMFeatures,
                ProcessorFeatures.TRMTuning>;
// These use the Tremont tuning list together with the AlderlakeP model.
foreach P = ["sierraforest", "grandridge"] in {
  def : ProcModel<P, AlderlakePModel,
                  ProcessorFeatures.SRFFeatures,
                  ProcessorFeatures.TRMTuning>;
}

// "Arrandale" along with corei3 and corei5
foreach P = ["nehalem", "corei7", "core_i7_sse4_2"] in {
  def : ProcModel<P, SandyBridgeModel,
                  ProcessorFeatures.NHMFeatures,
                  ProcessorFeatures.NHMTuning>;
}
// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
foreach P = ["westmere", "core_aes_pclmulqdq"] in {
  def : ProcModel<P, SandyBridgeModel, ProcessorFeatures.WSMFeatures,
                  ProcessorFeatures.WSMTuning>;
}

foreach P = ["sandybridge", "corei7-avx", "core_2nd_gen_avx"] in {
  def : ProcModel<P, SandyBridgeModel, ProcessorFeatures.SNBFeatures,
                  ProcessorFeatures.SNBTuning>;
}

foreach P = ["ivybridge", "core-avx-i", "core_3rd_gen_avx"] in {
  def : ProcModel<P, SandyBridgeModel, ProcessorFeatures.IVBFeatures,
                  ProcessorFeatures.IVBTuning>;
}

foreach P = ["haswell", "core-avx2", "core_4th_gen_avx",
             "core_4th_gen_avx_tsx"] in {
  def : ProcModel<P, HaswellModel, ProcessorFeatures.HSWFeatures,
                  ProcessorFeatures.HSWTuning>;
}

foreach P = ["broadwell", "core_5th_gen_avx", "core_5th_gen_avx_tsx"] in {
  def : ProcModel<P, BroadwellModel, ProcessorFeatures.BDWFeatures,
                  ProcessorFeatures.BDWTuning>;
}

def : ProcModel<"skylake", SkylakeClientModel, ProcessorFeatures.SKLFeatures,
                ProcessorFeatures.SKLTuning>;

// FIXME: define KNL scheduler model
foreach P = ["knl", "mic_avx512"] in {
  def : ProcModel<P, HaswellModel, ProcessorFeatures.KNLFeatures,
                  ProcessorFeatures.KNLTuning>;
}
// "knm" has its own feature list but shares the KNL tuning list.
def : ProcModel<"knm", HaswellModel, ProcessorFeatures.KNMFeatures,
                ProcessorFeatures.KNLTuning>;

foreach P = ["skylake-avx512", "skx", "skylake_avx512"] in {
  def : ProcModel<P, SkylakeServerModel, ProcessorFeatures.SKXFeatures,
                  ProcessorFeatures.SKXTuning>;
}

def : ProcModel<"cascadelake", SkylakeServerModel,
                ProcessorFeatures.CLXFeatures, ProcessorFeatures.CLXTuning>;
def : ProcModel<"cooperlake", SkylakeServerModel,
                ProcessorFeatures.CPXFeatures, ProcessorFeatures.CPXTuning>;
def : ProcModel<"cannonlake", SkylakeServerModel,
                ProcessorFeatures.CNLFeatures, ProcessorFeatures.CNLTuning>;
foreach P = ["icelake-client", "icelake_client"] in {
  def : ProcModel<P, IceLakeModel,
                  ProcessorFeatures.ICLFeatures, ProcessorFeatures.ICLTuning>;
}
def : ProcModel<"rocketlake", IceLakeModel,
                ProcessorFeatures.ICLFeatures, ProcessorFeatures.ICLTuning>;
foreach P = ["icelake-server", "icelake_server"] in {
  def : ProcModel<P, IceLakeModel,
                  ProcessorFeatures.ICXFeatures, ProcessorFeatures.ICXTuning>;
}
def : ProcModel<"tigerlake", IceLakeModel,
                ProcessorFeatures.TGLFeatures, ProcessorFeatures.TGLTuning>;
def : ProcModel<"sapphirerapids", SapphireRapidsModel,
                ProcessorFeatures.SPRFeatures, ProcessorFeatures.SPRTuning>;
def : ProcModel<"alderlake", AlderlakePModel,
                ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
// FIXME: Use Gracemont Schedule Model when it is ready.
def : ProcModel<"gracemont", AlderlakePModel,
                ProcessorFeatures.ADLFeatures, ProcessorFeatures.GRTTuning>;
def : ProcModel<"raptorlake", AlderlakePModel,
                ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
def : ProcModel<"meteorlake", AlderlakePModel,
                ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
def : ProcModel<"arrowlake", AlderlakePModel,
                ProcessorFeatures.SRFFeatures, ProcessorFeatures.ADLTuning>;
foreach P = ["arrowlake-s", "arrowlake_s", "lunarlake"] in {
  def : ProcModel<P, AlderlakePModel,
                  ProcessorFeatures.ARLSFeatures, ProcessorFeatures.ADLTuning>;
}
def : ProcModel<"pantherlake", AlderlakePModel,
                ProcessorFeatures.PTLFeatures, ProcessorFeatures.ADLTuning>;
// Clearwater Forest uses the same TRMTuning list as the "sierraforest" and
// "grandridge" entries rather than the Alder Lake client tuning list.
// NOTE(review): this treats it as an E-core-only server part like those two;
// confirm against the definition of CWFFeatures.
def : ProcModel<"clearwaterforest", AlderlakePModel,
                ProcessorFeatures.CWFFeatures, ProcessorFeatures.TRMTuning>;
def : ProcModel<"graniterapids", SapphireRapidsModel,
                ProcessorFeatures.GNRFeatures, ProcessorFeatures.SPRTuning>;
def : ProcModel<"emeraldrapids", SapphireRapidsModel,
                ProcessorFeatures.SPRFeatures, ProcessorFeatures.SPRTuning>;
foreach P = ["graniterapids-d", "graniterapids_d"] in {
  def : ProcModel<P, SapphireRapidsModel,
                  ProcessorFeatures.GNRDFeatures, ProcessorFeatures.SPRTuning>;
}

// AMD CPUs.

def : Proc<"k6",   [FeatureX87, FeatureCX8, FeatureMMX],
                   [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"k6-2", [FeatureX87, FeatureCX8, Feature3DNow],
                   [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"k6-3", [FeatureX87, FeatureCX8, Feature3DNow],
                   [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

foreach P = ["athlon", "athlon-tbird"] in {
  def : Proc<P, [FeatureX87, FeatureCX8, FeatureCMOV, Feature3DNowA,
                 FeatureNOPL],
                [TuningSlowSHLD, TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}

foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in {
  def : Proc<P, [FeatureX87, FeatureCX8, FeatureCMOV,
                 FeatureSSE1, Feature3DNowA, FeatureFXSR, FeatureNOPL],
                [TuningSlowSHLD, TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}

foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in {
  def : Proc<P, [FeatureX87, FeatureCX8, FeatureSSE2, Feature3DNowA,
                 FeatureFXSR, FeatureNOPL, FeatureX86_64, FeatureCMOV],
                [TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16,
                 TuningSBBDepBreaking, TuningInsertVZEROUPPER]>;
}

// Same as the k8 group above, with SSE3 instead of SSE2 and CX16 added.
foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
  def : Proc<P, [FeatureX87, FeatureCX8, FeatureSSE3, Feature3DNowA,
                 FeatureFXSR, FeatureNOPL, FeatureCX16, FeatureCMOV,
                 FeatureX86_64],
                [TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16,
                 TuningSBBDepBreaking, TuningInsertVZEROUPPER]>;
}

foreach P = ["amdfam10", "barcelona"] in {
  def : Proc<P, ProcessorFeatures.BarcelonaFeatures,
             ProcessorFeatures.BarcelonaTuning>;
}

// Bobcat
def : Proc<"btver1", ProcessorFeatures.BtVer1Features,
           ProcessorFeatures.BtVer1Tuning>;
// Jaguar
def : ProcModel<"btver2", BtVer2Model, ProcessorFeatures.BtVer2Features,
                ProcessorFeatures.BtVer2Tuning>;

// Bulldozer
// Note: bdver1 is scheduled with BdVer2Model.
def : ProcModel<"bdver1", BdVer2Model, ProcessorFeatures.BdVer1Features,
                ProcessorFeatures.BdVer1Tuning>;
// Piledriver
def : ProcModel<"bdver2", BdVer2Model, ProcessorFeatures.BdVer2Features,
                ProcessorFeatures.BdVer2Tuning>;
// Steamroller
def : Proc<"bdver3", ProcessorFeatures.BdVer3Features,
           ProcessorFeatures.BdVer3Tuning>;
// Excavator
def : Proc<"bdver4", ProcessorFeatures.BdVer4Features,
           ProcessorFeatures.BdVer4Tuning>;

def : ProcModel<"znver1", Znver1Model, ProcessorFeatures.ZNFeatures,
                ProcessorFeatures.ZNTuning>;
def : ProcModel<"znver2", Znver2Model, ProcessorFeatures.ZN2Features,
                ProcessorFeatures.ZN2Tuning>;
def : ProcModel<"znver3", Znver3Model, ProcessorFeatures.ZN3Features,
                ProcessorFeatures.ZN3Tuning>;
def : ProcModel<"znver4", Znver4Model, ProcessorFeatures.ZN4Features,
                ProcessorFeatures.ZN4Tuning>;

def : Proc<"geode", [FeatureX87, FeatureCX8, Feature3DNowA],
                    [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

def : Proc<"winchip-c6", [FeatureX87, FeatureMMX],
                         [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"winchip2", [FeatureX87, Feature3DNow],
                       [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"c3", [FeatureX87, Feature3DNow],
                 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"c3-2", [FeatureX87, FeatureCX8, FeatureMMX,
                    FeatureSSE1, FeatureFXSR, FeatureCMOV],
                   [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

// We also provide a generic 64-bit specific x86 processor model which tries to
// be good for modern chips without enabling instruction set encodings past the
// basic SSE2 and 64-bit ones. It disables slow things from any mainstream and
// modern 64-bit x86 chip, and enables features that are generally beneficial.
//
// We currently use the Sandy Bridge model as the default scheduling model as
// we use it across Nehalem, Westmere, Sandy Bridge, and Ivy Bridge which
// covers a huge swath of x86 processors. If there are specific scheduling
// knobs which need to be tuned differently for AMD chips, we might consider
// forming a common base for them.
def : ProcModel<"x86-64", SandyBridgeModel,
                ProcessorFeatures.X86_64V1Features,
                ProcessorFeatures.X86_64V1Tuning>;
// Close to Sandybridge.
def : ProcModel<"x86-64-v2", SandyBridgeModel,
                ProcessorFeatures.X86_64V2Features,
                ProcessorFeatures.X86_64V2Tuning>;
// Close to Haswell.
def : ProcModel<"x86-64-v3", HaswellModel,
                ProcessorFeatures.X86_64V3Features,
                ProcessorFeatures.X86_64V3Tuning>;
// Close to the AVX-512 level implemented by Xeon Scalable Processors.
def : ProcModel<"x86-64-v4", SkylakeServerModel,
                ProcessorFeatures.X86_64V4Features,
                ProcessorFeatures.X86_64V4Tuning>;

//===----------------------------------------------------------------------===//
// Calling Conventions
//===----------------------------------------------------------------------===//

include "X86CallingConv.td"


//===----------------------------------------------------------------------===//
// Assembly Parser
//===----------------------------------------------------------------------===//

// AT&T syntax is parser variant 0: "#" is the comment delimiter and hard-coded
// registers are recognized by their "%" prefix.
def ATTAsmParserVariant : AsmParserVariant {
  let Variant          = 0;
  let Name             = "att";
  let CommentDelimiter = "#";
  let RegisterPrefix   = "%";
}

// Intel syntax is parser variant 1: ";" is the comment delimiter and registers
// carry no prefix.
def IntelAsmParserVariant : AsmParserVariant {
  let Variant          = 1;
  let Name             = "intel";
  let CommentDelimiter = ";";
  let RegisterPrefix   = "";
}

//===----------------------------------------------------------------------===//
// Assembly Printers
//===----------------------------------------------------------------------===//

// The X86 target supports two different syntaxes for emitting machine code.
// The choice is controlled by the -x86-asm-syntax={att|intel} option. The
// writer variant numbers below line up with the parser variants above.
def ATTAsmWriter : AsmWriter {
  let AsmWriterClassName = "ATTInstPrinter";
  let Variant            = 0;
}
def IntelAsmWriter : AsmWriter {
  let AsmWriterClassName = "IntelInstPrinter";
  let Variant            = 1;
}

// Top-level target record tying together the instruction set, both assembly
// parser variants, and both assembly writers.
def X86 : Target {
  // Information about the instructions...
  let InstructionSet         = X86InstrInfo;
  let AssemblyParserVariants = [ATTAsmParserVariant, IntelAsmParserVariant];
  let AssemblyWriters        = [ATTAsmWriter, IntelAsmWriter];
  let AllowRegisterRenaming  = 1;
}

//===----------------------------------------------------------------------===//
// Pfm Counters
//===----------------------------------------------------------------------===//

include "X86PfmCounters.td"