//===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This is a target description file for the Intel i386 architecture, referred
// to here as the "X86" architecture.
//
//===----------------------------------------------------------------------===//

// Get the target-independent interfaces which we are implementing...
//
include "llvm/Target/Target.td"

// Reading guide for the records below. SubtargetFeature is declared in
// Target.td; its template arguments are, in order:
//   1. the feature name used on the command line (e.g. -mattr=+name),
//   2. the subtarget field that is set when the feature is enabled,
//   3. the value written to that field,
//   4. a human-readable description,
//   5. (optional) the list of features implied by this one.

//===----------------------------------------------------------------------===//
// X86 Subtarget state
//
// disregarding specific ABI / programming model
def Is64Bit : SubtargetFeature<
    "64bit-mode", "Is64Bit", "true",
    "64-bit mode (x86_64)">;
def Is32Bit : SubtargetFeature<
    "32bit-mode", "Is32Bit", "true",
    "32-bit mode (80386)">;
def Is16Bit : SubtargetFeature<
    "16bit-mode", "Is16Bit", "true",
    "16-bit mode (i8086)">;

//===----------------------------------------------------------------------===//
// X86 Subtarget ISA features
//===----------------------------------------------------------------------===//

def FeatureX87 : SubtargetFeature<
    "x87", "HasX87", "true",
    "Enable X87 float instructions">;

def FeatureNOPL : SubtargetFeature<
    "nopl", "HasNOPL", "true",
    "Enable NOPL instruction (generally pentium pro+)">;

def FeatureCMOV : SubtargetFeature<
    "cmov", "HasCMOV", "true",
    "Enable conditional move instructions">;

def FeatureCX8 : SubtargetFeature<
    "cx8", "HasCX8", "true",
    "Support CMPXCHG8B instructions">;

def FeatureCRC32 : SubtargetFeature<
    "crc32", "HasCRC32", "true",
    "Enable SSE 4.2 CRC32 instruction (used when SSE4.2 is supported but function is GPR only)">;

def FeaturePOPCNT : SubtargetFeature<
    "popcnt", "HasPOPCNT", "true",
    "Support POPCNT instruction">;

def FeatureFXSR : SubtargetFeature<
    "fxsr", "HasFXSR", "true",
    "Support fxsave/fxrestore instructions">;

// XSAVE family: every variant implies the base xsave feature.
def FeatureXSAVE : SubtargetFeature<
    "xsave", "HasXSAVE", "true",
    "Support xsave instructions">;

def FeatureXSAVEOPT : SubtargetFeature<
    "xsaveopt", "HasXSAVEOPT", "true",
    "Support xsaveopt instructions",
    [FeatureXSAVE]>;

def FeatureXSAVEC : SubtargetFeature<
    "xsavec", "HasXSAVEC", "true",
    "Support xsavec instructions",
    [FeatureXSAVE]>;

def FeatureXSAVES : SubtargetFeature<
    "xsaves", "HasXSAVES", "true",
    "Support xsaves instructions",
    [FeatureXSAVE]>;

// SSE levels. They all write the single X86SSELevel field, and each level
// implies the one before it.
def FeatureSSE1 : SubtargetFeature<
    "sse", "X86SSELevel", "SSE1",
    "Enable SSE instructions">;
def FeatureSSE2 : SubtargetFeature<
    "sse2", "X86SSELevel", "SSE2",
    "Enable SSE2 instructions",
    [FeatureSSE1]>;
def FeatureSSE3 : SubtargetFeature<
    "sse3", "X86SSELevel", "SSE3",
    "Enable SSE3 instructions",
    [FeatureSSE2]>;
def FeatureSSSE3 : SubtargetFeature<
    "ssse3", "X86SSELevel", "SSSE3",
    "Enable SSSE3 instructions",
    [FeatureSSE3]>;
def FeatureSSE41 : SubtargetFeature<
    "sse4.1", "X86SSELevel", "SSE41",
    "Enable SSE 4.1 instructions",
    [FeatureSSSE3]>;
def FeatureSSE42 : SubtargetFeature<
    "sse4.2", "X86SSELevel", "SSE42",
    "Enable SSE 4.2 instructions",
    [FeatureSSE41]>;
// The MMX subtarget feature is separate from the rest of the SSE features
// because it's important (for odd compatibility reasons) to be able to
// turn it off explicitly while allowing SSE+ to be on.
def FeatureMMX : SubtargetFeature<
    "mmx", "HasMMX", "true",
    "Enable MMX instructions">;
// All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied
// feature, because SSE2 can be disabled (e.g. for compiling OS kernels)
// without disabling 64-bit mode. Nothing should imply this feature bit. It
// is used to enforce that only 64-bit capable CPUs are used in 64-bit mode.
def FeatureX86_64 : SubtargetFeature<
    "64bit", "HasX86_64", "true",
    "Support 64-bit instructions">;
def FeatureCX16 : SubtargetFeature<
    "cx16", "HasCX16", "true",
    "64-bit with cmpxchg16b (this is true for most x86-64 chips, but not the first AMD chips)",
    [FeatureCX8]>;
def FeatureSSE4A : SubtargetFeature<
    "sse4a", "HasSSE4A", "true",
    "Support SSE 4a instructions",
    [FeatureSSE3]>;

// AVX levels continue the X86SSELevel chain started by the SSE features.
def FeatureAVX : SubtargetFeature<
    "avx", "X86SSELevel", "AVX",
    "Enable AVX instructions",
    [FeatureSSE42]>;
def FeatureAVX2 : SubtargetFeature<
    "avx2", "X86SSELevel", "AVX2",
    "Enable AVX2 instructions",
    [FeatureAVX]>;
// Description corrected: the operation is a fused multiply-add.
def FeatureFMA : SubtargetFeature<
    "fma", "HasFMA", "true",
    "Enable three-operand fused multiply-add",
    [FeatureAVX]>;
def FeatureF16C : SubtargetFeature<
    "f16c", "HasF16C", "true",
    "Support 16-bit floating point conversion instructions",
    [FeatureAVX]>;
def FeatureEVEX512 : SubtargetFeature<
    "evex512", "HasEVEX512", "true",
    "Support ZMM and 64-bit mask instructions">;
def FeatureAVX512 : SubtargetFeature<
    "avx512f", "X86SSELevel", "AVX512",
    "Enable AVX-512 instructions",
    [FeatureAVX2, FeatureFMA, FeatureF16C]>;
def FeatureCDI : SubtargetFeature<
    "avx512cd", "HasCDI", "true",
    "Enable AVX-512 Conflict Detection Instructions",
    [FeatureAVX512]>;
def FeatureVPOPCNTDQ : SubtargetFeature<
    "avx512vpopcntdq", "HasVPOPCNTDQ", "true",
    "Enable AVX-512 Population Count Instructions",
    [FeatureAVX512]>;
def FeaturePREFETCHI : SubtargetFeature<
    "prefetchi", "HasPREFETCHI", "true",
    "Prefetch instruction with T0 or T1 Hint">;
def FeatureDQI : SubtargetFeature<
    "avx512dq", "HasDQI", "true",
    "Enable AVX-512 Doubleword and Quadword Instructions",
    [FeatureAVX512]>;
def FeatureBWI : SubtargetFeature<
    "avx512bw", "HasBWI", "true",
    "Enable AVX-512 Byte and Word Instructions",
    [FeatureAVX512]>;
def FeatureVLX : SubtargetFeature<
    "avx512vl", "HasVLX", "true",
    "Enable AVX-512 Vector Length eXtensions",
    [FeatureAVX512]>;
def FeatureVBMI : SubtargetFeature<
    "avx512vbmi", "HasVBMI", "true",
    "Enable AVX-512 Vector Byte Manipulation Instructions",
    [FeatureBWI]>;
def FeatureVBMI2 : SubtargetFeature<
    "avx512vbmi2", "HasVBMI2", "true",
    "Enable AVX-512 further Vector Byte Manipulation Instructions",
    [FeatureBWI]>;
def FeatureAVXIFMA : SubtargetFeature<
    "avxifma", "HasAVXIFMA", "true",
    "Enable AVX-IFMA",
    [FeatureAVX2]>;
// Description corrected: "Multiply-Add", not "Multiple-Add".
def FeatureIFMA : SubtargetFeature<
    "avx512ifma", "HasIFMA", "true",
    "Enable AVX-512 Integer Fused Multiply-Add",
    [FeatureAVX512]>;
def FeaturePKU : SubtargetFeature<
    "pku", "HasPKU", "true",
    "Enable protection keys">;
def FeatureVNNI : SubtargetFeature<
    "avx512vnni", "HasVNNI", "true",
    "Enable AVX-512 Vector Neural Network Instructions",
    [FeatureAVX512]>;
def FeatureAVXVNNI : SubtargetFeature<
    "avxvnni", "HasAVXVNNI", "true",
    "Support AVX_VNNI encoding",
    [FeatureAVX2]>;
def FeatureBF16 : SubtargetFeature<
    "avx512bf16", "HasBF16", "true",
    "Support bfloat16 floating point",
    [FeatureBWI]>;
def FeatureBITALG : SubtargetFeature<
    "avx512bitalg", "HasBITALG", "true",
    "Enable AVX-512 Bit Algorithms",
    [FeatureBWI]>;
def FeatureVP2INTERSECT : SubtargetFeature<
    "avx512vp2intersect", "HasVP2INTERSECT", "true",
    "Enable AVX-512 vp2intersect",
    [FeatureAVX512]>;
// FIXME: FP16 scalar intrinsics use the type v8f16, which is supposed to be
// guarded under condition hasVLX. So we imply it in FeatureFP16 currently.
// FIXME: FP16 conversion between f16 and i64 customize type v8i64, which is
// supposed to be guarded under condition hasDQI. So we imply it in FeatureFP16
// currently.
def FeatureFP16 : SubtargetFeature<
    "avx512fp16", "HasFP16", "true",
    "Support 16-bit floating point",
    [FeatureBWI, FeatureVLX, FeatureDQI]>;
def FeatureAVXVNNIINT8 : SubtargetFeature<
    "avxvnniint8", "HasAVXVNNIINT8", "true",
    "Enable AVX-VNNI-INT8",
    [FeatureAVX2]>;
def FeatureAVXVNNIINT16 : SubtargetFeature<
    "avxvnniint16", "HasAVXVNNIINT16", "true",
    "Enable AVX-VNNI-INT16",
    [FeatureAVX2]>;
def FeaturePCLMUL : SubtargetFeature<
    "pclmul", "HasPCLMUL", "true",
    "Enable packed carry-less multiplication instructions",
    [FeatureSSE2]>;
def FeatureGFNI : SubtargetFeature<
    "gfni", "HasGFNI", "true",
    "Enable Galois Field Arithmetic Instructions",
    [FeatureSSE2]>;
def FeatureVPCLMULQDQ : SubtargetFeature<
    "vpclmulqdq", "HasVPCLMULQDQ", "true",
    "Enable vpclmulqdq instructions",
    [FeatureAVX, FeaturePCLMUL]>;
// Description corrected: the operation is a fused multiply-add.
def FeatureFMA4 : SubtargetFeature<
    "fma4", "HasFMA4", "true",
    "Enable four-operand fused multiply-add",
    [FeatureAVX, FeatureSSE4A]>;
def FeatureXOP : SubtargetFeature<
    "xop", "HasXOP", "true",
    "Enable XOP instructions",
    [FeatureFMA4]>;
def FeatureSSEUnalignedMem : SubtargetFeature<
    "sse-unaligned-mem", "HasSSEUnalignedMem", "true",
    "Allow unaligned memory operands with SSE instructions (this may require setting a configuration bit in the processor)">;
def FeatureAES : SubtargetFeature<
    "aes", "HasAES", "true",
    "Enable AES instructions",
    [FeatureSSE2]>;
def FeatureVAES : SubtargetFeature<
    "vaes", "HasVAES", "true",
    "Promote selected AES instructions to AVX512/AVX registers",
    [FeatureAVX2, FeatureAES]>;
def FeatureTBM : SubtargetFeature<
    "tbm", "HasTBM", "true",
    "Enable TBM instructions">;
def FeatureLWP : SubtargetFeature<
    "lwp", "HasLWP", "true",
    "Enable LWP instructions">;
def FeatureMOVBE : SubtargetFeature<
    "movbe", "HasMOVBE", "true",
    "Support MOVBE instruction">;
// Note: the feature string is spelled "rdrnd", not "rdrand".
def FeatureRDRAND : SubtargetFeature<
    "rdrnd", "HasRDRAND", "true",
    "Support RDRAND instruction">;
def FeatureFSGSBase : SubtargetFeature<
    "fsgsbase", "HasFSGSBase", "true",
    "Support FS/GS Base instructions">;
def FeatureLZCNT : SubtargetFeature<
    "lzcnt", "HasLZCNT", "true",
    "Support LZCNT instruction">;
def FeatureBMI : SubtargetFeature<
    "bmi", "HasBMI", "true",
    "Support BMI instructions">;
def FeatureBMI2 : SubtargetFeature<
    "bmi2", "HasBMI2", "true",
    "Support BMI2 instructions">;
def FeatureRTM : SubtargetFeature<
    "rtm", "HasRTM", "true",
    "Support RTM instructions">;
def FeatureADX : SubtargetFeature<
    "adx", "HasADX", "true",
    "Support ADX instructions">;
def FeatureSHA : SubtargetFeature<
    "sha", "HasSHA", "true",
    "Enable SHA instructions",
    [FeatureSSE2]>;
def FeatureSHA512 : SubtargetFeature<
    "sha512", "HasSHA512", "true",
    "Support SHA512 instructions",
    [FeatureAVX2]>;
// Processor supports CET SHSTK - Control-Flow Enforcement Technology
// using Shadow Stack
def FeatureSHSTK : SubtargetFeature<
    "shstk", "HasSHSTK", "true",
    "Support CET Shadow-Stack instructions">;
def FeatureSM3 : SubtargetFeature<
    "sm3", "HasSM3", "true",
    "Support SM3 instructions",
    [FeatureAVX]>;
def FeatureSM4 : SubtargetFeature<
    "sm4", "HasSM4", "true",
    "Support SM4 instructions",
    [FeatureAVX2]>;
def FeaturePRFCHW : SubtargetFeature<
    "prfchw", "HasPRFCHW", "true",
    "Support PRFCHW instructions">;
def FeatureRDSEED : SubtargetFeature<
    "rdseed", "HasRDSEED", "true",
    "Support RDSEED instruction">;
def FeatureLAHFSAHF64 : SubtargetFeature<
    "sahf", "HasLAHFSAHF64", "true",
    "Support LAHF and SAHF instructions in 64-bit mode">;
def FeatureMWAITX : SubtargetFeature<
    "mwaitx", "HasMWAITX", "true",
    "Enable MONITORX/MWAITX timer functionality">;
def FeatureCLZERO : SubtargetFeature<
    "clzero", "HasCLZERO", "true",
    "Enable Cache Line Zero">;
def FeatureCLDEMOTE : SubtargetFeature<
    "cldemote", "HasCLDEMOTE", "true",
    "Enable Cache Line Demote">;
def FeaturePTWRITE : SubtargetFeature<
    "ptwrite", "HasPTWRITE", "true",
    "Support ptwrite instruction">;

// AMX: every extension implies the base amx-tile feature.
def FeatureAMXTILE : SubtargetFeature<
    "amx-tile", "HasAMXTILE", "true",
    "Support AMX-TILE instructions">;
def FeatureAMXINT8 : SubtargetFeature<
    "amx-int8", "HasAMXINT8", "true",
    "Support AMX-INT8 instructions",
    [FeatureAMXTILE]>;
def FeatureAMXBF16 : SubtargetFeature<
    "amx-bf16", "HasAMXBF16", "true",
    "Support AMX-BF16 instructions",
    [FeatureAMXTILE]>;
def FeatureAMXFP16 : SubtargetFeature<
    "amx-fp16", "HasAMXFP16", "true",
    "Support AMX amx-fp16 instructions",
    [FeatureAMXTILE]>;
def FeatureAMXCOMPLEX : SubtargetFeature<
    "amx-complex", "HasAMXCOMPLEX", "true",
    "Support AMX-COMPLEX instructions",
    [FeatureAMXTILE]>;

def FeatureCMPCCXADD : SubtargetFeature<
    "cmpccxadd", "HasCMPCCXADD", "true",
    "Support CMPCCXADD instructions">;
// The implied-feature list is spelled out explicitly and is empty.
def FeatureRAOINT : SubtargetFeature<
    "raoint", "HasRAOINT", "true",
    "Support RAO-INT instructions",
    []>;
def FeatureAVXNECONVERT : SubtargetFeature<
    "avxneconvert", "HasAVXNECONVERT", "true",
    "Support AVX-NE-CONVERT instructions",
    [FeatureAVX2]>;
def FeatureINVPCID : SubtargetFeature<
    "invpcid", "HasINVPCID", "true",
    "Invalidate Process-Context Identifier">;
def FeatureSGX : SubtargetFeature<
    "sgx", "HasSGX", "true",
    "Enable Software Guard Extensions">;
def FeatureCLFLUSHOPT : SubtargetFeature<
    "clflushopt", "HasCLFLUSHOPT", "true",
    "Flush A Cache Line Optimized">;
def FeatureCLWB : SubtargetFeature<
    "clwb", "HasCLWB", "true",
    "Cache Line Write Back">;
def FeatureWBNOINVD : SubtargetFeature<
    "wbnoinvd", "HasWBNOINVD", "true",
    "Write Back No Invalidate">;
def FeatureRDPID : SubtargetFeature<
    "rdpid", "HasRDPID", "true",
    "Support RDPID instructions">;
def FeatureRDPRU : SubtargetFeature<
    "rdpru", "HasRDPRU", "true",
    "Support RDPRU instructions">;
def FeatureWAITPKG : SubtargetFeature<
    "waitpkg", "HasWAITPKG", "true",
    "Wait and pause enhancements">;
def FeatureENQCMD : SubtargetFeature<
    "enqcmd", "HasENQCMD", "true",
    "Has ENQCMD instructions">;

// Key Locker: the wide variant implies the base kl feature.
def FeatureKL : SubtargetFeature<
    "kl", "HasKL", "true",
    "Support Key Locker kl Instructions",
    [FeatureSSE2]>;
def FeatureWIDEKL : SubtargetFeature<
    "widekl", "HasWIDEKL", "true",
    "Support Key Locker wide Instructions",
    [FeatureKL]>;

def FeatureHRESET : SubtargetFeature<
    "hreset", "HasHRESET", "true",
    "Has hreset instruction">;
def FeatureSERIALIZE : SubtargetFeature<
    "serialize", "HasSERIALIZE", "true",
    "Has serialize instruction">;
def FeatureTSXLDTRK : SubtargetFeature<
    "tsxldtrk", "HasTSXLDTRK", "true",
    "Support TSXLDTRK instructions">;
def FeatureUINTR : SubtargetFeature<
    "uintr", "HasUINTR", "true",
    "Has UINTR Instructions">;
def FeatureUSERMSR : SubtargetFeature<
    "usermsr", "HasUSERMSR", "true",
    "Support USERMSR instructions">;
def FeaturePCONFIG : SubtargetFeature<
    "pconfig", "HasPCONFIG", "true",
    "platform configuration instruction">;
def FeatureMOVDIRI : SubtargetFeature<
    "movdiri", "HasMOVDIRI", "true",
    "Support movdiri instruction (direct store integer)">;
def FeatureMOVDIR64B : SubtargetFeature<
    "movdir64b", "HasMOVDIR64B", "true",
    "Support movdir64b instruction (direct store 64 bytes)">;

// AVX10.1: the 256-bit feature implies the listed AVX-512 related features;
// the 512-bit feature adds evex512 on top of it.
def FeatureAVX10_1 : SubtargetFeature<
    "avx10.1-256", "HasAVX10_1", "true",
    "Support AVX10.1 up to 256-bit instruction",
    [FeatureCDI, FeatureVBMI, FeatureIFMA, FeatureVNNI,
     FeatureBF16, FeatureVPOPCNTDQ, FeatureVBMI2, FeatureBITALG,
     FeatureVAES, FeatureVPCLMULQDQ, FeatureFP16]>;
def FeatureAVX10_1_512 : SubtargetFeature<
    "avx10.1-512", "HasAVX10_1_512", "true",
    "Support AVX10.1 up to 512-bit instruction",
    [FeatureAVX10_1, FeatureEVEX512]>;

def FeatureEGPR : SubtargetFeature<
    "egpr", "HasEGPR", "true",
    "Support extended general purpose register">;
def FeaturePush2Pop2 : SubtargetFeature<
    "push2pop2", "HasPush2Pop2", "true",
    "Support PUSH2/POP2 instructions">;
def FeaturePPX : SubtargetFeature<
    "ppx", "HasPPX", "true",
    "Support Push-Pop Acceleration">;
def FeatureNDD : SubtargetFeature<
    "ndd", "HasNDD", "true",
    "Support non-destructive destination">;
def FeatureCCMP : SubtargetFeature<
    "ccmp", "HasCCMP", "true",
    "Support conditional cmp & test instructions">;
def FeatureNF : SubtargetFeature<
    "nf", "HasNF", "true",
    "Support status flags update suppression">;
def FeatureCF : SubtargetFeature<
    "cf", "HasCF", "true",
    "Support conditional faulting">;
def FeatureZU : SubtargetFeature<
    "zu", "HasZU", "true",
    "Support zero-upper SETcc/IMUL">;
def FeatureUseGPR32InInlineAsm : SubtargetFeature<
    "inline-asm-use-gpr32", "UseInlineAsmGPR32", "true",
    "Enable use of GPR32 in inline assembly for APX">;

// Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
// "string operations"). See "REP String Enhancement" in the Intel Software
// Development Manual. This feature essentially means that REP MOVSB will copy
// using the largest available size instead of copying bytes one by one, making
// it at least as fast as REPMOVS{W,D,Q}.
def FeatureERMSB : SubtargetFeature<
    "ermsb", "HasERMSB", "true",
    "REP MOVS/STOS are fast">;

// Icelake and newer processors have Fast Short REP MOV.
def FeatureFSRM : SubtargetFeature<
    "fsrm", "HasFSRM", "true",
    "REP MOVSB of short lengths is faster">;

def FeatureSoftFloat : SubtargetFeature<
    "soft-float", "UseSoftFloat", "true",
    "Use software floating point features">;

//===----------------------------------------------------------------------===//
// X86 Subtarget Security Mitigation features
//===----------------------------------------------------------------------===//

// Lower indirect calls using a special construct called a `retpoline` to
// mitigate potential Spectre v2 attacks against them.
def FeatureRetpolineIndirectCalls : SubtargetFeature<
    "retpoline-indirect-calls", "UseRetpolineIndirectCalls", "true",
    "Remove speculation of indirect calls from the generated code">;

// Lower indirect branches and switches either using conditional branch trees
// or using a special construct called a `retpoline` to mitigate potential
// Spectre v2 attacks against them.
def FeatureRetpolineIndirectBranches : SubtargetFeature<
    "retpoline-indirect-branches", "UseRetpolineIndirectBranches", "true",
    "Remove speculation of indirect branches from the generated code">;

// Deprecated umbrella feature for enabling both `retpoline-indirect-calls` and
// `retpoline-indirect-branches` above.
def FeatureRetpoline : SubtargetFeature<
    "retpoline", "DeprecatedUseRetpoline", "true",
    "Remove speculation of indirect branches from the "
    "generated code, either by avoiding them entirely or "
    "lowering them with a speculation blocking construct",
    [FeatureRetpolineIndirectCalls,
     FeatureRetpolineIndirectBranches]>;

// Rely on external thunks for the emitted retpoline calls. This allows users
// to provide their own custom thunk definitions in highly specialized
// environments such as a kernel that does boot-time hot patching.
def FeatureRetpolineExternalThunk : SubtargetFeature<
    "retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
    "When lowering an indirect call or branch using a `retpoline`, rely "
    "on the specified user provided thunk rather than emitting one "
    "ourselves. Only has effect when combined with some other retpoline "
    "feature",
    [FeatureRetpolineIndirectCalls]>;

// Mitigate LVI attacks against indirect calls/branches and call returns
def FeatureLVIControlFlowIntegrity : SubtargetFeature<
    "lvi-cfi", "UseLVIControlFlowIntegrity", "true",
    "Prevent indirect calls/branches from using a memory operand, and "
    "precede all indirect calls/branches from a register with an "
    "LFENCE instruction to serialize control flow. Also decompose RET "
    "instructions into a POP+LFENCE+JMP sequence.">;

// Enable SESES to mitigate speculative execution attacks
// (implies the LVI control-flow-integrity feature above).
def FeatureSpeculativeExecutionSideEffectSuppression : SubtargetFeature<
    "seses", "UseSpeculativeExecutionSideEffectSuppression", "true",
    "Prevent speculative execution side channel timing attacks by "
    "inserting a speculation barrier before memory reads, memory writes, "
    "and conditional branches. Implies LVI Control Flow integrity.",
    [FeatureLVIControlFlowIntegrity]>;

// Mitigate LVI attacks against data loads
def FeatureLVILoadHardening : SubtargetFeature<
    "lvi-load-hardening", "UseLVILoadHardening", "true",
    "Insert LFENCE instructions to prevent data speculatively injected "
    "into loads from being used maliciously.">;

def FeatureTaggedGlobals : SubtargetFeature<
    "tagged-globals", "AllowTaggedGlobals", "true",
    "Use an instruction sequence for taking the address of a global "
    "that allows a memory tag in the upper address bits.">;

// Control codegen mitigation against Straight Line Speculation vulnerability.
def FeatureHardenSlsRet : SubtargetFeature<
    "harden-sls-ret", "HardenSlsRet", "true",
    "Harden against straight line speculation across RET instructions.">;

def FeatureHardenSlsIJmp : SubtargetFeature<
    "harden-sls-ijmp", "HardenSlsIJmp", "true",
    "Harden against straight line speculation across indirect JMP instructions.">;

//===----------------------------------------------------------------------===//
// X86 Subtarget Tuning features
//===----------------------------------------------------------------------===//
def TuningPreferMovmskOverVTest : SubtargetFeature<
    "prefer-movmsk-over-vtest", "PreferMovmskOverVTest", "true",
    "Prefer movmsk over vtest instruction">;

def TuningSlowSHLD : SubtargetFeature<
    "slow-shld", "IsSHLDSlow", "true",
    "SHLD instruction is slow">;

def TuningSlowPMULLD : SubtargetFeature<
    "slow-pmulld", "IsPMULLDSlow", "true",
    "PMULLD instruction is slow (compared to PMULLW/PMULHW and PMULUDQ)">;

def TuningSlowPMADDWD : SubtargetFeature<
    "slow-pmaddwd", "IsPMADDWDSlow", "true",
    "PMADDWD is slower than PMULLD">;

// FIXME: This should not apply to CPUs that do not have SSE.
def TuningSlowUAMem16 : SubtargetFeature<
    "slow-unaligned-mem-16", "IsUnalignedMem16Slow", "true",
    "Slow unaligned 16-byte memory access">;

def TuningSlowUAMem32 : SubtargetFeature<
    "slow-unaligned-mem-32", "IsUnalignedMem32Slow", "true",
    "Slow unaligned 32-byte memory access">;

def TuningLEAForSP : SubtargetFeature<
    "lea-sp", "UseLeaForSP", "true",
    "Use LEA for adjusting the stack pointer (this is an optimization for Intel Atom processors)">;

// True if 8-bit divisions are significantly faster than
// 32-bit divisions and should be used when possible.
def TuningSlowDivide32 : SubtargetFeature<
    "idivl-to-divb", "HasSlowDivide32", "true",
    "Use 8-bit divide for positive values less than 256">;

// True if 32-bit divides are significantly faster than
// 64-bit divisions and should be used when possible.
def TuningSlowDivide64 : SubtargetFeature<
    "idivq-to-divl", "HasSlowDivide64", "true",
    "Use 32-bit divide for positive values less than 2^32">;

def TuningPadShortFunctions : SubtargetFeature<
    "pad-short-functions", "PadShortFunctions", "true",
    "Pad short functions (to prevent a stall when returning too early)">;

// On some processors, instructions that implicitly take two memory operands are
// slow. In practice, this means that CALL, PUSH, and POP with memory operands
// should be avoided in favor of a MOV + register CALL/PUSH/POP.
def TuningSlowTwoMemOps : SubtargetFeature<
    "slow-two-mem-ops", "SlowTwoMemOps", "true",
    "Two memory operand instructions are slow">;

// True if the LEA instruction inputs have to be ready at address generation
// (AG) time.
def TuningLEAUsesAG : SubtargetFeature<
    "lea-uses-ag", "LeaUsesAG", "true",
    "LEA instruction needs inputs at AG stage">;

def TuningSlowLEA : SubtargetFeature<
    "slow-lea", "SlowLEA", "true",
    "LEA instruction with certain arguments is slow">;

// True if the LEA instruction has all three source operands: base, index,
// and offset or if the LEA instruction uses base and index registers where
// the base is EBP, RBP, or R13
def TuningSlow3OpsLEA : SubtargetFeature<
    "slow-3ops-lea", "Slow3OpsLEA", "true",
    "LEA instruction with 3 ops or certain registers is slow">;

// True if INC and DEC instructions are slow when writing to flags
def TuningSlowIncDec : SubtargetFeature<
    "slow-incdec", "SlowIncDec", "true",
    "INC and DEC instructions are slower than ADD and SUB">;

// False-dependency tunings: each record names the instructions that have a
// false dependency on their destination register.
def TuningPOPCNTFalseDeps : SubtargetFeature<
    "false-deps-popcnt", "HasPOPCNTFalseDeps", "true",
    "POPCNT has a false dependency on dest register">;

def TuningLZCNTFalseDeps : SubtargetFeature<
    "false-deps-lzcnt-tzcnt", "HasLZCNTFalseDeps", "true",
    "LZCNT/TZCNT have a false dependency on dest register">;

def TuningMULCFalseDeps : SubtargetFeature<
    "false-deps-mulc", "HasMULCFalseDeps", "true",
    "VF[C]MULCPH/SH has a false dependency on dest register">;

def TuningPERMFalseDeps : SubtargetFeature<
    "false-deps-perm", "HasPERMFalseDeps", "true",
    "VPERMD/Q/PS/PD has a false dependency on dest register">;

def TuningRANGEFalseDeps : SubtargetFeature<
    "false-deps-range", "HasRANGEFalseDeps", "true",
    "VRANGEPD/PS/SD/SS has a false dependency on dest register">;

// Description corrected: the packed mnemonic is VGETMANTPS/PD; the previous
// text named a non-existent "VGETMANDPS".
def TuningGETMANTFalseDeps : SubtargetFeature<
    "false-deps-getmant", "HasGETMANTFalseDeps", "true",
    "VGETMANTSS/SD/SH and VGETMANTPS/PD(memory version) has a"
    " false dependency on dest register">;

def TuningMULLQFalseDeps : SubtargetFeature<
    "false-deps-mullq", "HasMULLQFalseDeps", "true",
    "VPMULLQ has a false dependency on dest register">;

def TuningSBBDepBreaking : SubtargetFeature<
    "sbb-dep-breaking", "HasSBBDepBreaking", "true",
    "SBB with same register has no source dependency">;

// On recent X86 (port bound) processors, it's preferable to combine to a
// single shuffle using a variable mask over multiple fixed shuffles.
def TuningFastVariableCrossLaneShuffle : SubtargetFeature<
    "fast-variable-crosslane-shuffle", "HasFastVariableCrossLaneShuffle", "true",
    "Cross-lane shuffles with variable masks are fast">;
def TuningFastVariablePerLaneShuffle : SubtargetFeature<
    "fast-variable-perlane-shuffle", "HasFastVariablePerLaneShuffle", "true",
    "Per-lane shuffles with variable masks are fast">;

// Goldmont / Tremont (atom in general) has no bypass delay
def TuningNoDomainDelay : SubtargetFeature<
    "no-bypass-delay", "NoDomainDelay", "true",
    "Has no bypass delay when using the 'wrong' domain">;

// Many processors (Nehalem+ on Intel) have no bypass delay when
// using the wrong mov type.
def TuningNoDomainDelayMov : SubtargetFeature<
    "no-bypass-delay-mov", "NoDomainDelayMov", "true",
    "Has no bypass delay when using the 'wrong' mov type">;

// Newer processors (Skylake+ on Intel) have no bypass delay when
// using the wrong blend type.
def TuningNoDomainDelayBlend : SubtargetFeature<
    "no-bypass-delay-blend", "NoDomainDelayBlend", "true",
    "Has no bypass delay when using the 'wrong' blend type">;

// Newer processors (Haswell+ on Intel) have no bypass delay when
// using the wrong shuffle type.
def TuningNoDomainDelayShuffle : SubtargetFeature<
    "no-bypass-delay-shuffle", "NoDomainDelayShuffle", "true",
    "Has no bypass delay when using the 'wrong' shuffle type">;

// Prefer lowering shuffles on AVX512 targets (e.g. Skylake Server) to
// imm shifts/rotate if they can use more ports than regular shuffles.
def TuningPreferShiftShuffle : SubtargetFeature<
    "faster-shift-than-shuffle", "PreferLowerShuffleAsShift", "true",
    "Shifts are faster (or as fast) as shuffle">;

def TuningFastImmVectorShift : SubtargetFeature<
    "tuning-fast-imm-vector-shift", "FastImmVectorShift", "true",
    "Vector shifts are fast (2/cycle) as opposed to slow (1/cycle)">;

// On some X86 processors, a vzeroupper instruction should be inserted after
// using ymm/zmm registers before executing code that may use SSE instructions.
def TuningInsertVZEROUPPER : SubtargetFeature<
    "vzeroupper", "InsertVZEROUPPER", "true",
    "Should insert vzeroupper instructions">;

// TuningFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency
// than the corresponding NR code. TuningFastVectorFSQRT should be enabled if
// vector FSQRT has higher throughput than the corresponding NR code.
// The idea is that throughput bound code is likely to be vectorized, so for
// vectorized code we should care about the throughput of SQRT operations.
// But if the code is scalar that probably means that the code has some kind of
// dependency and we should care more about reducing the latency.

// True if hardware SQRTSS instruction is at least as fast (latency) as
// RSQRTSS followed by a Newton-Raphson iteration.
def TuningFastScalarFSQRT : SubtargetFeature<
    "fast-scalar-fsqrt", "HasFastScalarFSQRT", "true",
    "Scalar SQRT is fast (disable Newton-Raphson)">;
// True if hardware SQRTPS/VSQRTPS instructions are at least as fast
// (throughput) as RSQRTPS/VRSQRTPS followed by a Newton-Raphson iteration.
def TuningFastVectorFSQRT : SubtargetFeature<
    "fast-vector-fsqrt", "HasFastVectorFSQRT", "true",
    "Vector SQRT is fast (disable Newton-Raphson)">;

// If lzcnt has equivalent latency/throughput to most simple integer ops, it can
// be used to replace test/set sequences.
def TuningFastLZCNT : SubtargetFeature<
    "fast-lzcnt", "HasFastLZCNT", "true",
    "LZCNT instructions are as fast as most simple integer ops">;

// If the target can efficiently decode NOPs up to 7 bytes in length.
def TuningFast7ByteNOP : SubtargetFeature<
    "fast-7bytenop", "HasFast7ByteNOP", "true",
    "Target can quickly decode up to 7 byte NOPs">;

// If the target can efficiently decode NOPs up to 11 bytes in length.
def TuningFast11ByteNOP : SubtargetFeature<
    "fast-11bytenop", "HasFast11ByteNOP", "true",
    "Target can quickly decode up to 11 byte NOPs">;

// If the target can efficiently decode NOPs up to 15 bytes in length.
def TuningFast15ByteNOP : SubtargetFeature<
    "fast-15bytenop", "HasFast15ByteNOP", "true",
    "Target can quickly decode up to 15 byte NOPs">;

// Sandy Bridge and newer processors can use SHLD with the same source on both
// inputs to implement rotate to avoid the partial flag update of the normal
// rotate instructions.
def TuningFastSHLDRotate : SubtargetFeature<
    "fast-shld-rotate", "HasFastSHLDRotate", "true",
    "SHLD can be used as a faster rotate">;

// Bulldozer and newer processors can merge CMP/TEST (but not other
// instructions) with conditional branches.
def TuningBranchFusion : SubtargetFeature<
    "branchfusion", "HasBranchFusion", "true",
    "CMP/TEST can be fused with conditional branches">;

// Sandy Bridge and newer processors have many instructions that can be
// fused with conditional branches and pass through the CPU as a single
// operation.
def TuningMacroFusion : SubtargetFeature<
    "macrofusion", "HasMacroFusion", "true",
    "Various instructions can be fused with conditional branches">;

// Gather is available since Haswell (AVX2 set). So technically, we can
// generate Gathers on all AVX2 processors. But the overhead on HSW is high.
// Skylake Client processor has faster Gathers than HSW and performance is
// similar to Skylake Server (AVX-512).
def TuningFastGather : SubtargetFeature<
    "fast-gather", "HasFastGather", "true",
    "Indicates if gather is reasonably fast (this is true for Skylake client and all AVX-512 CPUs)">;

// Generate vpdpwssd instead of vpmaddwd+vpaddd sequence.
def TuningFastDPWSSD : SubtargetFeature<
    "fast-dpwssd", "HasFastDPWSSD", "true",
    "Prefer vpdpwssd instruction over vpmaddwd+vpaddd instruction sequence">;

// The two "prefer-no-*" tunings are inverted: enabling them writes "false"
// to the PreferGather / PreferScatter fields.
def TuningPreferNoGather : SubtargetFeature<
    "prefer-no-gather", "PreferGather", "false",
    "Prefer no gather instructions">;
def TuningPreferNoScatter : SubtargetFeature<
    "prefer-no-scatter", "PreferScatter", "false",
    "Prefer no scatter instructions">;

def TuningPrefer128Bit : SubtargetFeature<
    "prefer-128-bit", "Prefer128Bit", "true",
    "Prefer 128-bit AVX instructions">;

def TuningPrefer256Bit : SubtargetFeature<
    "prefer-256-bit", "Prefer256Bit", "true",
    "Prefer 256-bit AVX instructions">;

def TuningAllowLight256Bit : SubtargetFeature<
    "allow-light-256-bit", "AllowLight256Bit", "true",
    "Enable generation of 256-bit load/stores even if we prefer 128-bit">;

def TuningPreferMaskRegisters : SubtargetFeature<
    "prefer-mask-registers", "PreferMaskRegisters", "true",
    "Prefer AVX512 mask registers over PTEST/MOVMSK">;

def TuningFastBEXTR : SubtargetFeature<
    "fast-bextr", "HasFastBEXTR", "true",
    "Indicates that the BEXTR instruction is implemented as a single uop "
    "with good throughput">;

// Combine vector math operations with shuffles into horizontal math
// instructions if a CPU implements horizontal operations (introduced with
// SSE3) with better latency/throughput than the alternative sequence.
714def TuningFastHorizontalOps 715 : SubtargetFeature< 716 "fast-hops", "HasFastHorizontalOps", "true", 717 "Prefer horizontal vector math instructions (haddp, phsub, etc.) over " 718 "normal vector instructions with shuffles">; 719 720def TuningFastScalarShiftMasks 721 : SubtargetFeature< 722 "fast-scalar-shift-masks", "HasFastScalarShiftMasks", "true", 723 "Prefer a left/right scalar logical shift pair over a shift+and pair">; 724 725def TuningFastVectorShiftMasks 726 : SubtargetFeature< 727 "fast-vector-shift-masks", "HasFastVectorShiftMasks", "true", 728 "Prefer a left/right vector logical shift pair over a shift+and pair">; 729 730def TuningFastMOVBE 731 : SubtargetFeature<"fast-movbe", "HasFastMOVBE", "true", 732 "Prefer a movbe over a single-use load + bswap / single-use bswap + store">; 733 734def TuningFastImm16 735 : SubtargetFeature<"fast-imm16", "HasFastImm16", "true", 736 "Prefer a i16 instruction with i16 immediate over extension to i32">; 737 738def TuningUseSLMArithCosts 739 : SubtargetFeature<"use-slm-arith-costs", "UseSLMArithCosts", "true", 740 "Use Silvermont specific arithmetic costs">; 741 742def TuningUseGLMDivSqrtCosts 743 : SubtargetFeature<"use-glm-div-sqrt-costs", "UseGLMDivSqrtCosts", "true", 744 "Use Goldmont specific floating point div/sqrt costs">; 745 746// Starting with Redwood Cove architecture, the branch has branch taken hint 747// (i.e., instruction prefix 3EH). 748def TuningBranchHint: SubtargetFeature<"branch-hint", "HasBranchHint", "true", 749 "Target has branch hint feature">; 750 751//===----------------------------------------------------------------------===// 752// X86 CPU Families 753// TODO: Remove these - use general tuning features to determine codegen. 
754//===----------------------------------------------------------------------===// 755 756// Bonnell 757def ProcIntelAtom : SubtargetFeature<"", "IsAtom", "true", "Is Intel Atom processor">; 758 759//===----------------------------------------------------------------------===// 760// Register File Description 761//===----------------------------------------------------------------------===// 762 763include "X86RegisterInfo.td" 764include "X86RegisterBanks.td" 765 766//===----------------------------------------------------------------------===// 767// Instruction Descriptions 768//===----------------------------------------------------------------------===// 769 770include "X86Schedule.td" 771include "X86InstrInfo.td" 772include "X86SchedPredicates.td" 773 774def X86InstrInfo : InstrInfo; 775 776//===----------------------------------------------------------------------===// 777// X86 Scheduler Models 778//===----------------------------------------------------------------------===// 779 780include "X86ScheduleAtom.td" 781include "X86SchedSandyBridge.td" 782include "X86SchedHaswell.td" 783include "X86SchedBroadwell.td" 784include "X86ScheduleSLM.td" 785include "X86ScheduleZnver1.td" 786include "X86ScheduleZnver2.td" 787include "X86ScheduleZnver3.td" 788include "X86ScheduleZnver4.td" 789include "X86ScheduleBdVer2.td" 790include "X86ScheduleBtVer2.td" 791include "X86SchedSkylakeClient.td" 792include "X86SchedSkylakeServer.td" 793include "X86SchedIceLake.td" 794include "X86SchedAlderlakeP.td" 795include "X86SchedSapphireRapids.td" 796 797//===----------------------------------------------------------------------===// 798// X86 Processor Feature Lists 799//===----------------------------------------------------------------------===// 800 801def ProcessorFeatures { 802 // x86-64 micro-architecture levels: x86-64 and x86-64-v[234] 803 list<SubtargetFeature> X86_64V1Features = [ 804 FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX, FeatureSSE2, 805 FeatureFXSR, 
FeatureNOPL, FeatureX86_64, 806 ]; 807 list<SubtargetFeature> X86_64V1Tuning = [ 808 TuningMacroFusion, 809 TuningSlow3OpsLEA, 810 TuningSlowDivide64, 811 TuningSlowIncDec, 812 TuningInsertVZEROUPPER 813 ]; 814 815 list<SubtargetFeature> X86_64V2Features = !listconcat(X86_64V1Features, [ 816 FeatureCX16, FeatureLAHFSAHF64, FeatureCRC32, FeaturePOPCNT, 817 FeatureSSE42 818 ]); 819 list<SubtargetFeature> X86_64V2Tuning = [ 820 TuningMacroFusion, 821 TuningSlow3OpsLEA, 822 TuningSlowDivide64, 823 TuningSlowUAMem32, 824 TuningFastScalarFSQRT, 825 TuningFastSHLDRotate, 826 TuningFast15ByteNOP, 827 TuningPOPCNTFalseDeps, 828 TuningInsertVZEROUPPER 829 ]; 830 831 list<SubtargetFeature> X86_64V3Features = !listconcat(X86_64V2Features, [ 832 FeatureAVX2, FeatureBMI, FeatureBMI2, FeatureF16C, FeatureFMA, FeatureLZCNT, 833 FeatureMOVBE, FeatureXSAVE 834 ]); 835 list<SubtargetFeature> X86_64V3Tuning = [ 836 TuningMacroFusion, 837 TuningSlow3OpsLEA, 838 TuningSlowDivide64, 839 TuningFastScalarFSQRT, 840 TuningFastSHLDRotate, 841 TuningFast15ByteNOP, 842 TuningFastVariableCrossLaneShuffle, 843 TuningFastVariablePerLaneShuffle, 844 TuningPOPCNTFalseDeps, 845 TuningLZCNTFalseDeps, 846 TuningInsertVZEROUPPER, 847 TuningAllowLight256Bit 848 ]; 849 850 list<SubtargetFeature> X86_64V4Features = !listconcat(X86_64V3Features, [ 851 FeatureEVEX512, 852 FeatureBWI, 853 FeatureCDI, 854 FeatureDQI, 855 FeatureVLX, 856 ]); 857 list<SubtargetFeature> X86_64V4Tuning = [ 858 TuningMacroFusion, 859 TuningSlow3OpsLEA, 860 TuningSlowDivide64, 861 TuningFastScalarFSQRT, 862 TuningFastVectorFSQRT, 863 TuningFastSHLDRotate, 864 TuningFast15ByteNOP, 865 TuningFastVariableCrossLaneShuffle, 866 TuningFastVariablePerLaneShuffle, 867 TuningPrefer256Bit, 868 TuningFastGather, 869 TuningPOPCNTFalseDeps, 870 TuningInsertVZEROUPPER, 871 TuningAllowLight256Bit 872 ]; 873 874 // Nehalem 875 list<SubtargetFeature> NHMFeatures = X86_64V2Features; 876 list<SubtargetFeature> NHMTuning = [TuningMacroFusion, 877 
TuningSlowDivide64, 878 TuningInsertVZEROUPPER, 879 TuningNoDomainDelayMov]; 880 881 // Westmere 882 list<SubtargetFeature> WSMAdditionalFeatures = [FeaturePCLMUL]; 883 list<SubtargetFeature> WSMTuning = NHMTuning; 884 list<SubtargetFeature> WSMFeatures = 885 !listconcat(NHMFeatures, WSMAdditionalFeatures); 886 887 // Sandybridge 888 list<SubtargetFeature> SNBAdditionalFeatures = [FeatureAVX, 889 FeatureXSAVE, 890 FeatureXSAVEOPT]; 891 list<SubtargetFeature> SNBTuning = [TuningMacroFusion, 892 TuningSlow3OpsLEA, 893 TuningSlowDivide64, 894 TuningSlowUAMem32, 895 TuningFastScalarFSQRT, 896 TuningFastSHLDRotate, 897 TuningFast15ByteNOP, 898 TuningPOPCNTFalseDeps, 899 TuningInsertVZEROUPPER, 900 TuningNoDomainDelayMov]; 901 list<SubtargetFeature> SNBFeatures = 902 !listconcat(WSMFeatures, SNBAdditionalFeatures); 903 904 // Ivybridge 905 list<SubtargetFeature> IVBAdditionalFeatures = [FeatureRDRAND, 906 FeatureF16C, 907 FeatureFSGSBase]; 908 list<SubtargetFeature> IVBTuning = SNBTuning; 909 list<SubtargetFeature> IVBFeatures = 910 !listconcat(SNBFeatures, IVBAdditionalFeatures); 911 912 // Haswell 913 list<SubtargetFeature> HSWAdditionalFeatures = [FeatureAVX2, 914 FeatureBMI, 915 FeatureBMI2, 916 FeatureERMSB, 917 FeatureFMA, 918 FeatureINVPCID, 919 FeatureLZCNT, 920 FeatureMOVBE]; 921 list<SubtargetFeature> HSWTuning = [TuningMacroFusion, 922 TuningSlow3OpsLEA, 923 TuningSlowDivide64, 924 TuningFastScalarFSQRT, 925 TuningFastSHLDRotate, 926 TuningFast15ByteNOP, 927 TuningFastVariableCrossLaneShuffle, 928 TuningFastVariablePerLaneShuffle, 929 TuningPOPCNTFalseDeps, 930 TuningLZCNTFalseDeps, 931 TuningInsertVZEROUPPER, 932 TuningAllowLight256Bit, 933 TuningNoDomainDelayMov, 934 TuningNoDomainDelayShuffle]; 935 list<SubtargetFeature> HSWFeatures = 936 !listconcat(IVBFeatures, HSWAdditionalFeatures); 937 938 // Broadwell 939 list<SubtargetFeature> BDWAdditionalFeatures = [FeatureADX, 940 FeatureRDSEED, 941 FeaturePRFCHW]; 942 list<SubtargetFeature> BDWTuning = HSWTuning; 
943 list<SubtargetFeature> BDWFeatures = 944 !listconcat(HSWFeatures, BDWAdditionalFeatures); 945 946 // Skylake 947 list<SubtargetFeature> SKLAdditionalFeatures = [FeatureAES, 948 FeatureXSAVEC, 949 FeatureXSAVES, 950 FeatureCLFLUSHOPT]; 951 list<SubtargetFeature> SKLTuning = [TuningFastGather, 952 TuningMacroFusion, 953 TuningSlow3OpsLEA, 954 TuningSlowDivide64, 955 TuningFastScalarFSQRT, 956 TuningFastVectorFSQRT, 957 TuningFastSHLDRotate, 958 TuningFast15ByteNOP, 959 TuningFastVariableCrossLaneShuffle, 960 TuningFastVariablePerLaneShuffle, 961 TuningPOPCNTFalseDeps, 962 TuningInsertVZEROUPPER, 963 TuningAllowLight256Bit, 964 TuningNoDomainDelayMov, 965 TuningNoDomainDelayShuffle, 966 TuningNoDomainDelayBlend]; 967 list<SubtargetFeature> SKLFeatures = 968 !listconcat(BDWFeatures, SKLAdditionalFeatures); 969 970 // Skylake-AVX512 971 list<SubtargetFeature> SKXAdditionalFeatures = [FeatureAES, 972 FeatureXSAVEC, 973 FeatureXSAVES, 974 FeatureCLFLUSHOPT, 975 FeatureAVX512, 976 FeatureEVEX512, 977 FeatureCDI, 978 FeatureDQI, 979 FeatureBWI, 980 FeatureVLX, 981 FeaturePKU, 982 FeatureCLWB]; 983 list<SubtargetFeature> SKXTuning = [TuningFastGather, 984 TuningMacroFusion, 985 TuningSlow3OpsLEA, 986 TuningSlowDivide64, 987 TuningFastScalarFSQRT, 988 TuningFastVectorFSQRT, 989 TuningFastSHLDRotate, 990 TuningFast15ByteNOP, 991 TuningFastVariableCrossLaneShuffle, 992 TuningFastVariablePerLaneShuffle, 993 TuningPrefer256Bit, 994 TuningPOPCNTFalseDeps, 995 TuningInsertVZEROUPPER, 996 TuningAllowLight256Bit, 997 TuningPreferShiftShuffle, 998 TuningNoDomainDelayMov, 999 TuningNoDomainDelayShuffle, 1000 TuningNoDomainDelayBlend, 1001 TuningFastImmVectorShift]; 1002 list<SubtargetFeature> SKXFeatures = 1003 !listconcat(BDWFeatures, SKXAdditionalFeatures); 1004 1005 // Cascadelake 1006 list<SubtargetFeature> CLXAdditionalFeatures = [FeatureVNNI]; 1007 list<SubtargetFeature> CLXTuning = SKXTuning; 1008 list<SubtargetFeature> CLXFeatures = 1009 !listconcat(SKXFeatures, 
CLXAdditionalFeatures); 1010 1011 // Cooperlake 1012 list<SubtargetFeature> CPXAdditionalFeatures = [FeatureBF16]; 1013 list<SubtargetFeature> CPXTuning = SKXTuning; 1014 list<SubtargetFeature> CPXFeatures = 1015 !listconcat(CLXFeatures, CPXAdditionalFeatures); 1016 1017 // Cannonlake 1018 list<SubtargetFeature> CNLAdditionalFeatures = [FeatureAVX512, 1019 FeatureEVEX512, 1020 FeatureCDI, 1021 FeatureDQI, 1022 FeatureBWI, 1023 FeatureVLX, 1024 FeaturePKU, 1025 FeatureVBMI, 1026 FeatureIFMA, 1027 FeatureSHA]; 1028 list<SubtargetFeature> CNLTuning = [TuningFastGather, 1029 TuningMacroFusion, 1030 TuningSlow3OpsLEA, 1031 TuningSlowDivide64, 1032 TuningFastScalarFSQRT, 1033 TuningFastVectorFSQRT, 1034 TuningFastSHLDRotate, 1035 TuningFast15ByteNOP, 1036 TuningFastVariableCrossLaneShuffle, 1037 TuningFastVariablePerLaneShuffle, 1038 TuningPrefer256Bit, 1039 TuningInsertVZEROUPPER, 1040 TuningAllowLight256Bit, 1041 TuningNoDomainDelayMov, 1042 TuningNoDomainDelayShuffle, 1043 TuningNoDomainDelayBlend, 1044 TuningFastImmVectorShift]; 1045 list<SubtargetFeature> CNLFeatures = 1046 !listconcat(SKLFeatures, CNLAdditionalFeatures); 1047 1048 // Icelake 1049 list<SubtargetFeature> ICLAdditionalFeatures = [FeatureBITALG, 1050 FeatureVAES, 1051 FeatureVBMI2, 1052 FeatureVNNI, 1053 FeatureVPCLMULQDQ, 1054 FeatureVPOPCNTDQ, 1055 FeatureGFNI, 1056 FeatureRDPID, 1057 FeatureFSRM]; 1058 list<SubtargetFeature> ICLTuning = [TuningFastGather, 1059 TuningMacroFusion, 1060 TuningSlowDivide64, 1061 TuningFastScalarFSQRT, 1062 TuningFastVectorFSQRT, 1063 TuningFastSHLDRotate, 1064 TuningFast15ByteNOP, 1065 TuningFastVariableCrossLaneShuffle, 1066 TuningFastVariablePerLaneShuffle, 1067 TuningPrefer256Bit, 1068 TuningInsertVZEROUPPER, 1069 TuningAllowLight256Bit, 1070 TuningNoDomainDelayMov, 1071 TuningNoDomainDelayShuffle, 1072 TuningNoDomainDelayBlend, 1073 TuningFastImmVectorShift]; 1074 list<SubtargetFeature> ICLFeatures = 1075 !listconcat(CNLFeatures, ICLAdditionalFeatures); 1076 1077 // 
Icelake Server 1078 list<SubtargetFeature> ICXAdditionalFeatures = [FeaturePCONFIG, 1079 FeatureCLWB, 1080 FeatureWBNOINVD]; 1081 list<SubtargetFeature> ICXTuning = ICLTuning; 1082 list<SubtargetFeature> ICXFeatures = 1083 !listconcat(ICLFeatures, ICXAdditionalFeatures); 1084 1085 // Tigerlake 1086 list<SubtargetFeature> TGLAdditionalFeatures = [FeatureVP2INTERSECT, 1087 FeatureCLWB, 1088 FeatureMOVDIRI, 1089 FeatureMOVDIR64B, 1090 FeatureSHSTK]; 1091 list<SubtargetFeature> TGLTuning = ICLTuning; 1092 list<SubtargetFeature> TGLFeatures = 1093 !listconcat(ICLFeatures, TGLAdditionalFeatures ); 1094 1095 // Sapphirerapids 1096 list<SubtargetFeature> SPRAdditionalFeatures = [FeatureAMXTILE, 1097 FeatureAMXINT8, 1098 FeatureAMXBF16, 1099 FeatureBF16, 1100 FeatureSERIALIZE, 1101 FeatureCLDEMOTE, 1102 FeatureWAITPKG, 1103 FeaturePTWRITE, 1104 FeatureFP16, 1105 FeatureAVXVNNI, 1106 FeatureTSXLDTRK, 1107 FeatureENQCMD, 1108 FeatureSHSTK, 1109 FeatureMOVDIRI, 1110 FeatureMOVDIR64B, 1111 FeatureUINTR]; 1112 list<SubtargetFeature> SPRAdditionalTuning = [TuningMULCFalseDeps, 1113 TuningPERMFalseDeps, 1114 TuningRANGEFalseDeps, 1115 TuningGETMANTFalseDeps, 1116 TuningMULLQFalseDeps]; 1117 list<SubtargetFeature> SPRTuning = !listconcat(ICXTuning, SPRAdditionalTuning); 1118 list<SubtargetFeature> SPRFeatures = 1119 !listconcat(ICXFeatures, SPRAdditionalFeatures); 1120 1121 // Graniterapids 1122 list<SubtargetFeature> GNRAdditionalFeatures = [FeatureAMXFP16, 1123 FeaturePREFETCHI]; 1124 list<SubtargetFeature> GNRFeatures = 1125 !listconcat(SPRFeatures, GNRAdditionalFeatures); 1126 list<SubtargetFeature> GNRAdditionalTuning = [TuningBranchHint]; 1127 list<SubtargetFeature> GNRTuning = !listconcat(SPRTuning, GNRAdditionalTuning); 1128 1129 // Graniterapids D 1130 list<SubtargetFeature> GNRDAdditionalFeatures = [FeatureAMXCOMPLEX]; 1131 list<SubtargetFeature> GNRDFeatures = 1132 !listconcat(GNRFeatures, GNRDAdditionalFeatures); 1133 1134 // Atom 1135 list<SubtargetFeature> 
AtomFeatures = [FeatureX87, 1136 FeatureCX8, 1137 FeatureCMOV, 1138 FeatureMMX, 1139 FeatureSSSE3, 1140 FeatureFXSR, 1141 FeatureNOPL, 1142 FeatureX86_64, 1143 FeatureCX16, 1144 FeatureMOVBE, 1145 FeatureLAHFSAHF64]; 1146 list<SubtargetFeature> AtomTuning = [ProcIntelAtom, 1147 TuningSlowUAMem16, 1148 TuningLEAForSP, 1149 TuningSlowDivide32, 1150 TuningSlowDivide64, 1151 TuningSlowTwoMemOps, 1152 TuningFastImm16, 1153 TuningLEAUsesAG, 1154 TuningPadShortFunctions, 1155 TuningInsertVZEROUPPER, 1156 TuningNoDomainDelay]; 1157 1158 // Silvermont 1159 list<SubtargetFeature> SLMAdditionalFeatures = [FeatureSSE42, 1160 FeatureCRC32, 1161 FeaturePOPCNT, 1162 FeaturePCLMUL, 1163 FeaturePRFCHW, 1164 FeatureRDRAND]; 1165 list<SubtargetFeature> SLMTuning = [TuningUseSLMArithCosts, 1166 TuningSlowTwoMemOps, 1167 TuningSlowLEA, 1168 TuningSlowIncDec, 1169 TuningSlowDivide64, 1170 TuningSlowPMULLD, 1171 TuningFast7ByteNOP, 1172 TuningFastMOVBE, 1173 TuningFastImm16, 1174 TuningPOPCNTFalseDeps, 1175 TuningInsertVZEROUPPER, 1176 TuningNoDomainDelay]; 1177 list<SubtargetFeature> SLMFeatures = 1178 !listconcat(AtomFeatures, SLMAdditionalFeatures); 1179 1180 // Goldmont 1181 list<SubtargetFeature> GLMAdditionalFeatures = [FeatureAES, 1182 FeatureSHA, 1183 FeatureRDSEED, 1184 FeatureXSAVE, 1185 FeatureXSAVEOPT, 1186 FeatureXSAVEC, 1187 FeatureXSAVES, 1188 FeatureCLFLUSHOPT, 1189 FeatureFSGSBase]; 1190 list<SubtargetFeature> GLMTuning = [TuningUseGLMDivSqrtCosts, 1191 TuningSlowTwoMemOps, 1192 TuningSlowLEA, 1193 TuningSlowIncDec, 1194 TuningFastMOVBE, 1195 TuningFastImm16, 1196 TuningPOPCNTFalseDeps, 1197 TuningInsertVZEROUPPER, 1198 TuningNoDomainDelay]; 1199 list<SubtargetFeature> GLMFeatures = 1200 !listconcat(SLMFeatures, GLMAdditionalFeatures); 1201 1202 // Goldmont Plus 1203 list<SubtargetFeature> GLPAdditionalFeatures = [FeaturePTWRITE, 1204 FeatureRDPID]; 1205 list<SubtargetFeature> GLPTuning = [TuningUseGLMDivSqrtCosts, 1206 TuningSlowTwoMemOps, 1207 TuningSlowLEA, 1208 
TuningSlowIncDec, 1209 TuningFastMOVBE, 1210 TuningFastImm16, 1211 TuningInsertVZEROUPPER, 1212 TuningNoDomainDelay]; 1213 list<SubtargetFeature> GLPFeatures = 1214 !listconcat(GLMFeatures, GLPAdditionalFeatures); 1215 1216 // Tremont 1217 list<SubtargetFeature> TRMAdditionalFeatures = [FeatureCLWB, 1218 FeatureGFNI]; 1219 list<SubtargetFeature> TRMTuning = GLPTuning; 1220 list<SubtargetFeature> TRMFeatures = 1221 !listconcat(GLPFeatures, TRMAdditionalFeatures); 1222 1223 // Alderlake 1224 list<SubtargetFeature> ADLAdditionalFeatures = [FeatureSERIALIZE, 1225 FeaturePCONFIG, 1226 FeatureSHSTK, 1227 FeatureWIDEKL, 1228 FeatureINVPCID, 1229 FeatureADX, 1230 FeatureFMA, 1231 FeatureVAES, 1232 FeatureVPCLMULQDQ, 1233 FeatureF16C, 1234 FeatureBMI, 1235 FeatureBMI2, 1236 FeatureLZCNT, 1237 FeatureAVXVNNI, 1238 FeaturePKU, 1239 FeatureHRESET, 1240 FeatureCLDEMOTE, 1241 FeatureMOVDIRI, 1242 FeatureMOVDIR64B, 1243 FeatureWAITPKG]; 1244 list<SubtargetFeature> ADLAdditionalTuning = [TuningPERMFalseDeps, 1245 TuningPreferMovmskOverVTest, 1246 TuningFastImmVectorShift]; 1247 list<SubtargetFeature> ADLTuning = !listconcat(SKLTuning, ADLAdditionalTuning); 1248 list<SubtargetFeature> ADLFeatures = 1249 !listconcat(TRMFeatures, ADLAdditionalFeatures); 1250 1251 // Gracemont 1252 list<SubtargetFeature> GRTTuning = [TuningMacroFusion, 1253 TuningSlow3OpsLEA, 1254 TuningFastScalarFSQRT, 1255 TuningFastVectorFSQRT, 1256 TuningFast15ByteNOP, 1257 TuningFastVariablePerLaneShuffle, 1258 TuningPOPCNTFalseDeps, 1259 TuningInsertVZEROUPPER]; 1260 1261 // Sierraforest 1262 list<SubtargetFeature> SRFAdditionalFeatures = [FeatureCMPCCXADD, 1263 FeatureAVXIFMA, 1264 FeatureAVXNECONVERT, 1265 FeatureENQCMD, 1266 FeatureUINTR, 1267 FeatureAVXVNNIINT8]; 1268 list<SubtargetFeature> SRFFeatures = 1269 !listconcat(ADLFeatures, SRFAdditionalFeatures); 1270 1271 // Arrowlake S 1272 list<SubtargetFeature> ARLSAdditionalFeatures = [FeatureAVXVNNIINT16, 1273 FeatureSHA512, 1274 FeatureSM3, 1275 
FeatureSM4]; 1276 list<SubtargetFeature> ARLSFeatures = 1277 !listconcat(SRFFeatures, ARLSAdditionalFeatures); 1278 1279 // Pantherlake 1280 list<SubtargetFeature> PTLAdditionalFeatures = [FeaturePREFETCHI]; 1281 list<SubtargetFeature> PTLFeatures = 1282 !listconcat(ARLSFeatures, PTLAdditionalFeatures); 1283 1284 1285 // Clearwaterforest 1286 list<SubtargetFeature> CWFAdditionalFeatures = [FeaturePREFETCHI, 1287 FeatureUSERMSR]; 1288 list<SubtargetFeature> CWFFeatures = 1289 !listconcat(ARLSFeatures, CWFAdditionalFeatures); 1290 1291 // Knights Landing 1292 list<SubtargetFeature> KNLFeatures = [FeatureX87, 1293 FeatureCX8, 1294 FeatureCMOV, 1295 FeatureMMX, 1296 FeatureFXSR, 1297 FeatureNOPL, 1298 FeatureX86_64, 1299 FeatureCX16, 1300 FeatureCRC32, 1301 FeaturePOPCNT, 1302 FeaturePCLMUL, 1303 FeatureXSAVE, 1304 FeatureXSAVEOPT, 1305 FeatureLAHFSAHF64, 1306 FeatureAES, 1307 FeatureRDRAND, 1308 FeatureF16C, 1309 FeatureFSGSBase, 1310 FeatureAVX512, 1311 FeatureEVEX512, 1312 FeatureCDI, 1313 FeatureADX, 1314 FeatureRDSEED, 1315 FeatureMOVBE, 1316 FeatureLZCNT, 1317 FeatureBMI, 1318 FeatureBMI2, 1319 FeatureFMA, 1320 FeaturePRFCHW]; 1321 list<SubtargetFeature> KNLTuning = [TuningSlowDivide64, 1322 TuningSlow3OpsLEA, 1323 TuningSlowIncDec, 1324 TuningSlowTwoMemOps, 1325 TuningPreferMaskRegisters, 1326 TuningFastGather, 1327 TuningFastMOVBE, 1328 TuningFastImm16, 1329 TuningSlowPMADDWD]; 1330 // TODO Add AVX5124FMAPS/AVX5124VNNIW features 1331 list<SubtargetFeature> KNMFeatures = 1332 !listconcat(KNLFeatures, [FeatureVPOPCNTDQ]); 1333 1334 // Barcelona 1335 list<SubtargetFeature> BarcelonaFeatures = [FeatureX87, 1336 FeatureCX8, 1337 FeatureSSE4A, 1338 FeatureFXSR, 1339 FeatureNOPL, 1340 FeatureCX16, 1341 FeaturePRFCHW, 1342 FeatureLZCNT, 1343 FeaturePOPCNT, 1344 FeatureLAHFSAHF64, 1345 FeatureCMOV, 1346 FeatureX86_64]; 1347 list<SubtargetFeature> BarcelonaTuning = [TuningFastScalarShiftMasks, 1348 TuningSlowDivide64, 1349 TuningSlowSHLD, 1350 TuningSBBDepBreaking, 1351 
TuningInsertVZEROUPPER]; 1352 1353 // Bobcat 1354 list<SubtargetFeature> BtVer1Features = [FeatureX87, 1355 FeatureCX8, 1356 FeatureCMOV, 1357 FeatureMMX, 1358 FeatureSSSE3, 1359 FeatureSSE4A, 1360 FeatureFXSR, 1361 FeatureNOPL, 1362 FeatureX86_64, 1363 FeatureCX16, 1364 FeaturePRFCHW, 1365 FeatureLZCNT, 1366 FeaturePOPCNT, 1367 FeatureLAHFSAHF64]; 1368 list<SubtargetFeature> BtVer1Tuning = [TuningFast15ByteNOP, 1369 TuningFastScalarShiftMasks, 1370 TuningFastVectorShiftMasks, 1371 TuningSlowDivide64, 1372 TuningSlowSHLD, 1373 TuningFastImm16, 1374 TuningSBBDepBreaking, 1375 TuningInsertVZEROUPPER]; 1376 1377 // Jaguar 1378 list<SubtargetFeature> BtVer2AdditionalFeatures = [FeatureAVX, 1379 FeatureAES, 1380 FeatureCRC32, 1381 FeaturePCLMUL, 1382 FeatureBMI, 1383 FeatureF16C, 1384 FeatureMOVBE, 1385 FeatureXSAVE, 1386 FeatureXSAVEOPT]; 1387 list<SubtargetFeature> BtVer2Tuning = [TuningFastLZCNT, 1388 TuningFastBEXTR, 1389 TuningFastHorizontalOps, 1390 TuningFast15ByteNOP, 1391 TuningFastScalarShiftMasks, 1392 TuningFastVectorShiftMasks, 1393 TuningFastMOVBE, 1394 TuningFastImm16, 1395 TuningSBBDepBreaking, 1396 TuningSlowDivide64, 1397 TuningSlowSHLD]; 1398 list<SubtargetFeature> BtVer2Features = 1399 !listconcat(BtVer1Features, BtVer2AdditionalFeatures); 1400 1401 // Bulldozer 1402 list<SubtargetFeature> BdVer1Features = [FeatureX87, 1403 FeatureCX8, 1404 FeatureCMOV, 1405 FeatureXOP, 1406 FeatureX86_64, 1407 FeatureCX16, 1408 FeatureAES, 1409 FeatureCRC32, 1410 FeaturePRFCHW, 1411 FeaturePCLMUL, 1412 FeatureMMX, 1413 FeatureFXSR, 1414 FeatureNOPL, 1415 FeatureLZCNT, 1416 FeaturePOPCNT, 1417 FeatureXSAVE, 1418 FeatureLWP, 1419 FeatureLAHFSAHF64]; 1420 list<SubtargetFeature> BdVer1Tuning = [TuningSlowSHLD, 1421 TuningSlowDivide64, 1422 TuningFast11ByteNOP, 1423 TuningFastScalarShiftMasks, 1424 TuningBranchFusion, 1425 TuningSBBDepBreaking, 1426 TuningInsertVZEROUPPER]; 1427 1428 // PileDriver 1429 list<SubtargetFeature> BdVer2AdditionalFeatures = [FeatureF16C, 1430 
FeatureBMI, 1431 FeatureTBM, 1432 FeatureFMA]; 1433 list<SubtargetFeature> BdVer2AdditionalTuning = [TuningFastBEXTR, 1434 TuningFastMOVBE]; 1435 list<SubtargetFeature> BdVer2Tuning = 1436 !listconcat(BdVer1Tuning, BdVer2AdditionalTuning); 1437 list<SubtargetFeature> BdVer2Features = 1438 !listconcat(BdVer1Features, BdVer2AdditionalFeatures); 1439 1440 // Steamroller 1441 list<SubtargetFeature> BdVer3AdditionalFeatures = [FeatureXSAVEOPT, 1442 FeatureFSGSBase]; 1443 list<SubtargetFeature> BdVer3Tuning = BdVer2Tuning; 1444 list<SubtargetFeature> BdVer3Features = 1445 !listconcat(BdVer2Features, BdVer3AdditionalFeatures); 1446 1447 // Excavator 1448 list<SubtargetFeature> BdVer4AdditionalFeatures = [FeatureAVX2, 1449 FeatureBMI2, 1450 FeatureMOVBE, 1451 FeatureRDRAND, 1452 FeatureMWAITX]; 1453 list<SubtargetFeature> BdVer4Tuning = BdVer3Tuning; 1454 list<SubtargetFeature> BdVer4Features = 1455 !listconcat(BdVer3Features, BdVer4AdditionalFeatures); 1456 1457 1458 // AMD Zen Processors common ISAs 1459 list<SubtargetFeature> ZNFeatures = [FeatureADX, 1460 FeatureAES, 1461 FeatureAVX2, 1462 FeatureBMI, 1463 FeatureBMI2, 1464 FeatureCLFLUSHOPT, 1465 FeatureCLZERO, 1466 FeatureCMOV, 1467 FeatureX86_64, 1468 FeatureCX16, 1469 FeatureCRC32, 1470 FeatureF16C, 1471 FeatureFMA, 1472 FeatureFSGSBase, 1473 FeatureFXSR, 1474 FeatureNOPL, 1475 FeatureLAHFSAHF64, 1476 FeatureLZCNT, 1477 FeatureMMX, 1478 FeatureMOVBE, 1479 FeatureMWAITX, 1480 FeaturePCLMUL, 1481 FeaturePOPCNT, 1482 FeaturePRFCHW, 1483 FeatureRDRAND, 1484 FeatureRDSEED, 1485 FeatureSHA, 1486 FeatureSSE4A, 1487 FeatureX87, 1488 FeatureXSAVE, 1489 FeatureXSAVEC, 1490 FeatureXSAVEOPT, 1491 FeatureXSAVES]; 1492 list<SubtargetFeature> ZNTuning = [TuningFastLZCNT, 1493 TuningFastBEXTR, 1494 TuningFast15ByteNOP, 1495 TuningBranchFusion, 1496 TuningFastScalarFSQRT, 1497 TuningFastVectorFSQRT, 1498 TuningFastScalarShiftMasks, 1499 TuningFastVariablePerLaneShuffle, 1500 TuningFastMOVBE, 1501 TuningFastImm16, 1502 
TuningSlowDivide64, 1503 TuningSlowSHLD, 1504 TuningSBBDepBreaking, 1505 TuningInsertVZEROUPPER, 1506 TuningAllowLight256Bit]; 1507 list<SubtargetFeature> ZN2AdditionalFeatures = [FeatureCLWB, 1508 FeatureRDPID, 1509 FeatureRDPRU, 1510 FeatureWBNOINVD]; 1511 list<SubtargetFeature> ZN2Tuning = ZNTuning; 1512 list<SubtargetFeature> ZN2Features = 1513 !listconcat(ZNFeatures, ZN2AdditionalFeatures); 1514 list<SubtargetFeature> ZN3AdditionalFeatures = [FeatureFSRM, 1515 FeatureINVPCID, 1516 FeaturePKU, 1517 FeatureVAES, 1518 FeatureVPCLMULQDQ]; 1519 list<SubtargetFeature> ZN3AdditionalTuning = [TuningMacroFusion]; 1520 list<SubtargetFeature> ZN3Tuning = 1521 !listconcat(ZN2Tuning, ZN3AdditionalTuning); 1522 list<SubtargetFeature> ZN3Features = 1523 !listconcat(ZN2Features, ZN3AdditionalFeatures); 1524 1525 1526 list<SubtargetFeature> ZN4AdditionalTuning = [TuningFastDPWSSD]; 1527 list<SubtargetFeature> ZN4Tuning = 1528 !listconcat(ZN3Tuning, ZN4AdditionalTuning); 1529 list<SubtargetFeature> ZN4AdditionalFeatures = [FeatureAVX512, 1530 FeatureEVEX512, 1531 FeatureCDI, 1532 FeatureDQI, 1533 FeatureBWI, 1534 FeatureVLX, 1535 FeatureVBMI, 1536 FeatureVBMI2, 1537 FeatureIFMA, 1538 FeatureVNNI, 1539 FeatureBITALG, 1540 FeatureGFNI, 1541 FeatureBF16, 1542 FeatureSHSTK, 1543 FeatureVPOPCNTDQ]; 1544 list<SubtargetFeature> ZN4Features = 1545 !listconcat(ZN3Features, ZN4AdditionalFeatures); 1546 1547 1548 list<SubtargetFeature> ZN5Tuning = ZN4Tuning; 1549 list<SubtargetFeature> ZN5AdditionalFeatures = [FeatureVNNI, 1550 FeatureMOVDIRI, 1551 FeatureMOVDIR64B, 1552 FeatureVP2INTERSECT, 1553 FeaturePREFETCHI, 1554 FeatureAVXVNNI 1555 ]; 1556 list<SubtargetFeature> ZN5Features = 1557 !listconcat(ZN4Features, ZN5AdditionalFeatures); 1558 1559} 1560 1561//===----------------------------------------------------------------------===// 1562// X86 processors supported. 
1563//===----------------------------------------------------------------------===// 1564 1565class Proc<string Name, list<SubtargetFeature> Features, 1566 list<SubtargetFeature> TuneFeatures> 1567 : ProcessorModel<Name, GenericModel, Features, TuneFeatures>; 1568 1569class ProcModel<string Name, SchedMachineModel Model, 1570 list<SubtargetFeature> Features, 1571 list<SubtargetFeature> TuneFeatures> 1572 : ProcessorModel<Name, Model, Features, TuneFeatures>; 1573 1574// NOTE: CMPXCHG8B is here for legacy compatibility so that it is only disabled 1575// if i386/i486 is specifically requested. 1576// NOTE: 64Bit is here as "generic" is the default llc CPU. The X86Subtarget 1577// constructor checks that any CPU used in 64-bit mode has FeatureX86_64 1578// enabled. It has no effect on code generation. 1579// NOTE: As a default tuning, "generic" aims to produce code optimized for the 1580// most common X86 processors. The tunings might be changed over time. It is 1581// recommended to use "tune-cpu"="x86-64" in function attribute for consistency. 
def : ProcModel<"generic", SandyBridgeModel,
                [FeatureX87, FeatureCX8, FeatureX86_64],
                [TuningSlow3OpsLEA, TuningSlowDivide64, TuningMacroFusion,
                 TuningFastScalarFSQRT, TuningFast15ByteNOP,
                 TuningInsertVZEROUPPER]>;

// Legacy 32-bit processors. All of them share the same two tuning flags and
// differ only in their ISA feature list.
def : Proc<"i386",
           [FeatureX87],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"i486",
           [FeatureX87],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"i586",
           [FeatureX87, FeatureCX8],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"pentium",
           [FeatureX87, FeatureCX8],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
foreach P = ["pentium-mmx", "pentium_mmx"] in {
  def : Proc<P,
             [FeatureX87, FeatureCX8, FeatureMMX],
             [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
def : Proc<"i686",
           [FeatureX87, FeatureCX8, FeatureCMOV],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
foreach P = ["pentiumpro", "pentium_pro"] in {
  def : Proc<P,
             [FeatureX87, FeatureCX8, FeatureCMOV, FeatureNOPL],
             [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
foreach P = ["pentium2", "pentium_ii"] in {
  def : Proc<P,
             [FeatureX87, FeatureCX8, FeatureMMX, FeatureCMOV, FeatureFXSR,
              FeatureNOPL],
             [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
foreach P = ["pentium3", "pentium3m", "pentium_iii_no_xmm_regs",
             "pentium_iii"] in {
  def : Proc<P,
             [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE1, FeatureFXSR,
              FeatureNOPL, FeatureCMOV],
             [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}

// Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
// The intent is to enable it for pentium4 which is the current default
// processor in a vanilla 32-bit clang compilation when no specific
// architecture is specified.  This generally gives a nice performance
// increase on silvermont, with largely neutral behavior on other
// contemporary large core processors.
// pentium-m, pentium4m, prescott and nocona are included as a preventative
// measure to avoid performance surprises, in case clang's default cpu
// changes slightly.

foreach P = ["pentium_m", "pentium-m"] in {
  def : ProcModel<P, GenericPostRAModel,
                  [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE2,
                   FeatureFXSR, FeatureNOPL, FeatureCMOV],
                  [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}

foreach P = ["pentium4", "pentium4m", "pentium_4"] in {
  def : ProcModel<P, GenericPostRAModel,
                  [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE2,
                   FeatureFXSR, FeatureNOPL, FeatureCMOV],
                  [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}

// Intel Quark.
def : Proc<"lakemont",
           [FeatureCX8],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

// Intel Core Duo.
def : ProcModel<"yonah", SandyBridgeModel,
                [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE3, FeatureFXSR,
                 FeatureNOPL, FeatureCMOV],
                [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

// NetBurst.
foreach P = ["prescott", "pentium_4_sse3"] in {
  def : ProcModel<P, GenericPostRAModel,
                  [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE3,
                   FeatureFXSR, FeatureNOPL, FeatureCMOV],
                  [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
def : ProcModel<"nocona", GenericPostRAModel,
                [FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX, FeatureSSE3,
                 FeatureFXSR, FeatureNOPL, FeatureX86_64, FeatureCX16],
                [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

// Intel Core 2 Solo/Duo.
foreach P = ["core2", "core_2_duo_ssse3"] in {
  def : ProcModel<P, SandyBridgeModel,
                  [FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX,
                   FeatureSSSE3, FeatureFXSR, FeatureNOPL, FeatureX86_64,
                   FeatureCX16, FeatureLAHFSAHF64],
                  [TuningMacroFusion, TuningSlowUAMem16,
                   TuningInsertVZEROUPPER]>;
}
// Same as core2 except for the SSE level (SSE4.1 instead of SSSE3).
foreach P = ["penryn", "core_2_duo_sse4_1"] in {
  def : ProcModel<P, SandyBridgeModel,
                  [FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX,
                   FeatureSSE41, FeatureFXSR, FeatureNOPL, FeatureX86_64,
                   FeatureCX16, FeatureLAHFSAHF64],
                  [TuningMacroFusion, TuningSlowUAMem16,
                   TuningInsertVZEROUPPER]>;
}

// Atom CPUs.
foreach P = ["bonnell", "atom"] in {
  def : ProcModel<P, AtomModel,
                  ProcessorFeatures.AtomFeatures,
                  ProcessorFeatures.AtomTuning>;
}

foreach P = ["silvermont", "slm", "atom_sse4_2"] in {
  def : ProcModel<P, SLMModel,
                  ProcessorFeatures.SLMFeatures,
                  ProcessorFeatures.SLMTuning>;
}

// Goldmont feature set paired with the Silvermont tuning list.
def : ProcModel<"atom_sse4_2_movbe", SLMModel,
                ProcessorFeatures.GLMFeatures,
                ProcessorFeatures.SLMTuning>;
def : ProcModel<"goldmont", SLMModel,
                ProcessorFeatures.GLMFeatures,
                ProcessorFeatures.GLMTuning>;
foreach P = ["goldmont_plus", "goldmont-plus"] in {
  def : ProcModel<P, SLMModel,
                  ProcessorFeatures.GLPFeatures,
                  ProcessorFeatures.GLPTuning>;
}
def : ProcModel<"tremont", SLMModel,
                ProcessorFeatures.TRMFeatures,
                ProcessorFeatures.TRMTuning>;

// "Arrandale" along with corei3 and corei5
foreach P = ["nehalem", "corei7", "core_i7_sse4_2"] in {
  def : ProcModel<P, SandyBridgeModel,
                  ProcessorFeatures.NHMFeatures,
                  ProcessorFeatures.NHMTuning>;
}

// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
foreach P = ["westmere", "core_aes_pclmulqdq"] in {
  def : ProcModel<P, SandyBridgeModel,
                  ProcessorFeatures.WSMFeatures,
                  ProcessorFeatures.WSMTuning>;
}

foreach P = ["sandybridge", "corei7-avx", "core_2nd_gen_avx"] in {
  def : ProcModel<P, SandyBridgeModel,
                  ProcessorFeatures.SNBFeatures,
                  ProcessorFeatures.SNBTuning>;
}

foreach P = ["ivybridge", "core-avx-i", "core_3rd_gen_avx"] in {
  def : ProcModel<P, SandyBridgeModel,
                  ProcessorFeatures.IVBFeatures,
                  ProcessorFeatures.IVBTuning>;
}

foreach P = ["haswell", "core-avx2", "core_4th_gen_avx",
             "core_4th_gen_avx_tsx"] in {
  def : ProcModel<P, HaswellModel,
                  ProcessorFeatures.HSWFeatures,
                  ProcessorFeatures.HSWTuning>;
}

foreach P = ["broadwell", "core_5th_gen_avx", "core_5th_gen_avx_tsx"] in {
  def : ProcModel<P, BroadwellModel,
                  ProcessorFeatures.BDWFeatures,
                  ProcessorFeatures.BDWTuning>;
}

def : ProcModel<"skylake", SkylakeClientModel,
                ProcessorFeatures.SKLFeatures,
                ProcessorFeatures.SKLTuning>;

// FIXME: define KNL scheduler model
foreach P = ["knl", "mic_avx512"] in {
  def : ProcModel<P, HaswellModel,
                  ProcessorFeatures.KNLFeatures,
                  ProcessorFeatures.KNLTuning>;
}
def : ProcModel<"knm", HaswellModel,
                ProcessorFeatures.KNMFeatures,
                ProcessorFeatures.KNLTuning>;

foreach P = ["skylake-avx512", "skx", "skylake_avx512"] in {
  def : ProcModel<P, SkylakeServerModel,
                  ProcessorFeatures.SKXFeatures,
                  ProcessorFeatures.SKXTuning>;
}

def : ProcModel<"cascadelake", SkylakeServerModel,
                ProcessorFeatures.CLXFeatures,
                ProcessorFeatures.CLXTuning>;
def : ProcModel<"cooperlake", SkylakeServerModel,
                ProcessorFeatures.CPXFeatures,
                ProcessorFeatures.CPXTuning>;
def : ProcModel<"cannonlake", SkylakeServerModel,
                ProcessorFeatures.CNLFeatures,
                ProcessorFeatures.CNLTuning>;
foreach P = ["icelake-client", "icelake_client"] in {
  def : ProcModel<P, IceLakeModel,
                  ProcessorFeatures.ICLFeatures,
                  ProcessorFeatures.ICLTuning>;
}
def : ProcModel<"rocketlake", IceLakeModel,
                ProcessorFeatures.ICLFeatures,
                ProcessorFeatures.ICLTuning>;
foreach P = ["icelake-server", "icelake_server"] in {
  def : ProcModel<P, IceLakeModel,
                  ProcessorFeatures.ICXFeatures,
                  ProcessorFeatures.ICXTuning>;
}
def : ProcModel<"tigerlake", IceLakeModel,
                ProcessorFeatures.TGLFeatures,
                ProcessorFeatures.TGLTuning>;
def : ProcModel<"sapphirerapids", SapphireRapidsModel,
                ProcessorFeatures.SPRFeatures,
                ProcessorFeatures.SPRTuning>;
def : ProcModel<"alderlake", AlderlakePModel,
                ProcessorFeatures.ADLFeatures,
                ProcessorFeatures.ADLTuning>;
// FIXME: Use Gracemont Schedule Model when it is ready.
def : ProcModel<"gracemont", AlderlakePModel,
                ProcessorFeatures.ADLFeatures,
                ProcessorFeatures.GRTTuning>;
foreach P = ["sierraforest", "grandridge"] in {
  def : ProcModel<P, AlderlakePModel,
                  ProcessorFeatures.SRFFeatures,
                  ProcessorFeatures.GRTTuning>;
}
def : ProcModel<"raptorlake", AlderlakePModel,
                ProcessorFeatures.ADLFeatures,
                ProcessorFeatures.ADLTuning>;
def : ProcModel<"meteorlake", AlderlakePModel,
                ProcessorFeatures.ADLFeatures,
                ProcessorFeatures.ADLTuning>;
def : ProcModel<"arrowlake", AlderlakePModel,
                ProcessorFeatures.SRFFeatures,
                ProcessorFeatures.ADLTuning>;
foreach P = ["arrowlake-s", "arrowlake_s", "lunarlake"] in {
  def : ProcModel<P, AlderlakePModel,
                  ProcessorFeatures.ARLSFeatures,
                  ProcessorFeatures.ADLTuning>;
}
def : ProcModel<"pantherlake", AlderlakePModel,
                ProcessorFeatures.PTLFeatures,
                ProcessorFeatures.ADLTuning>;
def : ProcModel<"clearwaterforest", AlderlakePModel,
                ProcessorFeatures.CWFFeatures,
                ProcessorFeatures.ADLTuning>;
def : ProcModel<"emeraldrapids", SapphireRapidsModel,
                ProcessorFeatures.SPRFeatures,
                ProcessorFeatures.SPRTuning>;
def : ProcModel<"graniterapids", SapphireRapidsModel,
                ProcessorFeatures.GNRFeatures,
ProcessorFeatures.GNRTuning>; 1834foreach P = ["graniterapids-d", "graniterapids_d"] in { 1835def : ProcModel<P, SapphireRapidsModel, 1836 ProcessorFeatures.GNRDFeatures, ProcessorFeatures.GNRTuning>; 1837} 1838 1839// AMD CPUs. 1840 1841def : Proc<"k6", [FeatureX87, FeatureCX8, FeatureMMX], 1842 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; 1843def : Proc<"k6-2", [FeatureX87, FeatureCX8, FeatureMMX, FeaturePRFCHW], 1844 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; 1845def : Proc<"k6-3", [FeatureX87, FeatureCX8, FeatureMMX, FeaturePRFCHW], 1846 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; 1847 1848foreach P = ["athlon", "athlon-tbird"] in { 1849 def : Proc<P, [FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX, FeaturePRFCHW, 1850 FeatureNOPL], 1851 [TuningSlowSHLD, TuningSlowUAMem16, TuningInsertVZEROUPPER]>; 1852} 1853 1854foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in { 1855 def : Proc<P, [FeatureX87, FeatureCX8, FeatureCMOV, 1856 FeatureSSE1, FeatureMMX, FeaturePRFCHW, FeatureFXSR, FeatureNOPL], 1857 [TuningSlowSHLD, TuningSlowUAMem16, TuningInsertVZEROUPPER]>; 1858} 1859 1860foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in { 1861 def : Proc<P, [FeatureX87, FeatureCX8, FeatureSSE2, FeatureMMX, FeaturePRFCHW, 1862 FeatureFXSR, FeatureNOPL, FeatureX86_64, FeatureCMOV], 1863 [TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16, 1864 TuningSBBDepBreaking, TuningInsertVZEROUPPER]>; 1865} 1866 1867foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in { 1868 def : Proc<P, [FeatureX87, FeatureCX8, FeatureSSE3, FeatureMMX, FeaturePRFCHW, 1869 FeatureFXSR, FeatureNOPL, FeatureCX16, FeatureCMOV, 1870 FeatureX86_64], 1871 [TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16, 1872 TuningSBBDepBreaking, TuningInsertVZEROUPPER]>; 1873} 1874 1875foreach P = ["amdfam10", "barcelona"] in { 1876 def : Proc<P, ProcessorFeatures.BarcelonaFeatures, 1877 ProcessorFeatures.BarcelonaTuning>; 1878} 1879 1880// Bobcat 1881def : Proc<"btver1", 
ProcessorFeatures.BtVer1Features, 1882 ProcessorFeatures.BtVer1Tuning>; 1883// Jaguar 1884def : ProcModel<"btver2", BtVer2Model, ProcessorFeatures.BtVer2Features, 1885 ProcessorFeatures.BtVer2Tuning>; 1886 1887// Bulldozer 1888def : ProcModel<"bdver1", BdVer2Model, ProcessorFeatures.BdVer1Features, 1889 ProcessorFeatures.BdVer1Tuning>; 1890// Piledriver 1891def : ProcModel<"bdver2", BdVer2Model, ProcessorFeatures.BdVer2Features, 1892 ProcessorFeatures.BdVer2Tuning>; 1893// Steamroller 1894def : Proc<"bdver3", ProcessorFeatures.BdVer3Features, 1895 ProcessorFeatures.BdVer3Tuning>; 1896// Excavator 1897def : Proc<"bdver4", ProcessorFeatures.BdVer4Features, 1898 ProcessorFeatures.BdVer4Tuning>; 1899 1900def : ProcModel<"znver1", Znver1Model, ProcessorFeatures.ZNFeatures, 1901 ProcessorFeatures.ZNTuning>; 1902def : ProcModel<"znver2", Znver2Model, ProcessorFeatures.ZN2Features, 1903 ProcessorFeatures.ZN2Tuning>; 1904def : ProcModel<"znver3", Znver3Model, ProcessorFeatures.ZN3Features, 1905 ProcessorFeatures.ZN3Tuning>; 1906def : ProcModel<"znver4", Znver4Model, ProcessorFeatures.ZN4Features, 1907 ProcessorFeatures.ZN4Tuning>; 1908def : ProcModel<"znver5", Znver4Model, ProcessorFeatures.ZN5Features, 1909 ProcessorFeatures.ZN5Tuning>; 1910 1911def : Proc<"geode", [FeatureX87, FeatureCX8, FeatureMMX, FeaturePRFCHW], 1912 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; 1913 1914def : Proc<"winchip-c6", [FeatureX87, FeatureMMX], 1915 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; 1916def : Proc<"winchip2", [FeatureX87, FeatureMMX, FeaturePRFCHW], 1917 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; 1918def : Proc<"c3", [FeatureX87, FeatureMMX, FeaturePRFCHW], 1919 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; 1920def : Proc<"c3-2", [FeatureX87, FeatureCX8, FeatureMMX, 1921 FeatureSSE1, FeatureFXSR, FeatureCMOV], 1922 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; 1923 1924// We also provide a generic 64-bit specific x86 processor model which tries to 1925// be good for modern 
chips without enabling instruction set encodings past the 1926// basic SSE2 and 64-bit ones. It disables slow things from any mainstream and 1927// modern 64-bit x86 chip, and enables features that are generally beneficial. 1928// 1929// We currently use the Sandy Bridge model as the default scheduling model as 1930// we use it across Nehalem, Westmere, Sandy Bridge, and Ivy Bridge which 1931// covers a huge swath of x86 processors. If there are specific scheduling 1932// knobs which need to be tuned differently for AMD chips, we might consider 1933// forming a common base for them. 1934def : ProcModel<"x86-64", SandyBridgeModel, ProcessorFeatures.X86_64V1Features, 1935 ProcessorFeatures.X86_64V1Tuning>; 1936// Close to Sandybridge. 1937def : ProcModel<"x86-64-v2", SandyBridgeModel, ProcessorFeatures.X86_64V2Features, 1938 ProcessorFeatures.X86_64V2Tuning>; 1939// Close to Haswell. 1940def : ProcModel<"x86-64-v3", HaswellModel, ProcessorFeatures.X86_64V3Features, 1941 ProcessorFeatures.X86_64V3Tuning>; 1942// Close to the AVX-512 level implemented by Xeon Scalable Processors. 1943def : ProcModel<"x86-64-v4", SkylakeServerModel, ProcessorFeatures.X86_64V4Features, 1944 ProcessorFeatures.X86_64V4Tuning>; 1945 1946//===----------------------------------------------------------------------===// 1947// Calling Conventions 1948//===----------------------------------------------------------------------===// 1949 1950include "X86CallingConv.td" 1951 1952 1953//===----------------------------------------------------------------------===// 1954// Assembly Parser 1955//===----------------------------------------------------------------------===// 1956 1957def ATTAsmParserVariant : AsmParserVariant { 1958 int Variant = 0; 1959 1960 // Variant name. 1961 string Name = "att"; 1962 1963 // Discard comments in assembly strings. 1964 string CommentDelimiter = "#"; 1965 1966 // Recognize hard coded registers. 
1967 string RegisterPrefix = "%"; 1968} 1969 1970def IntelAsmParserVariant : AsmParserVariant { 1971 int Variant = 1; 1972 1973 // Variant name. 1974 string Name = "intel"; 1975 1976 // Discard comments in assembly strings. 1977 string CommentDelimiter = ";"; 1978 1979 // Recognize hard coded registers. 1980 string RegisterPrefix = ""; 1981} 1982 1983//===----------------------------------------------------------------------===// 1984// Assembly Printers 1985//===----------------------------------------------------------------------===// 1986 1987// The X86 target supports two different syntaxes for emitting machine code. 1988// This is controlled by the -x86-asm-syntax={att|intel} 1989def ATTAsmWriter : AsmWriter { 1990 string AsmWriterClassName = "ATTInstPrinter"; 1991 int Variant = 0; 1992} 1993def IntelAsmWriter : AsmWriter { 1994 string AsmWriterClassName = "IntelInstPrinter"; 1995 int Variant = 1; 1996} 1997 1998def X86 : Target { 1999 // Information about the instructions... 2000 let InstructionSet = X86InstrInfo; 2001 let AssemblyParserVariants = [ATTAsmParserVariant, IntelAsmParserVariant]; 2002 let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter]; 2003 let AllowRegisterRenaming = 1; 2004} 2005 2006//===----------------------------------------------------------------------===// 2007// Pfm Counters 2008//===----------------------------------------------------------------------===// 2009 2010include "X86PfmCounters.td" 2011