Lines Matching +full:bypass +full:- +full:enable
1 //===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
12 //===----------------------------------------------------------------------===//
14 // Get the target-independent interfaces which we are implementing...
18 //===----------------------------------------------------------------------===//
22 def Is64Bit : SubtargetFeature<"64bit-mode", "Is64Bit", "true",
23 "64-bit mode (x86_64)">;
24 def Is32Bit : SubtargetFeature<"32bit-mode", "Is32Bit", "true",
25 "32-bit mode (80386)">;
26 def Is16Bit : SubtargetFeature<"16bit-mode", "Is16Bit", "true",
27 "16-bit mode (i8086)">;
29 //===----------------------------------------------------------------------===//
31 //===----------------------------------------------------------------------===//
34 "Enable X87 float instructions">;
37 "Enable NOPL instruction (generally pentium pro+)">;
40 "Enable conditional move instructions">;
46 … "Enable SSE 4.2 CRC32 instruction (used when SSE4.2 is supported but function is GPR only)">;
70 "Enable SSE instructions">;
72 "Enable SSE2 instructions",
75 "Enable SSE3 instructions",
78 "Enable SSSE3 instructions",
81 "Enable SSE 4.1 instructions",
84 "Enable SSE 4.2 instructions",
90 "Enable MMX instructions">;
91 // All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied
93 // without disabling 64-bit mode. Nothing should imply this feature bit. It
94 // is used to enforce that only 64-bit capable CPUs are used in 64-bit mode.
96 "Support 64-bit instructions">;
98 … "64-bit with cmpxchg16b (this is true for most x86-64 chips, but not the first AMD chips)",
105 "Enable AVX instructions",
108 "Enable AVX2 instructions",
111 "Enable three-operand fused multiple-add",
114 "Support 16-bit floating point conversion instructions",
117 "Support ZMM and 64-bit mask instructions">;
119 "Enable AVX-512 instructions",
122 "Enable AVX-512 Conflict Detection Instructions",
125 "true", "Enable AVX-512 Population Count Instructions",
131 "Enable AVX-512 Doubleword and Quadword Instructions",
134 "Enable AVX-512 Byte and Word Instructions",
137 "Enable AVX-512 Vector Length eXtensions",
140 "Enable AVX-512 Vector Byte Manipulation Instructions",
143 "Enable AVX-512 further Vector Byte Manipulation Instructions",
146 "Enable AVX-IFMA",
149 "Enable AVX-512 Integer Fused Multiple-Add",
152 "Enable protection keys">;
154 "Enable AVX-512 Vector Neural Network Instructions",
163 "Enable AVX-512 Bit Algorithms",
167 "Enable AVX-512 vp2intersect",
175 "Support 16-bit floating point",
179 "Enable AVX-VNNI-INT8",
183 "Enable AVX-VNNI-INT16",
186 "Enable packed carry-less multiplication instructions",
189 "Enable Galois Field Arithmetic Instructions",
192 "Enable vpclmulqdq instructions",
195 "Enable four-operand fused multiple-add",
198 "Enable XOP instructions",
200 def FeatureSSEUnalignedMem : SubtargetFeature<"sse-unaligned-mem",
204 "Enable AES instructions",
210 "Enable TBM instructions">;
212 "Enable LWP instructions">;
230 "Enable SHA instructions",
235 // Processor supports CET SHSTK - Control-Flow Enforcement Technology
238 "Support CET Shadow-Stack instructions">;
250 "Support LAHF and SAHF instructions in 64-bit mode">;
252 "Enable MONITORX/MWAITX timer functionality">;
254 "Enable Cache Line Zero">;
256 "Enable Cache Line Demote">;
259 def FeatureAMXTILE : SubtargetFeature<"amx-tile", "HasAMXTILE", "true",
260 "Support AMX-TILE instructions">;
261 def FeatureAMXINT8 : SubtargetFeature<"amx-int8", "HasAMXINT8", "true",
262 "Support AMX-INT8 instructions",
264 def FeatureAMXBF16 : SubtargetFeature<"amx-bf16", "HasAMXBF16", "true",
265 "Support AMX-BF16 instructions",
267 def FeatureAMXFP16 : SubtargetFeature<"amx-fp16", "HasAMXFP16", "true",
268 "Support AMX amx-fp16 instructions",
270 def FeatureAMXCOMPLEX : SubtargetFeature<"amx-complex", "HasAMXCOMPLEX", "true",
271 "Support AMX-COMPLEX instructions",
276 "Support RAO-INT instructions",
279 "Support AVX-NE-CONVERT instructions",
282 "Invalidate Process-Context Identifier">;
284 "Enable Software Guard Extensions">;
321 def FeatureAVX10_1 : SubtargetFeature<"avx10.1-256", "HasAVX10_1", "true",
322 "Support AVX10.1 up to 256-bit instruction",
326 def FeatureAVX10_1_512 : SubtargetFeature<"avx10.1-512", "HasAVX10_1_512", "true",
327 "Support AVX10.1 up to 512-bit instruction",
334 "Support Push-Pop Acceleration">;
336 "Support non-destructive destination">;
344 "Support zero-upper SETcc/IMUL">;
346 : SubtargetFeature<"inline-asm-use-gpr32", "UseInlineAsmGPR32", "true",
347 "Enable use of GPR32 in inline assembly for APX">;
366 : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
369 //===----------------------------------------------------------------------===//
371 //===----------------------------------------------------------------------===//
377 "retpoline-indirect-calls", "UseRetpolineIndirectCalls", "true",
385 "retpoline-indirect-branches", "UseRetpolineIndirectBranches", "true",
388 // Deprecated umbrella feature for enabling both `retpoline-indirect-calls` and
389 // `retpoline-indirect-branches` above.
400 // environments such as a kernel that does boot-time hot patching.
403 "retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
412 "lvi-cfi", "UseLVIControlFlowIntegrity", "true",
418 // Enable SESES to mitigate speculative execution attacks
430 "lvi-load-hardening", "UseLVILoadHardening", "true",
436 "tagged-globals", "AllowTaggedGlobals", "true",
443 "harden-sls-ret", "HardenSlsRet", "true",
448 "harden-sls-ijmp", "HardenSlsIJmp", "true",
451 //===----------------------------------------------------------------------===//
453 //===----------------------------------------------------------------------===//
454 def TuningPreferMovmskOverVTest : SubtargetFeature<"prefer-movmsk-over-vtest",
458 def TuningSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
461 def TuningSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
464 def TuningSlowPMADDWD : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow",
469 def TuningSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
471 "Slow unaligned 16-byte memory access">;
473 def TuningSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32",
475 "Slow unaligned 32-byte memory access">;
477 def TuningLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
480 // True if 8-bit divisions are significantly faster than
481 // 32-bit divisions and should be used when possible.
482 def TuningSlowDivide32 : SubtargetFeature<"idivl-to-divb",
484 "Use 8-bit divide for positive values less than 256">;
486 // True if 32-bit divisions are significantly faster than
487 // 64-bit divisions and should be used when possible.
488 def TuningSlowDivide64 : SubtargetFeature<"idivq-to-divl",
490 "Use 32-bit divide for positive values less than 2^32">;
492 def TuningPadShortFunctions : SubtargetFeature<"pad-short-functions",
499 def TuningSlowTwoMemOps : SubtargetFeature<"slow-two-mem-ops",
505 def TuningLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LeaUsesAG", "true",
508 def TuningSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
514 def TuningSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
518 def TuningSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
521 def TuningPOPCNTFalseDeps : SubtargetFeature<"false-deps-popcnt",
525 def TuningLZCNTFalseDeps : SubtargetFeature<"false-deps-lzcnt-tzcnt",
529 def TuningMULCFalseDeps : SubtargetFeature<"false-deps-mulc",
533 def TuningPERMFalseDeps : SubtargetFeature<"false-deps-perm",
537 def TuningRANGEFalseDeps : SubtargetFeature<"false-deps-range",
541 def TuningGETMANTFalseDeps : SubtargetFeature<"false-deps-getmant",
546 def TuningMULLQFalseDeps : SubtargetFeature<"false-deps-mullq",
550 def TuningSBBDepBreaking : SubtargetFeature<"sbb-dep-breaking",
557 : SubtargetFeature<"fast-variable-crosslane-shuffle",
559 "true", "Cross-lane shuffles with variable masks are fast">;
561 : SubtargetFeature<"fast-variable-perlane-shuffle",
563 "true", "Per-lane shuffles with variable masks are fast">;
565 // Goldmont / Tremont (Atom in general) have no bypass delay
566 def TuningNoDomainDelay : SubtargetFeature<"no-bypass-delay",
568 "Has no bypass delay when using the 'wrong' domain">;
570 // Many processors (Nehalem+ on Intel) have no bypass delay when
572 def TuningNoDomainDelayMov : SubtargetFeature<"no-bypass-delay-mov",
574 "Has no bypass delay when using the 'wrong' mov type">;
576 // Newer processors (Skylake+ on Intel) have no bypass delay when
578 def TuningNoDomainDelayBlend : SubtargetFeature<"no-bypass-delay-blend",
580 "Has no bypass delay when using the 'wrong' blend type">;
582 // Newer processors (Haswell+ on Intel) have no bypass delay when
584 def TuningNoDomainDelayShuffle : SubtargetFeature<"no-bypass-delay-shuffle",
586 "Has no bypass delay when using the 'wrong' shuffle type">;
590 def TuningPreferShiftShuffle : SubtargetFeature<"faster-shift-than-shuffle",
594 def TuningFastImmVectorShift : SubtargetFeature<"tuning-fast-imm-vector-shift",
614 // RSQRTSS followed by a Newton-Raphson iteration.
616 : SubtargetFeature<"fast-scalar-fsqrt", "HasFastScalarFSQRT",
617 "true", "Scalar SQRT is fast (disable Newton-Raphson)">;
619 // (throughput) as RSQRTPS/VRSQRTPS followed by a Newton-Raphson iteration.
621 : SubtargetFeature<"fast-vector-fsqrt", "HasFastVectorFSQRT",
622 "true", "Vector SQRT is fast (disable Newton-Raphson)">;
628 "fast-lzcnt", "HasFastLZCNT", "true",
631 // If the target can efficiently decode NOPs up to 7 bytes in length.
634 "fast-7bytenop", "HasFast7ByteNOP", "true",
637 // If the target can efficiently decode NOPs up to 11 bytes in length.
640 "fast-11bytenop", "HasFast11ByteNOP", "true",
643 // If the target can efficiently decode NOPs up to 15 bytes in length.
646 "fast-15bytenop", "HasFast15ByteNOP", "true",
654 "fast-shld-rotate", "HasFastSHLDRotate", "true",
673 // similar to Skylake Server (AVX-512).
675 : SubtargetFeature<"fast-gather", "HasFastGather", "true",
676 … "Indicates if gather is reasonably fast (this is true for Skylake client and all AVX-512 CPUs)">;
681 "fast-dpwssd", "HasFastDPWSSD", "true",
685 : SubtargetFeature<"prefer-no-gather", "PreferGather", "false",
688 : SubtargetFeature<"prefer-no-scatter", "PreferScatter", "false",
692 : SubtargetFeature<"prefer-128-bit", "Prefer128Bit", "true",
693 "Prefer 128-bit AVX instructions">;
696 : SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true",
697 "Prefer 256-bit AVX instructions">;
700 : SubtargetFeature<"allow-light-256-bit", "AllowLight256Bit", "true",
701 "Enable generation of 256-bit load/stores even if we prefer 128-bit">;
704 : SubtargetFeature<"prefer-mask-registers", "PreferMaskRegisters", "true",
707 def TuningFastBEXTR : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true",
716 "fast-hops", "HasFastHorizontalOps", "true",
722 "fast-scalar-shift-masks", "HasFastScalarShiftMasks", "true",
727 "fast-vector-shift-masks", "HasFastVectorShiftMasks", "true",
731 : SubtargetFeature<"fast-movbe", "HasFastMOVBE", "true",
732 "Prefer a movbe over a single-use load + bswap / single-use bswap + store">;
735 : SubtargetFeature<"fast-imm16", "HasFastImm16", "true",
739 : SubtargetFeature<"use-slm-arith-costs", "UseSLMArithCosts", "true",
743 : SubtargetFeature<"use-glm-div-sqrt-costs", "UseGLMDivSqrtCosts", "true",
748 def TuningBranchHint: SubtargetFeature<"branch-hint", "HasBranchHint", "true",
751 //===----------------------------------------------------------------------===//
753 // TODO: Remove these - use general tuning features to determine codegen.
754 //===----------------------------------------------------------------------===//
759 //===----------------------------------------------------------------------===//
761 //===----------------------------------------------------------------------===//
766 //===----------------------------------------------------------------------===//
768 //===----------------------------------------------------------------------===//
776 //===----------------------------------------------------------------------===//
778 //===----------------------------------------------------------------------===//
797 //===----------------------------------------------------------------------===//
799 //===----------------------------------------------------------------------===//
802 // x86-64 micro-architecture levels: x86-64 and x86-64-v[234]
970 // Skylake-AVX512
1561 //===----------------------------------------------------------------------===//
1563 //===----------------------------------------------------------------------===//
1577 // constructor checks that any CPU used in 64-bit mode has FeatureX86_64
1581 // recommended to use "tune-cpu"="x86-64" in function attribute for consistency.
1599 foreach P = ["pentium-mmx", "pentium_mmx"] in {
1620 // Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
1621 // The intent is to enable it for pentium4 which is the current default
1622 // processor in a vanilla 32-bit clang compilation when no specific
1626 // pentium-m, pentium4m, prescott and nocona are included as a preventative
1630 foreach P = ["pentium_m", "pentium-m"] in {
1732 foreach P = ["goldmont_plus", "goldmont-plus"] in {
1751 foreach P = ["sandybridge", "corei7-avx", "core_2nd_gen_avx"] in {
1756 foreach P = ["ivybridge", "core-avx-i", "core_3rd_gen_avx"] in {
1761 foreach P = ["haswell", "core-avx2", "core_4th_gen_avx", "core_4th_gen_avx_tsx"] in {
1782 foreach P = ["skylake-avx512", "skx", "skylake_avx512"] in {
1793 foreach P = ["icelake-client", "icelake_client"] in {
1799 foreach P = ["icelake-server", "icelake_server"] in {
1822 foreach P = ["arrowlake-s", "arrowlake_s", "lunarlake"] in {
1834 foreach P = ["graniterapids-d", "graniterapids_d"] in {
1843 def : Proc<"k6-2", [FeatureX87, FeatureCX8, FeatureMMX, FeaturePRFCHW],
1845 def : Proc<"k6-3", [FeatureX87, FeatureCX8, FeatureMMX, FeaturePRFCHW],
1848 foreach P = ["athlon", "athlon-tbird"] in {
1854 foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in {
1860 foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in {
1867 foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
1914 def : Proc<"winchip-c6", [FeatureX87, FeatureMMX],
1920 def : Proc<"c3-2", [FeatureX87, FeatureCX8, FeatureMMX,
1924 // We also provide a generic 64-bit specific x86 processor model which tries to
1926 // basic SSE2 and 64-bit ones. It disables slow things from any mainstream and
1927 // modern 64-bit x86 chip, and enables features that are generally beneficial.
1934 def : ProcModel<"x86-64", SandyBridgeModel, ProcessorFeatures.X86_64V1Features,
1937 def : ProcModel<"x86-64-v2", SandyBridgeModel, ProcessorFeatures.X86_64V2Features,
1940 def : ProcModel<"x86-64-v3", HaswellModel, ProcessorFeatures.X86_64V3Features,
1942 // Close to the AVX-512 level implemented by Xeon Scalable Processors.
1943 def : ProcModel<"x86-64-v4", SkylakeServerModel, ProcessorFeatures.X86_64V4Features,
1946 //===----------------------------------------------------------------------===//
1948 //===----------------------------------------------------------------------===//
1953 //===----------------------------------------------------------------------===//
1955 //===----------------------------------------------------------------------===//
1983 //===----------------------------------------------------------------------===//
1985 //===----------------------------------------------------------------------===//
1988 // This is controlled by the -x86-asm-syntax={att|intel}
2006 //===----------------------------------------------------------------------===//
2008 //===----------------------------------------------------------------------===//