X86.td - OpenGrok cross reference for /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86.td

Lines Matching +full:bypass +full:- +full:enable
1 //===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
12 //===----------------------------------------------------------------------===//
14 // Get the target-independent interfaces which we are implementing...
18 //===----------------------------------------------------------------------===//
22 def Is64Bit : SubtargetFeature<"64bit-mode", "Is64Bit", "true",
23                                "64-bit mode (x86_64)">;
24 def Is32Bit : SubtargetFeature<"32bit-mode", "Is32Bit", "true",
25                                "32-bit mode (80386)">;
26 def Is16Bit : SubtargetFeature<"16bit-mode", "Is16Bit", "true",
27                                "16-bit mode (i8086)">;
29 //===----------------------------------------------------------------------===//
31 //===----------------------------------------------------------------------===//
34                                       "Enable X87 float instructions">;
37                                       "Enable NOPL instruction (generally pentium pro+)">;
40                                       "Enable conditional move instructions">;
46 …      "Enable SSE 4.2 CRC32 instruction (used when SSE4.2 is supported but function is GPR only)">;
70                                       "Enable SSE instructions">;
72                                       "Enable SSE2 instructions",
75                                       "Enable SSE3 instructions",
78                                       "Enable SSSE3 instructions",
81                                       "Enable SSE 4.1 instructions",
84                                       "Enable SSE 4.2 instructions",
90                                       "Enable MMX instructions">;
91 // All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied
93 // without disabling 64-bit mode. Nothing should imply this feature bit. It
94 // is used to enforce that only 64-bit capable CPUs are used in 64-bit mode.
96                                       "Support 64-bit instructions">;
98 …        "64-bit with cmpxchg16b (this is true for most x86-64 chips, but not the first AMD chips)",
105                                       "Enable AVX instructions",
108                                       "Enable AVX2 instructions",
111                                       "Enable three-operand fused multiple-add",
114                        "Support 16-bit floating point conversion instructions",
117                         "Support ZMM and 64-bit mask instructions">;
119                                       "Enable AVX-512 instructions",
122                       "Enable AVX-512 Conflict Detection Instructions",
125                        "true", "Enable AVX-512 Population Count Instructions",
131                       "Enable AVX-512 Doubleword and Quadword Instructions",
134                       "Enable AVX-512 Byte and Word Instructions",
137                       "Enable AVX-512 Vector Length eXtensions",
140                       "Enable AVX-512 Vector Byte Manipulation Instructions",
143                       "Enable AVX-512 further Vector Byte Manipulation Instructions",
146                            "Enable AVX-IFMA",
149                       "Enable AVX-512 Integer Fused Multiple-Add",
152                       "Enable protection keys">;
154                           "Enable AVX-512 Vector Neural Network Instructions",
163                        "Enable AVX-512 Bit Algorithms",
167                                             "Enable AVX-512 vp2intersect",
175                            "Support 16-bit floating point",
179                              "Enable AVX-VNNI-INT8",
183                              "Enable AVX-VNNI-INT16",
186                          "Enable packed carry-less multiplication instructions",
189                          "Enable Galois Field Arithmetic Instructions",
192                                          "Enable vpclmulqdq instructions",
195                                       "Enable four-operand fused multiple-add",
198                                       "Enable XOP instructions",
200 def FeatureSSEUnalignedMem : SubtargetFeature<"sse-unaligned-mem",
204                                       "Enable AES instructions",
210                                       "Enable TBM instructions">;
212                                       "Enable LWP instructions">;
230                                       "Enable SHA instructions",
235 // Processor supports CET SHSTK - Control-Flow Enforcement Technology
238                        "Support CET Shadow-Stack instructions">;
250                            "Support LAHF and SAHF instructions in 64-bit mode">;
252                                       "Enable MONITORX/MWAITX timer functionality">;
254                                       "Enable Cache Line Zero">;
256                                       "Enable Cache Line Demote">;
259 def FeatureAMXTILE     : SubtargetFeature<"amx-tile", "HasAMXTILE", "true",
260                                       "Support AMX-TILE instructions">;
261 def FeatureAMXINT8     : SubtargetFeature<"amx-int8", "HasAMXINT8", "true",
262                                       "Support AMX-INT8 instructions",
264 def FeatureAMXBF16     : SubtargetFeature<"amx-bf16", "HasAMXBF16", "true",
265                                       "Support AMX-BF16 instructions",
267 def FeatureAMXFP16     : SubtargetFeature<"amx-fp16", "HasAMXFP16", "true",
268                                       "Support AMX amx-fp16 instructions",
270 def FeatureAMXCOMPLEX : SubtargetFeature<"amx-complex", "HasAMXCOMPLEX", "true",
271                                          "Support AMX-COMPLEX instructions",
276                                      "Support RAO-INT instructions",
279                                            "Support AVX-NE-CONVERT instructions",
282                                       "Invalidate Process-Context Identifier">;
284                                       "Enable Software Guard Extensions">;
321 def FeatureAVX10_1 : SubtargetFeature<"avx10.1-256", "HasAVX10_1", "true",
322                                       "Support AVX10.1 up to 256-bit instruction",
326 def FeatureAVX10_1_512 : SubtargetFeature<"avx10.1-512", "HasAVX10_1_512", "true",
327                                           "Support AVX10.1 up to 512-bit instruction",
334                                   "Support Push-Pop Acceleration">;
336                                   "Support non-destructive destination">;
344                                  "Support zero-upper SETcc/IMUL">;
346     : SubtargetFeature<"inline-asm-use-gpr32", "UseInlineAsmGPR32", "true",
347                        "Enable use of GPR32 in inline assembly for APX">;
366     : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
369 //===----------------------------------------------------------------------===//
371 //===----------------------------------------------------------------------===//
377           "retpoline-indirect-calls", "UseRetpolineIndirectCalls", "true",
385           "retpoline-indirect-branches", "UseRetpolineIndirectBranches", "true",
388 // Deprecated umbrella feature for enabling both `retpoline-indirect-calls` and
389 // `retpoline-indirect-branches` above.
400 // environments such as a kernel that does boot-time hot patching.
403           "retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
412           "lvi-cfi", "UseLVIControlFlowIntegrity", "true",
418 // Enable SESES to mitigate speculative execution attacks
430           "lvi-load-hardening", "UseLVILoadHardening", "true",
436           "tagged-globals", "AllowTaggedGlobals", "true",
443           "harden-sls-ret", "HardenSlsRet", "true",
448           "harden-sls-ijmp", "HardenSlsIJmp", "true",
451 //===----------------------------------------------------------------------===//
453 //===----------------------------------------------------------------------===//
454 def TuningPreferMovmskOverVTest : SubtargetFeature<"prefer-movmsk-over-vtest",
458 def TuningSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
461 def TuningSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
464 def TuningSlowPMADDWD : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow",
469 def TuningSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
471                                 "Slow unaligned 16-byte memory access">;
473 def TuningSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32",
475                                 "Slow unaligned 32-byte memory access">;
477 def TuningLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
480 // True if 8-bit divisions are significantly faster than
481 // 32-bit divisions and should be used when possible.
482 def TuningSlowDivide32 : SubtargetFeature<"idivl-to-divb",
484                                      "Use 8-bit divide for positive values less than 256">;
486 // True if 32-bit divides are significantly faster than
487 // 64-bit divisions and should be used when possible.
488 def TuningSlowDivide64 : SubtargetFeature<"idivq-to-divl",
490                                      "Use 32-bit divide for positive values less than 2^32">;
492 def TuningPadShortFunctions : SubtargetFeature<"pad-short-functions",
499 def TuningSlowTwoMemOps : SubtargetFeature<"slow-two-mem-ops",
505 def TuningLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LeaUsesAG", "true",
508 def TuningSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
514 def TuningSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
518 def TuningSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
521 def TuningPOPCNTFalseDeps : SubtargetFeature<"false-deps-popcnt",
525 def TuningLZCNTFalseDeps : SubtargetFeature<"false-deps-lzcnt-tzcnt",
529 def TuningMULCFalseDeps : SubtargetFeature<"false-deps-mulc",
533 def TuningPERMFalseDeps : SubtargetFeature<"false-deps-perm",
537 def TuningRANGEFalseDeps : SubtargetFeature<"false-deps-range",
541 def TuningGETMANTFalseDeps : SubtargetFeature<"false-deps-getmant",
546 def TuningMULLQFalseDeps : SubtargetFeature<"false-deps-mullq",
550 def TuningSBBDepBreaking : SubtargetFeature<"sbb-dep-breaking",
557     : SubtargetFeature<"fast-variable-crosslane-shuffle",
559                        "true", "Cross-lane shuffles with variable masks are fast">;
561     : SubtargetFeature<"fast-variable-perlane-shuffle",
563                        "true", "Per-lane shuffles with variable masks are fast">;
565 // Goldmont / Tremont (Atom in general) have no bypass delay
566 def TuningNoDomainDelay : SubtargetFeature<"no-bypass-delay",
568                                    "Has no bypass delay when using the 'wrong' domain">;
570 // Many processors (Nehalem+ on Intel) have no bypass delay when
572 def TuningNoDomainDelayMov : SubtargetFeature<"no-bypass-delay-mov",
574                                    "Has no bypass delay when using the 'wrong' mov type">;
576 // Newer processors (Skylake+ on Intel) have no bypass delay when
578 def TuningNoDomainDelayBlend : SubtargetFeature<"no-bypass-delay-blend",
580                                    "Has no bypass delay when using the 'wrong' blend type">;
582 // Newer processors (Haswell+ on Intel) have no bypass delay when
584 def TuningNoDomainDelayShuffle : SubtargetFeature<"no-bypass-delay-shuffle",
586                                    "Has no bypass delay when using the 'wrong' shuffle type">;
590 def TuningPreferShiftShuffle : SubtargetFeature<"faster-shift-than-shuffle",
594 def TuningFastImmVectorShift : SubtargetFeature<"tuning-fast-imm-vector-shift",
614 // RSQRTSS followed by a Newton-Raphson iteration.
616     : SubtargetFeature<"fast-scalar-fsqrt", "HasFastScalarFSQRT",
617                        "true", "Scalar SQRT is fast (disable Newton-Raphson)">;
619 // (throughput) as RSQRTPS/VRSQRTPS followed by a Newton-Raphson iteration.
621     : SubtargetFeature<"fast-vector-fsqrt", "HasFastVectorFSQRT",
622                        "true", "Vector SQRT is fast (disable Newton-Raphson)">;
628           "fast-lzcnt", "HasFastLZCNT", "true",
631 // If the target can efficiently decode NOPs up to 7 bytes in length.
634           "fast-7bytenop", "HasFast7ByteNOP", "true",
637 // If the target can efficiently decode NOPs up to 11 bytes in length.
640           "fast-11bytenop", "HasFast11ByteNOP", "true",
643 // If the target can efficiently decode NOPs up to 15 bytes in length.
646           "fast-15bytenop", "HasFast15ByteNOP", "true",
654           "fast-shld-rotate", "HasFastSHLDRotate", "true",
673 // similar to Skylake Server (AVX-512).
675     : SubtargetFeature<"fast-gather", "HasFastGather", "true",
676 …  "Indicates if gather is reasonably fast (this is true for Skylake client and all AVX-512 CPUs)">;
681           "fast-dpwssd", "HasFastDPWSSD", "true",
685     : SubtargetFeature<"prefer-no-gather", "PreferGather", "false",
688     : SubtargetFeature<"prefer-no-scatter", "PreferScatter", "false",
692     : SubtargetFeature<"prefer-128-bit", "Prefer128Bit", "true",
693                        "Prefer 128-bit AVX instructions">;
696     : SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true",
697                        "Prefer 256-bit AVX instructions">;
700     : SubtargetFeature<"allow-light-256-bit", "AllowLight256Bit", "true",
701                        "Enable generation of 256-bit load/stores even if we prefer 128-bit">;
704     : SubtargetFeature<"prefer-mask-registers", "PreferMaskRegisters", "true",
707 def TuningFastBEXTR : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true",
716         "fast-hops", "HasFastHorizontalOps", "true",
722         "fast-scalar-shift-masks", "HasFastScalarShiftMasks", "true",
727         "fast-vector-shift-masks", "HasFastVectorShiftMasks", "true",
731     : SubtargetFeature<"fast-movbe", "HasFastMOVBE", "true",
732     "Prefer a movbe over a single-use load + bswap / single-use bswap + store">;
735     : SubtargetFeature<"fast-imm16", "HasFastImm16", "true",
739     : SubtargetFeature<"use-slm-arith-costs", "UseSLMArithCosts", "true",
743     : SubtargetFeature<"use-glm-div-sqrt-costs", "UseGLMDivSqrtCosts", "true",
748 def TuningBranchHint: SubtargetFeature<"branch-hint", "HasBranchHint", "true",
751 //===----------------------------------------------------------------------===//
753 // TODO: Remove these - use general tuning features to determine codegen.
754 //===----------------------------------------------------------------------===//
759 //===----------------------------------------------------------------------===//
761 //===----------------------------------------------------------------------===//
766 //===----------------------------------------------------------------------===//
768 //===----------------------------------------------------------------------===//
776 //===----------------------------------------------------------------------===//
778 //===----------------------------------------------------------------------===//
797 //===----------------------------------------------------------------------===//
799 //===----------------------------------------------------------------------===//
802   // x86-64 micro-architecture levels: x86-64 and x86-64-v[234]
970   // Skylake-AVX512
1561 //===----------------------------------------------------------------------===//
1563 //===----------------------------------------------------------------------===//
1577 // constructor checks that any CPU used in 64-bit mode has FeatureX86_64
1581 // recommended to use "tune-cpu"="x86-64" in function attribute for consistency.
1599 foreach P = ["pentium-mmx", "pentium_mmx"] in {
1620 // Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
1621 // The intent is to enable it for pentium4 which is the current default
1622 // processor in a vanilla 32-bit clang compilation when no specific
1626 // pentium-m, pentium4m, prescott and nocona are included as a preventative
1630 foreach P = ["pentium_m", "pentium-m"] in {
1732 foreach P = ["goldmont_plus", "goldmont-plus"] in {
1751 foreach P = ["sandybridge", "corei7-avx", "core_2nd_gen_avx"] in {
1756 foreach P = ["ivybridge", "core-avx-i", "core_3rd_gen_avx"] in {
1761 foreach P = ["haswell", "core-avx2", "core_4th_gen_avx", "core_4th_gen_avx_tsx"] in {
1782 foreach P = ["skylake-avx512", "skx", "skylake_avx512"] in {
1793 foreach P = ["icelake-client", "icelake_client"] in {
1799 foreach P = ["icelake-server", "icelake_server"] in {
1822 foreach P = ["arrowlake-s", "arrowlake_s", "lunarlake"] in {
1834 foreach P = ["graniterapids-d", "graniterapids_d"] in {
1843 def : Proc<"k6-2", [FeatureX87, FeatureCX8, FeatureMMX, FeaturePRFCHW],
1845 def : Proc<"k6-3", [FeatureX87, FeatureCX8, FeatureMMX, FeaturePRFCHW],
1848 foreach P = ["athlon", "athlon-tbird"] in {
1854 foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in {
1860 foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in {
1867 foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
1914 def : Proc<"winchip-c6",      [FeatureX87, FeatureMMX],
1920 def : Proc<"c3-2",            [FeatureX87, FeatureCX8, FeatureMMX,
1924 // We also provide a generic 64-bit specific x86 processor model which tries to
1926 // basic SSE2 and 64-bit ones. It disables slow things from any mainstream and
1927 // modern 64-bit x86 chip, and enables features that are generally beneficial.
1934 def : ProcModel<"x86-64", SandyBridgeModel, ProcessorFeatures.X86_64V1Features,
1937 def : ProcModel<"x86-64-v2", SandyBridgeModel, ProcessorFeatures.X86_64V2Features,
1940 def : ProcModel<"x86-64-v3", HaswellModel, ProcessorFeatures.X86_64V3Features,
1942 // Close to the AVX-512 level implemented by Xeon Scalable Processors.
1943 def : ProcModel<"x86-64-v4", SkylakeServerModel, ProcessorFeatures.X86_64V4Features,
1946 //===----------------------------------------------------------------------===//
1948 //===----------------------------------------------------------------------===//
1953 //===----------------------------------------------------------------------===//
1955 //===----------------------------------------------------------------------===//
1983 //===----------------------------------------------------------------------===//
1985 //===----------------------------------------------------------------------===//
1988 // This is controlled by the -x86-asm-syntax={att|intel}
2006 //===----------------------------------------------------------------------===//
2008 //===----------------------------------------------------------------------===//