Lines Matching +full:3 +full:- +full:point

1 //=- X86ScheduleZnver3.td - X86 Znver3 Scheduling ------------*- tablegen -*-=//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
16 // * AMD Zen 3 Ryzen Deep Dive Review
18 //===----------------------------------------------------------------------===//
25 // AMD SOG 19h, 2.10.3
27 // outstanding operations (integer, load/store, and floating-point) and is
30 // to 256 macro ops in-flight in non-SMT mode or 128 per thread in SMT mode.
34 // At each set-way intersection is an entry containing up to 8 macro ops.
37 // the op-cache, we limit the loop buffer to 8*12 = 96 to avoid loop unrolling
38 // leading to excessive filling of the op-cache from frontend.
41 // The L1 data cache has a 4- or 5- cycle integer load-to-use latency.
44 // <...> and can achieve 4-cycle load-to-use integer load latency.
48 // <...> and can achieve <...> 7-cycle load-to-use FP load latency.
67 //===----------------------------------------------------------------------===//
69 //===----------------------------------------------------------------------===//
71 // AMD SOG 19h, 2.10.3 Retire Control Unit
73 // 256 macro ops in-flight in non-SMT mode or 128 per thread in SMT mode. <...>
74 // The retire unit handles in-order commit of up to eight macro ops per cycle.
77 //===----------------------------------------------------------------------===//
79 //===----------------------------------------------------------------------===//
83 //===----------------------------------------------------------------------===//
93 //===----------------------------------------------------------------------===//
109 // address generation. There are also 3 store data movement units
117 //===----------------------------------------------------------------------===//
131 //===----------------------------------------------------------------------===//
154 //===----------------------------------------------------------------------===//
156 // AMD SOG 19h, 2.10.3 Retire Control Unit
171 Zn3ALU3, Zn3BRU1 // scheduler 3
177 //===----------------------------------------------------------------------===//
178 // Floating-Point Unit
182 // The processor uses <...> two decoupled independent floating point schedulers
183 // each servicing two FP pipelines and one store or FP-to-integer pipeline.
187 //===----------------------------------------------------------------------===//
191 // Agner, 22.10 Floating point execution pipes
192 // There are six floating point/vector execution pipes,
201 //===----------------------------------------------------------------------===//
202 // AMD SOG 19h, 2.11.1 Floating Point Execution Resources
204 // (v)FMUL*, (v)FMA*, Floating Point Compares, Blendv(DQ)
217 // AMD SOG 19h, 2.11.1 Floating Point Execution Resources
223 // Moves and Logical operations on Floating Point Data Types
264 //===----------------------------------------------------------------------===//
266 // AMD SOG 19h, 2.11 Floating-Point Unit
267 // Stores and floating point to general purpose register transfer
271 // (v)FMUL*, (v)FMA*, Floating Point Compares, Blendv(DQ)
284 // Moves and Logical operations on Floating Point Data Types
290 // AMD SOG 19h, 2.11 Floating-Point Unit
291 // Stores and floating point to general purpose register transfer
295 // AMD SOG 19h, 2.11 Floating-Point Unit
330 //===----------------------------------------------------------------------===//
332 // Agner, 21.8 Register renaming and out-of-order schedulers
333 // The floating point register file has 160 vector registers
340 // AMD SOG 19h, 2.11 Floating-Point Unit
341 // The floating-point scheduler has a 2*32 entry macro op capacity.
342 // AMD SOG 19h, 2.11 Floating-Point Unit
351 // AMD SOG 19h, 2.11 Floating-Point Unit
353 // even if floating-point scheduler is full.
357 //===----------------------------------------------------------------------===//
358 // Load-Store Unit
361 // AMD SOG 19h, 2.12 Load-Store Unit
362 // The LS unit contains three largely independent pipe-lines
363 // enabling the execution of three 256-bit memory operations per cycle.
364 def Zn3LSU : ProcResource<3>;
366 // AMD SOG 19h, 2.12 Load-Store Unit
369 def Zn3Load : ProcResource<3> {
370 // AMD SOG 19h, 2.12 Load-Store Unit
371 // The LS unit can process up to 72 out-of-order loads.
377 // AMD SOG 19h, 2.12 Load-Store Unit
381 // AMD SOG 19h, 2.12 Load-Store Unit
382 // The LS unit utilizes a 64-entry store queue (STQ).
388 //===----------------------------------------------------------------------===//
390 //===----------------------------------------------------------------------===//
393 // Instructions with folded loads are usually micro-fused, so they only appear
394 // as two micro-ops when dispatched by the schedulers.
473 //===----------------------------------------------------------------------===//
475 //===----------------------------------------------------------------------===//
483 // AMD SOG 19h, 2.11 Floating-Point Unit
486 def : ReadAdvance<ReadInt2Fpu, -1>;
495 // Model the effect of clobbering the read-write mask operand of the GATHER operation.
501 let ReleaseAtCycles = [3, 1];
556 let Latency = 3;
582 // On Znver3, a slow LEA is either a 3Ops LEA (base, index, offset),
586 // A 3-operand LEA (base, index, offset).
604 let Latency = 2; // FIXME: not from llvm-exegesis
612 defm : Zn3WriteResIntPair<WriteIMul8, [Zn3Multiplier], 3, [3], 1>; // Integer 8-bit multiplication.
613 defm : Zn3WriteResIntPair<WriteIMul16, [Zn3Multiplier], 3, [3], 3, /*LoadUOps=*/1>; // Integer 16-b…
614 defm : Zn3WriteResIntPair<WriteIMul16Imm, [Zn3Multiplier], 4, [4], 2>; // Integer 16-bit multiplica…
615 defm : Zn3WriteResIntPair<WriteIMul16Reg, [Zn3Multiplier], 3, [1], 1>; // Integer 16-bit multiplica…
616 defm : Zn3WriteResIntPair<WriteIMul32, [Zn3Multiplier], 3, [3], 2>; // Integer 32-bit multiplica…
617 defm : Zn3WriteResIntPair<WriteMULX32, [Zn3Multiplier], 3, [1], 2>; // Integer 32-bit Unsigned M…
618 defm : Zn3WriteResIntPair<WriteIMul32Imm, [Zn3Multiplier], 3, [1], 1>; // Integer 32-bit multiplica…
619 defm : Zn3WriteResIntPair<WriteIMul32Reg, [Zn3Multiplier], 3, [1], 1>; // Integer 32-bit multiplica…
620 defm : Zn3WriteResIntPair<WriteIMul64, [Zn3Multiplier], 3, [3], 2>; // Integer 64-bit multiplica…
621 defm : Zn3WriteResIntPair<WriteMULX64, [Zn3Multiplier], 3, [1], 2>; // Integer 64-bit Unsigned M…
622 defm : Zn3WriteResIntPair<WriteIMul64Imm, [Zn3Multiplier], 3, [1], 1>; // Integer 64-bit multiplica…
623 defm : Zn3WriteResIntPair<WriteIMul64Reg, [Zn3Multiplier], 3, [1], 1>; // Integer 64-bit multiplica…
627 defm : Zn3WriteResInt<WriteBSWAP32, [Zn3ALU0123], 1, [1], 1>; // Byte Order (Endianness) 32-bit Swa…
628 defm : Zn3WriteResInt<WriteBSWAP64, [Zn3ALU0123], 1, [1], 1>; // Byte Order (Endianness) 64-bit Swa…
630 defm : Zn3WriteResIntPair<WriteCMPXCHG, [Zn3ALU0123], 3, [12], 5>; // Compare and set, compare and …
633 let Latency = 3;
635 let NumMicroOps = 3;
639 defm : Zn3WriteResInt<WriteCMPXCHGRMW, [Zn3ALU0123], 3, [12], 6>; // Compare and set, compare a…
649 let Latency = 3; // FIXME: not from llvm-exegesis
656 let Latency = 4; // FIXME: not from llvm-exegesis
670 let Latency = !add(Znver3Model.LoadLatency, 3); // FIXME: not from llvm-exegesis
677 let Latency = !add(Znver3Model.LoadLatency, 2); // FIXME: not from llvm-exegesis
684 // FIXME: uops for 8-bit division measure as 2; for others it's a guess.
685 // FIXME: latency for 8-bit division measures as 10; for others it's a guess.
695 defm : Zn3WriteResIntPair<WriteBSF, [Zn3ALU1], 3, [3], 6, /*LoadUOps=*/2>; // Bit scan forward.
726 defm : Zn3WriteResInt<WriteFCMOV, [Zn3ALU0123], 7, [28], 7>; // FIXME: not from llvm-exegesis // X8…
728 …CStore, [Zn3ALU03, Zn3AGU012, Zn3Store], 2, [2, 1, 1], 2>; // FIXME: latency not from llvm-exegesis
761 let Latency = 3;
770 let NumMicroOps = !add(Zn3WriteRotateRightRI.NumMicroOps, 3);
791 let Latency = 3;
819 defm : Zn3WriteResInt<WriteSHDrri, [Zn3ALU12], 2, [3], 4>;
820 defm : Zn3WriteResInt<WriteSHDrrcl, [Zn3ALU12], 2, [3], 5>;
835 defm : Zn3WriteResIntPair<WriteJump, [Zn3BRU01], 1, [1], 1>; // FIXME: not from llvm-exegesis
837 // Floating point. This covers both scalar and vector operations.
849 let Latency = 2; // FIXME: not from llvm-exegesis
867 defm : Zn3WriteResXMMPair<WriteFAdd, [Zn3FPFAdd01], 3, [1], 1>; // Floating point add/sub.
870 let Latency = !add(Znver3Model.LoadLatency, 1); // FIXME: not from llvm-exegesis
880 let Latency = !add(Znver3Model.LoadLatency, 1); // FIXME: not from llvm-exegesis
887 defm : Zn3WriteResXMMPair<WriteFAddX, [Zn3FPFAdd01], 3, [1], 1>; // Floating point add/sub (XMM).
888 defm : Zn3WriteResYMMPair<WriteFAddY, [Zn3FPFAdd01], 3, [1], 1>; // Floating point add/sub (YMM).
889 defm : X86WriteResPairUnsupported<WriteFAddZ>; // Floating point add/sub (ZMM).
890 defm : Zn3WriteResXMMPair<WriteFAdd64, [Zn3FPFAdd01], 3, [1], 1>; // Floating point double add/sub.
891 defm : Zn3WriteResXMMPair<WriteFAdd64X, [Zn3FPFAdd01], 3, [1], 1>; // Floating point double add/sub…
892 defm : Zn3WriteResYMMPair<WriteFAdd64Y, [Zn3FPFAdd01], 3, [1], 1>; // Floating point double add/sub…
893 defm : X86WriteResPairUnsupported<WriteFAdd64Z>; // Floating point double add/sub (ZMM).
894 defm : Zn3WriteResXMMPair<WriteFCmp, [Zn3FPFMul01], 1, [1], 1>; // Floating point compare.
895 defm : Zn3WriteResXMMPair<WriteFCmpX, [Zn3FPFMul01], 1, [1], 1>; // Floating point compare (XMM).
896 defm : Zn3WriteResYMMPair<WriteFCmpY, [Zn3FPFMul01], 1, [1], 1>; // Floating point compare (YMM).
897 defm : X86WriteResPairUnsupported<WriteFCmpZ>; // Floating point compare (ZMM).
898 defm : Zn3WriteResXMMPair<WriteFCmp64, [Zn3FPFMul01], 1, [1], 1>; // Floating point double compare.
899 defm : Zn3WriteResXMMPair<WriteFCmp64X, [Zn3FPFMul01], 1, [1], 1>; // Floating point double compare…
900 defm : Zn3WriteResYMMPair<WriteFCmp64Y, [Zn3FPFMul01], 1, [1], 1>; // Floating point double compare…
901 defm : X86WriteResPairUnsupported<WriteFCmp64Z>; // Floating point double compare (ZMM).
902 …riteResXMMPair<WriteFCom, [Zn3FPFMul01], 3, [2], 1>; // FIXME: latency not from llvm-exegesis // …
903 …mX, [Zn3FPFMul01], 4, [2], 2>; // FIXME: latency not from llvm-exegesis // Floating point compare…
904 defm : Zn3WriteResXMMPair<WriteFMul, [Zn3FPFMul01], 3, [1], 1>; // Floating point multiplication.
905 defm : Zn3WriteResXMMPair<WriteFMulX, [Zn3FPFMul01], 3, [1], 1>; // Floating point multiplication (…
906 defm : Zn3WriteResYMMPair<WriteFMulY, [Zn3FPFMul01], 3, [1], 1>; // Floating point multiplication (…
907 defm : X86WriteResPairUnsupported<WriteFMulZ>; // Floating point multiplication (ZMM).
908 defm : Zn3WriteResXMMPair<WriteFMul64, [Zn3FPFMul01], 3, [1], 1>; // Floating point double multipl…
909 defm : Zn3WriteResXMMPair<WriteFMul64X, [Zn3FPFMul01], 3, [1], 1>; // Floating point double multipl…
910 defm : Zn3WriteResYMMPair<WriteFMul64Y, [Zn3FPFMul01], 3, [1], 1>; // Floating point double multipl…
911 defm : X86WriteResPairUnsupported<WriteFMul64Z>; // Floating point double multiplication (ZMM).
912 defm : Zn3WriteResXMMPair<WriteFDiv, [Zn3FPFDiv], 11, [3], 1>; // Floating point division.
913 defm : Zn3WriteResXMMPair<WriteFDivX, [Zn3FPFDiv], 11, [3], 1>; // Floating point division (XMM).
914 defm : Zn3WriteResYMMPair<WriteFDivY, [Zn3FPFDiv], 11, [3], 1>; // Floating point division (YMM).
915 defm : X86WriteResPairUnsupported<WriteFDivZ>; // Floating point division (ZMM).
916 defm : Zn3WriteResXMMPair<WriteFDiv64, [Zn3FPFDiv], 13, [5], 1>; // Floating point double division.
917 defm : Zn3WriteResXMMPair<WriteFDiv64X, [Zn3FPFDiv], 13, [5], 1>; // Floating point double division…
918 defm : Zn3WriteResYMMPair<WriteFDiv64Y, [Zn3FPFDiv], 13, [5], 1>; // Floating point double division…
919 defm : X86WriteResPairUnsupported<WriteFDiv64Z>; // Floating point double division (ZMM).
920 defm : Zn3WriteResXMMPair<WriteFSqrt, [Zn3FPFDiv], 15, [5], 1>; // Floating point square root.
921 defm : Zn3WriteResXMMPair<WriteFSqrtX, [Zn3FPFDiv], 15, [5], 1>; // Floating point square root (XM…
922 defm : Zn3WriteResYMMPair<WriteFSqrtY, [Zn3FPFDiv], 15, [5], 1>; // Floating point square root (YM…
923 defm : X86WriteResPairUnsupported<WriteFSqrtZ>; // Floating point square root (ZMM).
924 defm : Zn3WriteResXMMPair<WriteFSqrt64, [Zn3FPFDiv], 21, [9], 1>; // Floating point double square …
925 defm : Zn3WriteResXMMPair<WriteFSqrt64X, [Zn3FPFDiv], 21, [9], 1>; // Floating point double square …
926 defm : Zn3WriteResYMMPair<WriteFSqrt64Y, [Zn3FPFDiv], 21, [9], 1>; // Floating point double square …
927 defm : X86WriteResPairUnsupported<WriteFSqrt64Z>; // Floating point double square root (ZMM).
928 …80, [Zn3FPFDiv], 22, [23], 1>; // FIXME: latency not from llvm-exegesis // Floating point long do…
929 defm : Zn3WriteResXMMPair<WriteFRcp, [Zn3FPFMul01], 3, [1], 1>; // Floating point reciprocal estim…
930 defm : Zn3WriteResXMMPair<WriteFRcpX, [Zn3FPFMul01], 3, [1], 1>; // Floating point reciprocal estim…
931 defm : Zn3WriteResYMMPair<WriteFRcpY, [Zn3FPFMul01], 3, [1], 1>; // Floating point reciprocal estim…
932 defm : X86WriteResPairUnsupported<WriteFRcpZ>; // Floating point reciprocal estimate (ZMM).
933 defm : Zn3WriteResXMMPair<WriteFRsqrt, [Zn3FPFDiv], 3, [1], 1>; // Floating point reciprocal squar…
934 defm : Zn3WriteResXMMPair<WriteFRsqrtX, [Zn3FPFDiv], 3, [1], 1>; // Floating point reciprocal squar…
935 defm : Zn3WriteResYMMPair<WriteFRsqrtY, [Zn3FPFDiv], 3, [1], 1>; // Floating point reciprocal squar…
936 defm : X86WriteResPairUnsupported<WriteFRsqrtZ>; // Floating point reciprocal square root estimate …
941 defm : Zn3WriteResXMMPair<WriteDPPD, [Zn3FPFMul01], 9, [6], 3, /*LoadUOps=*/2>; // Floating point d…
942 defm : Zn3WriteResXMMPair<WriteDPPS, [Zn3FPFMul01], 15, [8], 8, /*LoadUOps=*/2>; // Floating point
943 defm : Zn3WriteResYMMPair<WriteDPPSY, [Zn3FPFMul01], 15, [8], 7, /*LoadUOps=*/1>; // Floating point
944 …gn, [Zn3FPFMul01], 1, [2], 1>; // FIXME: latency not from llvm-exegesis // Floating point fabs/fc…
945 defm : Zn3WriteResXMMPair<WriteFRnd, [Zn3FPFCvt01], 3, [1], 1>; // Floating point rounding.
946 defm : Zn3WriteResYMMPair<WriteFRndY, [Zn3FPFCvt01], 3, [1], 1>; // Floating point rounding (YMM).
947 defm : X86WriteResPairUnsupported<WriteFRndZ>; // Floating point rounding (ZMM).
948 defm : Zn3WriteResXMMPair<WriteFLogic, [Zn3FPVMisc0123], 1, [1], 1>; // Floating point and/or/xor l…
949 defm : Zn3WriteResYMMPair<WriteFLogicY, [Zn3FPVMisc0123], 1, [1], 1>; // Floating point and/or/xor …
950 defm : X86WriteResPairUnsupported<WriteFLogicZ>; // Floating point and/or/xor logicals (ZMM).
951 …st, [Zn3FPFMisc12], 1, [2], 2>; // FIXME: latency not from llvm-exegesis // Floating point TEST in…
952 …tY, [Zn3FPFMisc12], 1, [2], 2>; // FIXME: latency not from llvm-exegesis // Floating point TEST in…
953 defm : X86WriteResPairUnsupported<WriteFTestZ>; // Floating point TEST instructions (ZMM).
954 defm : Zn3WriteResXMMPair<WriteFShuffle, [Zn3FPVShuf01], 1, [1], 1>; // Floating point vector shuff…
955 defm : Zn3WriteResYMMPair<WriteFShuffleY, [Zn3FPVShuf01], 1, [1], 1>; // Floating point vector shuf…
956 defm : X86WriteResPairUnsupported<WriteFShuffleZ>; // Floating point vector shuffles (ZMM).
957 defm : Zn3WriteResXMMPair<WriteFVarShuffle, [Zn3FPVShuf01], 3, [1], 1>; // Floating point vector va…
958 defm : Zn3WriteResYMMPair<WriteFVarShuffleY, [Zn3FPVShuf01], 3, [1], 1>; // Floating point vector v…
959 defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>; // Floating point vector variable shuffles (Z…
960 defm : Zn3WriteResXMMPair<WriteFBlend, [Zn3FPFMul01], 1, [1], 1>; // Floating point vector blends.
961 defm : Zn3WriteResYMMPair<WriteFBlendY, [Zn3FPFMul01], 1, [1], 1>; // Floating point vector blends …
962 defm : X86WriteResPairUnsupported<WriteFBlendZ>; // Floating point vector blends (ZMM).
969 defm : Zn3WriteResYMMPair<WriteFHAddY, [Zn3FPFAdd0], 6, [2], 3, /*LoadUOps=*/1>;
971 defm : Zn3WriteResXMMPair<WritePHAdd, [Zn3FPVAdd0], 2, [2], 3, /*LoadUOps=*/1>;
973 defm : Zn3WriteResYMMPair<WritePHAddY, [Zn3FPVAdd0], 2, [2], 3, /*LoadUOps=*/1>;
1036 let Latency = 3;
1043 let Latency = 3;
1097 …est, [Zn3FPVAdd12, Zn3FPSt], 1, [1, 1], 2>; // FIXME: latency not from llvm-exegesis // Vector in…
1098 …estY, [Zn3FPVAdd12, Zn3FPSt], 1, [1, 1], 2>; // FIXME: latency not from llvm-exegesis // Vector i…
1108 defm : Zn3WriteResXMMPair<WriteVecIMul, [Zn3FPVMul01], 3, [1], 1>; // Vector integer multiply (def…
1109 defm : Zn3WriteResXMMPair<WriteVecIMulX, [Zn3FPVMul01], 3, [1], 1>; // Vector integer multiply (XMM…
1110 defm : Zn3WriteResYMMPair<WriteVecIMulY, [Zn3FPVMul01], 3, [1], 1>; // Vector integer multiply (YMM…
1112 defm : Zn3WriteResXMMPair<WritePMULLD, [Zn3FPVMul01], 3, [1], 1>; // Vector PMULLD.
1113 defm : Zn3WriteResYMMPair<WritePMULLDY, [Zn3FPVMul01], 3, [1], 1>; // Vector PMULLD (YMM).
1129 defm : Zn3WriteResXMMPair<WritePSADBW, [Zn3FPVAdd0123], 3, [2], 1>; // Vector PSADBW.
1130 defm : Zn3WriteResXMMPair<WritePSADBWX, [Zn3FPVAdd0123], 3, [2], 1>; // Vector PSADBW (XMM).
1131 defm : Zn3WriteResYMMPair<WritePSADBWY, [Zn3FPVAdd0123], 3, [2], 1>; // Vector PSADBW (YMM).
1134 defm : Zn3WriteResYMMPair<WriteMPSADY, [Zn3FPVAdd0123], 4, [8], 3, /*LoadUOps=*/1>; // Vector MPSAD…
1136 defm : Zn3WriteResXMMPair<WritePHMINPOS, [Zn3FPVAdd01], 3, [1], 1>; // Vector PHMINPOS.
1139 defm : Zn3WriteResXMMPair<WriteVecInsert, [Zn3FPLd01], 1, [2], 2, /*LoadUOps=*/-1>; // Insert gpr t…
1150 defm : Zn3WriteResXMMPair<WriteCvtSD2I, [Zn3FPFCvt01], 2, [2], 2>; // Double -> Integer.
1151 defm : Zn3WriteResXMMPair<WriteCvtPD2I, [Zn3FPFCvt01], 3, [1], 1>; // Double -> Integer (XMM).
1152 defm : Zn3WriteResYMMPair<WriteCvtPD2IY, [Zn3FPFCvt01], 6, [2], 2>; // Double -> Integer (YMM).
1153 defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>; // Double -> Integer (ZMM).
1162 defm : Zn3WriteResXMMPair<WriteCvtSS2I, [Zn3FPFCvt01], 2, [2], 2>; // Float -> Integer.
1164 defm : Zn3WriteResXMMPair<WriteCvtPS2I, [Zn3FPFCvt01], 3, [1], 1>; // Float -> Integer (XMM).
1165 defm : Zn3WriteResYMMPair<WriteCvtPS2IY, [Zn3FPFCvt01], 3, [1], 1>; // Float -> Integer (YMM).
1166 defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>; // Float -> Integer (ZMM).
1168 defm : Zn3WriteResXMMPair<WriteCvtI2SD, [Zn3FPFCvt01], 3, [2], 2, /*LoadUOps=*/-1>; // Integer -> …
1169 defm : Zn3WriteResXMMPair<WriteCvtI2PD, [Zn3FPFCvt01], 3, [1], 1>; // Integer -> Double (XMM).
1170 defm : Zn3WriteResYMMPair<WriteCvtI2PDY, [Zn3FPFCvt01], 4, [2], 2, /*LoadUOps=*/-1>; // Integer -> …
1171 defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>; // Integer -> Double (ZMM).
1180 defm : Zn3WriteResXMMPair<WriteCvtI2SS, [Zn3FPFCvt01], 3, [2], 2, /*LoadUOps=*/-1>; // Integer -> …
1181 defm : Zn3WriteResXMMPair<WriteCvtI2PS, [Zn3FPFCvt01], 3, [1], 1>; // Integer -> Float (XMM).
1182 defm : Zn3WriteResYMMPair<WriteCvtI2PSY, [Zn3FPFCvt01], 3, [1], 1>; // Integer -> Float (YMM).
1183 defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>; // Integer -> Float (ZMM).
1186 let Latency = 3;
1192 defm : Zn3WriteResXMMPair<WriteCvtSS2SD, [Zn3FPFCvt01], 3, [1], 1>; // Float -> Double size conver…
1193 defm : Zn3WriteResXMMPair<WriteCvtPS2PD, [Zn3FPFCvt01], 3, [1], 1>; // Float -> Double size convers…
1194 defm : Zn3WriteResYMMPair<WriteCvtPS2PDY, [Zn3FPFCvt01], 4, [2], 2, /*LoadUOps=*/-1>; // Float -> D…
1195 defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>; // Float -> Double size conversion (ZMM).
1197 defm : Zn3WriteResXMMPair<WriteCvtSD2SS, [Zn3FPFCvt01], 3, [1], 1>; // Double -> Float size conver…
1198 defm : Zn3WriteResXMMPair<WriteCvtPD2PS, [Zn3FPFCvt01], 3, [1], 1>; // Double -> Float size convers…
1199 defm : Zn3WriteResYMMPair<WriteCvtPD2PSY, [Zn3FPFCvt01], 6, [2], 2>; // Double -> Float size conver…
1200 defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>; // Double -> Float size conversion (ZMM).
1202 defm : Zn3WriteResXMMPair<WriteCvtPH2PS, [Zn3FPFCvt01], 3, [1], 1>; // Half -> Float size conversio…
1203 defm : Zn3WriteResYMMPair<WriteCvtPH2PSY, [Zn3FPFCvt01], 4, [2], 2, /*LoadUOps=*/-1>; // Half -> Fl…
1204 defm : X86WriteResPairUnsupported<WriteCvtPH2PSZ>; // Half -> Float size conversion (ZMM).
1206 defm : Zn3WriteResXMM<WriteCvtPS2PH, [Zn3FPFCvt01], 3, [2], 1>; // Float -> Half size conversion.
1207 defm : Zn3WriteResYMM<WriteCvtPS2PHY, [Zn3FPFCvt01], 6, [2], 2>; // Float -> Half size conversion (…
1208 defm : X86WriteResUnsupported<WriteCvtPS2PHZ>; // Float -> Half size conversion (ZMM).
1209 …tPS2PHSt, [Zn3FPFCvt01, Zn3FPSt, Zn3Store], !add(3, Znver3Model.StoreLatency), [1, 1, 1], 2>; // F…
1210 …1, Zn3FPSt, Zn3Store], !add(6, Znver3Model.StoreLatency), [2, 1, 1], 3>; // Float -> Half + store …
1211 defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>; // Float -> Half + store size conversion (ZMM).
1214 defm : Zn3WriteResIntPair<WriteCRC32, [Zn3ALU1], 3, [1], 1>;
1246 let ReleaseAtCycles = [3];
1253 let ReleaseAtCycles = [1, 1, 3];
1259 let Latency = 3;
1288 defm : Zn3WriteResXMMPair<WritePCmpIStrM, [Zn3FPVAdd0123], 6, [8], 3, /*LoadUOps=*/1>;
1301 // Carry-less multiplication instructions.
1305 defm : Zn3WriteResInt<WriteEMMS, [Zn3ALU0123], 2, [1], 1>; // FIXME: latency not from llvm-exegesis
1308 …ALU0123], !add(Znver3Model.LoadLatency, 1), [1, 1, 6], 1>; // FIXME: latency not from llvm-exegesis
1309 …Store], !add(1, Znver3Model.StoreLatency), [60, 1, 1], 2>; // FIXME: latency not from llvm-exegesis
1311 // Catch-all for expensive system instructions.
1315 let Latency = 0; // FIXME: not from llvm-exegesis
1322 let Latency = 10; // FIXME: not from llvm-exegesis
1329 defm : Zn3WriteResYMMPair<WriteFShuffle256, [Zn3FPVShuf], 2, [1], 1, /*LoadUOps=*/2>; // Fp 256-bit…
1330 defm : Zn3WriteResYMMPair<WriteFVarShuffle256, [Zn3FPVShuf], 7, [1], 2, /*LoadUOps=*/1>; // Fp 256-
1331 defm : Zn3WriteResYMMPair<WriteShuffle256, [Zn3FPVShuf], 2, [1], 1>; // 256-bit width vector shuffl…
1334 let Latency = 3;
1350 let NumMicroOps = 3;
1375 …esYMMPair<WriteVPMOV256, [Zn3FPVShuf01], 4, [3], 2, /*LoadUOps=*/-1>; // 256-bit width packed vect…
1376 defm : Zn3WriteResYMMPair<WriteVarShuffle256, [Zn3FPVShuf], 5, [1], 2, /*LoadUOps=*/1>; // 256-bit …
1402 defm : Zn3WriteResInt<WriteNop, [Zn3ALU0123], 0, [1], 1>; // FIXME: latency not from llvm-exegesis
1426 defm : Zn3WriteResInt<WriteXCHG, [Zn3ALU0123], 0, [8], 2>; // Compare+Exchange - TODO RMW su…
1502 // NOTE: XORPSrr, XORPDrr are not zero-cycle!
1517 // NOTE: PXORrr, PANDNrr are not zero-cycle!
1531 // PCMPGTBrr, PCMPGTWrr, PCMPGTDrr, PCMPGTQrr are not zero-cycle!
1545 // GPR Zero-idioms.
1551 // SSE XMM Zero-idioms.
1566 // AVX XMM Zero-idioms.
1581 // AVX YMM Zero-idioms.