xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleZnver2.td (revision 03a88e3de9c68182d21df94b1c8c7ced930dbd1f)
1//=- X86ScheduleZnver2.td - X86 Znver2 Scheduling -------------*- tablegen -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the machine model for Znver2 to support instruction
10// scheduling and other instruction cost heuristics.
11//
12//===----------------------------------------------------------------------===//
13
// Top-level machine model record for Znver2.
def Znver2Model : SchedMachineModel {
  // MicroOpBufferSize is derived from the size of the reorder buffer.
  let MicroOpBufferSize = 224;
  // Zen can decode 4 instructions per cycle.
  let IssueWidth = 4;

  let LoadLatency = 4;
  let HighLatency = 25;
  let MispredictPenalty = 17;
  let PostRAScheduler = 1;

  // FIXME: Not every instruction has been catered for in this model yet, so
  // CompleteModel is reset to flag the model as incomplete.
  let CompleteModel = 0;
}
30
31let SchedModel = Znver2Model in {
32
// Zen can issue micro-ops to 11 different units in one cycle.
// These are
//  * Four integer ALU units (ZALU0, ZALU1, ZALU2, ZALU3)
//  * Three AGU units (ZAGU0, ZAGU1, ZAGU2)
//  * Four FPU units (ZFPU0, ZFPU1, ZFPU2, ZFPU3)
// The AGUs feed the load/store queues at two loads and one store per cycle.
39
// Individual execution units. Every unit is its own ProcResource<1>; the
// record name is the family prefix followed by the unit index.

// Four integer ALUs: Zn2ALU0 .. Zn2ALU3.
foreach Idx = [0, 1, 2, 3] in
  def Zn2ALU#Idx : ProcResource<1>;

// Three AGUs: Zn2AGU0 .. Zn2AGU2.
foreach Idx = [0, 1, 2] in
  def Zn2AGU#Idx : ProcResource<1>;

// Four FPU pipes: Zn2FPU0 .. Zn2FPU3.
foreach Idx = [0, 1, 2, 3] in
  def Zn2FPU#Idx : ProcResource<1>;
56
// FPU pipe groups. The digits in each group name list the member pipes.
// Two-pipe groups:
def Zn2FPU01  : ProcResGroup<[Zn2FPU0, Zn2FPU1]>;
def Zn2FPU02  : ProcResGroup<[Zn2FPU0, Zn2FPU2]>;
def Zn2FPU03  : ProcResGroup<[Zn2FPU0, Zn2FPU3]>;
def Zn2FPU12  : ProcResGroup<[Zn2FPU1, Zn2FPU2]>;
def Zn2FPU13  : ProcResGroup<[Zn2FPU1, Zn2FPU3]>;
def Zn2FPU23  : ProcResGroup<[Zn2FPU2, Zn2FPU3]>;
// Three-pipe group:
def Zn2FPU013 : ProcResGroup<[Zn2FPU0, Zn2FPU1, Zn2FPU3]>;
65
// Unit groups: a micro-op that may be issued to any one of several units is
// described by a ProcResGroup over those units.

// Integer ALUs 0 and 3 only.
def Zn2ALU03 : ProcResGroup<[Zn2ALU0, Zn2ALU3]>;

// All four integer ALUs, fed by the 64-entry (16x4 entries) integer
// scheduler.
let BufferSize = 64 in
def Zn2ALU : ProcResGroup<[Zn2ALU0, Zn2ALU1, Zn2ALU2, Zn2ALU3]>;

// All AGUs, with a 28-entry (14x2) buffer. The AGUs cannot execute every ALU
// operation, but they are relevant for some instructions.
let BufferSize = 28 in
def Zn2AGU : ProcResGroup<[Zn2AGU0, Zn2AGU1, Zn2AGU2]>;
83
// Dedicated single-instance resources for the long-running integer
// operations: multiplication is issued on ALU1, division on ALU2.
def Zn2Multiplier : ProcResource<1>;
def Zn2Divider    : ProcResource<1>;
89
// Load-to-use latencies, expressed as read advances.
// Integer loads: 4 cycles.
def : ReadAdvance<ReadAfterLd, 4>;

// Vector loads (generic, XMM and YMM flavours): 7 cycles.
foreach VecRead = [ReadAfterVecLd, ReadAfterVecXLd, ReadAfterVecYLd] in
  def : ReadAdvance<VecRead, 7>;

// Integer-to-FPU reads get no advance.
def : ReadAdvance<ReadInt2Fpu, 0>;
99
// Integer physical register file: 168 entries that hold both the
// architectural and the speculative versions of the 64-bit integer
// registers.
// Reference: "Software Optimization Guide for AMD Family 17h Processors"
def Zn2IntegerPRF
    : RegisterFile<168, [GR64, CCR]>;
104
// All four FPU pipes, fed by the 36-entry (9x4 entries) floating-point
// scheduler.
let BufferSize = 36 in
def Zn2FPU : ProcResGroup<[Zn2FPU0, Zn2FPU1, Zn2FPU2, Zn2FPU3]>;
109
// FP register file: the Zen FP retire queue renames SIMD and FP uOps onto a
// pool of 160 128-bit registers. Operations on 256-bit data types are cracked
// into two COPs, hence the cost of 2 for VR256 (1 for VR64 and VR128).
// Reference: "Software Optimization Guide for AMD Family 17h Processors"
def Zn2FpuPRF
    : RegisterFile<160, [VR64, VR128, VR256], [1, 1, 2]>;
114
// Retire control unit, shared between integer and FP ops.
//  * Tracks up to 192 macro ops in flight.
//  * Commits up to 8 macro ops per cycle, in order.
// Reference: "Software Optimization Guide for AMD Family 17h Processors"
// In SMT mode each thread gets 96 entries. The conservative value is not
// used here: there is currently no way to fully model SMT mode, so there is
// no point in trying.
def Zn2RCU
    : RetireControlUnit<192, 8>;
123
// A folded load is an instruction that loads and also performs an operation,
// e.g. ADDPD xmm,[mem]. Instructions with folded loads are usually
// micro-fused, so they only appear as two micro-ops:
//      a. the load, and
//      b. the operation (addpd in the example).
//
// Zn2WriteResPair emits the WriteRes for both forms of an integer-unit
// SchedWrite:
//   SchedRW   - the foldable write; SchedRW.Folded is its load variant.
//   ExePorts  - resources used by the register form.
//   Lat       - latency of the register form.
//   Res       - ResourceCycles for ExePorts (may be left empty).
//   UOps      - micro-op count of the register form.
//   LoadLat   - latency added for the folded-load form (default 4).
//   LoadUOps  - micro-ops added for the folded-load form (default 1).
multiclass Zn2WriteResPair<X86FoldableSchedWrite SchedRW,
                          list<ProcResourceKind> ExePorts,
                          int Lat, list<int> Res = [], int UOps = 1,
                          int LoadLat = 4, int LoadUOps = 1> {
  // Register form: the parameters are used as given.
  def : WriteRes<SchedRW, ExePorts> {
    let NumMicroOps = UOps;
    let ResourceCycles = Res;
    let Latency = Lat;
  }

  // Folded-load form: Zn2AGU is put in front of ExePorts. When Res is
  // non-empty a single AGU cycle is prepended to it; otherwise
  // ResourceCycles stays empty.
  def : WriteRes<SchedRW.Folded, !listconcat([Zn2AGU], ExePorts)> {
    let NumMicroOps = !add(UOps, LoadUOps);
    let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res));
    let Latency = !add(Lat, LoadLat);
  }
}
150
// Zn2WriteResFpuPair emits the WriteRes for the register form and the
// folded-load form of a floating-point-unit SchedWrite.
//   SchedRW   - the foldable write; SchedRW.Folded is its load variant.
//   ExePorts  - resources used by the register form.
//   Lat       - latency of the register form.
//   Res       - ResourceCycles for ExePorts (may be left empty).
//   UOps      - micro-op count of the register form.
//   LoadLat   - latency added for the folded-load form (default 7).
//   LoadUOps  - micro-ops added for the folded-load form (default 0).
multiclass Zn2WriteResFpuPair<X86FoldableSchedWrite SchedRW,
                          list<ProcResourceKind> ExePorts,
                          int Lat, list<int> Res = [], int UOps = 1,
                          int LoadLat = 7, int LoadUOps = 0> {
  // Register form: the parameters are used as given.
  def : WriteRes<SchedRW, ExePorts> {
    let NumMicroOps = UOps;
    let ResourceCycles = Res;
    let Latency = Lat;
  }

  // Folded-load form: Zn2AGU is put in front of ExePorts. When Res is
  // non-empty a single AGU cycle is prepended to it; otherwise
  // ResourceCycles stays empty.
  def : WriteRes<SchedRW.Folded, !listconcat([Zn2AGU], ExePorts)> {
    let NumMicroOps = !add(UOps, LoadUOps);
    let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res));
    let Latency = !add(Lat, LoadLat);
  }
}
171
// WriteRMW is set in codegen for instructions with a memory write operation.
def : WriteRes<WriteRMW, [Zn2AGU]>;

// Plain and non-temporal stores use an AGU; register moves use an ALU.
foreach StoreW = [WriteStore, WriteStoreNT] in
  def : WriteRes<StoreW, [Zn2AGU]>;
def : WriteRes<WriteMove, [Zn2ALU]>;

// Loads use an AGU and have a latency of 8.
let Latency = 8 in
def : WriteRes<WriteLoad, [Zn2AGU]>;

// Models the clobbering of the read-write mask operand of a GATHER
// operation. It consumes no resources and no micro-ops; it only carries a
// latency, which matches that of WriteLoad.
def : WriteRes<WriteVecMaskedGatherWriteback, []> {
  let NumMicroOps = 0;
  let Latency = 8;
}
184
// Zero idioms use no resources; LEA uses any ALU.
def : WriteRes<WriteZero, []>;
def : WriteRes<WriteLEA,  [Zn2ALU]>;

// Simple ALU ops and add-with-carry: latency 1 on any ALU.
foreach AluW = [WriteALU, WriteADC] in
  defm : Zn2WriteResPair<AluW, [Zn2ALU], 1>;

// 8-bit multiply: latency 4 on ALU1 plus the multiplier.
defm : Zn2WriteResPair<WriteIMul8, [Zn2ALU1, Zn2Multiplier], 4>;

// Byte swaps: latency 1, four resource cycles on the ALU group, 1 micro-op.
foreach SwapW = [WriteBSWAP32, WriteBSWAP64] in
  defm : X86WriteRes<SwapW, [Zn2ALU], 1, [4], 1>;

// Compare-and-exchange (register and read-modify-write forms) and exchange.
defm : X86WriteRes<WriteCMPXCHG,    [Zn2ALU],         3, [1],    1>;
defm : X86WriteRes<WriteCMPXCHGRMW, [Zn2ALU, Zn2AGU], 8, [1, 1], 5>;
defm : X86WriteRes<WriteXCHG,       [Zn2ALU],         1, [2],    2>;
197
// Shifts and rotates, immediate and CL-count flavours: latency 1 on any ALU.
foreach ShiftW = [WriteShift, WriteShiftCL, WriteRotate, WriteRotateCL] in
  defm : Zn2WriteResPair<ShiftW, [Zn2ALU], 1>;

// Double shifts: only the rri form gets resources here; the remaining forms
// are declared through X86WriteResUnsupported.
defm : X86WriteRes<WriteSHDrri, [Zn2ALU], 1, [1], 1>;
foreach ShdW = [WriteSHDrrcl, WriteSHDmri, WriteSHDmrcl] in
  defm : X86WriteResUnsupported<ShdW>;

// Jumps use any ALU; CRC32 is placed on FPU pipe 0 using the FPU pair
// multiclass.
defm : Zn2WriteResPair<WriteJump, [Zn2ALU], 1>;
defm : Zn2WriteResFpuPair<WriteCRC32, [Zn2FPU0], 3>;

// Conditional moves / sets and flag transfers.
defm : Zn2WriteResPair<WriteCMOV, [Zn2ALU], 1>;
def  : WriteRes<WriteSETCC,      [Zn2ALU]>;
def  : WriteRes<WriteSETCCStore, [Zn2ALU, Zn2AGU]>;
defm : X86WriteRes<WriteLAHFSAHF, [Zn2ALU], 2, [1], 2>;

// Bit tests: register form, the two load forms, and test-and-set.
defm : X86WriteRes<WriteBitTest, [Zn2ALU], 1, [1], 1>;
foreach BitLdW = [WriteBitTestImmLd, WriteBitTestRegLd] in
  defm : X86WriteRes<BitLdW, [Zn2ALU, Zn2AGU], 5, [1, 1], 2>;
defm : X86WriteRes<WriteBitTestSet, [Zn2ALU], 2, [1], 2>;
220
// Bit scans and bit counts, all on the ALU group; only the latency differs.
defm : Zn2WriteResPair<WriteBSF,    [Zn2ALU], 3>;
defm : Zn2WriteResPair<WriteBSR,    [Zn2ALU], 4>;
defm : Zn2WriteResPair<WriteLZCNT,  [Zn2ALU], 1>;
defm : Zn2WriteResPair<WriteTZCNT,  [Zn2ALU], 2>;
defm : Zn2WriteResPair<WritePOPCNT, [Zn2ALU], 1>;

// Miscellaneous COPY pseudo-instructions are treated as a move.
def : InstRW<[WriteMove],
             (instrs COPY)>;

// BMI1 BEXTR and BMI2 BZHI: latency 1 on any ALU.
foreach BmiW = [WriteBEXTR, WriteBZHI] in
  defm : Zn2WriteResPair<BmiW, [Zn2ALU], 1>;
234
// DIV / IDIV: issued on ALU2 and occupying the divider for as many cycles as
// the latency. The unsigned and signed forms of each width share the same
// numbers.
foreach DivW = [WriteDiv8, WriteIDiv8] in
  defm : Zn2WriteResPair<DivW, [Zn2ALU2, Zn2Divider], 15, [1, 15], 1>;
foreach DivW = [WriteDiv16, WriteIDiv16] in
  defm : Zn2WriteResPair<DivW, [Zn2ALU2, Zn2Divider], 17, [1, 17], 2>;
foreach DivW = [WriteDiv32, WriteIDiv32] in
  defm : Zn2WriteResPair<DivW, [Zn2ALU2, Zn2Divider], 25, [1, 25], 2>;
foreach DivW = [WriteDiv64, WriteIDiv64] in
  defm : Zn2WriteResPair<DivW, [Zn2ALU2, Zn2Divider], 41, [1, 41], 2>;

// IMULH: latency 4 on ALU1 plus the multiplier.
let Latency = 4 in
def : WriteRes<WriteIMulH, [Zn2ALU1, Zn2Multiplier]>;
249
// Floating point operations.

// FP loads: latency 8 on an AGU.
foreach LdW = [WriteFLoad, WriteFLoadX, WriteFLoadY] in
  defm : X86WriteRes<LdW, [Zn2AGU], 8, [1], 1>;

// Masked FP loads also use FPU pipe 0 or 1; the Y form is 2 micro-ops.
defm : X86WriteRes<WriteFMaskedLoad,  [Zn2AGU, Zn2FPU01], 8, [1, 1], 1>;
defm : X86WriteRes<WriteFMaskedLoadY, [Zn2AGU, Zn2FPU01], 8, [1, 1], 2>;

// Masked FP stores: the 32- and 64-bit element forms share their numbers.
foreach StW = [WriteFMaskedStore32, WriteFMaskedStore64] in
  defm : X86WriteRes<StW, [Zn2AGU, Zn2FPU01], 4, [1, 1], 1>;
foreach StW = [WriteFMaskedStore32Y, WriteFMaskedStore64Y] in
  defm : X86WriteRes<StW, [Zn2AGU, Zn2FPU01], 5, [1, 2], 2>;

// FP stores: latency 1 on an AGU.
foreach StW = [WriteFStore, WriteFStoreX, WriteFStoreY] in
  defm : X86WriteRes<StW, [Zn2AGU], 1, [1], 1>;

// Non-temporal FP stores: the scalar form additionally uses FPU pipe 2 and
// has latency 8; the X and Y forms match the ordinary stores.
defm : X86WriteRes<WriteFStoreNT, [Zn2AGU, Zn2FPU2], 8, [1, 1], 1>;
foreach StW = [WriteFStoreNTX, WriteFStoreNTY] in
  defm : X86WriteRes<StW, [Zn2AGU], 1, [1], 1>;

// FP register moves: latency 1 on any FPU pipe.
foreach MovW = [WriteFMove, WriteFMoveX, WriteFMoveY] in
  defm : X86WriteRes<MovW, [Zn2FPU], 1, [1], 1>;
270
// FP add, single and double precision: latency 3 on FPU pipe 0.
// The Z variants of every family in this table are declared through
// X86WriteResPairUnsupported.
foreach W = [WriteFAdd, WriteFAddX, WriteFAddY] in
  defm : Zn2WriteResFpuPair<W, [Zn2FPU0], 3>;
defm : X86WriteResPairUnsupported<WriteFAddZ>;
foreach W = [WriteFAdd64, WriteFAdd64X, WriteFAdd64Y] in
  defm : Zn2WriteResFpuPair<W, [Zn2FPU0], 3>;
defm : X86WriteResPairUnsupported<WriteFAdd64Z>;

// FP compare, single and double precision: latency 1 on FPU pipe 0.
foreach W = [WriteFCmp, WriteFCmpX, WriteFCmpY] in
  defm : Zn2WriteResFpuPair<W, [Zn2FPU0], 1>;
defm : X86WriteResPairUnsupported<WriteFCmpZ>;
foreach W = [WriteFCmp64, WriteFCmp64X, WriteFCmp64Y] in
  defm : Zn2WriteResFpuPair<W, [Zn2FPU0], 1>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;

// FP compare writing flags: latency 3 on FPU pipe 0.
foreach W = [WriteFCom, WriteFComX] in
  defm : Zn2WriteResFpuPair<W, [Zn2FPU0], 3>;

// FP blends (fixed and variable): latency 1 on FPU pipe 0 or 1.
foreach W = [WriteFBlend, WriteFBlendY] in
  defm : Zn2WriteResFpuPair<W, [Zn2FPU01], 1>;
defm : X86WriteResPairUnsupported<WriteFBlendZ>;
foreach W = [WriteFVarBlend, WriteFVarBlendY] in
  defm : Zn2WriteResFpuPair<W, [Zn2FPU01], 1>;
defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;

// Vector variable blends: latency 1 on FPU pipe 0.
foreach W = [WriteVarBlend, WriteVarBlendY] in
  defm : Zn2WriteResFpuPair<W, [Zn2FPU0], 1>;
defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
// FP <-> integer conversions: every supported form has latency 5 on FPU
// pipe 3; the Z forms are declared through X86WriteResPairUnsupported.

// Single precision to integer.
foreach W = [WriteCvtSS2I, WriteCvtPS2I, WriteCvtPS2IY] in
  defm : Zn2WriteResFpuPair<W, [Zn2FPU3], 5>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;

// Double precision to integer.
foreach W = [WriteCvtSD2I, WriteCvtPD2I, WriteCvtPD2IY] in
  defm : Zn2WriteResFpuPair<W, [Zn2FPU3], 5>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;

// Integer to single precision.
foreach W = [WriteCvtI2SS, WriteCvtI2PS, WriteCvtI2PSY] in
  defm : Zn2WriteResFpuPair<W, [Zn2FPU3], 5>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;

// Integer to double precision.
foreach W = [WriteCvtI2SD, WriteCvtI2PD, WriteCvtI2PDY] in
  defm : Zn2WriteResFpuPair<W, [Zn2FPU3], 5>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
// FP divide, single and double precision: latency 15 on FPU pipe 3.
foreach W = [WriteFDiv, WriteFDivX] in
  defm : Zn2WriteResFpuPair<W, [Zn2FPU3], 15>;
defm : X86WriteResPairUnsupported<WriteFDivZ>;
foreach W = [WriteFDiv64, WriteFDiv64X] in
  defm : Zn2WriteResFpuPair<W, [Zn2FPU3], 15>;
defm : X86WriteResPairUnsupported<WriteFDiv64Z>;

// FP sign manipulation: latency 2 on FPU pipe 3.
defm : Zn2WriteResFpuPair<WriteFSign, [Zn2FPU3], 2>;

// FP rounding: latency 3 on FPU pipe 3, with every optional parameter
// spelled out explicitly.
foreach W = [WriteFRnd, WriteFRndY] in
  defm : Zn2WriteResFpuPair<W, [Zn2FPU3], 3, [1], 1, 7, 0>;
defm : X86WriteResPairUnsupported<WriteFRndZ>;

// FP logic and FP test: latency 1 on any FPU pipe.
foreach W = [WriteFLogic, WriteFLogicY] in
  defm : Zn2WriteResFpuPair<W, [Zn2FPU], 1>;
defm : X86WriteResPairUnsupported<WriteFLogicZ>;
foreach W = [WriteFTest, WriteFTestY] in
  defm : Zn2WriteResFpuPair<W, [Zn2FPU], 1>;
defm : X86WriteResPairUnsupported<WriteFTestZ>;

// FP shuffles on FPU pipe 1 or 2: latency 1 for fixed shuffles, 3 for
// variable shuffles.
foreach W = [WriteFShuffle, WriteFShuffleY] in
  defm : Zn2WriteResFpuPair<W, [Zn2FPU12], 1>;
defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
foreach W = [WriteFVarShuffle, WriteFVarShuffleY] in
  defm : Zn2WriteResFpuPair<W, [Zn2FPU12], 3>;
defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
// FP multiply, single and double precision: latency 3 on FPU pipe 0 or 1;
// the folded-load form adds one micro-op.
foreach W = [WriteFMul, WriteFMulX, WriteFMulY] in
  defm : Zn2WriteResFpuPair<W, [Zn2FPU01], 3, [1], 1, 7, 1>;
defm : X86WriteResPairUnsupported<WriteFMulZ>;
foreach W = [WriteFMul64, WriteFMul64X, WriteFMul64Y] in
  defm : Zn2WriteResFpuPair<W, [Zn2FPU01], 3, [1], 1, 7, 1>;
defm : X86WriteResPairUnsupported<WriteFMul64Z>;

// Fused multiply-add: latency 5 on FPU pipe 0 or 3.
foreach W = [WriteFMA, WriteFMAX, WriteFMAY] in
  defm : Zn2WriteResFpuPair<W, [Zn2FPU03], 5>;
defm : X86WriteResPairUnsupported<WriteFMAZ>;

// Reciprocal estimate: latency 5 on FPU pipe 0 or 1; the Y form adds two
// micro-ops for its folded load.
foreach W = [WriteFRcp, WriteFRcpX] in
  defm : Zn2WriteResFpuPair<W, [Zn2FPU01], 5>;
defm : Zn2WriteResFpuPair<WriteFRcpY, [Zn2FPU01], 5, [1], 1, 7, 2>;
defm : X86WriteResPairUnsupported<WriteFRcpZ>;

// Reciprocal square-root estimate (X form only in this table).
defm : Zn2WriteResFpuPair<WriteFRsqrtX, [Zn2FPU01], 5, [1], 1, 7, 1>;
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;

// Square root on FPU pipe 3; the pipe is held for as many cycles as the
// latency.
foreach W = [WriteFSqrt, WriteFSqrtX] in
  defm : Zn2WriteResFpuPair<W, [Zn2FPU3], 20, [20]>;
defm : Zn2WriteResFpuPair<WriteFSqrtY, [Zn2FPU3], 28, [28], 1, 7, 1>;
defm : X86WriteResPairUnsupported<WriteFSqrtZ>;
foreach W = [WriteFSqrt64, WriteFSqrt64X] in
  defm : Zn2WriteResFpuPair<W, [Zn2FPU3], 20, [20]>;
defm : Zn2WriteResFpuPair<WriteFSqrt64Y, [Zn2FPU3], 20, [20], 1, 7, 1>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
defm : Zn2WriteResFpuPair<WriteFSqrt80, [Zn2FPU3], 20, [20]>;
363
// Vector integer operations, which execute on the FPU units.

// Vector loads, including the non-temporal forms: latency 8 on an AGU.
foreach LdW = [WriteVecLoad, WriteVecLoadX, WriteVecLoadY,
               WriteVecLoadNT, WriteVecLoadNTY] in
  defm : X86WriteRes<LdW, [Zn2AGU], 8, [1], 1>;

// Masked vector loads also use FPU pipe 0 or 1 for two cycles.
foreach LdW = [WriteVecMaskedLoad, WriteVecMaskedLoadY] in
  defm : X86WriteRes<LdW, [Zn2AGU, Zn2FPU01], 8, [1, 2], 2>;

// Vector stores, including the non-temporal forms: latency 1 on an AGU.
foreach StW = [WriteVecStore, WriteVecStoreX, WriteVecStoreY,
               WriteVecStoreNT, WriteVecStoreNTY] in
  defm : X86WriteRes<StW, [Zn2AGU], 1, [1], 1>;

// Masked vector stores: the 32- and 64-bit element forms share their
// numbers.
foreach StW = [WriteVecMaskedStore32, WriteVecMaskedStore64] in
  defm : X86WriteRes<StW, [Zn2AGU, Zn2FPU01], 4, [1, 1], 1>;
foreach StW = [WriteVecMaskedStore32Y, WriteVecMaskedStore64Y] in
  defm : X86WriteRes<StW, [Zn2AGU, Zn2FPU01], 5, [1, 2], 2>;

// Vector register moves on any FPU pipe; the Y form is latency 2 and
// 2 micro-ops.
foreach MovW = [WriteVecMove, WriteVecMoveX] in
  defm : X86WriteRes<MovW, [Zn2FPU], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [Zn2FPU], 2, [1], 2>;

// Moves between vector registers and GPRs go through FPU pipe 2.
defm : X86WriteRes<WriteVecMoveToGpr,   [Zn2FPU2], 2, [1], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [Zn2FPU2], 3, [1], 1>;

defm : X86WriteRes<WriteEMMS, [Zn2FPU], 2, [1], 1>;
387
// Vector shifts by register count: the base form may use any FPU pipe, the
// X and Y forms are pinned to FPU pipe 2. The Z variants throughout this
// table are declared through X86WriteResPairUnsupported.
defm : Zn2WriteResFpuPair<WriteVecShift, [Zn2FPU], 1>;
foreach W = [WriteVecShiftX, WriteVecShiftY] in
  defm : Zn2WriteResFpuPair<W, [Zn2FPU2], 1>;
defm : X86WriteResPairUnsupported<WriteVecShiftZ>;

// Vector shifts by immediate: latency 1 on any FPU pipe.
foreach W = [WriteVecShiftImm, WriteVecShiftImmX, WriteVecShiftImmY] in
  defm : Zn2WriteResFpuPair<W, [Zn2FPU], 1>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;

// Vector logic: latency 1 on any FPU pipe.
foreach W = [WriteVecLogic, WriteVecLogicX, WriteVecLogicY] in
  defm : Zn2WriteResFpuPair<W, [Zn2FPU], 1>;
defm : X86WriteResPairUnsupported<WriteVecLogicZ>;

// Vector test: latency 1, two cycles on FPU pipe 1 or 2.
foreach W = [WriteVecTest, WriteVecTestY] in
  defm : Zn2WriteResFpuPair<W, [Zn2FPU12], 1, [2], 1, 7, 1>;
defm : X86WriteResPairUnsupported<WriteVecTestZ>;

// Vector ALU: latency 1 on any FPU pipe.
foreach W = [WriteVecALU, WriteVecALUX, WriteVecALUY] in
  defm : Zn2WriteResFpuPair<W, [Zn2FPU], 1>;
defm : X86WriteResPairUnsupported<WriteVecALUZ>;

// Vector integer multiply: latency 4 on FPU pipe 0.
foreach W = [WriteVecIMul, WriteVecIMulX, WriteVecIMulY] in
  defm : Zn2WriteResFpuPair<W, [Zn2FPU0], 4>;
defm : X86WriteResPairUnsupported<WriteVecIMulZ>;

// PMULLD: latency 4 on FPU pipe 0; the folded-load form adds one micro-op.
foreach W = [WritePMULLD, WritePMULLDY] in
  defm : Zn2WriteResFpuPair<W, [Zn2FPU0], 4, [1], 1, 7, 1>;
defm : X86WriteResPairUnsupported<WritePMULLDZ>;
// Vector shuffles, fixed and variable: latency 1 on any FPU pipe. The Z
// variants in this table are declared through X86WriteResPairUnsupported.
foreach W = [WriteShuffle, WriteShuffleX, WriteShuffleY] in
  defm : Zn2WriteResFpuPair<W, [Zn2FPU], 1>;
defm : X86WriteResPairUnsupported<WriteShuffleZ>;
foreach W = [WriteVarShuffle, WriteVarShuffleX, WriteVarShuffleY] in
  defm : Zn2WriteResFpuPair<W, [Zn2FPU], 1>;
defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;

// Vector blends: latency 1 on FPU pipe 0 or 1.
foreach W = [WriteBlend, WriteBlendY] in
  defm : Zn2WriteResFpuPair<W, [Zn2FPU01], 1>;
defm : X86WriteResPairUnsupported<WriteBlendZ>;

// 256-bit shuffles and VPMOV.
defm : Zn2WriteResFpuPair<WriteShuffle256,    [Zn2FPU],   2>;
defm : Zn2WriteResFpuPair<WriteVPMOV256,      [Zn2FPU12], 4, [1], 2, 4>;
defm : Zn2WriteResFpuPair<WriteVarShuffle256, [Zn2FPU],   2>;

// PSADBW: latency 3 on FPU pipe 0.
foreach W = [WritePSADBW, WritePSADBWX, WritePSADBWY] in
  defm : Zn2WriteResFpuPair<W, [Zn2FPU0], 3>;
defm : X86WriteResPairUnsupported<WritePSADBWZ>;

// PHMINPOS: latency 4 on FPU pipe 0.
defm : Zn2WriteResFpuPair<WritePHMINPOS, [Zn2FPU0], 4>;
433
// Variable vector shifts: latency 3 on FPU pipe 1 or 2. The Z form is
// declared through X86WriteResPairUnsupported.
foreach W = [WriteVarVecShift, WriteVarVecShiftY] in
  defm : Zn2WriteResFpuPair<W, [Zn2FPU12], 3>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
438
// Vector element insert: latency 1 on any FPU pipe.
defm : Zn2WriteResFpuPair<WriteVecInsert, [Zn2FPU], 1>;

// Vector element extract to register: one cycle on the FPU12 group and two
// on FPU pipe 2.
def : WriteRes<WriteVecExtract, [Zn2FPU12, Zn2FPU2]> {
  let ResourceCycles = [1, 2];
  let Latency = 2;
}

// Vector element extract to memory: adds an AGU and is 2 micro-ops.
def : WriteRes<WriteVecExtractSt, [Zn2AGU, Zn2FPU12, Zn2FPU2]> {
  let ResourceCycles = [1, 2, 3];
  let NumMicroOps = 2;
  let Latency = 5;
}
451
// MOVMSK instructions all execute on FPU pipe 2.
foreach MskW = [WriteFMOVMSK, WriteMMXMOVMSK, WriteVecMOVMSK] in
  def : WriteRes<MskW, [Zn2FPU2]>;

// The Y form takes 2 micro-ops, latency 2 and two cycles on the pipe.
def : WriteRes<WriteVecMOVMSKY, [Zn2FPU2]> {
  let ResourceCycles = [2];
  let Latency = 2;
  let NumMicroOps = 2;
}
462
// AES instructions (encrypt/decrypt, inverse mix columns, key generation):
// latency 4 on FPU pipe 0 or 1.
foreach AesW = [WriteAESDecEnc, WriteAESIMC, WriteAESKeyGen] in
  defm : Zn2WriteResFpuPair<AesW, [Zn2FPU01], 4>;
467
// Fences use an AGU; NOPs use no resources.
def : WriteRes<WriteFence, [Zn2AGU]>;
def : WriteRes<WriteNop,   []>;
470
// The following writes are microcoded. They are given a latency of 100 so
// that they block the entire pipeline.
foreach W = [WriteFShuffle256, WriteFVarShuffle256] in
  defm : Zn2WriteResFpuPair<W, [Zn2FPU], 100>;
475
// Generic write for microcoded instructions: no resources, latency 100.
def Zn2WriteMicrocoded : SchedWriteRes<[]> { let Latency = 100; }

// Dot product and horizontal adds are given a latency but no execution
// resources.
defm : Zn2WriteResPair<WriteDPPS, [], 15>;
foreach W = [WriteFHAdd, WriteFHAddY] in
  defm : Zn2WriteResPair<W, [], 7>;
foreach W = [WritePHAdd, WritePHAddX, WritePHAddY] in
  defm : Zn2WriteResPair<W, [], 3>;
486
// Writes that are modelled as microcoded: each one is aliased to
// Zn2WriteMicrocoded.
def : SchedAlias<WriteMicrocoded,  Zn2WriteMicrocoded>;
def : SchedAlias<WriteFCMOV,       Zn2WriteMicrocoded>;
def : SchedAlias<WriteSystem,      Zn2WriteMicrocoded>;
// MPSADBW, register and folded-load forms.
def : SchedAlias<WriteMPSAD,       Zn2WriteMicrocoded>;
def : SchedAlias<WriteMPSADY,      Zn2WriteMicrocoded>;
def : SchedAlias<WriteMPSADLd,     Zn2WriteMicrocoded>;
def : SchedAlias<WriteMPSADYLd,    Zn2WriteMicrocoded>;
// Carry-less multiply.
def : SchedAlias<WriteCLMul,       Zn2WriteMicrocoded>;
def : SchedAlias<WriteCLMulLd,     Zn2WriteMicrocoded>;
// Packed string compares.
def : SchedAlias<WritePCmpIStrM,   Zn2WriteMicrocoded>;
def : SchedAlias<WritePCmpIStrMLd, Zn2WriteMicrocoded>;
def : SchedAlias<WritePCmpEStrI,   Zn2WriteMicrocoded>;
def : SchedAlias<WritePCmpEStrILd, Zn2WriteMicrocoded>;
def : SchedAlias<WritePCmpEStrM,   Zn2WriteMicrocoded>;
def : SchedAlias<WritePCmpEStrMLd, Zn2WriteMicrocoded>;
def : SchedAlias<WritePCmpIStrI,   Zn2WriteMicrocoded>;
def : SchedAlias<WritePCmpIStrILd, Zn2WriteMicrocoded>;
// MXCSR load / store.
def : SchedAlias<WriteLDMXCSR,     Zn2WriteMicrocoded>;
def : SchedAlias<WriteSTMXCSR,     Zn2WriteMicrocoded>;
506
507//=== Regex based InstRW ===//
508// Notation:
509// - r: register.
510// - m = memory.
511// - i = immediate
512// - mm: 64 bit mmx register.
513// - x = 128 bit xmm register.
514// - (x)mm = mmx or xmm register.
515// - y = 256 bit ymm register.
516// - v = any vector register.
517
518//=== Integer Instructions ===//
519//-- Move instructions --//
// MOV r16,m: scheduled as a folded-load ALU operation.
def : InstRW<[WriteALULd, ReadAfterLd],
             (instregex "MOV16rm")>;

// MOVSX / MOVZX r,m: scheduled as a plain load.
def : InstRW<[WriteLoad],
             (instregex "MOV(S|Z)X32rm(8|16)")>;
527
// XCHG r,r (and the accumulator forms): 2 micro-ops on any ALU.
def Zn2WriteXCHG : SchedWriteRes<[Zn2ALU]> { let NumMicroOps = 2; }
def : InstRW<[Zn2WriteXCHG],
             (instregex "^XCHG(8|16|32|64)rr", "^XCHG(16|32|64)ar")>;

// XCHG r,m: AGU plus ALU, 2 micro-ops, latency 5.
def Zn2WriteXCHGrm : SchedWriteRes<[Zn2AGU, Zn2ALU]> {
  let NumMicroOps = 2;
  let Latency = 5;
}
def : InstRW<[Zn2WriteXCHGrm, ReadAfterLd],
             (instregex "^XCHG(8|16|32|64)rm")>;

// XLAT is treated as microcoded.
def : InstRW<[WriteMicrocoded],
             (instrs XLAT)>;
544
// POP16 (instructions matching "POP16rmm"): 2 micro-ops on an AGU,
// latency 5.
def Zn2WritePop16r : SchedWriteRes<[Zn2AGU]> {
  let NumMicroOps = 2;
  let Latency = 5;
}
def : InstRW<[Zn2WritePop16r],
             (instregex "POP16rmm")>;

// POPF and POPA are treated as microcoded.
def : InstRW<[WriteMicrocoded],
             (instregex "POPF(16|32)")>;
def : InstRW<[WriteMicrocoded],
             (instregex "POPA(16|32)")>;
554
555
// PUSH. The register form keeps the default values; only the memory form
// (instructions matching "PUSH(16|32)rmm") is overridden: an AGU with
// latency 4.
def Zn2WritePUSH : SchedWriteRes<[Zn2AGU]> { let Latency = 4; }
def : InstRW<[Zn2WritePUSH],
             (instregex "PUSH(16|32)rmm")>;

// PUSHF is treated as microcoded.
def : InstRW<[WriteMicrocoded],
             (instregex "PUSHF(16|32)")>;

// PUSHA: an AGU with latency 8.
def Zn2WritePushA : SchedWriteRes<[Zn2AGU]> { let Latency = 8; }
def : InstRW<[Zn2WritePushA],
             (instregex "PUSHA(16|32)")>;
572
// LAHF is treated as microcoded.
def : InstRW<[WriteMicrocoded],
             (instrs LAHF)>;
575
// MOVBE: AGU plus ALU with latency 5, shared by the load and store forms.
def Zn2WriteMOVBE : SchedWriteRes<[Zn2AGU, Zn2ALU]> { let Latency = 5; }

// r,m (load form).
def : InstRW<[Zn2WriteMOVBE, ReadAfterLd],
             (instregex "MOVBE(16|32|64)rm")>;

// m,r (store form).
def : InstRW<[Zn2WriteMOVBE],
             (instregex "MOVBE(16|32|64)mr")>;
585
586//-- Arithmetic instructions --//
587
// Memory-destination arithmetic is scheduled as a folded-load ALU op.

// ADD / SUB m,r and m,i.
def : InstRW<[WriteALULd],
             (instregex "(ADD|SUB)(8|16|32|64)m(r|i)",
                        "(ADD|SUB)(8|16|32|64)mi8",
                        "(ADD|SUB)64mi32")>;

// ADC / SBB m,r and m,i.
def : InstRW<[WriteALULd],
             (instregex "(ADC|SBB)(8|16|32|64)m(r|i)",
                        "(ADC|SBB)(16|32|64)mi8",
                        "(ADC|SBB)64mi32")>;

// INC / DEC / NOT / NEG m.
def : InstRW<[WriteALULd],
             (instregex "(INC|DEC|NOT|NEG)(8|16|32|64)m")>;
605
// MUL / IMUL, 16-bit. All forms use ALU1 plus the multiplier.

// Register operands: latency 3, or 4 for the immediate form.
def Zn2WriteMul16    : SchedWriteRes<[Zn2ALU1, Zn2Multiplier]> { let Latency = 3; }
def Zn2WriteMul16Imm : SchedWriteRes<[Zn2ALU1, Zn2Multiplier]> { let Latency = 4; }
def : SchedAlias<WriteIMul16,    Zn2WriteMul16>;
def : SchedAlias<WriteIMul16Imm, Zn2WriteMul16Imm>;
def : SchedAlias<WriteIMul16Reg, Zn2WriteMul16>;

// Memory operand: adds an AGU, latency 7 for every form.
def Zn2WriteMul16Ld : SchedWriteRes<[Zn2AGU, Zn2ALU1, Zn2Multiplier]> {
  let Latency = 7;
}
foreach MulLdW = [WriteIMul16Ld, WriteIMul16ImmLd, WriteIMul16RegLd] in
  def : SchedAlias<MulLdW, Zn2WriteMul16Ld>;
625
// MUL / IMUL, 32-bit register operands: ALU1 plus the multiplier,
// latency 3.
def Zn2WriteMul32 : SchedWriteRes<[Zn2ALU1, Zn2Multiplier]> { let Latency = 3; }
foreach MulW = [WriteIMul32, WriteIMul32Imm, WriteIMul32Reg] in
  def : SchedAlias<MulW, Zn2WriteMul32>;

// 32-bit memory operand: adds an AGU, latency 7.
def Zn2WriteMul32Ld : SchedWriteRes<[Zn2AGU, Zn2ALU1, Zn2Multiplier]> {
  let Latency = 7;
}
foreach MulLdW = [WriteIMul32Ld, WriteIMul32ImmLd, WriteIMul32RegLd] in
  def : SchedAlias<MulLdW, Zn2WriteMul32Ld>;
641
// MUL / IMUL, 64-bit register operands: ALU1 plus the multiplier,
// 2 micro-ops, latency 4.
def Zn2WriteMul64 : SchedWriteRes<[Zn2ALU1, Zn2Multiplier]> {
  let NumMicroOps = 2;
  let Latency = 4;
}
foreach MulW = [WriteIMul64, WriteIMul64Imm, WriteIMul64Reg] in
  def : SchedAlias<MulW, Zn2WriteMul64>;

// 64-bit memory operand: adds an AGU, 2 micro-ops, latency 8.
def Zn2WriteMul64Ld : SchedWriteRes<[Zn2AGU, Zn2ALU1, Zn2Multiplier]> {
  let NumMicroOps = 2;
  let Latency = 8;
}
foreach MulLdW = [WriteIMul64Ld, WriteIMul64ImmLd, WriteIMul64RegLd] in
  def : SchedAlias<MulLdW, Zn2WriteMul64Ld>;
659
// MULX.

// r32,r32,r32: latency 3; one cycle on ALU1 and two on the multiplier.
def Zn2WriteMulX32 : SchedWriteRes<[Zn2ALU1, Zn2Multiplier]> {
  let ResourceCycles = [1, 2];
  let Latency = 3;
}
def : InstRW<[Zn2WriteMulX32],
             (instrs MULX32rr)>;

// r32,r32,m32: latency 7; one AGU cycle, two on ALU1, two on the multiplier.
def Zn2WriteMulX32Ld : SchedWriteRes<[Zn2AGU, Zn2ALU1, Zn2Multiplier]> {
  let ResourceCycles = [1, 2, 2];
  let Latency = 7;
}
def : InstRW<[Zn2WriteMulX32Ld, ReadAfterLd],
             (instrs MULX32rm)>;

// r64,r64,r64: latency 3 on ALU1.
// NOTE(review): unlike the other multiply writes this one does not list
// Zn2Multiplier - confirm whether that is intentional.
def Zn2WriteMulX64 : SchedWriteRes<[Zn2ALU1]> { let Latency = 3; }
def : InstRW<[Zn2WriteMulX64],
             (instrs MULX64rr)>;

// r64,r64,m64: AGU, ALU1 and the multiplier, latency 7.
def Zn2WriteMulX64Ld : SchedWriteRes<[Zn2AGU, Zn2ALU1, Zn2Multiplier]> {
  let Latency = 7;
}
def : InstRW<[Zn2WriteMulX64Ld, ReadAfterLd],
             (instrs MULX64rm)>;
686
687//-- Control transfer instructions --//
688
// J(E|R)CXZ: issued on ALU0 or ALU3.
def Zn2WriteJCXZ : SchedWriteRes<[Zn2ALU03]>;
def : InstRW<[Zn2WriteJCXZ],
             (instrs JCXZ, JECXZ, JRCXZ)>;

// INTO is treated as microcoded.
def : InstRW<[WriteMicrocoded],
             (instrs INTO)>;

// LOOP: issued on ALU0 or ALU3.
def Zn2WriteLOOP : SchedWriteRes<[Zn2ALU03]>;
def : InstRW<[Zn2WriteLOOP],
             (instrs LOOP)>;

// LOOP(N)E, LOOP(N)Z: issued on ALU0 or ALU3.
def Zn2WriteLOOPE : SchedWriteRes<[Zn2ALU03]>;
def : InstRW<[Zn2WriteLOOPE],
             (instrs LOOPE, LOOPNE)>;

// CALL r: an AGU plus ALU0 or ALU3.
def Zn2WriteCALLr : SchedWriteRes<[Zn2AGU, Zn2ALU03]>;
def : InstRW<[Zn2WriteCALLr],
             (instregex "CALL(16|32)r")>;

// CALL m is treated as microcoded.
def : InstRW<[WriteMicrocoded],
             (instregex "CALL(16|32)m")>;

// RET, LRET and IRET: 2 micro-ops on ALU0 or ALU3.
def Zn2WriteRET : SchedWriteRes<[Zn2ALU03]> { let NumMicroOps = 2; }
def : InstRW<[Zn2WriteRET],
             (instregex "RET(L|Q|W)",
                        "LRET(L|Q|W)",
                        "IRET(16|32|64)")>;
717
718//-- Logic instructions --//
719
// AND / OR / XOR m,r and m,i: scheduled as a folded-load ALU op.
def : InstRW<[WriteALULd],
             (instregex "(AND|OR|XOR)(8|16|32|64)m(r|i)",
                        "(AND|OR|XOR)(8|16|32|64)mi8",
                        "(AND|OR|XOR)64mi32")>;

// ALU latency variants: a 2-cycle ALU write and its folded-load counterpart
// (AGU plus ALU, latency 6).
def Zn2WriteALULat2   : SchedWriteRes<[Zn2ALU]>         { let Latency = 2; }
def Zn2WriteALULat2Ld : SchedWriteRes<[Zn2AGU, Zn2ALU]> { let Latency = 6; }
733
// BT m,i: scheduled as a folded-load shift.
def : InstRW<[WriteShiftLd],
             (instregex "BT(16|32|64)mi8")>;

// BTR / BTS / BTC with a register destination: 2 micro-ops on any ALU,
// latency 2.
def Zn2WriteBTRSC : SchedWriteRes<[Zn2ALU]> {
  let NumMicroOps = 2;
  let Latency = 2;
}
def : InstRW<[Zn2WriteBTRSC],
             (instregex "BT(R|S|C)(16|32|64)r(r|i8)")>;

// BTR / BTS / BTC with a memory destination: AGU plus ALU, 2 micro-ops,
// latency 6. Used for both the immediate and register RMW writes.
def Zn2WriteBTRSCm : SchedWriteRes<[Zn2AGU, Zn2ALU]> {
  let NumMicroOps = 2;
  let Latency = 6;
}
def : SchedAlias<WriteBitTestSetImmRMW, Zn2WriteBTRSCm>;
def : SchedAlias<WriteBitTestSetRegRMW, Zn2WriteBTRSCm>;

// BLSI / BLSMSK / BLSR use the 2-cycle ALU write (r,r) and its folded-load
// counterpart (r,m).
def : SchedAlias<WriteBLS,   Zn2WriteALULat2>;
def : SchedAlias<WriteBLSLd, Zn2WriteALULat2Ld>;
760
761// CLD STD.
762def : InstRW<[WriteALU], (instrs STD, CLD)>;
763
764// PDEP PEXT.
765// r,r,r.
766def : InstRW<[WriteMicrocoded], (instregex "PDEP(32|64)rr", "PEXT(32|64)rr")>;
767// r,r,m.
768def : InstRW<[WriteMicrocoded], (instregex "PDEP(32|64)rm", "PEXT(32|64)rm")>;
769
770// RCR RCL.
771// m,1 / m,i / m,cl.
772def : InstRW<[WriteMicrocoded], (instregex "RC(R|L)(8|16|32|64)m(1|i|CL)")>;
773
774// SHR SHL SAR.
775// m,i.
776def : InstRW<[WriteShiftLd], (instregex "S(A|H)(R|L)(8|16|32|64)m(i|1)")>;
777
778// SHRD SHLD.
779// m,r,i.
780def : InstRW<[WriteShiftLd], (instregex "SH(R|L)D(16|32|64)mri8")>;
781
782// r,r,cl.
783def : InstRW<[WriteMicrocoded], (instregex "SH(R|L)D(16|32|64)rrCL")>;
784
785// m,r,cl.
786def : InstRW<[WriteMicrocoded], (instregex "SH(R|L)D(16|32|64)mrCL")>;
787
788//-- Misc instructions --//
789// CMPXCHG8B.
790def Zn2WriteCMPXCHG8B : SchedWriteRes<[Zn2AGU, Zn2ALU]> {
791  let NumMicroOps = 18;
792}
793def : InstRW<[Zn2WriteCMPXCHG8B], (instrs CMPXCHG8B)>;
794
795def : InstRW<[WriteMicrocoded], (instrs CMPXCHG16B)>;
796
797// LEAVE
798def Zn2WriteLEAVE : SchedWriteRes<[Zn2ALU, Zn2AGU]> {
799  let Latency = 8;
800  let NumMicroOps = 2;
801}
802def : InstRW<[Zn2WriteLEAVE], (instregex "LEAVE")>;
803
804// PAUSE.
805def : InstRW<[WriteMicrocoded], (instrs PAUSE)>;
806
807// RDTSC.
808def : InstRW<[WriteMicrocoded], (instregex "RDTSC")>;
809
810// RDPMC.
811def : InstRW<[WriteMicrocoded], (instrs RDPMC)>;
812
813// RDRAND.
814def : InstRW<[WriteMicrocoded], (instregex "RDRAND(16|32|64)r")>;
815
816// XGETBV.
817def : InstRW<[WriteMicrocoded], (instregex "XGETBV")>;
818
819//-- String instructions --//
820// CMPS.
821def : InstRW<[WriteMicrocoded], (instregex "CMPS(B|L|Q|W)")>;
822
823// LODSB/W.
824def : InstRW<[WriteMicrocoded], (instregex "LODS(B|W)")>;
825
826// LODSD/Q.
827def : InstRW<[WriteMicrocoded], (instregex "LODS(L|Q)")>;
828
829// MOVS.
830def : InstRW<[WriteMicrocoded], (instregex "MOVS(B|L|Q|W)")>;
831
832// SCAS.
833def : InstRW<[WriteMicrocoded], (instregex "SCAS(B|W|L|Q)")>;
834
835// STOS
836def : InstRW<[WriteMicrocoded], (instregex "STOS(B|L|Q|W)")>;
837
838// XADD.
839def Zn2XADD : SchedWriteRes<[Zn2ALU]>;
840def : InstRW<[Zn2XADD], (instregex "XADD(8|16|32|64)rr")>;
841def : InstRW<[WriteMicrocoded], (instregex "XADD(8|16|32|64)rm")>;
842
843//=== Floating Point x87 Instructions ===//
844//-- Move instructions --//
845
846def Zn2WriteFLDr : SchedWriteRes<[Zn2FPU13]> ;
847
848def Zn2WriteSTr: SchedWriteRes<[Zn2FPU23]> {
849  let Latency = 5;
850  let NumMicroOps = 2;
851}
852
853// LD_F.
854// r.
855def : InstRW<[Zn2WriteFLDr], (instregex "LD_Frr")>;
856
857// m.
858def Zn2WriteLD_F80m : SchedWriteRes<[Zn2AGU, Zn2FPU13]> {
859  let NumMicroOps = 2;
860}
861def : InstRW<[Zn2WriteLD_F80m], (instregex "LD_F80m")>;
862
863// FBLD.
864def : InstRW<[WriteMicrocoded], (instregex "FBLDm")>;
865
866// FST(P).
867// r.
868def : InstRW<[Zn2WriteSTr], (instregex "ST_(F|FP)rr")>;
869
870// m80.
871def Zn2WriteST_FP80m : SchedWriteRes<[Zn2AGU, Zn2FPU23]> {
872  let Latency = 5;
873}
874def : InstRW<[Zn2WriteST_FP80m], (instregex "ST_FP80m")>;
875
876// FBSTP.
877// m80.
878def : InstRW<[WriteMicrocoded], (instregex "FBSTPm")>;
879
880def Zn2WriteFXCH : SchedWriteRes<[Zn2FPU]>;
881
882// FXCH.
883def : InstRW<[Zn2WriteFXCH], (instrs XCH_F)>;
884
885// FILD.
886def Zn2WriteFILD : SchedWriteRes<[Zn2AGU, Zn2FPU3]> {
887  let Latency = 11;
888  let NumMicroOps = 2;
889}
890def : InstRW<[Zn2WriteFILD], (instregex "ILD_F(16|32|64)m")>;
891
892// FIST(P) FISTTP.
893def Zn2WriteFIST : SchedWriteRes<[Zn2AGU, Zn2FPU23]> {
894  let Latency = 12;
895}
896def : InstRW<[Zn2WriteFIST], (instregex "IS(T|TT)_(F|FP)(16|32|64)m")>;
897
898def Zn2WriteFPU13 : SchedWriteRes<[Zn2AGU, Zn2FPU13]> {
899  let Latency = 8;
900}
901
902def Zn2WriteFPU3 : SchedWriteRes<[Zn2AGU, Zn2FPU3]> {
903  let Latency = 11;
904}
905
906// FLDZ.
907def : SchedAlias<WriteFLD0, Zn2WriteFPU13>;
908
909// FLD1.
910def : SchedAlias<WriteFLD1, Zn2WriteFPU3>;
911
912// FLDPI FLDL2E etc.
913def : SchedAlias<WriteFLDC, Zn2WriteFPU3>;
914
915// FNSTSW.
916// AX.
917def : InstRW<[WriteMicrocoded], (instrs FNSTSW16r)>;
918
919// m16.
920def : InstRW<[WriteMicrocoded], (instrs FNSTSWm)>;
921
922// FLDCW.
923def : InstRW<[WriteMicrocoded], (instrs FLDCW16m)>;
924
925// FNSTCW.
926def : InstRW<[WriteMicrocoded], (instrs FNSTCW16m)>;
927
928// FINCSTP FDECSTP.
929def : InstRW<[Zn2WriteFPU3], (instrs FINCSTP, FDECSTP)>;
930
931// FFREE.
932def : InstRW<[Zn2WriteFPU3], (instregex "FFREE")>;
933
934// FNSAVE.
935def : InstRW<[WriteMicrocoded], (instregex "FSAVEm")>;
936
937// FRSTOR.
938def : InstRW<[WriteMicrocoded], (instregex "FRSTORm")>;
939
940//-- Arithmetic instructions --//
941
942def Zn2WriteFPU3Lat1 : SchedWriteRes<[Zn2FPU3]> ;
943
944def Zn2WriteFPU0Lat1 : SchedWriteRes<[Zn2FPU0]> ;
945
946def Zn2WriteFPU0Lat1Ld : SchedWriteRes<[Zn2AGU, Zn2FPU0]> {
947  let Latency = 8;
948}
949
950// FCHS.
951def : InstRW<[Zn2WriteFPU3Lat1], (instregex "CHS_F")>;
952
953// FCOM(P) FUCOM(P).
954// r.
955def : InstRW<[Zn2WriteFPU0Lat1], (instregex "COM(P?)_FST0r", "UCOM_F(P?)r")>;
956// m.
957def : InstRW<[Zn2WriteFPU0Lat1Ld], (instregex "FCOM(P?)(32|64)m")>;
958
959// FCOMPP FUCOMPP.
960// r.
961def : InstRW<[Zn2WriteFPU0Lat1], (instrs FCOMPP, UCOM_FPPr)>;
962
963def Zn2WriteFPU02 : SchedWriteRes<[Zn2AGU, Zn2FPU02]> // NOTE(review): lists Zn2AGU, yet the only InstRW using this write covers register-form FCOMI/FUCOMI — confirm the AGU is intended.
964{
965  let Latency = 9;
966}
967
968// FCOMI(P) FUCOMI(P).
969// r.
970def : InstRW<[Zn2WriteFPU02], (instrs COM_FIPr, COM_FIr, UCOM_FIPr, UCOM_FIr)>;
971
972def Zn2WriteFPU03 : SchedWriteRes<[Zn2AGU, Zn2FPU03]>
973{
974  let Latency = 12;
975  let NumMicroOps = 2;
976  let ResourceCycles = [1,3];
977}
978
979// FICOM(P).
980def : InstRW<[Zn2WriteFPU03], (instregex "FICOM(P?)(16|32)m")>;
981
982// FTST.
983def : InstRW<[Zn2WriteFPU0Lat1], (instregex "TST_F")>;
984
985// FXAM.
986def : InstRW<[Zn2WriteFPU3Lat1], (instrs XAM_F)>;
987
988// FPREM.
989def : InstRW<[WriteMicrocoded], (instrs FPREM)>;
990
991// FPREM1.
992def : InstRW<[WriteMicrocoded], (instrs FPREM1)>;
993
994// FRNDINT.
995def : InstRW<[WriteMicrocoded], (instrs FRNDINT)>;
996
997// FSCALE.
998def : InstRW<[WriteMicrocoded], (instrs FSCALE)>;
999
1000// FXTRACT.
1001def : InstRW<[WriteMicrocoded], (instrs FXTRACT)>;
1002
1003// FNOP.
1004def : InstRW<[Zn2WriteFPU0Lat1], (instrs FNOP)>;
1005
1006// WAIT.
1007def : InstRW<[Zn2WriteFPU0Lat1], (instrs WAIT)>;
1008
1009// FNCLEX.
1010def : InstRW<[WriteMicrocoded], (instrs FNCLEX)>;
1011
1012// FNINIT.
1013def : InstRW<[WriteMicrocoded], (instrs FNINIT)>;
1014
1015//=== Integer MMX and XMM Instructions ===//
1016
1017// PACKSSWB/DW PACKUSWB.
1018// mm <- mm.
1019def Zn2WriteFPU12 : SchedWriteRes<[Zn2FPU12]> ;
1020def Zn2WriteFPU12Y : SchedWriteRes<[Zn2FPU12]> {
1021  let Latency = 4;
1022  let NumMicroOps = 2;
1023}
1024def Zn2WriteFPU12m : SchedWriteRes<[Zn2AGU, Zn2FPU12]> ;
1025def Zn2WriteFPU12Ym : SchedWriteRes<[Zn2AGU, Zn2FPU12]> {
1026  let Latency = 8;
1027  let NumMicroOps = 2;
1028}
1029
1030def : InstRW<[Zn2WriteFPU12], (instrs MMX_PACKSSDWirr,
1031                                     MMX_PACKSSWBirr,
1032                                     MMX_PACKUSWBirr)>;
1033def : InstRW<[Zn2WriteFPU12m], (instrs MMX_PACKSSDWirm,
1034                                      MMX_PACKSSWBirm,
1035                                      MMX_PACKUSWBirm)>;
1036
1037def Zn2WriteFPU013 : SchedWriteRes<[Zn2FPU013]> ;
1038def Zn2WriteFPU013Y : SchedWriteRes<[Zn2FPU013]> ;
1039def Zn2WriteFPU013m : SchedWriteRes<[Zn2AGU, Zn2FPU013]> {
1040  let Latency = 8;
1041  let NumMicroOps = 2;
1042}
1043def Zn2WriteFPU013Ld : SchedWriteRes<[Zn2AGU, Zn2FPU013]> {
1044  let Latency = 8;
1045  let NumMicroOps = 2;
1046}
1047def Zn2WriteFPU013LdY : SchedWriteRes<[Zn2AGU, Zn2FPU013]> {
1048  let Latency = 8;
1049  let NumMicroOps = 2;
1050}
1051
1052// PBLENDW.
1053// x,x,i / v,v,v,i
1054def : InstRW<[Zn2WriteFPU013], (instregex "(V?)PBLENDWrri")>;
1055// ymm
1056def : InstRW<[Zn2WriteFPU013Y], (instrs VPBLENDWYrri)>;
1057
1058// x,m,i / v,v,m,i
1059def : InstRW<[Zn2WriteFPU013Ld], (instregex "(V?)PBLENDWrmi")>;
1060// y,m,i
1061def : InstRW<[Zn2WriteFPU013LdY], (instrs VPBLENDWYrmi)>;
1062
1063def Zn2WriteFPU01 : SchedWriteRes<[Zn2FPU01]> ;
1064def Zn2WriteFPU01Y : SchedWriteRes<[Zn2FPU01]> {
1065  let NumMicroOps = 2;
1066}
1067
1068// VPBLENDD.
1069// v,v,v,i.
1070def : InstRW<[Zn2WriteFPU01], (instrs VPBLENDDrri)>;
1071// ymm
1072def : InstRW<[Zn2WriteFPU01Y], (instrs VPBLENDDYrri)>;
1073
1074// v,v,m,i
1075def Zn2WriteFPU01Op2 : SchedWriteRes<[Zn2AGU, Zn2FPU01]> {
1076  let NumMicroOps = 2;
1077  let Latency = 8;
1078  let ResourceCycles = [1, 2];
1079}
1080def Zn2WriteFPU01Op2Y : SchedWriteRes<[Zn2AGU, Zn2FPU01]> {
1081  let NumMicroOps = 2;
1082  let Latency = 9;
1083  let ResourceCycles = [1, 3];
1084}
1085def : InstRW<[Zn2WriteFPU01Op2], (instrs VPBLENDDrmi)>;
1086def : InstRW<[Zn2WriteFPU01Op2Y], (instrs VPBLENDDYrmi)>;
1087
1088// MASKMOVQ.
1089def : InstRW<[WriteMicrocoded], (instregex "MMX_MASKMOVQ(64)?")>;
1090
1091// MASKMOVDQU.
1092def : InstRW<[WriteMicrocoded], (instregex "(V?)MASKMOVDQU(64)?")>;
1093
1094// VPMASKMOVD.
1095// v,v,m. NOTE(review): only the D load form is matched; VPMASKMOVQ(Y?)rm is not — confirm.
1096def : InstRW<[WriteMicrocoded],
1097                               (instregex "VPMASKMOVD(Y?)rm")>;
1098// m, v,v.
1099def : InstRW<[WriteMicrocoded], (instregex "VPMASKMOV(D|Q)(Y?)mr")>;
1100
1101// VPBROADCAST B/W.
1102// x, m8/16.
1103def Zn2WriteVPBROADCAST128Ld : SchedWriteRes<[Zn2AGU, Zn2FPU12]> {
1104  let Latency = 8;
1105  let NumMicroOps = 2;
1106  let ResourceCycles = [1, 2];
1107}
1108def : InstRW<[Zn2WriteVPBROADCAST128Ld],
1109                                     (instregex "VPBROADCAST(B|W)rm")>;
1110
1111// y, m8/16
1112def Zn2WriteVPBROADCAST256Ld : SchedWriteRes<[Zn2AGU, Zn2FPU1]> {
1113  let Latency = 8;
1114  let NumMicroOps = 2;
1115  let ResourceCycles = [1, 2];
1116}
1117def : InstRW<[Zn2WriteVPBROADCAST256Ld],
1118                                     (instregex "VPBROADCAST(B|W)Yrm")>;
1119
1120// VPGATHER.
1121def : InstRW<[WriteMicrocoded], (instregex "VPGATHER(Q|D)(Q|D)(Y?)rm")>;
1122
1123//-- Arithmetic instructions --//
1124
1125// PCMPGTQ.
1126def Zn2WritePCMPGTQr : SchedWriteRes<[Zn2FPU03]>;
1127def : InstRW<[Zn2WritePCMPGTQr], (instregex "(V?)PCMPGTQ(Y?)rr")>;
1128
1129// x <- x,m.
1130def Zn2WritePCMPGTQm : SchedWriteRes<[Zn2AGU, Zn2FPU03]> {
1131  let Latency = 8;
1132}
1133// ymm.
1134def Zn2WritePCMPGTQYm : SchedWriteRes<[Zn2AGU, Zn2FPU03]> {
1135  let Latency = 8;
1136}
1137def : InstRW<[Zn2WritePCMPGTQm], (instregex "(V?)PCMPGTQrm")>;
1138def : InstRW<[Zn2WritePCMPGTQYm], (instrs VPCMPGTQYrm)>;
1139
1140//-- Logic instructions --//
1141
1142// Shift write resources, used below for the PS(L|R)LDQ byte shifts.
1143// x,i / v,v,i.
1144def Zn2WritePShift  : SchedWriteRes<[Zn2FPU2]> {
1145  let Latency = 3;
1146}
1147def Zn2WritePShiftY : SchedWriteRes<[Zn2FPU2]> {
1148  let Latency = 3;
1149}
1150
1151// PSLL,PSRL DQ.
1152def : InstRW<[Zn2WritePShift], (instregex "(V?)PS(R|L)LDQri")>;
1153def : InstRW<[Zn2WritePShiftY], (instregex "(V?)PS(R|L)LDQYri")>;
1154
1155//=== Floating Point XMM and YMM Instructions ===//
1156//-- Move instructions --//
1157
1158// VPERM2F128.
1159def : InstRW<[WriteMicrocoded], (instrs VPERM2F128rr)>;
1160def : InstRW<[WriteMicrocoded], (instrs VPERM2F128rm)>;
1161
1162def Zn2WriteBROADCAST : SchedWriteRes<[Zn2AGU, Zn2FPU13]> {
1163  let NumMicroOps = 2;
1164  let Latency = 8;
1165}
1166// VBROADCASTF128.
1167def : InstRW<[Zn2WriteBROADCAST], (instrs VBROADCASTF128)>;
1168
1169// EXTRACTPS.
1170// r32,x,i.
1171def Zn2WriteEXTRACTPSr : SchedWriteRes<[Zn2FPU12, Zn2FPU2]> {
1172  let Latency = 2;
1173  let ResourceCycles = [1, 2];
1174}
1175def : InstRW<[Zn2WriteEXTRACTPSr], (instregex "(V?)EXTRACTPSrr")>;
1176
1177def Zn2WriteEXTRACTPSm : SchedWriteRes<[Zn2AGU,Zn2FPU12, Zn2FPU2]> {
1178  let Latency = 5;
1179  let NumMicroOps = 2;
1180  let ResourceCycles = [5, 1, 2]; // One entry per resource listed above. NOTE(review): the AGU is charged 5 cycles here, while other AGU writes in this section charge it 1 (e.g. [1, 2]) — confirm.
1181}
1182// m32,x,i.
1183def : InstRW<[Zn2WriteEXTRACTPSm], (instregex "(V?)EXTRACTPSmr")>;
1184
1185// VEXTRACTF128.
1186// x,y,i.
1187def : InstRW<[Zn2WriteFPU013], (instrs VEXTRACTF128rr)>;
1188
1189// m128,y,i.
1190def : InstRW<[Zn2WriteFPU013m], (instrs VEXTRACTF128mr)>;
1191
1192def Zn2WriteVINSERT128r: SchedWriteRes<[Zn2FPU013]> {
1193  let Latency = 2;
1194// Disabled ResourceCycles override (commented out, so not in effect):  let ResourceCycles = [2];
1195}
1196def Zn2WriteVINSERT128Ld: SchedWriteRes<[Zn2AGU,Zn2FPU013]> {
1197  let Latency = 9;
1198  let NumMicroOps = 2;
1199}
1200// VINSERTF128.
1201// y,y,x,i.
1202def : InstRW<[Zn2WriteVINSERT128r], (instrs VINSERTF128rr)>;
1203def : InstRW<[Zn2WriteVINSERT128Ld], (instrs VINSERTF128rm)>;
1204
1205// VGATHER.
1206def : InstRW<[WriteMicrocoded], (instregex "VGATHER(Q|D)(PD|PS)(Y?)rm")>;
1207
1208//-- Conversion instructions --//
1209def Zn2WriteCVTPD2PSr: SchedWriteRes<[Zn2FPU3]> {
1210  let Latency = 3;
1211}
1212def Zn2WriteCVTPD2PSYr: SchedWriteRes<[Zn2FPU3]> {
1213  let Latency = 3;
1214}
1215
1216// CVTPD2PS.
1217// x,x.
1218def : SchedAlias<WriteCvtPD2PS,  Zn2WriteCVTPD2PSr>;
1219// y,y.
1220def : SchedAlias<WriteCvtPD2PSY, Zn2WriteCVTPD2PSYr>;
1221// z,z.
1222defm : X86WriteResUnsupported<WriteCvtPD2PSZ>;
1223
1224def Zn2WriteCVTPD2PSLd: SchedWriteRes<[Zn2AGU,Zn2FPU03]> {
1225  let Latency = 10;
1226  let NumMicroOps = 2;
1227}
1228// x,m128.
1229def : SchedAlias<WriteCvtPD2PSLd, Zn2WriteCVTPD2PSLd>;
1230
1231// x,m256.
1232def Zn2WriteCVTPD2PSYLd : SchedWriteRes<[Zn2AGU, Zn2FPU3]> {
1233  let Latency = 10;
1234}
1235def : SchedAlias<WriteCvtPD2PSYLd, Zn2WriteCVTPD2PSYLd>;
1236// z,m512
1237defm : X86WriteResUnsupported<WriteCvtPD2PSZLd>;
1238
1239// CVTSD2SS.
1240// x,x.
1241// Same as WriteCVTPD2PSr
1242def : SchedAlias<WriteCvtSD2SS, Zn2WriteCVTPD2PSr>;
1243
1244// x,m64.
1245def : SchedAlias<WriteCvtSD2SSLd, Zn2WriteCVTPD2PSLd>;
1246
1247// CVTPS2PD.
1248// x,x.
1249def Zn2WriteCVTPS2PDr : SchedWriteRes<[Zn2FPU3]> {
1250  let Latency = 3;
1251}
1252def : SchedAlias<WriteCvtPS2PD, Zn2WriteCVTPS2PDr>;
1253
1254// x,m64.
1255// y,m128.
1256def Zn2WriteCVTPS2PDLd : SchedWriteRes<[Zn2AGU, Zn2FPU3]> {
1257  let Latency = 10;
1258  let NumMicroOps = 2;
1259}
1260def : SchedAlias<WriteCvtPS2PDLd, Zn2WriteCVTPS2PDLd>;
1261def : SchedAlias<WriteCvtPS2PDYLd, Zn2WriteCVTPS2PDLd>;
1262defm : X86WriteResUnsupported<WriteCvtPS2PDZLd>;
1263
1264// y,x.
1265def Zn2WriteVCVTPS2PDY : SchedWriteRes<[Zn2FPU3]> {
1266  let Latency = 3;
1267}
1268def : SchedAlias<WriteCvtPS2PDY, Zn2WriteVCVTPS2PDY>;
1269defm : X86WriteResUnsupported<WriteCvtPS2PDZ>;
1270
1271// CVTSS2SD.
1272// x,x.
1273def Zn2WriteCVTSS2SDr : SchedWriteRes<[Zn2FPU3]> {
1274  let Latency = 3;
1275}
1276def : SchedAlias<WriteCvtSS2SD, Zn2WriteCVTSS2SDr>;
1277
1278// x,m32.
1279def Zn2WriteCVTSS2SDLd : SchedWriteRes<[Zn2AGU, Zn2FPU3]> {
1280  let Latency = 10;
1281  let NumMicroOps = 2;
1282  let ResourceCycles = [1, 2];
1283}
1284def : SchedAlias<WriteCvtSS2SDLd, Zn2WriteCVTSS2SDLd>;
1285
1286def Zn2WriteCVTDQ2PDr: SchedWriteRes<[Zn2FPU12,Zn2FPU3]> {
1287  let Latency = 3;
1288}
1289// CVTDQ2PD.
1290// x,x.
1291def : InstRW<[Zn2WriteCVTDQ2PDr], (instregex "(V)?CVTDQ2P(D|S)rr")>;
1292
1293// Same as xmm
1294// y,x.
1295def : InstRW<[Zn2WriteCVTDQ2PDr], (instrs VCVTDQ2PDYrr)>;
1296def : InstRW<[Zn2WriteCVTDQ2PDr], (instrs VCVTDQ2PSYrr)>;
1297
1298def Zn2WriteCVTPD2DQr: SchedWriteRes<[Zn2FPU12, Zn2FPU3]> {
1299  let Latency = 3;
1300}
1301// CVT(T)P(D|S)2DQ.
1302// x,x.
1303def : InstRW<[Zn2WriteCVTPD2DQr], (instregex "(V?)CVT(T?)P(D|S)2DQrr")>;
1304
1305def Zn2WriteCVTPD2DQLd: SchedWriteRes<[Zn2AGU,Zn2FPU12,Zn2FPU3]> {
1306  let Latency = 10;
1307  let NumMicroOps = 2;
1308}
1309// x,m128.
1310def : InstRW<[Zn2WriteCVTPD2DQLd], (instregex "(V?)CVT(T?)PD2DQrm")>;
1311// same as xmm handling
1312// x,y.
1313def : InstRW<[Zn2WriteCVTPD2DQr], (instregex "VCVT(T?)PD2DQYrr")>;
1314// x,m256.
1315def : InstRW<[Zn2WriteCVTPD2DQLd], (instregex "VCVT(T?)PD2DQYrm")>;
1316
1317def Zn2WriteCVTPS2PIr: SchedWriteRes<[Zn2FPU3]> {
1318  let Latency = 4;
1319}
1320// CVT(T)PS2PI.
1321// mm,x.
1322def : InstRW<[Zn2WriteCVTPS2PIr], (instregex "MMX_CVT(T?)PS2PIirr")>;
1323
1324// CVTPI2PD.
1325// x,mm.
1326def : InstRW<[Zn2WriteCVTPS2PDr], (instrs MMX_CVTPI2PDirr)>;
1327
1328// CVT(T)PD2PI.
1329// mm,x.
1330def : InstRW<[Zn2WriteCVTPS2PIr], (instregex "MMX_CVT(T?)PD2PIirr")>;
1331
1332def Zn2WriteCVSTSI2SSr: SchedWriteRes<[Zn2FPU3]> { // NOTE(review): no InstRW/SchedAlias after this point references this write; the name looks like a typo of CVTSI2SS — confirm whether (V?)CVTSI(64)?2SSrr should be bound to it.
1333  let Latency = 3;
1334}
1335
1336// same as CVTPD2DQr
1337// CVT(T)SS2SI.
1338// r32,x.
1339def : InstRW<[Zn2WriteCVTPD2DQr], (instregex "(V?)CVT(T?)SS2SI(64)?rr")>;
1340// same as CVTPD2DQLd
1341// r32,m32.
1342def : InstRW<[Zn2WriteCVTPD2DQLd], (instregex "(V?)CVT(T?)SS2SI(64)?rm")>;
1343
1344def Zn2WriteCVSTSI2SDr: SchedWriteRes<[Zn2FPU013, Zn2FPU3]> {
1345  let Latency = 3;
1346}
1347// CVTSI2SD.
1348// x,r32/64.
1349def : InstRW<[Zn2WriteCVSTSI2SDr], (instregex "(V?)CVTSI(64)?2SDrr")>;
1350
1351
1352def Zn2WriteCVSTSI2SIr: SchedWriteRes<[Zn2FPU3, Zn2FPU2]> {
1353  let Latency = 4;
1354}
1355def Zn2WriteCVSTSI2SILd: SchedWriteRes<[Zn2AGU, Zn2FPU3, Zn2FPU2]> {
1356  let Latency = 11;
1357}
1358// CVT(T)SD2SI.
1359// r32/64,x.
1360def : InstRW<[Zn2WriteCVSTSI2SIr], (instregex "(V?)CVT(T?)SD2SI(64)?rr")>;
1361// r32/64,m64.
1362def : InstRW<[Zn2WriteCVSTSI2SILd], (instregex "(V?)CVT(T?)SD2SI(64)?rm")>;
1363
1364// VCVTPS2PH.
1365// x,v,i.
1366def : SchedAlias<WriteCvtPS2PH,    Zn2WriteMicrocoded>;
1367def : SchedAlias<WriteCvtPS2PHY,   Zn2WriteMicrocoded>;
1368defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
1369// m,v,i.
1370def : SchedAlias<WriteCvtPS2PHSt,  Zn2WriteMicrocoded>;
1371def : SchedAlias<WriteCvtPS2PHYSt, Zn2WriteMicrocoded>;
1372defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
1373
1374// VCVTPH2PS.
1375// v,x.
1376def : SchedAlias<WriteCvtPH2PS,    Zn2WriteMicrocoded>;
1377def : SchedAlias<WriteCvtPH2PSY,   Zn2WriteMicrocoded>;
1378defm : X86WriteResUnsupported<WriteCvtPH2PSZ>;
1379// v,m.
1380def : SchedAlias<WriteCvtPH2PSLd,  Zn2WriteMicrocoded>;
1381def : SchedAlias<WriteCvtPH2PSYLd, Zn2WriteMicrocoded>;
1382defm : X86WriteResUnsupported<WriteCvtPH2PSZLd>;
1383
1384//-- SSE4A instructions --//
1385// EXTRQ
1386def Zn2WriteEXTRQ: SchedWriteRes<[Zn2FPU12, Zn2FPU2]> {
1387  let Latency = 3;
1388}
1389def : InstRW<[Zn2WriteEXTRQ], (instregex "EXTRQ")>;
1390
1391// INSERTQ
1392def Zn2WriteINSERTQ: SchedWriteRes<[Zn2FPU03,Zn2FPU1]> {
1393  let Latency = 4;
1394}
1395def : InstRW<[Zn2WriteINSERTQ], (instregex "INSERTQ")>;
1396
1397//-- SHA instructions --//
1398// SHA256MSG2
1399def : InstRW<[WriteMicrocoded], (instregex "SHA256MSG2(Y?)r(r|m)")>;
1400
1401// SHA1MSG1, SHA256MSG1
1402// x,x.
1403def Zn2WriteSHA1MSG1r : SchedWriteRes<[Zn2FPU12]> {
1404  let Latency = 2;
1405}
1406def : InstRW<[Zn2WriteSHA1MSG1r], (instregex "SHA(1|256)MSG1rr")>;
1407// x,m.
1408def Zn2WriteSHA1MSG1Ld : SchedWriteRes<[Zn2AGU, Zn2FPU12]> {
1409  let Latency = 9;
1410}
1411def : InstRW<[Zn2WriteSHA1MSG1Ld], (instregex "SHA(1|256)MSG1rm")>;
1412
1413// SHA1MSG2
1414// x,x.
1415def Zn2WriteSHA1MSG2r : SchedWriteRes<[Zn2FPU12]> ;
1416def : InstRW<[Zn2WriteSHA1MSG2r], (instregex "SHA1MSG2rr")>;
1417// x,m.
1418def Zn2WriteSHA1MSG2Ld : SchedWriteRes<[Zn2AGU, Zn2FPU12]> {
1419  let Latency = 8;
1420}
1421def : InstRW<[Zn2WriteSHA1MSG2Ld], (instregex "SHA1MSG2rm")>;
1422
1423// SHA1NEXTE
1424// x,x.
1425def Zn2WriteSHA1NEXTEr : SchedWriteRes<[Zn2FPU1]> ;
1426def : InstRW<[Zn2WriteSHA1NEXTEr], (instregex "SHA1NEXTErr")>;
1427// x,m.
1428def Zn2WriteSHA1NEXTELd : SchedWriteRes<[Zn2AGU, Zn2FPU1]> {
1429  let Latency = 8;
1430}
1431def : InstRW<[Zn2WriteSHA1NEXTELd], (instregex "SHA1NEXTErm")>;
1432
1433// SHA1RNDS4
1434// x,x.
1435def Zn2WriteSHA1RNDS4r : SchedWriteRes<[Zn2FPU1]> {
1436  let Latency = 6;
1437}
1438def : InstRW<[Zn2WriteSHA1RNDS4r], (instregex "SHA1RNDS4rr")>;
1439// x,m.
1440def Zn2WriteSHA1RNDS4Ld : SchedWriteRes<[Zn2AGU, Zn2FPU1]> {
1441  let Latency = 13;
1442}
1443def : InstRW<[Zn2WriteSHA1RNDS4Ld], (instregex "SHA1RNDS4rm")>;
1444
1445// SHA256RNDS2
1446// x,x.
1447def Zn2WriteSHA256RNDS2r : SchedWriteRes<[Zn2FPU1]> {
1448  let Latency = 4;
1449}
1450def : InstRW<[Zn2WriteSHA256RNDS2r], (instregex "SHA256RNDS2rr")>;
1451// x,m.
1452def Zn2WriteSHA256RNDS2Ld : SchedWriteRes<[Zn2AGU, Zn2FPU1]> {
1453  let Latency = 11;
1454}
1455def : InstRW<[Zn2WriteSHA256RNDS2Ld], (instregex "SHA256RNDS2rm")>;
1456
1457//-- Arithmetic instructions --//
1458
1459// VDIVPS.
1460// TODO - convert to Zn2WriteResFpuPair
1461// y,y,y.
1462def Zn2WriteVDIVPSYr : SchedWriteRes<[Zn2FPU3]> {
1463  let Latency = 10;
1464  let ResourceCycles = [10]; // Zn2FPU3 is reserved for as many cycles as the latency.
1465}
1466def : SchedAlias<WriteFDivY,   Zn2WriteVDIVPSYr>;
1467
1468// y,y,m256.
1469def Zn2WriteVDIVPSYLd : SchedWriteRes<[Zn2AGU, Zn2FPU3]> {
1470  let Latency = 17;
1471  let NumMicroOps = 2;
1472  let ResourceCycles = [1, 17]; // AGU 1 cycle, FPU3 17 cycles. NOTE(review): the register form reserves FPU3 for 10 cycles; confirm the extra 7 load cycles should be charged to FPU3.
1473}
1474def : SchedAlias<WriteFDivYLd,  Zn2WriteVDIVPSYLd>;
1475
1476// VDIVPD.
1477// TODO - convert to Zn2WriteResFpuPair
1478// y,y,y.
1479def Zn2WriteVDIVPDY : SchedWriteRes<[Zn2FPU3]> {
1480  let Latency = 13;
1481  let ResourceCycles = [13]; // Zn2FPU3 is reserved for as many cycles as the latency.
1482}
1483def : SchedAlias<WriteFDiv64Y, Zn2WriteVDIVPDY>;
1484
1485// y,y,m256.
1486def Zn2WriteVDIVPDYLd : SchedWriteRes<[Zn2AGU, Zn2FPU3]> {
1487  let Latency = 20;
1488  let NumMicroOps = 2;
1489  let ResourceCycles = [1,20]; // AGU 1 cycle, FPU3 20 cycles. NOTE(review): the register form reserves FPU3 for 13 cycles; confirm the extra 7 load cycles should be charged to FPU3.
1490}
1491def : SchedAlias<WriteFDiv64YLd, Zn2WriteVDIVPDYLd>;
1492
1493// DPPS (ymm).
1494// y,y,y,i.
1495def : SchedAlias<WriteDPPSY,  Zn2WriteMicrocoded>;
1496
1497// y,y,m,i.
1498def : SchedAlias<WriteDPPSYLd,Zn2WriteMicrocoded>;
1499
1500// DPPD.
1501// x,x,i.
1502def : SchedAlias<WriteDPPD,   Zn2WriteMicrocoded>;
1503
1504// x,m,i.
1505def : SchedAlias<WriteDPPDLd, Zn2WriteMicrocoded>;
1506
1507// RSQRTSS
1508// TODO - convert to Zn2WriteResFpuPair
1509// x,x.
1510def Zn2WriteRSQRTSSr : SchedWriteRes<[Zn2FPU02]> {
1511  let Latency = 5;
1512}
1513def : SchedAlias<WriteFRsqrt, Zn2WriteRSQRTSSr>;
1514
1515// x,m128.
1516def Zn2WriteRSQRTSSLd: SchedWriteRes<[Zn2AGU, Zn2FPU02]> {
1517  let Latency = 12;
1518  let NumMicroOps = 2;
1519  let ResourceCycles = [1,2];
1520}
1521def : SchedAlias<WriteFRsqrtLd, Zn2WriteRSQRTSSLd>;
1522
1523// RSQRTPS
1524// TODO - convert to Zn2WriteResFpuPair
1525// y,y.
1526def Zn2WriteRSQRTPSYr : SchedWriteRes<[Zn2FPU01]> {
1527  let Latency = 5;
1528  let NumMicroOps = 2;
1529  let ResourceCycles = [2];
1530}
1531def : SchedAlias<WriteFRsqrtY, Zn2WriteRSQRTPSYr>;
1532
1533// y,m256.
1534def Zn2WriteRSQRTPSYLd : SchedWriteRes<[Zn2AGU, Zn2FPU01]> {
1535  let Latency = 12;
1536  let NumMicroOps = 2;
1537}
1538def : SchedAlias<WriteFRsqrtYLd, Zn2WriteRSQRTPSYLd>;
1539
1540//-- Other instructions --//
1541
1542// VZEROUPPER.
1543def : InstRW<[WriteALU], (instrs VZEROUPPER)>;
1544
1545// VZEROALL.
1546def : InstRW<[WriteMicrocoded], (instrs VZEROALL)>;
1547
1548} // SchedModel
1549