xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleZnver2.td (revision e9a994639b2af232f994ba2ad23ca45a17718d2b)
1//=- X86ScheduleZnver2.td - X86 Znver2 Scheduling -------------*- tablegen -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the machine model for Znver2 to support instruction
10// scheduling and other instruction cost heuristics.
11//
12//===----------------------------------------------------------------------===//
13
// Top-level machine model record for Znver2.
def Znver2Model : SchedMachineModel {
  // Zen can decode 4 instructions per cycle.
  let IssueWidth = 4;
  // Sized after the reorder buffer.
  let MicroOpBufferSize = 224;

  let LoadLatency = 4;
  let HighLatency = 25;
  let MispredictPenalty = 17;

  let PostRAScheduler = 1;

  // FIXME: Not every instruction has been catered for yet, so this field is
  // reset to tell the scheduler infrastructure that the model is incomplete.
  let CompleteModel = 0;
}
30
31let SchedModel = Znver2Model in {
32
// Zen can issue micro-ops to 11 different units in one cycle.
// These are
//  * Four integer ALU units (Zn2ALU0, Zn2ALU1, Zn2ALU2, Zn2ALU3)
//  * Three AGU units (Zn2AGU0, Zn2AGU1, Zn2AGU2)
//  * Four FPU units (Zn2FPU0, Zn2FPU1, Zn2FPU2, Zn2FPU3)
// The AGUs feed the load/store queues at two loads and one store per cycle.
39
// Individual execution units. Every unit is declared as ProcResource<1>.

// Four ALU units: Zn2ALU0 .. Zn2ALU3.
foreach Idx = [0, 1, 2, 3] in
  def Zn2ALU#Idx : ProcResource<1>;

// Three AGU units: Zn2AGU0 .. Zn2AGU2.
foreach Idx = [0, 1, 2] in
  def Zn2AGU#Idx : ProcResource<1>;

// Four FPU units: Zn2FPU0 .. Zn2FPU3.
foreach Idx = [0, 1, 2, 3] in
  def Zn2FPU#Idx : ProcResource<1>;
56
// FPU pipe groups. The numeric suffix lists the member pipes.
def Zn2FPU01  : ProcResGroup<[Zn2FPU0, Zn2FPU1]>;
def Zn2FPU02  : ProcResGroup<[Zn2FPU0, Zn2FPU2]>;
def Zn2FPU03  : ProcResGroup<[Zn2FPU0, Zn2FPU3]>;
def Zn2FPU12  : ProcResGroup<[Zn2FPU1, Zn2FPU2]>;
def Zn2FPU13  : ProcResGroup<[Zn2FPU1, Zn2FPU3]>;
def Zn2FPU23  : ProcResGroup<[Zn2FPU2, Zn2FPU3]>;
def Zn2FPU013 : ProcResGroup<[Zn2FPU0, Zn2FPU1, Zn2FPU3]>;
65
// Unit groups. A micro-op that may be issued to more than one unit is
// described by naming the group of candidate units.

// ALU groups.
// Zn2ALU03 pairs ALU0 with ALU3.
def Zn2ALU03 : ProcResGroup<[Zn2ALU0, Zn2ALU3]>;

// All four ALUs, behind the 64-entry (16x4 entries) integer scheduler.
let BufferSize = 64 in
def Zn2ALU : ProcResGroup<[Zn2ALU0, Zn2ALU1, Zn2ALU2, Zn2ALU3]>;

// All AGUs, with a 28-entry (14x2) buffer. The AGUs cannot be used for every
// ALU operation, but they are relevant for some instructions.
let BufferSize = 28 in
def Zn2AGU : ProcResGroup<[Zn2AGU0, Zn2AGU1, Zn2AGU2]>;

// Integer multiplier; multiplications are issued on ALU1.
def Zn2Multiplier : ProcResource<1>;

// Integer divider; divisions are issued on ALU2.
def Zn2Divider : ProcResource<1>;
89
// Load-to-use latencies captured as read advances.

// Integer loads: 4 cycles.
def : ReadAdvance<ReadAfterLd, 4>;

// Vector loads: 7 cycles for each of the vector read kinds.
foreach VecRead = [ReadAfterVecLd, ReadAfterVecXLd, ReadAfterVecYLd] in
  def : ReadAdvance<VecRead, 7>;

// No advance for ReadInt2Fpu.
def : ReadAdvance<ReadInt2Fpu, 0>;
99
// Integer physical register file: 168 entries for Zen, holding both the
// architectural and the speculative versions of the 64-bit integer registers.
// Reference: "Software Optimization Guide for AMD Family 17h Processors"
def Zn2IntegerPRF
    : RegisterFile<168,
                   [GR64, CCR]>;
104
// All four FPU pipes, behind the 36-entry (9x4 entries) floating-point
// scheduler.
let BufferSize = 36 in
def Zn2FPU : ProcResGroup<[Zn2FPU0, Zn2FPU1, Zn2FPU2, Zn2FPU3]>;
109
// FP register file. The Zen FP Retire Queue renames SIMD and FP uOps onto a
// pool of 160 128-bit registers; operations on 256-bit data types are cracked
// into two COPs, which is why VR256 is given a cost of 2 below.
// Reference: "Software Optimization Guide for AMD Family 17h Processors"
def Zn2FpuPRF
    : RegisterFile<160,
                   [VR64, VR128, VR256],
                   [1,    1,     2]>;
114
// Retire control unit, shared between integer and FP ops.
//  * Tracks up to 192 macro ops in flight.
//  * Commits, in order, up to 8 macro ops per cycle.
// Reference: "Software Optimization Guide for AMD Family 17h Processors"
// In SMT mode each thread gets 96 entries. The conservative value is not used
// here because there is currently no way to fully model SMT mode, so there is
// no point in trying.
def Zn2RCU
    : RetireControlUnit<192, 8>;
123
// A folded load is an instruction that loads and also performs an operation,
// e.g. ADDPD xmm,[mem]. Instructions with folded loads are usually
// micro-fused, so they only appear as two micro-ops:
//      a. the load, and
//      b. the operation (addpd in the example).
//
// Zn2WriteResPair describes such a write for the integer units. It emits two
// anonymous WriteRes records: one for SchedRW and one for SchedRW.Folded.
//   ExePorts - resources used by the register form.
//   Lat      - latency of the register form.
//   Res      - ResourceCycles of the register form (may be left empty).
//   UOps     - micro-op count of the register form.
//   LoadLat  - latency added for the folded-load form (default 4).
//   LoadUOps - micro-ops added for the folded-load form (default 1).
multiclass Zn2WriteResPair<X86FoldableSchedWrite SchedRW,
                           list<ProcResourceKind> ExePorts,
                           int Lat, list<int> Res = [], int UOps = 1,
                           int LoadLat = 4, int LoadUOps = 1> {
  // Register form: issued on ExePorts with the caller's numbers as given.
  def : WriteRes<SchedRW, ExePorts> {
    let NumMicroOps = UOps;
    let ResourceCycles = Res;
    let Latency = Lat;
  }

  // Folded-load form: Zn2AGU is put in front of ExePorts. When Res is
  // non-empty a 1-cycle entry for Zn2AGU is put in front of it as well; an
  // empty Res stays empty.
  def : WriteRes<SchedRW.Folded, !listconcat([Zn2AGU], ExePorts)> {
    let NumMicroOps = !add(UOps, LoadUOps);
    let ResourceCycles = !if(!empty(Res), Res, !listconcat([1], Res));
    let Latency = !add(Lat, LoadLat);
  }
}
150
// Folded-load pair for the floating point units. It emits two anonymous
// WriteRes records: one for SchedRW and one for SchedRW.Folded.
//   ExePorts - resources used by the register form.
//   Lat      - latency of the register form.
//   Res      - ResourceCycles of the register form (may be left empty).
//   UOps     - micro-op count of the register form.
//   LoadLat  - latency added for the folded-load form (default 7).
//   LoadUOps - micro-ops added for the folded-load form (default 0).
multiclass Zn2WriteResFpuPair<X86FoldableSchedWrite SchedRW,
                              list<ProcResourceKind> ExePorts,
                              int Lat, list<int> Res = [], int UOps = 1,
                              int LoadLat = 7, int LoadUOps = 0> {
  // Register form: issued on ExePorts with the caller's numbers as given.
  def : WriteRes<SchedRW, ExePorts> {
    let NumMicroOps = UOps;
    let ResourceCycles = Res;
    let Latency = Lat;
  }

  // Folded-load form: Zn2AGU is put in front of ExePorts. When Res is
  // non-empty a 1-cycle entry for Zn2AGU is put in front of it as well; an
  // empty Res stays empty.
  def : WriteRes<SchedRW.Folded, !listconcat([Zn2AGU], ExePorts)> {
    let NumMicroOps = !add(UOps, LoadUOps);
    let ResourceCycles = !if(!empty(Res), Res, !listconcat([1], Res));
    let Latency = !add(Lat, LoadLat);
  }
}
171
// Writes that only need an AGU. WriteRMW is set for instructions with a
// memory write operation in codegen.
foreach Wr = [WriteRMW, WriteStore, WriteStoreNT] in
  def : WriteRes<Wr, [Zn2AGU]>;

// Plain load: AGU, latency 8.
let Latency = 8 in
def : WriteRes<WriteLoad, [Zn2AGU]>;

// WriteZero uses no resources.
def : WriteRes<WriteZero, []>;

// Register move and LEA: any ALU.
def : WriteRes<WriteMove, [Zn2ALU]>;
def : WriteRes<WriteLEA,  [Zn2ALU]>;

// Simple ALU operations (register form plus folded-load form), latency 1.
defm : Zn2WriteResPair<WriteALU, [Zn2ALU], 1>;
defm : Zn2WriteResPair<WriteADC, [Zn2ALU], 1>;
185
// 8-bit integer multiply: ALU1 plus the multiplier, latency 4.
defm : Zn2WriteResPair<WriteIMul8, [Zn2ALU1, Zn2Multiplier], 4>;

// Byte swaps.
defm : X86WriteRes<WriteBSWAP32,    [Zn2ALU],         1, [4],    1>;
defm : X86WriteRes<WriteBSWAP64,    [Zn2ALU],         1, [4],    1>;
// Exchange and compare-exchange.
defm : X86WriteRes<WriteXCHG,       [Zn2ALU],         1, [2],    2>;
defm : X86WriteRes<WriteCMPXCHG,    [Zn2ALU],         3, [1],    1>;
defm : X86WriteRes<WriteCMPXCHGRMW, [Zn2ALU, Zn2AGU], 8, [1, 1], 5>;

// Shifts and rotates: any ALU, latency 1.
foreach Wr = [WriteShift, WriteShiftCL, WriteRotate, WriteRotateCL] in
  defm : Zn2WriteResPair<Wr, [Zn2ALU], 1>;
198
// Double shifts: only WriteSHDrri gets resources here; the remaining forms
// are marked unsupported.
defm : X86WriteRes<WriteSHDrri, [Zn2ALU], 1, [1], 1>;
foreach Wr = [WriteSHDrrcl, WriteSHDmri, WriteSHDmrcl] in
  defm : X86WriteResUnsupported<Wr>;

// Jumps and conditional moves: any ALU, latency 1.
defm : Zn2WriteResPair<WriteJump, [Zn2ALU], 1>;
defm : Zn2WriteResPair<WriteCMOV, [Zn2ALU], 1>;

// CRC32 is described with the FPU pair multiclass on FPU0, latency 3.
defm : Zn2WriteResFpuPair<WriteCRC32, [Zn2FPU0], 3>;

// SETcc, to a register and to memory.
def : WriteRes<WriteSETCC,      [Zn2ALU]>;
def : WriteRes<WriteSETCCStore, [Zn2ALU, Zn2AGU]>;

defm : X86WriteRes<WriteLAHFSAHF, [Zn2ALU], 2, [1], 2>;

// Bit tests.
defm : X86WriteRes<WriteBitTest,    [Zn2ALU], 1, [1], 1>;
defm : X86WriteRes<WriteBitTestSet, [Zn2ALU], 2, [1], 2>;
foreach Wr = [WriteBitTestImmLd, WriteBitTestRegLd] in
  defm : X86WriteRes<Wr, [Zn2ALU, Zn2AGU], 5, [1, 1], 2>;
216
// Bit scans and bit counts. All issue on any ALU; only the latency differs.
defm : Zn2WriteResPair<WriteBSF,   [Zn2ALU], 3>;
defm : Zn2WriteResPair<WriteBSR,   [Zn2ALU], 4>;
defm : Zn2WriteResPair<WriteTZCNT, [Zn2ALU], 2>;

// Latency-1 group: LZCNT, POPCNT, BMI1 BEXTR and BMI2 BZHI.
foreach Wr = [WriteLZCNT, WritePOPCNT, WriteBEXTR, WriteBZHI] in
  defm : Zn2WriteResPair<Wr, [Zn2ALU], 1>;

// Treat misc copies as a move.
def : InstRW<[WriteMove],
             (instrs COPY)>;
230
// DIV / IDIV. Both are issued on ALU2 and hold the divider for as many cycles
// as the latency. WriteDivN and WriteIDivN use the same numbers per width.
// 8-bit.
foreach Wr = [WriteDiv8, WriteIDiv8] in
  defm : Zn2WriteResPair<Wr, [Zn2ALU2, Zn2Divider], 15, [1, 15], 1>;
// 16-bit.
foreach Wr = [WriteDiv16, WriteIDiv16] in
  defm : Zn2WriteResPair<Wr, [Zn2ALU2, Zn2Divider], 17, [1, 17], 2>;
// 32-bit.
foreach Wr = [WriteDiv32, WriteIDiv32] in
  defm : Zn2WriteResPair<Wr, [Zn2ALU2, Zn2Divider], 25, [1, 25], 2>;
// 64-bit.
foreach Wr = [WriteDiv64, WriteIDiv64] in
  defm : Zn2WriteResPair<Wr, [Zn2ALU2, Zn2Divider], 41, [1, 41], 2>;

// IMULH: ALU1 plus the multiplier, latency 4.
let Latency = 4 in
def : WriteRes<WriteIMulH, [Zn2ALU1, Zn2Multiplier]>;
245
// Floating point memory operations and moves.

// Loads: AGU only, latency 8.
foreach Wr = [WriteFLoad, WriteFLoadX, WriteFLoadY] in
  defm : X86WriteRes<Wr, [Zn2AGU], 8, [1], 1>;

// Masked loads.
defm : X86WriteRes<WriteFMaskedLoad,  [Zn2AGU, Zn2FPU01], 8, [1, 1], 1>;
defm : X86WriteRes<WriteFMaskedLoadY, [Zn2AGU, Zn2FPU01], 8, [1, 1], 2>;

// Masked stores; the 32- and 64-bit element forms share their numbers.
foreach Wr = [WriteFMaskedStore32, WriteFMaskedStore64] in
  defm : X86WriteRes<Wr, [Zn2AGU, Zn2FPU01], 4, [1, 1], 1>;
foreach Wr = [WriteFMaskedStore32Y, WriteFMaskedStore64Y] in
  defm : X86WriteRes<Wr, [Zn2AGU, Zn2FPU01], 5, [1, 2], 2>;

// Stores: AGU only, latency 1. WriteFStoreNT is the exception below.
foreach Wr = [WriteFStore, WriteFStoreX, WriteFStoreY,
              WriteFStoreNTX, WriteFStoreNTY] in
  defm : X86WriteRes<Wr, [Zn2AGU], 1, [1], 1>;
defm : X86WriteRes<WriteFStoreNT, [Zn2AGU, Zn2FPU2], 8, [1, 1], 1>;

// Register moves: any FPU pipe, latency 1.
foreach Wr = [WriteFMove, WriteFMoveX, WriteFMoveY] in
  defm : X86WriteRes<Wr, [Zn2FPU], 1, [1], 1>;
266
// FAdd (all widths) and FCom: FPU0, latency 3.
foreach Wr = [WriteFAdd,   WriteFAddX,   WriteFAddY,
              WriteFAdd64, WriteFAdd64X, WriteFAdd64Y,
              WriteFCom,   WriteFComX] in
  defm : Zn2WriteResFpuPair<Wr, [Zn2FPU0], 3>;

// FCmp (all widths) and VarBlend: FPU0, latency 1.
foreach Wr = [WriteFCmp,     WriteFCmpX,   WriteFCmpY,
              WriteFCmp64,   WriteFCmp64X, WriteFCmp64Y,
              WriteVarBlend, WriteVarBlendY] in
  defm : Zn2WriteResFpuPair<Wr, [Zn2FPU0], 1>;

// FBlend and FVarBlend: FPU0 or FPU1, latency 1.
foreach Wr = [WriteFBlend, WriteFBlendY, WriteFVarBlend, WriteFVarBlendY] in
  defm : Zn2WriteResFpuPair<Wr, [Zn2FPU01], 1>;

// The Z-suffixed writes are marked unsupported.
foreach Wr = [WriteFAddZ, WriteFAdd64Z, WriteFCmpZ, WriteFCmp64Z,
              WriteFBlendZ, WriteFVarBlendZ, WriteVarBlendZ] in
  defm : X86WriteResPairUnsupported<Wr>;
// Conversion writes listed here: FPU3, latency 5.
foreach Wr = [WriteCvtSS2I, WriteCvtPS2I, WriteCvtPS2IY,
              WriteCvtSD2I, WriteCvtPD2I, WriteCvtPD2IY,
              WriteCvtI2SS, WriteCvtI2PS, WriteCvtI2PSY,
              WriteCvtI2SD, WriteCvtI2PD, WriteCvtI2PDY] in
  defm : Zn2WriteResFpuPair<Wr, [Zn2FPU3], 5>;

// The Z-suffixed conversion writes are marked unsupported.
foreach Wr = [WriteCvtPS2IZ, WriteCvtPD2IZ, WriteCvtI2PSZ, WriteCvtI2PDZ] in
  defm : X86WriteResPairUnsupported<Wr>;
// FP divide: FPU3, latency 15.
foreach Wr = [WriteFDiv, WriteFDivX, WriteFDiv64, WriteFDiv64X] in
  defm : Zn2WriteResFpuPair<Wr, [Zn2FPU3], 15>;

// FP sign: FPU3, latency 2.
defm : Zn2WriteResFpuPair<WriteFSign, [Zn2FPU3], 2>;

// FP round: FPU3, latency 3, with explicit resource cycles.
defm : Zn2WriteResFpuPair<WriteFRnd,  [Zn2FPU3], 3, [1], 1, 7, 0>;
defm : Zn2WriteResFpuPair<WriteFRndY, [Zn2FPU3], 3, [1], 1, 7, 0>;

// FP logic and test: any FPU pipe, latency 1.
foreach Wr = [WriteFLogic, WriteFLogicY, WriteFTest, WriteFTestY] in
  defm : Zn2WriteResFpuPair<Wr, [Zn2FPU], 1>;

// FP shuffles: FPU1 or FPU2. Latency 1 for the fixed forms, 3 for the
// variable forms.
defm : Zn2WriteResFpuPair<WriteFShuffle,     [Zn2FPU12], 1>;
defm : Zn2WriteResFpuPair<WriteFShuffleY,    [Zn2FPU12], 1>;
defm : Zn2WriteResFpuPair<WriteFVarShuffle,  [Zn2FPU12], 3>;
defm : Zn2WriteResFpuPair<WriteFVarShuffleY, [Zn2FPU12], 3>;

// The Z-suffixed writes are marked unsupported.
foreach Wr = [WriteFDivZ, WriteFDiv64Z, WriteFRndZ, WriteFLogicZ,
              WriteFTestZ, WriteFShuffleZ, WriteFVarShuffleZ] in
  defm : X86WriteResPairUnsupported<Wr>;
// FP multiply: FPU0 or FPU1, latency 3; the folded load adds one micro-op.
foreach Wr = [WriteFMul,   WriteFMulX,   WriteFMulY,
              WriteFMul64, WriteFMul64X, WriteFMul64Y] in
  defm : Zn2WriteResFpuPair<Wr, [Zn2FPU01], 3, [1], 1, 7, 1>;

// FMA: FPU0 or FPU3, latency 5.
foreach Wr = [WriteFMA, WriteFMAX, WriteFMAY] in
  defm : Zn2WriteResFpuPair<Wr, [Zn2FPU03], 5>;

// Reciprocal and reciprocal square root: FPU0 or FPU1, latency 5.
defm : Zn2WriteResFpuPair<WriteFRcp,    [Zn2FPU01], 5>;
defm : Zn2WriteResFpuPair<WriteFRcpX,   [Zn2FPU01], 5>;
defm : Zn2WriteResFpuPair<WriteFRcpY,   [Zn2FPU01], 5, [1], 1, 7, 2>;
defm : Zn2WriteResFpuPair<WriteFRsqrtX, [Zn2FPU01], 5, [1], 1, 7, 1>;

// Square root: FPU3, which is held for as many cycles as the latency.
foreach Wr = [WriteFSqrt, WriteFSqrtX, WriteFSqrt64, WriteFSqrt64X,
              WriteFSqrt80] in
  defm : Zn2WriteResFpuPair<Wr, [Zn2FPU3], 20, [20]>;
defm : Zn2WriteResFpuPair<WriteFSqrtY,   [Zn2FPU3], 28, [28], 1, 7, 1>;
defm : Zn2WriteResFpuPair<WriteFSqrt64Y, [Zn2FPU3], 20, [20], 1, 7, 1>;

// The Z-suffixed writes are marked unsupported.
foreach Wr = [WriteFMulZ, WriteFMul64Z, WriteFMAZ, WriteFRcpZ, WriteFRsqrtZ,
              WriteFSqrtZ, WriteFSqrt64Z] in
  defm : X86WriteResPairUnsupported<Wr>;
359
// Vector integer operations, which use the FPU units.

// Loads: AGU only, latency 8.
foreach Wr = [WriteVecLoad, WriteVecLoadX, WriteVecLoadY,
              WriteVecLoadNT, WriteVecLoadNTY] in
  defm : X86WriteRes<Wr, [Zn2AGU], 8, [1], 1>;

// Masked loads.
foreach Wr = [WriteVecMaskedLoad, WriteVecMaskedLoadY] in
  defm : X86WriteRes<Wr, [Zn2AGU, Zn2FPU01], 8, [1, 2], 2>;

// Stores: AGU only, latency 1.
foreach Wr = [WriteVecStore, WriteVecStoreX, WriteVecStoreY,
              WriteVecStoreNT, WriteVecStoreNTY] in
  defm : X86WriteRes<Wr, [Zn2AGU], 1, [1], 1>;

// Masked stores; the 32- and 64-bit element forms share their numbers.
foreach Wr = [WriteVecMaskedStore32, WriteVecMaskedStore64] in
  defm : X86WriteRes<Wr, [Zn2AGU, Zn2FPU01], 4, [1, 1], 1>;
foreach Wr = [WriteVecMaskedStore32Y, WriteVecMaskedStore64Y] in
  defm : X86WriteRes<Wr, [Zn2AGU, Zn2FPU01], 5, [1, 2], 2>;

// Register moves.
defm : X86WriteRes<WriteVecMove,        [Zn2FPU],  1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX,       [Zn2FPU],  1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY,       [Zn2FPU],  2, [1], 2>;
defm : X86WriteRes<WriteVecMoveToGpr,   [Zn2FPU2], 2, [1], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [Zn2FPU2], 3, [1], 1>;

defm : X86WriteRes<WriteEMMS, [Zn2FPU], 2, [1], 1>;
383
// Vector shift by register: any FPU pipe for WriteVecShift, FPU2 for the
// X and Y forms.
defm : Zn2WriteResFpuPair<WriteVecShift,  [Zn2FPU],  1>;
defm : Zn2WriteResFpuPair<WriteVecShiftX, [Zn2FPU2], 1>;
defm : Zn2WriteResFpuPair<WriteVecShiftY, [Zn2FPU2], 1>;

// Immediate shifts, logic and ALU operations: any FPU pipe, latency 1.
foreach Wr = [WriteVecShiftImm, WriteVecShiftImmX, WriteVecShiftImmY,
              WriteVecLogic,    WriteVecLogicX,    WriteVecLogicY,
              WriteVecALU,      WriteVecALUX,      WriteVecALUY] in
  defm : Zn2WriteResFpuPair<Wr, [Zn2FPU], 1>;

// Vector test: FPU1 or FPU2, held for 2 cycles.
defm : Zn2WriteResFpuPair<WriteVecTest,  [Zn2FPU12], 1, [2], 1, 7, 1>;
defm : Zn2WriteResFpuPair<WriteVecTestY, [Zn2FPU12], 1, [2], 1, 7, 1>;

// The Z-suffixed writes are marked unsupported.
foreach Wr = [WriteVecShiftZ, WriteVecShiftImmZ, WriteVecLogicZ,
              WriteVecTestZ, WriteVecALUZ] in
  defm : X86WriteResPairUnsupported<Wr>;
// Vector integer multiply and PHMINPOS: FPU0, latency 4.
foreach Wr = [WriteVecIMul, WriteVecIMulX, WriteVecIMulY, WritePHMINPOS] in
  defm : Zn2WriteResFpuPair<Wr, [Zn2FPU0], 4>;
defm : Zn2WriteResFpuPair<WritePMULLD,  [Zn2FPU0], 4, [1], 1, 7, 1>;
defm : Zn2WriteResFpuPair<WritePMULLDY, [Zn2FPU0], 4, [1], 1, 7, 1>;

// PSADBW: FPU0, latency 3.
foreach Wr = [WritePSADBW, WritePSADBWX, WritePSADBWY] in
  defm : Zn2WriteResFpuPair<Wr, [Zn2FPU0], 3>;

// Shuffles: any FPU pipe, latency 1.
foreach Wr = [WriteShuffle,    WriteShuffleX,    WriteShuffleY,
              WriteVarShuffle, WriteVarShuffleX, WriteVarShuffleY] in
  defm : Zn2WriteResFpuPair<Wr, [Zn2FPU], 1>;

// 256-bit shuffles: any FPU pipe, latency 2.
defm : Zn2WriteResFpuPair<WriteShuffle256,    [Zn2FPU], 2>;
defm : Zn2WriteResFpuPair<WriteVarShuffle256, [Zn2FPU], 2>;

// Blends: FPU0 or FPU1, latency 1.
defm : Zn2WriteResFpuPair<WriteBlend,  [Zn2FPU01], 1>;
defm : Zn2WriteResFpuPair<WriteBlendY, [Zn2FPU01], 1>;

// The Z-suffixed writes are marked unsupported.
foreach Wr = [WriteVecIMulZ, WritePMULLDZ, WriteShuffleZ, WriteVarShuffleZ,
              WriteBlendZ, WritePSADBWZ] in
  defm : X86WriteResPairUnsupported<Wr>;
428
// Variable vector shifts: FPU1 or FPU2, latency 3. The Z form is marked
// unsupported.
defm : Zn2WriteResFpuPair<WriteVarVecShift,  [Zn2FPU12], 3>;
defm : Zn2WriteResFpuPair<WriteVarVecShiftY, [Zn2FPU12], 3>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;

// Vector insert: any FPU pipe, latency 1. The extract writes are described
// separately.
defm : Zn2WriteResFpuPair<WriteVecInsert, [Zn2FPU], 1>;
436
// Vector extract to a register.
let Latency = 2, ResourceCycles = [1, 2] in
def : WriteRes<WriteVecExtract, [Zn2FPU12, Zn2FPU2]>;

// Vector extract to memory: also uses an AGU and takes two micro-ops.
let Latency = 5, NumMicroOps = 2, ResourceCycles = [1, 2, 3] in
def : WriteRes<WriteVecExtractSt, [Zn2AGU, Zn2FPU12, Zn2FPU2]>;
446
// MOVMSK instructions: all are issued on FPU2.
foreach Wr = [WriteFMOVMSK, WriteMMXMOVMSK, WriteVecMOVMSK] in
  def : WriteRes<Wr, [Zn2FPU2]>;

// The Y form takes two micro-ops and holds FPU2 for two cycles.
let Latency = 2, NumMicroOps = 2, ResourceCycles = [2] in
def : WriteRes<WriteVecMOVMSKY, [Zn2FPU2]>;
457
// AES instructions: FPU0 or FPU1, latency 4.
foreach Wr = [WriteAESDecEnc, WriteAESIMC, WriteAESKeyGen] in
  defm : Zn2WriteResFpuPair<Wr, [Zn2FPU01], 4>;

// Fences use an AGU; NOPs use no resources.
def : WriteRes<WriteFence, [Zn2AGU]>;
def : WriteRes<WriteNop,   []>;
465
// The following instructions are microcoded. Their latency is set to 100, a
// deliberately long value, so as to block the entire pipeline.
defm : Zn2WriteResFpuPair<WriteFShuffle256,    [Zn2FPU], 100>;
defm : Zn2WriteResFpuPair<WriteFVarShuffle256, [Zn2FPU], 100>;

// Generic write for microcoded instructions: no resources, latency 100.
let Latency = 100 in
def Zn2WriteMicrocoded : SchedWriteRes<[]>;

// Writes described without execution ports; only the latency is given.
defm : Zn2WriteResPair<WriteDPPS,   [], 15>;
defm : Zn2WriteResPair<WriteFHAdd,  [], 7>;
defm : Zn2WriteResPair<WriteFHAddY, [], 7>;
foreach Wr = [WritePHAdd, WritePHAddX, WritePHAddY] in
  defm : Zn2WriteResPair<Wr, [], 3>;
481
// Every write listed here is aliased to Zn2WriteMicrocoded.
foreach Wr = [WriteMicrocoded, WriteFCMOV, WriteSystem,
              WriteMPSAD,      WriteMPSADY,
              WriteMPSADLd,    WriteMPSADYLd,
              WriteCLMul,      WriteCLMulLd,
              WritePCmpIStrM,  WritePCmpIStrMLd,
              WritePCmpEStrI,  WritePCmpEStrILd,
              WritePCmpEStrM,  WritePCmpEStrMLd,
              WritePCmpIStrI,  WritePCmpIStrILd,
              WriteLDMXCSR,    WriteSTMXCSR] in
  def : SchedAlias<Wr, Zn2WriteMicrocoded>;
501
502//=== Regex based InstRW ===//
// Notation:
// - r: register.
// - m: memory.
// - i: immediate.
// - mm: 64 bit mmx register.
// - x: 128 bit xmm register.
// - (x)mm: mmx or xmm register.
// - y: 256 bit ymm register.
// - v: any vector register.
512
513//=== Integer Instructions ===//
514//-- Move instructions --//
// MOV r16,m is scheduled as a folded-load ALU operation.
def : InstRW<[WriteALULd, ReadAfterLd],
             (instregex "MOV16rm")>;

// MOVSX / MOVZX r,m are scheduled as plain loads.
def : InstRW<[WriteLoad],
             (instregex "MOV(S|Z)X32rm(8|16)")>;
522
// XCHG r,r: any ALU, two micro-ops.
let NumMicroOps = 2 in
def Zn2WriteXCHG : SchedWriteRes<[Zn2ALU]>;

def : InstRW<[Zn2WriteXCHG],
             (instregex "^XCHG(8|16|32|64)rr", "^XCHG(16|32|64)ar")>;

// XCHG r,m: AGU plus ALU, latency 5, two micro-ops.
let Latency = 5, NumMicroOps = 2 in
def Zn2WriteXCHGrm : SchedWriteRes<[Zn2AGU, Zn2ALU]>;

def : InstRW<[Zn2WriteXCHGrm, ReadAfterLd],
             (instregex "^XCHG(8|16|32|64)rm")>;

// XLAT is treated as microcoded.
def : InstRW<[WriteMicrocoded],
             (instrs XLAT)>;
539
// POP16: AGU, latency 5, two micro-ops.
let Latency = 5, NumMicroOps = 2 in
def Zn2WritePop16r : SchedWriteRes<[Zn2AGU]>;

def : InstRW<[Zn2WritePop16r],
             (instregex "POP16rmm")>;

// POPF and POPA are treated as microcoded.
def : InstRW<[WriteMicrocoded],
             (instregex "POPF(16|32)")>;
def : InstRW<[WriteMicrocoded],
             (instregex "POPA(16|32)")>;
549
550
// PUSH.
// The register form keeps its default values; only the memory form is
// overridden here: AGU, latency 4.
let Latency = 4 in
def Zn2WritePUSH : SchedWriteRes<[Zn2AGU]>;

def : InstRW<[Zn2WritePUSH],
             (instregex "PUSH(16|32)rmm")>;

// PUSHF is treated as microcoded.
def : InstRW<[WriteMicrocoded],
             (instregex "PUSHF(16|32)")>;

// PUSHA: AGU, latency 8.
let Latency = 8 in
def Zn2WritePushA : SchedWriteRes<[Zn2AGU]>;

def : InstRW<[Zn2WritePushA],
             (instregex "PUSHA(16|32)")>;

// LAHF is treated as microcoded.
def : InstRW<[WriteMicrocoded],
             (instrs LAHF)>;
570
// MOVBE: AGU plus ALU, latency 5. The same write is used for both directions.
let Latency = 5 in
def Zn2WriteMOVBE : SchedWriteRes<[Zn2AGU, Zn2ALU]>;

// r,m.
def : InstRW<[Zn2WriteMOVBE, ReadAfterLd],
             (instregex "MOVBE(16|32|64)rm")>;

// m,r.
def : InstRW<[Zn2WriteMOVBE],
             (instregex "MOVBE(16|32|64)mr")>;
580
581//-- Arithmetic instructions --//
582
// Memory-destination arithmetic is scheduled as a folded-load ALU operation.

// ADD SUB: m,r/i.
def : InstRW<[WriteALULd],
             (instregex "(ADD|SUB)(8|16|32|64)m(r|i)",
                        "(ADD|SUB)(8|16|32|64)mi8",
                        "(ADD|SUB)64mi32")>;

// ADC SBB: m,r/i.
def : InstRW<[WriteALULd],
             (instregex "(ADC|SBB)(8|16|32|64)m(r|i)",
                        "(ADC|SBB)(16|32|64)mi8",
                        "(ADC|SBB)64mi32")>;

// INC DEC NOT NEG: m.
def : InstRW<[WriteALULd], (instregex "(INC|DEC|NOT|NEG)(8|16|32|64)m")>;
600
// MUL IMUL, 16-bit. All forms are issued on ALU1 and use the multiplier.

// r16: latency 3.
let Latency = 3 in
def Zn2WriteMul16 : SchedWriteRes<[Zn2ALU1, Zn2Multiplier]>;

// r16 with immediate: latency 4.
let Latency = 4 in
def Zn2WriteMul16Imm : SchedWriteRes<[Zn2ALU1, Zn2Multiplier]>;

def : SchedAlias<WriteIMul16,    Zn2WriteMul16>;
def : SchedAlias<WriteIMul16Imm, Zn2WriteMul16Imm>;
def : SchedAlias<WriteIMul16Reg, Zn2WriteMul16>;

// m16: an AGU is added and the latency is 7 for every folded form.
let Latency = 7 in
def Zn2WriteMul16Ld : SchedWriteRes<[Zn2AGU, Zn2ALU1, Zn2Multiplier]>;

foreach Wr = [WriteIMul16Ld, WriteIMul16ImmLd, WriteIMul16RegLd] in
  def : SchedAlias<Wr, Zn2WriteMul16Ld>;
620
// MUL IMUL, r32: ALU1 plus the multiplier, latency 3.
let Latency = 3 in
def Zn2WriteMul32 : SchedWriteRes<[Zn2ALU1, Zn2Multiplier]>;

foreach Wr = [WriteIMul32, WriteIMul32Imm, WriteIMul32Reg] in
  def : SchedAlias<Wr, Zn2WriteMul32>;

// MUL IMUL, m32: an AGU is added, latency 7.
let Latency = 7 in
def Zn2WriteMul32Ld : SchedWriteRes<[Zn2AGU, Zn2ALU1, Zn2Multiplier]>;

foreach Wr = [WriteIMul32Ld, WriteIMul32ImmLd, WriteIMul32RegLd] in
  def : SchedAlias<Wr, Zn2WriteMul32Ld>;
636
// MUL IMUL, r64: ALU1 plus the multiplier, latency 4, two micro-ops.
let Latency = 4, NumMicroOps = 2 in
def Zn2WriteMul64 : SchedWriteRes<[Zn2ALU1, Zn2Multiplier]>;

foreach Wr = [WriteIMul64, WriteIMul64Imm, WriteIMul64Reg] in
  def : SchedAlias<Wr, Zn2WriteMul64>;

// MUL IMUL, m64: an AGU is added, latency 8, two micro-ops.
let Latency = 8, NumMicroOps = 2 in
def Zn2WriteMul64Ld : SchedWriteRes<[Zn2AGU, Zn2ALU1, Zn2Multiplier]>;

foreach Wr = [WriteIMul64Ld, WriteIMul64ImmLd, WriteIMul64RegLd] in
  def : SchedAlias<Wr, Zn2WriteMul64Ld>;
654
// MULX, 32-bit.

// r32,r32,r32: ALU1 for one cycle, the multiplier for two; latency 3.
let Latency = 3, ResourceCycles = [1, 2] in
def Zn2WriteMulX32 : SchedWriteRes<[Zn2ALU1, Zn2Multiplier]>;

def : InstRW<[Zn2WriteMulX32],
             (instrs MULX32rr)>;

// r32,r32,m32: an AGU is added; latency 7.
// NOTE(review): ALU1 is held for 2 cycles here but for 1 cycle in the
// register form above - confirm which one is intended.
let Latency = 7, ResourceCycles = [1, 2, 2] in
def Zn2WriteMulX32Ld : SchedWriteRes<[Zn2AGU, Zn2ALU1, Zn2Multiplier]>;

def : InstRW<[Zn2WriteMulX32Ld, ReadAfterLd],
             (instrs MULX32rm)>;
669
// MULX r64,r64,r64: latency 3.
// The multiply is issued on ALU1 and occupies the integer multiplier, so both
// resources are listed. This matches MULX32rr and the MULX32rm / MULX64rm
// memory forms, all of which name Zn2Multiplier next to Zn2ALU1. Each
// resource is held for the default single cycle.
def Zn2WriteMulX64 : SchedWriteRes<[Zn2ALU1, Zn2Multiplier]> {
  let Latency = 3;
}
def : InstRW<[Zn2WriteMulX64], (instrs MULX64rr)>;
675
// MULX r64,r64,m64: AGU, ALU1 and the multiplier; latency 7.
let Latency = 7 in
def Zn2WriteMulX64Ld : SchedWriteRes<[Zn2AGU, Zn2ALU1, Zn2Multiplier]>;

def : InstRW<[Zn2WriteMulX64Ld, ReadAfterLd],
             (instrs MULX64rm)>;
681
682//-- Control transfer instructions --//
683
// J(E|R)CXZ: issued on ALU0 or ALU3.
def Zn2WriteJCXZ : SchedWriteRes<[Zn2ALU03]>;
def : InstRW<[Zn2WriteJCXZ],
             (instrs JCXZ, JECXZ, JRCXZ)>;

// INTO is treated as microcoded.
def : InstRW<[WriteMicrocoded],
             (instrs INTO)>;

// LOOP: issued on ALU0 or ALU3.
def Zn2WriteLOOP : SchedWriteRes<[Zn2ALU03]>;
def : InstRW<[Zn2WriteLOOP],
             (instrs LOOP)>;

// LOOP(N)E, LOOP(N)Z: issued on ALU0 or ALU3.
def Zn2WriteLOOPE : SchedWriteRes<[Zn2ALU03]>;
def : InstRW<[Zn2WriteLOOPE],
             (instrs LOOPE, LOOPNE)>;
698
// CALL r: an AGU plus ALU0 or ALU3.
def Zn2WriteCALLr : SchedWriteRes<[Zn2AGU, Zn2ALU03]>;
def : InstRW<[Zn2WriteCALLr],
             (instregex "CALL(16|32)r")>;

// CALL m is treated as microcoded.
def : InstRW<[WriteMicrocoded],
             (instregex "CALL(16|32)m")>;

// RET (and LRET / IRET): ALU0 or ALU3, two micro-ops.
let NumMicroOps = 2 in
def Zn2WriteRET : SchedWriteRes<[Zn2ALU03]>;

def : InstRW<[Zn2WriteRET],
             (instregex "RET(L|Q|W)",
                        "LRET(L|Q|W)",
                        "IRET(16|32|64)")>;
712
713//-- Logic instructions --//
714
// AND OR XOR with a memory destination (m,r/i) are scheduled as a folded-load
// ALU operation.
def : InstRW<[WriteALULd],
             (instregex "(AND|OR|XOR)(8|16|32|64)m(r|i)",
                        "(AND|OR|XOR)(8|16|32|64)mi8",
                        "(AND|OR|XOR)64mi32")>;

// ALU latency variants.
// Register form: any ALU, latency 2.
let Latency = 2 in
def Zn2WriteALULat2 : SchedWriteRes<[Zn2ALU]>;

// Load form: AGU plus ALU, latency 6.
let Latency = 6 in
def Zn2WriteALULat2Ld : SchedWriteRes<[Zn2AGU, Zn2ALU]>;
728
// BT m,i is scheduled as a folded-load shift.
def : InstRW<[WriteShiftLd],
             (instregex "BT(16|32|64)mi8")>;

// BTR BTS BTC with a register destination (r,r / r,i): any ALU, latency 2,
// two micro-ops.
let Latency = 2, NumMicroOps = 2 in
def Zn2WriteBTRSC : SchedWriteRes<[Zn2ALU]>;

def : InstRW<[Zn2WriteBTRSC],
             (instregex "BT(R|S|C)(16|32|64)r(r|i8)")>;

// BTR BTS BTC with a memory destination (m,r / m,i): AGU plus ALU, latency 6,
// two micro-ops. Both RMW bit-test-set writes alias to it.
let Latency = 6, NumMicroOps = 2 in
def Zn2WriteBTRSCm : SchedWriteRes<[Zn2AGU, Zn2ALU]>;

def : SchedAlias<WriteBitTestSetImmRMW, Zn2WriteBTRSCm>;
def : SchedAlias<WriteBitTestSetRegRMW, Zn2WriteBTRSCm>;
749
// BLSI BLSMSK BLSR use the latency-2 ALU variants.
def : SchedAlias<WriteBLS,   Zn2WriteALULat2>;   // r,r.
def : SchedAlias<WriteBLSLd, Zn2WriteALULat2Ld>; // r,m.

// CLD STD are scheduled as a plain ALU operation.
def : InstRW<[WriteALU],
             (instrs STD, CLD)>;
758
// PDEP PEXT are treated as microcoded in both forms.
// r,r,r.
def : InstRW<[WriteMicrocoded],
             (instregex "PDEP(32|64)rr", "PEXT(32|64)rr")>;
// r,r,m.
def : InstRW<[WriteMicrocoded],
             (instregex "PDEP(32|64)rm", "PEXT(32|64)rm")>;

// RCR RCL with a memory operand are treated as microcoded.
def : InstRW<[WriteMicrocoded],
             (instregex "RC(R|L)(8|16|32|64)m(1|i|CL)")>;

// SHR SHL SAR m,i are scheduled as a folded-load shift.
def : InstRW<[WriteShiftLd],
             (instregex "S(A|H)(R|L)(8|16|32|64)m(i|1)")>;

// SHRD SHLD.
// m,r with an immediate count: folded-load shift.
def : InstRW<[WriteShiftLd],
             (instregex "SH(R|L)D(16|32|64)mri8")>;
// r,r,cl: microcoded.
def : InstRW<[WriteMicrocoded],
             (instregex "SH(R|L)D(16|32|64)rrCL")>;
// m,r,cl: microcoded.
def : InstRW<[WriteMicrocoded],
             (instregex "SH(R|L)D(16|32|64)mrCL")>;
782
//-- Misc instructions --//

// CMPXCHG8B.
// Modelled as 18 micro-ops on Zn2AGU + Zn2ALU; no latency override.
let NumMicroOps = 18 in
def Zn2WriteCMPXCHG8B : SchedWriteRes<[Zn2AGU, Zn2ALU]>;

def : InstRW<[Zn2WriteCMPXCHG8B], (instrs CMPXCHG8B)>;

// CMPXCHG16B is microcoded.
def : InstRW<[WriteMicrocoded], (instrs CMPXCHG16B)>;
791
// LEAVE.
// Two micro-ops on Zn2ALU + Zn2AGU with latency 8.
let Latency = 8, NumMicroOps = 2 in
def Zn2WriteLEAVE : SchedWriteRes<[Zn2ALU, Zn2AGU]>;

def : InstRW<[Zn2WriteLEAVE],
             (instregex "LEAVE")>;
798
// PAUSE, RDPMC.
// Named explicitly; both are microcoded.
def : InstRW<[WriteMicrocoded],
             (instrs PAUSE, RDPMC)>;

// RDTSC, RDRAND, XGETBV.
// Matched by name pattern; all are microcoded.
def : InstRW<[WriteMicrocoded],
             (instregex "RDTSC",
                        "RDRAND(16|32|64)r",
                        "XGETBV")>;
813
//-- String instructions --//

// Every string instruction below is mapped to WriteMicrocoded.
def : InstRW<[WriteMicrocoded],
             (instregex "CMPS(B|L|Q|W)",    // CMPS.
                        "LODS(B|W)",        // LODSB/W.
                        "LODS(L|Q)",        // LODSD/Q.
                        "MOVS(B|L|Q|W)",    // MOVS.
                        "SCAS(B|W|L|Q)",    // SCAS.
                        "STOS(B|L|Q|W)")>;  // STOS.
832
// XADD.
// r,r: plain Zn2ALU write class with no overrides.
def Zn2XADD : SchedWriteRes<[Zn2ALU]>;
def : InstRW<[Zn2XADD],
             (instregex "XADD(8|16|32|64)rr")>;

// m,r: microcoded.
def : InstRW<[WriteMicrocoded],
             (instregex "XADD(8|16|32|64)rm")>;
837
//=== Floating Point x87 Instructions ===//
//-- Move instructions --//

// Register load class: Zn2FPU13, no overrides.
def Zn2WriteFLDr : SchedWriteRes<[Zn2FPU13]>;

// Register store class: two micro-ops on Zn2FPU23, latency 5.
let Latency = 5, NumMicroOps = 2 in
def Zn2WriteSTr : SchedWriteRes<[Zn2FPU23]>;

// LD_F.
// r.
def : InstRW<[Zn2WriteFLDr],
             (instregex "LD_Frr")>;

// m80: two micro-ops on Zn2AGU + Zn2FPU13; no latency override.
let NumMicroOps = 2 in
def Zn2WriteLD_F80m : SchedWriteRes<[Zn2AGU, Zn2FPU13]>;

def : InstRW<[Zn2WriteLD_F80m],
             (instregex "LD_F80m")>;
857
// FBLD.
def : InstRW<[WriteMicrocoded],
             (instregex "FBLDm")>;

// FST(P).
// r.
def : InstRW<[Zn2WriteSTr],
             (instregex "ST_(F|FP)rr")>;

// m80: Zn2AGU + Zn2FPU23 with latency 5.
let Latency = 5 in
def Zn2WriteST_FP80m : SchedWriteRes<[Zn2AGU, Zn2FPU23]>;

def : InstRW<[Zn2WriteST_FP80m],
             (instregex "ST_FP80m")>;

// FBSTP.
// m80.
def : InstRW<[WriteMicrocoded],
             (instregex "FBSTPm")>;
874
// FXCH.
// Zn2FPU write class with no overrides, applied to XCH_F.
def Zn2WriteFXCH : SchedWriteRes<[Zn2FPU]>;
def : InstRW<[Zn2WriteFXCH],
             (instrs XCH_F)>;
879
// FILD.
// Two micro-ops on Zn2AGU + Zn2FPU3, latency 11.
let Latency = 11, NumMicroOps = 2 in
def Zn2WriteFILD : SchedWriteRes<[Zn2AGU, Zn2FPU3]>;

def : InstRW<[Zn2WriteFILD],
             (instregex "ILD_F(16|32|64)m")>;

// FIST(P) FISTTP.
// Zn2AGU + Zn2FPU23, latency 12.
let Latency = 12 in
def Zn2WriteFIST : SchedWriteRes<[Zn2AGU, Zn2FPU23]>;

def : InstRW<[Zn2WriteFIST],
             (instregex "IS(T|TT)_(F|FP)(16|32|64)m")>;
892
// Shared classes that pair Zn2AGU with one FPU resource.
let Latency = 8 in
def Zn2WriteFPU13 : SchedWriteRes<[Zn2AGU, Zn2FPU13]>;

let Latency = 11 in
def Zn2WriteFPU3 : SchedWriteRes<[Zn2AGU, Zn2FPU3]>;

// FLDZ.
def : SchedAlias<WriteFLD0, Zn2WriteFPU13>;

// FLD1.
def : SchedAlias<WriteFLD1, Zn2WriteFPU3>;

// FLDPI FLDL2E etc.
def : SchedAlias<WriteFLDC, Zn2WriteFPU3>;
909
// FNSTSW (AX and m16), FLDCW, FNSTCW: all microcoded.
def : InstRW<[WriteMicrocoded],
             (instrs FNSTSW16r,    // FNSTSW AX.
                     FNSTSWm,      // FNSTSW m16.
                     FLDCW16m,     // FLDCW.
                     FNSTCW16m)>;  // FNSTCW.

// FINCSTP FDECSTP.
def : InstRW<[Zn2WriteFPU3],
             (instrs FINCSTP, FDECSTP)>;

// FFREE.
def : InstRW<[Zn2WriteFPU3],
             (instregex "FFREE")>;

// FNSAVE, FRSTOR: microcoded.
def : InstRW<[WriteMicrocoded],
             (instregex "FSAVEm",
                        "FRSTORm")>;
934
//-- Arithmetic instructions --//

// Write classes with no overrides on a single FPU resource.
def Zn2WriteFPU3Lat1 : SchedWriteRes<[Zn2FPU3]>;
def Zn2WriteFPU0Lat1 : SchedWriteRes<[Zn2FPU0]>;

// Load form of the Zn2FPU0 class: adds Zn2AGU, latency 8.
let Latency = 8 in
def Zn2WriteFPU0Lat1Ld : SchedWriteRes<[Zn2AGU, Zn2FPU0]>;

// FCHS.
def : InstRW<[Zn2WriteFPU3Lat1],
             (instregex "CHS_F")>;

// FCOM(P) FUCOM(P).
// r.
def : InstRW<[Zn2WriteFPU0Lat1],
             (instregex "COM(P?)_FST0r",
                        "UCOM_F(P?)r")>;
// m.
def : InstRW<[Zn2WriteFPU0Lat1Ld],
             (instregex "FCOM(P?)(32|64)m")>;

// FCOMPP FUCOMPP.
// r.
def : InstRW<[Zn2WriteFPU0Lat1],
             (instrs FCOMPP, UCOM_FPPr)>;
957
// FCOMI(P) FUCOMI(P).
// r.
// NOTE(review): only register forms are listed, yet the class also names
// Zn2AGU - confirm that this is intended.
let Latency = 9 in
def Zn2WriteFPU02 : SchedWriteRes<[Zn2AGU, Zn2FPU02]>;

def : InstRW<[Zn2WriteFPU02],
             (instrs COM_FIPr, COM_FIr, UCOM_FIPr, UCOM_FIr)>;

// FICOM(P).
// m16/m32: two micro-ops, latency 12; Zn2AGU is held for 1 cycle and
// Zn2FPU03 for 3.
let Latency = 12, NumMicroOps = 2, ResourceCycles = [1, 3] in
def Zn2WriteFPU03 : SchedWriteRes<[Zn2AGU, Zn2FPU03]>;

def : InstRW<[Zn2WriteFPU03],
             (instregex "FICOM(P?)(16|32)m")>;
976
// FTST.
def : InstRW<[Zn2WriteFPU0Lat1],
             (instregex "TST_F")>;

// FXAM.
def : InstRW<[Zn2WriteFPU3Lat1],
             (instrs FXAM)>;

// FNOP, WAIT.
def : InstRW<[Zn2WriteFPU0Lat1],
             (instrs FNOP, WAIT)>;

// Microcoded x87 operations.
def : InstRW<[WriteMicrocoded],
             (instrs FPREM,     // FPREM.
                     FPREM1,    // FPREM1.
                     FRNDINT,   // FRNDINT.
                     FSCALE,    // FSCALE.
                     FXTRACT,   // FXTRACT.
                     FNCLEX,    // FNCLEX.
                     FNINIT)>;  // FNINIT.
1009
//=== Integer MMX and XMM Instructions ===//

// PACKSSWB/DW.
// Write classes on the Zn2FPU12 resource.
// mm <- mm.
def Zn2WriteFPU12 : SchedWriteRes<[Zn2FPU12]> ;
// ymm register form: two micro-ops, latency 4.
def Zn2WriteFPU12Y : SchedWriteRes<[Zn2FPU12]> {
  let Latency = 4;
  let NumMicroOps = 2;
}
// mm <- m.
// The load-folding form previously had no Latency override, so it was
// modelled with the same latency as the register form.  Every other
// load-folding class in this file whose register form has no latency
// override (Zn2WriteFPU0Lat1Ld, Zn2WriteFPU013Ld, Zn2WritePCMPGTQm,
// Zn2WriteSHA1MSG2Ld, ...) uses latency 8, so do the same here.
def Zn2WriteFPU12m : SchedWriteRes<[Zn2AGU, Zn2FPU12]> {
  let Latency = 8;
}
// ymm load form: two micro-ops, latency 8.
def Zn2WriteFPU12Ym : SchedWriteRes<[Zn2AGU, Zn2FPU12]> {
  let Latency = 8;
  let NumMicroOps = 2;
}

def : InstRW<[Zn2WriteFPU12], (instrs MMX_PACKSSDWirr,
                                     MMX_PACKSSWBirr,
                                     MMX_PACKUSWBirr)>;
def : InstRW<[Zn2WriteFPU12m], (instrs MMX_PACKSSDWirm,
                                      MMX_PACKSSWBirm,
                                      MMX_PACKUSWBirm)>;
1031
// VPMOVSX/ZX BW BD BQ WD WQ DQ.
// y <- x.
def : InstRW<[Zn2WriteFPU12Y],
             (instregex "VPMOV(SX|ZX)(BW|BD|BQ|WD|WQ|DQ)Yrr")>;
// y <- m.
def : InstRW<[Zn2WriteFPU12Ym],
             (instregex "VPMOV(SX|ZX)(BW|BD|BQ|WD|WQ|DQ)Yrm")>;
1036
// Write classes on the Zn2FPU013 resource.
// Register forms: no overrides (xmm and ymm are modelled identically).
def Zn2WriteFPU013  : SchedWriteRes<[Zn2FPU013]>;
def Zn2WriteFPU013Y : SchedWriteRes<[Zn2FPU013]>;

// Memory forms: all three add Zn2AGU and use two micro-ops at latency 8.
let Latency = 8, NumMicroOps = 2 in {
  def Zn2WriteFPU013m   : SchedWriteRes<[Zn2AGU, Zn2FPU013]>;
  def Zn2WriteFPU013Ld  : SchedWriteRes<[Zn2AGU, Zn2FPU013]>;
  def Zn2WriteFPU013LdY : SchedWriteRes<[Zn2AGU, Zn2FPU013]>;
}

// PBLENDW.
// x,x,i / v,v,v,i
def : InstRW<[Zn2WriteFPU013],
             (instregex "(V?)PBLENDWrri")>;
// ymm
def : InstRW<[Zn2WriteFPU013Y],
             (instrs VPBLENDWYrri)>;

// x,m,i / v,v,m,i
def : InstRW<[Zn2WriteFPU013Ld],
             (instregex "(V?)PBLENDWrmi")>;
// y,m,i
def : InstRW<[Zn2WriteFPU013LdY],
             (instrs VPBLENDWYrmi)>;
1062
// Write classes on the Zn2FPU01 resource.
def Zn2WriteFPU01 : SchedWriteRes<[Zn2FPU01]>;

// ymm register form: two micro-ops.
let NumMicroOps = 2 in
def Zn2WriteFPU01Y : SchedWriteRes<[Zn2FPU01]>;

// VPBLENDD.
// v,v,v,i.
def : InstRW<[Zn2WriteFPU01],
             (instrs VPBLENDDrri)>;
// ymm
def : InstRW<[Zn2WriteFPU01Y],
             (instrs VPBLENDDYrri)>;

// v,v,m,i
// Both memory forms use two micro-ops and hold Zn2AGU for one cycle; the
// xmm form holds Zn2FPU01 for 2 cycles at latency 8, the ymm form for 3
// cycles at latency 9.
let NumMicroOps = 2, Latency = 8, ResourceCycles = [1, 2] in
def Zn2WriteFPU01Op2 : SchedWriteRes<[Zn2AGU, Zn2FPU01]>;

let NumMicroOps = 2, Latency = 9, ResourceCycles = [1, 3] in
def Zn2WriteFPU01Op2Y : SchedWriteRes<[Zn2AGU, Zn2FPU01]>;

def : InstRW<[Zn2WriteFPU01Op2],
             (instrs VPBLENDDrmi)>;
def : InstRW<[Zn2WriteFPU01Op2Y],
             (instrs VPBLENDDYrmi)>;
1087
// MASKMOVQ.
def : InstRW<[WriteMicrocoded],
             (instregex "MMX_MASKMOVQ(64)?")>;

// MASKMOVDQU.
def : InstRW<[WriteMicrocoded],
             (instregex "(V?)MASKMOVDQU(64)?")>;

// VPMASKMOVD.
// Load forms (xmm and ymm).
// NOTE(review): only the D load forms are matched here, while the store
// pattern below covers both D and Q - confirm that leaving the Q load
// forms on their default class is intended.
def : InstRW<[WriteMicrocoded],
             (instregex "VPMASKMOVD(Y?)rm")>;
// m, v,v.
def : InstRW<[WriteMicrocoded],
             (instregex "VPMASKMOV(D|Q)(Y?)mr")>;
1100
// VPBROADCAST B/W.
// Both load forms use two micro-ops at latency 8, holding Zn2AGU for one
// cycle and the FPU resource for two; only the FPU resource differs.
let Latency = 8, NumMicroOps = 2, ResourceCycles = [1, 2] in {
  // x, m8/16.
  def Zn2WriteVPBROADCAST128Ld : SchedWriteRes<[Zn2AGU, Zn2FPU12]>;
  // y, m8/16.
  def Zn2WriteVPBROADCAST256Ld : SchedWriteRes<[Zn2AGU, Zn2FPU1]>;
}

def : InstRW<[Zn2WriteVPBROADCAST128Ld],
             (instregex "VPBROADCAST(B|W)rm")>;
def : InstRW<[Zn2WriteVPBROADCAST256Ld],
             (instregex "VPBROADCAST(B|W)Yrm")>;
1119
// VPGATHER.
// All index/element width combinations, xmm and ymm, are microcoded.
def : InstRW<[WriteMicrocoded],
             (instregex "VPGATHER(Q|D)(Q|D)(Y?)rm")>;
1122
//-- Arithmetic instructions --//

// PCMPGTQ.
// Register forms (xmm and ymm): Zn2FPU03, no overrides.
def Zn2WritePCMPGTQr : SchedWriteRes<[Zn2FPU03]>;
def : InstRW<[Zn2WritePCMPGTQr],
             (instregex "(V?)PCMPGTQ(Y?)rr")>;

// Memory forms: Zn2AGU + Zn2FPU03 at latency 8 for both widths.
let Latency = 8 in {
  // x <- x,m.
  def Zn2WritePCMPGTQm  : SchedWriteRes<[Zn2AGU, Zn2FPU03]>;
  // ymm.
  def Zn2WritePCMPGTQYm : SchedWriteRes<[Zn2AGU, Zn2FPU03]>;
}

def : InstRW<[Zn2WritePCMPGTQm],
             (instregex "(V?)PCMPGTQrm")>;
def : InstRW<[Zn2WritePCMPGTQYm],
             (instrs VPCMPGTQYrm)>;
1139
//-- Logic instructions --//

// Vector shift classes on Zn2FPU2, latency 3 (xmm and ymm variants).
let Latency = 3 in {
  def Zn2WritePShift  : SchedWriteRes<[Zn2FPU2]>;
  def Zn2WritePShiftY : SchedWriteRes<[Zn2FPU2]>;
}

// PSLL,PSRL DQ.
def : InstRW<[Zn2WritePShift],
             (instregex "(V?)PS(R|L)LDQri")>;
def : InstRW<[Zn2WritePShiftY],
             (instregex "(V?)PS(R|L)LDQYri")>;
1154
//=== Floating Point XMM and YMM Instructions ===//
//-- Move instructions --//

// VPERM2F128.
// Register and memory forms are both microcoded.
def : InstRW<[WriteMicrocoded],
             (instrs VPERM2F128rr, VPERM2F128rm)>;

// VBROADCASTF128.
// Two micro-ops on Zn2AGU + Zn2FPU13, latency 8.
let NumMicroOps = 2, Latency = 8 in
def Zn2WriteBROADCAST : SchedWriteRes<[Zn2AGU, Zn2FPU13]>;

def : InstRW<[Zn2WriteBROADCAST],
             (instrs VBROADCASTF128)>;
1168
// EXTRACTPS.
// r32,x,i: latency 2; Zn2FPU12 is held for 1 cycle, Zn2FPU2 for 2.
let Latency = 2, ResourceCycles = [1, 2] in
def Zn2WriteEXTRACTPSr : SchedWriteRes<[Zn2FPU12, Zn2FPU2]>;

def : InstRW<[Zn2WriteEXTRACTPSr],
             (instregex "(V?)EXTRACTPSrr")>;

// m32,x,i: two micro-ops, latency 5.
// NOTE(review): Zn2AGU is held for 5 cycles here, whereas the other
// classes in this file hold it for 1 - confirm the value is intended.
let Latency = 5, NumMicroOps = 2, ResourceCycles = [5, 1, 2] in
def Zn2WriteEXTRACTPSm : SchedWriteRes<[Zn2AGU, Zn2FPU12, Zn2FPU2]>;

def : InstRW<[Zn2WriteEXTRACTPSm],
             (instregex "(V?)EXTRACTPSmr")>;
1184
// VEXTRACTF128.
// x,y,i.
def : InstRW<[Zn2WriteFPU013],
             (instrs VEXTRACTF128rr)>;
// m128,y,i.
def : InstRW<[Zn2WriteFPU013m],
             (instrs VEXTRACTF128mr)>;

// VINSERTF128.
// y,y,x,i: latency 2 on Zn2FPU013.  No ResourceCycles override is applied
// (a former `ResourceCycles = [2]` setting is disabled).
let Latency = 2 in
def Zn2WriteVINSERT128r : SchedWriteRes<[Zn2FPU013]>;

// y,y,m128,i: two micro-ops on Zn2AGU + Zn2FPU013, latency 9.
let Latency = 9, NumMicroOps = 2 in
def Zn2WriteVINSERT128Ld : SchedWriteRes<[Zn2AGU, Zn2FPU013]>;

def : InstRW<[Zn2WriteVINSERT128r],
             (instrs VINSERTF128rr)>;
def : InstRW<[Zn2WriteVINSERT128Ld],
             (instrs VINSERTF128rm)>;
1204
// VGATHER.
// All index width / element type combinations, xmm and ymm, are microcoded.
def : InstRW<[WriteMicrocoded],
             (instregex "VGATHER(Q|D)(PD|PS)(Y?)rm")>;
1207
//-- Conversion instructions --//

// CVTPD2PS.
// Register forms: Zn2FPU3 at latency 3 for both xmm and ymm.
let Latency = 3 in {
  def Zn2WriteCVTPD2PSr  : SchedWriteRes<[Zn2FPU3]>;
  def Zn2WriteCVTPD2PSYr : SchedWriteRes<[Zn2FPU3]>;
}

// x,x.
def : SchedAlias<WriteCvtPD2PS,  Zn2WriteCVTPD2PSr>;
// y,y.
def : SchedAlias<WriteCvtPD2PSY, Zn2WriteCVTPD2PSYr>;
// z,z.
defm : X86WriteResUnsupported<WriteCvtPD2PSZ>;

// x,m128: two micro-ops on Zn2AGU + Zn2FPU03, latency 10.
let Latency = 10, NumMicroOps = 2 in
def Zn2WriteCVTPD2PSLd : SchedWriteRes<[Zn2AGU, Zn2FPU03]>;

def : SchedAlias<WriteCvtPD2PSLd, Zn2WriteCVTPD2PSLd>;

// x,m256: Zn2AGU + Zn2FPU3, latency 10.
let Latency = 10 in
def Zn2WriteCVTPD2PSYLd : SchedWriteRes<[Zn2AGU, Zn2FPU3]>;

def : SchedAlias<WriteCvtPD2PSYLd, Zn2WriteCVTPD2PSYLd>;
// z,m512
defm : X86WriteResUnsupported<WriteCvtPD2PSZLd>;
1238
// CVTSD2SS.
// Reuses the CVTPD2PS xmm write classes.
// x,x.
def : SchedAlias<WriteCvtSD2SS,   Zn2WriteCVTPD2PSr>;
// x,m64.
def : SchedAlias<WriteCvtSD2SSLd, Zn2WriteCVTPD2PSLd>;
1246
// CVTPS2PD.
// x,x: Zn2FPU3, latency 3.
let Latency = 3 in
def Zn2WriteCVTPS2PDr : SchedWriteRes<[Zn2FPU3]>;

def : SchedAlias<WriteCvtPS2PD, Zn2WriteCVTPS2PDr>;

// x,m64.
// y,m128.
// Both load forms share one class: two micro-ops on Zn2AGU + Zn2FPU3,
// latency 10.
let Latency = 10, NumMicroOps = 2 in
def Zn2WriteCVTPS2PDLd : SchedWriteRes<[Zn2AGU, Zn2FPU3]>;

def : SchedAlias<WriteCvtPS2PDLd,  Zn2WriteCVTPS2PDLd>;
def : SchedAlias<WriteCvtPS2PDYLd, Zn2WriteCVTPS2PDLd>;
defm : X86WriteResUnsupported<WriteCvtPS2PDZLd>;

// y,x: Zn2FPU3, latency 3.
let Latency = 3 in
def Zn2WriteVCVTPS2PDY : SchedWriteRes<[Zn2FPU3]>;

def : SchedAlias<WriteCvtPS2PDY, Zn2WriteVCVTPS2PDY>;
defm : X86WriteResUnsupported<WriteCvtPS2PDZ>;
1270
// CVTSS2SD.
// x,x: Zn2FPU3, latency 3.
let Latency = 3 in
def Zn2WriteCVTSS2SDr : SchedWriteRes<[Zn2FPU3]>;

def : SchedAlias<WriteCvtSS2SD, Zn2WriteCVTSS2SDr>;

// x,m32: two micro-ops, latency 10; Zn2AGU is held for 1 cycle and
// Zn2FPU3 for 2.
let Latency = 10, NumMicroOps = 2, ResourceCycles = [1, 2] in
def Zn2WriteCVTSS2SDLd : SchedWriteRes<[Zn2AGU, Zn2FPU3]>;

def : SchedAlias<WriteCvtSS2SDLd, Zn2WriteCVTSS2SDLd>;
1285
// CVTDQ2PD / CVTDQ2PS.
// One class on Zn2FPU12 + Zn2FPU3 at latency 3 serves every register form.
let Latency = 3 in
def Zn2WriteCVTDQ2PDr : SchedWriteRes<[Zn2FPU12, Zn2FPU3]>;

// x,x.
def : InstRW<[Zn2WriteCVTDQ2PDr],
             (instregex "(V)?CVTDQ2P(D|S)rr")>;

// y,x / y,y: same class as the xmm forms.
def : InstRW<[Zn2WriteCVTDQ2PDr],
             (instrs VCVTDQ2PDYrr, VCVTDQ2PSYrr)>;
1297
// CVT(T)P(D|S)2DQ.
// Register class: Zn2FPU12 + Zn2FPU3, latency 3.
let Latency = 3 in
def Zn2WriteCVTPD2DQr : SchedWriteRes<[Zn2FPU12, Zn2FPU3]>;

// Load class: adds Zn2AGU; two micro-ops, latency 10.
let Latency = 10, NumMicroOps = 2 in
def Zn2WriteCVTPD2DQLd : SchedWriteRes<[Zn2AGU, Zn2FPU12, Zn2FPU3]>;

// x,x and x,y share the register class.
def : InstRW<[Zn2WriteCVTPD2DQr],
             (instregex "(V?)CVT(T?)P(D|S)2DQrr",   // x,x.
                        "VCVT(T?)PD2DQYrr")>;       // x,y.

// x,m128 and x,m256 share the load class.
def : InstRW<[Zn2WriteCVTPD2DQLd],
             (instregex "(V?)CVT(T?)PD2DQrm",       // x,m128.
                        "VCVT(T?)PD2DQYrm")>;       // x,m256.
1316
// MMX <-> XMM conversions.
// Zn2FPU3 at latency 4; used for the conversions that write an mm register.
let Latency = 4 in
def Zn2WriteCVTPS2PIr : SchedWriteRes<[Zn2FPU3]>;

// CVT(T)PS2PI.
// mm,x.
def : InstRW<[Zn2WriteCVTPS2PIr],
             (instregex "MMX_CVT(T?)PS2PIirr")>;

// CVTPI2PD.
// x,mm: reuses the CVTPS2PD register class.
def : InstRW<[Zn2WriteCVTPS2PDr],
             (instrs MMX_CVTPI2PDirr)>;

// CVT(T)PD2PI.
// mm,x.
def : InstRW<[Zn2WriteCVTPS2PIr],
             (instregex "MMX_CVT(T?)PD2PIirr")>;
1331
// Zn2FPU3 at latency 3.
// NOTE(review): nothing in this part of the file references this class -
// check whether it is still needed.
let Latency = 3 in
def Zn2WriteCVSTSI2SSr : SchedWriteRes<[Zn2FPU3]>;

// CVT(T)SS2SI.
// r32/64,x: same class as CVTPD2DQ register forms.
def : InstRW<[Zn2WriteCVTPD2DQr],
             (instregex "(V?)CVT(T?)SS2SI(64)?rr")>;
// r32/64,m32: same class as CVTPD2DQ load forms.
def : InstRW<[Zn2WriteCVTPD2DQLd],
             (instregex "(V?)CVT(T?)SS2SI(64)?rm")>;
1343
// CVTSI2SD.
// x,r32/64: Zn2FPU013 + Zn2FPU3, latency 3.
let Latency = 3 in
def Zn2WriteCVSTSI2SDr : SchedWriteRes<[Zn2FPU013, Zn2FPU3]>;

def : InstRW<[Zn2WriteCVSTSI2SDr],
             (instregex "(V?)CVTSI(64)?2SDrr")>;
1350
1351
// CVT(T)SD2SI.
// Register class: Zn2FPU3 + Zn2FPU2, latency 4.
let Latency = 4 in
def Zn2WriteCVSTSI2SIr : SchedWriteRes<[Zn2FPU3, Zn2FPU2]>;

// Load class: adds Zn2AGU, latency 11.
let Latency = 11 in
def Zn2WriteCVSTSI2SILd : SchedWriteRes<[Zn2AGU, Zn2FPU3, Zn2FPU2]>;

// r32/64,x.
def : InstRW<[Zn2WriteCVSTSI2SIr],
             (instregex "(V?)CVT(T?)SD2SI(64)?rr")>;
// r32/64,m64.
def : InstRW<[Zn2WriteCVSTSI2SILd],
             (instregex "(V?)CVT(T?)SD2SI(64)?rm")>;
1363
// VCVTPS2PH.
// All supported forms are aliased to Zn2WriteMicrocoded; the Z-suffixed
// classes are marked unsupported.
// x,v,i.
def : SchedAlias<WriteCvtPS2PH,  Zn2WriteMicrocoded>;
def : SchedAlias<WriteCvtPS2PHY, Zn2WriteMicrocoded>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
// m,v,i.
def : SchedAlias<WriteCvtPS2PHSt,  Zn2WriteMicrocoded>;
def : SchedAlias<WriteCvtPS2PHYSt, Zn2WriteMicrocoded>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;

// VCVTPH2PS.
// Same treatment as VCVTPS2PH.
// v,x.
def : SchedAlias<WriteCvtPH2PS,  Zn2WriteMicrocoded>;
def : SchedAlias<WriteCvtPH2PSY, Zn2WriteMicrocoded>;
defm : X86WriteResUnsupported<WriteCvtPH2PSZ>;
// v,m.
def : SchedAlias<WriteCvtPH2PSLd,  Zn2WriteMicrocoded>;
def : SchedAlias<WriteCvtPH2PSYLd, Zn2WriteMicrocoded>;
defm : X86WriteResUnsupported<WriteCvtPH2PSZLd>;
1383
//-- SSE4A instructions --//

// EXTRQ: Zn2FPU12 + Zn2FPU2, latency 3.
let Latency = 3 in
def Zn2WriteEXTRQ : SchedWriteRes<[Zn2FPU12, Zn2FPU2]>;

def : InstRW<[Zn2WriteEXTRQ],
             (instregex "EXTRQ")>;

// INSERTQ: Zn2FPU03 + Zn2FPU1, latency 4.
let Latency = 4 in
def Zn2WriteINSERTQ : SchedWriteRes<[Zn2FPU03, Zn2FPU1]>;

def : InstRW<[Zn2WriteINSERTQ],
             (instregex "INSERTQ")>;
1396
//-- SHA instructions --//

// SHA256MSG2: microcoded, register and memory forms.
def : InstRW<[WriteMicrocoded],
             (instregex "SHA256MSG2(Y?)r(r|m)")>;

// SHA1MSG1, SHA256MSG1
// x,x: Zn2FPU12, latency 2.
let Latency = 2 in
def Zn2WriteSHA1MSG1r : SchedWriteRes<[Zn2FPU12]>;

def : InstRW<[Zn2WriteSHA1MSG1r],
             (instregex "SHA(1|256)MSG1rr")>;

// x,m: adds Zn2AGU, latency 9.
let Latency = 9 in
def Zn2WriteSHA1MSG1Ld : SchedWriteRes<[Zn2AGU, Zn2FPU12]>;

def : InstRW<[Zn2WriteSHA1MSG1Ld],
             (instregex "SHA(1|256)MSG1rm")>;
1412
// SHA1MSG2
// x,x: Zn2FPU12, no overrides.
def Zn2WriteSHA1MSG2r : SchedWriteRes<[Zn2FPU12]>;
def : InstRW<[Zn2WriteSHA1MSG2r],
             (instregex "SHA1MSG2rr")>;

// x,m: adds Zn2AGU, latency 8.
let Latency = 8 in
def Zn2WriteSHA1MSG2Ld : SchedWriteRes<[Zn2AGU, Zn2FPU12]>;

def : InstRW<[Zn2WriteSHA1MSG2Ld],
             (instregex "SHA1MSG2rm")>;

// SHA1NEXTE
// x,x: Zn2FPU1, no overrides.
def Zn2WriteSHA1NEXTEr : SchedWriteRes<[Zn2FPU1]>;
def : InstRW<[Zn2WriteSHA1NEXTEr],
             (instregex "SHA1NEXTErr")>;

// x,m: adds Zn2AGU, latency 8.
let Latency = 8 in
def Zn2WriteSHA1NEXTELd : SchedWriteRes<[Zn2AGU, Zn2FPU1]>;

def : InstRW<[Zn2WriteSHA1NEXTELd],
             (instregex "SHA1NEXTErm")>;
1432
// SHA1RNDS4
// x,x: Zn2FPU1, latency 6.
let Latency = 6 in
def Zn2WriteSHA1RNDS4r : SchedWriteRes<[Zn2FPU1]>;

def : InstRW<[Zn2WriteSHA1RNDS4r],
             (instregex "SHA1RNDS4rr")>;

// x,m: adds Zn2AGU, latency 13.
let Latency = 13 in
def Zn2WriteSHA1RNDS4Ld : SchedWriteRes<[Zn2AGU, Zn2FPU1]>;

def : InstRW<[Zn2WriteSHA1RNDS4Ld],
             (instregex "SHA1RNDS4rm")>;

// SHA256RNDS2
// x,x: Zn2FPU1, latency 4.
let Latency = 4 in
def Zn2WriteSHA256RNDS2r : SchedWriteRes<[Zn2FPU1]>;

def : InstRW<[Zn2WriteSHA256RNDS2r],
             (instregex "SHA256RNDS2rr")>;

// x,m: adds Zn2AGU, latency 11.
let Latency = 11 in
def Zn2WriteSHA256RNDS2Ld : SchedWriteRes<[Zn2AGU, Zn2FPU1]>;

def : InstRW<[Zn2WriteSHA256RNDS2Ld],
             (instregex "SHA256RNDS2rm")>;
1456
//-- Arithmetic instructions --//

// VDIVPS.
// TODO - convert to Zn2WriteResFpuPair
// y,y,y: Zn2FPU3 is held for the full 10-cycle latency.
let Latency = 10, ResourceCycles = [10] in
def Zn2WriteVDIVPSYr : SchedWriteRes<[Zn2FPU3]>;

def : SchedAlias<WriteFDivY, Zn2WriteVDIVPSYr>;

// y,y,m256: two micro-ops, latency 17.
// NOTE(review): Zn2FPU3 is held for 17 cycles here versus 10 in the
// register form - confirm the load latency should count as FPU occupancy.
let Latency = 17, NumMicroOps = 2, ResourceCycles = [1, 17] in
def Zn2WriteVDIVPSYLd : SchedWriteRes<[Zn2AGU, Zn2FPU3]>;

def : SchedAlias<WriteFDivYLd, Zn2WriteVDIVPSYLd>;

// VDIVPD.
// TODO - convert to Zn2WriteResFpuPair
// y,y,y: Zn2FPU3 is held for the full 13-cycle latency.
let Latency = 13, ResourceCycles = [13] in
def Zn2WriteVDIVPDY : SchedWriteRes<[Zn2FPU3]>;

def : SchedAlias<WriteFDiv64Y, Zn2WriteVDIVPDY>;

// y,y,m256: two micro-ops, latency 20.
// NOTE(review): same question as for VDIVPS - Zn2FPU3 is held for 20
// cycles versus 13 in the register form.
let Latency = 20, NumMicroOps = 2, ResourceCycles = [1, 20] in
def Zn2WriteVDIVPDYLd : SchedWriteRes<[Zn2AGU, Zn2FPU3]>;

def : SchedAlias<WriteFDiv64YLd, Zn2WriteVDIVPDYLd>;
1492
// DPPS.
// Only the Y-suffixed write classes are redirected here.
// register form.
def : SchedAlias<WriteDPPSY,   Zn2WriteMicrocoded>;
// memory form.
def : SchedAlias<WriteDPPSYLd, Zn2WriteMicrocoded>;

// DPPD.
// x,x,i.
def : SchedAlias<WriteDPPD,   Zn2WriteMicrocoded>;
// x,m,i.
def : SchedAlias<WriteDPPDLd, Zn2WriteMicrocoded>;
1506
// RSQRTSS
// TODO - convert to Zn2WriteResFpuPair
// x,x: Zn2FPU02, latency 5.
let Latency = 5 in
def Zn2WriteRSQRTSSr : SchedWriteRes<[Zn2FPU02]>;

def : SchedAlias<WriteFRsqrt, Zn2WriteRSQRTSSr>;

// x,m128: two micro-ops, latency 12; Zn2AGU is held for 1 cycle and
// Zn2FPU02 for 2.
let Latency = 12, NumMicroOps = 2, ResourceCycles = [1, 2] in
def Zn2WriteRSQRTSSLd : SchedWriteRes<[Zn2AGU, Zn2FPU02]>;

def : SchedAlias<WriteFRsqrtLd, Zn2WriteRSQRTSSLd>;

// RSQRTPS
// TODO - convert to Zn2WriteResFpuPair
// y,y: two micro-ops, latency 5; Zn2FPU01 is held for 2 cycles.
let Latency = 5, NumMicroOps = 2, ResourceCycles = [2] in
def Zn2WriteRSQRTPSYr : SchedWriteRes<[Zn2FPU01]>;

def : SchedAlias<WriteFRsqrtY, Zn2WriteRSQRTPSYr>;

// y,m256: two micro-ops on Zn2AGU + Zn2FPU01, latency 12.
let Latency = 12, NumMicroOps = 2 in
def Zn2WriteRSQRTPSYLd : SchedWriteRes<[Zn2AGU, Zn2FPU01]>;

def : SchedAlias<WriteFRsqrtYLd, Zn2WriteRSQRTPSYLd>;
1539
//-- Other instructions --//

// VZEROUPPER uses the generic WriteALU class.
def : InstRW<[WriteALU],
             (instrs VZEROUPPER)>;

// VZEROALL is microcoded.
def : InstRW<[WriteMicrocoded],
             (instrs VZEROALL)>;
1547
1548} // SchedModel
1549