xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleZnver2.td (revision f126d349810fdb512c0b01e101342d430b947488)
1//=- X86ScheduleZnver2.td - X86 Znver2 Scheduling -------------*- tablegen -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the machine model for Znver2 to support instruction
10// scheduling and other instruction cost heuristics.
11//
12//===----------------------------------------------------------------------===//
13
14def Znver2Model : SchedMachineModel {
15  // Zen can decode 4 instructions per cycle.
16  let IssueWidth = 4;
17  // MicroOpBufferSize is sized after the reorder buffer.
18  let MicroOpBufferSize = 224;
19  let LoadLatency = 4; // Also reused below to derive WriteIMulHLd's latency.
20  let MispredictPenalty = 17;
21  let HighLatency = 25;
22  let PostRAScheduler = 1;
23
24  // FIXME: The model is incomplete: not every instruction has been
25  // given scheduling information yet.
26  // CompleteModel is therefore cleared so that the model is treated as
27  // incomplete.
28  let CompleteModel = 0;
29}
30
31let SchedModel = Znver2Model in {
32
33// Zen can issue micro-ops to 11 different units in one cycle.
34// These are
35//  * Four integer ALU units (Zn2ALU0, Zn2ALU1, Zn2ALU2, Zn2ALU3)
36//  * Three AGU units (Zn2AGU0, Zn2AGU1, Zn2AGU2)
37//  * Four FPU units (Zn2FPU0, Zn2FPU1, Zn2FPU2, Zn2FPU3)
38// AGUs feed the load/store queues at two loads and one store per cycle.
39
40// Four ALU units are defined below
41def Zn2ALU0 : ProcResource<1>;
42def Zn2ALU1 : ProcResource<1>;
43def Zn2ALU2 : ProcResource<1>;
44def Zn2ALU3 : ProcResource<1>;
45
46// Three AGU units are defined below
47def Zn2AGU0 : ProcResource<1>;
48def Zn2AGU1 : ProcResource<1>;
49def Zn2AGU2 : ProcResource<1>;
50
51// Four FPU units are defined below
52def Zn2FPU0 : ProcResource<1>;
53def Zn2FPU1 : ProcResource<1>;
54def Zn2FPU2 : ProcResource<1>;
55def Zn2FPU3 : ProcResource<1>;
56
57// FPU grouping
58def Zn2FPU013  : ProcResGroup<[Zn2FPU0, Zn2FPU1, Zn2FPU3]>;
59def Zn2FPU01   : ProcResGroup<[Zn2FPU0, Zn2FPU1]>;
60def Zn2FPU12   : ProcResGroup<[Zn2FPU1, Zn2FPU2]>;
61def Zn2FPU13   : ProcResGroup<[Zn2FPU1, Zn2FPU3]>;
62def Zn2FPU23   : ProcResGroup<[Zn2FPU2, Zn2FPU3]>;
63def Zn2FPU02   : ProcResGroup<[Zn2FPU0, Zn2FPU2]>;
64def Zn2FPU03   : ProcResGroup<[Zn2FPU0, Zn2FPU3]>;
65
66// Below are the grouping of the units.
67// Micro-ops to be issued to multiple units are tackled this way.
68
69// ALU grouping
70// Zn2ALU03 - 0,3 grouping
71def Zn2ALU03: ProcResGroup<[Zn2ALU0, Zn2ALU3]>;
72
73// 64 Entry (16x4 entries) Int Scheduler
74def Zn2ALU : ProcResGroup<[Zn2ALU0, Zn2ALU1, Zn2ALU2, Zn2ALU3]> {
75  let BufferSize=64;
76}
77
78// 28 Entry (14x2) AGU group. AGUs can't be used for all ALU operations
79// but are relevant for some instructions
80def Zn2AGU : ProcResGroup<[Zn2AGU0, Zn2AGU1, Zn2AGU2]> {
81  let BufferSize=28;
82}
83
84// Integer Multiplication issued on ALU1.
85def Zn2Multiplier : ProcResource<1>;
86
87// Integer division issued on ALU2.
88def Zn2Divider : ProcResource<1>;
89
90// 4-cycle load-to-use latency is captured
91def : ReadAdvance<ReadAfterLd, 4>;
92
93// 7-cycle vector load-to-use latency is captured
94def : ReadAdvance<ReadAfterVecLd, 7>;
95def : ReadAdvance<ReadAfterVecXLd, 7>;
96def : ReadAdvance<ReadAfterVecYLd, 7>;
97
98def : ReadAdvance<ReadInt2Fpu, 0>;
99
100// The Integer PRF for Zen is 168 entries, and it holds the architectural and
101// speculative version of the 64-bit integer registers.
102// Reference: "Software Optimization Guide for AMD Family 17h Processors"
103def Zn2IntegerPRF : RegisterFile<168, [GR64, CCR]>;
104
105// 36 Entry (9x4 entries) floating-point Scheduler
106def Zn2FPU     : ProcResGroup<[Zn2FPU0, Zn2FPU1, Zn2FPU2, Zn2FPU3]> {
107  let BufferSize=36;
108}
109
110// The Zen FP Retire Queue renames SIMD and FP uOps onto a pool of 160 128-bit
111// registers. Operations on 256-bit data types are cracked into two COPs.
112// Reference: "Software Optimization Guide for AMD Family 17h Processors"
113def Zn2FpuPRF: RegisterFile<160, [VR64, VR128, VR256], [1, 1, 2]>;
114
115// The unit can track up to 192 macro ops in-flight.
116// The retire unit handles in-order commit of up to 8 macro ops per cycle.
117// Reference: "Software Optimization Guide for AMD Family 17h Processors"
118// To be noted, the retire unit is shared between integer and FP ops.
119// In SMT mode it is 96 entries per thread. But we do not use the conservative
120// value here because there is currently no way to fully model SMT mode,
121// so there is no point in trying.
122def Zn2RCU : RetireControlUnit<192, 8>;
123
124// (a folded load is an instruction that loads and does some operation)
125// Ex: ADDPD xmm,[mem]-> This instruction has two micro-ops
126// Instructions with folded loads are usually micro-fused, so they only appear
127// as two micro-ops.
128//      a. load and
129//      b. addpd
130// This multiclass is for folded loads for integer units.
131multiclass Zn2WriteResPair<X86FoldableSchedWrite SchedRW,
132                          list<ProcResourceKind> ExePorts,
133                          int Lat, list<int> Res = [], int UOps = 1,
134                          int LoadLat = 4, int LoadUOps = 1> {
135  // Register variant: latency Lat, Res cycles on ExePorts, UOps micro-ops.
136  def : WriteRes<SchedRW, ExePorts> {
137    let Latency = Lat;
138    let ResourceCycles = Res;
139    let NumMicroOps = UOps;
140  }
141
142  // Memory variant: Zn2AGU is prepended to ExePorts (for 1 cycle when Res is
143  // given), latency grows by LoadLat (default 4), micro-ops by LoadUOps.
144  def : WriteRes<SchedRW.Folded, !listconcat([Zn2AGU], ExePorts)> {
145    let Latency = !add(Lat, LoadLat);
146    let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res));
147    let NumMicroOps = !add(UOps, LoadUOps);
148  }
149}
150
151// This multiclass is for folded loads for floating point units.
152multiclass Zn2WriteResFpuPair<X86FoldableSchedWrite SchedRW,
153                          list<ProcResourceKind> ExePorts,
154                          int Lat, list<int> Res = [], int UOps = 1,
155                          int LoadLat = 7, int LoadUOps = 0> {
156  // Register variant: latency Lat, Res cycles on ExePorts, UOps micro-ops.
157  def : WriteRes<SchedRW, ExePorts> {
158    let Latency = Lat;
159    let ResourceCycles = Res;
160    let NumMicroOps = UOps;
161  }
162
163  // Memory variant: Zn2AGU is prepended to ExePorts (for 1 cycle when Res is
164  // given), latency grows by LoadLat (default 7), micro-ops by LoadUOps.
165  def : WriteRes<SchedRW.Folded, !listconcat([Zn2AGU], ExePorts)> {
166    let Latency = !add(Lat, LoadLat);
167    let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res));
168    let NumMicroOps = !add(UOps, LoadUOps);
169  }
170}
171
172// WriteRMW is set for instructions with Memory write
173// operation in codegen
174def : WriteRes<WriteRMW, [Zn2AGU]>;
175
176def : WriteRes<WriteStore,   [Zn2AGU]>;
177def : WriteRes<WriteStoreNT, [Zn2AGU]>;
178def : WriteRes<WriteMove,    [Zn2ALU]>;
179def : WriteRes<WriteLoad,    [Zn2AGU]> { let Latency = 8; }
180
181// Model the effect of clobbering the read-write mask operand of the GATHER operation.
182// Does not cost anything by itself, only has latency, matching that of the WriteLoad.
183def : WriteRes<WriteVecMaskedGatherWriteback, []> { let Latency = 8; let NumMicroOps = 0; }
184
185def : WriteRes<WriteZero,  []>;
186def : WriteRes<WriteLEA, [Zn2ALU]>;
187defm : Zn2WriteResPair<WriteALU,   [Zn2ALU], 1>;
188defm : Zn2WriteResPair<WriteADC,   [Zn2ALU], 1>;
189
190defm : Zn2WriteResPair<WriteIMul8,     [Zn2ALU1, Zn2Multiplier], 4>;
191
192defm : X86WriteRes<WriteBSWAP32, [Zn2ALU], 1, [4], 1>;
193defm : X86WriteRes<WriteBSWAP64, [Zn2ALU], 1, [4], 1>;
194defm : X86WriteRes<WriteCMPXCHG, [Zn2ALU], 3, [1], 1>;
195defm : X86WriteRes<WriteCMPXCHGRMW,[Zn2ALU,Zn2AGU], 8, [1,1], 5>;
196defm : X86WriteRes<WriteXCHG, [Zn2ALU], 1, [2], 2>;
197
198defm : Zn2WriteResPair<WriteShift, [Zn2ALU], 1>;
199defm : Zn2WriteResPair<WriteShiftCL,  [Zn2ALU], 1>;
200defm : Zn2WriteResPair<WriteRotate,   [Zn2ALU], 1>;
201defm : Zn2WriteResPair<WriteRotateCL, [Zn2ALU], 1>;
202
203defm : X86WriteRes<WriteSHDrri, [Zn2ALU], 1, [1], 1>;
204defm : X86WriteResUnsupported<WriteSHDrrcl>;
205defm : X86WriteResUnsupported<WriteSHDmri>;
206defm : X86WriteResUnsupported<WriteSHDmrcl>;
207
208defm : Zn2WriteResPair<WriteJump,  [Zn2ALU], 1>;
209defm : Zn2WriteResFpuPair<WriteCRC32, [Zn2FPU0], 3>;
210
211defm : Zn2WriteResPair<WriteCMOV,   [Zn2ALU], 1>;
212def  : WriteRes<WriteSETCC,  [Zn2ALU]>;
213def  : WriteRes<WriteSETCCStore,  [Zn2ALU, Zn2AGU]>;
214defm : X86WriteRes<WriteLAHFSAHF, [Zn2ALU], 2, [1], 2>;
215
216defm : X86WriteRes<WriteBitTest,         [Zn2ALU], 1, [1], 1>;
217defm : X86WriteRes<WriteBitTestImmLd,    [Zn2ALU,Zn2AGU], 5, [1,1], 2>;
218defm : X86WriteRes<WriteBitTestRegLd,    [Zn2ALU,Zn2AGU], 5, [1,1], 2>;
219defm : X86WriteRes<WriteBitTestSet,      [Zn2ALU], 2, [1], 2>;
220
221// Bit counts.
222defm : Zn2WriteResPair<WriteBSF, [Zn2ALU], 3>;
223defm : Zn2WriteResPair<WriteBSR, [Zn2ALU], 4>;
224defm : Zn2WriteResPair<WriteLZCNT,          [Zn2ALU], 1>;
225defm : Zn2WriteResPair<WriteTZCNT,          [Zn2ALU], 2>;
226defm : Zn2WriteResPair<WritePOPCNT,         [Zn2ALU], 1>;
227
228// Treat misc copies as a move.
229def : InstRW<[WriteMove], (instrs COPY)>;
230
231// BMI1 BEXTR, BMI2 BZHI
232defm : Zn2WriteResPair<WriteBEXTR, [Zn2ALU], 1>;
233defm : Zn2WriteResPair<WriteBZHI, [Zn2ALU], 1>;
234
235// IDIV
236defm : Zn2WriteResPair<WriteDiv8,   [Zn2ALU2, Zn2Divider], 15, [1,15], 1>;
237defm : Zn2WriteResPair<WriteDiv16,  [Zn2ALU2, Zn2Divider], 17, [1,17], 2>;
238defm : Zn2WriteResPair<WriteDiv32,  [Zn2ALU2, Zn2Divider], 25, [1,25], 2>;
239defm : Zn2WriteResPair<WriteDiv64,  [Zn2ALU2, Zn2Divider], 41, [1,41], 2>;
240defm : Zn2WriteResPair<WriteIDiv8,  [Zn2ALU2, Zn2Divider], 15, [1,15], 1>;
241defm : Zn2WriteResPair<WriteIDiv16, [Zn2ALU2, Zn2Divider], 17, [1,17], 2>;
242defm : Zn2WriteResPair<WriteIDiv32, [Zn2ALU2, Zn2Divider], 25, [1,25], 2>;
243defm : Zn2WriteResPair<WriteIDiv64, [Zn2ALU2, Zn2Divider], 41, [1,41], 2>;
244
245// IMULH. The load form derives its values from the register form.
246def Zn2WriteIMulH : WriteRes<WriteIMulH, [Zn2Multiplier]>{
247  let Latency = 3;
248  let NumMicroOps = 0;
249}
250
251def  : WriteRes<WriteIMulHLd, [Zn2Multiplier]>{
252  let Latency = !add(Zn2WriteIMulH.Latency, Znver2Model.LoadLatency);
253  let NumMicroOps = Zn2WriteIMulH.NumMicroOps;
254}
255
256
257// Floating point operations
258defm : X86WriteRes<WriteFLoad,         [Zn2AGU], 8, [1], 1>;
259defm : X86WriteRes<WriteFLoadX,        [Zn2AGU], 8, [1], 1>;
260defm : X86WriteRes<WriteFLoadY,        [Zn2AGU], 8, [1], 1>;
261defm : X86WriteRes<WriteFMaskedLoad,   [Zn2AGU,Zn2FPU01], 8, [1,1], 1>;
262defm : X86WriteRes<WriteFMaskedLoadY,  [Zn2AGU,Zn2FPU01], 8, [1,1], 2>;
263defm : X86WriteRes<WriteFMaskedStore32,  [Zn2AGU,Zn2FPU01], 4, [1,1], 1>;
264defm : X86WriteRes<WriteFMaskedStore32Y, [Zn2AGU,Zn2FPU01], 5, [1,2], 2>;
265defm : X86WriteRes<WriteFMaskedStore64,  [Zn2AGU,Zn2FPU01], 4, [1,1], 1>;
266defm : X86WriteRes<WriteFMaskedStore64Y, [Zn2AGU,Zn2FPU01], 5, [1,2], 2>;
267
268defm : X86WriteRes<WriteFStore,        [Zn2AGU], 1, [1], 1>;
269defm : X86WriteRes<WriteFStoreX,       [Zn2AGU], 1, [1], 1>;
270defm : X86WriteRes<WriteFStoreY,       [Zn2AGU], 1, [1], 1>;
271defm : X86WriteRes<WriteFStoreNT,      [Zn2AGU,Zn2FPU2], 8, [1,1], 1>;
272defm : X86WriteRes<WriteFStoreNTX,     [Zn2AGU], 1, [1], 1>;
273defm : X86WriteRes<WriteFStoreNTY,     [Zn2AGU], 1, [1], 1>;
274defm : X86WriteRes<WriteFMove,         [Zn2FPU], 1, [1], 1>;
275defm : X86WriteRes<WriteFMoveX,        [Zn2FPU], 1, [1], 1>;
276defm : X86WriteRes<WriteFMoveY,        [Zn2FPU], 1, [1], 1>;
277defm : X86WriteResUnsupported<WriteFMoveZ>;
278
279defm : Zn2WriteResFpuPair<WriteFAdd,      [Zn2FPU0],  3>;
280defm : Zn2WriteResFpuPair<WriteFAddX,     [Zn2FPU0],  3>;
281defm : Zn2WriteResFpuPair<WriteFAddY,     [Zn2FPU0],  3>;
282defm : X86WriteResPairUnsupported<WriteFAddZ>;
283defm : Zn2WriteResFpuPair<WriteFAdd64,    [Zn2FPU0],  3>;
284defm : Zn2WriteResFpuPair<WriteFAdd64X,   [Zn2FPU0],  3>;
285defm : Zn2WriteResFpuPair<WriteFAdd64Y,   [Zn2FPU0],  3>;
286defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
287defm : Zn2WriteResFpuPair<WriteFCmp,      [Zn2FPU0],  1>;
288defm : Zn2WriteResFpuPair<WriteFCmpX,     [Zn2FPU0],  1>;
289defm : Zn2WriteResFpuPair<WriteFCmpY,     [Zn2FPU0],  1>;
290defm : X86WriteResPairUnsupported<WriteFCmpZ>;
291defm : Zn2WriteResFpuPair<WriteFCmp64,    [Zn2FPU0],  1>;
292defm : Zn2WriteResFpuPair<WriteFCmp64X,   [Zn2FPU0],  1>;
293defm : Zn2WriteResFpuPair<WriteFCmp64Y,   [Zn2FPU0],  1>;
294defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
295defm : Zn2WriteResFpuPair<WriteFCom,      [Zn2FPU0],  3>;
296defm : Zn2WriteResFpuPair<WriteFComX,     [Zn2FPU0],  3>;
297defm : Zn2WriteResFpuPair<WriteFBlend,    [Zn2FPU01], 1>;
298defm : Zn2WriteResFpuPair<WriteFBlendY,   [Zn2FPU01], 1>;
299defm : X86WriteResPairUnsupported<WriteFBlendZ>;
300defm : Zn2WriteResFpuPair<WriteFVarBlend, [Zn2FPU01], 1>;
301defm : Zn2WriteResFpuPair<WriteFVarBlendY,[Zn2FPU01], 1>;
302defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
303defm : Zn2WriteResFpuPair<WriteVarBlend,  [Zn2FPU0],  1>;
304defm : Zn2WriteResFpuPair<WriteVarBlendY, [Zn2FPU0],  1>;
305defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
306defm : Zn2WriteResFpuPair<WriteCvtSS2I,   [Zn2FPU3],  5>;
307defm : Zn2WriteResFpuPair<WriteCvtPS2I,   [Zn2FPU3],  5>;
308defm : Zn2WriteResFpuPair<WriteCvtPS2IY,  [Zn2FPU3],  5>;
309defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
310defm : Zn2WriteResFpuPair<WriteCvtSD2I,   [Zn2FPU3],  5>;
311defm : Zn2WriteResFpuPair<WriteCvtPD2I,   [Zn2FPU3],  5>;
312defm : Zn2WriteResFpuPair<WriteCvtPD2IY,  [Zn2FPU3],  5>;
313defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
314defm : Zn2WriteResFpuPair<WriteCvtI2SS,   [Zn2FPU3],  5>;
315defm : Zn2WriteResFpuPair<WriteCvtI2PS,   [Zn2FPU3],  5>;
316defm : Zn2WriteResFpuPair<WriteCvtI2PSY,  [Zn2FPU3],  5>;
317defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
318defm : Zn2WriteResFpuPair<WriteCvtI2SD,   [Zn2FPU3],  5>;
319defm : Zn2WriteResFpuPair<WriteCvtI2PD,   [Zn2FPU3],  5>;
320defm : Zn2WriteResFpuPair<WriteCvtI2PDY,  [Zn2FPU3],  5>;
321defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
322defm : Zn2WriteResFpuPair<WriteFDiv,      [Zn2FPU3], 15>;
323defm : Zn2WriteResFpuPair<WriteFDivX,     [Zn2FPU3], 15>;
324defm : X86WriteResPairUnsupported<WriteFDivZ>;
325defm : Zn2WriteResFpuPair<WriteFDiv64,    [Zn2FPU3], 15>;
326defm : Zn2WriteResFpuPair<WriteFDiv64X,   [Zn2FPU3], 15>;
327defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
328defm : Zn2WriteResFpuPair<WriteFSign,     [Zn2FPU3],  2>;
329defm : Zn2WriteResFpuPair<WriteFRnd,      [Zn2FPU3],  3, [1], 1, 7, 0>;
330defm : Zn2WriteResFpuPair<WriteFRndY,     [Zn2FPU3],  3, [1], 1, 7, 0>;
331defm : X86WriteResPairUnsupported<WriteFRndZ>;
332defm : Zn2WriteResFpuPair<WriteFLogic,    [Zn2FPU],   1>;
333defm : Zn2WriteResFpuPair<WriteFLogicY,   [Zn2FPU],   1>;
334defm : X86WriteResPairUnsupported<WriteFLogicZ>;
335defm : Zn2WriteResFpuPair<WriteFTest,     [Zn2FPU],   1>;
336defm : Zn2WriteResFpuPair<WriteFTestY,    [Zn2FPU],   1>;
337defm : X86WriteResPairUnsupported<WriteFTestZ>;
338defm : Zn2WriteResFpuPair<WriteFShuffle,  [Zn2FPU12], 1>;
339defm : Zn2WriteResFpuPair<WriteFShuffleY, [Zn2FPU12], 1>;
340defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
341defm : Zn2WriteResFpuPair<WriteFVarShuffle, [Zn2FPU12], 3>;
342defm : Zn2WriteResFpuPair<WriteFVarShuffleY,[Zn2FPU12], 3>;
343defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
344defm : Zn2WriteResFpuPair<WriteFMul,      [Zn2FPU01], 3, [1], 1, 7, 1>;
345defm : Zn2WriteResFpuPair<WriteFMulX,     [Zn2FPU01], 3, [1], 1, 7, 1>;
346defm : Zn2WriteResFpuPair<WriteFMulY,     [Zn2FPU01], 3, [1], 1, 7, 1>;
347defm : X86WriteResPairUnsupported<WriteFMulZ>;
348defm : Zn2WriteResFpuPair<WriteFMul64,    [Zn2FPU01], 3, [1], 1, 7, 1>;
349defm : Zn2WriteResFpuPair<WriteFMul64X,   [Zn2FPU01], 3, [1], 1, 7, 1>;
350defm : Zn2WriteResFpuPair<WriteFMul64Y,   [Zn2FPU01], 3, [1], 1, 7, 1>;
351defm : X86WriteResPairUnsupported<WriteFMul64Z>;
352defm : Zn2WriteResFpuPair<WriteFMA,       [Zn2FPU03], 5>;
353defm : Zn2WriteResFpuPair<WriteFMAX,      [Zn2FPU03], 5>;
354defm : Zn2WriteResFpuPair<WriteFMAY,      [Zn2FPU03], 5>;
355defm : X86WriteResPairUnsupported<WriteFMAZ>;
356defm : Zn2WriteResFpuPair<WriteFRcp,      [Zn2FPU01], 5>;
357defm : Zn2WriteResFpuPair<WriteFRcpX,     [Zn2FPU01], 5>;
358defm : Zn2WriteResFpuPair<WriteFRcpY,     [Zn2FPU01], 5, [1], 1, 7, 2>;
359defm : X86WriteResPairUnsupported<WriteFRcpZ>;
360defm : Zn2WriteResFpuPair<WriteFRsqrtX,   [Zn2FPU01], 5, [1], 1, 7, 1>;
361defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
362defm : Zn2WriteResFpuPair<WriteFSqrt,     [Zn2FPU3], 20, [20]>;
363defm : Zn2WriteResFpuPair<WriteFSqrtX,    [Zn2FPU3], 20, [20]>;
364defm : Zn2WriteResFpuPair<WriteFSqrtY,    [Zn2FPU3], 28, [28], 1, 7, 1>;
365defm : X86WriteResPairUnsupported<WriteFSqrtZ>;
366defm : Zn2WriteResFpuPair<WriteFSqrt64,   [Zn2FPU3], 20, [20]>;
367defm : Zn2WriteResFpuPair<WriteFSqrt64X,  [Zn2FPU3], 20, [20]>;
368defm : Zn2WriteResFpuPair<WriteFSqrt64Y,  [Zn2FPU3], 20, [20], 1, 7, 1>;
369defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
370defm : Zn2WriteResFpuPair<WriteFSqrt80,   [Zn2FPU3], 20, [20]>;
371
372// Vector integer operations which use FPU units
373defm : X86WriteRes<WriteVecLoad,         [Zn2AGU], 8, [1], 1>;
374defm : X86WriteRes<WriteVecLoadX,        [Zn2AGU], 8, [1], 1>;
375defm : X86WriteRes<WriteVecLoadY,        [Zn2AGU], 8, [1], 1>;
376defm : X86WriteRes<WriteVecLoadNT,       [Zn2AGU], 8, [1], 1>;
377defm : X86WriteRes<WriteVecLoadNTY,      [Zn2AGU], 8, [1], 1>;
378defm : X86WriteRes<WriteVecMaskedLoad,   [Zn2AGU,Zn2FPU01], 8, [1,2], 2>;
379defm : X86WriteRes<WriteVecMaskedLoadY,  [Zn2AGU,Zn2FPU01], 8, [1,2], 2>;
380defm : X86WriteRes<WriteVecStore,        [Zn2AGU], 1, [1], 1>;
381defm : X86WriteRes<WriteVecStoreX,       [Zn2AGU], 1, [1], 1>;
382defm : X86WriteRes<WriteVecStoreY,       [Zn2AGU], 1, [1], 1>;
383defm : X86WriteRes<WriteVecStoreNT,      [Zn2AGU], 1, [1], 1>;
384defm : X86WriteRes<WriteVecStoreNTY,     [Zn2AGU], 1, [1], 1>;
385defm : X86WriteRes<WriteVecMaskedStore32,  [Zn2AGU,Zn2FPU01], 4, [1,1], 1>;
386defm : X86WriteRes<WriteVecMaskedStore32Y, [Zn2AGU,Zn2FPU01], 5, [1,2], 2>;
387defm : X86WriteRes<WriteVecMaskedStore64,  [Zn2AGU,Zn2FPU01], 4, [1,1], 1>;
388defm : X86WriteRes<WriteVecMaskedStore64Y, [Zn2AGU,Zn2FPU01], 5, [1,2], 2>;
389defm : X86WriteRes<WriteVecMove,         [Zn2FPU], 1, [1], 1>;
390defm : X86WriteRes<WriteVecMoveX,        [Zn2FPU], 1, [1], 1>;
391defm : X86WriteRes<WriteVecMoveY,        [Zn2FPU], 2, [1], 2>;
392defm : X86WriteResUnsupported<WriteVecMoveZ>;
393defm : X86WriteRes<WriteVecMoveToGpr,    [Zn2FPU2], 2, [1], 1>;
394defm : X86WriteRes<WriteVecMoveFromGpr,  [Zn2FPU2], 3, [1], 1>;
395defm : X86WriteRes<WriteEMMS,            [Zn2FPU], 2, [1], 1>;
396
397defm : Zn2WriteResFpuPair<WriteVecShift,   [Zn2FPU],   1>;
398defm : Zn2WriteResFpuPair<WriteVecShiftX,  [Zn2FPU2],  1>;
399defm : Zn2WriteResFpuPair<WriteVecShiftY,  [Zn2FPU2],  1>;
400defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
401defm : Zn2WriteResFpuPair<WriteVecShiftImm,  [Zn2FPU], 1>;
402defm : Zn2WriteResFpuPair<WriteVecShiftImmX, [Zn2FPU], 1>;
403defm : Zn2WriteResFpuPair<WriteVecShiftImmY, [Zn2FPU], 1>;
404defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
405defm : Zn2WriteResFpuPair<WriteVecLogic,   [Zn2FPU],   1>;
406defm : Zn2WriteResFpuPair<WriteVecLogicX,  [Zn2FPU],   1>;
407defm : Zn2WriteResFpuPair<WriteVecLogicY,  [Zn2FPU],   1>;
408defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
409defm : Zn2WriteResFpuPair<WriteVecTest,    [Zn2FPU12], 1, [2], 1, 7, 1>;
410defm : Zn2WriteResFpuPair<WriteVecTestY,   [Zn2FPU12], 1, [2], 1, 7, 1>;
411defm : X86WriteResPairUnsupported<WriteVecTestZ>;
412defm : Zn2WriteResFpuPair<WriteVecALU,     [Zn2FPU],   1>;
413defm : Zn2WriteResFpuPair<WriteVecALUX,    [Zn2FPU],   1>;
414defm : Zn2WriteResFpuPair<WriteVecALUY,    [Zn2FPU],   1>;
415defm : X86WriteResPairUnsupported<WriteVecALUZ>;
416defm : Zn2WriteResFpuPair<WriteVecIMul,    [Zn2FPU0],  4>;
417defm : Zn2WriteResFpuPair<WriteVecIMulX,   [Zn2FPU0],  4>;
418defm : Zn2WriteResFpuPair<WriteVecIMulY,   [Zn2FPU0],  4>;
419defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
420defm : Zn2WriteResFpuPair<WritePMULLD,     [Zn2FPU0],  4, [1], 1, 7, 1>;
421defm : Zn2WriteResFpuPair<WritePMULLDY,    [Zn2FPU0],  4, [1], 1, 7, 1>;
422defm : X86WriteResPairUnsupported<WritePMULLDZ>;
423defm : Zn2WriteResFpuPair<WriteShuffle,    [Zn2FPU],   1>;
424defm : Zn2WriteResFpuPair<WriteShuffleX,   [Zn2FPU],   1>;
425defm : Zn2WriteResFpuPair<WriteShuffleY,   [Zn2FPU],   1>;
426defm : X86WriteResPairUnsupported<WriteShuffleZ>;
427defm : Zn2WriteResFpuPair<WriteVarShuffle, [Zn2FPU],   1>;
428defm : Zn2WriteResFpuPair<WriteVarShuffleX,[Zn2FPU],   1>;
429defm : Zn2WriteResFpuPair<WriteVarShuffleY,[Zn2FPU],   1>;
430defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
431defm : Zn2WriteResFpuPair<WriteBlend,      [Zn2FPU01], 1>;
432defm : Zn2WriteResFpuPair<WriteBlendY,     [Zn2FPU01], 1>;
433defm : X86WriteResPairUnsupported<WriteBlendZ>;
434defm : Zn2WriteResFpuPair<WriteShuffle256, [Zn2FPU],   2>;
435defm : Zn2WriteResFpuPair<WriteVPMOV256,   [Zn2FPU12],  4, [1], 2, 4>;
436defm : Zn2WriteResFpuPair<WriteVarShuffle256, [Zn2FPU],   2>;
437defm : Zn2WriteResFpuPair<WritePSADBW,     [Zn2FPU0],  3>;
438defm : Zn2WriteResFpuPair<WritePSADBWX,    [Zn2FPU0],  3>;
439defm : Zn2WriteResFpuPair<WritePSADBWY,    [Zn2FPU0],  3>;
440defm : X86WriteResPairUnsupported<WritePSADBWZ>;
441defm : Zn2WriteResFpuPair<WritePHMINPOS,   [Zn2FPU0],  4>;
442
443// Vector Shift Operations
444defm : Zn2WriteResFpuPair<WriteVarVecShift,  [Zn2FPU12], 3>;
445defm : Zn2WriteResFpuPair<WriteVarVecShiftY, [Zn2FPU12], 3>;
446defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
447
448// Vector insert/extract operations.
449defm : Zn2WriteResFpuPair<WriteVecInsert,   [Zn2FPU],   1>;
450
451def : WriteRes<WriteVecExtract, [Zn2FPU12, Zn2FPU2]> {
452  let Latency = 2;
453  let ResourceCycles = [1, 2];
454}
455def : WriteRes<WriteVecExtractSt, [Zn2AGU, Zn2FPU12, Zn2FPU2]> {
456  let Latency = 5;
457  let NumMicroOps = 2;
458  let ResourceCycles = [1, 2, 3];
459}
460
461// MOVMSK Instructions.
462def : WriteRes<WriteFMOVMSK, [Zn2FPU2]>;
463def : WriteRes<WriteMMXMOVMSK, [Zn2FPU2]>;
464def : WriteRes<WriteVecMOVMSK, [Zn2FPU2]>;
465
466def : WriteRes<WriteVecMOVMSKY, [Zn2FPU2]> {
467  let NumMicroOps = 2;
468  let Latency = 2;
469  let ResourceCycles = [2];
470}
471
472// AES Instructions.
473defm : Zn2WriteResFpuPair<WriteAESDecEnc, [Zn2FPU01], 4>;
474defm : Zn2WriteResFpuPair<WriteAESIMC,    [Zn2FPU01], 4>;
475defm : Zn2WriteResFpuPair<WriteAESKeyGen, [Zn2FPU01], 4>;
476
477def : WriteRes<WriteFence,  [Zn2AGU]>;
478def : WriteRes<WriteNop, []>;
479
480// Following instructions with latency=100 are microcoded.
481// We set long latency so as to block the entire pipeline.
482defm : Zn2WriteResFpuPair<WriteFShuffle256, [Zn2FPU], 100>;
483defm : Zn2WriteResFpuPair<WriteFVarShuffle256, [Zn2FPU], 100>;
484
485// Microcoded Instructions
486def Zn2WriteMicrocoded : SchedWriteRes<[]> {
487  let Latency = 100;
488}
489defm : Zn2WriteResPair<WriteDPPS, [], 15>;
490defm : Zn2WriteResPair<WriteFHAdd, [], 7>;
491defm : Zn2WriteResPair<WriteFHAddY, [], 7>;
492defm : Zn2WriteResPair<WritePHAdd, [], 3>;
493defm : Zn2WriteResPair<WritePHAddX, [], 3>;
494defm : Zn2WriteResPair<WritePHAddY, [], 3>;
495
496def : SchedAlias<WriteMicrocoded, Zn2WriteMicrocoded>;
497def : SchedAlias<WriteFCMOV, Zn2WriteMicrocoded>;
498def : SchedAlias<WriteSystem, Zn2WriteMicrocoded>;
499def : SchedAlias<WriteMPSAD, Zn2WriteMicrocoded>;
500def : SchedAlias<WriteMPSADY, Zn2WriteMicrocoded>;
501def : SchedAlias<WriteMPSADLd, Zn2WriteMicrocoded>;
502def : SchedAlias<WriteMPSADYLd, Zn2WriteMicrocoded>;
503def : SchedAlias<WriteCLMul, Zn2WriteMicrocoded>;
504def : SchedAlias<WriteCLMulLd, Zn2WriteMicrocoded>;
505def : SchedAlias<WritePCmpIStrM, Zn2WriteMicrocoded>;
506def : SchedAlias<WritePCmpIStrMLd, Zn2WriteMicrocoded>;
507def : SchedAlias<WritePCmpEStrI, Zn2WriteMicrocoded>;
508def : SchedAlias<WritePCmpEStrILd, Zn2WriteMicrocoded>;
509def : SchedAlias<WritePCmpEStrM, Zn2WriteMicrocoded>;
510def : SchedAlias<WritePCmpEStrMLd, Zn2WriteMicrocoded>;
511def : SchedAlias<WritePCmpIStrI, Zn2WriteMicrocoded>;
512def : SchedAlias<WritePCmpIStrILd, Zn2WriteMicrocoded>;
513def : SchedAlias<WriteLDMXCSR, Zn2WriteMicrocoded>;
514def : SchedAlias<WriteSTMXCSR, Zn2WriteMicrocoded>;
515
516//=== Regex based InstRW ===//
517// Notation:
518// - r: register.
519// - m = memory.
520// - i = immediate
521// - mm: 64 bit mmx register.
522// - x = 128 bit xmm register.
523// - (x)mm = mmx or xmm register.
524// - y = 256 bit ymm register.
525// - v = any vector register.
526
527//=== Integer Instructions ===//
528//-- Move instructions --//
529// MOV.
530// r16,m.
531def : InstRW<[WriteALULd, ReadAfterLd], (instregex "MOV16rm")>;
532
533// MOVSX, MOVZX.
534// r,m.
535def : InstRW<[WriteLoad], (instregex "MOV(S|Z)X32rm(8|16)")>;
536
537// XCHG.
538// r,r.
539def Zn2WriteXCHG : SchedWriteRes<[Zn2ALU]> {
540  let NumMicroOps = 2;
541}
542
543def : InstRW<[Zn2WriteXCHG], (instregex "^XCHG(8|16|32|64)rr", "^XCHG(16|32|64)ar")>;
544
545// r,m.
546def Zn2WriteXCHGrm : SchedWriteRes<[Zn2AGU, Zn2ALU]> {
547  let Latency = 5;
548  let NumMicroOps = 2;
549}
550def : InstRW<[Zn2WriteXCHGrm, ReadAfterLd], (instregex "^XCHG(8|16|32|64)rm")>;
551
552def : InstRW<[WriteMicrocoded], (instrs XLAT)>;
553
554// POP16.
555// m.
556def Zn2WritePop16r : SchedWriteRes<[Zn2AGU]>{
557  let Latency = 5;
558  let NumMicroOps = 2;
559}
560def : InstRW<[Zn2WritePop16r], (instregex "POP16rmm")>;
561def : InstRW<[WriteMicrocoded], (instregex "POPF(16|32)")>;
562def : InstRW<[WriteMicrocoded], (instregex "POPA(16|32)")>;
563
564
565// PUSH.
566// r. Has default values.
567// m.
568def Zn2WritePUSH : SchedWriteRes<[Zn2AGU]>{
569  let Latency = 4;
570}
571def : InstRW<[Zn2WritePUSH], (instregex "PUSH(16|32)rmm")>;
572
573//PUSHF
574def : InstRW<[WriteMicrocoded], (instregex "PUSHF(16|32)")>;
575
576// PUSHA.
577def Zn2WritePushA : SchedWriteRes<[Zn2AGU]> {
578  let Latency = 8;
579}
580def : InstRW<[Zn2WritePushA], (instregex "PUSHA(16|32)")>;
581
582//LAHF
583def : InstRW<[WriteMicrocoded], (instrs LAHF)>;
584
585// MOVBE.
586// r,m.
587def Zn2WriteMOVBE : SchedWriteRes<[Zn2AGU, Zn2ALU]> {
588  let Latency = 5;
589}
590def : InstRW<[Zn2WriteMOVBE, ReadAfterLd], (instregex "MOVBE(16|32|64)rm")>;
591
592// m,r.
593def : InstRW<[Zn2WriteMOVBE], (instregex "MOVBE(16|32|64)mr")>;
594
595//-- Arithmetic instructions --//
596
597// ADD SUB.
598// m,r/i.
599def : InstRW<[WriteALULd], (instregex "(ADD|SUB)(8|16|32|64)m(r|i)",
600                          "(ADD|SUB)(8|16|32|64)mi8",
601                          "(ADD|SUB)64mi32")>;
602
603// ADC SBB.
604// m,r/i.
605def : InstRW<[WriteALULd],
606             (instregex "(ADC|SBB)(8|16|32|64)m(r|i)",
607              "(ADC|SBB)(16|32|64)mi8",
608              "(ADC|SBB)64mi32")>;
609
610// INC DEC NOT NEG.
611// m.
612def : InstRW<[WriteALULd],
613             (instregex "(INC|DEC|NOT|NEG)(8|16|32|64)m")>;
614
615// MUL IMUL.
616// r16.
617def Zn2WriteMul16 : SchedWriteRes<[Zn2ALU1, Zn2Multiplier]> {
618  let Latency = 3;
619}
620def Zn2WriteMul16Imm : SchedWriteRes<[Zn2ALU1, Zn2Multiplier]> {
621  let Latency = 4;
622}
623def : SchedAlias<WriteIMul16, Zn2WriteMul16>;
624def : SchedAlias<WriteIMul16Imm, Zn2WriteMul16Imm>;
625def : SchedAlias<WriteIMul16Reg, Zn2WriteMul16>;
626
627// m16.
628def Zn2WriteMul16Ld : SchedWriteRes<[Zn2AGU, Zn2ALU1, Zn2Multiplier]> {
629  let Latency = 7;
630}
631def : SchedAlias<WriteIMul16Ld, Zn2WriteMul16Ld>;
632def : SchedAlias<WriteIMul16ImmLd, Zn2WriteMul16Ld>;
633def : SchedAlias<WriteIMul16RegLd, Zn2WriteMul16Ld>;
634
635// r32.
636def Zn2WriteMul32 : SchedWriteRes<[Zn2ALU1, Zn2Multiplier]> {
637  let Latency = 3;
638}
639def : SchedAlias<WriteIMul32, Zn2WriteMul32>;
640def : SchedAlias<WriteIMul32Imm, Zn2WriteMul32>;
641def : SchedAlias<WriteIMul32Reg, Zn2WriteMul32>;
642
643// m32.
644def Zn2WriteMul32Ld : SchedWriteRes<[Zn2AGU, Zn2ALU1, Zn2Multiplier]> {
645  let Latency = 7;
646}
647def : SchedAlias<WriteIMul32Ld, Zn2WriteMul32Ld>;
648def : SchedAlias<WriteIMul32ImmLd, Zn2WriteMul32Ld>;
649def : SchedAlias<WriteIMul32RegLd, Zn2WriteMul32Ld>;
650
651// r64.
652def Zn2WriteMul64 : SchedWriteRes<[Zn2ALU1, Zn2Multiplier]> {
653  let Latency = 4;
654  let NumMicroOps = 2;
655}
656def : SchedAlias<WriteIMul64, Zn2WriteMul64>;
657def : SchedAlias<WriteIMul64Imm, Zn2WriteMul64>;
658def : SchedAlias<WriteIMul64Reg, Zn2WriteMul64>;
659
660// m64.
661def Zn2WriteMul64Ld : SchedWriteRes<[Zn2AGU, Zn2ALU1, Zn2Multiplier]> {
662  let Latency = 8;
663  let NumMicroOps = 2;
664}
665def : SchedAlias<WriteIMul64Ld, Zn2WriteMul64Ld>;
666def : SchedAlias<WriteIMul64ImmLd, Zn2WriteMul64Ld>;
667def : SchedAlias<WriteIMul64RegLd, Zn2WriteMul64Ld>;
668
669// MULX.
670// Numbers are based on the AMD SOG for Family 17h - Instruction Latencies.
671defm : Zn2WriteResPair<WriteMULX32, [Zn2ALU1, Zn2Multiplier], 3, [1, 1], 1, 4, 0>;
672defm : Zn2WriteResPair<WriteMULX64, [Zn2ALU1, Zn2Multiplier], 3, [1, 1], 1, 4, 0>;
673
674//-- Control transfer instructions --//
675
676// J(E|R)CXZ.
677def Zn2WriteJCXZ : SchedWriteRes<[Zn2ALU03]>;
678def : InstRW<[Zn2WriteJCXZ], (instrs JCXZ, JECXZ, JRCXZ)>;
679
680// INTO
681def : InstRW<[WriteMicrocoded], (instrs INTO)>;
682
683// LOOP.
684def Zn2WriteLOOP : SchedWriteRes<[Zn2ALU03]>;
685def : InstRW<[Zn2WriteLOOP], (instrs LOOP)>;
686
687// LOOP(N)E, LOOP(N)Z
688def Zn2WriteLOOPE : SchedWriteRes<[Zn2ALU03]>;
689def : InstRW<[Zn2WriteLOOPE], (instrs LOOPE, LOOPNE)>;
690
691// CALL.
692// r.
693def Zn2WriteCALLr : SchedWriteRes<[Zn2AGU, Zn2ALU03]>;
694def : InstRW<[Zn2WriteCALLr], (instregex "CALL(16|32)r")>;
695
696def : InstRW<[WriteMicrocoded], (instregex "CALL(16|32)m")>;
697
698// RET.
699def Zn2WriteRET : SchedWriteRes<[Zn2ALU03]> {
700  let NumMicroOps = 2;
701}
702def : InstRW<[Zn2WriteRET], (instregex "RET(16|32|64)", "LRET(16|32|64)",
703                            "IRET(16|32|64)")>;
704
705//-- Logic instructions --//
706
707// AND OR XOR.
708// m,r/i.
709def : InstRW<[WriteALULd],
710             (instregex "(AND|OR|XOR)(8|16|32|64)m(r|i)",
711              "(AND|OR|XOR)(8|16|32|64)mi8", "(AND|OR|XOR)64mi32")>;
712
713// 2-cycle ALU write and its 6-cycle load form; aliased by WriteBLS(Ld).
714def Zn2WriteALULat2 : SchedWriteRes<[Zn2ALU]> {
715  let Latency = 2;
716}
717def Zn2WriteALULat2Ld : SchedWriteRes<[Zn2AGU, Zn2ALU]> {
718  let Latency = 6;
719}
720
721// BT.
722// m,i.
723def : InstRW<[WriteShiftLd], (instregex "BT(16|32|64)mi8")>;
724
725// BTR BTS BTC.
726// r,r,i.
727def Zn2WriteBTRSC : SchedWriteRes<[Zn2ALU]> {
728  let Latency = 2;
729  let NumMicroOps = 2;
730}
731def : InstRW<[Zn2WriteBTRSC], (instregex "BT(R|S|C)(16|32|64)r(r|i8)")>;
732
733// m,r,i.
734def Zn2WriteBTRSCm : SchedWriteRes<[Zn2AGU, Zn2ALU]> {
735  let Latency = 6;
736  let NumMicroOps = 2;
737}
738// m,r,i.
739def : SchedAlias<WriteBitTestSetImmRMW, Zn2WriteBTRSCm>;
740def : SchedAlias<WriteBitTestSetRegRMW, Zn2WriteBTRSCm>;
741
742// BLSI BLSMSK BLSR.
743// r,r.
744def : SchedAlias<WriteBLS, Zn2WriteALULat2>;
745// r,m.
746def : SchedAlias<WriteBLSLd, Zn2WriteALULat2Ld>;
747
748// CLD STD.
749def : InstRW<[WriteALU], (instrs STD, CLD)>;
750
751// PDEP PEXT.
752// r,r,r.
753def : InstRW<[WriteMicrocoded], (instregex "PDEP(32|64)rr", "PEXT(32|64)rr")>;
754// r,r,m.
755def : InstRW<[WriteMicrocoded], (instregex "PDEP(32|64)rm", "PEXT(32|64)rm")>;
756
757// RCR RCL.
758// m,1 / m,i / m,cl.
759def : InstRW<[WriteMicrocoded], (instregex "RC(R|L)(8|16|32|64)m(1|i|CL)")>;
760
761// SHR SHL SAR.
762// m,i.
763def : InstRW<[WriteShiftLd], (instregex "S(A|H)(R|L)(8|16|32|64)m(i|1)")>;
764
765// SHRD SHLD.
766// m,r
767def : InstRW<[WriteShiftLd], (instregex "SH(R|L)D(16|32|64)mri8")>;
768
769// r,r,cl.
770def : InstRW<[WriteMicrocoded], (instregex "SH(R|L)D(16|32|64)rrCL")>;
771
772// m,r,cl.
773def : InstRW<[WriteMicrocoded], (instregex "SH(R|L)D(16|32|64)mrCL")>;
774
775//-- Misc instructions --//
776// CMPXCHG8B.
// 18 micro-ops on Zn2AGU + Zn2ALU; latency keeps the SchedWriteRes default.
777def Zn2WriteCMPXCHG8B : SchedWriteRes<[Zn2AGU, Zn2ALU]> {
778  let NumMicroOps = 18;
779}
780def : InstRW<[Zn2WriteCMPXCHG8B], (instrs CMPXCHG8B)>;
781
// CMPXCHG16B.
// Unlike CMPXCHG8B, the 16-byte form is assigned WriteMicrocoded.
782def : InstRW<[WriteMicrocoded], (instrs CMPXCHG16B)>;
783
784// LEAVE
// 2 micro-ops on Zn2ALU + Zn2AGU with latency 8.
785def Zn2WriteLEAVE : SchedWriteRes<[Zn2ALU, Zn2AGU]> {
786  let Latency = 8;
787  let NumMicroOps = 2;
788}
789def : InstRW<[Zn2WriteLEAVE], (instregex "LEAVE")>;
790
// Every instruction in this group is assigned the WriteMicrocoded class.
// PAUSE and RDPMC are named explicitly (instrs); the rest are regex-matched.
791// PAUSE.
792def : InstRW<[WriteMicrocoded], (instrs PAUSE)>;
793
794// RDTSC.
795def : InstRW<[WriteMicrocoded], (instregex "RDTSC")>;
796
797// RDPMC.
798def : InstRW<[WriteMicrocoded], (instrs RDPMC)>;
799
800// RDRAND.
801def : InstRW<[WriteMicrocoded], (instregex "RDRAND(16|32|64)r")>;
802
803// XGETBV.
804def : InstRW<[WriteMicrocoded], (instregex "XGETBV")>;
805
806//-- String instructions --//
// All string instructions below are assigned WriteMicrocoded, for each of the
// B/W/L/Q opcode suffixes.
807// CMPS.
808def : InstRW<[WriteMicrocoded], (instregex "CMPS(B|L|Q|W)")>;
809
810// LODSB/W.
811def : InstRW<[WriteMicrocoded], (instregex "LODS(B|W)")>;
812
813// LODSD/Q.
// Opcode names use the L suffix for the doubleword form.
// Same class as LODSB/W above; the two records are kept separate.
814def : InstRW<[WriteMicrocoded], (instregex "LODS(L|Q)")>;
815
816// MOVS.
817def : InstRW<[WriteMicrocoded], (instregex "MOVS(B|L|Q|W)")>;
818
819// SCAS.
820def : InstRW<[WriteMicrocoded], (instregex "SCAS(B|W|L|Q)")>;
821
822// STOS
823def : InstRW<[WriteMicrocoded], (instregex "STOS(B|L|Q|W)")>;
824
825// XADD.
// r,r: a single Zn2ALU write with default latency and micro-op count.
// r,m: assigned WriteMicrocoded.
826def Zn2XADD : SchedWriteRes<[Zn2ALU]>;
827def : InstRW<[Zn2XADD], (instregex "XADD(8|16|32|64)rr")>;
828def : InstRW<[WriteMicrocoded], (instregex "XADD(8|16|32|64)rm")>;
829
830//=== Floating Point x87 Instructions ===//
831//-- Move instructions --//
832
// Register-form x87 load: Zn2FPU13 with default latency and micro-op count.
833def Zn2WriteFLDr : SchedWriteRes<[Zn2FPU13]> ;
834
// Register-form x87 store: 2 micro-ops on Zn2FPU23, latency 5.
835def Zn2WriteSTr: SchedWriteRes<[Zn2FPU23]> {
836  let Latency = 5;
837  let NumMicroOps = 2;
838}
839
840// LD_F.
841// r.
842def : InstRW<[Zn2WriteFLDr], (instregex "LD_Frr")>;
843
844// m.
// Only the 80-bit memory form is matched here: Zn2AGU + Zn2FPU13, 2 micro-ops.
845def Zn2WriteLD_F80m : SchedWriteRes<[Zn2AGU, Zn2FPU13]> {
846  let NumMicroOps = 2;
847}
848def : InstRW<[Zn2WriteLD_F80m], (instregex "LD_F80m")>;
849
850// FBLD.
851def : InstRW<[WriteMicrocoded], (instregex "FBLDm")>;
852
853// FST(P).
854// r.
855def : InstRW<[Zn2WriteSTr], (instregex "ST_(F|FP)rr")>;
856
857// m80.
// Zn2AGU + Zn2FPU23 with latency 5; micro-op count keeps the default.
858def Zn2WriteST_FP80m : SchedWriteRes<[Zn2AGU, Zn2FPU23]> {
859  let Latency = 5;
860}
861def : InstRW<[Zn2WriteST_FP80m], (instregex "ST_FP80m")>;
862
863// FBSTP.
864// m80.
865def : InstRW<[WriteMicrocoded], (instregex "FBSTPm")>;
866
// Write for register exchange: Zn2FPU with all fields left at their defaults.
867def Zn2WriteFXCH : SchedWriteRes<[Zn2FPU]>;
868
869// FXCH.
870def : InstRW<[Zn2WriteFXCH], (instrs XCH_F)>;
871
872// FILD.
// Integer load from memory: Zn2AGU + Zn2FPU3, 2 micro-ops, latency 11.
873def Zn2WriteFILD : SchedWriteRes<[Zn2AGU, Zn2FPU3]> {
874  let Latency = 11;
875  let NumMicroOps = 2;
876}
877def : InstRW<[Zn2WriteFILD], (instregex "ILD_F(16|32|64)m")>;
878
879// FIST(P) FISTTP.
// Integer store to memory: Zn2AGU + Zn2FPU23, latency 12.
880def Zn2WriteFIST : SchedWriteRes<[Zn2AGU, Zn2FPU23]> {
881  let Latency = 12;
882}
883def : InstRW<[Zn2WriteFIST], (instregex "IS(T|TT)_(F|FP)(16|32|64)m")>;
884
// Generic writes used by the constant-load aliases below. Zn2WriteFPU3 is
// also referenced later for FINCSTP/FDECSTP/FFREE.
// Both list Zn2AGU as a resource.
885def Zn2WriteFPU13 : SchedWriteRes<[Zn2AGU, Zn2FPU13]> {
886  let Latency = 8;
887}
888
889def Zn2WriteFPU3 : SchedWriteRes<[Zn2AGU, Zn2FPU3]> {
890  let Latency = 11;
891}
892
893// FLDZ.
894def : SchedAlias<WriteFLD0, Zn2WriteFPU13>;
895
896// FLD1.
897def : SchedAlias<WriteFLD1, Zn2WriteFPU3>;
898
899// FLDPI FLDL2E etc.
900def : SchedAlias<WriteFLDC, Zn2WriteFPU3>;
901
// x87 status/control-word and state save/restore instructions. All are
// WriteMicrocoded except FINCSTP/FDECSTP/FFREE, which use Zn2WriteFPU3.
902// FNSTSW.
903// AX.
904def : InstRW<[WriteMicrocoded], (instrs FNSTSW16r)>;
905
906// m16.
907def : InstRW<[WriteMicrocoded], (instrs FNSTSWm)>;
908
909// FLDCW.
910def : InstRW<[WriteMicrocoded], (instrs FLDCW16m)>;
911
912// FNSTCW.
913def : InstRW<[WriteMicrocoded], (instrs FNSTCW16m)>;
914
915// FINCSTP FDECSTP.
// NOTE(review): Zn2WriteFPU3 (defined earlier in this file) lists Zn2AGU and
// latency 11, yet these opcodes take no memory operand here - confirm.
916def : InstRW<[Zn2WriteFPU3], (instrs FINCSTP, FDECSTP)>;
917
918// FFREE.
919def : InstRW<[Zn2WriteFPU3], (instregex "FFREE")>;
920
921// FNSAVE.
// The opcode pattern matched is "FSAVEm".
922def : InstRW<[WriteMicrocoded], (instregex "FSAVEm")>;
923
924// FRSTOR.
925def : InstRW<[WriteMicrocoded], (instregex "FRSTORm")>;
926
927//-- Arithmetic instructions --//
928
// Single-pipe writes with every field left at the SchedWriteRes default
// (the "Lat1" suffix presumably refers to the default latency - confirm).
929def Zn2WriteFPU3Lat1 : SchedWriteRes<[Zn2FPU3]> ;
930
931def Zn2WriteFPU0Lat1 : SchedWriteRes<[Zn2FPU0]> ;
932
// Memory-operand counterpart of Zn2WriteFPU0Lat1: adds Zn2AGU, latency 8.
933def Zn2WriteFPU0Lat1Ld : SchedWriteRes<[Zn2AGU, Zn2FPU0]> {
934  let Latency = 8;
935}
936
937// FCHS.
938def : InstRW<[Zn2WriteFPU3Lat1], (instregex "CHS_F")>;
939
940// FCOM(P) FUCOM(P).
941// r.
942def : InstRW<[Zn2WriteFPU0Lat1], (instregex "COM(P?)_FST0r", "UCOM_F(P?)r")>;
943// m.
944def : InstRW<[Zn2WriteFPU0Lat1Ld], (instregex "FCOM(P?)(32|64)m")>;
945
946// FCOMPP FUCOMPP.
947// r.
948def : InstRW<[Zn2WriteFPU0Lat1], (instrs FCOMPP, UCOM_FPPr)>;
949
// Write on Zn2AGU + Zn2FPU02 with latency 9; used for FCOMI/FUCOMI below.
950def Zn2WriteFPU02 : SchedWriteRes<[Zn2AGU, Zn2FPU02]>
951{
952  let Latency = 9;
953}
954
955// FCOMI(P) FUCOMI(P).
956// r.
// NOTE(review): every opcode listed is an "r"-suffixed form, yet
// Zn2WriteFPU02 also reserves Zn2AGU - confirm this is intended.
957def : InstRW<[Zn2WriteFPU02], (instrs COM_FIPr, COM_FIr, UCOM_FIPr, UCOM_FIr)>;
958
// Write on Zn2AGU + Zn2FPU03: 2 micro-ops, latency 12. ResourceCycles holds
// Zn2AGU for 1 cycle and Zn2FPU03 for 3 (entries follow the resource order).
959def Zn2WriteFPU03 : SchedWriteRes<[Zn2AGU, Zn2FPU03]>
960{
961  let Latency = 12;
962  let NumMicroOps = 2;
963  let ResourceCycles = [1,3];
964}
965
966// FICOM(P).
// m16 / m32 integer compare forms.
967def : InstRW<[Zn2WriteFPU03], (instregex "FICOM(P?)(16|32)m")>;
968
// Remaining x87 arithmetic/control opcodes. FTST, FNOP and WAIT use
// Zn2WriteFPU0Lat1, FXAM uses Zn2WriteFPU3Lat1 (both defined earlier in this
// file); everything else is WriteMicrocoded.
969// FTST.
970def : InstRW<[Zn2WriteFPU0Lat1], (instregex "TST_F")>;
971
972// FXAM.
973def : InstRW<[Zn2WriteFPU3Lat1], (instrs XAM_F)>;
974
975// FPREM.
976def : InstRW<[WriteMicrocoded], (instrs FPREM)>;
977
978// FPREM1.
979def : InstRW<[WriteMicrocoded], (instrs FPREM1)>;
980
981// FRNDINT.
982def : InstRW<[WriteMicrocoded], (instrs FRNDINT)>;
983
984// FSCALE.
985def : InstRW<[WriteMicrocoded], (instrs FSCALE)>;
986
987// FXTRACT.
988def : InstRW<[WriteMicrocoded], (instrs FXTRACT)>;
989
990// FNOP.
991def : InstRW<[Zn2WriteFPU0Lat1], (instrs FNOP)>;
992
993// WAIT.
994def : InstRW<[Zn2WriteFPU0Lat1], (instrs WAIT)>;
995
996// FNCLEX.
997def : InstRW<[WriteMicrocoded], (instrs FNCLEX)>;
998
999// FNINIT.
1000def : InstRW<[WriteMicrocoded], (instrs FNINIT)>;
1001
1002//=== Integer MMX and XMM Instructions ===//
1003
1004// PACKSSWB/DW.
// Four Zn2FPU12 writes: plain, "Y" (2 micro-ops, latency 4), and their
// memory-operand counterparts that add Zn2AGU. Only the plain and "m"
// variants are referenced by the MMX PACK records below; the Y/Ym variants
// are not used in this part of the file.
1005// mm <- mm.
1006def Zn2WriteFPU12 : SchedWriteRes<[Zn2FPU12]> ;
1007def Zn2WriteFPU12Y : SchedWriteRes<[Zn2FPU12]> {
1008  let Latency = 4;
1009  let NumMicroOps = 2;
1010}
// NOTE(review): Zn2WriteFPU12m overrides no latency, whereas Zn2WriteFPU12Ym
// sets 8 - confirm the default is intended for the load form.
1011def Zn2WriteFPU12m : SchedWriteRes<[Zn2AGU, Zn2FPU12]> ;
1012def Zn2WriteFPU12Ym : SchedWriteRes<[Zn2AGU, Zn2FPU12]> {
1013  let Latency = 8;
1014  let NumMicroOps = 2;
1015}
1016
// mm <- mm.
1017def : InstRW<[Zn2WriteFPU12], (instrs MMX_PACKSSDWrr,
1018                                     MMX_PACKSSWBrr,
1019                                     MMX_PACKUSWBrr)>;
// mm <- m64.
1020def : InstRW<[Zn2WriteFPU12m], (instrs MMX_PACKSSDWrm,
1021                                      MMX_PACKSSWBrm,
1022                                      MMX_PACKUSWBrm)>;
1023
// Zn2FPU013 write family. The register variants (plain and Y) are identical
// and leave all fields at their defaults. The three memory variants
// (m, Ld, LdY) are also identical: Zn2AGU + Zn2FPU013, 2 micro-ops, latency 8.
1024def Zn2WriteFPU013 : SchedWriteRes<[Zn2FPU013]> ;
1025def Zn2WriteFPU013Y : SchedWriteRes<[Zn2FPU013]> ;
1026def Zn2WriteFPU013m : SchedWriteRes<[Zn2AGU, Zn2FPU013]> {
1027  let Latency = 8;
1028  let NumMicroOps = 2;
1029}
1030def Zn2WriteFPU013Ld : SchedWriteRes<[Zn2AGU, Zn2FPU013]> {
1031  let Latency = 8;
1032  let NumMicroOps = 2;
1033}
1034def Zn2WriteFPU013LdY : SchedWriteRes<[Zn2AGU, Zn2FPU013]> {
1035  let Latency = 8;
1036  let NumMicroOps = 2;
1037}
1038
1039// PBLENDW.
1040// x,x,i / v,v,v,i
1041def : InstRW<[Zn2WriteFPU013], (instregex "(V?)PBLENDWrri")>;
1042// ymm
1043def : InstRW<[Zn2WriteFPU013Y], (instrs VPBLENDWYrri)>;
1044
1045// x,m,i / v,v,m,i
1046def : InstRW<[Zn2WriteFPU013Ld], (instregex "(V?)PBLENDWrmi")>;
1047// y,m,i
1048def : InstRW<[Zn2WriteFPU013LdY], (instrs VPBLENDWYrmi)>;
1049
// Zn2FPU01 writes for VPBLENDD. The Y variant differs only in issuing
// 2 micro-ops; latency keeps the default in both.
1050def Zn2WriteFPU01 : SchedWriteRes<[Zn2FPU01]> ;
1051def Zn2WriteFPU01Y : SchedWriteRes<[Zn2FPU01]> {
1052  let NumMicroOps = 2;
1053}
1054
1055// VPBLENDD.
1056// v,v,v,i.
1057def : InstRW<[Zn2WriteFPU01], (instrs VPBLENDDrri)>;
1058// ymm
1059def : InstRW<[Zn2WriteFPU01Y], (instrs VPBLENDDYrri)>;
1060
1061// v,v,m,i
// Memory-operand forms add Zn2AGU. ResourceCycles entries follow the
// resource order: Zn2AGU for 1 cycle, Zn2FPU01 for 2 (xmm) or 3 (ymm) cycles.
1062def Zn2WriteFPU01Op2 : SchedWriteRes<[Zn2AGU, Zn2FPU01]> {
1063  let NumMicroOps = 2;
1064  let Latency = 8;
1065  let ResourceCycles = [1, 2];
1066}
1067def Zn2WriteFPU01Op2Y : SchedWriteRes<[Zn2AGU, Zn2FPU01]> {
1068  let NumMicroOps = 2;
1069  let Latency = 9;
1070  let ResourceCycles = [1, 3];
1071}
1072def : InstRW<[Zn2WriteFPU01Op2], (instrs VPBLENDDrmi)>;
1073def : InstRW<[Zn2WriteFPU01Op2Y], (instrs VPBLENDDYrmi)>;
1074
// All masked-move forms below are assigned WriteMicrocoded.
1075// MASKMOVQ.
1076def : InstRW<[WriteMicrocoded], (instregex "MMX_MASKMOVQ(64)?")>;
1077
1078// MASKMOVDQU.
1079def : InstRW<[WriteMicrocoded], (instregex "(V?)MASKMOVDQU(64)?")>;
1080
1081// VPMASKMOVD.
1082// Load forms (rm), with and without the Y suffix.
// NOTE(review): this pattern names only the D element size, while the store
// pattern below covers (D|Q) - confirm the Q load form is handled elsewhere.
1083def : InstRW<[WriteMicrocoded],
1084                               (instregex "VPMASKMOVD(Y?)rm")>;
1085// m, v,v.
1086def : InstRW<[WriteMicrocoded], (instregex "VPMASKMOV(D|Q)(Y?)mr")>;
1087
1088// VPBROADCAST B/W.
// Only the memory-source byte/word broadcasts are overridden here. The two
// writes differ only in the FP resource: Zn2FPU12 for the 128-bit form,
// Zn2FPU1 for the 256-bit form.
1089// x, m8/16.
// 2 micro-ops, latency 8; Zn2AGU held 1 cycle, the FP resource 2 cycles.
1090def Zn2WriteVPBROADCAST128Ld : SchedWriteRes<[Zn2AGU, Zn2FPU12]> {
1091  let Latency = 8;
1092  let NumMicroOps = 2;
1093  let ResourceCycles = [1, 2];
1094}
1095def : InstRW<[Zn2WriteVPBROADCAST128Ld],
1096                                     (instregex "VPBROADCAST(B|W)rm")>;
1097
1098// y, m8/16
1099def Zn2WriteVPBROADCAST256Ld : SchedWriteRes<[Zn2AGU, Zn2FPU1]> {
1100  let Latency = 8;
1101  let NumMicroOps = 2;
1102  let ResourceCycles = [1, 2];
1103}
1104def : InstRW<[Zn2WriteVPBROADCAST256Ld],
1105                                     (instregex "VPBROADCAST(B|W)Yrm")>;
1106
1107// VPGATHER.
// All index/element size combinations, xmm and ymm, are WriteMicrocoded.
1108def : InstRW<[WriteMicrocoded], (instregex "VPGATHER(Q|D)(Q|D)(Y?)rm")>;
1109
1110//-- Arithmetic instructions --//
1111
1112// PCMPGTQ.
// Register form: Zn2FPU03 with default latency; the regex covers SSE, AVX
// and the Y-suffixed opcodes with one write.
1113def Zn2WritePCMPGTQr : SchedWriteRes<[Zn2FPU03]>;
1114def : InstRW<[Zn2WritePCMPGTQr], (instregex "(V?)PCMPGTQ(Y?)rr")>;
1115
1116// x <- x,m.
// The xmm and ymm memory writes are identical: Zn2AGU + Zn2FPU03, latency 8.
1117def Zn2WritePCMPGTQm : SchedWriteRes<[Zn2AGU, Zn2FPU03]> {
1118  let Latency = 8;
1119}
1120// ymm.
1121def Zn2WritePCMPGTQYm : SchedWriteRes<[Zn2AGU, Zn2FPU03]> {
1122  let Latency = 8;
1123}
1124def : InstRW<[Zn2WritePCMPGTQm], (instregex "(V?)PCMPGTQrm")>;
1125def : InstRW<[Zn2WritePCMPGTQYm], (instrs VPCMPGTQYrm)>;
1126
1127//-- Logic instructions --//
1128
1129// PSLL,PSRL,PSRA W/D/Q.
1130// x,x / v,v,x.
// Two identical writes (Zn2FPU2, latency 3), one for the xmm and one for the
// ymm opcodes. In this part of the file they are attached only to the
// byte-shift (DQ) immediate forms below.
1131def Zn2WritePShift  : SchedWriteRes<[Zn2FPU2]> {
1132  let Latency = 3;
1133}
1134def Zn2WritePShiftY : SchedWriteRes<[Zn2FPU2]> {
1135  let Latency = 3;
1136}
1137
1138// PSLL,PSRL DQ.
1139def : InstRW<[Zn2WritePShift], (instregex "(V?)PS(R|L)LDQri")>;
1140def : InstRW<[Zn2WritePShiftY], (instregex "(V?)PS(R|L)LDQYri")>;
1141
1142//=== Floating Point XMM and YMM Instructions ===//
1143//-- Move instructions --//
1144
1145// VPERM2F128.
// Register and memory forms are both WriteMicrocoded.
1146def : InstRW<[WriteMicrocoded], (instrs VPERM2F128rr)>;
1147def : InstRW<[WriteMicrocoded], (instrs VPERM2F128rm)>;
1148
// Broadcast-from-memory write: Zn2AGU + Zn2FPU13, 2 micro-ops, latency 8.
1149def Zn2WriteBROADCAST : SchedWriteRes<[Zn2AGU, Zn2FPU13]> {
1150  let NumMicroOps = 2;
1151  let Latency = 8;
1152}
1153// VBROADCASTF128.
1154def : InstRW<[Zn2WriteBROADCAST], (instrs VBROADCASTF128)>;
1155
1156// EXTRACTPS.
1157// r32,x,i.
// Latency 2; Zn2FPU12 is held for 1 cycle and Zn2FPU2 for 2 cycles
// (ResourceCycles entries follow the resource list order).
1158def Zn2WriteEXTRACTPSr : SchedWriteRes<[Zn2FPU12, Zn2FPU2]> {
1159  let Latency = 2;
1160  let ResourceCycles = [1, 2];
1161}
1162def : InstRW<[Zn2WriteEXTRACTPSr], (instregex "(V?)EXTRACTPSrr")>;
1163
// Store form: adds Zn2AGU, 2 micro-ops, latency 5.
// NOTE(review): ResourceCycles reserves Zn2AGU for 5 cycles here, while the
// other memory writes in this file use 1 for the AGU entry - confirm.
1164def Zn2WriteEXTRACTPSm : SchedWriteRes<[Zn2AGU,Zn2FPU12, Zn2FPU2]> {
1165  let Latency = 5;
1166  let NumMicroOps = 2;
1167  let ResourceCycles = [5, 1, 2];
1168}
1169// m32,x,i.
1170def : InstRW<[Zn2WriteEXTRACTPSm], (instregex "(V?)EXTRACTPSmr")>;
1171
1172// VEXTRACTF128.
// Reuses the generic Zn2WriteFPU013 / Zn2WriteFPU013m writes defined earlier.
1173// x,y,i.
1174def : InstRW<[Zn2WriteFPU013], (instrs VEXTRACTF128rr)>;
1175
1176// m128,y,i.
1177def : InstRW<[Zn2WriteFPU013m], (instrs VEXTRACTF128mr)>;
1178
// Register insert: Zn2FPU013, latency 2. The ResourceCycles override below is
// commented out, so the default occupancy applies.
1179def Zn2WriteVINSERT128r: SchedWriteRes<[Zn2FPU013]> {
1180  let Latency = 2;
1181//  let ResourceCycles = [2];
1182}
// Memory insert: adds Zn2AGU, 2 micro-ops, latency 9.
1183def Zn2WriteVINSERT128Ld: SchedWriteRes<[Zn2AGU,Zn2FPU013]> {
1184  let Latency = 9;
1185  let NumMicroOps = 2;
1186}
1187// VINSERTF128.
1188// y,y,x,i.
1189def : InstRW<[Zn2WriteVINSERT128r], (instrs VINSERTF128rr)>;
// y,y,m128,i.
1190def : InstRW<[Zn2WriteVINSERT128Ld], (instrs VINSERTF128rm)>;
1191
1192// VGATHER.
// All index/element combinations, xmm and ymm, are WriteMicrocoded.
1193def : InstRW<[WriteMicrocoded], (instregex "VGATHER(Q|D)(PD|PS)(Y?)rm")>;
1194
1195//-- Conversion instructions --//
// Register writes for PD->PS conversion. The xmm and ymm writes are identical
// (Zn2FPU3, latency 3).
1196def Zn2WriteCVTPD2PSr: SchedWriteRes<[Zn2FPU3]> {
1197  let Latency = 3;
1198}
1199def Zn2WriteCVTPD2PSYr: SchedWriteRes<[Zn2FPU3]> {
1200  let Latency = 3;
1201}
1202
1203// CVTPD2PS.
1204// x,x.
1205def : SchedAlias<WriteCvtPD2PS,  Zn2WriteCVTPD2PSr>;
1206// y,y.
1207def : SchedAlias<WriteCvtPD2PSY, Zn2WriteCVTPD2PSYr>;
1208// z,z.
// The Z-suffixed class is declared unsupported in this model.
1209defm : X86WriteResUnsupported<WriteCvtPD2PSZ>;
1210
// xmm load form: Zn2AGU + Zn2FPU03, 2 micro-ops, latency 10.
// NOTE(review): uses Zn2FPU03, while the register form and the ymm load form
// use Zn2FPU3 - confirm the resource is intended.
1211def Zn2WriteCVTPD2PSLd: SchedWriteRes<[Zn2AGU,Zn2FPU03]> {
1212  let Latency = 10;
1213  let NumMicroOps = 2;
1214}
1215// x,m128.
1216def : SchedAlias<WriteCvtPD2PSLd, Zn2WriteCVTPD2PSLd>;
1217
1218// x,m256.
// ymm load form: Zn2AGU + Zn2FPU3, latency 10, default micro-op count.
1219def Zn2WriteCVTPD2PSYLd : SchedWriteRes<[Zn2AGU, Zn2FPU3]> {
1220  let Latency = 10;
1221}
1222def : SchedAlias<WriteCvtPD2PSYLd, Zn2WriteCVTPD2PSYLd>;
1223// z,m512
1224defm : X86WriteResUnsupported<WriteCvtPD2PSZLd>;
1225
1226// CVTSD2SS.
1227// x,x.
1228// Same as Zn2WriteCVTPD2PSr
1229def : SchedAlias<WriteCvtSD2SS, Zn2WriteCVTPD2PSr>;
1230
1231// x,m64.
// Same as Zn2WriteCVTPD2PSLd
1232def : SchedAlias<WriteCvtSD2SSLd, Zn2WriteCVTPD2PSLd>;
1233
1234// CVTPS2PD.
1235// x,x.
// Register form: Zn2FPU3, latency 3.
1236def Zn2WriteCVTPS2PDr : SchedWriteRes<[Zn2FPU3]> {
1237  let Latency = 3;
1238}
1239def : SchedAlias<WriteCvtPS2PD, Zn2WriteCVTPS2PDr>;
1240
1241// x,m64.
1242// y,m128.
// One load write serves both the xmm and ymm classes: Zn2AGU + Zn2FPU3,
// 2 micro-ops, latency 10. The Z-suffixed load class is unsupported.
1243def Zn2WriteCVTPS2PDLd : SchedWriteRes<[Zn2AGU, Zn2FPU3]> {
1244  let Latency = 10;
1245  let NumMicroOps = 2;
1246}
1247def : SchedAlias<WriteCvtPS2PDLd, Zn2WriteCVTPS2PDLd>;
1248def : SchedAlias<WriteCvtPS2PDYLd, Zn2WriteCVTPS2PDLd>;
1249defm : X86WriteResUnsupported<WriteCvtPS2PDZLd>;
1250
1251// y,x.
// Same resource and latency as the xmm register write above.
1252def Zn2WriteVCVTPS2PDY : SchedWriteRes<[Zn2FPU3]> {
1253  let Latency = 3;
1254}
1255def : SchedAlias<WriteCvtPS2PDY, Zn2WriteVCVTPS2PDY>;
1256defm : X86WriteResUnsupported<WriteCvtPS2PDZ>;
1257
1258// CVTSS2SD.
1259// x,x.
1260def Zn2WriteCVTSS2SDr : SchedWriteRes<[Zn2FPU3]> {
1261  let Latency = 3;
1262}
1263def : SchedAlias<WriteCvtSS2SD, Zn2WriteCVTSS2SDr>;
1264
1265// x,m32.
// 2 micro-ops, latency 10; Zn2AGU held 1 cycle, Zn2FPU3 held 2 cycles.
1266def Zn2WriteCVTSS2SDLd : SchedWriteRes<[Zn2AGU, Zn2FPU3]> {
1267  let Latency = 10;
1268  let NumMicroOps = 2;
1269  let ResourceCycles = [1, 2];
1270}
1271def : SchedAlias<WriteCvtSS2SDLd, Zn2WriteCVTSS2SDLd>;
1272
// Integer -> FP packed conversion: Zn2FPU12 + Zn2FPU3, latency 3.
1273def Zn2WriteCVTDQ2PDr: SchedWriteRes<[Zn2FPU12,Zn2FPU3]> {
1274  let Latency = 3;
1275}
1276// CVTDQ2PD / CVTDQ2PS.
1277// x,x.
1278def : InstRW<[Zn2WriteCVTDQ2PDr], (instregex "(V)?CVTDQ2P(D|S)rr")>;
1279
1280// Same as xmm
1281// y,x.
1282def : InstRW<[Zn2WriteCVTDQ2PDr], (instrs VCVTDQ2PDYrr)>;
1283def : InstRW<[Zn2WriteCVTDQ2PDr], (instrs VCVTDQ2PSYrr)>;
1284
// FP -> integer packed conversion: same resources and latency as above.
// This write is also reused for CVT(T)SS2SI further down.
1285def Zn2WriteCVTPD2DQr: SchedWriteRes<[Zn2FPU12, Zn2FPU3]> {
1286  let Latency = 3;
1287}
1288// CVT(T)P(D|S)2DQ.
1289// x,x.
1290def : InstRW<[Zn2WriteCVTPD2DQr], (instregex "(V?)CVT(T?)P(D|S)2DQrr")>;
1291
// Load form: adds Zn2AGU, 2 micro-ops, latency 10.
1292def Zn2WriteCVTPD2DQLd: SchedWriteRes<[Zn2AGU,Zn2FPU12,Zn2FPU3]> {
1293  let Latency = 10;
1294  let NumMicroOps = 2;
1295}
1296// x,m128.
// Only the PD source is matched for the memory forms here.
1297def : InstRW<[Zn2WriteCVTPD2DQLd], (instregex "(V?)CVT(T?)PD2DQrm")>;
1298// same as xmm handling
1299// x,y.
1300def : InstRW<[Zn2WriteCVTPD2DQr], (instregex "VCVT(T?)PD2DQYrr")>;
1301// x,m256.
1302def : InstRW<[Zn2WriteCVTPD2DQLd], (instregex "VCVT(T?)PD2DQYrm")>;
1303
// MMX-destination conversion: Zn2FPU3, latency 4. Shared by the PS2PI and
// PD2PI register forms below.
1304def Zn2WriteCVTPS2PIr: SchedWriteRes<[Zn2FPU3]> {
1305  let Latency = 4;
1306}
1307// CVT(T)PS2PI.
1308// mm,x.
1309def : InstRW<[Zn2WriteCVTPS2PIr], (instregex "MMX_CVT(T?)PS2PIrr")>;
1310
1311// CVTPI2PD.
1312// x,mm.
// Reuses Zn2WriteCVTPS2PDr (Zn2FPU3, latency 3) defined earlier in this file.
1313def : InstRW<[Zn2WriteCVTPS2PDr], (instrs MMX_CVTPI2PDrr)>;
1314
1315// CVT(T)PD2PI.
1316// mm,x.
1317def : InstRW<[Zn2WriteCVTPS2PIr], (instregex "MMX_CVT(T?)PD2PIrr")>;
1318
// NOTE(review): Zn2WriteCVSTSI2SSr is not referenced by any record in this
// part of the file - confirm whether it is used elsewhere.
1319def Zn2WriteCVSTSI2SSr: SchedWriteRes<[Zn2FPU3]> {
1320  let Latency = 3;
1321}
1322
1323// same as CVTPD2DQr
1324// CVT(T)SS2SI.
1325// r32,x.
1326def : InstRW<[Zn2WriteCVTPD2DQr], (instregex "(V?)CVT(T?)SS2SI(64)?rr")>;
1327// same as CVTPD2DQLd
1328// r32,m32.
1329def : InstRW<[Zn2WriteCVTPD2DQLd], (instregex "(V?)CVT(T?)SS2SI(64)?rm")>;
1330
// Integer -> scalar double: Zn2FPU013 + Zn2FPU3, latency 3.
1331def Zn2WriteCVSTSI2SDr: SchedWriteRes<[Zn2FPU013, Zn2FPU3]> {
1332  let Latency = 3;
1333}
1334// CVTSI2SD.
1335// x,r32/64.
1336def : InstRW<[Zn2WriteCVSTSI2SDr], (instregex "(V?)CVTSI(64)?2SDrr")>;
1337
1338
// Despite the "SI2SI" in their names, these two writes are attached to the
// CVT(T)SD2SI patterns below. Register form: Zn2FPU3 + Zn2FPU2, latency 4.
// Load form: adds Zn2AGU, latency 11.
1339def Zn2WriteCVSTSI2SIr: SchedWriteRes<[Zn2FPU3, Zn2FPU2]> {
1340  let Latency = 4;
1341}
1342def Zn2WriteCVSTSI2SILd: SchedWriteRes<[Zn2AGU, Zn2FPU3, Zn2FPU2]> {
1343  let Latency = 11;
1344}
1345// CVTSD2SI.
1346// r32/64
1347def : InstRW<[Zn2WriteCVSTSI2SIr], (instregex "(V?)CVT(T?)SD2SI(64)?rr")>;
1348// r32,m32.
1349def : InstRW<[Zn2WriteCVSTSI2SILd], (instregex "(V?)CVT(T?)SD2SI(64)?rm")>;
1350
// Half-precision conversions. Every xmm/ymm class (register, store and load
// variants) is aliased to Zn2WriteMicrocoded; the Z-suffixed classes are
// declared unsupported in this model.
1351// VCVTPS2PH.
1352// x,v,i.
1353def : SchedAlias<WriteCvtPS2PH,    Zn2WriteMicrocoded>;
1354def : SchedAlias<WriteCvtPS2PHY,   Zn2WriteMicrocoded>;
1355defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
1356// m,v,i.
1357def : SchedAlias<WriteCvtPS2PHSt,  Zn2WriteMicrocoded>;
1358def : SchedAlias<WriteCvtPS2PHYSt, Zn2WriteMicrocoded>;
1359defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
1360
1361// VCVTPH2PS.
1362// v,x.
1363def : SchedAlias<WriteCvtPH2PS,    Zn2WriteMicrocoded>;
1364def : SchedAlias<WriteCvtPH2PSY,   Zn2WriteMicrocoded>;
1365defm : X86WriteResUnsupported<WriteCvtPH2PSZ>;
1366// v,m.
1367def : SchedAlias<WriteCvtPH2PSLd,  Zn2WriteMicrocoded>;
1368def : SchedAlias<WriteCvtPH2PSYLd, Zn2WriteMicrocoded>;
1369defm : X86WriteResUnsupported<WriteCvtPH2PSZLd>;
1370
1371//-- SSE4A instructions --//
1372// EXTRQ
// Zn2FPU12 + Zn2FPU2, latency 3.
1373def Zn2WriteEXTRQ: SchedWriteRes<[Zn2FPU12, Zn2FPU2]> {
1374  let Latency = 3;
1375}
1376def : InstRW<[Zn2WriteEXTRQ], (instregex "EXTRQ")>;
1377
1378// INSERTQ
// Zn2FPU03 + Zn2FPU1, latency 4.
1379def Zn2WriteINSERTQ: SchedWriteRes<[Zn2FPU03,Zn2FPU1]> {
1380  let Latency = 4;
1381}
1382def : InstRW<[Zn2WriteINSERTQ], (instregex "INSERTQ")>;
1383
1384//-- SHA instructions --//
// Each SHA opcode below gets a register write and a load write that adds
// Zn2AGU. For every pair where the register latency is explicit, the load
// latency is 7 cycles higher.
1385// SHA256MSG2
// Register and memory forms are both WriteMicrocoded.
1386def : InstRW<[WriteMicrocoded], (instregex "SHA256MSG2(Y?)r(r|m)")>;
1387
1388// SHA1MSG1, SHA256MSG1
1389// x,x.
1390def Zn2WriteSHA1MSG1r : SchedWriteRes<[Zn2FPU12]> {
1391  let Latency = 2;
1392}
1393def : InstRW<[Zn2WriteSHA1MSG1r], (instregex "SHA(1|256)MSG1rr")>;
1394// x,m.
1395def Zn2WriteSHA1MSG1Ld : SchedWriteRes<[Zn2AGU, Zn2FPU12]> {
1396  let Latency = 9;
1397}
1398def : InstRW<[Zn2WriteSHA1MSG1Ld], (instregex "SHA(1|256)MSG1rm")>;
1399
1400// SHA1MSG2
1401// x,x.
// Zn2FPU12 with default latency.
1402def Zn2WriteSHA1MSG2r : SchedWriteRes<[Zn2FPU12]> ;
1403def : InstRW<[Zn2WriteSHA1MSG2r], (instregex "SHA1MSG2rr")>;
1404// x,m.
1405def Zn2WriteSHA1MSG2Ld : SchedWriteRes<[Zn2AGU, Zn2FPU12]> {
1406  let Latency = 8;
1407}
1408def : InstRW<[Zn2WriteSHA1MSG2Ld], (instregex "SHA1MSG2rm")>;
1409
1410// SHA1NEXTE
1411// x,x.
// Zn2FPU1 with default latency.
1412def Zn2WriteSHA1NEXTEr : SchedWriteRes<[Zn2FPU1]> ;
1413def : InstRW<[Zn2WriteSHA1NEXTEr], (instregex "SHA1NEXTErr")>;
1414// x,m.
1415def Zn2WriteSHA1NEXTELd : SchedWriteRes<[Zn2AGU, Zn2FPU1]> {
1416  let Latency = 8;
1417}
1418def : InstRW<[Zn2WriteSHA1NEXTELd], (instregex "SHA1NEXTErm")>;
1419
1420// SHA1RNDS4
1421// x,x.
1422def Zn2WriteSHA1RNDS4r : SchedWriteRes<[Zn2FPU1]> {
1423  let Latency = 6;
1424}
1425def : InstRW<[Zn2WriteSHA1RNDS4r], (instregex "SHA1RNDS4rr")>;
1426// x,m.
1427def Zn2WriteSHA1RNDS4Ld : SchedWriteRes<[Zn2AGU, Zn2FPU1]> {
1428  let Latency = 13;
1429}
1430def : InstRW<[Zn2WriteSHA1RNDS4Ld], (instregex "SHA1RNDS4rm")>;
1431
1432// SHA256RNDS2
1433// x,x.
1434def Zn2WriteSHA256RNDS2r : SchedWriteRes<[Zn2FPU1]> {
1435  let Latency = 4;
1436}
1437def : InstRW<[Zn2WriteSHA256RNDS2r], (instregex "SHA256RNDS2rr")>;
1438// x,m.
1439def Zn2WriteSHA256RNDS2Ld : SchedWriteRes<[Zn2AGU, Zn2FPU1]> {
1440  let Latency = 11;
1441}
1442def : InstRW<[Zn2WriteSHA256RNDS2Ld], (instregex "SHA256RNDS2rm")>;
1443
1444//-- Arithmetic instructions --//
1445
1446// VDIVPS.
1447// TODO - convert to Zn2WriteResFpuPair
// In all four divide writes below the Zn2FPU3 ResourceCycles entry equals the
// write's Latency, i.e. the divider is modelled as busy for the whole op.
1448// y,y,y.
1449def Zn2WriteVDIVPSYr : SchedWriteRes<[Zn2FPU3]> {
1450  let Latency = 10;
1451  let ResourceCycles = [10];
1452}
1453def : SchedAlias<WriteFDivY,   Zn2WriteVDIVPSYr>;
1454
1455// y,y,m256.
// NOTE(review): the load form holds Zn2FPU3 for 17 cycles versus 10 for the
// register form, so load latency is counted as divider occupancy - confirm.
1456def Zn2WriteVDIVPSYLd : SchedWriteRes<[Zn2AGU, Zn2FPU3]> {
1457  let Latency = 17;
1458  let NumMicroOps = 2;
1459  let ResourceCycles = [1, 17];
1460}
1461def : SchedAlias<WriteFDivYLd,  Zn2WriteVDIVPSYLd>;
1462
1463// VDIVPD.
1464// TODO - convert to Zn2WriteResFpuPair
1465// y,y,y.
1466def Zn2WriteVDIVPDY : SchedWriteRes<[Zn2FPU3]> {
1467  let Latency = 13;
1468  let ResourceCycles = [13];
1469}
1470def : SchedAlias<WriteFDiv64Y, Zn2WriteVDIVPDY>;
1471
1472// y,y,m256.
// Same pattern as the PS load form: Zn2FPU3 held for 20 cycles versus 13.
1473def Zn2WriteVDIVPDYLd : SchedWriteRes<[Zn2AGU, Zn2FPU3]> {
1474  let Latency = 20;
1475  let NumMicroOps = 2;
1476  let ResourceCycles = [1,20];
1477}
1478def : SchedAlias<WriteFDiv64YLd, Zn2WriteVDIVPDYLd>;
1479
1480// DPPS.
// Only the Y-suffixed DPPS classes are aliased here, to Zn2WriteMicrocoded.
// NOTE(review): the operand comments below describe the xmm forms, but the
// aliased classes are WriteDPPSY / WriteDPPSYLd - confirm.
1481// x,x,i / v,v,v,i.
1482def : SchedAlias<WriteDPPSY,  Zn2WriteMicrocoded>;
1483
1484// x,m,i / v,v,m,i.
1485def : SchedAlias<WriteDPPSYLd,Zn2WriteMicrocoded>;
1486
1487// DPPD.
// Register and load classes are both aliased to Zn2WriteMicrocoded.
1488// x,x,i.
1489def : SchedAlias<WriteDPPD,   Zn2WriteMicrocoded>;
1490
1491// x,m,i.
1492def : SchedAlias<WriteDPPDLd, Zn2WriteMicrocoded>;
1493
1494// RSQRTSS
1495// TODO - convert to Zn2WriteResFpuPair
1496// x,x.
// Register form: Zn2FPU02, latency 5.
1497def Zn2WriteRSQRTSSr : SchedWriteRes<[Zn2FPU02]> {
1498  let Latency = 5;
1499}
1500def : SchedAlias<WriteFRsqrt, Zn2WriteRSQRTSSr>;
1501
1502// x,m128.
// Load form: 2 micro-ops, latency 12; Zn2AGU held 1 cycle, Zn2FPU02 held 2.
1503def Zn2WriteRSQRTSSLd: SchedWriteRes<[Zn2AGU, Zn2FPU02]> {
1504  let Latency = 12;
1505  let NumMicroOps = 2;
1506  let ResourceCycles = [1,2];
1507}
1508def : SchedAlias<WriteFRsqrtLd, Zn2WriteRSQRTSSLd>;
1509
1510// RSQRTPS
1511// TODO - convert to Zn2WriteResFpuPair
1512// y,y.
// ymm register form uses Zn2FPU01 (not Zn2FPU02): 2 micro-ops, latency 5,
// resource held for 2 cycles.
1513def Zn2WriteRSQRTPSYr : SchedWriteRes<[Zn2FPU01]> {
1514  let Latency = 5;
1515  let NumMicroOps = 2;
1516  let ResourceCycles = [2];
1517}
1518def : SchedAlias<WriteFRsqrtY, Zn2WriteRSQRTPSYr>;
1519
1520// y,m256.
// ymm load form: no ResourceCycles override, so default occupancy applies.
1521def Zn2WriteRSQRTPSYLd : SchedWriteRes<[Zn2AGU, Zn2FPU01]> {
1522  let Latency = 12;
1523  let NumMicroOps = 2;
1524}
1525def : SchedAlias<WriteFRsqrtYLd, Zn2WriteRSQRTPSYLd>;
1526
1527//-- Other instructions --//
1528
1529// VZEROUPPER.
// Assigned the generic WriteALU class.
1530def : InstRW<[WriteALU], (instrs VZEROUPPER)>;
1531
1532// VZEROALL.
// Assigned WriteMicrocoded, unlike VZEROUPPER.
1533def : InstRW<[WriteMicrocoded], (instrs VZEROALL)>;
1534
1535///////////////////////////////////////////////////////////////////////////////
1536// Dependency breaking instructions.
1537///////////////////////////////////////////////////////////////////////////////
1538
// Zero-idiom table for this model. Each DepBreakingClass pairs a list of
// register-register opcodes with the predicate that must hold for the
// instruction to count as a zero idiom. Every class here uses
// ZeroIdiomPredicate, which is defined outside this part of the file
// (presumably "both source operands are the same register" - confirm).
// The classes are grouped by register file: GPR, MMX, SSE, AVX xmm, AVX ymm.
1539def : IsZeroIdiomFunction<[
1540  // GPR Zero-idioms.
  // Only the 32- and 64-bit SUB/XOR forms are listed.
1541  DepBreakingClass<[
1542    SUB32rr, SUB64rr,
1543    XOR32rr, XOR64rr
1544  ], ZeroIdiomPredicate>,
1545
1546  // MMX Zero-idioms.
  // Includes the saturating subtracts (PSUBS*/PSUBUS*), which the SSE/AVX
  // classes below do not list.
1547  DepBreakingClass<[
1548    MMX_PXORrr, MMX_PANDNrr, MMX_PSUBBrr,
1549    MMX_PSUBDrr, MMX_PSUBQrr, MMX_PSUBWrr,
1550    MMX_PSUBSBrr, MMX_PSUBSWrr, MMX_PSUBUSBrr, MMX_PSUBUSWrr,
1551    MMX_PCMPGTBrr, MMX_PCMPGTDrr, MMX_PCMPGTWrr
1552  ], ZeroIdiomPredicate>,
1553
1554  // SSE Zero-idioms.
1555  DepBreakingClass<[
1556    // fp variants.
1557    XORPSrr, XORPDrr, ANDNPSrr, ANDNPDrr,
1558
1559    // int variants.
1560    PXORrr, PANDNrr,
1561    PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
1562    PCMPGTBrr, PCMPGTDrr, PCMPGTQrr, PCMPGTWrr
1563  ], ZeroIdiomPredicate>,
1564
1565  // AVX XMM Zero-idioms.
1566  DepBreakingClass<[
1567    // fp variants.
1568    VXORPSrr, VXORPDrr, VANDNPSrr, VANDNPDrr,
1569
1570    // int variants.
1571    VPXORrr, VPANDNrr,
1572    VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
1573    VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr
1574  ], ZeroIdiomPredicate>,
1575
1576  // AVX YMM Zero-idioms.
1577  DepBreakingClass<[
1578    // fp variants
1579    VXORPSYrr, VXORPDYrr, VANDNPSYrr, VANDNPDYrr,
1580
1581    // int variants
1582    VPXORYrr, VPANDNYrr,
1583    VPSUBBYrr, VPSUBWYrr, VPSUBDYrr, VPSUBQYrr,
1584    VPCMPGTBYrr, VPCMPGTWYrr, VPCMPGTDYrr, VPCMPGTQYrr
1585  ], ZeroIdiomPredicate>
1586]>;
1587
// Dependency-breaking table for opcodes listed separately from the
// zero-idiom table in this file. Same structure: an opcode list plus the
// predicate that must hold. All classes use ZeroIdiomPredicate except the
// CMP class, which uses CheckSameRegOperand<0, 1>.
1587def : IsDepBreakingFunction<[
1588  // GPR
1589  DepBreakingClass<[ SBB32rr, SBB64rr ], ZeroIdiomPredicate>,
  // CMP is checked on operand indices 0 and 1 directly.
1590  DepBreakingClass<[ CMP32rr, CMP64rr ], CheckSameRegOperand<0, 1> >,
1591
1592  // MMX
  // Compare-equal opcodes; MMX has no Q form listed.
1593  DepBreakingClass<[
1594    MMX_PCMPEQBrr, MMX_PCMPEQWrr, MMX_PCMPEQDrr
1595  ], ZeroIdiomPredicate>,
1596
1597  // SSE
1598  DepBreakingClass<[
1599    PCMPEQBrr, PCMPEQWrr, PCMPEQDrr, PCMPEQQrr
1600  ], ZeroIdiomPredicate>,
1601
1602  // AVX XMM
1603  DepBreakingClass<[
1604    VPCMPEQBrr, VPCMPEQWrr, VPCMPEQDrr, VPCMPEQQrr
1605  ], ZeroIdiomPredicate>,
1606
1607  // AVX YMM
1608  DepBreakingClass<[
1609    VPCMPEQBYrr, VPCMPEQWYrr, VPCMPEQDYrr, VPCMPEQQYrr
1610  ], ZeroIdiomPredicate>,
1611]>;
1613
1614} // SchedModel
1615