xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleZnver1.td (revision 357378bbdedf24ce2b90e9bd831af4a9db3ec70a)
1//=- X86ScheduleZnver1.td - X86 Znver1 Scheduling -------------*- tablegen -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the machine model for Znver1 to support instruction
10// scheduling and other instruction cost heuristics.
11//
12//===----------------------------------------------------------------------===//
13
// Top-level machine model for Znver1; the definitions that follow in this
// file are attached to it via `let SchedModel = Znver1Model in { ... }`.
14def Znver1Model : SchedMachineModel {
15  // Zen can decode 4 instructions per cycle.
16  let IssueWidth = 4;
17  // MicroOpBufferSize is derived from the size of the reorder buffer.
18  let MicroOpBufferSize = 192;
19  let LoadLatency = 4; // Same 4 cycles as ReadAfterLd and WriteLoad below.
20  let MispredictPenalty = 17;
21  let HighLatency = 25;
22  let PostRAScheduler = 1;
23
24  // FIXME: This setting is only needed while the model is incomplete.
25  // Not all instructions have been given scheduling information yet,
26  // so CompleteModel is cleared to declare
27  // that the model is incomplete.
28  let CompleteModel = 0;
29}
30
31let SchedModel = Znver1Model in {
32
33// Zen can issue micro-ops to 10 different units in one cycle.
34// These are
35//  * Four integer ALU units (ZnALU0, ZnALU1, ZnALU2, ZnALU3)
36//  * Two AGU units (ZnAGU0, ZnAGU1)
37//  * Four FPU units (ZnFPU0, ZnFPU1, ZnFPU2, ZnFPU3)
38// The AGUs feed the load/store queues at two loads and one store per cycle.
39
40// Four ALU units are defined below
41def ZnALU0 : ProcResource<1>;
42def ZnALU1 : ProcResource<1>;
43def ZnALU2 : ProcResource<1>;
44def ZnALU3 : ProcResource<1>;
45
46// Two AGU units are defined below
47def ZnAGU0 : ProcResource<1>;
48def ZnAGU1 : ProcResource<1>;
49
50// Four FPU units are defined below
51def ZnFPU0 : ProcResource<1>;
52def ZnFPU1 : ProcResource<1>;
53def ZnFPU2 : ProcResource<1>;
54def ZnFPU3 : ProcResource<1>;
55
56// FPU grouping
// The digits in each group name list the FPU units it contains,
// e.g. ZnFPU013 groups ZnFPU0, ZnFPU1 and ZnFPU3.
57def ZnFPU013  : ProcResGroup<[ZnFPU0, ZnFPU1, ZnFPU3]>;
58def ZnFPU01   : ProcResGroup<[ZnFPU0, ZnFPU1]>;
59def ZnFPU12   : ProcResGroup<[ZnFPU1, ZnFPU2]>;
60def ZnFPU13   : ProcResGroup<[ZnFPU1, ZnFPU3]>;
61def ZnFPU23   : ProcResGroup<[ZnFPU2, ZnFPU3]>;
62def ZnFPU02   : ProcResGroup<[ZnFPU0, ZnFPU2]>;
63def ZnFPU03   : ProcResGroup<[ZnFPU0, ZnFPU3]>;
64
65// Below are the groupings of the units.
66// Micro-ops that may be issued to more than one unit are handled this way.
67
68// ALU grouping
69// ZnALU03 - 0,3 grouping
70def ZnALU03: ProcResGroup<[ZnALU0, ZnALU3]>;
71
72// 56 Entry (14x4 entries) Int Scheduler
73def ZnALU : ProcResGroup<[ZnALU0, ZnALU1, ZnALU2, ZnALU3]> {
74  let BufferSize=56;
75}
76
77// 28 Entry (14x2) AGU group. AGUs can't be used for all ALU operations
78// but are relevant for some instructions
79def ZnAGU : ProcResGroup<[ZnAGU0, ZnAGU1]> {
80  let BufferSize=28;
81}
82
83// Integer Multiplication issued on ALU1.
84def ZnMultiplier : ProcResource<1>;
85
86// Integer division issued on ALU2.
87def ZnDivider : ProcResource<1>;
88
89// Captures the 4-cycle integer load-to-use latency.
90def : ReadAdvance<ReadAfterLd, 4>;
91
92// Captures the 8-cycle vector load-to-use latency.
93def : ReadAdvance<ReadAfterVecLd, 8>;
94def : ReadAdvance<ReadAfterVecXLd, 8>;
95def : ReadAdvance<ReadAfterVecYLd, 8>;
96
97def : ReadAdvance<ReadInt2Fpu, 0>;
98
99// The Integer PRF for Zen is 168 entries, and it holds the architectural and
100// speculative version of the 64-bit integer registers.
101// Reference: "Software Optimization Guide for AMD Family 17h Processors"
102def ZnIntegerPRF : RegisterFile<168, [GR64, CCR]>;
103
104// Floating-point scheduler: 36 entries (9x4) shared by the four FPU units.
105def ZnFPU     : ProcResGroup<[ZnFPU0, ZnFPU1, ZnFPU2, ZnFPU3]> {
106  let BufferSize=36;
107}
108
109// The Zen FP Retire Queue renames SIMD and FP uOps onto a pool of 160 128-bit
110// registers. Operations on 256-bit data types are cracked into two COPs.
111// Reference: "Software Optimization Guide for AMD Family 17h Processors"
112def ZnFpuPRF: RegisterFile<160, [VR64, VR128, VR256], [1, 1, 2]>;
113
114// The unit can track up to 192 macro ops in-flight.
115// The retire unit handles in-order commit of up to 8 macro ops per cycle.
116// Reference: "Software Optimization Guide for AMD Family 17h Processors"
117// To be noted, the retire unit is shared between integer and FP ops.
118// In SMT mode it is 96 entries per thread. But, we do not use the conservative
119// value here because there is currently no way to fully model the SMT mode,
120// so there is no point in trying.
121def ZnRCU : RetireControlUnit<192, 8>;
122
123// FIXME: there are 72 read buffers and 44 write buffers.
124
125// A folded load is an instruction that both loads from memory and performs
126// an operation on the loaded value, e.g. ADDPD xmm,[mem].
127// Instructions with folded loads are usually micro-fused, so they only appear
128// as two micro-ops:
129//      a. load and
130//      b. addpd
131// This multiclass is for folded loads for integer units.
// It emits two WriteRes records: one for SchedRW itself (register form) and
// one for SchedRW.Folded (load-folded form).
//   SchedRW  - the foldable scheduling write being described.
//   ExePorts - resources used by the register form; the folded form uses
//              ZnAGU followed by these.
//   Lat      - latency of the register form.
//   Res      - ReleaseAtCycles for ExePorts. When empty, both forms leave
//              ReleaseAtCycles empty; otherwise the folded form prepends 1
//              for ZnAGU.
//   UOps     - micro-op count of the register form.
//   LoadLat  - cycles added to Lat for the folded form (default 4).
//   LoadUOps - micro-ops added to UOps for the folded form (default 1).
132multiclass ZnWriteResPair<X86FoldableSchedWrite SchedRW,
133                          list<ProcResourceKind> ExePorts,
134                          int Lat, list<int> Res = [], int UOps = 1,
135                          int LoadLat = 4, int LoadUOps = 1> {
136  // Register variant: uses ExePorts with the given latency, cycles and uops.
137  def : WriteRes<SchedRW, ExePorts> {
138    let Latency = Lat;
139    let ReleaseAtCycles = Res;
140    let NumMicroOps = UOps;
141  }
142
143  // Memory variant additionally uses ZnAGU and
144  // adds LoadLat cycles to the latency (default = 4).
145  def : WriteRes<SchedRW.Folded, !listconcat([ZnAGU], ExePorts)> {
146    let Latency = !add(Lat, LoadLat);
147    let ReleaseAtCycles = !if(!empty(Res), [], !listconcat([1], Res));
148    let NumMicroOps = !add(UOps, LoadUOps);
149  }
150}
151
152// This multiclass is for folded loads for floating point units.
// It emits two WriteRes records: one for SchedRW itself (register form) and
// one for SchedRW.Folded (load-folded form).
//   SchedRW  - the foldable scheduling write being described.
//   ExePorts - resources used by the register form; the folded form uses
//              ZnAGU followed by these.
//   Lat      - latency of the register form.
//   Res      - ReleaseAtCycles for ExePorts. When empty, both forms leave
//              ReleaseAtCycles empty; otherwise the folded form prepends 1
//              for ZnAGU.
//   UOps     - micro-op count of the register form.
//   LoadLat  - cycles added to Lat for the folded form (default 7).
//   LoadUOps - micro-ops added to UOps for the folded form (default 0).
153multiclass ZnWriteResFpuPair<X86FoldableSchedWrite SchedRW,
154                          list<ProcResourceKind> ExePorts,
155                          int Lat, list<int> Res = [], int UOps = 1,
156                          int LoadLat = 7, int LoadUOps = 0> {
157  // Register variant: uses ExePorts with the given latency, cycles and uops.
158  def : WriteRes<SchedRW, ExePorts> {
159    let Latency = Lat;
160    let ReleaseAtCycles = Res;
161    let NumMicroOps = UOps;
162  }
163
164  // Memory variant additionally uses ZnAGU and
165  // adds LoadLat cycles to the latency (default = 7).
166  def : WriteRes<SchedRW.Folded, !listconcat([ZnAGU], ExePorts)> {
167    let Latency = !add(Lat, LoadLat);
168    let ReleaseAtCycles = !if(!empty(Res), [], !listconcat([1], Res));
169    let NumMicroOps = !add(UOps, LoadUOps);
170  }
171}
172
173// WriteRMW is set for instructions with Memory write
174// operation in codegen
175def : WriteRes<WriteRMW, [ZnAGU]>;
176
177def : WriteRes<WriteStore,   [ZnAGU]>;
178def : WriteRes<WriteStoreNT, [ZnAGU]>;
179def : WriteRes<WriteMove,    [ZnALU]>;
180def : WriteRes<WriteLoad,    [ZnAGU]> { let Latency = 4; }
181
182// Models the clobbering of the read-write mask operand of a GATHER operation.
183// It uses no resources or micro-ops and only has latency (8 here; note that WriteLoad above uses 4).
184def : WriteRes<WriteVecMaskedGatherWriteback, []> { let Latency = 8; let NumMicroOps = 0; }
185
186def : WriteRes<WriteZero,  []>;
187def : WriteRes<WriteLEA, [ZnALU]>;
188defm : ZnWriteResPair<WriteALU,   [ZnALU], 1>;
189defm : ZnWriteResPair<WriteADC,   [ZnALU], 1>;
190
191defm : ZnWriteResPair<WriteIMul8,     [ZnALU1, ZnMultiplier], 4>;
192
193defm : X86WriteRes<WriteBSWAP32, [ZnALU], 1, [4], 1>;
194defm : X86WriteRes<WriteBSWAP64, [ZnALU], 1, [4], 1>;
195defm : X86WriteRes<WriteCMPXCHG, [ZnALU], 1, [1], 1>;
196defm : X86WriteRes<WriteCMPXCHGRMW,[ZnALU,ZnAGU], 8, [1,1], 5>;
197defm : X86WriteRes<WriteXCHG, [ZnALU], 1, [2], 2>;
198
199defm : ZnWriteResPair<WriteShift,    [ZnALU], 1>;
200defm : ZnWriteResPair<WriteShiftCL,  [ZnALU], 1>;
201defm : ZnWriteResPair<WriteRotate,   [ZnALU], 1>;
202defm : ZnWriteResPair<WriteRotateCL, [ZnALU], 1>;
203
204defm : X86WriteRes<WriteSHDrri, [ZnALU], 1, [1], 1>;
205defm : X86WriteResUnsupported<WriteSHDrrcl>;
206defm : X86WriteResUnsupported<WriteSHDmri>;
207defm : X86WriteResUnsupported<WriteSHDmrcl>;
208
209defm : ZnWriteResPair<WriteJump,  [ZnALU], 1>;
210defm : ZnWriteResFpuPair<WriteCRC32, [ZnFPU0], 3>;
211
212defm : ZnWriteResPair<WriteCMOV,   [ZnALU], 1>;
213def  : WriteRes<WriteSETCC,  [ZnALU]>;
214def  : WriteRes<WriteSETCCStore,  [ZnALU, ZnAGU]>;
215defm : X86WriteRes<WriteLAHFSAHF, [ZnALU], 2, [1], 2>;
216
217defm : X86WriteRes<WriteBitTest,         [ZnALU], 1, [1], 1>;
218defm : X86WriteRes<WriteBitTestImmLd,    [ZnALU,ZnAGU], 5, [1,1], 2>;
219defm : X86WriteRes<WriteBitTestRegLd,    [ZnALU,ZnAGU], 5, [1,1], 2>;
220defm : X86WriteRes<WriteBitTestSet,      [ZnALU], 2, [1], 2>;
221
222// Bit counts.
223defm : ZnWriteResPair<WriteBSF, [ZnALU], 3, [12], 6, 4, 2>;
224defm : ZnWriteResPair<WriteBSR, [ZnALU], 4, [16], 6, 4, 2>;
225defm : ZnWriteResPair<WriteLZCNT,          [ZnALU], 2>;
226defm : ZnWriteResPair<WriteTZCNT,          [ZnALU], 2, [2], 2, 4, 0>;
227defm : ZnWriteResPair<WritePOPCNT,         [ZnALU], 1>;
228
229// Treat misc copies as a move.
230def : InstRW<[WriteMove], (instrs COPY)>;
231
232// BMI1 BEXTR, BMI2 BZHI
233defm : ZnWriteResPair<WriteBEXTR, [ZnALU], 1, [1], 1, 4, 1>;
234defm : ZnWriteResPair<WriteBLS,   [ZnALU], 2, [2], 2, 4, 1>;
235defm : ZnWriteResPair<WriteBZHI,  [ZnALU], 1>;
236
237// IDIV
238defm : ZnWriteResPair<WriteDiv8,   [ZnALU2, ZnDivider], 15, [1,15], 1>;
239defm : ZnWriteResPair<WriteDiv16,  [ZnALU2, ZnDivider], 17, [1,17], 2>;
240defm : ZnWriteResPair<WriteDiv32,  [ZnALU2, ZnDivider], 25, [1,25], 2>;
241defm : ZnWriteResPair<WriteDiv64,  [ZnALU2, ZnDivider], 41, [1,41], 2>;
242defm : ZnWriteResPair<WriteIDiv8,  [ZnALU2, ZnDivider], 15, [1,15], 1>;
243defm : ZnWriteResPair<WriteIDiv16, [ZnALU2, ZnDivider], 17, [1,17], 2>;
244defm : ZnWriteResPair<WriteIDiv32, [ZnALU2, ZnDivider], 25, [1,25], 2>;
245defm : ZnWriteResPair<WriteIDiv64, [ZnALU2, ZnDivider], 41, [1,41], 2>;
246
247// IMULH
// WriteIMulH is modelled on ZnMultiplier with a latency of 3 and a micro-op
// count of 0. The record is named so the load variant below can reuse its
// fields.
248def ZnWriteIMulH : WriteRes<WriteIMulH, [ZnMultiplier]>{
249  let Latency = 3;
250  let NumMicroOps = 0;
251}
// Load variant: same micro-op count, with the model's LoadLatency added to
// the register-form latency.
252def  : WriteRes<WriteIMulHLd, [ZnMultiplier]> {
253  let Latency = !add(ZnWriteIMulH.Latency, Znver1Model.LoadLatency);
254  let NumMicroOps = ZnWriteIMulH.NumMicroOps;
255}
256
257// Floating point operations
258defm : X86WriteRes<WriteFLoad,         [ZnAGU], 8, [1], 1>;
259defm : X86WriteRes<WriteFLoadX,        [ZnAGU], 8, [1], 1>;
260defm : X86WriteRes<WriteFLoadY,        [ZnAGU], 8, [1], 1>;
261defm : X86WriteRes<WriteFMaskedLoad,   [ZnAGU,ZnFPU01], 8, [1,1], 1>;
262defm : X86WriteRes<WriteFMaskedLoadY,  [ZnAGU,ZnFPU01], 8, [1,2], 2>;
263
264defm : X86WriteRes<WriteFStore,        [ZnAGU], 1, [1], 1>;
265defm : X86WriteRes<WriteFStoreX,       [ZnAGU], 1, [1], 1>;
266defm : X86WriteRes<WriteFStoreY,       [ZnAGU], 1, [1], 1>;
267defm : X86WriteRes<WriteFStoreNT,      [ZnAGU,ZnFPU2], 8, [1,1], 1>;
268defm : X86WriteRes<WriteFStoreNTX,     [ZnAGU], 1, [1], 1>;
269defm : X86WriteRes<WriteFStoreNTY,     [ZnAGU], 1, [1], 1>;
270defm : X86WriteRes<WriteFMaskedStore32,  [ZnAGU,ZnFPU01], 4, [1,1], 1>;
271defm : X86WriteRes<WriteFMaskedStore32Y, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
272defm : X86WriteRes<WriteFMaskedStore64,  [ZnAGU,ZnFPU01], 4, [1,1], 1>;
273defm : X86WriteRes<WriteFMaskedStore64Y, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
274
275defm : X86WriteRes<WriteFMove,         [ZnFPU], 1, [1], 1>;
276defm : X86WriteRes<WriteFMoveX,        [ZnFPU], 1, [1], 1>;
277defm : X86WriteRes<WriteFMoveY,        [ZnFPU], 1, [1], 1>;
278defm : X86WriteResUnsupported<WriteFMoveZ>;
279
280defm : ZnWriteResFpuPair<WriteFAdd,      [ZnFPU23], 3>;
281defm : ZnWriteResFpuPair<WriteFAddX,     [ZnFPU23], 3>;
282defm : ZnWriteResFpuPair<WriteFAddY,     [ZnFPU23], 3, [2], 2>;
283defm : X86WriteResPairUnsupported<WriteFAddZ>;
284defm : ZnWriteResFpuPair<WriteFAdd64,    [ZnFPU23], 3>;
285defm : ZnWriteResFpuPair<WriteFAdd64X,   [ZnFPU23], 3>;
286defm : ZnWriteResFpuPair<WriteFAdd64Y,   [ZnFPU23], 3, [2], 2>;
287defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
288defm : ZnWriteResFpuPair<WriteFCmp,      [ZnFPU01], 1>;
289defm : ZnWriteResFpuPair<WriteFCmpX,     [ZnFPU01], 1>;
290defm : ZnWriteResFpuPair<WriteFCmpY,     [ZnFPU01], 1, [2], 2>;
291defm : X86WriteResPairUnsupported<WriteFCmpZ>;
292defm : ZnWriteResFpuPair<WriteFCmp64,    [ZnFPU01], 1>;
293defm : ZnWriteResFpuPair<WriteFCmp64X,   [ZnFPU01], 1>;
294defm : ZnWriteResFpuPair<WriteFCmp64Y,   [ZnFPU01], 1, [2], 2>;
295defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
296defm : ZnWriteResFpuPair<WriteFCom,      [ZnFPU01,ZnFPU2], 3, [1,1], 2>;
297defm : ZnWriteResFpuPair<WriteFComX,     [ZnFPU01,ZnFPU2], 3, [1,1], 2>;
298defm : ZnWriteResFpuPair<WriteFBlend,    [ZnFPU01], 1>;
299defm : ZnWriteResFpuPair<WriteFBlendY,   [ZnFPU01], 1>;
300defm : X86WriteResPairUnsupported<WriteFBlendZ>;
301defm : ZnWriteResFpuPair<WriteFVarBlend, [ZnFPU01], 1>;
302defm : ZnWriteResFpuPair<WriteFVarBlendY,[ZnFPU01], 1, [2], 2>;
303defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
304defm : ZnWriteResFpuPair<WriteCvtSS2I,   [ZnFPU3],  5>;
305defm : ZnWriteResFpuPair<WriteCvtPS2I,   [ZnFPU3],  5>;
306defm : ZnWriteResFpuPair<WriteCvtPS2IY,  [ZnFPU3],  5>;
307defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
308defm : ZnWriteResFpuPair<WriteCvtSD2I,   [ZnFPU3],  5>;
309defm : ZnWriteResFpuPair<WriteCvtPD2I,   [ZnFPU3],  5>;
310defm : ZnWriteResFpuPair<WriteCvtPD2IY,  [ZnFPU3],  5>;
311defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
312defm : ZnWriteResFpuPair<WriteCvtI2SS,   [ZnFPU3],  5>;
313defm : ZnWriteResFpuPair<WriteCvtI2PS,   [ZnFPU3],  5>;
314defm : ZnWriteResFpuPair<WriteCvtI2PSY,  [ZnFPU3],  5>;
315defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
316defm : ZnWriteResFpuPair<WriteCvtI2SD,   [ZnFPU3],  5>;
317defm : ZnWriteResFpuPair<WriteCvtI2PD,   [ZnFPU3],  5>;
318defm : ZnWriteResFpuPair<WriteCvtI2PDY,  [ZnFPU3],  5>;
319defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
320defm : ZnWriteResFpuPair<WriteFDiv,      [ZnFPU3], 10, [3]>;
321defm : ZnWriteResFpuPair<WriteFDivX,     [ZnFPU3], 10, [3]>;
322defm : ZnWriteResFpuPair<WriteFDivY,     [ZnFPU3], 10, [6], 2>;
323defm : X86WriteResPairUnsupported<WriteFDivZ>;
324defm : ZnWriteResFpuPair<WriteFDiv64,    [ZnFPU3], 13, [5]>;
325defm : ZnWriteResFpuPair<WriteFDiv64X,   [ZnFPU3], 13, [5]>;
326defm : ZnWriteResFpuPair<WriteFDiv64Y,   [ZnFPU3], 15, [9], 2>;
327defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
328defm : ZnWriteResFpuPair<WriteFSign,     [ZnFPU3],  2>;
329defm : ZnWriteResFpuPair<WriteFRnd,      [ZnFPU3],  4, [1], 1, 7, 1>; // FIXME: Should folds require 1 extra uops?
330defm : ZnWriteResFpuPair<WriteFRndY,     [ZnFPU3],  4, [1], 1, 7, 1>; // FIXME: Should folds require 1 extra uops?
331defm : X86WriteResPairUnsupported<WriteFRndZ>;
332defm : ZnWriteResFpuPair<WriteFLogic,    [ZnFPU],   1>;
333defm : ZnWriteResFpuPair<WriteFLogicY,   [ZnFPU],   1, [2], 2>;
334defm : X86WriteResPairUnsupported<WriteFLogicZ>;
335defm : ZnWriteResFpuPair<WriteFTest,     [ZnFPU12], 2, [2], 1, 7, 1>;
336defm : ZnWriteResFpuPair<WriteFTestY,    [ZnFPU12], 4, [4], 3, 7, 2>;
337defm : X86WriteResPairUnsupported<WriteFTestZ>;
338defm : ZnWriteResFpuPair<WriteFShuffle,  [ZnFPU12], 1>;
339defm : ZnWriteResFpuPair<WriteFShuffleY, [ZnFPU12], 1, [2], 2>;
340defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
341defm : ZnWriteResFpuPair<WriteFVarShuffle, [ZnFPU12], 1>;
342defm : ZnWriteResFpuPair<WriteFVarShuffleY,[ZnFPU12], 1, [2], 2>;
343defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
344defm : ZnWriteResFpuPair<WriteFMul,      [ZnFPU01], 3>;
345defm : ZnWriteResFpuPair<WriteFMulX,     [ZnFPU01], 3>;
346defm : ZnWriteResFpuPair<WriteFMulY,     [ZnFPU01], 3, [2], 2>;
347defm : X86WriteResPairUnsupported<WriteFMulZ>;
348defm : ZnWriteResFpuPair<WriteFMul64,    [ZnFPU01], 4>;
349defm : ZnWriteResFpuPair<WriteFMul64X,   [ZnFPU01], 4>;
350defm : ZnWriteResFpuPair<WriteFMul64Y,   [ZnFPU01], 4, [2], 2>;
351defm : X86WriteResPairUnsupported<WriteFMul64Z>;
352defm : ZnWriteResFpuPair<WriteFMA,       [ZnFPU01], 5>;
353defm : ZnWriteResFpuPair<WriteFMAX,      [ZnFPU01], 5>;
354defm : ZnWriteResFpuPair<WriteFMAY,      [ZnFPU01], 5, [2], 2>;
355defm : X86WriteResPairUnsupported<WriteFMAZ>;
356defm : ZnWriteResFpuPair<WriteFRcp,      [ZnFPU01], 5>;
357defm : ZnWriteResFpuPair<WriteFRcpX,     [ZnFPU01], 5>;
358defm : ZnWriteResFpuPair<WriteFRcpY,     [ZnFPU01], 5, [2], 2>;
359defm : X86WriteResPairUnsupported<WriteFRcpZ>;
360defm : ZnWriteResFpuPair<WriteFRsqrt,    [ZnFPU01], 5>;
361defm : ZnWriteResFpuPair<WriteFRsqrtX,   [ZnFPU01], 5>;
362defm : ZnWriteResFpuPair<WriteFRsqrtY,   [ZnFPU01], 5, [2], 2>;
363defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
364defm : ZnWriteResFpuPair<WriteFSqrt,     [ZnFPU3], 14, [5]>;
365defm : ZnWriteResFpuPair<WriteFSqrtX,    [ZnFPU3], 14, [5]>;
366defm : ZnWriteResFpuPair<WriteFSqrtY,    [ZnFPU3], 14, [10], 2>;
367defm : X86WriteResPairUnsupported<WriteFSqrtZ>;
368defm : ZnWriteResFpuPair<WriteFSqrt64,   [ZnFPU3], 20, [8]>;
369defm : ZnWriteResFpuPair<WriteFSqrt64X,  [ZnFPU3], 20, [8]>;
370defm : ZnWriteResFpuPair<WriteFSqrt64Y,  [ZnFPU3], 20, [16], 2>;
371defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
372defm : ZnWriteResFpuPair<WriteFSqrt80,   [ZnFPU3], 20, [20]>;
373defm : ZnWriteResFpuPair<WriteFShuffle256, [ZnFPU12], 2, [2], 2>;
374defm : ZnWriteResFpuPair<WriteFVarShuffle256, [ZnFPU12], 2, [2], 2>;
375
376// Vector integer operations which uses FPU units
377defm : X86WriteRes<WriteVecLoad,         [ZnAGU], 8, [1], 1>;
378defm : X86WriteRes<WriteVecLoadX,        [ZnAGU], 8, [1], 1>;
379defm : X86WriteRes<WriteVecLoadY,        [ZnAGU], 8, [1], 1>;
380defm : X86WriteRes<WriteVecLoadNT,       [ZnAGU], 8, [1], 1>;
381defm : X86WriteRes<WriteVecLoadNTY,      [ZnAGU], 8, [1], 1>;
382defm : X86WriteRes<WriteVecMaskedLoad,   [ZnAGU,ZnFPU01], 8, [1,2], 2>;
383defm : X86WriteRes<WriteVecMaskedLoadY,  [ZnAGU,ZnFPU01], 9, [1,3], 2>;
384defm : X86WriteRes<WriteVecStore,        [ZnAGU], 1, [1], 1>;
385defm : X86WriteRes<WriteVecStoreX,       [ZnAGU], 1, [1], 1>;
386defm : X86WriteRes<WriteVecStoreY,       [ZnAGU], 1, [1], 1>;
387defm : X86WriteRes<WriteVecStoreNT,      [ZnAGU], 1, [1], 1>;
388defm : X86WriteRes<WriteVecStoreNTY,     [ZnAGU], 1, [1], 1>;
389defm : X86WriteRes<WriteVecMaskedStore32,  [ZnAGU,ZnFPU01], 4, [1,1], 1>;
390defm : X86WriteRes<WriteVecMaskedStore32Y, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
391defm : X86WriteRes<WriteVecMaskedStore64,  [ZnAGU,ZnFPU01], 4, [1,1], 1>;
392defm : X86WriteRes<WriteVecMaskedStore64Y, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
393defm : X86WriteRes<WriteVecMove,         [ZnFPU], 1, [1], 1>;
394defm : X86WriteRes<WriteVecMoveX,        [ZnFPU], 1, [1], 1>;
395defm : X86WriteRes<WriteVecMoveY,        [ZnFPU], 2, [1], 2>;
396defm : X86WriteResUnsupported<WriteVecMoveZ>;
397defm : X86WriteRes<WriteVecMoveToGpr,    [ZnFPU2], 2, [1], 1>;
398defm : X86WriteRes<WriteVecMoveFromGpr,  [ZnFPU2], 3, [1], 1>;
399defm : X86WriteRes<WriteEMMS,            [ZnFPU], 2, [1], 1>;
400
401defm : ZnWriteResFpuPair<WriteVecShift,   [ZnFPU2],  1>;
402defm : ZnWriteResFpuPair<WriteVecShiftX,  [ZnFPU2],  1>;
403defm : ZnWriteResFpuPair<WriteVecShiftY,  [ZnFPU2],  1, [2], 2>;
404defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
405defm : ZnWriteResFpuPair<WriteVecShiftImm,  [ZnFPU2], 1>;
406defm : ZnWriteResFpuPair<WriteVecShiftImmX, [ZnFPU2], 1>;
407defm : ZnWriteResFpuPair<WriteVecShiftImmY, [ZnFPU2], 1, [2], 2>;
408defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
409defm : ZnWriteResFpuPair<WriteVarVecShift,  [ZnFPU1], 3, [2], 1>;
410defm : ZnWriteResFpuPair<WriteVarVecShiftY, [ZnFPU1], 3, [4], 2>;
411defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
412defm : ZnWriteResFpuPair<WriteVecLogic,   [ZnFPU],   1>;
413defm : ZnWriteResFpuPair<WriteVecLogicX,  [ZnFPU],   1>;
414defm : ZnWriteResFpuPair<WriteVecLogicY,  [ZnFPU],   1, [2], 2>;
415defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
416defm : ZnWriteResFpuPair<WriteVecTest,    [ZnFPU12], 2, [2], 1, 7, 1>;
417defm : ZnWriteResFpuPair<WriteVecTestY,   [ZnFPU12], 4, [4], 3, 7, 2>;
418defm : X86WriteResPairUnsupported<WriteVecTestZ>;
419defm : ZnWriteResFpuPair<WriteVecALU,     [ZnFPU013],   1>;
420defm : ZnWriteResFpuPair<WriteVecALUX,    [ZnFPU013],   1>;
421defm : ZnWriteResFpuPair<WriteVecALUY,    [ZnFPU013],   1, [2], 2>;
422defm : X86WriteResPairUnsupported<WriteVecALUZ>;
423defm : ZnWriteResFpuPair<WriteVecIMul,    [ZnFPU0],  4>;
424defm : ZnWriteResFpuPair<WriteVecIMulX,   [ZnFPU0],  4>;
425defm : ZnWriteResFpuPair<WriteVecIMulY,   [ZnFPU0],  4, [2], 2>;
426defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
427defm : ZnWriteResFpuPair<WritePMULLD,     [ZnFPU0],  4, [2]>;
428defm : ZnWriteResFpuPair<WritePMULLDY,    [ZnFPU0],  4, [4], 2>;
429defm : X86WriteResPairUnsupported<WritePMULLDZ>;
430defm : ZnWriteResFpuPair<WriteShuffle,    [ZnFPU12],   1>;
431defm : ZnWriteResFpuPair<WriteShuffleX,   [ZnFPU12],   1>;
432defm : ZnWriteResFpuPair<WriteShuffleY,   [ZnFPU12],   1, [2], 2>;
433defm : X86WriteResPairUnsupported<WriteShuffleZ>;
434defm : ZnWriteResFpuPair<WriteVarShuffle, [ZnFPU12],   1>;
435defm : ZnWriteResFpuPair<WriteVarShuffleX,[ZnFPU12],   1>;
436defm : ZnWriteResFpuPair<WriteVarShuffleY,[ZnFPU12],   1, [2], 2>;
437defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
438defm : ZnWriteResFpuPair<WriteBlend,      [ZnFPU013], 1>;
439defm : ZnWriteResFpuPair<WriteBlendY,     [ZnFPU013], 1, [2], 2>;
440defm : X86WriteResPairUnsupported<WriteBlendZ>;
441defm : ZnWriteResFpuPair<WriteVarBlend,   [ZnFPU0],  1>;
442defm : ZnWriteResFpuPair<WriteVarBlendY,  [ZnFPU0],  1, [2], 2>;
443defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
444defm : ZnWriteResFpuPair<WriteShuffle256, [ZnFPU12],   2, [2], 2>;
445defm : ZnWriteResFpuPair<WriteVPMOV256,   [ZnFPU12],   1, [4], 3>;
446defm : ZnWriteResFpuPair<WriteVarShuffle256, [ZnFPU12],2, [2], 2>;
447defm : ZnWriteResFpuPair<WritePSADBW,     [ZnFPU0],  3>;
448defm : ZnWriteResFpuPair<WritePSADBWX,    [ZnFPU0],  3>;
449defm : ZnWriteResFpuPair<WritePSADBWY,    [ZnFPU0],  3, [2], 2>;
450defm : X86WriteResPairUnsupported<WritePSADBWZ>;
451defm : ZnWriteResFpuPair<WritePHMINPOS,   [ZnFPU0],  4>;
452
453// Vector insert/extract operations.
454defm : ZnWriteResFpuPair<WriteVecInsert,   [ZnFPU],   1>;
455
456def : WriteRes<WriteVecExtract, [ZnFPU12, ZnFPU2]> {
457  let Latency = 2;
458  let ReleaseAtCycles = [1, 2];
459}
460def : WriteRes<WriteVecExtractSt, [ZnAGU, ZnFPU12, ZnFPU2]> {
461  let Latency = 5;
462  let NumMicroOps = 2;
463  let ReleaseAtCycles = [1, 2, 3];
464}
465
466// MOVMSK Instructions.
467def : WriteRes<WriteFMOVMSK, [ZnFPU2]>;
468def : WriteRes<WriteMMXMOVMSK, [ZnFPU2]>;
469def : WriteRes<WriteVecMOVMSK, [ZnFPU2]>;
470
471def : WriteRes<WriteVecMOVMSKY, [ZnFPU2]> {
472  let NumMicroOps = 2;
473  let Latency = 2;
474  let ReleaseAtCycles = [2];
475}
476
477// AES Instructions.
478defm : ZnWriteResFpuPair<WriteAESDecEnc, [ZnFPU01], 4>;
479defm : ZnWriteResFpuPair<WriteAESIMC,    [ZnFPU01], 4>;
480defm : ZnWriteResFpuPair<WriteAESKeyGen, [ZnFPU01], 4>;
481
482def : WriteRes<WriteFence,  [ZnAGU]>;
483def : WriteRes<WriteNop, []>;
484
485// Microcoded Instructions
// Modelled with no execution resources and a fixed latency of 100 cycles.
// The SchedAlias entries that follow map the affected writes onto this one.
486def ZnWriteMicrocoded : SchedWriteRes<[]> {
487  let Latency = 100;
488}
489
490def : SchedAlias<WriteMicrocoded, ZnWriteMicrocoded>;
491def : SchedAlias<WriteFCMOV, ZnWriteMicrocoded>;
492def : SchedAlias<WriteSystem, ZnWriteMicrocoded>;
493def : SchedAlias<WriteMPSAD, ZnWriteMicrocoded>;
494def : SchedAlias<WriteMPSADY, ZnWriteMicrocoded>;
495def : SchedAlias<WriteMPSADLd, ZnWriteMicrocoded>;
496def : SchedAlias<WriteMPSADYLd, ZnWriteMicrocoded>;
497def : SchedAlias<WriteCLMul, ZnWriteMicrocoded>;
498def : SchedAlias<WriteCLMulLd, ZnWriteMicrocoded>;
499def : SchedAlias<WritePCmpIStrM, ZnWriteMicrocoded>;
500def : SchedAlias<WritePCmpIStrMLd, ZnWriteMicrocoded>;
501def : SchedAlias<WritePCmpEStrI, ZnWriteMicrocoded>;
502def : SchedAlias<WritePCmpEStrILd, ZnWriteMicrocoded>;
503def : SchedAlias<WritePCmpEStrM, ZnWriteMicrocoded>;
504def : SchedAlias<WritePCmpEStrMLd, ZnWriteMicrocoded>;
505def : SchedAlias<WritePCmpIStrI, ZnWriteMicrocoded>;
506def : SchedAlias<WritePCmpIStrILd, ZnWriteMicrocoded>;
507def : SchedAlias<WriteLDMXCSR, ZnWriteMicrocoded>;
508def : SchedAlias<WriteSTMXCSR, ZnWriteMicrocoded>;
509
510//=== Regex based InstRW ===//
511// Notation:
512// - r = register.
513// - m = memory.
514// - i = immediate.
515// - mm = 64-bit mmx register.
516// - x = 128-bit xmm register.
517// - (x)mm = mmx or xmm register.
518// - y = 256-bit ymm register.
519// - v = any vector register.
520
521//=== Integer Instructions ===//
522//-- Move instructions --//
523// MOV.
524// r16,m.
525def : InstRW<[WriteALULd, ReadAfterLd], (instrs MOV16rm)>;
526
527// XCHG.
528// r,m.
529def ZnWriteXCHGrm : SchedWriteRes<[ZnAGU, ZnALU]> {
530  let Latency = 5;
531  let NumMicroOps = 2;
532}
533def : InstRW<[ZnWriteXCHGrm, ReadAfterLd], (instregex "XCHG(8|16|32|64)rm")>;
534
535def : InstRW<[WriteMicrocoded], (instrs XLAT)>;
536
537// POP16.
538// NOTE(review): this heading read "r." and the write is named ...Pop16r, but the only instruction bound to it below is POP16rmm -- confirm which form is intended.
539def ZnWritePop16r : SchedWriteRes<[ZnAGU]>{
540  let Latency = 5;
541  let NumMicroOps = 2;
542}
543def : InstRW<[ZnWritePop16r], (instrs POP16rmm)>;
544def : InstRW<[WriteMicrocoded], (instregex "POPF(16|32)")>;
545def : InstRW<[WriteMicrocoded], (instregex "POPA(16|32)")>;
546
547
548// PUSH.
549// r. Has default values.
550// m.
551def ZnWritePUSH : SchedWriteRes<[ZnAGU]>{
552  let Latency = 4;
553}
554def : InstRW<[ZnWritePUSH], (instregex "PUSH(16|32)rmm")>;
555
556// PUSHF
557def : InstRW<[WriteMicrocoded], (instregex "PUSHF(16|32)")>;
558
559// PUSHA.
560def ZnWritePushA : SchedWriteRes<[ZnAGU]> {
561  let Latency = 8;
562}
563def : InstRW<[ZnWritePushA], (instregex "PUSHA(16|32)")>;
564
565//LAHF
566def : InstRW<[WriteMicrocoded], (instrs LAHF)>;
567
568// MOVBE.
569// r,m.
570def ZnWriteMOVBE : SchedWriteRes<[ZnAGU, ZnALU]> {
571  let Latency = 5;
572}
573def : InstRW<[ZnWriteMOVBE, ReadAfterLd], (instregex "MOVBE(16|32|64)rm")>;
574
575// m,r.
576def : InstRW<[ZnWriteMOVBE], (instregex "MOVBE(16|32|64)mr")>;
577
578//-- Arithmetic instructions --//
579
580// ADD SUB.
581// m,r/i.
582def : InstRW<[WriteALULd], (instregex "(ADD|SUB)(8|16|32|64)m(r|i)",
583                          "(ADD|SUB)(8|16|32|64)mi8",
584                          "(ADD|SUB)64mi32")>;
585
586// ADC SBB.
587// m,r/i.
588def : InstRW<[WriteALULd],
589             (instregex "(ADC|SBB)(8|16|32|64)m(r|i)",
590              "(ADC|SBB)(16|32|64)mi8",
591              "(ADC|SBB)64mi32")>;
592
593// INC DEC NOT NEG.
594// m.
595def : InstRW<[WriteALULd],
596             (instregex "(INC|DEC|NOT|NEG)(8|16|32|64)m")>;
597
598// MUL IMUL.
599// r16.
600def ZnWriteMul16 : SchedWriteRes<[ZnALU1, ZnMultiplier]> {
601  let Latency = 3;
602}
603def : SchedAlias<WriteIMul16, ZnWriteMul16>;
604def : SchedAlias<WriteIMul16Imm, ZnWriteMul16>; // TODO: is this right?
605def : SchedAlias<WriteIMul16Reg, ZnWriteMul16>; // TODO: is this right?
606
607// m16.
// Load-folded 16-bit multiply: the register form's resources (ZnALU1 and
// ZnMultiplier) plus ZnAGU, with a latency of 8 cycles.
608def ZnWriteMul16Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
609  let Latency = 8;
610}
611def : SchedAlias<WriteIMul16Ld, ZnWriteMul16Ld>;
// The immediate and register load-folded forms used to alias the register-only
// ZnWriteMul16, which dropped the AGU and the load latency.
612def : SchedAlias<WriteIMul16ImmLd, ZnWriteMul16Ld>; // TODO: confirm the numbers against the AMD SOG.
613def : SchedAlias<WriteIMul16RegLd, ZnWriteMul16Ld>; // TODO: confirm the numbers against the AMD SOG.
614// r32.
615def ZnWriteMul32 : SchedWriteRes<[ZnALU1, ZnMultiplier]> {
616  let Latency = 3;
617}
618def : SchedAlias<WriteIMul32, ZnWriteMul32>;
619def : SchedAlias<WriteIMul32Imm, ZnWriteMul32>; // TODO: is this right?
620def : SchedAlias<WriteIMul32Reg, ZnWriteMul32>; // TODO: is this right?
621
622// m32.
// Load-folded 32-bit multiply: the register form's resources (ZnALU1 and
// ZnMultiplier) plus ZnAGU, with a latency of 8 cycles.
623def ZnWriteMul32Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
624  let Latency = 8;
625}
626def : SchedAlias<WriteIMul32Ld, ZnWriteMul32Ld>;
// The immediate and register load-folded forms used to alias the register-only
// ZnWriteMul32, which dropped the AGU and the load latency.
627def : SchedAlias<WriteIMul32ImmLd, ZnWriteMul32Ld>; // TODO: confirm the numbers against the AMD SOG.
628def : SchedAlias<WriteIMul32RegLd, ZnWriteMul32Ld>; // TODO: confirm the numbers against the AMD SOG.
629
630// r64.
631def ZnWriteMul64 : SchedWriteRes<[ZnALU1, ZnMultiplier]> {
632  let Latency = 4;
633  let NumMicroOps = 2;
634}
635def : SchedAlias<WriteIMul64, ZnWriteMul64>;
636def : SchedAlias<WriteIMul64Imm, ZnWriteMul64>; // TODO: is this right?
637def : SchedAlias<WriteIMul64Reg, ZnWriteMul64>; // TODO: is this right?
638
639// m64.
// Load-folded 64-bit multiply: the register form's resources (ZnALU1 and
// ZnMultiplier) plus ZnAGU, with a latency of 9 cycles and 2 micro-ops.
640def ZnWriteMul64Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
641  let Latency = 9;
642  let NumMicroOps = 2;
643}
644def : SchedAlias<WriteIMul64Ld, ZnWriteMul64Ld>;
// The immediate and register load-folded forms used to alias the register-only
// ZnWriteMul64, which dropped the AGU and the load latency.
645def : SchedAlias<WriteIMul64ImmLd, ZnWriteMul64Ld>; // TODO: confirm the numbers against the AMD SOG.
646def : SchedAlias<WriteIMul64RegLd, ZnWriteMul64Ld>; // TODO: confirm the numbers against the AMD SOG.
647
648// MULX
649// Numbers are based on the AMD SOG for Family 17h - Instruction Latencies.
650defm : ZnWriteResPair<WriteMULX32, [ZnALU1, ZnMultiplier], 3, [1, 1], 1, 5, 0>;
651defm : ZnWriteResPair<WriteMULX64, [ZnALU1, ZnMultiplier], 3, [1, 1], 1, 5, 0>;
652
653//-- Control transfer instructions --//
654
655// J(E|R)CXZ.
656def ZnWriteJCXZ : SchedWriteRes<[ZnALU03]>;
657def : InstRW<[ZnWriteJCXZ], (instrs JCXZ, JECXZ, JRCXZ)>;
658
659// LOOP.
660def ZnWriteLOOP : SchedWriteRes<[ZnALU03]>;
661def : InstRW<[ZnWriteLOOP], (instrs LOOP)>;
662
663// LOOP(N)E, LOOP(N)Z
664def ZnWriteLOOPE : SchedWriteRes<[ZnALU03]>;
665def : InstRW<[ZnWriteLOOPE], (instrs LOOPE, LOOPNE)>;
666
667// CALL.
668// r.
669def ZnWriteCALLr : SchedWriteRes<[ZnAGU, ZnALU03]>;
670def : InstRW<[ZnWriteCALLr], (instregex "CALL(16|32)r")>;
671
672def : InstRW<[WriteMicrocoded], (instregex "CALL(16|32)m")>;
673
674// RET.
675def ZnWriteRET : SchedWriteRes<[ZnALU03]> {
676  let NumMicroOps = 2;
677}
678def : InstRW<[ZnWriteRET], (instregex "RET(16|32|64)", "LRET(16|32|64)",
679                            "IRET(16|32|64)")>;
680
681//-- Logic instructions --//
682
683// AND OR XOR.
684// m,r/i.
685def : InstRW<[WriteALULd],
686             (instregex "(AND|OR|XOR)(8|16|32|64)m(r|i)",
687              "(AND|OR|XOR)(8|16|32|64)mi8", "(AND|OR|XOR)64mi32")>;
688
689// Define ALU latency variants
690def ZnWriteALULat2 : SchedWriteRes<[ZnALU]> {
691  let Latency = 2;
692}
693def ZnWriteALULat2Ld : SchedWriteRes<[ZnAGU, ZnALU]> {
694  let Latency = 6;
695}
696
697// BTR BTS BTC.
698// m,r,i.
699def ZnWriteBTRSCm : SchedWriteRes<[ZnAGU, ZnALU]> {
700  let Latency = 6;
701  let NumMicroOps = 2;
702}
703// m,r,i.
704def : SchedAlias<WriteBitTestSetImmRMW, ZnWriteBTRSCm>;
705def : SchedAlias<WriteBitTestSetRegRMW, ZnWriteBTRSCm>;
706
707// PDEP PEXT.
708// r,r,r.
709def : InstRW<[WriteMicrocoded], (instregex "PDEP(32|64)rr", "PEXT(32|64)rr")>;
710// r,r,m.
711def : InstRW<[WriteMicrocoded], (instregex "PDEP(32|64)rm", "PEXT(32|64)rm")>;
712
713// RCR RCL.
714// m,1/i/cl.
715def : InstRW<[WriteMicrocoded], (instregex "RC(R|L)(8|16|32|64)m(1|i|CL)")>;
716
717// SHR SHL SAR.
718// m,i.
719def : InstRW<[WriteShiftLd], (instregex "S(A|H)(R|L)(8|16|32|64)m(i|1)")>;
720
721// SHRD SHLD.
722// m,r,i.
723def : InstRW<[WriteShiftLd], (instregex "SH(R|L)D(16|32|64)mri8")>;
724
725// r,r,cl.
726def : InstRW<[WriteMicrocoded], (instregex "SH(R|L)D(16|32|64)rrCL")>;
727
728// m,r,cl.
729def : InstRW<[WriteMicrocoded], (instregex "SH(R|L)D(16|32|64)mrCL")>;
730
731//-- Misc instructions --//
732// CMPXCHG8B.
733def ZnWriteCMPXCHG8B : SchedWriteRes<[ZnAGU, ZnALU]> {
734  let NumMicroOps = 18;
735}
736def : InstRW<[ZnWriteCMPXCHG8B], (instrs CMPXCHG8B)>;
737
738def : InstRW<[WriteMicrocoded], (instrs CMPXCHG16B)>;
739
// LEAVE: 2 micro-ops over ZnALU and ZnAGU, latency 8.
def ZnWriteLEAVE : SchedWriteRes<[ZnALU, ZnAGU]> {
  let NumMicroOps = 2;
  let Latency = 8;
}
def : InstRW<[ZnWriteLEAVE],
             (instregex "LEAVE")>;
746
// PAUSE is modelled as microcoded.
def : InstRW<[WriteMicrocoded],
             (instrs PAUSE)>;

// XADD.
// Register form: ZnALU with the class defaults for latency and micro-ops.
def ZnXADD : SchedWriteRes<[ZnALU]>;
def : InstRW<[ZnXADD],
             (instregex "XADD(8|16|32|64)rr")>;
// Memory form: microcoded.
def : InstRW<[WriteMicrocoded],
             (instregex "XADD(8|16|32|64)rm")>;
754
755//=== Floating Point x87 Instructions ===//
756//-- Move instructions --//
757
// x87 register-move write classes.
// Register load: ZnFPU13 with the class defaults for latency and micro-ops.
def ZnWriteFLDr : SchedWriteRes<[ZnFPU13]>;

// Register store: 2 micro-ops on ZnFPU23, latency 5.
def ZnWriteSTr : SchedWriteRes<[ZnFPU23]> {
  let NumMicroOps = 2;
  let Latency = 5;
}
764
// FLD.
// Register operand.
def : InstRW<[ZnWriteFLDr],
             (instrs LD_Frr)>;

// 80-bit memory operand: 2 micro-ops over ZnAGU and ZnFPU13.
def ZnWriteLD_F80m : SchedWriteRes<[ZnAGU, ZnFPU13]> { let NumMicroOps = 2; }
def : InstRW<[ZnWriteLD_F80m],
             (instrs LD_F80m)>;
774
// FST / FSTP.
// Register operand.
def : InstRW<[ZnWriteSTr],
             (instregex "ST_(F|FP)rr")>;

// 80-bit memory operand: ZnAGU plus ZnFPU23, latency 5.
def ZnWriteST_FP80m : SchedWriteRes<[ZnAGU, ZnFPU23]> { let Latency = 5; }
def : InstRW<[ZnWriteST_FP80m],
             (instrs ST_FP80m)>;
784
// FXCH: ZnFPU with the class defaults for latency and micro-ops.
def ZnWriteFXCH : SchedWriteRes<[ZnFPU]>;
def : InstRW<[ZnWriteFXCH],
             (instrs XCH_F)>;
789
// FILD (16/32/64-bit memory operand): 2 micro-ops over ZnAGU and ZnFPU3,
// latency 11.
def ZnWriteFILD : SchedWriteRes<[ZnAGU, ZnFPU3]> {
  let NumMicroOps = 2;
  let Latency = 11;
}
def : InstRW<[ZnWriteFILD],
             (instregex "ILD_F(16|32|64)m")>;
796
// FIST(P) / FISTTP (16/32/64-bit memory operand): ZnAGU plus ZnFPU23,
// latency 12.
def ZnWriteFIST : SchedWriteRes<[ZnAGU, ZnFPU23]> { let Latency = 12; }
def : InstRW<[ZnWriteFIST],
             (instregex "IS(T|TT)_(F|FP)(16|32|64)m")>;
802
// Write classes used by the x87 constant loads below.
// ZnAGU + ZnFPU13, latency 8.
def ZnWriteFPU13 : SchedWriteRes<[ZnAGU, ZnFPU13]> { let Latency = 8; }
// ZnAGU + ZnFPU3, latency 11.
def ZnWriteFPU3 : SchedWriteRes<[ZnAGU, ZnFPU3]> { let Latency = 11; }

// FLDZ.
def : SchedAlias<WriteFLD0, ZnWriteFPU13>;
// FLD1.
def : SchedAlias<WriteFLD1, ZnWriteFPU3>;
// FLDPI, FLDL2E and the remaining constant loads.
def : SchedAlias<WriteFLDC, ZnWriteFPU3>;
819
// x87 status/control word accesses, all modelled as microcoded.
// FNSTSW AX.
def : InstRW<[WriteMicrocoded],
             (instrs FNSTSW16r)>;
// FLDCW.
def : InstRW<[WriteMicrocoded],
             (instrs FLDCW16m)>;
// FNSTCW.
def : InstRW<[WriteMicrocoded],
             (instrs FNSTCW16m)>;
829
// FINCSTP / FDECSTP.
def : InstRW<[ZnWriteFPU3],
             (instrs FINCSTP, FDECSTP)>;
// FFREE.
def : InstRW<[ZnWriteFPU3],
             (instregex "FFREE")>;
835
836//-- Arithmetic instructions --//
837
// Write classes for the x87 arithmetic/compare instructions below.
// ZnFPU3 with the class defaults for latency and micro-ops.
def ZnWriteFPU3Lat1 : SchedWriteRes<[ZnFPU3]>;
// ZnFPU0 with the class defaults for latency and micro-ops.
def ZnWriteFPU0Lat1 : SchedWriteRes<[ZnFPU0]>;
// Load variant: ZnAGU + ZnFPU0, latency 8.
def ZnWriteFPU0Lat1Ld : SchedWriteRes<[ZnAGU, ZnFPU0]> { let Latency = 8; }
845
// FCHS.
def : InstRW<[ZnWriteFPU3Lat1],
             (instregex "CHS_F")>;

// FCOM(P) / FUCOM(P).
// Register operand.
def : InstRW<[ZnWriteFPU0Lat1],
             (instregex "COM(P?)_FST0r", "UCOM_F(P?)r")>;
// Memory operand (32/64-bit).
def : InstRW<[ZnWriteFPU0Lat1Ld],
             (instregex "FCOM(P?)(32|64)m")>;

// FCOMPP / FUCOMPP.
def : InstRW<[ZnWriteFPU0Lat1],
             (instrs FCOMPP, UCOM_FPPr)>;
858
// FCOMI(P) / FUCOMI(P): ZnAGU plus ZnFPU02, latency 9.
// The listed instruction names all carry the register (r) suffix.
// NOTE(review): the write class reserves ZnAGU although no memory form is
// listed here - confirm this is intended.
def ZnWriteFPU02 : SchedWriteRes<[ZnAGU, ZnFPU02]> { let Latency = 9; }
def : InstRW<[ZnWriteFPU02],
             (instrs COM_FIPr, COM_FIr, UCOM_FIPr, UCOM_FIr)>;
867
// FICOM(P) (16/32-bit memory operand): 2 micro-ops, latency 12.
// ReleaseAtCycles is 1 for ZnAGU and 3 for ZnFPU03.
def ZnWriteFPU03 : SchedWriteRes<[ZnAGU, ZnFPU03]> {
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1,3];
  let Latency = 12;
}
def : InstRW<[ZnWriteFPU03],
             (instregex "FICOM(P?)(16|32)m")>;
877
// FTST.
def : InstRW<[ZnWriteFPU0Lat1],
             (instregex "TST_F")>;
// FXAM.
def : InstRW<[ZnWriteFPU3Lat1],
             (instrs XAM_F)>;
// FNOP.
def : InstRW<[ZnWriteFPU0Lat1],
             (instrs FNOP)>;
// WAIT.
def : InstRW<[ZnWriteFPU0Lat1],
             (instrs WAIT)>;
889
890//=== Integer MMX and XMM Instructions ===//
891
// Write classes shared by the vector instructions below.
// ZnFPU013 with the class defaults for latency and micro-ops.
def ZnWriteFPU013 : SchedWriteRes<[ZnFPU013]>;
// Memory variant: 2 micro-ops over ZnAGU and ZnFPU013, latency 8.
def ZnWriteFPU013m : SchedWriteRes<[ZnAGU, ZnFPU013]> {
  let NumMicroOps = 2;
  let Latency = 8;
}

// ZnFPU01 with the class defaults for latency and micro-ops.
def ZnWriteFPU01 : SchedWriteRes<[ZnFPU01]>;
// YMM variant: 2 micro-ops on ZnFPU01.
def ZnWriteFPU01Y : SchedWriteRes<[ZnFPU01]> { let NumMicroOps = 2; }
902
// VPBLENDD.
// Register source, xmm (v,v,v,i).
def : InstRW<[ZnWriteFPU01],
             (instrs VPBLENDDrri)>;
// Register source, ymm.
def : InstRW<[ZnWriteFPU01Y],
             (instrs VPBLENDDYrri)>;

// Memory source (v,v,m,i): 2 micro-ops over ZnAGU and ZnFPU01.
// xmm: latency 8, ReleaseAtCycles [1, 2].
def ZnWriteFPU01Op2 : SchedWriteRes<[ZnAGU, ZnFPU01]> {
  let Latency = 8;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1, 2];
}
// ymm: latency 9, ReleaseAtCycles [1, 3].
def ZnWriteFPU01Op2Y : SchedWriteRes<[ZnAGU, ZnFPU01]> {
  let Latency = 9;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1, 3];
}
def : InstRW<[ZnWriteFPU01Op2],
             (instrs VPBLENDDrmi)>;
def : InstRW<[ZnWriteFPU01Op2Y],
             (instrs VPBLENDDYrmi)>;
922
// Masked moves, all modelled as microcoded.
// MASKMOVQ.
def : InstRW<[WriteMicrocoded],
             (instregex "MMX_MASKMOVQ(64)?")>;

// MASKMOVDQU.
def : InstRW<[WriteMicrocoded],
             (instregex "(V?)MASKMOVDQU(64)?")>;

// VPMASKMOVD, load forms (xmm and ymm).
def : InstRW<[WriteMicrocoded],
             (instregex "VPMASKMOVD(Y?)rm")>;
// VPMASKMOVD / VPMASKMOVQ, store forms (m,v,v).
def : InstRW<[WriteMicrocoded],
             (instregex "VPMASKMOV(D|Q)(Y?)mr")>;
935
// VPBROADCASTB / VPBROADCASTW from memory.
// Both variants: 2 micro-ops, latency 8, ReleaseAtCycles [1, 2].
// xmm destination (x, m8/16): ZnAGU plus ZnFPU12.
def ZnWriteVPBROADCAST128Ld : SchedWriteRes<[ZnAGU, ZnFPU12]> {
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1, 2];
  let Latency = 8;
}
def : InstRW<[ZnWriteVPBROADCAST128Ld],
             (instregex "VPBROADCAST(B|W)rm")>;

// ymm destination (y, m8/16): ZnAGU plus ZnFPU1.
def ZnWriteVPBROADCAST256Ld : SchedWriteRes<[ZnAGU, ZnFPU1]> {
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1, 2];
  let Latency = 8;
}
def : InstRW<[ZnWriteVPBROADCAST256Ld],
             (instregex "VPBROADCAST(B|W)Yrm")>;
954
// VPGATHER (D/Q index and element combinations, xmm and ymm): microcoded.
def : InstRW<[WriteMicrocoded],
             (instregex "VPGATHER(Q|D)(Q|D)(Y?)rm")>;
957
958//-- Arithmetic instructions --//
959
// Horizontal add/subtract pairs, declared with an empty resource list.
// HADD / HSUB PS/PD: third argument 7.
defm : ZnWriteResFpuPair<WriteFHAdd,  [], 7>;
defm : ZnWriteResFpuPair<WriteFHAddY, [], 7>;
// PHADD / PHSUB (S) W/D: third argument 3.
defm : ZnWriteResFpuPair<WritePHAdd,  [], 3>;
defm : ZnWriteResFpuPair<WritePHAddX, [], 3>;
defm : ZnWriteResFpuPair<WritePHAddY, [], 3>;
967
// PCMPGTQ.
// Register source (xmm and ymm): ZnFPU03 with the class defaults.
def ZnWritePCMPGTQr : SchedWriteRes<[ZnFPU03]>;
def : InstRW<[ZnWritePCMPGTQr],
             (instregex "(V?)PCMPGTQ(Y?)rr")>;

// Memory source, xmm (x <- x,m): ZnAGU plus ZnFPU03, latency 8.
def ZnWritePCMPGTQm : SchedWriteRes<[ZnAGU, ZnFPU03]> { let Latency = 8; }
// Memory source, ymm: additionally 2 micro-ops and ReleaseAtCycles [1,2].
def ZnWritePCMPGTQYm : SchedWriteRes<[ZnAGU, ZnFPU03]> {
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1,2];
  let Latency = 8;
}
def : InstRW<[ZnWritePCMPGTQm],
             (instregex "(V?)PCMPGTQrm")>;
def : InstRW<[ZnWritePCMPGTQYm],
             (instrs VPCMPGTQYrm)>;
984
985//=== Floating Point XMM and YMM Instructions ===//
986//-- Move instructions --//
987
// VPERM2F128 / VPERM2I128: microcoded for both register and memory sources.
def : InstRW<[WriteMicrocoded],
             (instrs VPERM2F128rr, VPERM2I128rr)>;
def : InstRW<[WriteMicrocoded],
             (instrs VPERM2F128rm, VPERM2I128rm)>;
993
// VBROADCASTF128 / VBROADCASTI128 from memory: 2 micro-ops over ZnAGU and
// ZnFPU13, latency 8.
def ZnWriteBROADCAST : SchedWriteRes<[ZnAGU, ZnFPU13]> {
  let Latency = 8;
  let NumMicroOps = 2;
}
def : InstRW<[ZnWriteBROADCAST],
             (instrs VBROADCASTF128rm, VBROADCASTI128rm)>;
1001
// EXTRACTPS.
// Register destination (r32,x,i): 2 micro-ops over ZnFPU12 and ZnFPU2,
// latency 2.
def ZnWriteEXTRACTPSr : SchedWriteRes<[ZnFPU12, ZnFPU2]> {
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1, 2];
  let Latency = 2;
}
def : InstRW<[ZnWriteEXTRACTPSr],
             (instregex "(V?)EXTRACTPSrr")>;

// Memory destination (m32,x,i): adds ZnAGU; 2 micro-ops, latency 5.
def ZnWriteEXTRACTPSm : SchedWriteRes<[ZnAGU,ZnFPU12, ZnFPU2]> {
  let NumMicroOps = 2;
  let ReleaseAtCycles = [5, 1, 2];
  let Latency = 5;
}
def : InstRW<[ZnWriteEXTRACTPSm],
             (instregex "(V?)EXTRACTPSmr")>;
1018
// VEXTRACTF128 / VEXTRACTI128.
// Register destination (x,y,i).
def : InstRW<[ZnWriteFPU013],
             (instrs VEXTRACTF128rr, VEXTRACTI128rr)>;

// Memory destination (m128,y,i).
def : InstRW<[ZnWriteFPU013m],
             (instrs VEXTRACTF128mr, VEXTRACTI128mr)>;
1027
// VINSERTF128 / VINSERTI128 (y,y,x,i).
// Register source: ZnFPU013, latency 2, ReleaseAtCycles [2].
def ZnWriteVINSERT128r : SchedWriteRes<[ZnFPU013]> {
  let ReleaseAtCycles = [2];
  let Latency = 2;
}
// Memory source: 2 micro-ops over ZnAGU and ZnFPU013, latency 9,
// ReleaseAtCycles [1, 2].
def ZnWriteVINSERT128Ld : SchedWriteRes<[ZnAGU,ZnFPU013]> {
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1, 2];
  let Latency = 9;
}
def : InstRW<[ZnWriteVINSERT128r],
             (instrs VINSERTF128rr, VINSERTI128rr)>;
def : InstRW<[ZnWriteVINSERT128Ld],
             (instrs VINSERTF128rm, VINSERTI128rm)>;
1043
// VGATHER (D/Q index, PD/PS element, xmm and ymm): microcoded.
def : InstRW<[WriteMicrocoded],
             (instregex "VGATHER(Q|D)(PD|PS)(Y?)rm")>;
1046
1047//-- Conversion instructions --//
// CVTPD2PS, register source.
// xmm (x,x): ZnFPU3, latency 4.
def ZnWriteCVTPD2PSr : SchedWriteRes<[ZnFPU3]> { let Latency = 4; }
// ymm (y,y): 2 micro-ops on ZnFPU3, latency 5, ReleaseAtCycles [2].
def ZnWriteCVTPD2PSYr : SchedWriteRes<[ZnFPU3]> {
  let NumMicroOps = 2;
  let ReleaseAtCycles = [2];
  let Latency = 5;
}

def : SchedAlias<WriteCvtPD2PS, ZnWriteCVTPD2PSr>;
def : SchedAlias<WriteCvtPD2PSY, ZnWriteCVTPD2PSYr>;
// zmm (z,z): marked unsupported.
defm : X86WriteResUnsupported<WriteCvtPD2PSZ>;
1064
// CVTPD2PS, memory source.
// x,m128: ZnAGU plus ZnFPU3, latency 11.
def ZnWriteCVTPD2PSLd : SchedWriteRes<[ZnAGU,ZnFPU3]> { let Latency = 11; }
def : SchedAlias<WriteCvtPD2PSLd, ZnWriteCVTPD2PSLd>;

// x,m256: additionally 2 micro-ops and ReleaseAtCycles [1,2].
def ZnWriteCVTPD2PSYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1,2];
  let Latency = 11;
}
def : SchedAlias<WriteCvtPD2PSYLd, ZnWriteCVTPD2PSYLd>;
// z,m512: marked unsupported.
defm : X86WriteResUnsupported<WriteCvtPD2PSZLd>;
1080
// CVTSD2SS reuses the CVTPD2PS write classes.
// Register source (x,x).
def : SchedAlias<WriteCvtSD2SS,   ZnWriteCVTPD2PSr>;
// Memory source (x,m64).
def : SchedAlias<WriteCvtSD2SSLd, ZnWriteCVTPD2PSLd>;
1088
// CVTPS2PD.
// Register source, xmm (x,x): ZnFPU3, latency 3.
def ZnWriteCVTPS2PDr : SchedWriteRes<[ZnFPU3]> { let Latency = 3; }
def : SchedAlias<WriteCvtPS2PD, ZnWriteCVTPS2PDr>;

// Memory source (x,m64 and y,m128): 2 micro-ops over ZnAGU and ZnFPU3,
// latency 10. The zmm load form is marked unsupported.
def ZnWriteCVTPS2PDLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
  let NumMicroOps = 2;
  let Latency = 10;
}
def : SchedAlias<WriteCvtPS2PDLd,  ZnWriteCVTPS2PDLd>;
def : SchedAlias<WriteCvtPS2PDYLd, ZnWriteCVTPS2PDLd>;
defm : X86WriteResUnsupported<WriteCvtPS2PDZLd>;

// Register source, ymm (y,x): ZnFPU3, latency 3. The zmm form is marked
// unsupported.
def ZnWriteVCVTPS2PDY : SchedWriteRes<[ZnFPU3]> { let Latency = 3; }
def : SchedAlias<WriteCvtPS2PDY, ZnWriteVCVTPS2PDY>;
defm : X86WriteResUnsupported<WriteCvtPS2PDZ>;
1112
// CVTSS2SD.
// Register source (x,x): ZnFPU3, latency 4.
def ZnWriteCVTSS2SDr : SchedWriteRes<[ZnFPU3]> { let Latency = 4; }
def : SchedAlias<WriteCvtSS2SD, ZnWriteCVTSS2SDr>;

// Memory source (x,m32): 2 micro-ops over ZnAGU and ZnFPU3, latency 11,
// ReleaseAtCycles [1, 2].
def ZnWriteCVTSS2SDLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1, 2];
  let Latency = 11;
}
def : SchedAlias<WriteCvtSS2SDLd, ZnWriteCVTSS2SDLd>;
1127
// CVTDQ2PD, register source: ZnFPU12 plus ZnFPU3, latency 5.
def ZnWriteCVTDQ2PDr : SchedWriteRes<[ZnFPU12,ZnFPU3]> { let Latency = 5; }
// xmm (x,x).
def : InstRW<[ZnWriteCVTDQ2PDr],
             (instregex "(V)?CVTDQ2PDrr")>;
// ymm (y,x) uses the same write class as xmm.
def : InstRW<[ZnWriteCVTDQ2PDr],
             (instrs VCVTDQ2PDYrr)>;
1138
// CVT(T)PD2DQ.
// Register source: ZnFPU12 plus ZnFPU3, latency 5.
def ZnWriteCVTPD2DQr : SchedWriteRes<[ZnFPU12, ZnFPU3]> { let Latency = 5; }
// Memory source: adds ZnAGU; 2 micro-ops, latency 12.
def ZnWriteCVTPD2DQLd : SchedWriteRes<[ZnAGU,ZnFPU12,ZnFPU3]> {
  let NumMicroOps = 2;
  let Latency = 12;
}

// xmm source (x,x).
def : InstRW<[ZnWriteCVTPD2DQr],
             (instregex "(V?)CVT(T?)PD2DQrr")>;
// 128-bit memory source (x,m128).
def : InstRW<[ZnWriteCVTPD2DQLd],
             (instregex "(V?)CVT(T?)PD2DQrm")>;
// ymm source (x,y) is handled like xmm.
def : InstRW<[ZnWriteCVTPD2DQr],
             (instregex "VCVT(T?)PD2DQYrr")>;
// 256-bit memory source (x,m256).
def : InstRW<[ZnWriteCVTPD2DQLd],
             (instregex "VCVT(T?)PD2DQYrm")>;
1157
// MMX <-> XMM conversions, register forms.
// ZnFPU3, latency 4.
def ZnWriteCVTPS2PIr : SchedWriteRes<[ZnFPU3]> { let Latency = 4; }

// CVT(T)PS2PI (mm,x).
def : InstRW<[ZnWriteCVTPS2PIr],
             (instregex "MMX_CVT(T?)PS2PIrr")>;

// CVTPI2PD (x,mm) reuses the CVTPS2PD register write class.
def : InstRW<[ZnWriteCVTPS2PDr],
             (instrs MMX_CVTPI2PDrr)>;

// CVT(T)PD2PI (mm,x).
def : InstRW<[ZnWriteCVTPS2PIr],
             (instregex "MMX_CVT(T?)PD2PIrr")>;
1172
// ZnFPU3, latency 5.
// NOTE(review): no use of this write class is visible in this part of the
// file - confirm it is still referenced.
def ZnWriteCVSTSI2SSr : SchedWriteRes<[ZnFPU3]> { let Latency = 5; }

// CVT(T)SS2SI reuses the CVT(T)PD2DQ write classes.
// Register source (r32/r64,x).
def : InstRW<[ZnWriteCVTPD2DQr],
             (instregex "(V?)CVT(T?)SS2SI(64)?rr")>;
// Memory source (r32/r64,m32).
def : InstRW<[ZnWriteCVTPD2DQLd],
             (instregex "(V?)CVT(T?)SS2SI(64)?rm")>;
1184
// CVTSI2SD, register source (x,r32/64): ZnFPU013 plus ZnFPU3, latency 5.
def ZnWriteCVSTSI2SDr : SchedWriteRes<[ZnFPU013, ZnFPU3]> {
  let Latency = 5;
}
def : InstRW<[ZnWriteCVSTSI2SDr],
             (instregex "(V?)CVTSI(64)?2SDrr")>;
1191
1192
// CVT(T)SD2SI (r32/r64 destination).
// Register source: ZnFPU3 plus ZnFPU2, latency 5.
def ZnWriteCVSTSI2SIr : SchedWriteRes<[ZnFPU3, ZnFPU2]> { let Latency = 5; }
// Memory source: adds ZnAGU, latency 12.
def ZnWriteCVSTSI2SILd : SchedWriteRes<[ZnAGU, ZnFPU3, ZnFPU2]> {
  let Latency = 12;
}

def : InstRW<[ZnWriteCVSTSI2SIr],
             (instregex "(V?)CVT(T?)SD2SI(64)?rr")>;
def : InstRW<[ZnWriteCVSTSI2SILd],
             (instregex "(V?)CVT(T?)SD2SI(64)?rm")>;
1204
// Half-precision conversions are aliased to ZnWriteMicrocoded; every zmm
// variant is marked unsupported.

// VCVTPS2PH, register destination (x,v,i).
def : SchedAlias<WriteCvtPS2PH,  ZnWriteMicrocoded>;
def : SchedAlias<WriteCvtPS2PHY, ZnWriteMicrocoded>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
// VCVTPS2PH, memory destination (m,v,i).
def : SchedAlias<WriteCvtPS2PHSt,  ZnWriteMicrocoded>;
def : SchedAlias<WriteCvtPS2PHYSt, ZnWriteMicrocoded>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;

// VCVTPH2PS, register source (v,x).
def : SchedAlias<WriteCvtPH2PS,  ZnWriteMicrocoded>;
def : SchedAlias<WriteCvtPH2PSY, ZnWriteMicrocoded>;
defm : X86WriteResUnsupported<WriteCvtPH2PSZ>;
// VCVTPH2PS, memory source (v,m).
def : SchedAlias<WriteCvtPH2PSLd,  ZnWriteMicrocoded>;
def : SchedAlias<WriteCvtPH2PSYLd, ZnWriteMicrocoded>;
defm : X86WriteResUnsupported<WriteCvtPH2PSZLd>;
1224
1225//-- SSE4A instructions --//
// EXTRQ: ZnFPU12 plus ZnFPU2, latency 2.
def ZnWriteEXTRQ : SchedWriteRes<[ZnFPU12, ZnFPU2]> { let Latency = 2; }
def : InstRW<[ZnWriteEXTRQ],
             (instregex "EXTRQ")>;

// INSERTQ: ZnFPU03 plus ZnFPU1, latency 4.
def ZnWriteINSERTQ : SchedWriteRes<[ZnFPU03,ZnFPU1]> { let Latency = 4; }
def : InstRW<[ZnWriteINSERTQ],
             (instregex "INSERTQ")>;
1237
1238//-- SHA instructions --//
// SHA256MSG2, register and memory sources: microcoded.
def : InstRW<[WriteMicrocoded],
             (instregex "SHA256MSG2(Y?)r(r|m)")>;
1241
// SHA1MSG1 / SHA256MSG1.
// Register source (x,x): ZnFPU12, latency 2, ReleaseAtCycles [2].
def ZnWriteSHA1MSG1r : SchedWriteRes<[ZnFPU12]> {
  let ReleaseAtCycles = [2];
  let Latency = 2;
}
def : InstRW<[ZnWriteSHA1MSG1r],
             (instregex "SHA(1|256)MSG1rr")>;
// Memory source (x,m): adds ZnAGU; latency 9, ReleaseAtCycles [1,2].
def ZnWriteSHA1MSG1Ld : SchedWriteRes<[ZnAGU, ZnFPU12]> {
  let ReleaseAtCycles = [1,2];
  let Latency = 9;
}
def : InstRW<[ZnWriteSHA1MSG1Ld],
             (instregex "SHA(1|256)MSG1rm")>;
1255
// SHA1MSG2.
// Register source (x,x): ZnFPU12 with the class defaults.
def ZnWriteSHA1MSG2r : SchedWriteRes<[ZnFPU12]>;
def : InstRW<[ZnWriteSHA1MSG2r],
             (instrs SHA1MSG2rr)>;
// Memory source (x,m): adds ZnAGU, latency 8.
def ZnWriteSHA1MSG2Ld : SchedWriteRes<[ZnAGU, ZnFPU12]> { let Latency = 8; }
def : InstRW<[ZnWriteSHA1MSG2Ld],
             (instrs SHA1MSG2rm)>;
1265
// SHA1NEXTE.
// Register source (x,x): ZnFPU1 with the class defaults.
def ZnWriteSHA1NEXTEr : SchedWriteRes<[ZnFPU1]>;
def : InstRW<[ZnWriteSHA1NEXTEr],
             (instrs SHA1NEXTErr)>;
// Memory source (x,m): adds ZnAGU, latency 8.
def ZnWriteSHA1NEXTELd : SchedWriteRes<[ZnAGU, ZnFPU1]> { let Latency = 8; }
def : InstRW<[ZnWriteSHA1NEXTELd],
             (instrs SHA1NEXTErm)>;
1275
// SHA1RNDS4.
// Register source (x,x): ZnFPU1, latency 6.
def ZnWriteSHA1RNDS4r : SchedWriteRes<[ZnFPU1]> { let Latency = 6; }
def : InstRW<[ZnWriteSHA1RNDS4r],
             (instrs SHA1RNDS4rri)>;
// Memory source (x,m): adds ZnAGU, latency 13.
def ZnWriteSHA1RNDS4Ld : SchedWriteRes<[ZnAGU, ZnFPU1]> { let Latency = 13; }
def : InstRW<[ZnWriteSHA1RNDS4Ld],
             (instrs SHA1RNDS4rmi)>;
1287
// SHA256RNDS2.
// Register source (x,x): ZnFPU1, latency 4.
def ZnWriteSHA256RNDS2r : SchedWriteRes<[ZnFPU1]> { let Latency = 4; }
def : InstRW<[ZnWriteSHA256RNDS2r],
             (instrs SHA256RNDS2rr)>;
// Memory source (x,m): adds ZnAGU, latency 11.
def ZnWriteSHA256RNDS2Ld : SchedWriteRes<[ZnAGU, ZnFPU1]> {
  let Latency = 11;
}
def : InstRW<[ZnWriteSHA256RNDS2Ld],
             (instrs SHA256RNDS2rm)>;
1299
1300//-- Arithmetic instructions --//
1301
// Dot products are aliased to ZnWriteMicrocoded in every form.

// DPPS, register source (x,x,i / v,v,v,i).
def : SchedAlias<WriteDPPS,    ZnWriteMicrocoded>;
def : SchedAlias<WriteDPPSY,   ZnWriteMicrocoded>;
// DPPS, memory source (x,m,i / v,v,m,i).
def : SchedAlias<WriteDPPSLd,  ZnWriteMicrocoded>;
def : SchedAlias<WriteDPPSYLd, ZnWriteMicrocoded>;

// DPPD, register source (x,x,i).
def : SchedAlias<WriteDPPD,    ZnWriteMicrocoded>;
// DPPD, memory source (x,m,i).
def : SchedAlias<WriteDPPDLd,  ZnWriteMicrocoded>;
1317
1318///////////////////////////////////////////////////////////////////////////////
1319// Dependency breaking instructions.
1320///////////////////////////////////////////////////////////////////////////////
1321
// Zero-idiom opcodes for this model. Every class below is guarded by
// ZeroIdiomPredicate; the classes are grouped by register file.
def : IsZeroIdiomFunction<[
  // General-purpose registers.
  DepBreakingClass<[ SUB32rr, SUB64rr, XOR32rr, XOR64rr ],
                   ZeroIdiomPredicate>,

  // MMX.
  DepBreakingClass<[
    MMX_PXORrr, MMX_PANDNrr,
    MMX_PSUBBrr, MMX_PSUBDrr, MMX_PSUBQrr, MMX_PSUBWrr,
    MMX_PSUBSBrr, MMX_PSUBSWrr,
    MMX_PSUBUSBrr, MMX_PSUBUSWrr,
    MMX_PCMPGTBrr, MMX_PCMPGTDrr, MMX_PCMPGTWrr
  ], ZeroIdiomPredicate>,

  // SSE.
  DepBreakingClass<[
    // Floating-point opcodes.
    XORPSrr, XORPDrr,
    ANDNPSrr, ANDNPDrr,

    // Integer opcodes.
    PXORrr, PANDNrr,
    PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
    PCMPGTBrr, PCMPGTDrr, PCMPGTQrr, PCMPGTWrr
  ], ZeroIdiomPredicate>,

  // AVX, xmm.
  DepBreakingClass<[
    // Floating-point opcodes.
    VXORPSrr, VXORPDrr,
    VANDNPSrr, VANDNPDrr,

    // Integer opcodes.
    VPXORrr, VPANDNrr,
    VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
    VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr
  ], ZeroIdiomPredicate>,

  // AVX, ymm.
  DepBreakingClass<[
    // Floating-point opcodes.
    VXORPSYrr, VXORPDYrr,
    VANDNPSYrr, VANDNPDYrr,

    // Integer opcodes.
    VPXORYrr, VPANDNYrr,
    VPSUBBYrr, VPSUBWYrr, VPSUBDYrr, VPSUBQYrr,
    VPCMPGTBYrr, VPCMPGTWYrr, VPCMPGTDYrr, VPCMPGTQYrr
  ], ZeroIdiomPredicate>
]>;
1370
// Dependency-breaking opcodes for this model, grouped by register file.
// All classes are guarded by ZeroIdiomPredicate except CMP32rr/CMP64rr,
// which use CheckSameRegOperand<0, 1>.
def : IsDepBreakingFunction<[
  // General-purpose registers.
  DepBreakingClass<[ SBB32rr, SBB64rr ], ZeroIdiomPredicate>,
  DepBreakingClass<[ CMP32rr, CMP64rr ], CheckSameRegOperand<0, 1> >,

  // MMX.
  DepBreakingClass<[ MMX_PCMPEQBrr, MMX_PCMPEQWrr, MMX_PCMPEQDrr ],
                   ZeroIdiomPredicate>,

  // SSE.
  DepBreakingClass<[ PCMPEQBrr, PCMPEQWrr, PCMPEQDrr, PCMPEQQrr ],
                   ZeroIdiomPredicate>,

  // AVX, xmm.
  DepBreakingClass<[ VPCMPEQBrr, VPCMPEQWrr, VPCMPEQDrr, VPCMPEQQrr ],
                   ZeroIdiomPredicate>,

  // AVX, ymm.
  DepBreakingClass<[ VPCMPEQBYrr, VPCMPEQWYrr, VPCMPEQDYrr, VPCMPEQQYrr ],
                   ZeroIdiomPredicate>
]>;
1396
1397} // SchedModel
1398