xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86Schedule.td (revision 2f513db72b034fd5ef7f080b11be5c711c15186a)
1//===-- X86Schedule.td - X86 Scheduling Definitions --------*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9//===----------------------------------------------------------------------===//
10// InstrSchedModel annotations for out-of-order CPUs.
11
12// Instructions with folded loads need to read the memory operand immediately,
13// but other register operands don't have to be read until the load is ready.
14// These operands are marked with ReadAfterLd.
15def ReadAfterLd : SchedRead;     // Default ReadAfter argument of X86SchedWritePair.
16def ReadAfterVecLd : SchedRead;  // Used in this file by e.g. WriteFAdd and WriteVecALU.
17def ReadAfterVecXLd : SchedRead; // Used in this file by e.g. WriteFAddX and WriteVecALUX (XMM).
18def ReadAfterVecYLd : SchedRead; // Used in this file by the YMM and ZMM pairs, e.g. WriteFAddY/WriteFAddZ.
19
20// Instructions that move data between general purpose registers and vector
21// registers may be subject to extra latency due to data bypass delays.
22// This SchedRead describes a bypass delay caused by data being moved from the
23// integer unit to the floating point unit.
24def ReadInt2Fpu : SchedRead;
25
26// Instructions with both a load and a store folded are modeled as a folded
27// load + WriteRMW.
28def WriteRMW : SchedWrite;
29
30// Helper that defines one anonymous WriteRes for SchedRW on ExePorts and sets its Latency/ResourceCycles/NumMicroOps.
31multiclass X86WriteRes<SchedWrite SchedRW,
32                       list<ProcResourceKind> ExePorts,
33                       int Lat, list<int> Res, int UOps> {
34  def : WriteRes<SchedRW, ExePorts> {
35    let Latency = Lat;        // Latency is taken from Lat.
36    let ResourceCycles = Res; // Presumably one entry per ExePorts element - confirm against WriteRes.
37    let NumMicroOps = UOps;   // Micro-op count is taken from UOps.
38  }
39}
40
41// Most instructions can fold loads, so almost every SchedWrite comes in two
42// variants: With and without a folded load.
43// An X86FoldableSchedWrite holds a reference to the corresponding SchedWrite
44// with a folded load.
45class X86FoldableSchedWrite : SchedWrite {
46  // The SchedWrite to use when a load is folded into the instruction.
47  SchedWrite Folded;
48  // The SchedRead to tag register operands that don't need to be ready
49  // until the folded load has completed.
50  SchedRead ReadAfterFold;
51}
52
53// Multiclass that produces a linked pair of SchedWrites: NAME (register form) and NAME#"Ld" (folded-load form).
54multiclass X86SchedWritePair<SchedRead ReadAfter = ReadAfterLd> {
55  // Register-Memory operation.
56  def Ld : SchedWrite;
57  // Register-Register operation.
58  def NAME : X86FoldableSchedWrite {
59    let Folded = !cast<SchedWrite>(NAME#"Ld"); // Links to the Ld record defined just above.
60    let ReadAfterFold = ReadAfter;             // ReadAfterLd unless the instantiation overrides it.
61  }
62}
63
64// Helpers to mark SchedWrites as unsupported.
65multiclass X86WriteResUnsupported<SchedWrite SchedRW> {
66  let Unsupported = 1 in {       // Sets Unsupported = 1 on the def inside.
67    def : WriteRes<SchedRW, []>; // Anonymous WriteRes with an empty resource list.
68  }
69}
70multiclass X86WriteResPairUnsupported<X86FoldableSchedWrite SchedRW> {
71  let Unsupported = 1 in {              // Sets Unsupported = 1 on both defs inside.
72    def : WriteRes<SchedRW, []>;        // The SchedRW record itself.
73    def : WriteRes<SchedRW.Folded, []>; // The folded-load record reached through the Folded field.
74  }
75}
76
77// Class that wraps X86FoldableSchedWrite for each vector width.
78class X86SchedWriteWidths<X86FoldableSchedWrite sScl,
79                          X86FoldableSchedWrite s128,
80                          X86FoldableSchedWrite s256,
81                          X86FoldableSchedWrite s512> {
82  X86FoldableSchedWrite Scl = sScl; // Scalar float/double operations.
83  X86FoldableSchedWrite MMX = sScl; // MMX operations (same record as Scl).
84  X86FoldableSchedWrite XMM = s128; // XMM operations.
85  X86FoldableSchedWrite YMM = s256; // YMM operations.
86  X86FoldableSchedWrite ZMM = s512; // ZMM operations.
87}
88
89// Class that wraps X86SchedWriteWidths for each fp vector type.
90class X86SchedWriteSizes<X86SchedWriteWidths sPS,
91                         X86SchedWriteWidths sPD> {
92  X86SchedWriteWidths PS = sPS; // Presumably packed single precision - confirm against users.
93  X86SchedWriteWidths PD = sPD; // Presumably packed double precision - confirm against users.
94}
95
96// Class that wraps move/load/store triple for a vector width.
97class X86SchedWriteMoveLS<SchedWrite MoveRR,
98                          SchedWrite LoadRM,
99                          SchedWrite StoreMR> {
100  SchedWrite RR = MoveRR;  // The move write.
101  SchedWrite RM = LoadRM;  // The load write.
102  SchedWrite MR = StoreMR; // The store write.
103}
104
105// Class that wraps X86SchedWriteMoveLS for each vector width.
106class X86SchedWriteMoveLSWidths<X86SchedWriteMoveLS sScl,
107                                X86SchedWriteMoveLS s128,
108                                X86SchedWriteMoveLS s256,
109                                X86SchedWriteMoveLS s512> {
110  X86SchedWriteMoveLS Scl = sScl; // Scalar float/double operations.
111  X86SchedWriteMoveLS MMX = sScl; // MMX operations (same wrapper as Scl).
112  X86SchedWriteMoveLS XMM = s128; // XMM operations.
113  X86SchedWriteMoveLS YMM = s256; // YMM operations.
114  X86SchedWriteMoveLS ZMM = s512; // ZMM operations.
115}
116
117// Loads, stores, and moves, not folded with other operations.
118def WriteLoad    : SchedWrite;
119def WriteStore   : SchedWrite;
120def WriteStoreNT : SchedWrite;
121def WriteMove    : SchedWrite;
122def WriteCopy    : WriteSequence<[WriteLoad, WriteStore]>; // mem->mem copy
123
124// Arithmetic.
125defm WriteALU    : X86SchedWritePair; // Simple integer ALU op.
126defm WriteADC    : X86SchedWritePair; // Integer ALU + flags op.
127def  WriteALURMW : WriteSequence<[WriteALULd, WriteRMW]>; // Folded-load ALU op followed by WriteRMW.
128def  WriteADCRMW : WriteSequence<[WriteADCLd, WriteRMW]>; // Folded-load ADC op followed by WriteRMW.
129def  WriteLEA    : SchedWrite;        // LEA instructions can't fold loads.
130
131// Integer multiplication
132defm WriteIMul8     : X86SchedWritePair; // Integer 8-bit multiplication.
133defm WriteIMul16    : X86SchedWritePair; // Integer 16-bit multiplication.
134defm WriteIMul16Imm : X86SchedWritePair; // Integer 16-bit multiplication by immediate.
135defm WriteIMul16Reg : X86SchedWritePair; // Integer 16-bit multiplication by register.
136defm WriteIMul32    : X86SchedWritePair; // Integer 32-bit multiplication.
137defm WriteIMul32Imm : X86SchedWritePair; // Integer 32-bit multiplication by immediate.
138defm WriteIMul32Reg : X86SchedWritePair; // Integer 32-bit multiplication by register.
139defm WriteIMul64    : X86SchedWritePair; // Integer 64-bit multiplication.
140defm WriteIMul64Imm : X86SchedWritePair; // Integer 64-bit multiplication by immediate.
141defm WriteIMul64Reg : X86SchedWritePair; // Integer 64-bit multiplication by register.
142def  WriteIMulH     : SchedWrite;        // Integer multiplication, high part.
143
144def  WriteBSWAP32 : SchedWrite; // Byte Order (Endianness) 32-bit Swap.
145def  WriteBSWAP64 : SchedWrite; // Byte Order (Endianness) 64-bit Swap.
146defm WriteCMPXCHG : X86SchedWritePair; // Compare and set, compare and swap.
147def  WriteCMPXCHGRMW : SchedWrite;     // Compare and set, compare and swap.
148def  WriteXCHG    : SchedWrite;        // Compare+Exchange - TODO RMW support.
149
150// Integer division.
151defm WriteDiv8   : X86SchedWritePair;
152defm WriteDiv16  : X86SchedWritePair;
153defm WriteDiv32  : X86SchedWritePair;
154defm WriteDiv64  : X86SchedWritePair;
155defm WriteIDiv8  : X86SchedWritePair;
156defm WriteIDiv16 : X86SchedWritePair;
157defm WriteIDiv32 : X86SchedWritePair;
158defm WriteIDiv64 : X86SchedWritePair;
159
160defm WriteBSF : X86SchedWritePair; // Bit scan forward.
161defm WriteBSR : X86SchedWritePair; // Bit scan reverse.
162defm WritePOPCNT : X86SchedWritePair; // Bit population count.
163defm WriteLZCNT : X86SchedWritePair; // Leading zero count.
164defm WriteTZCNT : X86SchedWritePair; // Trailing zero count.
165defm WriteCMOV  : X86SchedWritePair; // Conditional move.
166def  WriteFCMOV : SchedWrite; // X87 conditional move.
167def  WriteSETCC : SchedWrite; // Set register based on condition code.
168def  WriteSETCCStore : SchedWrite;
169def  WriteLAHFSAHF : SchedWrite; // Load/Store flags in AH.
170
171def  WriteBitTest      : SchedWrite; // Bit Test
172def  WriteBitTestImmLd : SchedWrite;
173def  WriteBitTestRegLd : SchedWrite;
174
175def  WriteBitTestSet       : SchedWrite; // Bit Test + Set
176def  WriteBitTestSetImmLd  : SchedWrite;
177def  WriteBitTestSetRegLd  : SchedWrite;
178def  WriteBitTestSetImmRMW : WriteSequence<[WriteBitTestSetImmLd, WriteRMW]>; // WriteBitTestSetImmLd followed by WriteRMW.
179def  WriteBitTestSetRegRMW : WriteSequence<[WriteBitTestSetRegLd, WriteRMW]>; // WriteBitTestSetRegLd followed by WriteRMW.
180
181// Integer shifts and rotates.
182defm WriteShift    : X86SchedWritePair;
183defm WriteShiftCL  : X86SchedWritePair;
184defm WriteRotate   : X86SchedWritePair;
185defm WriteRotateCL : X86SchedWritePair;
186
187// Double shift instructions.
188def  WriteSHDrri  : SchedWrite;
189def  WriteSHDrrcl : SchedWrite;
190def  WriteSHDmri  : SchedWrite;
191def  WriteSHDmrcl : SchedWrite;
192
193// BMI1 BEXTR/BLS, BMI2 BZHI
194defm WriteBEXTR : X86SchedWritePair;
195defm WriteBLS   : X86SchedWritePair;
196defm WriteBZHI  : X86SchedWritePair;
197
198// Idioms that clear a register, like xorps %xmm0, %xmm0.
199// These can often bypass execution ports completely.
200def WriteZero : SchedWrite;
201
202// Branches don't produce values, so they have no latency, but they still
203// consume resources. Indirect branches can fold loads.
204defm WriteJump : X86SchedWritePair;
205
206// Floating point. This covers both scalar and vector operations.
207def  WriteFLD0          : SchedWrite;
208def  WriteFLD1          : SchedWrite;
209def  WriteFLDC          : SchedWrite;
210def  WriteFLoad         : SchedWrite;
211def  WriteFLoadX        : SchedWrite;
212def  WriteFLoadY        : SchedWrite;
213def  WriteFMaskedLoad   : SchedWrite;
214def  WriteFMaskedLoadY  : SchedWrite;
215def  WriteFStore        : SchedWrite;
216def  WriteFStoreX       : SchedWrite;
217def  WriteFStoreY       : SchedWrite;
218def  WriteFStoreNT      : SchedWrite;
219def  WriteFStoreNTX     : SchedWrite;
220def  WriteFStoreNTY     : SchedWrite;
221def  WriteFMaskedStore  : SchedWrite;
222def  WriteFMaskedStoreY : SchedWrite;
223def  WriteFMove         : SchedWrite;
224def  WriteFMoveX        : SchedWrite;
225def  WriteFMoveY        : SchedWrite;
226
227defm WriteFAdd    : X86SchedWritePair<ReadAfterVecLd>;  // Floating point add/sub.
228defm WriteFAddX   : X86SchedWritePair<ReadAfterVecXLd>; // Floating point add/sub (XMM).
229defm WriteFAddY   : X86SchedWritePair<ReadAfterVecYLd>; // Floating point add/sub (YMM).
230defm WriteFAddZ   : X86SchedWritePair<ReadAfterVecYLd>; // Floating point add/sub (ZMM).
231defm WriteFAdd64  : X86SchedWritePair<ReadAfterVecLd>;  // Floating point double add/sub.
232defm WriteFAdd64X : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double add/sub (XMM).
233defm WriteFAdd64Y : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double add/sub (YMM).
234defm WriteFAdd64Z : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double add/sub (ZMM).
235defm WriteFCmp    : X86SchedWritePair<ReadAfterVecLd>;  // Floating point compare.
236defm WriteFCmpX   : X86SchedWritePair<ReadAfterVecXLd>; // Floating point compare (XMM).
237defm WriteFCmpY   : X86SchedWritePair<ReadAfterVecYLd>; // Floating point compare (YMM).
238defm WriteFCmpZ   : X86SchedWritePair<ReadAfterVecYLd>; // Floating point compare (ZMM).
239defm WriteFCmp64  : X86SchedWritePair<ReadAfterVecLd>;  // Floating point double compare.
240defm WriteFCmp64X : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double compare (XMM).
241defm WriteFCmp64Y : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double compare (YMM).
242defm WriteFCmp64Z : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double compare (ZMM).
243defm WriteFCom    : X86SchedWritePair<ReadAfterVecLd>;  // Floating point compare to flags.
244defm WriteFMul    : X86SchedWritePair<ReadAfterVecLd>;  // Floating point multiplication.
245defm WriteFMulX   : X86SchedWritePair<ReadAfterVecXLd>; // Floating point multiplication (XMM).
246defm WriteFMulY   : X86SchedWritePair<ReadAfterVecYLd>; // Floating point multiplication (YMM).
247defm WriteFMulZ   : X86SchedWritePair<ReadAfterVecYLd>; // Floating point multiplication (ZMM).
248defm WriteFMul64  : X86SchedWritePair<ReadAfterVecLd>;  // Floating point double multiplication.
249defm WriteFMul64X : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double multiplication (XMM).
250defm WriteFMul64Y : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double multiplication (YMM).
251defm WriteFMul64Z : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double multiplication (ZMM).
252defm WriteFDiv    : X86SchedWritePair<ReadAfterVecLd>;  // Floating point division.
253defm WriteFDivX   : X86SchedWritePair<ReadAfterVecXLd>; // Floating point division (XMM).
254defm WriteFDivY   : X86SchedWritePair<ReadAfterVecYLd>; // Floating point division (YMM).
255defm WriteFDivZ   : X86SchedWritePair<ReadAfterVecYLd>; // Floating point division (ZMM).
256defm WriteFDiv64  : X86SchedWritePair<ReadAfterVecLd>;  // Floating point double division.
257defm WriteFDiv64X : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double division (XMM).
258defm WriteFDiv64Y : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double division (YMM).
259defm WriteFDiv64Z : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double division (ZMM).
260defm WriteFSqrt  : X86SchedWritePair<ReadAfterVecLd>;   // Floating point square root.
261defm WriteFSqrtX : X86SchedWritePair<ReadAfterVecXLd>;  // Floating point square root (XMM).
262defm WriteFSqrtY : X86SchedWritePair<ReadAfterVecYLd>;  // Floating point square root (YMM).
263defm WriteFSqrtZ : X86SchedWritePair<ReadAfterVecYLd>;  // Floating point square root (ZMM).
264defm WriteFSqrt64  : X86SchedWritePair<ReadAfterVecLd>;  // Floating point double square root.
265defm WriteFSqrt64X : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double square root (XMM).
266defm WriteFSqrt64Y : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double square root (YMM).
267defm WriteFSqrt64Z : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double square root (ZMM).
268defm WriteFSqrt80  : X86SchedWritePair<ReadAfterVecLd>;  // Floating point long double square root.
269defm WriteFRcp   : X86SchedWritePair<ReadAfterVecLd>;  // Floating point reciprocal estimate.
270defm WriteFRcpX  : X86SchedWritePair<ReadAfterVecXLd>; // Floating point reciprocal estimate (XMM).
271defm WriteFRcpY  : X86SchedWritePair<ReadAfterVecYLd>; // Floating point reciprocal estimate (YMM).
272defm WriteFRcpZ  : X86SchedWritePair<ReadAfterVecYLd>; // Floating point reciprocal estimate (ZMM).
273defm WriteFRsqrt : X86SchedWritePair<ReadAfterVecLd>;  // Floating point reciprocal square root estimate.
274defm WriteFRsqrtX: X86SchedWritePair<ReadAfterVecXLd>; // Floating point reciprocal square root estimate (XMM).
275defm WriteFRsqrtY: X86SchedWritePair<ReadAfterVecYLd>; // Floating point reciprocal square root estimate (YMM).
276defm WriteFRsqrtZ: X86SchedWritePair<ReadAfterVecYLd>; // Floating point reciprocal square root estimate (ZMM).
277defm WriteFMA    : X86SchedWritePair<ReadAfterVecLd>;  // Fused Multiply Add.
278defm WriteFMAX   : X86SchedWritePair<ReadAfterVecXLd>; // Fused Multiply Add (XMM).
279defm WriteFMAY   : X86SchedWritePair<ReadAfterVecYLd>; // Fused Multiply Add (YMM).
280defm WriteFMAZ   : X86SchedWritePair<ReadAfterVecYLd>; // Fused Multiply Add (ZMM).
281defm WriteDPPD   : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double dot product.
282defm WriteDPPS   : X86SchedWritePair<ReadAfterVecXLd>; // Floating point single dot product.
283defm WriteDPPSY  : X86SchedWritePair<ReadAfterVecYLd>; // Floating point single dot product (YMM).
284defm WriteDPPSZ  : X86SchedWritePair<ReadAfterVecYLd>; // Floating point single dot product (ZMM).
285defm WriteFSign  : X86SchedWritePair<ReadAfterVecLd>;  // Floating point fabs/fchs.
286defm WriteFRnd   : X86SchedWritePair<ReadAfterVecXLd>; // Floating point rounding.
287defm WriteFRndY  : X86SchedWritePair<ReadAfterVecYLd>; // Floating point rounding (YMM).
288defm WriteFRndZ  : X86SchedWritePair<ReadAfterVecYLd>; // Floating point rounding (ZMM).
289defm WriteFLogic  : X86SchedWritePair<ReadAfterVecXLd>; // Floating point and/or/xor logicals.
290defm WriteFLogicY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point and/or/xor logicals (YMM).
291defm WriteFLogicZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point and/or/xor logicals (ZMM).
292defm WriteFTest   : X86SchedWritePair<ReadAfterVecXLd>; // Floating point TEST instructions.
293defm WriteFTestY  : X86SchedWritePair<ReadAfterVecYLd>; // Floating point TEST instructions (YMM).
294defm WriteFTestZ  : X86SchedWritePair<ReadAfterVecYLd>; // Floating point TEST instructions (ZMM).
295defm WriteFShuffle  : X86SchedWritePair<ReadAfterVecXLd>; // Floating point vector shuffles.
296defm WriteFShuffleY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector shuffles (YMM).
297defm WriteFShuffleZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector shuffles (ZMM).
298defm WriteFVarShuffle  : X86SchedWritePair<ReadAfterVecXLd>; // Floating point vector variable shuffles.
299defm WriteFVarShuffleY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector variable shuffles (YMM).
300defm WriteFVarShuffleZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector variable shuffles (ZMM).
301defm WriteFBlend  : X86SchedWritePair<ReadAfterVecXLd>; // Floating point vector blends.
302defm WriteFBlendY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector blends (YMM).
303defm WriteFBlendZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector blends (ZMM).
304defm WriteFVarBlend  : X86SchedWritePair<ReadAfterVecXLd>; // Fp vector variable blends.
305defm WriteFVarBlendY : X86SchedWritePair<ReadAfterVecYLd>; // Fp vector variable blends (YMM).
306defm WriteFVarBlendZ : X86SchedWritePair<ReadAfterVecYLd>; // Fp vector variable blends (ZMM).
307
308// FMA Scheduling helper class. NOTE(review): Sched defaults to WriteFAdd rather than WriteFMA - presumably overridden by users; confirm.
309class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
310
311// Horizontal Add/Sub (float and integer)
312defm WriteFHAdd  : X86SchedWritePair<ReadAfterVecXLd>;
313defm WriteFHAddY : X86SchedWritePair<ReadAfterVecYLd>;
314defm WriteFHAddZ : X86SchedWritePair<ReadAfterVecYLd>;
315defm WritePHAdd  : X86SchedWritePair<ReadAfterVecLd>;
316defm WritePHAddX : X86SchedWritePair<ReadAfterVecXLd>;
317defm WritePHAddY : X86SchedWritePair<ReadAfterVecYLd>;
318defm WritePHAddZ : X86SchedWritePair<ReadAfterVecYLd>;
319
320// Vector integer operations.
321def  WriteVecLoad         : SchedWrite;
322def  WriteVecLoadX        : SchedWrite;
323def  WriteVecLoadY        : SchedWrite;
324def  WriteVecLoadNT       : SchedWrite;
325def  WriteVecLoadNTY      : SchedWrite;
326def  WriteVecMaskedLoad   : SchedWrite;
327def  WriteVecMaskedLoadY  : SchedWrite;
328def  WriteVecStore        : SchedWrite;
329def  WriteVecStoreX       : SchedWrite;
330def  WriteVecStoreY       : SchedWrite;
331def  WriteVecStoreNT      : SchedWrite;
332def  WriteVecStoreNTY     : SchedWrite;
333def  WriteVecMaskedStore  : SchedWrite;
334def  WriteVecMaskedStoreY : SchedWrite;
335def  WriteVecMove         : SchedWrite;
336def  WriteVecMoveX        : SchedWrite;
337def  WriteVecMoveY        : SchedWrite;
338def  WriteVecMoveToGpr    : SchedWrite;
339def  WriteVecMoveFromGpr  : SchedWrite;
340
341defm WriteVecALU    : X86SchedWritePair<ReadAfterVecLd>;  // Vector integer ALU op, no logicals.
342defm WriteVecALUX   : X86SchedWritePair<ReadAfterVecXLd>; // Vector integer ALU op, no logicals (XMM).
343defm WriteVecALUY   : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer ALU op, no logicals (YMM).
344defm WriteVecALUZ   : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer ALU op, no logicals (ZMM).
345defm WriteVecLogic  : X86SchedWritePair<ReadAfterVecLd>;  // Vector integer and/or/xor logicals.
346defm WriteVecLogicX : X86SchedWritePair<ReadAfterVecXLd>; // Vector integer and/or/xor logicals (XMM).
347defm WriteVecLogicY : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer and/or/xor logicals (YMM).
348defm WriteVecLogicZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer and/or/xor logicals (ZMM).
349defm WriteVecTest  : X86SchedWritePair<ReadAfterVecXLd>;  // Vector integer TEST instructions.
350defm WriteVecTestY : X86SchedWritePair<ReadAfterVecYLd>;  // Vector integer TEST instructions (YMM).
351defm WriteVecTestZ : X86SchedWritePair<ReadAfterVecYLd>;  // Vector integer TEST instructions (ZMM).
352defm WriteVecShift  : X86SchedWritePair<ReadAfterVecLd>;  // Vector integer shifts (default).
353defm WriteVecShiftX : X86SchedWritePair<ReadAfterVecXLd>; // Vector integer shifts (XMM).
354defm WriteVecShiftY : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer shifts (YMM).
355defm WriteVecShiftZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer shifts (ZMM).
356defm WriteVecShiftImm : X86SchedWritePair<ReadAfterVecLd>;  // Vector integer immediate shifts (default).
357defm WriteVecShiftImmX: X86SchedWritePair<ReadAfterVecXLd>; // Vector integer immediate shifts (XMM).
358defm WriteVecShiftImmY: X86SchedWritePair<ReadAfterVecYLd>; // Vector integer immediate shifts (YMM).
359defm WriteVecShiftImmZ: X86SchedWritePair<ReadAfterVecYLd>; // Vector integer immediate shifts (ZMM).
360defm WriteVecIMul  : X86SchedWritePair<ReadAfterVecLd>;  // Vector integer multiply (default).
361defm WriteVecIMulX : X86SchedWritePair<ReadAfterVecXLd>; // Vector integer multiply (XMM).
362defm WriteVecIMulY : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer multiply (YMM).
363defm WriteVecIMulZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer multiply (ZMM).
364defm WritePMULLD   : X86SchedWritePair<ReadAfterVecXLd>; // Vector PMULLD.
365defm WritePMULLDY  : X86SchedWritePair<ReadAfterVecYLd>; // Vector PMULLD (YMM).
366defm WritePMULLDZ  : X86SchedWritePair<ReadAfterVecYLd>; // Vector PMULLD (ZMM).
367defm WriteShuffle  : X86SchedWritePair<ReadAfterVecLd>;  // Vector shuffles.
368defm WriteShuffleX : X86SchedWritePair<ReadAfterVecXLd>; // Vector shuffles (XMM).
369defm WriteShuffleY : X86SchedWritePair<ReadAfterVecYLd>; // Vector shuffles (YMM).
370defm WriteShuffleZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector shuffles (ZMM).
371defm WriteVarShuffle  : X86SchedWritePair<ReadAfterVecLd>;  // Vector variable shuffles.
372defm WriteVarShuffleX : X86SchedWritePair<ReadAfterVecXLd>; // Vector variable shuffles (XMM).
373defm WriteVarShuffleY : X86SchedWritePair<ReadAfterVecYLd>; // Vector variable shuffles (YMM).
374defm WriteVarShuffleZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector variable shuffles (ZMM).
375defm WriteBlend  : X86SchedWritePair<ReadAfterVecXLd>; // Vector blends.
376defm WriteBlendY : X86SchedWritePair<ReadAfterVecYLd>; // Vector blends (YMM).
377defm WriteBlendZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector blends (ZMM).
378defm WriteVarBlend  : X86SchedWritePair<ReadAfterVecXLd>; // Vector variable blends.
379defm WriteVarBlendY : X86SchedWritePair<ReadAfterVecYLd>; // Vector variable blends (YMM).
380defm WriteVarBlendZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector variable blends (ZMM).
381defm WritePSADBW  : X86SchedWritePair<ReadAfterVecLd>;  // Vector PSADBW.
382defm WritePSADBWX : X86SchedWritePair<ReadAfterVecXLd>; // Vector PSADBW (XMM).
383defm WritePSADBWY : X86SchedWritePair<ReadAfterVecYLd>; // Vector PSADBW (YMM).
384defm WritePSADBWZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector PSADBW (ZMM).
385defm WriteMPSAD  : X86SchedWritePair<ReadAfterVecXLd>; // Vector MPSAD.
386defm WriteMPSADY : X86SchedWritePair<ReadAfterVecYLd>; // Vector MPSAD (YMM).
387defm WriteMPSADZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector MPSAD (ZMM).
388defm WritePHMINPOS : X86SchedWritePair<ReadAfterVecXLd>;  // Vector PHMINPOS.
389
390// Vector insert/extract operations.
391defm WriteVecInsert : X86SchedWritePair; // Insert gpr to vector element.
392def  WriteVecExtract : SchedWrite; // Extract vector element to gpr.
393def  WriteVecExtractSt : SchedWrite; // Extract vector element and store.
394
395// MOVMSK operations.
396def WriteFMOVMSK    : SchedWrite;
397def WriteVecMOVMSK  : SchedWrite;
398def WriteVecMOVMSKY : SchedWrite;
399def WriteMMXMOVMSK  : SchedWrite;
400
401// Conversion between integer and float.
402defm WriteCvtSD2I  : X86SchedWritePair<ReadAfterVecLd>;  // Double -> Integer.
403defm WriteCvtPD2I  : X86SchedWritePair<ReadAfterVecXLd>; // Double -> Integer (XMM).
404defm WriteCvtPD2IY : X86SchedWritePair<ReadAfterVecYLd>; // Double -> Integer (YMM).
405defm WriteCvtPD2IZ : X86SchedWritePair<ReadAfterVecYLd>; // Double -> Integer (ZMM).
406
407defm WriteCvtSS2I  : X86SchedWritePair<ReadAfterVecLd>;  // Float -> Integer.
408defm WriteCvtPS2I  : X86SchedWritePair<ReadAfterVecXLd>; // Float -> Integer (XMM).
409defm WriteCvtPS2IY : X86SchedWritePair<ReadAfterVecYLd>; // Float -> Integer (YMM).
410defm WriteCvtPS2IZ : X86SchedWritePair<ReadAfterVecYLd>; // Float -> Integer (ZMM).
411
412defm WriteCvtI2SD  : X86SchedWritePair<ReadAfterVecLd>;  // Integer -> Double.
413defm WriteCvtI2PD  : X86SchedWritePair<ReadAfterVecXLd>; // Integer -> Double (XMM).
414defm WriteCvtI2PDY : X86SchedWritePair<ReadAfterVecYLd>; // Integer -> Double (YMM).
415defm WriteCvtI2PDZ : X86SchedWritePair<ReadAfterVecYLd>; // Integer -> Double (ZMM).
416
417defm WriteCvtI2SS  : X86SchedWritePair<ReadAfterVecLd>;  // Integer -> Float.
418defm WriteCvtI2PS  : X86SchedWritePair<ReadAfterVecXLd>; // Integer -> Float (XMM).
419defm WriteCvtI2PSY : X86SchedWritePair<ReadAfterVecYLd>; // Integer -> Float (YMM).
420defm WriteCvtI2PSZ : X86SchedWritePair<ReadAfterVecYLd>; // Integer -> Float (ZMM).
421
422defm WriteCvtSS2SD  : X86SchedWritePair<ReadAfterVecLd>;  // Float -> Double size conversion.
423defm WriteCvtPS2PD  : X86SchedWritePair<ReadAfterVecXLd>; // Float -> Double size conversion (XMM).
424defm WriteCvtPS2PDY : X86SchedWritePair<ReadAfterVecYLd>; // Float -> Double size conversion (YMM).
425defm WriteCvtPS2PDZ : X86SchedWritePair<ReadAfterVecYLd>; // Float -> Double size conversion (ZMM).
426
427defm WriteCvtSD2SS  : X86SchedWritePair<ReadAfterVecLd>;  // Double -> Float size conversion.
428defm WriteCvtPD2PS  : X86SchedWritePair<ReadAfterVecXLd>; // Double -> Float size conversion (XMM).
429defm WriteCvtPD2PSY : X86SchedWritePair<ReadAfterVecYLd>; // Double -> Float size conversion (YMM).
430defm WriteCvtPD2PSZ : X86SchedWritePair<ReadAfterVecYLd>; // Double -> Float size conversion (ZMM).
431
432defm WriteCvtPH2PS  : X86SchedWritePair<ReadAfterVecXLd>; // Half -> Float size conversion.
433defm WriteCvtPH2PSY : X86SchedWritePair<ReadAfterVecYLd>; // Half -> Float size conversion (YMM).
434defm WriteCvtPH2PSZ : X86SchedWritePair<ReadAfterVecYLd>; // Half -> Float size conversion (ZMM).
435
436def  WriteCvtPS2PH    : SchedWrite; // Float -> Half size conversion.
437def  WriteCvtPS2PHY   : SchedWrite; // Float -> Half size conversion (YMM).
438def  WriteCvtPS2PHZ   : SchedWrite; // Float -> Half size conversion (ZMM).
439def  WriteCvtPS2PHSt  : SchedWrite; // Float -> Half + store size conversion.
440def  WriteCvtPS2PHYSt : SchedWrite; // Float -> Half + store size conversion (YMM).
441def  WriteCvtPS2PHZSt : SchedWrite; // Float -> Half + store size conversion (ZMM).
442
443// CRC32 instruction.
444defm WriteCRC32 : X86SchedWritePair<ReadAfterLd>;
445
446// Strings instructions.
447// Packed Compare Implicit Length Strings, Return Mask
448defm WritePCmpIStrM : X86SchedWritePair<ReadAfterVecXLd>;
449// Packed Compare Explicit Length Strings, Return Mask
450defm WritePCmpEStrM : X86SchedWritePair<ReadAfterVecXLd>;
451// Packed Compare Implicit Length Strings, Return Index
452defm WritePCmpIStrI : X86SchedWritePair<ReadAfterVecXLd>;
453// Packed Compare Explicit Length Strings, Return Index
454defm WritePCmpEStrI : X86SchedWritePair<ReadAfterVecXLd>;
455
456// AES instructions.
457defm WriteAESDecEnc : X86SchedWritePair<ReadAfterVecXLd>; // Decryption, encryption.
458defm WriteAESIMC : X86SchedWritePair<ReadAfterVecXLd>; // InvMixColumn.
459defm WriteAESKeyGen : X86SchedWritePair<ReadAfterVecXLd>; // Key Generation.
460
461// Carry-less multiplication instructions.
462defm WriteCLMul : X86SchedWritePair<ReadAfterVecXLd>;
463
464// EMMS/FEMMS
465def WriteEMMS : SchedWrite;
466
467// Load/store MXCSR
468def WriteLDMXCSR : SchedWrite;
469def WriteSTMXCSR : SchedWrite;
470
471// Catch-all for expensive system instructions.
472def WriteSystem : SchedWrite;
473
474// AVX2.
475defm WriteFShuffle256 : X86SchedWritePair<ReadAfterVecYLd>; // Fp 256-bit width vector shuffles.
476defm WriteFVarShuffle256 : X86SchedWritePair<ReadAfterVecYLd>; // Fp 256-bit width variable shuffles.
477defm WriteShuffle256 : X86SchedWritePair<ReadAfterVecYLd>; // 256-bit width vector shuffles.
478defm WriteVarShuffle256 : X86SchedWritePair<ReadAfterVecYLd>; // 256-bit width vector variable shuffles.
479defm WriteVarVecShift  : X86SchedWritePair<ReadAfterVecXLd>; // Variable vector shifts.
480defm WriteVarVecShiftY : X86SchedWritePair<ReadAfterVecYLd>; // Variable vector shifts (YMM).
481defm WriteVarVecShiftZ : X86SchedWritePair<ReadAfterVecYLd>; // Variable vector shifts (ZMM).
482
483// Old microcoded instructions that nobody uses.
484def WriteMicrocoded : SchedWrite;
485
486// Fence instructions.
487def WriteFence : SchedWrite;
488
489// Nop, not very useful except it provides a model for nops!
490def WriteNop : SchedWrite;
491
492// Move/Load/Store wrappers.
493def WriteFMoveLS
494 : X86SchedWriteMoveLS<WriteFMove, WriteFLoad, WriteFStore>;
495def WriteFMoveLSX
496 : X86SchedWriteMoveLS<WriteFMoveX, WriteFLoadX, WriteFStoreX>;
497def WriteFMoveLSY
498 : X86SchedWriteMoveLS<WriteFMoveY, WriteFLoadY, WriteFStoreY>;
499def SchedWriteFMoveLS
500  : X86SchedWriteMoveLSWidths<WriteFMoveLS, WriteFMoveLSX,
501                              WriteFMoveLSY, WriteFMoveLSY>; // ZMM reuses the YMM wrapper.
502
503def WriteFMoveLSNT
504 : X86SchedWriteMoveLS<WriteFMove, WriteFLoad, WriteFStoreNT>;
505def WriteFMoveLSNTX
506 : X86SchedWriteMoveLS<WriteFMove, WriteFLoad, WriteFStoreNTX>; // NOTE(review): uses WriteFMove/WriteFLoad, not the X variants that WriteFMoveLSX uses - confirm intended.
507def WriteFMoveLSNTY
508 : X86SchedWriteMoveLS<WriteFMoveY, WriteFLoadY, WriteFStoreNTY>;
509def SchedWriteFMoveLSNT
510  : X86SchedWriteMoveLSWidths<WriteFMoveLSNT, WriteFMoveLSNTX,
511                              WriteFMoveLSNTY, WriteFMoveLSNTY>; // ZMM reuses the YMM wrapper.
512
513def WriteVecMoveLS
514 : X86SchedWriteMoveLS<WriteVecMove, WriteVecLoad, WriteVecStore>;
515def WriteVecMoveLSX
516 : X86SchedWriteMoveLS<WriteVecMoveX, WriteVecLoadX, WriteVecStoreX>;
517def WriteVecMoveLSY
518 : X86SchedWriteMoveLS<WriteVecMoveY, WriteVecLoadY, WriteVecStoreY>;
519def SchedWriteVecMoveLS
520  : X86SchedWriteMoveLSWidths<WriteVecMoveLS, WriteVecMoveLSX,
521                              WriteVecMoveLSY, WriteVecMoveLSY>; // ZMM reuses the YMM wrapper.
522
523def WriteVecMoveLSNT
524 : X86SchedWriteMoveLS<WriteVecMove, WriteVecLoadNT, WriteVecStoreNT>;
525def WriteVecMoveLSNTX
526 : X86SchedWriteMoveLS<WriteVecMoveX, WriteVecLoadNT, WriteVecStoreNT>;
527def WriteVecMoveLSNTY
528 : X86SchedWriteMoveLS<WriteVecMoveY, WriteVecLoadNTY, WriteVecStoreNTY>;
529def SchedWriteVecMoveLSNT
530  : X86SchedWriteMoveLSWidths<WriteVecMoveLSNT, WriteVecMoveLSNTX,
531                              WriteVecMoveLSNTY, WriteVecMoveLSNTY>; // ZMM reuses the YMM wrapper.
532
533// Vector width wrappers.
534def SchedWriteFAdd // Slots are passed in the order: unsuffixed, X, Y, Z write (presumably increasing vector width - confirm against X86SchedWriteWidths).
535 : X86SchedWriteWidths<WriteFAdd, WriteFAddX, WriteFAddY, WriteFAddZ>;
536def SchedWriteFAdd64
537 : X86SchedWriteWidths<WriteFAdd64, WriteFAdd64X, WriteFAdd64Y, WriteFAdd64Z>;
538def SchedWriteFHAdd // First two slots both use WriteFHAdd (no X-suffixed write is passed).
539 : X86SchedWriteWidths<WriteFHAdd, WriteFHAdd, WriteFHAddY, WriteFHAddZ>;
540def SchedWriteFCmp
541 : X86SchedWriteWidths<WriteFCmp, WriteFCmpX, WriteFCmpY, WriteFCmpZ>;
542def SchedWriteFCmp64
543 : X86SchedWriteWidths<WriteFCmp64, WriteFCmp64X, WriteFCmp64Y, WriteFCmp64Z>;
544def SchedWriteFMul
545 : X86SchedWriteWidths<WriteFMul, WriteFMulX, WriteFMulY, WriteFMulZ>;
546def SchedWriteFMul64
547 : X86SchedWriteWidths<WriteFMul64, WriteFMul64X, WriteFMul64Y, WriteFMul64Z>;
548def SchedWriteFMA
549 : X86SchedWriteWidths<WriteFMA, WriteFMAX, WriteFMAY, WriteFMAZ>;
550def SchedWriteDPPD // All four slots use the single WriteDPPD write.
551 : X86SchedWriteWidths<WriteDPPD, WriteDPPD, WriteDPPD, WriteDPPD>;
552def SchedWriteDPPS // First two slots both use WriteDPPS.
553 : X86SchedWriteWidths<WriteDPPS, WriteDPPS, WriteDPPSY, WriteDPPSZ>;
554def SchedWriteFDiv
555 : X86SchedWriteWidths<WriteFDiv, WriteFDivX, WriteFDivY, WriteFDivZ>;
556def SchedWriteFDiv64
557 : X86SchedWriteWidths<WriteFDiv64, WriteFDiv64X, WriteFDiv64Y, WriteFDiv64Z>;
558def SchedWriteFSqrt
559 : X86SchedWriteWidths<WriteFSqrt, WriteFSqrtX,
560                       WriteFSqrtY, WriteFSqrtZ>;
561def SchedWriteFSqrt64
562 : X86SchedWriteWidths<WriteFSqrt64, WriteFSqrt64X,
563                       WriteFSqrt64Y, WriteFSqrt64Z>;
564def SchedWriteFRcp
565 : X86SchedWriteWidths<WriteFRcp, WriteFRcpX, WriteFRcpY, WriteFRcpZ>;
566def SchedWriteFRsqrt
567 : X86SchedWriteWidths<WriteFRsqrt, WriteFRsqrtX, WriteFRsqrtY, WriteFRsqrtZ>;
568def SchedWriteFRnd // First two slots both use WriteFRnd.
569 : X86SchedWriteWidths<WriteFRnd, WriteFRnd, WriteFRndY, WriteFRndZ>;
570def SchedWriteFLogic // First two slots both use WriteFLogic.
571 : X86SchedWriteWidths<WriteFLogic, WriteFLogic, WriteFLogicY, WriteFLogicZ>;
572def SchedWriteFTest // First two slots both use WriteFTest.
573 : X86SchedWriteWidths<WriteFTest, WriteFTest, WriteFTestY, WriteFTestZ>;
574
575def SchedWriteFShuffle // In every wrapper of this group the first two slots share the unsuffixed write.
576 : X86SchedWriteWidths<WriteFShuffle, WriteFShuffle,
577                       WriteFShuffleY, WriteFShuffleZ>;
578def SchedWriteFVarShuffle
579 : X86SchedWriteWidths<WriteFVarShuffle, WriteFVarShuffle,
580                       WriteFVarShuffleY, WriteFVarShuffleZ>;
581def SchedWriteFBlend
582 : X86SchedWriteWidths<WriteFBlend, WriteFBlend, WriteFBlendY, WriteFBlendZ>;
583def SchedWriteFVarBlend
584 : X86SchedWriteWidths<WriteFVarBlend, WriteFVarBlend,
585                       WriteFVarBlendY, WriteFVarBlendZ>;
586
587def SchedWriteCvtDQ2PD // In this group the first slot takes a differently named write (here WriteCvtI2SD) from the unsuffixed/Y/Z family in slots 2-4.
588 : X86SchedWriteWidths<WriteCvtI2SD, WriteCvtI2PD,
589                       WriteCvtI2PDY, WriteCvtI2PDZ>;
590def SchedWriteCvtDQ2PS // First slot: WriteCvtI2SS; remaining slots: WriteCvtI2PS family.
591 : X86SchedWriteWidths<WriteCvtI2SS, WriteCvtI2PS,
592                       WriteCvtI2PSY, WriteCvtI2PSZ>;
593def SchedWriteCvtPD2DQ // First slot: WriteCvtSD2I; remaining slots: WriteCvtPD2I family.
594 : X86SchedWriteWidths<WriteCvtSD2I, WriteCvtPD2I,
595                       WriteCvtPD2IY, WriteCvtPD2IZ>;
596def SchedWriteCvtPS2DQ // First slot: WriteCvtSS2I; remaining slots: WriteCvtPS2I family.
597 : X86SchedWriteWidths<WriteCvtSS2I, WriteCvtPS2I,
598                       WriteCvtPS2IY, WriteCvtPS2IZ>;
599def SchedWriteCvtPS2PD // First slot: WriteCvtSS2SD; remaining slots: WriteCvtPS2PD family.
600 : X86SchedWriteWidths<WriteCvtSS2SD, WriteCvtPS2PD,
601                       WriteCvtPS2PDY, WriteCvtPS2PDZ>;
602def SchedWriteCvtPD2PS // First slot: WriteCvtSD2SS; remaining slots: WriteCvtPD2PS family.
603 : X86SchedWriteWidths<WriteCvtSD2SS, WriteCvtPD2PS,
604                       WriteCvtPD2PSY, WriteCvtPD2PSZ>;
605
606def SchedWriteVecALU // Slots are passed in the order: unsuffixed, X, Y, Z write.
607 : X86SchedWriteWidths<WriteVecALU, WriteVecALUX, WriteVecALUY, WriteVecALUZ>;
608def SchedWritePHAdd
609 : X86SchedWriteWidths<WritePHAdd, WritePHAddX, WritePHAddY, WritePHAddZ>;
610def SchedWriteVecLogic
611 : X86SchedWriteWidths<WriteVecLogic, WriteVecLogicX,
612                       WriteVecLogicY, WriteVecLogicZ>;
613def SchedWriteVecTest // First two slots both use WriteVecTest.
614 : X86SchedWriteWidths<WriteVecTest, WriteVecTest,
615                       WriteVecTestY, WriteVecTestZ>;
616def SchedWriteVecShift
617 : X86SchedWriteWidths<WriteVecShift, WriteVecShiftX,
618                       WriteVecShiftY, WriteVecShiftZ>;
619def SchedWriteVecShiftImm
620 : X86SchedWriteWidths<WriteVecShiftImm, WriteVecShiftImmX,
621                       WriteVecShiftImmY, WriteVecShiftImmZ>;
622def SchedWriteVarVecShift // First two slots both use WriteVarVecShift.
623 : X86SchedWriteWidths<WriteVarVecShift, WriteVarVecShift,
624                       WriteVarVecShiftY, WriteVarVecShiftZ>;
625def SchedWriteVecIMul
626 : X86SchedWriteWidths<WriteVecIMul, WriteVecIMulX,
627                       WriteVecIMulY, WriteVecIMulZ>;
628def SchedWritePMULLD // First two slots both use WritePMULLD.
629 : X86SchedWriteWidths<WritePMULLD, WritePMULLD,
630                       WritePMULLDY, WritePMULLDZ>;
631def SchedWriteMPSAD // First two slots both use WriteMPSAD.
632 : X86SchedWriteWidths<WriteMPSAD, WriteMPSAD,
633                       WriteMPSADY, WriteMPSADZ>;
634def SchedWritePSADBW
635 : X86SchedWriteWidths<WritePSADBW, WritePSADBWX,
636                       WritePSADBWY, WritePSADBWZ>;
637
638def SchedWriteShuffle // Uses distinct unsuffixed, X, Y and Z writes.
639 : X86SchedWriteWidths<WriteShuffle, WriteShuffleX,
640                       WriteShuffleY, WriteShuffleZ>;
641def SchedWriteVarShuffle // Uses distinct unsuffixed, X, Y and Z writes.
642 : X86SchedWriteWidths<WriteVarShuffle, WriteVarShuffleX,
643                       WriteVarShuffleY, WriteVarShuffleZ>;
644def SchedWriteBlend // First two slots both use WriteBlend.
645 : X86SchedWriteWidths<WriteBlend, WriteBlend, WriteBlendY, WriteBlendZ>;
646def SchedWriteVarBlend // First two slots both use WriteVarBlend.
647 : X86SchedWriteWidths<WriteVarBlend, WriteVarBlend,
648                       WriteVarBlendY, WriteVarBlendZ>;
649
650// Vector size wrappers.
651def SchedWriteFAddSizes // Each record here pairs two width wrappers; where a *64 wrapper exists it fills the second slot.
652 : X86SchedWriteSizes<SchedWriteFAdd, SchedWriteFAdd64>;
653def SchedWriteFCmpSizes
654 : X86SchedWriteSizes<SchedWriteFCmp, SchedWriteFCmp64>;
655def SchedWriteFMulSizes
656 : X86SchedWriteSizes<SchedWriteFMul, SchedWriteFMul64>;
657def SchedWriteFDivSizes
658 : X86SchedWriteSizes<SchedWriteFDiv, SchedWriteFDiv64>;
659def SchedWriteFSqrtSizes
660 : X86SchedWriteSizes<SchedWriteFSqrt, SchedWriteFSqrt64>;
661def SchedWriteFLogicSizes // Both slots use SchedWriteFLogic.
662 : X86SchedWriteSizes<SchedWriteFLogic, SchedWriteFLogic>;
663def SchedWriteFShuffleSizes // Both slots use SchedWriteFShuffle.
664 : X86SchedWriteSizes<SchedWriteFShuffle, SchedWriteFShuffle>;
665
666//===----------------------------------------------------------------------===//
667// Generic Processor Scheduler Models.
668
669// IssueWidth is analogous to the number of decode units. Core and its
670// descendants, including Nehalem and SandyBridge, have 4 decoders.
671// Resources beyond the decoder operate on micro-ops and are buffered
672// so adjacent micro-ops don't directly compete.
673//
674// MicroOpBufferSize > 1 indicates that RAW dependencies can be
675// decoded in the same cycle. The value 32 is a reasonably arbitrary
676// number of in-flight instructions.
677//
678// HighLatency=10 is optimistic. X86InstrInfo::isHighLatencyDef
679// indicates high latency opcodes. Alternatively, InstrItinData
680// entries may be included here to define specific operand
681// latencies. Since these latencies are not used for pipeline hazards,
682// they do not need to be exact.
683//
684// The GenericX86Model contains no instruction schedules
685// and disables PostRAScheduler.
686class GenericX86Model : SchedMachineModel { // Baseline settings shared by the generic X86 models defined from this class.
687  let IssueWidth = 4; // Analogous to the number of decode units.
688  let MicroOpBufferSize = 32; // > 1 means RAW dependencies can be decoded in the same cycle; 32 is a reasonably arbitrary in-flight count.
689  let LoadLatency = 4;
690  let HighLatency = 10; // Optimistic value; does not need to be exact since it is not used for pipeline hazards.
691  let PostRAScheduler = 0; // Post-RA scheduler disabled for this base model.
692  let CompleteModel = 0; // NOTE(review): presumably marks the model as incomplete because it carries no instruction schedules - confirm against SchedMachineModel.
693}
694
695def GenericModel : GenericX86Model; // Instantiates GenericX86Model with no field overrides.
696
697// Define a model with the PostRAScheduler enabled.
698def GenericPostRAModel : GenericX86Model { // Same settings as GenericX86Model except for the override below.
699  let PostRAScheduler = 1; // Overrides the 0 set in GenericX86Model.
700}
701