xref: /freebsd/contrib/llvm-project/llvm/lib/Target/ARM/ARMSchedule.td (revision b9128a37faafede823eb456aa65a11ac69997284)
1//===-- ARMSchedule.td - ARM Scheduling Definitions --------*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//===----------------------------------------------------------------------===//
9// Instruction scheduling annotations for in-order and out-of-order CPUs.
10// These annotations are independent of the itinerary class defined below.
11// Here we define the subtarget independent read/write per-operand resources.
12// The subtarget schedule definitions will then map these to the subtarget's
13// resource usages.
14// For example:
15// The instruction cycle timings table might contain an entry for an operation
16// like the following:
17// Rd <- ADD Rn, Rm, <shift> Rs
18//  Uops | Latency from register | Uops - resource requirements - latency
19//  2    | Rn: 1 Rm: 4 Rs: 4     | uop T0, Rm, Rs - P01 - 3
20//       |                       | uopc Rd, Rn, T0 -  P01 - 1
21// This is telling us that the result will be available in destination register
22// Rd a minimum of four cycles after the results in Rm and Rs are available
23// and one cycle after the result in Rn is available. The micro-ops can execute
24// on resource P01.
25// To model this, we need to express that we need to dispatch two micro-ops,
26// that the resource P01 is needed and that the latency to Rn is different than
27// the latency to Rm and Rs. The scheduler can decrease Rn's producer latency by
28// three.
29// We will do this by assigning (abstract) resources to register defs/uses.
30// ARMSchedule.td:
31//   def WriteALUsr : SchedWrite;
32//   def ReadAdvanceALUsr : SchedRead;
33//
34// ARMInstrInfo.td:
35//   def ADDrs : I<>, Sched<[WriteALUsr, ReadAdvanceALUsr, ReadDefault,
36//                           ReadDefault]> { ...}
37// ReadAdvance read resources allow us to define "pipeline by-passes" or
38// shorter latencies to certain registers as needed in the example above.
39// The "ReadDefault" can be omitted.
40// Next, the subtarget td file assigns resources to the abstract resources
41// defined here.
42// ARMScheduleSubtarget.td:
43//  // Resources.
44//  def P01 : ProcResource<3>; // ALU unit (3 of them).
45//  ...
46//  // Resource usages.
47//  def : WriteRes<WriteALUsr, [P01, P01]> {
48//    Latency = 4; // Latency of 4.
49//    NumMicroOps = 2; // Dispatch 2 micro-ops.
50//    // The two instances of resource P01 are occupied for one cycle. It is one
51//    // cycle because these resources happen to be pipelined.
52//    ReleaseAtCycles = [1, 1];
53//  }
54//  def : ReadAdvance<ReadAdvanceALUsr, 3>;
55
56//===----------------------------------------------------------------------===//
57// Sched definitions for integer pipeline instructions
58//
59// Basic ALU operation: per-operand write and read resources.
60def WriteALU : SchedWrite;
61def ReadALU : SchedRead;
62
63// Basic ALU with shifts.
64def WriteALUsi : SchedWrite; // Shift by immediate.
65def WriteALUsr : SchedWrite; // Shift by register.
66def WriteALUSsr : SchedWrite; // Shift by register (flag setting).
67def ReadALUsr : SchedRead; // Some operands are read later.
68
69// Compares.
// NOTE(review): the si/sr suffixes presumably mean shift by immediate / shift
// by register, as documented for WriteALUsi/WriteALUsr above -- confirm.
70def WriteCMP : SchedWrite;
71def WriteCMPsi : SchedWrite;
72def WriteCMPsr : SchedWrite;
73
74// Multiplies.
75def WriteMUL16   : SchedWrite; // 16-bit multiply.
76def WriteMUL32   : SchedWrite; // 32-bit multiply.
77def WriteMUL64Lo : SchedWrite; // 64-bit result. Low reg.
78def WriteMUL64Hi : SchedWrite; // 64-bit result. High reg.
79def ReadMUL  : SchedRead;
80
81// Multiply-accumulates.
82def WriteMAC16   : SchedWrite; // 16-bit mac.
83def WriteMAC32   : SchedWrite; // 32-bit mac.
84def WriteMAC64Lo : SchedWrite; // 64-bit mac. Low reg.
85def WriteMAC64Hi : SchedWrite; // 64-bit mac. High reg.
86def ReadMAC : SchedRead;
87
88// Divisions.
89def WriteDIV : SchedWrite;
90
91// Loads/Stores.
92def WriteLd : SchedWrite;
93def WritePreLd : SchedWrite; // NOTE(review): presumably preload -- confirm.
94def WriteST : SchedWrite;
95
96// Branches.
97def WriteBr : SchedWrite;
98def WriteBrL : SchedWrite;
99def WriteBrTbl : SchedWrite;
100
101// Noop.
102def WriteNoop : SchedWrite;
103
104//===----------------------------------------------------------------------===//
105// Sched definitions for floating-point and neon instructions
106//
107// Floating point conversions and moves
108def WriteFPCVT : SchedWrite;
109def WriteFPMOV : SchedWrite; // FP -> GPR and vice-versa
110
111// ALU operations (32/64-bit)
112def WriteFPALU32 : SchedWrite;
113def WriteFPALU64 : SchedWrite;
114
115// Multiplication, together with the multiplier and accumulator read resources
116def WriteFPMUL32 : SchedWrite;
117def WriteFPMUL64 : SchedWrite;
118def ReadFPMUL    : SchedRead; // multiplier read
119def ReadFPMAC    : SchedRead; // accumulator read
120
121// Multiply-accumulate (the accumulator read resource is ReadFPMAC above)
122def WriteFPMAC32 : SchedWrite;
123def WriteFPMAC64 : SchedWrite;
124
125// Division
126def WriteFPDIV32 : SchedWrite;
127def WriteFPDIV64 : SchedWrite;
128
129// Square-root
130def WriteFPSQRT32 : SchedWrite;
131def WriteFPSQRT64 : SchedWrite;
132
133// Vector loads and stores
134def WriteVLD1 : SchedWrite;
135def WriteVLD2 : SchedWrite;
136def WriteVLD3 : SchedWrite;
137def WriteVLD4 : SchedWrite;
138def WriteVST1 : SchedWrite;
139def WriteVST2 : SchedWrite;
140def WriteVST3 : SchedWrite;
141def WriteVST4 : SchedWrite;
142
143
144// Define TII and STI for use in SchedVariant Predicates.
// The code fragment obtains both pointers from SchedModel and casts them to
// the ARM-specific ARMBaseInstrInfo / ARMSubtarget types.
145def : PredicateProlog<[{
146  const ARMBaseInstrInfo *TII =
147    static_cast<const ARMBaseInstrInfo*>(SchedModel->getInstrInfo());
148  (void)TII;
149  const ARMSubtarget *STI =
150    static_cast<const ARMSubtarget*>(SchedModel->getSubtargetInfo());
151  (void)STI;
152}]>;
153
// Function-based predicate built from ARM_MC::isPredicated / isPredicated.
// NOTE(review): per CheckFunctionPredicateWithTII in TargetInstrPredicate.td
// the first string is the MCInst-level function and the second the
// MachineInstr-level function invoked through TII -- confirm against that file.
154def IsPredicated : CheckFunctionPredicateWithTII<
155  "ARM_MC::isPredicated",
156  "isPredicated"
157>;
// MCSchedPredicate wrapper around IsPredicated.
158def IsPredicatedPred : MCSchedPredicate<IsPredicated>;
159
// Function-based predicate built from ARM_MC::isCPSRDefined /
// ARMBaseInstrInfo::isCPSRDefined; same argument layout as IsPredicated.
160def IsCPSRDefined : CheckFunctionPredicateWithTII<
161  "ARM_MC::isCPSRDefined",
162  "ARMBaseInstrInfo::isCPSRDefined"
163>;
164
// MCSchedPredicate wrapper around IsCPSRDefined.
165def IsCPSRDefinedPred : MCSchedPredicate<IsCPSRDefined>;
166
// Immediate-operand checks on operand n that use the ARM_AM::getAM2* mappers.
// NOTE(review): per TargetInstrPredicate.td, FunctionMapper names a function
// applied to the operand's immediate before the comparison -- confirm there.

// Compare the value mapped by getAM2ShiftOpc against a shift-opcode name.
167let FunctionMapper = "ARM_AM::getAM2ShiftOpc" in {
168  class CheckAM2NoShift<int n> : CheckImmOperand_s<n, "ARM_AM::no_shift">;
169  class CheckAM2ShiftLSL<int n> : CheckImmOperand_s<n, "ARM_AM::lsl">;
170}
171
// Compare the value mapped by getAM2Op against ARM_AM::add / ARM_AM::sub.
172let FunctionMapper = "ARM_AM::getAM2Op" in {
173  class CheckAM2OpAdd<int n> : CheckImmOperand_s<n, "ARM_AM::add"> {}
174  class CheckAM2OpSub<int n> : CheckImmOperand_s<n, "ARM_AM::sub"> {}
175}
176
// Compare the value mapped by getAM2Offset against the integer `of`.
177let FunctionMapper = "ARM_AM::getAM2Offset" in {
178  class CheckAM2Offset<int n, int of> : CheckImmOperand<n, of> {}
179}
180
// Function-based predicate; the same function name, ARM_MC::isLDMBaseRegInList,
// is given for both string arguments.
181def IsLDMBaseRegInList : CheckFunctionPredicate<
182  "ARM_MC::isLDMBaseRegInList", "ARM_MC::isLDMBaseRegInList"
183>;
184
// Compare the value mapped by getAM3Op for operand n against ARM_AM::sub.
185let FunctionMapper = "ARM_AM::getAM3Op" in {
186  class CheckAM3OpSub<int n> : CheckImmOperand_s<n, "ARM_AM::sub"> {}
187}
188
189// LDM, base reg in list (MCSchedPredicate wrapper around IsLDMBaseRegInList)
190def IsLDMBaseRegInListPred : MCSchedPredicate<IsLDMBaseRegInList>;
191
// Scheduling predicate that checks operand n against the PC register.
192class IsRegPCPred<int n> : MCSchedPredicate<CheckRegOperand<n, PC>>;
193
// Derives a SchedWriteRes from an existing one, `wr`:
//   lat  - added to wr.Latency.
//   uops - added to wr.NumMicroOps.
//   resl - appended to wr.ProcResources.
//   rcl  - appended to wr.ReleaseAtCycles.
// The unmodified `wr` is kept in the BaseWr field, which CheckBranchForm
// reads as `br.BaseWr`.
194class BranchWriteRes<int lat, int uops, list<ProcResourceKind> resl,
195                     list<int> rcl, SchedWriteRes wr> :
196  SchedWriteRes<!listconcat(wr.ProcResources, resl)> {
197  let Latency = !add(wr.Latency, lat);
198  let ReleaseAtCycles = !listconcat(wr.ReleaseAtCycles, rcl);
199  let NumMicroOps = !add(wr.NumMicroOps, uops);
200  SchedWriteRes BaseWr = wr;
201}
202
// Write variant that selects between a BranchWriteRes and the SchedWriteRes it
// was derived from: `br` when IsRegPCPred<n> holds (operand n is PC), and
// `br.BaseWr` under NoSchedPred otherwise.
203class CheckBranchForm<int n, BranchWriteRes br> :
204  SchedWriteVariant<[
205    SchedVar<IsRegPCPred<n>, [br]>,
206    SchedVar<NoSchedPred,    [br.BaseWr]>
207  ]>;
208
209//===----------------------------------------------------------------------===//
210// Instruction Itinerary classes used for ARM
211//
// Itinerary classes named IIC_i*, plus IIC_Preload and IIC_Br.
// NOTE(review): the 'i' prefix presumably marks integer-pipeline classes --
// confirm against the subtarget itineraries that reference them.
212def IIC_iALUx      : InstrItinClass;
213def IIC_iALUi      : InstrItinClass;
214def IIC_iALUr      : InstrItinClass;
215def IIC_iALUsi     : InstrItinClass;
216def IIC_iALUsir    : InstrItinClass;
217def IIC_iALUsr     : InstrItinClass;
218def IIC_iBITi      : InstrItinClass;
219def IIC_iBITr      : InstrItinClass;
220def IIC_iBITsi     : InstrItinClass;
221def IIC_iBITsr     : InstrItinClass;
222def IIC_iUNAr      : InstrItinClass;
223def IIC_iUNAsi     : InstrItinClass;
224def IIC_iEXTr      : InstrItinClass;
225def IIC_iEXTAr     : InstrItinClass;
226def IIC_iEXTAsr    : InstrItinClass;
227def IIC_iCMPi      : InstrItinClass;
228def IIC_iCMPr      : InstrItinClass;
229def IIC_iCMPsi     : InstrItinClass;
230def IIC_iCMPsr     : InstrItinClass;
231def IIC_iTSTi      : InstrItinClass;
232def IIC_iTSTr      : InstrItinClass;
233def IIC_iTSTsi     : InstrItinClass;
234def IIC_iTSTsr     : InstrItinClass;
235def IIC_iMOVi      : InstrItinClass;
236def IIC_iMOVr      : InstrItinClass;
237def IIC_iMOVsi     : InstrItinClass;
238def IIC_iMOVsr     : InstrItinClass;
239def IIC_iMOVix2    : InstrItinClass;
240def IIC_iMOVix2addpc : InstrItinClass;
241def IIC_iMOVix2ld  : InstrItinClass;
242def IIC_iMVNi      : InstrItinClass;
243def IIC_iMVNr      : InstrItinClass;
244def IIC_iMVNsi     : InstrItinClass;
245def IIC_iMVNsr     : InstrItinClass;
246def IIC_iCMOVi     : InstrItinClass;
247def IIC_iCMOVr     : InstrItinClass;
248def IIC_iCMOVsi    : InstrItinClass;
249def IIC_iCMOVsr    : InstrItinClass;
250def IIC_iCMOVix2   : InstrItinClass;
251def IIC_iMUL16     : InstrItinClass;
252def IIC_iMAC16     : InstrItinClass;
253def IIC_iMUL32     : InstrItinClass;
254def IIC_iMAC32     : InstrItinClass;
255def IIC_iMUL64     : InstrItinClass;
256def IIC_iMAC64     : InstrItinClass;
257def IIC_iDIV     : InstrItinClass;
258def IIC_iLoad_i    : InstrItinClass;
259def IIC_iLoad_r    : InstrItinClass;
260def IIC_iLoad_si   : InstrItinClass;
261def IIC_iLoad_iu   : InstrItinClass;
262def IIC_iLoad_ru   : InstrItinClass;
263def IIC_iLoad_siu  : InstrItinClass;
264def IIC_iLoad_bh_i   : InstrItinClass;
265def IIC_iLoad_bh_r   : InstrItinClass;
266def IIC_iLoad_bh_si  : InstrItinClass;
267def IIC_iLoad_bh_iu  : InstrItinClass;
268def IIC_iLoad_bh_ru  : InstrItinClass;
269def IIC_iLoad_bh_siu : InstrItinClass;
270def IIC_iLoad_d_i  : InstrItinClass;
271def IIC_iLoad_d_r  : InstrItinClass;
272def IIC_iLoad_d_ru : InstrItinClass;
273def IIC_iLoad_m    : InstrItinClass;
274def IIC_iLoad_mu   : InstrItinClass;
275def IIC_iLoad_mBr  : InstrItinClass;
276def IIC_iPop       : InstrItinClass;
277def IIC_iPop_Br    : InstrItinClass;
278def IIC_iLoadiALU  : InstrItinClass;
279def IIC_iStore_i   : InstrItinClass;
280def IIC_iStore_r   : InstrItinClass;
281def IIC_iStore_si  : InstrItinClass;
282def IIC_iStore_iu  : InstrItinClass;
283def IIC_iStore_ru  : InstrItinClass;
284def IIC_iStore_siu : InstrItinClass;
285def IIC_iStore_bh_i   : InstrItinClass;
286def IIC_iStore_bh_r   : InstrItinClass;
287def IIC_iStore_bh_si  : InstrItinClass;
288def IIC_iStore_bh_iu  : InstrItinClass;
289def IIC_iStore_bh_ru  : InstrItinClass;
290def IIC_iStore_bh_siu : InstrItinClass;
291def IIC_iStore_d_i   : InstrItinClass;
292def IIC_iStore_d_r   : InstrItinClass;
293def IIC_iStore_d_ru  : InstrItinClass;
294def IIC_iStore_m   : InstrItinClass;
295def IIC_iStore_mu  : InstrItinClass;
296def IIC_Preload    : InstrItinClass;
297def IIC_Br         : InstrItinClass;
// Itinerary classes named IIC_fp*.
// NOTE(review): presumably the scalar floating-point classes, with 16/32/64
// suffixes for the operand width -- confirm against the subtarget itineraries.
298def IIC_fpSTAT     : InstrItinClass;
299def IIC_fpUNA16    : InstrItinClass;
300def IIC_fpUNA32    : InstrItinClass;
301def IIC_fpUNA64    : InstrItinClass;
302def IIC_fpCMP16    : InstrItinClass;
303def IIC_fpCMP32    : InstrItinClass;
304def IIC_fpCMP64    : InstrItinClass;
305def IIC_fpCVTSD    : InstrItinClass;
306def IIC_fpCVTDS    : InstrItinClass;
307def IIC_fpCVTSH    : InstrItinClass;
308def IIC_fpCVTHS    : InstrItinClass;
309def IIC_fpCVTIH    : InstrItinClass;
310def IIC_fpCVTIS    : InstrItinClass;
311def IIC_fpCVTID    : InstrItinClass;
312def IIC_fpCVTHI    : InstrItinClass;
313def IIC_fpCVTSI    : InstrItinClass;
314def IIC_fpCVTDI    : InstrItinClass;
315def IIC_fpMOVIS    : InstrItinClass;
316def IIC_fpMOVID    : InstrItinClass;
317def IIC_fpMOVSI    : InstrItinClass;
318def IIC_fpMOVDI    : InstrItinClass;
319def IIC_fpALU16    : InstrItinClass;
320def IIC_fpALU32    : InstrItinClass;
321def IIC_fpALU64    : InstrItinClass;
322def IIC_fpMUL16    : InstrItinClass;
323def IIC_fpMUL32    : InstrItinClass;
324def IIC_fpMUL64    : InstrItinClass;
325def IIC_fpMAC16    : InstrItinClass;
326def IIC_fpMAC32    : InstrItinClass;
327def IIC_fpMAC64    : InstrItinClass;
328def IIC_fpFMAC16   : InstrItinClass;
329def IIC_fpFMAC32   : InstrItinClass;
330def IIC_fpFMAC64   : InstrItinClass;
331def IIC_fpDIV16    : InstrItinClass;
332def IIC_fpDIV32    : InstrItinClass;
333def IIC_fpDIV64    : InstrItinClass;
334def IIC_fpSQRT16   : InstrItinClass;
335def IIC_fpSQRT32   : InstrItinClass;
336def IIC_fpSQRT64   : InstrItinClass;
337def IIC_fpLoad16   : InstrItinClass;
338def IIC_fpLoad32   : InstrItinClass;
339def IIC_fpLoad64   : InstrItinClass;
340def IIC_fpLoad_m   : InstrItinClass;
341def IIC_fpLoad_mu  : InstrItinClass;
342def IIC_fpStore16  : InstrItinClass;
343def IIC_fpStore32  : InstrItinClass;
344def IIC_fpStore64  : InstrItinClass;
345def IIC_fpStore_m  : InstrItinClass;
346def IIC_fpStore_mu : InstrItinClass;
// Itinerary classes named IIC_V*.
// NOTE(review): presumably the NEON/vector classes, with D/Q suffixes for the
// register width -- confirm against the subtarget itineraries.
347def IIC_VLD1       : InstrItinClass;
348def IIC_VLD1x2     : InstrItinClass;
349def IIC_VLD1x3     : InstrItinClass;
350def IIC_VLD1x4     : InstrItinClass;
351def IIC_VLD1u      : InstrItinClass;
352def IIC_VLD1x2u    : InstrItinClass;
353def IIC_VLD1x3u    : InstrItinClass;
354def IIC_VLD1x4u    : InstrItinClass;
355def IIC_VLD1ln     : InstrItinClass;
356def IIC_VLD1lnu    : InstrItinClass;
357def IIC_VLD1dup    : InstrItinClass;
358def IIC_VLD1dupu   : InstrItinClass;
359def IIC_VLD2       : InstrItinClass;
360def IIC_VLD2x2     : InstrItinClass;
361def IIC_VLD2u      : InstrItinClass;
362def IIC_VLD2x2u    : InstrItinClass;
363def IIC_VLD2ln     : InstrItinClass;
364def IIC_VLD2lnu    : InstrItinClass;
365def IIC_VLD2dup    : InstrItinClass;
366def IIC_VLD2dupu   : InstrItinClass;
367def IIC_VLD3       : InstrItinClass;
368def IIC_VLD3ln     : InstrItinClass;
369def IIC_VLD3u      : InstrItinClass;
370def IIC_VLD3lnu    : InstrItinClass;
371def IIC_VLD3dup    : InstrItinClass;
372def IIC_VLD3dupu   : InstrItinClass;
373def IIC_VLD4       : InstrItinClass;
374def IIC_VLD4ln     : InstrItinClass;
375def IIC_VLD4u      : InstrItinClass;
376def IIC_VLD4lnu    : InstrItinClass;
377def IIC_VLD4dup    : InstrItinClass;
378def IIC_VLD4dupu   : InstrItinClass;
379def IIC_VST1       : InstrItinClass;
380def IIC_VST1x2     : InstrItinClass;
381def IIC_VST1x3     : InstrItinClass;
382def IIC_VST1x4     : InstrItinClass;
383def IIC_VST1u      : InstrItinClass;
384def IIC_VST1x2u    : InstrItinClass;
385def IIC_VST1x3u    : InstrItinClass;
386def IIC_VST1x4u    : InstrItinClass;
387def IIC_VST1ln     : InstrItinClass;
388def IIC_VST1lnu    : InstrItinClass;
389def IIC_VST2       : InstrItinClass;
390def IIC_VST2x2     : InstrItinClass;
391def IIC_VST2u      : InstrItinClass;
392def IIC_VST2x2u    : InstrItinClass;
393def IIC_VST2ln     : InstrItinClass;
394def IIC_VST2lnu    : InstrItinClass;
395def IIC_VST3       : InstrItinClass;
396def IIC_VST3u      : InstrItinClass;
397def IIC_VST3ln     : InstrItinClass;
398def IIC_VST3lnu    : InstrItinClass;
399def IIC_VST4       : InstrItinClass;
400def IIC_VST4u      : InstrItinClass;
401def IIC_VST4ln     : InstrItinClass;
402def IIC_VST4lnu    : InstrItinClass;
403def IIC_VUNAD      : InstrItinClass;
404def IIC_VUNAQ      : InstrItinClass;
405def IIC_VBIND      : InstrItinClass;
406def IIC_VBINQ      : InstrItinClass;
407def IIC_VPBIND     : InstrItinClass;
408def IIC_VFMULD     : InstrItinClass;
409def IIC_VFMULQ     : InstrItinClass;
410def IIC_VMOV       : InstrItinClass;
411def IIC_VMOVImm    : InstrItinClass;
412def IIC_VMOVD      : InstrItinClass;
413def IIC_VMOVQ      : InstrItinClass;
414def IIC_VMOVIS     : InstrItinClass;
415def IIC_VMOVID     : InstrItinClass;
416def IIC_VMOVISL    : InstrItinClass;
417def IIC_VMOVSI     : InstrItinClass;
418def IIC_VMOVDI     : InstrItinClass;
419def IIC_VMOVN      : InstrItinClass;
420def IIC_VPERMD     : InstrItinClass;
421def IIC_VPERMQ     : InstrItinClass;
422def IIC_VPERMQ3    : InstrItinClass;
423def IIC_VMACD      : InstrItinClass;
424def IIC_VMACQ      : InstrItinClass;
425def IIC_VFMACD     : InstrItinClass;
426def IIC_VFMACQ     : InstrItinClass;
427def IIC_VRECSD     : InstrItinClass;
428def IIC_VRECSQ     : InstrItinClass;
429def IIC_VCNTiD     : InstrItinClass;
430def IIC_VCNTiQ     : InstrItinClass;
431def IIC_VUNAiD     : InstrItinClass;
432def IIC_VUNAiQ     : InstrItinClass;
433def IIC_VQUNAiD    : InstrItinClass;
434def IIC_VQUNAiQ    : InstrItinClass;
435def IIC_VBINiD     : InstrItinClass;
436def IIC_VBINiQ     : InstrItinClass;
437def IIC_VSUBiD     : InstrItinClass;
438def IIC_VSUBiQ     : InstrItinClass;
439def IIC_VBINi4D    : InstrItinClass;
440def IIC_VBINi4Q    : InstrItinClass;
441def IIC_VSUBi4D    : InstrItinClass;
442def IIC_VSUBi4Q    : InstrItinClass;
443def IIC_VABAD      : InstrItinClass;
444def IIC_VABAQ      : InstrItinClass;
445def IIC_VSHLiD     : InstrItinClass;
446def IIC_VSHLiQ     : InstrItinClass;
447def IIC_VSHLi4D    : InstrItinClass;
448def IIC_VSHLi4Q    : InstrItinClass;
449def IIC_VPALiD     : InstrItinClass;
450def IIC_VPALiQ     : InstrItinClass;
451def IIC_VMULi16D   : InstrItinClass;
452def IIC_VMULi32D   : InstrItinClass;
453def IIC_VMULi16Q   : InstrItinClass;
454def IIC_VMULi32Q   : InstrItinClass;
455def IIC_VMACi16D   : InstrItinClass;
456def IIC_VMACi32D   : InstrItinClass;
457def IIC_VMACi16Q   : InstrItinClass;
458def IIC_VMACi32Q   : InstrItinClass;
459def IIC_VEXTD      : InstrItinClass;
460def IIC_VEXTQ      : InstrItinClass;
461def IIC_VTB1       : InstrItinClass;
462def IIC_VTB2       : InstrItinClass;
463def IIC_VTB3       : InstrItinClass;
464def IIC_VTB4       : InstrItinClass;
465def IIC_VTBX1      : InstrItinClass;
466def IIC_VTBX2      : InstrItinClass;
467def IIC_VTBX3      : InstrItinClass;
468def IIC_VTBX4      : InstrItinClass;
469def IIC_VDOTPROD   : InstrItinClass;
470