xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX.td (revision 92b14858b44dc4b3b57154a10e9de1b39d791e41)
1//==- AArch64SchedThunderX.td - Cavium ThunderX T8X Scheduling Definitions -*- tablegen -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
// This file defines the scheduling machine model for the Cavium ThunderX T8X
// (T88, T81, T83) processors.
11// Loosely based on Cortex-A53 which is somewhat similar.
12//
13//===----------------------------------------------------------------------===//
14
15// ===---------------------------------------------------------------------===//
16// The following definitions describe the simpler per-operand machine model.
17// This works with MachineScheduler. See llvm/MC/MCSchedule.h for details.
18
// Scheduling machine model shared by the Cavium ThunderX T8X cores.
def ThunderXT8XModel : SchedMachineModel {
  // The model is declared complete.
  let CompleteModel = 1;

  // ThunderX T88/T81/T83 are in-order: no micro-op buffer, and at most
  // 2 micro-ops are dispatched per cycle.
  let MicroOpBufferSize = 0;
  let IssueWidth = 2;

  // Optimistic load latency and branch mispredict penalty.
  let LoadLatency = 3;
  let MispredictPenalty = 8;

  // Use the PostRA scheduler.
  let PostRAScheduler = 1;

  // Features listed in SVEUnsupported.F are not covered by this model.
  list<Predicate> UnsupportedFeatures = SVEUnsupported.F;

  // FIXME: Remove when all errors have been fixed.
  let FullInstRWOverlapCheck = 0;
}
33
// T8X is in-order, so every pipeline is modeled as an unbuffered resource
// (BufferSize == 0). The template argument is the number of units.
let BufferSize = 0 in {
  def THXT8XUnitALU   : ProcResource<2>; // Int ALU
  def THXT8XUnitMAC   : ProcResource<1>; // Int MAC
  def THXT8XUnitDiv   : ProcResource<1>; // Int Division
  def THXT8XUnitLdSt  : ProcResource<1>; // Load/Store
  def THXT8XUnitBr    : ProcResource<1>; // Branch
  def THXT8XUnitFPALU : ProcResource<1>; // FP ALU
  def THXT8XUnitFPMDS : ProcResource<1>; // FP Mul/Div/Sqrt
}
42
43//===----------------------------------------------------------------------===//
44// Subtarget-specific SchedWrite types mapping the ProcResources and
45// latencies.
46
47let SchedModel = ThunderXT8XModel in {
48
// ALU: immediate and plain integer ops have latency 1; shifted-register,
// extended-register, shift and extract ops have latency 2. All use the
// integer ALU resource.
let Latency = 1 in {
  def : WriteRes<WriteImm, [THXT8XUnitALU]>;
  def : WriteRes<WriteI,   [THXT8XUnitALU]>;
}
let Latency = 2 in {
  def : WriteRes<WriteISReg, [THXT8XUnitALU]>;
  def : WriteRes<WriteIEReg, [THXT8XUnitALU]>;
  def : WriteRes<WriteIS,    [THXT8XUnitALU]>;
  def : WriteRes<WriteExtr,  [THXT8XUnitALU]>;
}
56
// MAC: 32-bit and 64-bit multiplies share the same timing on the MAC unit
// (latency 4, unit occupied for 1 cycle).
let Latency = 4, ResourceCycles = [1] in {
  def : WriteRes<WriteIM32, [THXT8XUnitMAC]>;
  def : WriteRes<WriteIM64, [THXT8XUnitMAC]>;
}
67
// Div: integer division occupies the divider for several cycles.
//   32-bit: latency 12, divider held for 6 cycles.
//   64-bit: latency 14, divider held for 8 cycles.
let Latency = 12, ResourceCycles = [6] in
def : WriteRes<WriteID32, [THXT8XUnitDiv]>;

let Latency = 14, ResourceCycles = [8] in
def : WriteRes<WriteID64, [THXT8XUnitDiv]>;
78
// Load: all scalar load writes use the load/store unit with latency 3.
let Latency = 3 in {
  def : WriteRes<WriteLD,    [THXT8XUnitLdSt]>;
  def : WriteRes<WriteLDIdx, [THXT8XUnitLdSt]>;
  def : WriteRes<WriteLDHi,  [THXT8XUnitLdSt]>;
}
83
// Vector Load: generic timing for WriteVLD, followed by the named write
// resources that the vector-load InstRW entries select from. Each entry
// gives the result latency and how long the load/store unit stays busy.
def : WriteRes<WriteVLD, [THXT8XUnitLdSt]> {
  let Latency = 8; let ResourceCycles = [3];
}

def THXT8XWriteVLD1 : SchedWriteRes<[THXT8XUnitLdSt]> {
  let Latency = 6; let ResourceCycles = [1];
}

def THXT8XWriteVLD2 : SchedWriteRes<[THXT8XUnitLdSt]> {
  let Latency = 11; let ResourceCycles = [7];
}

def THXT8XWriteVLD3 : SchedWriteRes<[THXT8XUnitLdSt]> {
  let Latency = 12; let ResourceCycles = [8];
}

def THXT8XWriteVLD4 : SchedWriteRes<[THXT8XUnitLdSt]> {
  let Latency = 13; let ResourceCycles = [9];
}

// Same timing as THXT8XWriteVLD4.
def THXT8XWriteVLD5 : SchedWriteRes<[THXT8XUnitLdSt]> {
  let Latency = 13; let ResourceCycles = [9];
}
114
// Pre/Post Indexing: the address write uses no resource and has
// zero latency.
def : WriteRes<WriteAdr, []> {
  let Latency = 0;
}
117
// Store: all scalar store writes use the load/store unit with latency 1.
let Latency = 1 in {
  def : WriteRes<WriteST,    [THXT8XUnitLdSt]>;
  def : WriteRes<WriteSTP,   [THXT8XUnitLdSt]>;
  def : WriteRes<WriteSTIdx, [THXT8XUnitLdSt]>;
  def : WriteRes<WriteSTX,   [THXT8XUnitLdSt]>;
}
123
// Vector Store: WriteVST and THXT8XWriteVST1 keep the default timing of
// their classes; the larger store forms hold the load/store unit longer.
def : WriteRes<WriteVST, [THXT8XUnitLdSt]>;
def THXT8XWriteVST1 : SchedWriteRes<[THXT8XUnitLdSt]>;

def THXT8XWriteVST2 : SchedWriteRes<[THXT8XUnitLdSt]> {
  let Latency = 10; let ResourceCycles = [9];
}

def THXT8XWriteVST3 : SchedWriteRes<[THXT8XUnitLdSt]> {
  let Latency = 11; let ResourceCycles = [10];
}

// Atomic writes are marked unsupported in this model.
def : WriteRes<WriteAtomic, []> {
  let Unsupported = 1;
}
139
// Branch: named write resources referenced by the branch InstRW entries.
// Note that THXT8XWriteRET is bound to the ALU unit, not the branch unit.
def THXT8XWriteBR  : SchedWriteRes<[THXT8XUnitBr]>;
def THXT8XWriteBRR : SchedWriteRes<[THXT8XUnitBr]>;
def THXT8XWriteRET : SchedWriteRes<[THXT8XUnitALU]>;

// Generic branch, system, barrier and hint writes all go to the branch
// unit with the default timing.
def : WriteRes<WriteBr,      [THXT8XUnitBr]>;
def : WriteRes<WriteBrReg,   [THXT8XUnitBr]>;
def : WriteRes<WriteSys,     [THXT8XUnitBr]>;
def : WriteRes<WriteBarrier, [THXT8XUnitBr]>;
def : WriteRes<WriteHint,    [THXT8XUnitBr]>;
149
// FP ALU: every write handled by the FP ALU pipe has latency 6.
let Latency = 6 in {
  def : WriteRes<WriteF,     [THXT8XUnitFPALU]>;
  def : WriteRes<WriteFCmp,  [THXT8XUnitFPALU]>;
  def : WriteRes<WriteFCvt,  [THXT8XUnitFPALU]>;
  def : WriteRes<WriteFCopy, [THXT8XUnitFPALU]>;
  def : WriteRes<WriteFImm,  [THXT8XUnitFPALU]>;
  def : WriteRes<WriteV,     [THXT8XUnitFPALU]>;
}
157
// FP Mul, Div, Sqrt: all of these use the FP Mul/Div/Sqrt pipe.
def : WriteRes<WriteFMul, [THXT8XUnitFPMDS]> {
  let Latency = 6;
}

// Generic FP divide; same timing as THXT8XWriteFDivDP below.
def : WriteRes<WriteFDiv, [THXT8XUnitFPMDS]> {
  let Latency = 22; let ResourceCycles = [19];
}

// Named write resources selected by the FP InstRW entries.
def THXT8XWriteFMAC : SchedWriteRes<[THXT8XUnitFPMDS]> {
  let Latency = 10;
}

def THXT8XWriteFDivSP : SchedWriteRes<[THXT8XUnitFPMDS]> {
  let Latency = 12; let ResourceCycles = [9];
}

def THXT8XWriteFDivDP : SchedWriteRes<[THXT8XUnitFPMDS]> {
  let Latency = 22; let ResourceCycles = [19];
}

def THXT8XWriteFSqrtSP : SchedWriteRes<[THXT8XUnitFPMDS]> {
  let Latency = 17; let ResourceCycles = [14];
}

def THXT8XWriteFSqrtDP : SchedWriteRes<[THXT8XUnitFPMDS]> {
  let Latency = 31; let ResourceCycles = [28];
}
186
187//===----------------------------------------------------------------------===//
188// Subtarget-specific SchedRead types.
189
// These reads carry a fixed advance and list no specific producing writes,
// so the advance is not restricted to particular write types.
def : ReadAdvance<ReadExtrHi,  1>;
def : ReadAdvance<ReadAdrBase, 2>;
def : ReadAdvance<ReadVLD,     2>;
194
// FIXME: This needs more targeted benchmarking.
// ALU - Most ALU operands are not needed until two cycles in. A shiftable
//       operand is needed one cycle in only when it is actually shifted;
//       otherwise it is also needed two cycles in. Extended-register
//       operands use the same pair of advances, selected by their own
//       SchedPredicate.
def : ReadAdvance<ReadI, 2,
                  [WriteImm, WriteI, WriteISReg, WriteIEReg, WriteIS,
                   WriteID32, WriteID64, WriteIM32, WriteIM64]>;

// Advance of 1: used when the operand is shifted/extended.
def THXT8XReadShifted
    : SchedReadAdvance<1,
                       [WriteImm, WriteI, WriteISReg, WriteIEReg, WriteIS,
                        WriteID32, WriteID64, WriteIM32, WriteIM64]>;

// Advance of 2: used when the operand is not shifted/extended.
def THXT8XReadNotShifted
    : SchedReadAdvance<2,
                       [WriteImm, WriteI, WriteISReg, WriteIEReg, WriteIS,
                        WriteID32, WriteID64, WriteIM32, WriteIM64]>;

// ReadISReg: pick the advance based on RegShiftedPred.
def THXT8XReadISReg : SchedReadVariant<[
    SchedVar<RegShiftedPred, [THXT8XReadShifted]>,
    SchedVar<NoSchedPred,    [THXT8XReadNotShifted]>]>;
def : SchedAlias<ReadISReg, THXT8XReadISReg>;

// ReadIEReg: pick the advance based on RegExtendedPred.
def THXT8XReadIEReg : SchedReadVariant<[
    SchedVar<RegExtendedPred, [THXT8XReadShifted]>,
    SchedVar<NoSchedPred,     [THXT8XReadNotShifted]>]>;
def : SchedAlias<ReadIEReg, THXT8XReadIEReg>;
222
// MAC - Regular operands are needed one cycle in; accumulator operands
//       are needed two cycles in.
def : ReadAdvance<ReadIM, 1,
                  [WriteImm, WriteI, WriteISReg, WriteIEReg, WriteIS,
                   WriteID32, WriteID64, WriteIM32, WriteIM64]>;
def : ReadAdvance<ReadIMA, 2,
                  [WriteImm, WriteI, WriteISReg, WriteIEReg, WriteIS,
                   WriteID32, WriteID64, WriteIM32, WriteIM64]>;
233
// Div - Divider operands get an advance of one cycle for the listed writes.
def : ReadAdvance<ReadID, 1,
                  [WriteImm, WriteI, WriteISReg, WriteIEReg, WriteIS,
                   WriteID32, WriteID64, WriteIM32, WriteIM64]>;
239
240//===----------------------------------------------------------------------===//
241// Subtarget-specific InstRW.
242
//---
// Branch
//---
// Direct and compare/test branches use THXT8XWriteBR.
// NOTE(review): "^B..$" matches every three-character opcode name that
// starts with 'B', which includes BLR (it has its own entry below) --
// confirm this overlap is intended.
def : InstRW<[THXT8XWriteBR],
             (instregex "^B$")>;
def : InstRW<[THXT8XWriteBR],
             (instregex "^BL$")>;
def : InstRW<[THXT8XWriteBR],
             (instregex "^B..$")>;
def : InstRW<[THXT8XWriteBR],
             (instregex "^CBNZ")>;
def : InstRW<[THXT8XWriteBR],
             (instregex "^CBZ")>;
def : InstRW<[THXT8XWriteBR],
             (instregex "^TBNZ")>;
def : InstRW<[THXT8XWriteBR],
             (instregex "^TBZ")>;

// Register-indirect branches use THXT8XWriteBRR.
def : InstRW<[THXT8XWriteBRR],
             (instregex "^BR$")>;
def : InstRW<[THXT8XWriteBRR],
             (instregex "^BLR$")>;

//---
// Ret
//---
def : InstRW<[THXT8XWriteRET],
             (instregex "^RET$")>;

//---
// Miscellaneous
//---
// COPY is scheduled as a plain integer ALU write.
def : InstRW<[WriteI],
             (instrs COPY)>;
265
//---
// Vector Loads
//---
// LD1 without writeback: the write resource grows with the register count.
def : InstRW<[THXT8XWriteVLD1],
             (instregex "LD1i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVLD1],
             (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD1],
             (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD2],
             (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD3],
             (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD4],
             (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
// LD1 post-indexed: same write resources plus WriteAdr for the base update.
def : InstRW<[THXT8XWriteVLD1, WriteAdr],
             (instregex "LD1i(8|16|32|64)_POST$")>;
def : InstRW<[THXT8XWriteVLD1, WriteAdr],
             (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVLD1, WriteAdr],
             (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVLD2, WriteAdr],
             (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVLD3, WriteAdr],
             (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVLD4, WriteAdr],
             (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
281
// LD2 without writeback.
def : InstRW<[THXT8XWriteVLD1], (instregex "LD2i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVLD1], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD2], (instregex "LD2Twov(8b|4h|2s)$")>;
def : InstRW<[THXT8XWriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
// LD2 post-indexed: same write resources plus WriteAdr for the base update.
// The patterns require the _POST suffix. With an optional "(_POST)?" they
// would also match the non-writeback opcodes covered just above, giving
// those opcodes two overlapping InstRW entries in this model and listing
// WriteAdr for loads that perform no writeback.
def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>;
def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>;
290
// LD3 without writeback. Note the 2d three-register form uses
// THXT8XWriteVLD3 while the other arrangements use THXT8XWriteVLD4.
def : InstRW<[THXT8XWriteVLD2],
             (instregex "LD3i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVLD2],
             (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD4],
             (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)$")>;
def : InstRW<[THXT8XWriteVLD3],
             (instregex "LD3Threev(2d)$")>;
// LD3 post-indexed: same write resources plus WriteAdr for the base update.
def : InstRW<[THXT8XWriteVLD2, WriteAdr],
             (instregex "LD3i(8|16|32|64)_POST$")>;
def : InstRW<[THXT8XWriteVLD2, WriteAdr],
             (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVLD4, WriteAdr],
             (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
def : InstRW<[THXT8XWriteVLD3, WriteAdr],
             (instregex "LD3Threev(2d)_POST$")>;
299
// LD4 without writeback. Note the 2d four-register form uses
// THXT8XWriteVLD4 while the other arrangements use THXT8XWriteVLD5.
def : InstRW<[THXT8XWriteVLD2],
             (instregex "LD4i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVLD2],
             (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD5],
             (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>;
def : InstRW<[THXT8XWriteVLD4],
             (instregex "LD4Fourv(2d)$")>;
// LD4 post-indexed: same write resources plus WriteAdr for the base update.
def : InstRW<[THXT8XWriteVLD2, WriteAdr],
             (instregex "LD4i(8|16|32|64)_POST$")>;
def : InstRW<[THXT8XWriteVLD2, WriteAdr],
             (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVLD5, WriteAdr],
             (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
def : InstRW<[THXT8XWriteVLD4, WriteAdr],
             (instregex "LD4Fourv(2d)_POST$")>;
308
//---
// Vector Stores
//---
// ST1 without writeback, then the post-indexed forms (which add WriteAdr).
def : InstRW<[THXT8XWriteVST1],
             (instregex "ST1i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVST1],
             (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVST1],
             (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVST2],
             (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVST2],
             (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVST1, WriteAdr],
             (instregex "ST1i(8|16|32|64)_POST$")>;
def : InstRW<[THXT8XWriteVST1, WriteAdr],
             (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVST1, WriteAdr],
             (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVST2, WriteAdr],
             (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVST2, WriteAdr],
             (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ST2: the 64-bit arrangements use VST1, the 128-bit arrangements VST2.
def : InstRW<[THXT8XWriteVST1],
             (instregex "ST2i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVST1],
             (instregex "ST2Twov(8b|4h|2s)$")>;
def : InstRW<[THXT8XWriteVST2],
             (instregex "ST2Twov(16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVST1, WriteAdr],
             (instregex "ST2i(8|16|32|64)_POST$")>;
def : InstRW<[THXT8XWriteVST1, WriteAdr],
             (instregex "ST2Twov(8b|4h|2s)_POST$")>;
def : InstRW<[THXT8XWriteVST2, WriteAdr],
             (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;

// ST3: the 2d form uses VST2, the other multi-register forms VST3.
def : InstRW<[THXT8XWriteVST2],
             (instregex "ST3i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVST3],
             (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)$")>;
def : InstRW<[THXT8XWriteVST2],
             (instregex "ST3Threev(2d)$")>;
def : InstRW<[THXT8XWriteVST2, WriteAdr],
             (instregex "ST3i(8|16|32|64)_POST$")>;
def : InstRW<[THXT8XWriteVST3, WriteAdr],
             (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
def : InstRW<[THXT8XWriteVST2, WriteAdr],
             (instregex "ST3Threev(2d)_POST$")>;

// ST4: the 2d form uses VST2, the other multi-register forms VST3.
def : InstRW<[THXT8XWriteVST2],
             (instregex "ST4i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVST3],
             (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>;
def : InstRW<[THXT8XWriteVST2],
             (instregex "ST4Fourv(2d)$")>;
def : InstRW<[THXT8XWriteVST2, WriteAdr],
             (instregex "ST4i(8|16|32|64)_POST$")>;
def : InstRW<[THXT8XWriteVST3, WriteAdr],
             (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
def : InstRW<[THXT8XWriteVST2, WriteAdr],
             (instregex "ST4Fourv(2d)_POST$")>;
343
//---
// Floating Point MAC, DIV, SQRT
//---
// Fused multiply-add/subtract and multiply-accumulate forms.
def : InstRW<[THXT8XWriteFMAC],
             (instregex "^FN?M(ADD|SUB).*")>;
def : InstRW<[THXT8XWriteFMAC],
             (instregex "^FML(A|S).*")>;

// Divide: scalar opcodes are named explicitly, vector opcodes are selected
// by their 32/64 name suffix.
def : InstRW<[THXT8XWriteFDivSP],
             (instrs FDIVSrr)>;
def : InstRW<[THXT8XWriteFDivDP],
             (instrs FDIVDrr)>;
def : InstRW<[THXT8XWriteFDivSP],
             (instregex "^FDIVv.*32$")>;
def : InstRW<[THXT8XWriteFDivDP],
             (instregex "^FDIVv.*64$")>;

// Square root: any opcode whose name contains SQRT and ends in 32 or 64.
// NOTE(review): these patterns are broad and depend only on the name
// suffix -- confirm they select exactly the intended opcodes.
def : InstRW<[THXT8XWriteFSqrtSP],
             (instregex "^.*SQRT.*32$")>;
def : InstRW<[THXT8XWriteFSqrtDP],
             (instregex "^.*SQRT.*64$")>;
355
356}
357