xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX.td (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1//==- AArch64SchedThunderX.td - Cavium ThunderX T8X Scheduling Definitions -*- tablegen -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
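// This file defines the scheduling machine model (processor resources,
// per-operand write latencies, read advances and instruction overrides) for
// the Cavium ThunderX T8X (T88, T81, T83) processors.
// Loosely based on the Cortex-A53 model, which is somewhat similar.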
12//
13//===----------------------------------------------------------------------===//
14
15// ===---------------------------------------------------------------------===//
16// The following definitions describe the simpler per-operand machine model.
17// This works with MachineScheduler. See llvm/MC/MCSchedule.h for details.
18
19// Cavium ThunderX T8X scheduling machine model.
def ThunderXT8XModel : SchedMachineModel {
  // Pipeline parameters.
  let IssueWidth        = 2;  // Two micro-ops are dispatched per cycle.
  let MicroOpBufferSize = 0;  // In-order: ThunderX T88/T81/T83.
  let LoadLatency       = 3;  // Optimistic load latency.
  let MispredictPenalty = 8;  // Penalty for a mispredicted branch.

  // Scheduler configuration.
  let PostRAScheduler = 1;    // Run the PostRA scheduler.
  let CompleteModel   = 1;
  // FIXME: Remove when all errors have been fixed.
  let FullInstRWOverlapCheck = 0;

  // Features this model does not cover.
  list<Predicate> UnsupportedFeatures =
      !listconcat(SVEUnsupported.F, PAUnsupported.F, SMEUnsupported.F,
                  [HasMTE, HasCSSC]);
}
35
36// Modeling each pipeline with BufferSize == 0 since T8X is in-order.
// BufferSize is forced to 0 for every pipeline in this group.
let BufferSize = 0 in {
  def THXT8XUnitALU   : ProcResource<2>; // Int ALU
  def THXT8XUnitMAC   : ProcResource<1>; // Int MAC
  def THXT8XUnitDiv   : ProcResource<1>; // Int Division
  def THXT8XUnitLdSt  : ProcResource<1>; // Load/Store
  def THXT8XUnitBr    : ProcResource<1>; // Branch
  def THXT8XUnitFPALU : ProcResource<1>; // FP ALU
  def THXT8XUnitFPMDS : ProcResource<1>; // FP Mul/Div/Sqrt
}
44
45//===----------------------------------------------------------------------===//
46// Subtarget-specific SchedWrite types mapping the ProcResources and
47// latencies.
48
49let SchedModel = ThunderXT8XModel in {
50
51// ALU
// One-cycle ALU writes.
let Latency = 1 in {
  def : WriteRes<WriteImm, [THXT8XUnitALU]>;
  def : WriteRes<WriteI,   [THXT8XUnitALU]>;
}

// Two-cycle ALU writes.
let Latency = 2 in {
  def : WriteRes<WriteISReg, [THXT8XUnitALU]>;
  def : WriteRes<WriteIEReg, [THXT8XUnitALU]>;
  def : WriteRes<WriteIS,    [THXT8XUnitALU]>;
  def : WriteRes<WriteExtr,  [THXT8XUnitALU]>;
}
58
59// MAC
// WriteIM32 and WriteIM64 are costed identically on the MAC resource.
let Latency = 4, ReleaseAtCycles = [1] in {
  def : WriteRes<WriteIM32, [THXT8XUnitMAC]>;
  def : WriteRes<WriteIM64, [THXT8XUnitMAC]>;
}
69
70// Div
// Each divide write has its own latency and its own release cycle on the
// divider resource.
def : WriteRes<WriteID32, [THXT8XUnitDiv]> {
  let ReleaseAtCycles = [6];
  let Latency = 12;
}
def : WriteRes<WriteID64, [THXT8XUnitDiv]> {
  let ReleaseAtCycles = [8];
  let Latency = 14;
}
80
81// Load
// All scalar load writes share a three-cycle latency on the load/store unit.
let Latency = 3 in {
  def : WriteRes<WriteLD,    [THXT8XUnitLdSt]>;
  def : WriteRes<WriteLDIdx, [THXT8XUnitLdSt]>;
  def : WriteRes<WriteLDHi,  [THXT8XUnitLdSt]>;
}
85
86// Vector Load
// Default cost for the generic vector-load write.
def : WriteRes<WriteVLD, [THXT8XUnitLdSt]> {
  let ReleaseAtCycles = [3];
  let Latency = 8;
}

// Named vector-load writes referenced by the InstRW overrides in this file.
// Each one uses the load/store unit; only latency and release cycle differ.
def THXT8XWriteVLD1 : SchedWriteRes<[THXT8XUnitLdSt]> {
  let ReleaseAtCycles = [1];
  let Latency = 6;
}
def THXT8XWriteVLD2 : SchedWriteRes<[THXT8XUnitLdSt]> {
  let ReleaseAtCycles = [7];
  let Latency = 11;
}
def THXT8XWriteVLD3 : SchedWriteRes<[THXT8XUnitLdSt]> {
  let ReleaseAtCycles = [8];
  let Latency = 12;
}
def THXT8XWriteVLD4 : SchedWriteRes<[THXT8XUnitLdSt]> {
  let ReleaseAtCycles = [9];
  let Latency = 13;
}
// Same numbers as THXT8XWriteVLD4.
def THXT8XWriteVLD5 : SchedWriteRes<[THXT8XUnitLdSt]> {
  let ReleaseAtCycles = [9];
  let Latency = 13;
}
116
117// Pre/Post Indexing
// The address write consumes no processor resource and has zero latency.
def : WriteRes<WriteAdr, []> {
  let Latency = 0;
}
119
120// Store
// All scalar store writes take one cycle on the load/store unit.
let Latency = 1 in {
  def : WriteRes<WriteST,    [THXT8XUnitLdSt]>;
  def : WriteRes<WriteSTP,   [THXT8XUnitLdSt]>;
  def : WriteRes<WriteSTIdx, [THXT8XUnitLdSt]>;
  def : WriteRes<WriteSTX,   [THXT8XUnitLdSt]>;
}
125
126// Vector Store
// The generic vector-store write and THXT8XWriteVST1 keep the default
// latency/release fields; only the resource is specified.
def : WriteRes<WriteVST, [THXT8XUnitLdSt]>;
def THXT8XWriteVST1 : SchedWriteRes<[THXT8XUnitLdSt]>;

// Costlier vector-store writes referenced by the InstRW overrides.
def THXT8XWriteVST2 : SchedWriteRes<[THXT8XUnitLdSt]> {
  let ReleaseAtCycles = [9];
  let Latency = 10;
}
def THXT8XWriteVST3 : SchedWriteRes<[THXT8XUnitLdSt]> {
  let ReleaseAtCycles = [10];
  let Latency = 11;
}
139
// WriteAtomic is marked unsupported in this model and is given no resources.
def : WriteRes<WriteAtomic, []> {
  let Unsupported = 1;
}
141
142// Branch
// None of these writes overrides a field: each record only names the
// processor resource it occupies.
def : WriteRes<WriteBr,    [THXT8XUnitBr]>;
def THXT8XWriteBR  : SchedWriteRes<[THXT8XUnitBr]>;
def : WriteRes<WriteBrReg, [THXT8XUnitBr]>;
def THXT8XWriteBRR : SchedWriteRes<[THXT8XUnitBr]>;
// Unlike the other writes in this group, RET is booked on the ALU resource.
def THXT8XWriteRET : SchedWriteRes<[THXT8XUnitALU]>;
def : WriteRes<WriteSys,     [THXT8XUnitBr]>;
def : WriteRes<WriteBarrier, [THXT8XUnitBr]>;
def : WriteRes<WriteHint,    [THXT8XUnitBr]>;
151
152// FP ALU
// Every write mapped to the FP ALU resource has a six-cycle latency.
let Latency = 6 in {
  def : WriteRes<WriteF,     [THXT8XUnitFPALU]>;
  def : WriteRes<WriteFCmp,  [THXT8XUnitFPALU]>;
  def : WriteRes<WriteFCvt,  [THXT8XUnitFPALU]>;
  def : WriteRes<WriteFCopy, [THXT8XUnitFPALU]>;
  def : WriteRes<WriteFImm,  [THXT8XUnitFPALU]>;
  def : WriteRes<WriteVd,    [THXT8XUnitFPALU]>;
  def : WriteRes<WriteVq,    [THXT8XUnitFPALU]>;
}
160
161// FP Mul, Div, Sqrt
// Generic FP multiply and divide writes.
def : WriteRes<WriteFMul, [THXT8XUnitFPMDS]> {
  let Latency = 6;
}
def : WriteRes<WriteFDiv, [THXT8XUnitFPMDS]> {
  let ReleaseAtCycles = [19];
  let Latency = 22;
}

// Named writes referenced by the FP InstRW overrides in this file.
def THXT8XWriteFMAC : SchedWriteRes<[THXT8XUnitFPMDS]> {
  let Latency = 10;
}

def THXT8XWriteFDivSP : SchedWriteRes<[THXT8XUnitFPMDS]> {
  let ReleaseAtCycles = [9];
  let Latency = 12;
}
// Same numbers as the generic WriteFDiv entry above.
def THXT8XWriteFDivDP : SchedWriteRes<[THXT8XUnitFPMDS]> {
  let ReleaseAtCycles = [19];
  let Latency = 22;
}

def THXT8XWriteFSqrtSP : SchedWriteRes<[THXT8XUnitFPMDS]> {
  let ReleaseAtCycles = [14];
  let Latency = 17;
}
def THXT8XWriteFSqrtDP : SchedWriteRes<[THXT8XUnitFPMDS]> {
  let ReleaseAtCycles = [28];
  let Latency = 31;
}
189
190//===----------------------------------------------------------------------===//
191// Subtarget-specific SchedRead types.
192
// Read advances that are not restricted to a list of producing writes.
// NOTE(review): this group was labelled "No forwarding for these reads", yet
// every advance below is non-zero - confirm the intended values.
def : ReadAdvance<ReadExtrHi,  1>;
def : ReadAdvance<ReadAdrBase, 2>;
def : ReadAdvance<ReadVLD,     2>;
def : ReadAdvance<ReadST,      2>;
198
199// FIXME: This needs more targeted benchmarking.
200// ALU - Most operands in the ALU pipes are not needed for two cycles. Shiftable
201//       operands are needed one cycle later if and only if they are to be
202//       shifted. Otherwise, they too are needed two cycles later. This same
203//       ReadAdvance applies to Extended registers as well, even though there is
204//       a separate SchedPredicate for them.
// Plain integer read: two-cycle advance relative to the listed writes.
def : ReadAdvance<ReadI, 2,
                  [WriteImm, WriteI, WriteISReg, WriteIEReg, WriteIS,
                   WriteID32, WriteID64, WriteIM32, WriteIM64]>;

// The two advances chosen between by the read variants below.
def THXT8XReadShifted
    : SchedReadAdvance<1, [WriteImm, WriteI, WriteISReg, WriteIEReg, WriteIS,
                           WriteID32, WriteID64, WriteIM32, WriteIM64]>;
def THXT8XReadNotShifted
    : SchedReadAdvance<2, [WriteImm, WriteI, WriteISReg, WriteIEReg, WriteIS,
                           WriteID32, WriteID64, WriteIM32, WriteIM64]>;

// ReadISReg: one-cycle advance when RegShiftedPred holds, otherwise two.
def THXT8XReadISReg : SchedReadVariant<[
  SchedVar<RegShiftedPred, [THXT8XReadShifted]>,
  SchedVar<NoSchedPred,    [THXT8XReadNotShifted]>
]>;
def : SchedAlias<ReadISReg, THXT8XReadISReg>;

// ReadIEReg: same selection, keyed on RegExtendedPred.
def THXT8XReadIEReg : SchedReadVariant<[
  SchedVar<RegExtendedPred, [THXT8XReadShifted]>,
  SchedVar<NoSchedPred,     [THXT8XReadNotShifted]>
]>;
def : SchedAlias<ReadIEReg, THXT8XReadIEReg>;
226
227// MAC - Operands are generally needed one cycle later in the MAC pipe.
228//       Accumulator operands are needed two cycles later.
// ReadIM gets a one-cycle advance relative to the listed writes.
def : ReadAdvance<ReadIM, 1,
                  [WriteImm, WriteI, WriteISReg, WriteIEReg, WriteIS,
                   WriteID32, WriteID64, WriteIM32, WriteIM64]>;
// ReadIMA gets a two-cycle advance relative to the same writes.
def : ReadAdvance<ReadIMA, 2,
                  [WriteImm, WriteI, WriteISReg, WriteIEReg, WriteIS,
                   WriteID32, WriteID64, WriteIM32, WriteIM64]>;
237
238// Div
// ReadID gets a one-cycle advance relative to the listed writes.
def : ReadAdvance<ReadID, 1,
                  [WriteImm, WriteI, WriteISReg, WriteIEReg, WriteIS,
                   WriteID32, WriteID64, WriteIM32, WriteIM64]>;
243
244//===----------------------------------------------------------------------===//
245// Subtarget-specific InstRW.
246
247//---
248// Branch
249//---
// Opcodes mapped to THXT8XWriteBR.
def : InstRW<[THXT8XWriteBR],
             (instregex "^B$")>;
def : InstRW<[THXT8XWriteBR],
             (instregex "^BL$")>;
def : InstRW<[THXT8XWriteBR],
             (instregex "^B..$")>;
def : InstRW<[THXT8XWriteBR],
             (instregex "^CBNZ")>;
def : InstRW<[THXT8XWriteBR],
             (instregex "^CBZ")>;
def : InstRW<[THXT8XWriteBR],
             (instregex "^TBNZ")>;
def : InstRW<[THXT8XWriteBR],
             (instregex "^TBZ")>;

// Opcodes mapped to THXT8XWriteBRR.
def : InstRW<[THXT8XWriteBRR],
             (instregex "^BR$")>;
def : InstRW<[THXT8XWriteBRR],
             (instregex "^BLR$")>;
259
260//---
261// Ret
262//---
// RET is mapped to its own write, THXT8XWriteRET.
def : InstRW<[THXT8XWriteRET],
             (instregex "^RET$")>;
264
265//---
266// Miscellaneous
267//---
// COPY is scheduled with the WriteI write.
def : InstRW<[WriteI],
             (instrs COPY)>;
269
270//---
271// Vector Loads
272//---
// LD1 patterns without the _POST suffix.
def : InstRW<[THXT8XWriteVLD1],
             (instregex "LD1i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVLD1],
             (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD1],
             (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD2],
             (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD3],
             (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD4],
             (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;

// LD1 _POST patterns: same load write plus WriteAdr.
def : InstRW<[THXT8XWriteVLD1, WriteAdr],
             (instregex "LD1i(8|16|32|64)_POST$")>;
def : InstRW<[THXT8XWriteVLD1, WriteAdr],
             (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVLD1, WriteAdr],
             (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVLD2, WriteAdr],
             (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVLD3, WriteAdr],
             (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVLD4, WriteAdr],
             (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
285
// LD2 patterns without the _POST suffix.
def : InstRW<[THXT8XWriteVLD1], (instregex "LD2i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVLD1], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD2], (instregex "LD2Twov(8b|4h|2s)$")>;
def : InstRW<[THXT8XWriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)$")>;

// LD2 _POST patterns: same load write plus WriteAdr.
// These patterns require the _POST suffix. They previously used "(_POST)?",
// which also matched the opcodes already mapped just above and so produced
// overlapping InstRW definitions. The LD1/LD3/LD4 patterns in this file
// already use the strict "_POST$" form.
def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>;
def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>;
294
// LD3 patterns without the _POST suffix.
def : InstRW<[THXT8XWriteVLD2],
             (instregex "LD3i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVLD2],
             (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD4],
             (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)$")>;
def : InstRW<[THXT8XWriteVLD3],
             (instregex "LD3Threev(2d)$")>;

// LD3 _POST patterns: same load write plus WriteAdr.
def : InstRW<[THXT8XWriteVLD2, WriteAdr],
             (instregex "LD3i(8|16|32|64)_POST$")>;
def : InstRW<[THXT8XWriteVLD2, WriteAdr],
             (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVLD4, WriteAdr],
             (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
def : InstRW<[THXT8XWriteVLD3, WriteAdr],
             (instregex "LD3Threev(2d)_POST$")>;
303
// LD4 patterns without the _POST suffix.
def : InstRW<[THXT8XWriteVLD2],
             (instregex "LD4i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVLD2],
             (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD5],
             (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>;
def : InstRW<[THXT8XWriteVLD4],
             (instregex "LD4Fourv(2d)$")>;

// LD4 _POST patterns: same load write plus WriteAdr.
def : InstRW<[THXT8XWriteVLD2, WriteAdr],
             (instregex "LD4i(8|16|32|64)_POST$")>;
def : InstRW<[THXT8XWriteVLD2, WriteAdr],
             (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVLD5, WriteAdr],
             (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
def : InstRW<[THXT8XWriteVLD4, WriteAdr],
             (instregex "LD4Fourv(2d)_POST$")>;
312
313//---
314// Vector Stores
315//---
// ST1 patterns without the _POST suffix.
def : InstRW<[THXT8XWriteVST1],
             (instregex "ST1i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVST1],
             (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVST1],
             (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVST2],
             (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVST2],
             (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;

// ST1 _POST patterns: same store write plus WriteAdr.
def : InstRW<[THXT8XWriteVST1, WriteAdr],
             (instregex "ST1i(8|16|32|64)_POST$")>;
def : InstRW<[THXT8XWriteVST1, WriteAdr],
             (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVST1, WriteAdr],
             (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVST2, WriteAdr],
             (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVST2, WriteAdr],
             (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
326
// ST2 patterns without the _POST suffix.
def : InstRW<[THXT8XWriteVST1],
             (instregex "ST2i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVST1],
             (instregex "ST2Twov(8b|4h|2s)$")>;
def : InstRW<[THXT8XWriteVST2],
             (instregex "ST2Twov(16b|8h|4s|2d)$")>;

// ST2 _POST patterns: same store write plus WriteAdr.
def : InstRW<[THXT8XWriteVST1, WriteAdr],
             (instregex "ST2i(8|16|32|64)_POST$")>;
def : InstRW<[THXT8XWriteVST1, WriteAdr],
             (instregex "ST2Twov(8b|4h|2s)_POST$")>;
def : InstRW<[THXT8XWriteVST2, WriteAdr],
             (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
333
// ST3 patterns without the _POST suffix.
def : InstRW<[THXT8XWriteVST2],
             (instregex "ST3i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVST3],
             (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)$")>;
def : InstRW<[THXT8XWriteVST2],
             (instregex "ST3Threev(2d)$")>;

// ST3 _POST patterns: same store write plus WriteAdr.
def : InstRW<[THXT8XWriteVST2, WriteAdr],
             (instregex "ST3i(8|16|32|64)_POST$")>;
def : InstRW<[THXT8XWriteVST3, WriteAdr],
             (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
def : InstRW<[THXT8XWriteVST2, WriteAdr],
             (instregex "ST3Threev(2d)_POST$")>;
340
// ST4 patterns without the _POST suffix.
def : InstRW<[THXT8XWriteVST2],
             (instregex "ST4i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVST3],
             (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>;
def : InstRW<[THXT8XWriteVST2],
             (instregex "ST4Fourv(2d)$")>;

// ST4 _POST patterns: same store write plus WriteAdr.
def : InstRW<[THXT8XWriteVST2, WriteAdr],
             (instregex "ST4i(8|16|32|64)_POST$")>;
def : InstRW<[THXT8XWriteVST3, WriteAdr],
             (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
def : InstRW<[THXT8XWriteVST2, WriteAdr],
             (instregex "ST4Fourv(2d)_POST$")>;
347
348//---
349// Floating Point MAC, DIV, SQRT
350//---
// Patterns mapped to THXT8XWriteFMAC.
def : InstRW<[THXT8XWriteFMAC],
             (instregex "^FN?M(ADD|SUB).*")>;
def : InstRW<[THXT8XWriteFMAC],
             (instregex "^FML(A|S).*")>;

// Divide: FDIVSrr and FDIVDrr are named explicitly; the FDIVv patterns are
// split by a trailing 32 or 64 in the opcode name.
def : InstRW<[THXT8XWriteFDivSP],
             (instrs FDIVSrr)>;
def : InstRW<[THXT8XWriteFDivDP],
             (instrs FDIVDrr)>;
def : InstRW<[THXT8XWriteFDivSP],
             (instregex "^FDIVv.*32$")>;
def : InstRW<[THXT8XWriteFDivDP],
             (instregex "^FDIVv.*64$")>;

// Square root: any opcode name containing SQRT and ending in 32 or 64.
def : InstRW<[THXT8XWriteFSqrtSP],
             (instregex "^.*SQRT.*32$")>;
def : InstRW<[THXT8XWriteFSqrtDP],
             (instregex "^.*SQRT.*64$")>;
359
360}
361