xref: /freebsd/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleP10.td (revision 5b56413d04e608379c9a306373554a8e4d321bc0)
1//===--- PPCScheduleP10.td - P10 Scheduling Definitions -*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// Automatically generated file, do not edit!
9//
10// This file defines the resources required by P10 instructions.
11//===----------------------------------------------------------------------===//
12// Modeling pipeline forwarding logic.
// One generic SchedRead per consuming pipeline. Later in this file, inside the
// `let SchedModel = P10Model` scope, each of these twelve reads is bound by a
// SchedAlias to its `<name>_1C` SchedReadAdvance. The BF, CY and MM reads are
// not declared here; they are defined in that scope as SchedReadVariant records.
13def P10BR_Read : SchedRead;
14def P10DF_Read : SchedRead;
15def P10DV_Read : SchedRead;
16def P10DX_Read : SchedRead;
17def P10F2_Read : SchedRead;
18def P10FX_Read : SchedRead;
19def P10LD_Read : SchedRead;
20def P10MU_Read : SchedRead;
21def P10PM_Read : SchedRead;
22def P10ST_Read : SchedRead;
23def P10SX_Read : SchedRead;
24def P10vMU_Read : SchedRead;
25
26def P10Model : SchedMachineModel {
  // Machine model that the `let SchedModel = P10Model in { ... }` scope of this
  // file attaches its resources, writes and reads to.
27  let IssueWidth = 8; // Same count as the 8 units of P10_ANY_SLOT in this file.
28  let MicroOpBufferSize = 44;
29  let LoopMicroOpBufferSize = 60;
30  let CompleteModel = 1;
31
32  // Power 10 does not support SPE, E500, Book E, ISA Future or HTM
  // instructions; the predicates below mark them as unsupported for this model.
33  let UnsupportedFeatures = [HasSPE, IsE500, IsBookE, IsISAFuture, HasHTM];
34}
35
36let SchedModel = P10Model in {
37
38  // ***************** Processor Resources *****************
39
40  // Pipeline Groups
  // One ProcResource per pipeline class; the template argument is the number
  // of pipelines of that class, as spelled out in each trailing comment.
41
42  def P10_BF : ProcResource<4>; // Four Binary Floating Point pipelines.
43  def P10_BR : ProcResource<2>; // Two Branch pipelines.
44  def P10_CY : ProcResource<4>; // Four Crypto pipelines.
45  def P10_DF : ProcResource<1>; // One Decimal Floating Point pipeline.
46  def P10_DV : ProcResource<2>; // Two Fixed-point divide (DIV) pipelines.
47  def P10_DX : ProcResource<2>; // Two 128-bit fixed-point and BCD pipelines.
48  def P10_FX : ProcResource<4>; // Four ALU pipelines.
49  def P10_LD : ProcResource<2>; // Two Load pipelines.
50  def P10_MM : ProcResource<2>; // Two 512-bit SIMD matrix multiply engine pipelines.
51  def P10_PM : ProcResource<4>; // Four 128-bit permute (PM) pipelines.
52  def P10_ST : ProcResource<2>; // Two ST-D pipelines.
53  def P10_SX : ProcResource<2>; // Two Simple Fixed-point (SFX) pipelines.
54
55  // Dispatch Groups
56
57  // Dispatch to any slot (8 units in total).
58  def P10_ANY_SLOT : ProcResource<8>;
59
60  let Super = P10_ANY_SLOT in {
  // Both groups below name P10_ANY_SLOT as their Super resource; their
  // 4 + 4 units add up to its 8.
61
62    // Dispatch to even slots
63    def P10_EVEN_SLOT : ProcResource<4>;
64
65    // Dispatch to odd slots
66    def P10_ODD_SLOT : ProcResource<4>;
67  }
68
69  // Dispatch Rules
  // Every write in this group gets NumMicroOps = 0 and Latency = 1 from the
  // enclosing `let`; the writes differ only in which slot resources they list.
70  let NumMicroOps = 0, Latency = 1 in {
71    // Dispatch Rule '-'
72    def P10W_DISP_ANY : SchedWriteRes<[P10_ANY_SLOT]>;
73
74    // Dispatch Rule '-', even slot
75    def P10W_DISP_EVEN : SchedWriteRes<[P10_EVEN_SLOT]>;
76
77    // Dispatch Rule 'P': lists both the even-slot and the odd-slot resource.
78    def P10W_DISP_PAIR : SchedWriteRes<[P10_EVEN_SLOT, P10_ODD_SLOT]>;
79  }
80
81  // ***************** SchedWriteRes Definitions *****************
82
83  // A BF pipeline may take from 7 to 36 cycles to complete.
84  // Some BF operations may keep the pipeline busy for up to 10 cycles.
  // The <N>C suffix of each write name equals its Latency value. The largest
  // ReleaseAtCycles entry in the BF group is 10 (P10W_BF_36C).
85  def P10W_BF_7C : SchedWriteRes<[P10_BF]> {
86    let Latency = 7;
87  }
88
89  def P10W_BF_22C : SchedWriteRes<[P10_BF]> {
90    let ReleaseAtCycles = [ 5 ];
91    let Latency = 22;
92  }
93
94  def P10W_BF_24C : SchedWriteRes<[P10_BF]> {
95    let ReleaseAtCycles = [ 8 ];
96    let Latency = 24;
97  }
98
99  def P10W_BF_26C : SchedWriteRes<[P10_BF]> {
100    let ReleaseAtCycles = [ 5 ];
101    let Latency = 26;
102  }
103
104  def P10W_BF_27C : SchedWriteRes<[P10_BF]> {
105    let ReleaseAtCycles = [ 7 ];
106    let Latency = 27;
107  }
108
109  def P10W_BF_36C : SchedWriteRes<[P10_BF]> {
110    let ReleaseAtCycles = [ 10 ];
111    let Latency = 36;
112  }
113
114  // A BR pipeline may take 2 cycles to complete.
115  def P10W_BR_2C : SchedWriteRes<[P10_BR]> {
116    let Latency = 2;
117  }
118
119  // A CY pipeline may take 7 cycles to complete.
120  def P10W_CY_7C : SchedWriteRes<[P10_CY]> {
121    let Latency = 7;
122  }
123
124  // A DF pipeline may take from 13 to 174 cycles to complete.
125  // Some DF operations may keep the pipeline busy for up to 67 cycles.
  // All writes in this group use P10_DF, which this file declares with a
  // single unit. The largest ReleaseAtCycles entry is 67 (P10W_DF_77C).
126  def P10W_DF_13C : SchedWriteRes<[P10_DF]> {
127    let Latency = 13;
128  }
129
130  def P10W_DF_24C : SchedWriteRes<[P10_DF]> {
131    let ReleaseAtCycles = [ 16 ];
132    let Latency = 24;
133  }
134
135  def P10W_DF_25C : SchedWriteRes<[P10_DF]> {
136    let ReleaseAtCycles = [ 17 ];
137    let Latency = 25;
138  }
139
140  def P10W_DF_26C : SchedWriteRes<[P10_DF]> {
141    let ReleaseAtCycles = [ 18 ];
142    let Latency = 26;
143  }
144
145  def P10W_DF_32C : SchedWriteRes<[P10_DF]> {
146    let ReleaseAtCycles = [ 22 ];
147    let Latency = 32;
148  }
149
150  def P10W_DF_33C : SchedWriteRes<[P10_DF]> {
151    let ReleaseAtCycles = [ 25 ];
152    let Latency = 33;
153  }
154
155  def P10W_DF_34C : SchedWriteRes<[P10_DF]> {
156    let ReleaseAtCycles = [ 25 ];
157    let Latency = 34;
158  }
159
160  def P10W_DF_38C : SchedWriteRes<[P10_DF]> {
161    let ReleaseAtCycles = [ 30 ];
162    let Latency = 38;
163  }
164
165  def P10W_DF_40C : SchedWriteRes<[P10_DF]> {
166    let ReleaseAtCycles = [ 17 ];
167    let Latency = 40;
168  }
169
170  def P10W_DF_43C : SchedWriteRes<[P10_DF]> {
171    let ReleaseAtCycles = [ 34 ];
172    let Latency = 43;
173  }
174
175  def P10W_DF_59C : SchedWriteRes<[P10_DF]> {
176    let ReleaseAtCycles = [ 49 ];
177    let Latency = 59;
178  }
179
180  def P10W_DF_61C : SchedWriteRes<[P10_DF]> {
181    let ReleaseAtCycles = [ 12 ];
182    let Latency = 61;
183  }
184
185  def P10W_DF_68C : SchedWriteRes<[P10_DF]> {
186    let ReleaseAtCycles = [ 15 ];
187    let Latency = 68;
188  }
189
190  def P10W_DF_77C : SchedWriteRes<[P10_DF]> {
191    let ReleaseAtCycles = [ 67 ];
192    let Latency = 77;
193  }
194
195  def P10W_DF_87C : SchedWriteRes<[P10_DF]> {
196    let ReleaseAtCycles = [ 12 ];
197    let Latency = 87;
198  }
199
200  def P10W_DF_100C : SchedWriteRes<[P10_DF]> {
201    let ReleaseAtCycles = [ 32 ];
202    let Latency = 100;
203  }
204
205  def P10W_DF_174C : SchedWriteRes<[P10_DF]> {
206    let ReleaseAtCycles = [ 33 ];
207    let Latency = 174;
208  }
209
210  // A DV pipeline may take from 20 to 83 cycles to complete.
211  // Some DV operations may keep the pipeline busy for up to 33 cycles.
  // Every DV write sets ReleaseAtCycles; only the values 10, 21 and 33 occur.
212  def P10W_DV_20C : SchedWriteRes<[P10_DV]> {
213    let ReleaseAtCycles = [ 10 ];
214    let Latency = 20;
215  }
216
217  def P10W_DV_25C : SchedWriteRes<[P10_DV]> {
218    let ReleaseAtCycles = [ 10 ];
219    let Latency = 25;
220  }
221
222  def P10W_DV_27C : SchedWriteRes<[P10_DV]> {
223    let ReleaseAtCycles = [ 10 ];
224    let Latency = 27;
225  }
226
227  def P10W_DV_41C : SchedWriteRes<[P10_DV]> {
228    let ReleaseAtCycles = [ 10 ];
229    let Latency = 41;
230  }
231
232  def P10W_DV_43C : SchedWriteRes<[P10_DV]> {
233    let ReleaseAtCycles = [ 21 ];
234    let Latency = 43;
235  }
236
237  def P10W_DV_47C : SchedWriteRes<[P10_DV]> {
238    let ReleaseAtCycles = [ 21 ];
239    let Latency = 47;
240  }
241
242  def P10W_DV_54C : SchedWriteRes<[P10_DV]> {
243    let ReleaseAtCycles = [ 33 ];
244    let Latency = 54;
245  }
246
247  def P10W_DV_60C : SchedWriteRes<[P10_DV]> {
248    let ReleaseAtCycles = [ 33 ];
249    let Latency = 60;
250  }
251
252  def P10W_DV_75C : SchedWriteRes<[P10_DV]> {
253    let ReleaseAtCycles = [ 21 ];
254    let Latency = 75;
255  }
256
257  def P10W_DV_83C : SchedWriteRes<[P10_DV]> {
258    let ReleaseAtCycles = [ 33 ];
259    let Latency = 83;
260  }
261
262  // A DX pipeline may take 5 cycles to complete.
263  def P10W_DX_5C : SchedWriteRes<[P10_DX]> {
264    let Latency = 5;
265  }
266
267  // An F2 pipeline may take 4 cycles to complete (this write uses the P10_FX resource).
268  def P10W_F2_4C : SchedWriteRes<[P10_FX]> {
269    let Latency = 4;
270  }
271
272  // An FX pipeline may take from 2 to 3 cycles to complete.
273  def P10W_FX_2C : SchedWriteRes<[P10_FX]> {
274    let Latency = 2;
275  }
276
277  def P10W_FX_3C : SchedWriteRes<[P10_FX]> {
278    let Latency = 3;
279  }
280
281  // An LD pipeline may take 6 cycles to complete.
282  def P10W_LD_6C : SchedWriteRes<[P10_LD]> {
283    let Latency = 6;
284  }
285
286  // An MF pipeline may take 13 cycles to complete (this write uses the P10_SX resource).
287  def P10W_MF_13C : SchedWriteRes<[P10_SX]> {
288    let Latency = 13;
289  }
290
291  // An MFL pipeline may take 13 cycles to complete (this write uses the P10_SX resource).
292  def P10W_MFL_13C : SchedWriteRes<[P10_SX]> {
293    let Latency = 13;
294  }
295
296  // An MM pipeline may take 10 cycles to complete.
297  def P10W_MM_10C : SchedWriteRes<[P10_MM]> {
298    let Latency = 10;
299  }
300
301  // An MU pipeline may take 5 cycles to complete (this write uses the P10_BF resource).
302  def P10W_MU_5C : SchedWriteRes<[P10_BF]> {
303    let Latency = 5;
304  }
305
306  // A PM pipeline may take 4 cycles to complete.
307  def P10W_PM_4C : SchedWriteRes<[P10_PM]> {
308    let Latency = 4;
309  }
310
311  // An ST pipeline may take 3 cycles to complete.
312  def P10W_ST_3C : SchedWriteRes<[P10_ST]> {
313    let Latency = 3;
314  }
315
316  // An SX pipeline may take from 0 to 3 cycles to complete.
317  def P10W_SX : SchedWriteRes<[P10_SX]> {
318    let Latency = 0;
319  }
320
321  def P10W_SX_3C : SchedWriteRes<[P10_SX]> {
322    let Latency = 3;
323  }
324
325  // A vMU pipeline may take 7 cycles to complete (this write uses the P10_BF resource).
326  def P10W_vMU_7C : SchedWriteRes<[P10_BF]> {
327    let Latency = 7;
328  }
329
330  // ***************** Read Advance Definitions *****************
331
332  // Modeling pipeline forwarding logic.
  // P10<pipe>_Read_<N>C: a read on <pipe> saves N cycles when the value comes
  // from one of the listed writes. The per-pipeline comments further down in
  // this file restate each list in words.
333  def P10BF_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_CY_7C, P10W_DF_13C, P10W_MM_10C]>;
334  def P10BF_Read_2C : SchedReadAdvance<2, [P10W_BF_7C]>;
335  def P10BR_Read_1C : SchedReadAdvance<1, [P10W_FX_3C, P10W_F2_4C]>;
336  def P10CY_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C, P10W_DF_13C, P10W_MM_10C]>;
337  def P10CY_Read_3C : SchedReadAdvance<3, [P10W_CY_7C]>;
338  def P10DF_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C, P10W_CY_7C, P10W_DF_13C, P10W_MM_10C]>;
339  def P10DV_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C, P10W_CY_7C, P10W_DF_13C, P10W_MM_10C]>;
340  def P10DX_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C, P10W_CY_7C, P10W_DF_13C, P10W_MM_10C]>;
341  def P10F2_Read_1C : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C, P10W_PM_4C]>;
342  def P10FX_Read_1C : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C, P10W_PM_4C]>;
343  def P10LD_Read_1C : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C]>;
344  def P10MM_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C, P10W_CY_7C, P10W_DF_13C]>;
345  def P10MM_Read_6C : SchedReadAdvance<6, [P10W_MM_10C]>;
346  def P10MU_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_DF_13C]>;
347  def P10PM_Read_1C : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C, P10W_PM_4C]>;
348  def P10ST_Read_1C : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C, P10W_PM_4C]>;
349  def P10SX_Read_1C : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C, P10W_PM_4C, P10W_MM_10C]>;
350  def P10vMU_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C, P10W_CY_7C, P10W_DF_13C, P10W_MM_10C]>;
351
352  // Save 1 cycle if pipeline BF reads the data from pipelines DX, MU, vMU, CY, DF, MM.
353  // Save 2 cycles if pipeline BF reads the data from pipeline BF.
  // NOTE(review): the P10W_*_Pred predicates used by the three variants below
  // are not defined in this file; presumably each one selects the
  // same-pipeline producer case described above -- confirm against its definition.
354  def P10BF_Read : SchedReadVariant<[
355        SchedVar<P10W_BF_7C_Pred, [P10BF_Read_2C]>,
356        SchedVar<NoSchedPred,     [P10BF_Read_1C]>
357  ]>;
358
359  // Save 1 cycle if pipeline CY reads the data from pipelines DX, MU, vMU, BF, DF, MM.
360  // Save 3 cycles if pipeline CY reads the data from pipeline CY.
361  def P10CY_Read : SchedReadVariant<[
362        SchedVar<P10W_CY_7C_Pred, [P10CY_Read_3C]>,
363        SchedVar<NoSchedPred,     [P10CY_Read_1C]>
364  ]>;
365
366  // Save 1 cycle if pipeline MM reads the data from pipelines DX, MU, vMU, BF, CY, DF.
367  // Save 6 cycles if pipeline MM reads the data from pipeline MM.
368  def P10MM_Read : SchedReadVariant<[
369        SchedVar<P10W_MM_10C_Pred, [P10MM_Read_6C]>,
370        SchedVar<NoSchedPred,     [P10MM_Read_1C]>
371  ]>;
372
373  // Save 1 cycle if pipeline BR reads the data from pipelines FX, F2.
374  def : SchedAlias<P10BR_Read, P10BR_Read_1C>;
375
376  // Save 1 cycle if pipeline DF reads the data from pipelines DX, MU, vMU, BF, CY, DF, MM.
377  def : SchedAlias<P10DF_Read, P10DF_Read_1C>;
378
379  // Save 1 cycle if pipeline DV reads the data from pipelines DX, MU, vMU, BF, CY, DF, MM.
380  def : SchedAlias<P10DV_Read, P10DV_Read_1C>;
381
382  // Save 1 cycle if pipeline DX reads the data from pipelines DX, MU, vMU, BF, CY, DF, MM.
383  def : SchedAlias<P10DX_Read, P10DX_Read_1C>;
384
385  // Save 1 cycle if pipeline F2 reads the data from pipelines ST, SX, FX, F2, PM.
386  def : SchedAlias<P10F2_Read, P10F2_Read_1C>;
387
388  // Save 1 cycle if pipeline FX reads the data from pipelines ST, SX, FX, F2, PM.
389  def : SchedAlias<P10FX_Read, P10FX_Read_1C>;
390
391  // Save 1 cycle if pipeline LD reads the data from pipelines ST, SX, FX, F2.
392  def : SchedAlias<P10LD_Read, P10LD_Read_1C>;
393
394  // Save 1 cycle if pipeline MU reads the data from pipelines DX, MU, DF.
395  def : SchedAlias<P10MU_Read, P10MU_Read_1C>;
396
397  // Save 1 cycle if pipeline PM reads the data from pipelines ST, SX, FX, F2, PM.
398  def : SchedAlias<P10PM_Read, P10PM_Read_1C>;
399
400  // Save 1 cycle if pipeline ST reads the data from pipelines ST, SX, FX, F2, PM.
401  def : SchedAlias<P10ST_Read, P10ST_Read_1C>;
402
403  // Save 1 cycle if pipeline SX reads the data from pipelines ST, SX, FX, F2, PM, MM.
404  def : SchedAlias<P10SX_Read, P10SX_Read_1C>;
405
406  // Save 1 cycle if pipeline vMU reads the data from pipelines DX, MU, vMU, BF, CY, DF, MM.
407  def : SchedAlias<P10vMU_Read, P10vMU_Read_1C>;
408
409  include "P10InstrResources.td"
410}
411