xref: /freebsd/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleP10.td (revision c9539b89010900499a200cdd6c0265ea5d950875)
1//===--- PPCScheduleP10.td - P10 Scheduling Definitions -*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// Automatically generated file, do not edit!
9//
10// This file defines the resources required by P10 instructions.
11//===----------------------------------------------------------------------===//
12// Modeling pipeline forwarding logic: one SchedRead per consumer pipeline; each is bound to its P10*_Read_1C SchedReadAdvance through a SchedAlias inside the P10Model scope.
13def P10BR_Read : SchedRead;
14def P10DF_Read : SchedRead;
15def P10DV_Read : SchedRead;
16def P10DX_Read : SchedRead;
17def P10F2_Read : SchedRead;
18def P10FX_Read : SchedRead;
19def P10LD_Read : SchedRead;
20def P10MU_Read : SchedRead;
21def P10PM_Read : SchedRead;
22def P10ST_Read : SchedRead;
23def P10SX_Read : SchedRead;
24def P10vMU_Read : SchedRead;
25
26def P10Model : SchedMachineModel { // Machine model for P10; selected as SchedModel for the resource definitions that follow.
27  let IssueWidth = 8; // NOTE(review): same count as the 8 units of P10_ANY_SLOT - presumably the dispatch width; confirm.
28
29  // TODO - Need to be updated according to P10 UM.
30  let MicroOpBufferSize = 44;
31
32  // TODO - tune this on real HW once it arrives. For now, we will use the same
33  // value as we do on P9.
34  let LoopMicroOpBufferSize = 60;
35
36  let CompleteModel = 1; // NOTE(review): presumably requires scheduling info for every instruction under this model - confirm against TargetSchedule.td.
37
38  // Do not support SPE (Signal Processing Engine) on Power 10; IsE500, IsBookE and IsISAFuture are listed as unsupported as well.
39  let UnsupportedFeatures = [HasSPE, IsE500, IsBookE, IsISAFuture];
40}
41
42let SchedModel = P10Model in {
43
44  // ***************** Processor Resources *****************
45
46  // Pipeline Groups: one ProcResource per pipeline class; the template argument is the number of pipelines in that class.
47
48  def P10_BF : ProcResource<4>; // Four Binary Floating Point pipelines.
49  def P10_BR : ProcResource<2>; // Two Branch pipelines.
50  def P10_CY : ProcResource<4>; // Four Crypto pipelines.
51  def P10_DF : ProcResource<1>; // One Decimal Floating Point pipeline.
52  def P10_DV : ProcResource<2>; // Two Fixed-point divide (DIV) pipelines.
53  def P10_DX : ProcResource<2>; // Two 128-bit fixed-point and BCD pipelines.
54  def P10_FX : ProcResource<4>; // Four ALU pipelines.
55  def P10_LD : ProcResource<2>; // Two Load pipelines.
56  def P10_MM : ProcResource<2>; // Two 512-bit SIMD matrix multiply engine pipelines.
57  def P10_PM : ProcResource<4>; // Four 128-bit permute (PM) pipelines.
58  def P10_ST : ProcResource<2>; // Two ST-D pipelines.
59  def P10_SX : ProcResource<2>; // Two Simple Fixed-point (SFX) pipelines.
60
61  // Dispatch Groups: P10_ANY_SLOT (8 units) is the Super resource of P10_EVEN_SLOT and P10_ODD_SLOT (4 units each).
62
63  // Dispatch to any slot
64  def P10_ANY_SLOT : ProcResource<8>;
65
66  let Super = P10_ANY_SLOT in {
67
68    // Dispatch to even slots
69    def P10_EVEN_SLOT : ProcResource<4>;
70
71    // Dispatch to odd slots
72    def P10_ODD_SLOT : ProcResource<4>;
73  }
74
75  // Dispatch Rules: write resources that name dispatch-slot resources only; all share NumMicroOps = 0 and Latency = 1.
76  let NumMicroOps = 0, Latency = 1 in {
77    // Dispatch Rule '-': any slot.
78    def P10W_DISP_ANY : SchedWriteRes<[P10_ANY_SLOT]>;
79
80    // Dispatch Rule '-', even slot
81    def P10W_DISP_EVEN : SchedWriteRes<[P10_EVEN_SLOT]>;
82
83    // Dispatch Rule 'P': lists both the even-slot and the odd-slot resource.
84    def P10W_DISP_PAIR : SchedWriteRes<[P10_EVEN_SLOT, P10_ODD_SLOT]>;
85  }
86
87  // ***************** SchedWriteRes Definitions *****************
88
89  // A BF pipeline may take from 7 to 36 cycles to complete (P10W_BF_<N>C has Latency = N).
90  // Some BF operations may keep the pipeline busy for up to 10 cycles (the ResourceCycles value).
91  def P10W_BF_7C : SchedWriteRes<[P10_BF]> { // ResourceCycles is not overridden for this write.
92    let Latency = 7;
93  }
94
95  def P10W_BF_22C : SchedWriteRes<[P10_BF]> {
96    let ResourceCycles = [ 5 ];
97    let Latency = 22;
98  }
99
100  def P10W_BF_24C : SchedWriteRes<[P10_BF]> {
101    let ResourceCycles = [ 8 ];
102    let Latency = 24;
103  }
104
105  def P10W_BF_26C : SchedWriteRes<[P10_BF]> {
106    let ResourceCycles = [ 5 ];
107    let Latency = 26;
108  }
109
110  def P10W_BF_27C : SchedWriteRes<[P10_BF]> {
111    let ResourceCycles = [ 7 ];
112    let Latency = 27;
113  }
114
115  def P10W_BF_36C : SchedWriteRes<[P10_BF]> {
116    let ResourceCycles = [ 10 ];
117    let Latency = 36;
118  }
119
120  // A BR pipeline may take 2 cycles to complete; this write uses the P10_BR resource.
121  def P10W_BR_2C : SchedWriteRes<[P10_BR]> {
122    let Latency = 2;
123  }
124
125  // A CY pipeline may take 7 cycles to complete; this write uses the P10_CY resource.
126  def P10W_CY_7C : SchedWriteRes<[P10_CY]> {
127    let Latency = 7;
128  }
129
130  // A DF pipeline may take from 13 to 174 cycles to complete (P10W_DF_<N>C has Latency = N).
131  // Some DF operations may keep the pipeline busy for up to 67 cycles (the ResourceCycles value).
132  def P10W_DF_13C : SchedWriteRes<[P10_DF]> { // ResourceCycles is not overridden for this write.
133    let Latency = 13;
134  }
135
136  def P10W_DF_24C : SchedWriteRes<[P10_DF]> {
137    let ResourceCycles = [ 16 ];
138    let Latency = 24;
139  }
140
141  def P10W_DF_25C : SchedWriteRes<[P10_DF]> {
142    let ResourceCycles = [ 17 ];
143    let Latency = 25;
144  }
145
146  def P10W_DF_26C : SchedWriteRes<[P10_DF]> {
147    let ResourceCycles = [ 18 ];
148    let Latency = 26;
149  }
150
151  def P10W_DF_32C : SchedWriteRes<[P10_DF]> {
152    let ResourceCycles = [ 22 ];
153    let Latency = 32;
154  }
155
156  def P10W_DF_33C : SchedWriteRes<[P10_DF]> {
157    let ResourceCycles = [ 25 ];
158    let Latency = 33;
159  }
160
161  def P10W_DF_34C : SchedWriteRes<[P10_DF]> {
162    let ResourceCycles = [ 25 ];
163    let Latency = 34;
164  }
165
166  def P10W_DF_38C : SchedWriteRes<[P10_DF]> {
167    let ResourceCycles = [ 30 ];
168    let Latency = 38;
169  }
170
171  def P10W_DF_40C : SchedWriteRes<[P10_DF]> {
172    let ResourceCycles = [ 17 ];
173    let Latency = 40;
174  }
175
176  def P10W_DF_43C : SchedWriteRes<[P10_DF]> {
177    let ResourceCycles = [ 34 ];
178    let Latency = 43;
179  }
180
181  def P10W_DF_59C : SchedWriteRes<[P10_DF]> {
182    let ResourceCycles = [ 49 ];
183    let Latency = 59;
184  }
185
186  def P10W_DF_61C : SchedWriteRes<[P10_DF]> {
187    let ResourceCycles = [ 12 ];
188    let Latency = 61;
189  }
190
191  def P10W_DF_68C : SchedWriteRes<[P10_DF]> {
192    let ResourceCycles = [ 15 ];
193    let Latency = 68;
194  }
195
196  def P10W_DF_77C : SchedWriteRes<[P10_DF]> {
197    let ResourceCycles = [ 67 ];
198    let Latency = 77;
199  }
200
201  def P10W_DF_87C : SchedWriteRes<[P10_DF]> {
202    let ResourceCycles = [ 12 ];
203    let Latency = 87;
204  }
205
206  def P10W_DF_100C : SchedWriteRes<[P10_DF]> {
207    let ResourceCycles = [ 32 ];
208    let Latency = 100;
209  }
210
211  def P10W_DF_174C : SchedWriteRes<[P10_DF]> {
212    let ResourceCycles = [ 33 ];
213    let Latency = 174;
214  }
215
216  // A DV pipeline may take from 20 to 83 cycles to complete (P10W_DV_<N>C has Latency = N).
217  // Some DV operations may keep the pipeline busy for up to 33 cycles (the ResourceCycles value).
218  def P10W_DV_20C : SchedWriteRes<[P10_DV]> {
219    let ResourceCycles = [ 10 ];
220    let Latency = 20;
221  }
222
223  def P10W_DV_25C : SchedWriteRes<[P10_DV]> {
224    let ResourceCycles = [ 10 ];
225    let Latency = 25;
226  }
227
228  def P10W_DV_27C : SchedWriteRes<[P10_DV]> {
229    let ResourceCycles = [ 10 ];
230    let Latency = 27;
231  }
232
233  def P10W_DV_41C : SchedWriteRes<[P10_DV]> {
234    let ResourceCycles = [ 10 ];
235    let Latency = 41;
236  }
237
238  def P10W_DV_43C : SchedWriteRes<[P10_DV]> {
239    let ResourceCycles = [ 21 ];
240    let Latency = 43;
241  }
242
243  def P10W_DV_47C : SchedWriteRes<[P10_DV]> {
244    let ResourceCycles = [ 21 ];
245    let Latency = 47;
246  }
247
248  def P10W_DV_54C : SchedWriteRes<[P10_DV]> {
249    let ResourceCycles = [ 33 ];
250    let Latency = 54;
251  }
252
253  def P10W_DV_60C : SchedWriteRes<[P10_DV]> {
254    let ResourceCycles = [ 33 ];
255    let Latency = 60;
256  }
257
258  def P10W_DV_75C : SchedWriteRes<[P10_DV]> {
259    let ResourceCycles = [ 21 ];
260    let Latency = 75;
261  }
262
263  def P10W_DV_83C : SchedWriteRes<[P10_DV]> {
264    let ResourceCycles = [ 33 ];
265    let Latency = 83;
266  }
267
268  // A DX pipeline may take 5 cycles to complete; this write uses the P10_DX resource.
269  def P10W_DX_5C : SchedWriteRes<[P10_DX]> {
270    let Latency = 5;
271  }
272
273  // A F2 pipeline may take 4 cycles to complete. F2 writes use the P10_FX resource; the Pipeline Groups define no separate F2 ProcResource.
274  def P10W_F2_4C : SchedWriteRes<[P10_FX]> {
275    let Latency = 4;
276  }
277
278  // A FX pipeline may take from 2 to 3 cycles to complete; both writes use the P10_FX resource.
279  def P10W_FX_2C : SchedWriteRes<[P10_FX]> {
280    let Latency = 2;
281  }
282
283  def P10W_FX_3C : SchedWriteRes<[P10_FX]> {
284    let Latency = 3;
285  }
286
287  // A LD pipeline may take 6 cycles to complete; this write uses the P10_LD resource.
288  def P10W_LD_6C : SchedWriteRes<[P10_LD]> {
289    let Latency = 6;
290  }
291
292  // A MF pipeline may take 13 cycles to complete. MF writes use the P10_SX resource; the Pipeline Groups define no separate MF ProcResource.
293  def P10W_MF_13C : SchedWriteRes<[P10_SX]> {
294    let Latency = 13;
295  }
296
297  // A MFL pipeline may take 13 cycles to complete. MFL writes use the P10_SX resource as well.
298  def P10W_MFL_13C : SchedWriteRes<[P10_SX]> {
299    let Latency = 13;
300  }
301
302  // A MM pipeline may take 10 cycles to complete; this write uses the P10_MM resource.
303  def P10W_MM_10C : SchedWriteRes<[P10_MM]> {
304    let Latency = 10;
305  }
306
307  // A MU pipeline may take 5 cycles to complete. MU writes use the P10_BF resource; the Pipeline Groups define no separate MU ProcResource.
308  def P10W_MU_5C : SchedWriteRes<[P10_BF]> {
309    let Latency = 5;
310  }
311
312  // A PM pipeline may take 4 cycles to complete; this write uses the P10_PM resource.
313  def P10W_PM_4C : SchedWriteRes<[P10_PM]> {
314    let Latency = 4;
315  }
316
317  // A ST pipeline may take 3 cycles to complete; this write uses the P10_ST resource.
318  def P10W_ST_3C : SchedWriteRes<[P10_ST]> {
319    let Latency = 3;
320  }
321
322  // A SX pipeline may take from 0 to 3 cycles to complete; P10W_SX is the zero-latency case.
323  def P10W_SX : SchedWriteRes<[P10_SX]> {
324    let Latency = 0;
325  }
326
327  def P10W_SX_3C : SchedWriteRes<[P10_SX]> {
328    let Latency = 3;
329  }
330
331  // A vMU pipeline may take 7 cycles to complete. vMU writes use the P10_BF resource; the Pipeline Groups define no separate vMU ProcResource.
332  def P10W_vMU_7C : SchedWriteRes<[P10_BF]> {
333    let Latency = 7;
334  }
335
336  // ***************** Read Advance Definitions *****************
337
338  // Modeling pipeline forwarding logic: P10<pipe>_Read_<N>C is a SchedReadAdvance of N cycles for pipeline <pipe>, valid for operands produced by the listed writes.
339  def P10BF_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_CY_7C, P10W_DF_13C, P10W_MM_10C]>;
340  def P10BF_Read_2C : SchedReadAdvance<2, [P10W_BF_7C]>;
341  def P10BR_Read_1C : SchedReadAdvance<1, [P10W_FX_3C, P10W_F2_4C]>;
342  def P10CY_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C, P10W_DF_13C, P10W_MM_10C]>;
343  def P10CY_Read_3C : SchedReadAdvance<3, [P10W_CY_7C]>;
344  def P10DF_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C, P10W_CY_7C, P10W_DF_13C, P10W_MM_10C]>;
345  def P10DV_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C, P10W_CY_7C, P10W_DF_13C, P10W_MM_10C]>;
346  def P10DX_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C, P10W_CY_7C, P10W_DF_13C, P10W_MM_10C]>;
347  def P10F2_Read_1C : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C, P10W_PM_4C]>;
348  def P10FX_Read_1C : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C, P10W_PM_4C]>;
349  def P10LD_Read_1C : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C]>;
350  def P10MM_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C, P10W_CY_7C, P10W_DF_13C]>;
351  def P10MM_Read_6C : SchedReadAdvance<6, [P10W_MM_10C]>;
352  def P10MU_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_DF_13C]>;
353  def P10PM_Read_1C : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C, P10W_PM_4C]>;
354  def P10ST_Read_1C : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C, P10W_PM_4C]>;
355  def P10SX_Read_1C : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C, P10W_PM_4C, P10W_MM_10C]>;
356  def P10vMU_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C, P10W_CY_7C, P10W_DF_13C, P10W_MM_10C]>;
357
358  // Save 1 cycle if pipeline BF reads the data from pipelines DX, MU, vMU, CY, DF, MM.
359  // Save 2 cycles if pipeline BF reads the data from pipelines BF.
360  def P10BF_Read : SchedReadVariant<[
361        SchedVar<P10W_BF_7C_Pred, [P10BF_Read_2C]>, // P10W_BF_7C_Pred is defined outside this view.
362        SchedVar<NoSchedPred,     [P10BF_Read_1C]> // Variant listed under NoSchedPred: the 1-cycle advance.
363  ]>;
364
365  // Save 1 cycle if pipeline CY reads the data from pipelines DX, MU, vMU, BF, DF, MM.
366  // Save 3 cycles if pipeline CY reads the data from pipelines CY.
367  def P10CY_Read : SchedReadVariant<[
368        SchedVar<P10W_CY_7C_Pred, [P10CY_Read_3C]>, // P10W_CY_7C_Pred is defined outside this view.
369        SchedVar<NoSchedPred,     [P10CY_Read_1C]> // Variant listed under NoSchedPred: the 1-cycle advance.
370  ]>;
371
372  // Save 1 cycle if pipeline MM reads the data from pipelines DX, MU, vMU, BF, CY, DF.
373  // Save 6 cycles if pipeline MM reads the data from pipelines MM.
374  def P10MM_Read : SchedReadVariant<[
375        SchedVar<P10W_MM_10C_Pred, [P10MM_Read_6C]>, // P10W_MM_10C_Pred is defined outside this view.
376        SchedVar<NoSchedPred,     [P10MM_Read_1C]> // Variant listed under NoSchedPred: the 1-cycle advance.
377  ]>;
378
379  // Save 1 cycle if pipeline BR reads the data from pipelines FX, F2.
380  def : SchedAlias<P10BR_Read, P10BR_Read_1C>;
381
382  // Save 1 cycle if pipeline DF reads the data from pipelines DX, MU, vMU, BF, CY, DF, MM.
383  def : SchedAlias<P10DF_Read, P10DF_Read_1C>;
384
385  // Save 1 cycle if pipeline DV reads the data from pipelines DX, MU, vMU, BF, CY, DF, MM.
386  def : SchedAlias<P10DV_Read, P10DV_Read_1C>;
387
388  // Save 1 cycle if pipeline DX reads the data from pipelines DX, MU, vMU, BF, CY, DF, MM.
389  def : SchedAlias<P10DX_Read, P10DX_Read_1C>;
390
391  // Save 1 cycle if pipeline F2 reads the data from pipelines ST, SX, FX, F2, PM.
392  def : SchedAlias<P10F2_Read, P10F2_Read_1C>;
393
394  // Save 1 cycle if pipeline FX reads the data from pipelines ST, SX, FX, F2, PM.
395  def : SchedAlias<P10FX_Read, P10FX_Read_1C>;
396
397  // Save 1 cycle if pipeline LD reads the data from pipelines ST, SX, FX, F2.
398  def : SchedAlias<P10LD_Read, P10LD_Read_1C>;
399
400  // Save 1 cycle if pipeline MU reads the data from pipelines DX, MU, DF.
401  def : SchedAlias<P10MU_Read, P10MU_Read_1C>;
402
403  // Save 1 cycle if pipeline PM reads the data from pipelines ST, SX, FX, F2, PM.
404  def : SchedAlias<P10PM_Read, P10PM_Read_1C>;
405
406  // Save 1 cycle if pipeline ST reads the data from pipelines ST, SX, FX, F2, PM.
407  def : SchedAlias<P10ST_Read, P10ST_Read_1C>;
408
409  // Save 1 cycle if pipeline SX reads the data from pipelines ST, SX, FX, F2, PM, MM.
410  def : SchedAlias<P10SX_Read, P10SX_Read_1C>;
411
412  // Save 1 cycle if pipeline vMU reads the data from pipelines DX, MU, vMU, BF, CY, DF, MM.
413  def : SchedAlias<P10vMU_Read, P10vMU_Read_1C>;
414
415  include "P10InstrResources.td"
416}
417