xref: /freebsd/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td (revision 700637cbb5e582861067a11aaca4d053546871d2)
1//===-- RISCVSchedGenericOOO.td - Generic OOO Processor ----*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
//===----------------------------------------------------------------------===//
// We assume that:
// * 6-issue out-of-order CPU with 192 ROB entries.
// * Units:
//   * IXU (Integer ALU Unit): 4 units, only one can execute mul/div.
//   * FXU (Floating-point Unit): 2 units.
//   * LSU (Load/Store Unit): 2 units.
// * Latency:
//   * Integer instructions: 1 cycle.
//   * Multiplication instructions: 4 cycles.
//   * Division instructions: 13-21 cycles.
//   * Floating-point instructions: 2-6 cycles.
//   * Floating-point fdiv/fsqrt instructions: 9-17 cycles.
//   * Load/Store:
//     * IXU: 4 cycles.
//     * FXU: 4 cycles.
// * Integer/floating-point/vector div/rem/sqrt/... are non-pipelined.
//
// TODO: Add vector scheduling.
//===----------------------------------------------------------------------===//
29
// Machine model for the generic 6-issue out-of-order core described in the
// header comment.
//
// All values override fields inherited from SchedMachineModel. They are set
// with `let` rather than re-declared with a type: `let` requires the field to
// exist already, so a misspelled name is reported by TableGen instead of
// silently adding a new, unused field to the record.
def GenericOOOModel : SchedMachineModel {
  let IssueWidth = 6;          // 6-issue.
  let MicroOpBufferSize = 192; // 192 ROB entries.
  let LoadLatency = 4;         // Same value as the load WriteRes latencies.
  let MispredictPenalty = 8;   // Branch misprediction penalty.
  let CompleteModel = 0;       // Not complete: vector scheduling is a TODO.
}
37
38let SchedModel = GenericOOOModel in {
39//===----------------------------------------------------------------------===//
40// Resource groups
41//===----------------------------------------------------------------------===//
// Integer side: one branch pipe, one mul/div pipe and two plain integer
// pipes. GenericOOOALU groups all four, matching the "4 IXU units, only one
// can execute mul/div" assumption.
def GenericOOOBranch : ProcResource<1>;
def GenericOOOMulDiv : ProcResource<1>;
def GenericOOOInt : ProcResource<2>;
def GenericOOOALU : ProcResGroup<[
  GenericOOOBranch,
  GenericOOOMulDiv,
  GenericOOOInt
]>;

// Load/store side: two LSU pipes.
def GenericOOOLSU : ProcResource<2>;

// Floating-point side: one pipe that also handles mul/div/sqrt/FMA and one
// plain pipe, grouped together as GenericOOOFPU.
def GenericOOOFMulDiv : ProcResource<1>;
def GenericOOOFloat : ProcResource<1>;
def GenericOOOFPU : ProcResGroup<[
  GenericOOOFMulDiv,
  GenericOOOFloat
]>;
51
52//===----------------------------------------------------------------------===//
53// Branches
54//===----------------------------------------------------------------------===//
// All jump/branch writes go to the dedicated branch pipe. No Latency is
// given, so the WriteRes default applies.
foreach BranchWrite = [WriteJmp, WriteJalr, WriteJal] in
  def : WriteRes<BranchWrite, [GenericOOOBranch]>;
58
59//===----------------------------------------------------------------------===//
60// Integer arithmetic and logic
61//===----------------------------------------------------------------------===//
// Simple ALU and shift operations may issue on any pipe of the integer ALU
// group; no Latency is given, so the WriteRes default applies.
foreach AluWrite = [WriteIALU, WriteIALU32,
                    WriteShiftImm, WriteShiftImm32,
                    WriteShiftReg, WriteShiftReg32] in
  def : WriteRes<AluWrite, [GenericOOOALU]>;
68
69//===----------------------------------------------------------------------===//
70// Integer multiplication
71//===----------------------------------------------------------------------===//
// Multiplies take 4 cycles on the mul/div pipe. ReleaseAtCycles is left at
// its default, unlike the divides that share the same pipe.
foreach MulWrite = [WriteIMul, WriteIMul32] in
  def : WriteRes<MulWrite, [GenericOOOMulDiv]> { let Latency = 4; }
76
77//===----------------------------------------------------------------------===//
78// Integer division
79//===----------------------------------------------------------------------===//
// Divide and remainder are non-pipelined: the mul/div pipe stays reserved
// (ReleaseAtCycles) for the whole latency. 32-bit forms take 13 cycles,
// full-width forms take 21.
let Latency = 13, ReleaseAtCycles = [13] in
  def : WriteRes<WriteIDiv32, [GenericOOOMulDiv]>;
let Latency = 21, ReleaseAtCycles = [21] in
  def : WriteRes<WriteIDiv, [GenericOOOMulDiv]>;
let Latency = 13, ReleaseAtCycles = [13] in
  def : WriteRes<WriteIRem32, [GenericOOOMulDiv]>;
let Latency = 21, ReleaseAtCycles = [21] in
  def : WriteRes<WriteIRem, [GenericOOOMulDiv]>;
96
97//===----------------------------------------------------------------------===//
98// Integer memory
99//===----------------------------------------------------------------------===//
// Load: every access width has a 4-cycle latency on the LSU.
foreach LoadWrite = [WriteLDB, WriteLDH, WriteLDW, WriteLDD] in
  def : WriteRes<LoadWrite, [GenericOOOLSU]> { let Latency = 4; }

// Store: uses the LSU with the default WriteRes latency.
foreach StoreWrite = [WriteSTB, WriteSTH, WriteSTW, WriteSTD] in
  def : WriteRes<StoreWrite, [GenericOOOLSU]>;
113
114//===----------------------------------------------------------------------===//
115// Atomic
116//===----------------------------------------------------------------------===//
// Atomic loads: 4 cycles, the same as ordinary loads.
foreach AtomicLoadWrite = [WriteAtomicLDW, WriteAtomicLDD] in
  def : WriteRes<AtomicLoadWrite, [GenericOOOLSU]> { let Latency = 4; }

// WriteAtomicW/D: one cycle longer than a load.
foreach AtomicRmwWrite = [WriteAtomicW, WriteAtomicD] in
  def : WriteRes<AtomicRmwWrite, [GenericOOOLSU]> { let Latency = 5; }

// Atomic stores: default WriteRes latency.
foreach AtomicStoreWrite = [WriteAtomicSTW, WriteAtomicSTD] in
  def : WriteRes<AtomicStoreWrite, [GenericOOOLSU]>;
129
130//===----------------------------------------------------------------------===//
131// Floating-point
132//===----------------------------------------------------------------------===//
// Floating-point load: 4 cycles on the LSU, like the integer loads.
foreach FLoadWrite = [WriteFLD32, WriteFLD64] in
  def : WriteRes<FLoadWrite, [GenericOOOLSU]> { let Latency = 4; }

// Floating-point store: LSU with the default WriteRes latency.
foreach FStoreWrite = [WriteFST32, WriteFST64] in
  def : WriteRes<FStoreWrite, [GenericOOOLSU]>;
142
// Arithmetic and logic
// Add/sub: 2 cycles on either FP pipe.
foreach FAddWrite = [WriteFAdd32, WriteFAdd64] in
  def : WriteRes<FAddWrite, [GenericOOOFPU]> { let Latency = 2; }

// Sign injection and min/max: no explicit Latency, so the WriteRes default
// applies.
foreach FSimpleWrite = [WriteFSGNJ32, WriteFSGNJ64,
                        WriteFMinMax32, WriteFMinMax64] in
  def : WriteRes<FSimpleWrite, [GenericOOOFPU]>;

// Compare: 2 cycles on either FP pipe.
foreach FCmpWrite = [WriteFCmp32, WriteFCmp64] in
  def : WriteRes<FCmpWrite, [GenericOOOFPU]> { let Latency = 2; }
159
// Everything below is bound to the single FP mul/div pipe.

// Multiplication: 4 cycles.
foreach FMulWrite = [WriteFMul32, WriteFMul64] in
  def : WriteRes<FMulWrite, [GenericOOOFMulDiv]> { let Latency = 4; }

// FMA: 6 cycles.
foreach FMAWrite = [WriteFMA32, WriteFMA64] in
  def : WriteRes<FMAWrite, [GenericOOOFMulDiv]> { let Latency = 6; }

// Division and square root are non-pipelined: the pipe stays reserved
// (ReleaseAtCycles) for the whole latency.
// Single precision: 13 cycles.
foreach FDiv32Write = [WriteFDiv32, WriteFSqrt32] in
  def : WriteRes<FDiv32Write, [GenericOOOFMulDiv]> {
    let Latency = 13;
    let ReleaseAtCycles = [13];
  }

// Double precision: 17 cycles.
foreach FDiv64Write = [WriteFDiv64, WriteFSqrt64] in
  def : WriteRes<FDiv64Write, [GenericOOOFMulDiv]> {
    let Latency = 17;
    let ReleaseAtCycles = [17];
  }
182
// Conversions and register moves: all 2 cycles on either FP pipe.
let Latency = 2 in {
  // Integer -> floating-point
  def : WriteRes<WriteFCvtI32ToF32, [GenericOOOFPU]>;
  def : WriteRes<WriteFCvtI32ToF64, [GenericOOOFPU]>;
  def : WriteRes<WriteFCvtI64ToF32, [GenericOOOFPU]>;
  def : WriteRes<WriteFCvtI64ToF64, [GenericOOOFPU]>;

  // Single precision -> integer
  def : WriteRes<WriteFCvtF32ToI32, [GenericOOOFPU]>;
  def : WriteRes<WriteFCvtF32ToI64, [GenericOOOFPU]>;

  // Double precision -> integer
  def : WriteRes<WriteFCvtF64ToI32, [GenericOOOFPU]>;
  def : WriteRes<WriteFCvtF64ToI64, [GenericOOOFPU]>;

  // Between floating-point precisions
  def : WriteRes<WriteFCvtF64ToF32, [GenericOOOFPU]>;
  def : WriteRes<WriteFCvtF32ToF64, [GenericOOOFPU]>;

  // Moves between the integer and floating-point register files
  def : WriteRes<WriteFMovI32ToF32, [GenericOOOFPU]>;
  def : WriteRes<WriteFMovI64ToF64, [GenericOOOFPU]>;
  def : WriteRes<WriteFMovF32ToI32, [GenericOOOFPU]>;
  def : WriteRes<WriteFMovF64ToI64, [GenericOOOFPU]>;
}

// Classify: no explicit Latency, so the WriteRes default applies.
foreach FClassWrite = [WriteFClass32, WriteFClass64] in
  def : WriteRes<FClassWrite, [GenericOOOFPU]>;
216
217//===----------------------------------------------------------------------===//
218// Zicsr extension
219//===----------------------------------------------------------------------===//
// CSR writes may issue on any pipe of the integer ALU group.
def : WriteRes<WriteCSR, [GenericOOOALU]>;

//===----------------------------------------------------------------------===//
// Zabha extension
//===----------------------------------------------------------------------===//
// Byte/halfword atomics: 5 cycles on the LSU, the same as WriteAtomicW/D.
foreach SubWordAtomicWrite = [WriteAtomicB, WriteAtomicH] in
  def : WriteRes<SubWordAtomicWrite, [GenericOOOLSU]> { let Latency = 5; }
229
230//===----------------------------------------------------------------------===//
231// Zba extension
232//===----------------------------------------------------------------------===//
// Every bit-manipulation write in the sections below may issue on any pipe of
// the integer ALU group and uses the default WriteRes latency.
foreach ZbaWrite = [WriteSHXADD, WriteSHXADD32] in
  def : WriteRes<ZbaWrite, [GenericOOOALU]>;

//===----------------------------------------------------------------------===//
// Zbb extension
//===----------------------------------------------------------------------===//
foreach ZbbWrite = [WriteCLZ, WriteCTZ, WriteCPOP,
                    WriteCLZ32, WriteCTZ32, WriteCPOP32,
                    WriteRotateReg, WriteRotateImm,
                    WriteRotateReg32, WriteRotateImm32,
                    WriteREV8, WriteORCB, WriteIMinMax] in
  def : WriteRes<ZbbWrite, [GenericOOOALU]>;

//===----------------------------------------------------------------------===//
// Zbc extension
//===----------------------------------------------------------------------===//
def : WriteRes<WriteCLMUL, [GenericOOOALU]>;

//===----------------------------------------------------------------------===//
// Zbs extension
//===----------------------------------------------------------------------===//
foreach ZbsWrite = [WriteSingleBit, WriteSingleBitImm,
                    WriteBEXT, WriteBEXTI] in
  def : WriteRes<ZbsWrite, [GenericOOOALU]>;

//===----------------------------------------------------------------------===//
// Zbkb extension
//===----------------------------------------------------------------------===//
foreach ZbkbWrite = [WriteBREV8, WritePACK, WritePACK32, WriteZIP] in
  def : WriteRes<ZbkbWrite, [GenericOOOALU]>;

//===----------------------------------------------------------------------===//
// Zbkx extension
//===----------------------------------------------------------------------===//
def : WriteRes<WriteXPERM, [GenericOOOALU]>;
278
279//===----------------------------------------------------------------------===//
280// Zfa extension
281//===----------------------------------------------------------------------===//
// Zfa rounding and load-immediate writes: all 2 cycles on either FP pipe.
let Latency = 2 in {
  // Round to integral value
  def : WriteRes<WriteFRoundF16, [GenericOOOFPU]>;
  def : WriteRes<WriteFRoundF32, [GenericOOOFPU]>;
  def : WriteRes<WriteFRoundF64, [GenericOOOFPU]>;

  // Floating-point load-immediate
  def : WriteRes<WriteFLI16, [GenericOOOFPU]>;
  def : WriteRes<WriteFLI32, [GenericOOOFPU]>;
  def : WriteRes<WriteFLI64, [GenericOOOFPU]>;
}
293
294//===----------------------------------------------------------------------===//
295// Zfh extension
296//===----------------------------------------------------------------------===//
// Zfhmin
// Load/Store
// Only the half-precision load has the 4-cycle latency; the store keeps the
// default WriteRes latency, like the other stores in this model.
def : WriteRes<WriteFLD16, [GenericOOOLSU]> { let Latency = 4; }
def : WriteRes<WriteFST16, [GenericOOOLSU]>;
302
// Half-precision conversions and moves: all 2 cycles on either FP pipe.
let Latency = 2 in {
  // Conversions
  def : WriteRes<WriteFCvtF16ToF64, [GenericOOOFPU]>;
  def : WriteRes<WriteFCvtF64ToF16, [GenericOOOFPU]>;
  def : WriteRes<WriteFCvtF32ToF16, [GenericOOOFPU]>;
  def : WriteRes<WriteFCvtF16ToF32, [GenericOOOFPU]>;

  // Moves between the integer and floating-point register files
  def : WriteRes<WriteFMovI16ToF16, [GenericOOOFPU]>;
  def : WriteRes<WriteFMovF16ToI16, [GenericOOOFPU]>;

  // Other than Zfhmin
  def : WriteRes<WriteFCvtI64ToF16, [GenericOOOFPU]>;
  def : WriteRes<WriteFCvtI32ToF16, [GenericOOOFPU]>;
  def : WriteRes<WriteFCvtF16ToI64, [GenericOOOFPU]>;
  def : WriteRes<WriteFCvtF16ToI32, [GenericOOOFPU]>;
}
323
// Arithmetic and logic
// Add/sub: 2 cycles.
def : WriteRes<WriteFAdd16, [GenericOOOFPU]> { let Latency = 2; }

// Sign injection and min/max: default WriteRes latency.
def : WriteRes<WriteFSGNJ16, [GenericOOOFPU]>;
def : WriteRes<WriteFMinMax16, [GenericOOOFPU]>;

// Compare: 2 cycles.
def : WriteRes<WriteFCmp16, [GenericOOOFPU]> { let Latency = 2; }

// Multiplication: 4 cycles on the FP mul/div pipe.
def : WriteRes<WriteFMul16, [GenericOOOFMulDiv]> { let Latency = 4; }

// FMA: 6 cycles on the FP mul/div pipe.
def : WriteRes<WriteFMA16, [GenericOOOFMulDiv]> { let Latency = 6; }

// Division and square root: 9 cycles, non-pipelined (the pipe stays reserved
// for the whole latency).
foreach FDiv16Write = [WriteFDiv16, WriteFSqrt16] in
  def : WriteRes<FDiv16Write, [GenericOOOFMulDiv]> {
    let Latency = 9;
    let ReleaseAtCycles = [9];
  }

// Classify: default WriteRes latency.
def : WriteRes<WriteFClass16, [GenericOOOFPU]>;
351
352//===----------------------------------------------------------------------===//
353// Misc
354//===----------------------------------------------------------------------===//
// A nop is mapped to the integer ALU group with zero latency.
def : WriteRes<WriteNop, [GenericOOOALU]> { let Latency = 0; }
357
358//===----------------------------------------------------------------------===//
359// Bypass and advance
360//===----------------------------------------------------------------------===//
// Every read below gets a ReadAdvance of 0 cycles.

// Control flow, CSR and integer memory operands
foreach ReadKind = [ReadJmp, ReadJalr, ReadCSR, ReadStoreData, ReadMemBase] in
  def : ReadAdvance<ReadKind, 0>;

// Integer arithmetic, shift, divide/remainder and multiply operands
foreach ReadKind = [ReadIALU, ReadIALU32,
                    ReadShiftImm, ReadShiftImm32,
                    ReadShiftReg, ReadShiftReg32,
                    ReadIDiv, ReadIDiv32,
                    ReadIRem, ReadIRem32,
                    ReadIMul, ReadIMul32] in
  def : ReadAdvance<ReadKind, 0>;

// Atomic operands
foreach ReadKind = [ReadAtomicWA, ReadAtomicWD,
                    ReadAtomicDA, ReadAtomicDD,
                    ReadAtomicLDW, ReadAtomicLDD,
                    ReadAtomicSTW, ReadAtomicSTD] in
  def : ReadAdvance<ReadKind, 0>;

// Floating-point memory operands
foreach ReadKind = [ReadFStoreData, ReadFMemBase] in
  def : ReadAdvance<ReadKind, 0>;

// Floating-point arithmetic, compare, sign-injection and min/max operands
foreach ReadKind = [ReadFAdd32, ReadFAdd64,
                    ReadFMul32, ReadFMA32, ReadFMA32Addend,
                    ReadFMul64, ReadFMA64, ReadFMA64Addend,
                    ReadFDiv32, ReadFDiv64,
                    ReadFSqrt32, ReadFSqrt64,
                    ReadFCmp32, ReadFCmp64,
                    ReadFSGNJ32, ReadFSGNJ64,
                    ReadFMinMax32, ReadFMinMax64] in
  def : ReadAdvance<ReadKind, 0>;

// Floating-point conversion, move and classify operands
foreach ReadKind = [ReadFCvtF32ToI32, ReadFCvtF32ToI64,
                    ReadFCvtF64ToI32, ReadFCvtF64ToI64,
                    ReadFCvtI32ToF32, ReadFCvtI32ToF64,
                    ReadFCvtI64ToF32, ReadFCvtI64ToF64,
                    ReadFCvtF32ToF64, ReadFCvtF64ToF32,
                    ReadFMovF32ToI32, ReadFMovI32ToF32,
                    ReadFMovF64ToI64, ReadFMovI64ToF64,
                    ReadFClass32, ReadFClass64] in
  def : ReadAdvance<ReadKind, 0>;
422
// Every read below gets a ReadAdvance of 0 cycles.

// Zabha
foreach ReadKind = [ReadAtomicBA, ReadAtomicBD, ReadAtomicHA, ReadAtomicHD] in
  def : ReadAdvance<ReadKind, 0>;

// Zba extension
foreach ReadKind = [ReadSHXADD, ReadSHXADD32] in
  def : ReadAdvance<ReadKind, 0>;

// Zbb extension
foreach ReadKind = [ReadRotateImm, ReadRotateImm32,
                    ReadRotateReg, ReadRotateReg32,
                    ReadCLZ, ReadCLZ32,
                    ReadCTZ, ReadCTZ32,
                    ReadCPOP, ReadCPOP32,
                    ReadREV8, ReadORCB, ReadIMinMax] in
  def : ReadAdvance<ReadKind, 0>;

// Zbc extension
def : ReadAdvance<ReadCLMUL, 0>;

// Zbs extension
foreach ReadKind = [ReadSingleBit, ReadSingleBitImm] in
  def : ReadAdvance<ReadKind, 0>;

// Zbkb
foreach ReadKind = [ReadBREV8, ReadPACK, ReadPACK32, ReadZIP] in
  def : ReadAdvance<ReadKind, 0>;

// Zbkx
def : ReadAdvance<ReadXPERM, 0>;
463
// Every read below gets a ReadAdvance of 0 cycles.

// Zfa extension
foreach ReadKind = [ReadFRoundF32, ReadFRoundF64, ReadFRoundF16] in
  def : ReadAdvance<ReadKind, 0>;

// Zfh extension
foreach ReadKind = [ReadFCvtF16ToF64, ReadFCvtF64ToF16,
                    ReadFCvtF32ToF16, ReadFCvtF16ToF32,
                    ReadFMovI16ToF16, ReadFMovF16ToI16] in
  def : ReadAdvance<ReadKind, 0>;

foreach ReadKind = [ReadFAdd16, ReadFClass16,
                    ReadFCvtI64ToF16, ReadFCvtI32ToF16,
                    ReadFCvtF16ToI64, ReadFCvtF16ToI32,
                    ReadFDiv16, ReadFCmp16,
                    ReadFMA16, ReadFMA16Addend,
                    ReadFMinMax16, ReadFMul16,
                    ReadFSGNJ16, ReadFSqrt16] in
  def : ReadAdvance<ReadKind, 0>;
491
492//===----------------------------------------------------------------------===//
493// Unsupported extensions
494//===----------------------------------------------------------------------===//
// Extensions this model does not describe. Each defm instantiates the
// corresponding UnsupportedSched* multiclass; the instantiations are
// independent of one another and are grouped here by area.

// Quad-precision floating point
defm : UnsupportedSchedQ;
defm : UnsupportedSchedZfaWithQ;

// Vector and vector crypto (see the vector-scheduling TODO in the header)
defm : UnsupportedSchedV;
defm : UnsupportedSchedZvk;

// SFB and Xsf
defm : UnsupportedSchedSFB;
defm : UnsupportedSchedXsf;
501}
502