xref: /freebsd/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR7.td (revision 700637cbb5e582861067a11aaca4d053546871d2)
1//==- RISCVSchedSyntacoreSCR7.td - Syntacore SCR7 Sched Defs -*- tablegen -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9//===----------------------------------------------------------------------===//
10
// This file covers the scheduling model for the rv64imafdcv_zba_zbb_zbc_zbs
// configuration of the Syntacore SCR7 processor.
13// Overview: https://syntacore.com/products/scr7
14
// SCR7 is an out-of-order superscalar dual-issue core.
// FIXME: add V and Zkn extensions scheduling model
def SyntacoreSCR7Model : SchedMachineModel {
  // Issue width and size of the micro-op buffer.
  let IssueWidth = 2;
  let MicroOpBufferSize = 36;

  // Load latency and branch mispredict penalty.
  let LoadLatency = 3;
  let MispredictPenalty = 9;

  // The model is declared incomplete (see the FIXME above).
  let CompleteModel = 0;

  // Subtarget features that this model lists as unsupported.
  let UnsupportedFeatures = [
    HasStdExtZksed, HasStdExtZksh, HasStdExtZkr, HasStdExtZcmt,
    HasVInstructions
  ];
}
26
// Branching: the jump, JAL and JALR writes all use the BRU resource. No
// Latency is overridden here, so the WriteRes defaults apply.
multiclass SCR7_Branching<ProcResourceKind BRU> {
  foreach Wr = [WriteJmp, WriteJal, WriteJalr] in
    def : WriteRes<Wr, [BRU]>;
}
33
// Single-cycle integer arithmetic and logic: plain ALU operations and
// immediate/register shifts (including their *32 variants) all use ALU.
multiclass SCR7_IntALU<ProcResourceKind ALU> {
  foreach Wr = [
    WriteIALU, WriteIALU32,
    WriteShiftImm, WriteShiftImm32,
    WriteShiftReg, WriteShiftReg32
  ] in
    def : WriteRes<Wr, [ALU]>;
}
43
// Pipelined integer multiplication: WriteIMul and WriteIMul32 share the given
// resources and a latency of 3.
multiclass SCR7_IntMul<list<ProcResourceKind> Resources> {
  foreach Wr = [WriteIMul, WriteIMul32] in
    def : WriteRes<Wr, Resources> { let Latency = 3; }
}
51
// Common implementation for WriteIDiv and WriteIDiv32 sched writes.
// The divide write and the remainder write receive the same resources, the
// same latency (DivLatency) and the same ReleaseAtCycles (ReleaseCycles).
multiclass SCR7_IntDivImpl<list<ProcResourceKind> Resources,
                           list<int> ReleaseCycles, int DivLatency,
                           SchedWrite DivWrite, SchedWrite RemWrite> {
  foreach Wr = [DivWrite, RemWrite] in
    def : WriteRes<Wr, Resources> {
      let Latency = DivLatency;
      let ReleaseAtCycles = ReleaseCycles;
    }
}
61
// Non-pipelined integer division: forwards to SCR7_IntDivImpl for the
// WriteIDiv / WriteIRem pair.
multiclass SCR7_IntDiv<list<ProcResourceKind> Resources,
                       list<int> ReleaseCycles,
                       int DivLatency> {
  defm : SCR7_IntDivImpl<Resources, ReleaseCycles, DivLatency,
                         WriteIDiv, WriteIRem>;
}
72
// Same as SCR7_IntDiv, but forwards to SCR7_IntDivImpl for the
// WriteIDiv32 / WriteIRem32 pair.
multiclass SCR7_IntDiv32<list<ProcResourceKind> Resources,
                         list<int> ReleaseCycles,
                         int DivLatency> {
  defm : SCR7_IntDivImpl<Resources, ReleaseCycles, DivLatency,
                         WriteIDiv32, WriteIRem32>;
}
82
// Bit-manipulation writes: all of them use the BMU resource with a latency
// of 1.
multiclass SCR7_Bitmanip<ProcResourceKind BMU> {
  foreach Wr = [
    // Zba
    WriteSHXADD, WriteSHXADD32,
    // Zbb
    WriteRotateImm, WriteRotateImm32,
    WriteRotateReg, WriteRotateReg32,
    WriteCLZ, WriteCLZ32,
    WriteCTZ, WriteCTZ32,
    WriteCPOP, WriteCPOP32,
    WriteREV8, WriteORCB, WriteIMinMax,
    // Zbs
    WriteSingleBit, WriteSingleBitImm,
    // Zbc
    WriteCLMUL,
    // Zbs (bit extract)
    WriteBEXT, WriteBEXTI
  ] in
    def : WriteRes<Wr, [BMU]> { let Latency = 1; }
}
111
// Scalar-crypto bit-manipulation writes: all of them use the SCU resource
// with a latency of 1.
multiclass SCR7_ScalarCrypto<ProcResourceKind SCU> {
  foreach Wr = [
    // Zbkb
    WriteBREV8, WritePACK, WritePACK32, WriteZIP,
    // Zbkx
    WriteXPERM
  ] in
    def : WriteRes<Wr, [SCU]> { let Latency = 1; }
}
123
// Wires the whole integer pipeline together:
//   - branches, bitmanip, scalar crypto and plain ALU writes use ALU_Any;
//   - multiplies use ALU_MUL_IS together with MUL;
//   - divides and remainders use ALU_DIV_IS together with DIV.
multiclass SCR7_IntPipeline<ProcResourceKind ALU_Any,
                            ProcResourceKind ALU_DIV_IS,
                            ProcResourceKind DIV,
                            ProcResourceKind ALU_MUL_IS,
                            ProcResourceKind MUL> {
  defm : SCR7_Branching<ALU_Any>;
  defm : SCR7_Bitmanip<ALU_Any>;
  defm : SCR7_ScalarCrypto<ALU_Any>;
  defm : SCR7_IntALU<ALU_Any>;

  defm : SCR7_IntMul<[ALU_MUL_IS, MUL]>;

  // WriteIDiv / WriteIRem. The ReleaseAtCycles entries pair up with
  // [ALU_DIV_IS, DIV] in order.
  defm : SCR7_IntDiv<[ALU_DIV_IS, DIV], /* ReleaseAtCycles */ [1, 35],
                     /* Latency */ 35>;
  // WriteIDiv32 / WriteIRem32.
  defm : SCR7_IntDiv32<[ALU_DIV_IS, DIV], /* ReleaseAtCycles */ [1, 19],
                       /* Latency */ 19>;
}
141
// Load/store instructions: every integer and floating-point load or store
// write uses the LSU resource with a latency of 3.
multiclass SCR7_BasicMemory<ProcResourceKind LSU> {
  foreach Wr = [
    // Integer stores
    WriteSTB, WriteSTH, WriteSTW, WriteSTD,
    // Integer loads
    WriteLDB, WriteLDH, WriteLDW, WriteLDD,
    // Floating-point stores
    WriteFST32, WriteFST64,
    // Floating-point loads
    WriteFLD32, WriteFLD64
  ] in
    def : WriteRes<Wr, [LSU]> { let Latency = 3; }
}
159
// Atomic memory: all atomic writes use the LSU resource.
multiclass SCR7_AtomicMemory<ProcResourceKind LSU> {
  // WriteAtomicLDW / WriteAtomicLDD: latency 19.
  foreach Wr = [WriteAtomicLDW, WriteAtomicLDD] in
    def : WriteRes<Wr, [LSU]> { let Latency = 19; }

  // Remaining atomic writes: latency 21.
  foreach Wr = [WriteAtomicW, WriteAtomicD, WriteAtomicSTW, WriteAtomicSTD] in
    def : WriteRes<Wr, [LSU]> { let Latency = 21; }
}
// Floating-point unit: every write uses the FPU_IS resource together with
// one of the FALU, FMA or FDIVSQRT resources.
multiclass SCR7_FPU<ProcResourceKind FPU_IS, ProcResourceKind FALU,
                    ProcResourceKind FMA, ProcResourceKind FDIVSQRT> {
  // FALU operations (latency 4)
  foreach Wr = [
    // Add, sign injection, min/max
    WriteFAdd32, WriteFAdd64,
    WriteFSGNJ32, WriteFSGNJ64,
    WriteFMinMax32, WriteFMinMax64,
    // Conversions
    WriteFCvtI32ToF32, WriteFCvtI32ToF64,
    WriteFCvtI64ToF32, WriteFCvtI64ToF64,
    WriteFCvtF32ToF64, WriteFCvtF64ToF32,
    WriteFCvtF32ToI32, WriteFCvtF32ToI64,
    WriteFCvtF64ToI32, WriteFCvtF64ToI64,
    // Classify
    WriteFClass32, WriteFClass64,
    // Compare
    WriteFCmp32, WriteFCmp64,
    // Moves
    WriteFMovI32ToF32, WriteFMovF32ToI32,
    WriteFMovI64ToF64, WriteFMovF64ToI64
  ] in
    def : WriteRes<Wr, [FPU_IS, FALU]> { let Latency = 4; }

  // FMA operations (latency 6)
  foreach Wr = [WriteFMul32, WriteFMul64, WriteFMA32, WriteFMA64] in
    def : WriteRes<Wr, [FPU_IS, FMA]> { let Latency = 6; }

  // FDIVSQRT operations: each write has its own latency. The ReleaseAtCycles
  // entries pair up with [FPU_IS, FDIVSQRT] in order.
  let Latency = 16, ReleaseAtCycles = [1, 15] in
    def : WriteRes<WriteFDiv32, [FPU_IS, FDIVSQRT]>;
  let Latency = 30, ReleaseAtCycles = [1, 29] in
    def : WriteRes<WriteFDiv64, [FPU_IS, FDIVSQRT]>;

  // NOTE(review): for the square roots the second ReleaseAtCycles entry is
  // Latency - 2, whereas for the divides above it is Latency - 1; confirm
  // that this difference is intended.
  let Latency = 18, ReleaseAtCycles = [1, 16] in
    def : WriteRes<WriteFSqrt32, [FPU_IS, FDIVSQRT]>;
  let Latency = 32, ReleaseAtCycles = [1, 30] in
    def : WriteRes<WriteFSqrt64, [FPU_IS, FDIVSQRT]>;
}
233
// Others
multiclass SCR7_Other {
  // CSR and nop writes are given an empty resource list.
  foreach Wr = [WriteCSR, WriteNop] in
    def : WriteRes<Wr, []>;

  // COPY is scheduled as WriteIALU.
  def : InstRW<[WriteIALU], (instrs COPY)>;
}
241
// Unsupported scheduling classes for SCR7.
// Each UnsupportedSched* multiclass is defined outside this file; it is
// instantiated here once per group that this model does not describe.
multiclass SCR7_Unsupported {
  defm : UnsupportedSchedQ;
  defm : UnsupportedSchedSFB;
  defm : UnsupportedSchedV;
  defm : UnsupportedSchedZabha;
  defm : UnsupportedSchedZfa;
  defm : UnsupportedSchedZfhmin;
  defm : UnsupportedSchedZvk;
  defm : UnsupportedSchedXsf;
}
253
254
// Bypasses (none): every scheduling read listed below gets a ReadAdvance of
// 0 cycles.
multiclass SCR7_NoReadAdvances {
  foreach Rd = [
    // Control flow, CSR and memory addressing
    ReadJmp, ReadJalr, ReadCSR, ReadStoreData, ReadMemBase,
    // Integer ALU and shifts
    ReadIALU, ReadIALU32,
    ReadShiftImm, ReadShiftImm32,
    ReadShiftReg, ReadShiftReg32,
    // Integer divide, remainder and multiply
    ReadIDiv, ReadIDiv32,
    ReadIRem, ReadIRem32,
    ReadIMul, ReadIMul32,
    // Atomics
    ReadAtomicWA, ReadAtomicWD,
    ReadAtomicDA, ReadAtomicDD,
    ReadAtomicLDW, ReadAtomicLDD,
    ReadAtomicSTW, ReadAtomicSTD,
    // Bit manipulation and scalar crypto
    ReadSHXADD, ReadSHXADD32,
    ReadRotateImm, ReadRotateImm32,
    ReadRotateReg, ReadRotateReg32,
    ReadCLZ, ReadCLZ32,
    ReadCTZ, ReadCTZ32,
    ReadCPOP, ReadCPOP32,
    ReadREV8, ReadORCB, ReadIMinMax, ReadCLMUL,
    ReadBREV8, ReadPACK, ReadPACK32, ReadZIP, ReadXPERM,
    ReadSingleBit, ReadSingleBitImm,
    // Floating-point memory
    ReadFStoreData, ReadFMemBase,
    // Floating-point arithmetic
    ReadFAdd32, ReadFAdd64,
    ReadFMul32, ReadFMul64,
    ReadFMA32, ReadFMA32Addend,
    ReadFMA64, ReadFMA64Addend,
    ReadFDiv32, ReadFDiv64,
    ReadFSqrt32, ReadFSqrt64,
    ReadFCmp32, ReadFCmp64,
    ReadFSGNJ32, ReadFSGNJ64,
    ReadFMinMax32, ReadFMinMax64,
    // Floating-point conversions
    ReadFCvtF32ToI32, ReadFCvtF32ToI64,
    ReadFCvtF64ToI32, ReadFCvtF64ToI64,
    ReadFCvtI32ToF32, ReadFCvtI32ToF64,
    ReadFCvtI64ToF32, ReadFCvtI64ToF64,
    ReadFCvtF32ToF64, ReadFCvtF64ToF32,
    // Floating-point moves and classify
    ReadFMovF32ToI32, ReadFMovI32ToF32,
    ReadFMovF64ToI64, ReadFMovI64ToF64,
    ReadFClass32, ReadFClass64
  ] in
    def : ReadAdvance<Rd, 0>;
}
342
// Everything below is attached to SyntacoreSCR7Model: the processor resources
// are declared here and the multiclasses above are instantiated with them.
let SchedModel = SyntacoreSCR7Model in {
  // Integer pipeline has two reservation stations with single issue port
  // each. Every station has eight entries:
  // First station:
  //   - ALU (+ bitmanip and scalar crypto)
  //   - Pipelined Multiplier (3 stage)
  // Second station:
  //   - ALU (+ bitmanip and scalar crypto)
  //   - Non-pipelined divider (other units are not blocked)
  let BufferSize = 8 in {
    def SCR7_ALU_MUL_IS : ProcResource<1>;
    def SCR7_ALU_DIV_IS : ProcResource<1>;
  }
  def SCR7_ALU_Any : ProcResGroup<[SCR7_ALU_MUL_IS, SCR7_ALU_DIV_IS]>;
  let BufferSize = 1 in {
    def SCR7_MUL : ProcResource<1>;
    def SCR7_DIV : ProcResource<1>;
  }

  defm : SCR7_IntPipeline<SCR7_ALU_Any,
                          SCR7_ALU_DIV_IS, SCR7_DIV,
                          SCR7_ALU_MUL_IS, SCR7_MUL>;

  // SCR7 single-issue LSU with sixteen entries.
  let BufferSize = 16 in
    def SCR7_LSU : ProcResource<1>;
  defm : SCR7_BasicMemory<SCR7_LSU>;
  defm : SCR7_AtomicMemory<SCR7_LSU>;

  // FPU has one issue slot with eight entries:
  //   - FP ALU
  //   - FMA
  //   - Non-pipelined FDIV/FSQRT
  let BufferSize = 8 in
    def SCR7_FPU_IS : ProcResource<1>;
  let BufferSize = 1 in {
    def SCR7_FALU : ProcResource<1>;
    def SCR7_FMA : ProcResource<1>;
    def SCR7_FDIVSQRT : ProcResource<1>;
  }
  defm : SCR7_FPU<SCR7_FPU_IS, SCR7_FALU, SCR7_FMA, SCR7_FDIVSQRT>;

  defm : SCR7_Other;
  defm : SCR7_Unsupported;
  defm : SCR7_NoReadAdvances;
}
381