xref: /freebsd/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td (revision 700637cbb5e582861067a11aaca4d053546871d2)
1//=- RISCVSchedSpacemitX60.td - Spacemit X60 Scheduling Defs -*- tablegen -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9//===----------------------------------------------------------------------===//
10//
11// Scheduler model for the SpacemiT-X60 processor based on documentation of the
12// C908 and experiments on real hardware (bpi-f3).
13//
14//===----------------------------------------------------------------------===//
15
// Top-level machine model record for the SpacemiT X60. The trailing comment
// on each field gives the rationale for the chosen value.
// NOTE(review): the load WriteRes entries later in this file use Latency = 4,
// while LoadLatency here is 3 -- confirm the two are meant to differ.
16def SpacemitX60Model : SchedMachineModel {
17  let IssueWidth        = 2; // dual-issue
18  let MicroOpBufferSize = 0; // in-order
19  let LoadLatency       = 3; // worst case: >= 3
20  let MispredictPenalty = 9; // nine-stage
21
  // CompleteModel = 0: the model is not required to describe every
  // instruction (see LLVM's TargetSchedule.td).
22  let CompleteModel = 0;
23
  // Subtarget features listed here are excluded from this model.
24  let UnsupportedFeatures = [HasStdExtZknd, HasStdExtZkne, HasStdExtZknh,
25                             HasStdExtZksed, HasStdExtZksh, HasStdExtZkr];
26}
27
28let SchedModel = SpacemitX60Model in {
29
30//===----------------------------------------------------------------------===//
31// Define processor resources for Spacemit-X60
32
33// Information gathered from the C908 user manual:
// Execution resources of the core. BufferSize = 0 is set on every resource
// defined inside the braces (presumably to match the in-order model, which
// also sets MicroOpBufferSize = 0 -- confirm).
34let BufferSize = 0 in {
35  // The LSU supports dual issue for scalar store/load instructions
  // (modelled as one resource with two units).
36  def SMX60_LS : ProcResource<2>;
37
38  // An IEU can decode and issue two instructions at the same time
  // Two single-unit integer resources plus a group containing both; writes
  // bound to SMX60_IEU name the group, writes bound to SMX60_IEUA name only
  // the first unit.
39  def SMX60_IEUA : ProcResource<1>;
40  def SMX60_IEUB : ProcResource<1>;
41  def SMX60_IEU : ProcResGroup<[SMX60_IEUA, SMX60_IEUB]>;
42
43  // Although the X60 does appear to support multiple issue for at least some
44  // floating point instructions, this model assumes single issue as
45  // increasing it reduces the gains we saw in performance
46  def SMX60_FP : ProcResource<1>;
47}
48
49//===----------------------------------------------------------------------===//
50
51// Branching
// Jumps are bound to SMX60_IEUA only (not the SMX60_IEU group). No Latency is
// set, so the WriteRes default applies.
52def : WriteRes<WriteJmp, [SMX60_IEUA]>;
53def : WriteRes<WriteJal, [SMX60_IEUA]>;
54def : WriteRes<WriteJalr, [SMX60_IEUA]>;
55
56// Integer arithmetic and logic
57// Latency of ALU instructions is 1, but add.uw is 2
// None of the entries below override Latency, so all of them use the WriteRes
// default (1 in LLVM's TargetSchedule.td).
// NOTE(review): the add.uw exception mentioned above does not appear to be
// modelled by a separate entry in this group -- confirm where it is handled.
58def : WriteRes<WriteIALU32, [SMX60_IEU]>;
59def : WriteRes<WriteIALU, [SMX60_IEU]>;
60def : WriteRes<WriteShiftImm32, [SMX60_IEU]>;
61def : WriteRes<WriteShiftImm, [SMX60_IEU]>;
62def : WriteRes<WriteShiftReg32, [SMX60_IEU]>;
63def : WriteRes<WriteShiftReg, [SMX60_IEU]>;
64
65// Integer multiplication
// 32-bit multiply: latency 3 on either integer unit.
66def : WriteRes<WriteIMul32, [SMX60_IEU]>  { let Latency = 3; }
67
68// The latency of mul is 5, while for mulh, mulhsu, and mulhu it is 6.
69// The worst-case latency is used because all of them share WriteIMul.
70def : WriteRes<WriteIMul, [SMX60_IEU]>  { let Latency = 6; }
71
72// Integer division/remainder
73// TODO: Latency set based on C908 datasheet and hasn't been
74// confirmed experimentally.
// ReleaseAtCycles equals Latency in both groups, so SMX60_IEUA is held for the
// full duration of the operation (i.e. the divider is modelled as
// non-pipelined). Only SMX60_IEUA is used, not the SMX60_IEU group.
// 32-bit divide/remainder: 12 cycles.
75let Latency = 12, ReleaseAtCycles = [12] in {
76  def : WriteRes<WriteIDiv32, [SMX60_IEUA]>;
77  def : WriteRes<WriteIRem32, [SMX60_IEUA]>;
78}
// Full-width divide/remainder: 20 cycles.
79let Latency = 20, ReleaseAtCycles = [20] in {
80  def : WriteRes<WriteIDiv, [SMX60_IEUA]>;
81  def : WriteRes<WriteIRem, [SMX60_IEUA]>;
82}
83
84// Bitmanip
// All bitmanip writes are bound to the SMX60_IEU group. Entries outside a
// `let Latency` scope use the WriteRes default latency.
// Rotates.
85def : WriteRes<WriteRotateImm, [SMX60_IEU]>;
86def : WriteRes<WriteRotateImm32, [SMX60_IEU]>;
87def : WriteRes<WriteRotateReg, [SMX60_IEU]>;
88def : WriteRes<WriteRotateReg32, [SMX60_IEU]>;
89
// Count leading/trailing zeros.
90def : WriteRes<WriteCLZ, [SMX60_IEU]>;
91def : WriteRes<WriteCLZ32, [SMX60_IEU]>;
92def : WriteRes<WriteCTZ, [SMX60_IEU]>;
93def : WriteRes<WriteCTZ32, [SMX60_IEU]>;
94
// Population count: latency 2.
95let Latency = 2 in {
96  def : WriteRes<WriteCPOP, [SMX60_IEU]>;
97  def : WriteRes<WriteCPOP32, [SMX60_IEU]>;
98}
99
// orc.b, integer min/max and rev8: default latency.
100def : WriteRes<WriteORCB, [SMX60_IEU]>;
101def : WriteRes<WriteIMinMax, [SMX60_IEU]>;
102def : WriteRes<WriteREV8, [SMX60_IEU]>;
103
// Shift-and-add and carry-less multiply: latency 2.
104let Latency = 2 in {
105  def : WriteRes<WriteSHXADD, [SMX60_IEU]>;
106  def : WriteRes<WriteSHXADD32, [SMX60_IEU]>;
107  def : WriteRes<WriteCLMUL, [SMX60_IEU]>;
108}
109
110// Single-bit instructions
// Bound to the SMX60_IEU group with the WriteRes default latency.
111def : WriteRes<WriteSingleBit, [SMX60_IEU]>;
112def : WriteRes<WriteSingleBitImm, [SMX60_IEU]>;
113def : WriteRes<WriteBEXT, [SMX60_IEU]>;
114def : WriteRes<WriteBEXTI, [SMX60_IEU]>;
115
116// Memory/Atomic memory
// Scalar and FP stores and loads all use the load/store resource SMX60_LS
// with latency 4.
// NOTE(review): SpacemitX60Model sets LoadLatency = 3 while loads here are 4
// -- confirm the difference is intentional.
117let Latency = 4 in {
  // Integer and FP stores.
118  def : WriteRes<WriteSTB, [SMX60_LS]>;
119  def : WriteRes<WriteSTH, [SMX60_LS]>;
120  def : WriteRes<WriteSTW, [SMX60_LS]>;
121  def : WriteRes<WriteSTD, [SMX60_LS]>;
122  def : WriteRes<WriteFST16, [SMX60_LS]>;
123  def : WriteRes<WriteFST32, [SMX60_LS]>;
124  def : WriteRes<WriteFST64, [SMX60_LS]>;
125
  // Integer and FP loads.
126  def : WriteRes<WriteLDB, [SMX60_LS]>;
127  def : WriteRes<WriteLDH, [SMX60_LS]>;
128  def : WriteRes<WriteLDW, [SMX60_LS]>;
129  def : WriteRes<WriteLDD, [SMX60_LS]>;
130  def : WriteRes<WriteFLD16, [SMX60_LS]>;
131  def : WriteRes<WriteFLD32, [SMX60_LS]>;
132  def : WriteRes<WriteFLD64, [SMX60_LS]>;
133}
134
135// Atomics
// All atomics go through SMX60_LS.
// WriteAtomicLD*/WriteAtomicST* (presumably LR/SC -- confirm against the
// instruction definitions): latency 8.
136let Latency = 8 in {
137  def : WriteRes<WriteAtomicSTW, [SMX60_LS]>;
138  def : WriteRes<WriteAtomicSTD, [SMX60_LS]>;
139  def : WriteRes<WriteAtomicLDW, [SMX60_LS]>;
140  def : WriteRes<WriteAtomicLDD, [SMX60_LS]>;
141}
142
// WriteAtomicW/WriteAtomicD (presumably AMO read-modify-write operations --
// confirm): latency 12.
143let Latency = 12 in {
144  def : WriteRes<WriteAtomicW, [SMX60_LS]>;
145  def : WriteRes<WriteAtomicD, [SMX60_LS]>;
146}
147
148// Floating point units Half precision
// All half-precision arithmetic uses the single FP resource SMX60_FP.
// Add, multiply, sign-injection and min/max: latency 4.
149let Latency = 4 in {
150  def : WriteRes<WriteFAdd16, [SMX60_FP]>;
151  def : WriteRes<WriteFMul16, [SMX60_FP]>;
152  def : WriteRes<WriteFSGNJ16, [SMX60_FP]>;
153  def : WriteRes<WriteFMinMax16, [SMX60_FP]>;
154}
// Fused multiply-add: latency 5.
155def : WriteRes<WriteFMA16, [SMX60_FP]> { let Latency = 5; }
156
// Divide and square root: latency 12, with SMX60_FP held for all 12 cycles
// (ReleaseAtCycles equals Latency).
157let Latency = 12, ReleaseAtCycles = [12] in {
158  def :  WriteRes<WriteFDiv16, [SMX60_FP]>;
159  def :  WriteRes<WriteFSqrt16, [SMX60_FP]>;
160}
161
162// Single precision
// Same structure as the half-precision group, all on SMX60_FP.
// Add, multiply, sign-injection and min/max: latency 4.
163let Latency = 4 in {
164  def : WriteRes<WriteFAdd32, [SMX60_FP]>;
165  def : WriteRes<WriteFMul32, [SMX60_FP]>;
166  def : WriteRes<WriteFSGNJ32, [SMX60_FP]>;
167  def : WriteRes<WriteFMinMax32, [SMX60_FP]>;
168}
// Fused multiply-add: latency 5.
169def : WriteRes<WriteFMA32, [SMX60_FP]> { let Latency = 5; }
170
// Divide and square root: latency 15, with SMX60_FP held for all 15 cycles.
171let Latency = 15, ReleaseAtCycles = [15] in {
172  def :  WriteRes<WriteFDiv32, [SMX60_FP]>;
173  def :  WriteRes<WriteFSqrt32, [SMX60_FP]>;
174}
175
176// Double precision
// All on SMX60_FP. Add, multiply and sign-injection: latency 5.
177let Latency = 5 in {
178  def : WriteRes<WriteFAdd64, [SMX60_FP]>;
179  def : WriteRes<WriteFMul64, [SMX60_FP]>;
180  def : WriteRes<WriteFSGNJ64, [SMX60_FP]>;
181}
// Min/max stays at latency 4 (unlike the other double-precision ops above);
// fused multiply-add is 6.
182def : WriteRes<WriteFMinMax64, [SMX60_FP]> { let Latency = 4; }
183def : WriteRes<WriteFMA64, [SMX60_FP]> { let Latency = 6; }
184
// Divide and square root: latency 22, with SMX60_FP held for all 22 cycles.
185let Latency = 22, ReleaseAtCycles = [22] in {
186  def :  WriteRes<WriteFDiv64, [SMX60_FP]>;
187  def :  WriteRes<WriteFSqrt64, [SMX60_FP]>;
188}
189
190// Conversions
// FP -> integer conversions: latency 6.
// NOTE(review): these are bound to the integer group SMX60_IEU rather than
// SMX60_FP -- confirm this is intentional.
191let Latency = 6 in {
192  def : WriteRes<WriteFCvtF16ToI32, [SMX60_IEU]>;
193  def : WriteRes<WriteFCvtF32ToI32, [SMX60_IEU]>;
194  def : WriteRes<WriteFCvtF32ToI64, [SMX60_IEU]>;
195  def : WriteRes<WriteFCvtF64ToI64, [SMX60_IEU]>;
196  def : WriteRes<WriteFCvtF64ToI32, [SMX60_IEU]>;
197  def : WriteRes<WriteFCvtF16ToI64, [SMX60_IEU]>;
198}
199
// Integer -> FP conversions (on SMX60_IEU) and FP -> FP precision changes
// (on SMX60_FP): latency 4.
200let Latency = 4 in {
201  def : WriteRes<WriteFCvtI32ToF16, [SMX60_IEU]>;
202  def : WriteRes<WriteFCvtI32ToF32, [SMX60_IEU]>;
203  def : WriteRes<WriteFCvtI32ToF64, [SMX60_IEU]>;
204  def : WriteRes<WriteFCvtI64ToF16, [SMX60_IEU]>;
205  def : WriteRes<WriteFCvtI64ToF32, [SMX60_IEU]>;
206  def : WriteRes<WriteFCvtI64ToF64, [SMX60_IEU]>;
207  def : WriteRes<WriteFCvtF16ToF32, [SMX60_FP]>;
208  def : WriteRes<WriteFCvtF16ToF64, [SMX60_FP]>;
209  def : WriteRes<WriteFCvtF32ToF16, [SMX60_FP]>;
210  def : WriteRes<WriteFCvtF32ToF64, [SMX60_FP]>;
211  def : WriteRes<WriteFCvtF64ToF16, [SMX60_FP]>;
212  def : WriteRes<WriteFCvtF64ToF32, [SMX60_FP]>;
213}
214
// Classification, comparison and FP -> integer register moves: latency 6.
// Classify/compare use SMX60_FP; the moves use the integer group SMX60_IEU.
215let Latency = 6 in {
216  def : WriteRes<WriteFClass16, [SMX60_FP]>;
217  def : WriteRes<WriteFClass32, [SMX60_FP]>;
218  def : WriteRes<WriteFClass64, [SMX60_FP]>;
219
220  def : WriteRes<WriteFCmp16, [SMX60_FP]>;
221  def : WriteRes<WriteFCmp32, [SMX60_FP]>;
222  def : WriteRes<WriteFCmp64, [SMX60_FP]>;
223
224  def : WriteRes<WriteFMovF32ToI32, [SMX60_IEU]>;
225  def : WriteRes<WriteFMovF16ToI16, [SMX60_IEU]>;
226}
227
// Remaining register moves: latency 4, on SMX60_IEU.
// NOTE(review): WriteFMovF64ToI64 is here at latency 4 while the F32/F16
// FP -> integer moves above are 6 -- confirm the asymmetry is measured and not
// a grouping mistake.
228let Latency = 4 in {
229  def : WriteRes<WriteFMovI16ToF16, [SMX60_IEU]>;
230  def : WriteRes<WriteFMovF64ToI64, [SMX60_IEU]>;
231  def : WriteRes<WriteFMovI64ToF64, [SMX60_IEU]>;
232  def : WriteRes<WriteFMovI32ToF32, [SMX60_IEU]>;
233}
234
235// Others
// CSR accesses and nops: SMX60_IEU group, WriteRes default latency.
236def : WriteRes<WriteCSR, [SMX60_IEU]>;
237def : WriteRes<WriteNop, [SMX60_IEU]>;
238
239//===----------------------------------------------------------------------===//
240// Bypass and advance
// Every read class below is declared with an advance of 0 cycles, i.e. this
// model does not describe any operand forwarding/bypass: consumers see the
// full latency of the producing write.
// Control flow, CSR, memory addressing and integer arithmetic reads.
241def : ReadAdvance<ReadJmp, 0>;
242def : ReadAdvance<ReadJalr, 0>;
243def : ReadAdvance<ReadCSR, 0>;
244def : ReadAdvance<ReadStoreData, 0>;
245def : ReadAdvance<ReadMemBase, 0>;
246def : ReadAdvance<ReadIALU, 0>;
247def : ReadAdvance<ReadIALU32, 0>;
248def : ReadAdvance<ReadShiftImm, 0>;
249def : ReadAdvance<ReadShiftImm32, 0>;
250def : ReadAdvance<ReadShiftReg, 0>;
251def : ReadAdvance<ReadShiftReg32, 0>;
252def : ReadAdvance<ReadIDiv, 0>;
253def : ReadAdvance<ReadIDiv32, 0>;
254def : ReadAdvance<ReadIRem, 0>;
255def : ReadAdvance<ReadIRem32, 0>;
256def : ReadAdvance<ReadIMul, 0>;
257def : ReadAdvance<ReadIMul32, 0>;
// Atomic reads.
258def : ReadAdvance<ReadAtomicWA, 0>;
259def : ReadAdvance<ReadAtomicWD, 0>;
260def : ReadAdvance<ReadAtomicDA, 0>;
261def : ReadAdvance<ReadAtomicDD, 0>;
262def : ReadAdvance<ReadAtomicLDW, 0>;
263def : ReadAdvance<ReadAtomicLDD, 0>;
264def : ReadAdvance<ReadAtomicSTW, 0>;
265def : ReadAdvance<ReadAtomicSTD, 0>;
// Floating-point memory and arithmetic reads.
266def : ReadAdvance<ReadFStoreData, 0>;
267def : ReadAdvance<ReadFMemBase, 0>;
268def : ReadAdvance<ReadFAdd16, 0>;
269def : ReadAdvance<ReadFAdd32, 0>;
270def : ReadAdvance<ReadFAdd64, 0>;
271def : ReadAdvance<ReadFMul16, 0>;
272def : ReadAdvance<ReadFMA16, 0>;
273def : ReadAdvance<ReadFMA16Addend, 0>;
274def : ReadAdvance<ReadFMul32, 0>;
275def : ReadAdvance<ReadFMul64, 0>;
276def : ReadAdvance<ReadFMA32, 0>;
277def : ReadAdvance<ReadFMA32Addend, 0>;
278def : ReadAdvance<ReadFMA64, 0>;
279def : ReadAdvance<ReadFMA64Addend, 0>;
280def : ReadAdvance<ReadFDiv16, 0>;
281def : ReadAdvance<ReadFDiv32, 0>;
282def : ReadAdvance<ReadFDiv64, 0>;
283def : ReadAdvance<ReadFSqrt16, 0>;
284def : ReadAdvance<ReadFSqrt32, 0>;
285def : ReadAdvance<ReadFSqrt64, 0>;
286def : ReadAdvance<ReadFCmp16, 0>;
287def : ReadAdvance<ReadFCmp32, 0>;
288def : ReadAdvance<ReadFCmp64, 0>;
289def : ReadAdvance<ReadFSGNJ16, 0>;
290def : ReadAdvance<ReadFSGNJ32, 0>;
291def : ReadAdvance<ReadFSGNJ64, 0>;
292def : ReadAdvance<ReadFMinMax16, 0>;
293def : ReadAdvance<ReadFMinMax32, 0>;
294def : ReadAdvance<ReadFMinMax64, 0>;
// Floating-point conversion reads.
295def : ReadAdvance<ReadFCvtF16ToI32, 0>;
296def : ReadAdvance<ReadFCvtF16ToI64, 0>;
297def : ReadAdvance<ReadFCvtF32ToI32, 0>;
298def : ReadAdvance<ReadFCvtF32ToI64, 0>;
299def : ReadAdvance<ReadFCvtF64ToI32, 0>;
300def : ReadAdvance<ReadFCvtF64ToI64, 0>;
301def : ReadAdvance<ReadFCvtI32ToF16, 0>;
302def : ReadAdvance<ReadFCvtI32ToF32, 0>;
303def : ReadAdvance<ReadFCvtI32ToF64, 0>;
304def : ReadAdvance<ReadFCvtI64ToF16, 0>;
305def : ReadAdvance<ReadFCvtI64ToF32, 0>;
306def : ReadAdvance<ReadFCvtI64ToF64, 0>;
307def : ReadAdvance<ReadFCvtF32ToF64, 0>;
308def : ReadAdvance<ReadFCvtF64ToF32, 0>;
309def : ReadAdvance<ReadFCvtF16ToF32, 0>;
310def : ReadAdvance<ReadFCvtF32ToF16, 0>;
311def : ReadAdvance<ReadFCvtF16ToF64, 0>;
312def : ReadAdvance<ReadFCvtF64ToF16, 0>;
// Floating-point move and classify reads.
313def : ReadAdvance<ReadFMovF16ToI16, 0>;
314def : ReadAdvance<ReadFMovI16ToF16, 0>;
315def : ReadAdvance<ReadFMovF32ToI32, 0>;
316def : ReadAdvance<ReadFMovI32ToF32, 0>;
317def : ReadAdvance<ReadFMovF64ToI64, 0>;
318def : ReadAdvance<ReadFMovI64ToF64, 0>;
319def : ReadAdvance<ReadFClass16, 0>;
320def : ReadAdvance<ReadFClass32, 0>;
321def : ReadAdvance<ReadFClass64, 0>;
322
323// Bitmanip
// Read classes for the bitmanip writes; all declared with an advance of
// 0 cycles (no bypass modelled).
324def : ReadAdvance<ReadRotateImm, 0>;
325def : ReadAdvance<ReadRotateImm32, 0>;
326def : ReadAdvance<ReadRotateReg, 0>;
327def : ReadAdvance<ReadRotateReg32, 0>;
328def : ReadAdvance<ReadCLZ, 0>;
329def : ReadAdvance<ReadCLZ32, 0>;
330def : ReadAdvance<ReadCTZ, 0>;
331def : ReadAdvance<ReadCTZ32, 0>;
332def : ReadAdvance<ReadCPOP, 0>;
333def : ReadAdvance<ReadCPOP32, 0>;
334def : ReadAdvance<ReadORCB, 0>;
335def : ReadAdvance<ReadIMinMax, 0>;
336def : ReadAdvance<ReadREV8, 0>;
337def : ReadAdvance<ReadSHXADD, 0>;
338def : ReadAdvance<ReadSHXADD32, 0>;
339def : ReadAdvance<ReadCLMUL, 0>;
340// Single-bit instructions
341def : ReadAdvance<ReadSingleBit, 0>;
342def : ReadAdvance<ReadSingleBitImm, 0>;
343
344//===----------------------------------------------------------------------===//
345// Unsupported extensions
// Each defm instantiates a multiclass defined outside this file; presumably
// these mark the scheduling classes of the named extension as unsupported for
// this model -- confirm against the multiclass definitions.
346defm : UnsupportedSchedQ;
347defm : UnsupportedSchedV;
348defm : UnsupportedSchedZabha;
349defm : UnsupportedSchedZbkb;
350defm : UnsupportedSchedZbkx;
351defm : UnsupportedSchedZfa;
352defm : UnsupportedSchedZvk;
353defm : UnsupportedSchedSFB;
354defm : UnsupportedSchedXsf;
355}
356