xref: /freebsd/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td (revision b2d2a78ad80ec68d4a17f5aef97d21686cb1e29b)
1//==- RISCVSchedXiangShanNanHu.td - XS-NanHu Scheduling Defs -*- tablegen -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9//===----------------------------------------------------------------------===//
10
11// XiangShan is a high-performance open-source RISC-V processor developed by
12// the Institute of Computing Technology (ICT), Chinese Academy of Sciences.
13// Source: https://github.com/OpenXiangShan/XiangShan
14// Documentation: https://github.com/OpenXiangShan/XiangShan-doc
15
16// XiangShan-NanHu is the second generation of XiangShan processor series.
17// Overview: https://xiangshan-doc.readthedocs.io/zh-cn/latest/integration/overview/
18
// Top-level machine model record for the XiangShan-NanHu core.
def XiangShanNanHuModel : SchedMachineModel {
  // Front end: 6-way decode and dispatch.
  let IssueWidth = 6;
  // Instruction queue size.
  let LoopMicroOpBufferSize = 48;
  let MicroOpBufferSize = 256;

  let LoadLatency = 4;
  // Based on estimate of pipeline depth.
  let MispredictPenalty = 11;

  // The model is declared incomplete; the features below are not modelled.
  let CompleteModel = 0;
  let UnsupportedFeatures = [HasStdExtZcmt, HasStdExtZkr,
                             HasVInstructions, HasVInstructionsI64];
}
29
30let SchedModel = XiangShanNanHuModel in {
31
// The reservation stations are distributed and grouped as 32-entry or
// 16-entry smaller ones; every resource below is given a 16-entry buffer.

// Integer side.
def XS2ALU : ProcResource<4> { let BufferSize = 16; }
def XS2MDU : ProcResource<2> { let BufferSize = 16; }
def XS2MISC : ProcResource<1> { let BufferSize = 16; }

// Floating-point side.
def XS2FMAC : ProcResource<4> { let BufferSize = 16; }
def XS2FMISC : ProcResource<2> { let BufferSize = 16; }

// Memory side. Load/Store queues are ignored.
def XS2LD : ProcResource<2> { let BufferSize = 16; }
def XS2ST : ProcResource<2> { let BufferSize = 16; }
45
// Branching: all jump/branch writes go to the single MISC unit.
foreach jumpWrite = [WriteJmp, WriteJal, WriteJalr] in
  def : WriteRes<jumpWrite, [XS2MISC]>;
50
// Integer arithmetic and logic: single-cycle on the ALUs.
foreach aluWrite = [WriteIALU, WriteIALU32,
                    WriteShiftImm, WriteShiftImm32,
                    WriteShiftReg, WriteShiftReg32] in
  def : WriteRes<aluWrite, [XS2ALU]> { let Latency = 1; }
60
// Integer multiplication: 3 cycles on the MDU.
def : WriteRes<WriteIMul, [XS2MDU]> { let Latency = 3; }
def : WriteRes<WriteIMul32, [XS2MDU]> { let Latency = 3; }
66
// Integer division/remainder (SRT16 algorithm).
// The MDU stays reserved for the whole 20-cycle latency.
foreach divWrite = [WriteIDiv32, WriteIDiv, WriteIRem32, WriteIRem] in
  def : WriteRes<divWrite, [XS2MDU]> {
    let Latency = 20;
    let ReleaseAtCycles = [20];
  }
75
// Zb* operations that execute on the ALUs with a 1-cycle latency.

// Zba
foreach zbaWrite = [WriteSHXADD, WriteSHXADD32] in
  def : WriteRes<zbaWrite, [XS2ALU]> { let Latency = 1; }

// Zbb
foreach zbbWrite = [WriteRotateImm, WriteRotateImm32,
                    WriteRotateReg, WriteRotateReg32,
                    WriteORCB, WriteIMinMax, WriteREV8] in
  def : WriteRes<zbbWrite, [XS2ALU]> { let Latency = 1; }

// Zbkb
foreach zbkbWrite = [WriteBREV8, WritePACK, WritePACK32, WriteZIP] in
  def : WriteRes<zbkbWrite, [XS2ALU]> { let Latency = 1; }

// Zbs
foreach zbsWrite = [WriteSingleBit, WriteSingleBitImm,
                    WriteBEXT, WriteBEXTI] in
  def : WriteRes<zbsWrite, [XS2ALU]> { let Latency = 1; }
103
// Zb* operations that execute on the MDU with a 3-cycle latency.

// Zbb
foreach zbbCountWrite = [WriteCLZ, WriteCLZ32,
                         WriteCTZ, WriteCTZ32,
                         WriteCPOP, WriteCPOP32] in
  def : WriteRes<zbbCountWrite, [XS2MDU]> { let Latency = 3; }

// Zbkc
def : WriteRes<WriteCLMUL, [XS2MDU]> { let Latency = 3; }

// Zbkx
def : WriteRes<WriteXPERM, [XS2MDU]> { let Latency = 3; }
119
// Memory: every store flavour (integer, FP and atomic) uses a store unit.
foreach storeWrite = [WriteSTB, WriteSTH, WriteSTW, WriteSTD,
                      WriteFST32, WriteFST64,
                      WriteAtomicSTW, WriteAtomicSTD] in
  def : WriteRes<storeWrite, [XS2ST]>;
129
// Loads: 5-cycle latency on a load unit.

// Integer loads
foreach intLoad = [WriteLDB, WriteLDH, WriteLDW, WriteLDD] in
  def : WriteRes<intLoad, [XS2LD]> { let Latency = 5; }

// Atomic operations and atomic loads
foreach atomicLoad = [WriteAtomicW, WriteAtomicD,
                      WriteAtomicLDW, WriteAtomicLDD] in
  def : WriteRes<atomicLoad, [XS2LD]> { let Latency = 5; }

// FP loads
foreach fpLoad = [WriteFLD32, WriteFLD64] in
  def : WriteRes<fpLoad, [XS2LD]> { let Latency = 5; }
144
145// XiangShan-NanHu uses FuDian FPU instead of Berkeley HardFloat.
146// Documentation: https://github.com/OpenXiangShan/fudian
147
// FP operations that take 3 cycles on the FMAC units.
let Latency = 3 in {
  // Add, sign injection and min/max
  foreach fpArith = [WriteFAdd32, WriteFSGNJ32, WriteFMinMax32,
                     WriteFAdd64, WriteFSGNJ64, WriteFMinMax64] in
    def : WriteRes<fpArith, [XS2FMAC]>;

  // Conversions (int <-> FP and FP <-> FP)
  foreach fpCvt = [WriteFCvtI32ToF32, WriteFCvtI32ToF64,
                   WriteFCvtI64ToF32, WriteFCvtI64ToF64,
                   WriteFCvtF32ToI32, WriteFCvtF32ToI64,
                   WriteFCvtF64ToI32, WriteFCvtF64ToI64,
                   WriteFCvtF32ToF64, WriteFCvtF64ToF32] in
    def : WriteRes<fpCvt, [XS2FMAC]>;

  // Classify, compare and register moves
  foreach fpMisc = [WriteFClass32, WriteFClass64,
                    WriteFCmp32, WriteFCmp64,
                    WriteFMovF32ToI32, WriteFMovI32ToF32,
                    WriteFMovF64ToI64, WriteFMovI64ToF64] in
    def : WriteRes<fpMisc, [XS2FMAC]>;
}
176
// FP multiplication: 3 cycles on the FMAC units.
def : WriteRes<WriteFMul32, [XS2FMAC]> { let Latency = 3; }
def : WriteRes<WriteFMul64, [XS2FMAC]> { let Latency = 3; }
182
// FP fused multiply-add: 5 cycles on the FMAC units.
def : WriteRes<WriteFMA32, [XS2FMAC]> { let Latency = 5; }
def : WriteRes<WriteFMA64, [XS2FMAC]> { let Latency = 5; }
187
// FP division on the FMISC units; latency depends on precision.
let Latency = 11 in
def : WriteRes<WriteFDiv32, [XS2FMISC]>;
let Latency = 18 in
def : WriteRes<WriteFDiv64, [XS2FMISC]>;
195
// FP square root on the FMISC units; latency depends on precision.
let Latency = 17 in
def : WriteRes<WriteFSqrt32, [XS2FMISC]>;
let Latency = 31 in
def : WriteRes<WriteFSqrt64, [XS2FMISC]>;
202
// Others

// COPY is scheduled like a plain integer ALU write.
def : InstRW<[WriteIALU], (instrs COPY)>;

// CSR accesses use the MISC unit.
def : WriteRes<WriteCSR, [XS2MISC]>;
// Nops are given no processor resource.
def : WriteRes<WriteNop, []>;
208
209// Bypass and advance
210
// ReadAdvance of 1 cycle for `read`, restricted to values produced by the
// integer and atomic load writes listed below.
class XS2LoadToALUBypass<SchedRead read>
    : ReadAdvance<read, 1,
                  [WriteLDB, WriteLDH, WriteLDW, WriteLDD,
                   WriteAtomicW, WriteAtomicD,
                   WriteAtomicLDW, WriteAtomicLDD]>;
213
// ALU-class reads that use the load-to-ALU bypass.
foreach bypassRead = [ReadIALU, ReadIALU32,
                      ReadShiftImm, ReadShiftImm32,
                      ReadShiftReg, ReadShiftReg32] in
  def : XS2LoadToALUBypass<bypassRead>;

// FMA addend operands are read 2 cycles late (cascade FMA).
def : ReadAdvance<ReadFMA32Addend, 2>;
def : ReadAdvance<ReadFMA64Addend, 2>;

// Every remaining read has no advance.

// Control flow, CSR and integer memory operands
foreach plainRead = [ReadJmp, ReadJalr, ReadCSR,
                     ReadStoreData, ReadMemBase] in
  def : ReadAdvance<plainRead, 0>;

// Integer divide, remainder and multiply
foreach mduRead = [ReadIDiv, ReadIDiv32, ReadIRem, ReadIRem32,
                   ReadIMul, ReadIMul32] in
  def : ReadAdvance<mduRead, 0>;

// Atomics
foreach atomicRead = [ReadAtomicWA, ReadAtomicWD,
                      ReadAtomicDA, ReadAtomicDD,
                      ReadAtomicLDW, ReadAtomicLDD,
                      ReadAtomicSTW, ReadAtomicSTD] in
  def : ReadAdvance<atomicRead, 0>;

// FP memory operands
foreach fpMemRead = [ReadFStoreData, ReadFMemBase] in
  def : ReadAdvance<fpMemRead, 0>;

// FP arithmetic (the non-addend FMA operands included)
foreach fpArithRead = [ReadFAdd32, ReadFAdd64,
                       ReadFMul32, ReadFMul64,
                       ReadFMA32, ReadFMA64,
                       ReadFDiv32, ReadFDiv64,
                       ReadFSqrt32, ReadFSqrt64,
                       ReadFCmp32, ReadFCmp64,
                       ReadFSGNJ32, ReadFSGNJ64,
                       ReadFMinMax32, ReadFMinMax64] in
  def : ReadAdvance<fpArithRead, 0>;

// FP conversions
foreach fpCvtRead = [ReadFCvtF32ToI32, ReadFCvtF32ToI64,
                     ReadFCvtF64ToI32, ReadFCvtF64ToI64,
                     ReadFCvtI32ToF32, ReadFCvtI32ToF64,
                     ReadFCvtI64ToF32, ReadFCvtI64ToF64,
                     ReadFCvtF32ToF64, ReadFCvtF64ToF32] in
  def : ReadAdvance<fpCvtRead, 0>;

// FP moves and classify
foreach fpMoveRead = [ReadFMovF32ToI32, ReadFMovI32ToF32,
                      ReadFMovF64ToI64, ReadFMovI64ToF64,
                      ReadFClass32, ReadFClass64] in
  def : ReadAdvance<fpMoveRead, 0>;
275
// Zb*: reads of ALU-executed operations use the load-to-ALU bypass; reads of
// MDU-executed operations have no advance.

// Zba
foreach zbaRead = [ReadSHXADD, ReadSHXADD32] in
  def : XS2LoadToALUBypass<zbaRead>;

// Zbb
foreach zbbAluRead = [ReadRotateImm, ReadRotateImm32,
                      ReadRotateReg, ReadRotateReg32,
                      ReadORCB, ReadIMinMax, ReadREV8] in
  def : XS2LoadToALUBypass<zbbAluRead>;
foreach zbbMduRead = [ReadCLZ, ReadCLZ32,
                      ReadCTZ, ReadCTZ32,
                      ReadCPOP, ReadCPOP32] in
  def : ReadAdvance<zbbMduRead, 0>;

// Zbkc
def : ReadAdvance<ReadCLMUL, 0>;

// Zbs
foreach zbsRead = [ReadSingleBit, ReadSingleBitImm] in
  def : XS2LoadToALUBypass<zbsRead>;

// Zbkb
foreach zbkbRead = [ReadBREV8, ReadPACK, ReadPACK32, ReadZIP] in
  def : XS2LoadToALUBypass<zbkbRead>;

// Zbkx
def : ReadAdvance<ReadXPERM, 0>;
306
307//===----------------------------------------------------------------------===//
// Unsupported extensions (listed alphabetically)
defm : UnsupportedSchedSFB;
defm : UnsupportedSchedV;
defm : UnsupportedSchedXsfvcp;
defm : UnsupportedSchedZabha;
defm : UnsupportedSchedZfa;
defm : UnsupportedSchedZfh;
defm : UnsupportedSchedZvk;
316}
317