//==- RISCVSchedSiFive7.td - SiFive7 Scheduling Definitions --*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//

/// c is true if mx has the worst case behavior compared to LMULs in MxList.
/// On the SiFive7, the worst case LMUL is the Largest LMUL
/// and the worst case sew is the smallest SEW for that LMUL.
class SiFive7IsWorstCaseMX<string mx, list<string> MxList> {
  defvar LLMUL = LargestLMUL<MxList>.r;
  bit c = !eq(mx, LLMUL);
}

/// c is true if mx and sew have the worst case behavior compared to LMULs in
/// MxList. On the SiFive7, the worst case LMUL is the Largest LMUL
/// and the worst case sew is the smallest SEW for that LMUL.
class SiFive7IsWorstCaseMXSEW<string mx, int sew, list<string> MxList,
                               bit isF = 0> {
  defvar LLMUL = LargestLMUL<MxList>.r;
  defvar SSEW = SmallestSEW<mx, isF>.r;
  bit c = !and(!eq(mx, LLMUL), !eq(sew, SSEW));
}

/// Number of DLEN parts = (LMUL * VLEN) / DLEN.
/// Since DLEN = VLEN / 2, Num DLEN parts = 2 * LMUL.
class SiFive7GetCyclesDefault<string mx> {
  int c = !cond(
    !eq(mx, "M1") : 2,
    !eq(mx, "M2") : 4,
    !eq(mx, "M4") : 8,
    !eq(mx, "M8") : 16,
    !eq(mx, "MF2") : 1,
    !eq(mx, "MF4") : 1,
    !eq(mx, "MF8") : 1
  );
}
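// For example, with VLEN = 512 and DLEN = 256 as assumed by this model, an
// LMUL = 4 (M4) operation moves (4 * 512) / 256 = 8 DLEN-sized parts, matching
// the table above; fractional LMULs all take the minimum of 1 cycle.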

class SiFive7GetCyclesNarrowing<string mx> {
  int c = !cond(
    !eq(mx, "M1") : 4,
    !eq(mx, "M2") : 8,
    !eq(mx, "M4") : 16,
    !eq(mx, "MF2") : 2,
    !eq(mx, "MF4") : 1,
    !eq(mx, "MF8") : 1
  );
}

class SiFive7GetCyclesVMask<string mx> {
  int c = !cond(
    !eq(mx, "M1") : 1,
    !eq(mx, "M2") : 1,
    !eq(mx, "M4") : 1,
    !eq(mx, "M8") : 2,
    !eq(mx, "MF2") : 1,
    !eq(mx, "MF4") : 1,
    !eq(mx, "MF8") : 1
  );
}

/// VLDM and VSTM can't read/write more than 2 DLENs of data.
/// 2 DLENs when LMUL=8. 1 DLEN for all other LMULs.
class SiFive7GetMaskLoadStoreCycles<string mx> {
  int c = !cond(
    !eq(mx, "M8")  : 2,
    true : 1
  );
}

// Cycles for nf=2 segmented loads and stores are calculated using the
// formula (2 * VLEN * LMUL) / DLEN = 4 * LMUL
class SiFive7GetCyclesSegmentedSeg2<string mx> {
  int c = !cond(
    !eq(mx, "M1") :  4,
    !eq(mx, "M2") :  8,
    !eq(mx, "M4") :  16,
    !eq(mx, "M8") :  32,
    !eq(mx, "MF2") : 2,
    !eq(mx, "MF4") : 1,
    !eq(mx, "MF8") : 1
  );
}
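// For example, an nf=2 segment access at LMUL=2 (M2) takes
// (2 * 512 * 2) / 256 = 8 cycles, matching the table above; the MF8 entry is
// rounded up to 1.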

// Cycles for segmented loads and stores are calculated using the
// formula vl * ceil((SEW * nf) / DLEN), where SEW * nf is the segment size.
class SiFive7GetCyclesSegmented<string mx, int sew, int nf> {
  defvar VLEN = 512;
  defvar DLEN = 256;
  // (VLEN * LMUL) / SEW
  defvar VLUpperBound  = !cond(
    !eq(mx, "M1") : !div(VLEN, sew),
    !eq(mx, "M2") : !div(!mul(VLEN, 2), sew),
    !eq(mx, "M4") : !div(!mul(VLEN, 4), sew),
    !eq(mx, "M8") : !div(!mul(VLEN, 8), sew),
    !eq(mx, "MF2") : !div(!div(VLEN, 2), sew),
    !eq(mx, "MF4") : !div(!div(VLEN, 4), sew),
    !eq(mx, "MF8") : !div(!div(VLEN, 8), sew),
  );
  // We can calculate ceil(a/b) using (a + b - 1) / b.
  defvar a = !mul(sew, nf);
  defvar b = DLEN;
  int c = !mul(VLUpperBound, !div(!sub(!add(a, b), 1), b));
}
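// Worked example: mx=M2, sew=32, nf=4 gives VLUpperBound = (512 * 2) / 32 = 32
// and ceil((32 * 4) / 256) = 1, so c = 32; mx=M1, sew=64, nf=8 gives
// VLUpperBound = 8 and ceil(512 / 256) = 2, so c = 16.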

class SiFive7GetCyclesOnePerElement<string mx, int sew> {
  // FIXME: On SiFive7, VLEN is 512. Although a user can request the compiler
  // to use a different VLEN, this model will not make scheduling decisions
  // based on the user specified VLEN.
  // c = ceil(VLEN / SEW) * LMUL
  // Note: c >= 1 since the smallest VLEN is 512 / 64 = 8, and the
  // largest division performed on VLEN is in MF8 case with division
  // by 8. Therefore, there is no need to ceil the result.
  int VLEN = !div(512, sew);
  int c = !cond(
    !eq(mx, "M1")  : VLEN,
    !eq(mx, "M2")  : !mul(VLEN, 2),
    !eq(mx, "M4")  : !mul(VLEN, 4),
    !eq(mx, "M8")  : !mul(VLEN, 8),
    !eq(mx, "MF2") : !div(VLEN, 2),
    !eq(mx, "MF4") : !div(VLEN, 4),
    !eq(mx, "MF8") : !div(VLEN, 8)
  );
}
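// Worked example (VLEN = 512 as assumed above): sew=16 with mx=M2 gives
// VLEN = 512 / 16 = 32 elements, so c = 32 * 2 = 64; the worst case for the
// divisions is sew=64 with mx=MF8, which still yields 8 / 8 = 1.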

class SiFive7GetDivOrSqrtFactor<int sew> {
  int c = !cond(
    // TODO: Add SchedSEWSetFP upstream and remove the SEW=8 case.
    !eq(sew, 8) : 15,
    !eq(sew, 16) : 15,
    !eq(sew, 32) : 28,
    !eq(sew, 64) : 57
  );
}

/// Cycles for reductions take approximately
/// VL * SEW / DLEN + 5 * (4 + log2(DLEN / SEW)) cycles.
class SiFive7GetReductionCycles<string mx, int sew> {
  // VLUpperBound*SEW/DLEN is equivalent to 2*LMUL since
  // VLUpperBound=(VLEN*LMUL)/SEW.
  defvar VLEN = 512;
  defvar DLEN = !div(VLEN, 2);
  defvar TwoTimesLMUL = !cond(
    !eq(mx, "M1") : 2,
    !eq(mx, "M2") : 4,
    !eq(mx, "M4") : 8,
    !eq(mx, "M8") : 16,
    !eq(mx, "MF2") : 1,
    !eq(mx, "MF4") : 1,
    !eq(mx, "MF8") : 1
  );
  int c = !add(
    TwoTimesLMUL,
    !mul(5, !add(4, !logtwo(!div(DLEN, sew))))
  );
}
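// Worked example: mx=M4, sew=32 gives TwoTimesLMUL = 8 and
// 5 * (4 + log2(256 / 32)) = 35, so c = 43.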

/// Cycles for ordered reductions take approximately 6*VL cycles
class SiFive7GetOrderedReductionCycles<string mx, int sew> {
  defvar VLEN = 512;
  // (VLEN * LMUL) / SEW
  defvar VLUpperBound  = !cond(
    !eq(mx, "M1") : !div(VLEN, sew),
    !eq(mx, "M2") : !div(!mul(VLEN, 2), sew),
    !eq(mx, "M4") : !div(!mul(VLEN, 4), sew),
    !eq(mx, "M8") : !div(!mul(VLEN, 8), sew),
    !eq(mx, "MF2") : !div(!div(VLEN, 2), sew),
    !eq(mx, "MF4") : !div(!div(VLEN, 4), sew),
    !eq(mx, "MF8") : !div(!div(VLEN, 8), sew),
  );
  int c = !mul(6, VLUpperBound);
}
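// Worked example: mx=M2, sew=64 gives VLUpperBound = (512 * 2) / 64 = 16, so
// an ordered reduction is modeled as c = 6 * 16 = 96 cycles.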

class SiFive7AnyToGPRBypass<SchedRead read, int cycles = 2>
    : ReadAdvance<read, cycles, [WriteIALU, WriteIALU32,
                                 WriteShiftImm, WriteShiftImm32,
                                 WriteShiftReg, WriteShiftReg32,
                                 WriteSHXADD, WriteSHXADD32,
                                 WriteRotateImm, WriteRotateImm32,
                                 WriteRotateReg, WriteRotateReg32,
                                 WriteSingleBit, WriteSingleBitImm,
                                 WriteBEXT, WriteBEXTI,
                                 WriteCLZ, WriteCLZ32, WriteCTZ, WriteCTZ32,
                                 WriteCPOP, WriteCPOP32,
                                 WriteREV8, WriteORCB, WriteIMinMax, WriteSFB,
                                 WriteIDiv, WriteIDiv32,
                                 WriteIMul, WriteIMul32,
                                 WriteIRem, WriteIRem32,
                                 WriteLDB, WriteLDH, WriteLDW, WriteLDD]>;
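// The ReadAdvance above means that, for operands read through `read`, the
// scheduler treats the listed producer writes as completing `cycles` cycles
// earlier than their nominal latency, modeling a bypass from those producers
// into GPR consumers.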

// SiFive7 machine model for scheduling and other instruction cost heuristics.
def SiFive7Model : SchedMachineModel {
  let MicroOpBufferSize = 0; // Explicitly set to zero since SiFive7 is in-order.
  let IssueWidth = 2;        // 2 micro-ops are dispatched per cycle.
  let LoadLatency = 3;
  let MispredictPenalty = 3;
  let CompleteModel = 0;
  let EnableIntervals = true;
  let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasStdExtZbkx,
                             HasStdExtZcmt, HasStdExtZknd, HasStdExtZkne,
                             HasStdExtZknh, HasStdExtZksed, HasStdExtZksh,
                             HasStdExtZkr];
}

// The SiFive7 microarchitecture has three pipelines: A, B, V.
// Pipe A can handle memory, integer alu and vector operations.
// Pipe B can handle integer alu, control flow, integer multiply and divide,
// and floating point computation.
// The V pipeline is modeled by the VCQ, VA, VL, and VS resources.
let SchedModel = SiFive7Model in {
let BufferSize = 0 in {
def SiFive7PipeA       : ProcResource<1>;
def SiFive7PipeB       : ProcResource<1>;
def SiFive7IDiv        : ProcResource<1>; // Int Division
def SiFive7FDiv        : ProcResource<1>; // FP Division/Sqrt
def SiFive7VA          : ProcResource<1>; // Arithmetic sequencer
def SiFive7VL          : ProcResource<1>; // Load sequencer
def SiFive7VS          : ProcResource<1>; // Store sequencer
// The VCQ accepts instructions from the A pipe and holds them until the
// vector unit is ready to dequeue them. The unit dequeues up to one instruction
// per cycle, in order, as soon as the sequencer for that type of instruction is
// available. This resource is meant to be used for 1 cycle by all vector
// instructions, to model that only one vector instruction may be dequeued at a
// time. The actual dequeueing into the sequencer is modeled by the VA, VL, and
// VS sequencer resources below. Each of them will only accept a single
// instruction at a time and remain busy for the number of cycles associated
// with that instruction.
def SiFive7VCQ         : ProcResource<1>; // Vector Command Queue
}

def SiFive7PipeAB : ProcResGroup<[SiFive7PipeA, SiFive7PipeB]>;

// Branching
let Latency = 3 in {
def : WriteRes<WriteJmp, [SiFive7PipeB]>;
def : WriteRes<WriteJal, [SiFive7PipeB]>;
def : WriteRes<WriteJalr, [SiFive7PipeB]>;
}

// Short forward branch
def : WriteRes<WriteSFB, [SiFive7PipeA, SiFive7PipeB]> {
  let Latency = 3;
  let NumMicroOps = 2;
}

// Integer arithmetic and logic
let Latency = 3 in {
def : WriteRes<WriteIALU, [SiFive7PipeAB]>;
def : WriteRes<WriteIALU32, [SiFive7PipeAB]>;
def : WriteRes<WriteShiftImm, [SiFive7PipeAB]>;
def : WriteRes<WriteShiftImm32, [SiFive7PipeAB]>;
def : WriteRes<WriteShiftReg, [SiFive7PipeAB]>;
def : WriteRes<WriteShiftReg32, [SiFive7PipeAB]>;
}

// Integer multiplication
let Latency = 3 in {
def : WriteRes<WriteIMul, [SiFive7PipeB]>;
def : WriteRes<WriteIMul32, [SiFive7PipeB]>;
}

// Integer division
def : WriteRes<WriteIDiv, [SiFive7PipeB, SiFive7IDiv]> {
  let Latency = 66;
  let ReleaseAtCycles = [1, 65];
}
def : WriteRes<WriteIDiv32,  [SiFive7PipeB, SiFive7IDiv]> {
  let Latency = 34;
  let ReleaseAtCycles = [1, 33];
}

// Integer remainder
def : WriteRes<WriteIRem, [SiFive7PipeB, SiFive7IDiv]> {
  let Latency = 66;
  let ReleaseAtCycles = [1, 65];
}
def : WriteRes<WriteIRem32,  [SiFive7PipeB, SiFive7IDiv]> {
  let Latency = 34;
  let ReleaseAtCycles = [1, 33];
}

// Bitmanip
let Latency = 3 in {
// Rotates are in the late-B ALU.
def : WriteRes<WriteRotateImm, [SiFive7PipeB]>;
def : WriteRes<WriteRotateImm32, [SiFive7PipeB]>;
def : WriteRes<WriteRotateReg, [SiFive7PipeB]>;
def : WriteRes<WriteRotateReg32, [SiFive7PipeB]>;

// clz[w]/ctz[w] are in the late-B ALU.
def : WriteRes<WriteCLZ, [SiFive7PipeB]>;
def : WriteRes<WriteCLZ32, [SiFive7PipeB]>;
def : WriteRes<WriteCTZ, [SiFive7PipeB]>;
def : WriteRes<WriteCTZ32, [SiFive7PipeB]>;

// cpop[w] look exactly like multiply.
def : WriteRes<WriteCPOP, [SiFive7PipeB]>;
def : WriteRes<WriteCPOP32, [SiFive7PipeB]>;

// orc.b is in the late-B ALU.
def : WriteRes<WriteORCB, [SiFive7PipeB]>;

// min/max are in the late-B ALU.
def : WriteRes<WriteIMinMax, [SiFive7PipeB]>;

// rev8 is in the late-A and late-B ALUs.
def : WriteRes<WriteREV8, [SiFive7PipeAB]>;

// shNadd[.uw] is on the early-B and late-B ALUs.
def : WriteRes<WriteSHXADD, [SiFive7PipeB]>;
def : WriteRes<WriteSHXADD32, [SiFive7PipeB]>;
}

// Single-bit instructions
// The BEXT[I] instructions are available on all ALUs; the other instructions
// are only available on the B pipe (SiFive7PipeB).
let Latency = 3 in {
def : WriteRes<WriteSingleBit, [SiFive7PipeB]>;
def : WriteRes<WriteSingleBitImm, [SiFive7PipeB]>;
def : WriteRes<WriteBEXT, [SiFive7PipeAB]>;
def : WriteRes<WriteBEXTI, [SiFive7PipeAB]>;
}

// Memory
def : WriteRes<WriteSTB, [SiFive7PipeA]>;
def : WriteRes<WriteSTH, [SiFive7PipeA]>;
def : WriteRes<WriteSTW, [SiFive7PipeA]>;
def : WriteRes<WriteSTD, [SiFive7PipeA]>;
def : WriteRes<WriteFST16, [SiFive7PipeA]>;
def : WriteRes<WriteFST32, [SiFive7PipeA]>;
def : WriteRes<WriteFST64, [SiFive7PipeA]>;

let Latency = 3 in {
def : WriteRes<WriteLDB, [SiFive7PipeA]>;
def : WriteRes<WriteLDH, [SiFive7PipeA]>;
def : WriteRes<WriteLDW, [SiFive7PipeA]>;
def : WriteRes<WriteLDD, [SiFive7PipeA]>;
}

let Latency = 2 in {
def : WriteRes<WriteFLD16, [SiFive7PipeA]>;
def : WriteRes<WriteFLD32, [SiFive7PipeA]>;
def : WriteRes<WriteFLD64, [SiFive7PipeA]>;
}

// Atomic memory
def : WriteRes<WriteAtomicSTW, [SiFive7PipeA]>;
def : WriteRes<WriteAtomicSTD, [SiFive7PipeA]>;

let Latency = 3 in {
def : WriteRes<WriteAtomicW, [SiFive7PipeA]>;
def : WriteRes<WriteAtomicD, [SiFive7PipeA]>;
def : WriteRes<WriteAtomicLDW, [SiFive7PipeA]>;
def : WriteRes<WriteAtomicLDD, [SiFive7PipeA]>;
}

// Half precision.
let Latency = 5 in {
def : WriteRes<WriteFAdd16, [SiFive7PipeB]>;
def : WriteRes<WriteFMul16, [SiFive7PipeB]>;
def : WriteRes<WriteFMA16, [SiFive7PipeB]>;
}
let Latency = 3 in {
def : WriteRes<WriteFSGNJ16, [SiFive7PipeB]>;
def : WriteRes<WriteFMinMax16, [SiFive7PipeB]>;
}

let Latency = 14, ReleaseAtCycles = [1, 13] in {
def :  WriteRes<WriteFDiv16, [SiFive7PipeB, SiFive7FDiv]>;
def :  WriteRes<WriteFSqrt16, [SiFive7PipeB, SiFive7FDiv]>;
}

// Single precision.
let Latency = 5 in {
def : WriteRes<WriteFAdd32, [SiFive7PipeB]>;
def : WriteRes<WriteFMul32, [SiFive7PipeB]>;
def : WriteRes<WriteFMA32, [SiFive7PipeB]>;
}
let Latency = 3 in {
def : WriteRes<WriteFSGNJ32, [SiFive7PipeB]>;
def : WriteRes<WriteFMinMax32, [SiFive7PipeB]>;
}

def : WriteRes<WriteFDiv32, [SiFive7PipeB, SiFive7FDiv]> { let Latency = 27;
                                                         let ReleaseAtCycles = [1, 26]; }
def : WriteRes<WriteFSqrt32, [SiFive7PipeB, SiFive7FDiv]> { let Latency = 27;
                                                          let ReleaseAtCycles = [1, 26]; }

// Double precision
let Latency = 7 in {
def : WriteRes<WriteFAdd64, [SiFive7PipeB]>;
def : WriteRes<WriteFMul64, [SiFive7PipeB]>;
def : WriteRes<WriteFMA64, [SiFive7PipeB]>;
}
let Latency = 3 in {
def : WriteRes<WriteFSGNJ64, [SiFive7PipeB]>;
def : WriteRes<WriteFMinMax64, [SiFive7PipeB]>;
}

def : WriteRes<WriteFDiv64, [SiFive7PipeB, SiFive7FDiv]> { let Latency = 56;
                                                         let ReleaseAtCycles = [1, 55]; }
def : WriteRes<WriteFSqrt64, [SiFive7PipeB, SiFive7FDiv]> { let Latency = 56;
                                                          let ReleaseAtCycles = [1, 55]; }

// Conversions
let Latency = 3 in {
def : WriteRes<WriteFCvtI32ToF16, [SiFive7PipeB]>;
def : WriteRes<WriteFCvtI32ToF32, [SiFive7PipeB]>;
def : WriteRes<WriteFCvtI32ToF64, [SiFive7PipeB]>;
def : WriteRes<WriteFCvtI64ToF16, [SiFive7PipeB]>;
def : WriteRes<WriteFCvtI64ToF32, [SiFive7PipeB]>;
def : WriteRes<WriteFCvtI64ToF64, [SiFive7PipeB]>;
def : WriteRes<WriteFCvtF16ToI32, [SiFive7PipeB]>;
def : WriteRes<WriteFCvtF16ToI64, [SiFive7PipeB]>;
def : WriteRes<WriteFCvtF16ToF32, [SiFive7PipeB]>;
def : WriteRes<WriteFCvtF16ToF64, [SiFive7PipeB]>;
def : WriteRes<WriteFCvtF32ToI32, [SiFive7PipeB]>;
def : WriteRes<WriteFCvtF32ToI64, [SiFive7PipeB]>;
def : WriteRes<WriteFCvtF32ToF16, [SiFive7PipeB]>;
def : WriteRes<WriteFCvtF32ToF64, [SiFive7PipeB]>;
def : WriteRes<WriteFCvtF64ToI32, [SiFive7PipeB]>;
def : WriteRes<WriteFCvtF64ToI64, [SiFive7PipeB]>;
def : WriteRes<WriteFCvtF64ToF16, [SiFive7PipeB]>;
def : WriteRes<WriteFCvtF64ToF32, [SiFive7PipeB]>;

def : WriteRes<WriteFClass16, [SiFive7PipeB]>;
def : WriteRes<WriteFClass32, [SiFive7PipeB]>;
def : WriteRes<WriteFClass64, [SiFive7PipeB]>;
def : WriteRes<WriteFCmp16, [SiFive7PipeB]>;
def : WriteRes<WriteFCmp32, [SiFive7PipeB]>;
def : WriteRes<WriteFCmp64, [SiFive7PipeB]>;
def : WriteRes<WriteFMovI16ToF16, [SiFive7PipeB]>;
def : WriteRes<WriteFMovF16ToI16, [SiFive7PipeB]>;
def : WriteRes<WriteFMovI32ToF32, [SiFive7PipeB]>;
def : WriteRes<WriteFMovF32ToI32, [SiFive7PipeB]>;
def : WriteRes<WriteFMovI64ToF64, [SiFive7PipeB]>;
def : WriteRes<WriteFMovF64ToI64, [SiFive7PipeB]>;
}

// 6. Configuration-Setting Instructions
let Latency = 3 in {
def : WriteRes<WriteVSETVLI, [SiFive7PipeA]>;
def : WriteRes<WriteVSETIVLI, [SiFive7PipeA]>;
def : WriteRes<WriteVSETVL, [SiFive7PipeA]>;
}

// 7. Vector Loads and Stores
// Unit-stride loads and stores can operate at the full bandwidth of the memory
// pipe. The memory pipe is DLEN bits wide on x280.
foreach mx = SchedMxList in {
  defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVLDE",    [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVLDFF",   [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
  }
  let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
  defm "" : LMULWriteResMX<"WriteVSTE",    [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
}
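// For example, at mx=M2 the block above expands to Latency = 4,
// AcquireAtCycles = [0, 1] and ReleaseAtCycles = [1, 5]: the VCQ entry is held
// for one cycle, while the VL sequencer is acquired at cycle 1 and released at
// cycle 5, i.e. kept busy for the 4 DLEN beats of an LMUL=2 unit-stride access.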

foreach mx = SchedMxList in {
  defvar Cycles = SiFive7GetMaskLoadStoreCycles<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
  defm "" : LMULWriteResMX<"WriteVLDM",    [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
  let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
  defm "" : LMULWriteResMX<"WriteVSTM",    [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
}

// Strided loads and stores operate at one element per cycle and should be
// scheduled accordingly. Indexed loads and stores operate at one element per
// cycle, and they stall the machine until all addresses have been generated,
// so they cannot be scheduled. Indexed and strided loads and stores have LMUL
// specific suffixes, but since SEW is already encoded in the name of the
// resource, we do not need to use LMULSEWXXX constructors. However, we do
// use the SEW from the name to determine the number of Cycles.

// This predicate is true when the rs2 operand of vlse or vsse is x0, false
// otherwise.
def VLDSX0Pred : MCSchedPredicate<CheckRegOperand<3, X0>>;

foreach mx = SchedMxList in {
  defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 8>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS8",  VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
                                       4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
                                       [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>;
  let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVLDUX8", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVLDOX8", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
  }
  let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVSTS8",  [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSTUX8", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSTOX8", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
  }
}
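// Worked example: at mx=M1 and EEW=8 the per-element path above uses
// Cycles = 64, so an indexed or strided load gets Latency = 67 and holds the
// VL sequencer through cycle 65, while the VLDSX0Pred variant (stride register
// is x0) falls back to the unit-stride cost of 2 cycles with Latency = 4.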
// TODO: The MxLists need to be filtered by EEW. We only need to support
// LMUL >= SEW_min/ELEN. Here, the smallest EEW prevents us from having MF8
// since LMUL >= 16/64.
foreach mx = ["MF4", "MF2", "M1", "M2", "M4", "M8"] in {
  defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 16>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS16",  VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
                                       4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
                                       [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>;
  let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVLDUX16", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVLDOX16", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
  }
  let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVSTS16",  [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSTUX16", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSTOX16", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
  }
}
foreach mx = ["MF2", "M1", "M2", "M4", "M8"] in {
  defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 32>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS32",  VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
                                       4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
                                       [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>;
  let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVLDUX32", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVLDOX32", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
  }
  let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVSTS32",  [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSTUX32", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSTOX32", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
  }
}
foreach mx = ["M1", "M2", "M4", "M8"] in {
  defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 64>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS64",  VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
                                       4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
                                       [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>;
  let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVLDUX64", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVLDOX64", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
  }
  let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVSTS64",  [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSTUX64", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSTOX64", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
  }
}

// VLD*R is LMUL aware
let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 2)] in
  def : WriteRes<WriteVLD1R,  [SiFive7VCQ, SiFive7VL]>;
let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 4)] in
  def : WriteRes<WriteVLD2R,  [SiFive7VCQ, SiFive7VL]>;
let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 8)] in
  def : WriteRes<WriteVLD4R,  [SiFive7VCQ, SiFive7VL]>;
let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 16)] in
  def : WriteRes<WriteVLD8R,  [SiFive7VCQ, SiFive7VL]>;
// VST*R is LMUL aware
let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 2)] in
  def : WriteRes<WriteVST1R,   [SiFive7VCQ, SiFive7VS]>;
let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 4)] in
  def : WriteRes<WriteVST2R,   [SiFive7VCQ, SiFive7VS]>;
let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 8)] in
  def : WriteRes<WriteVST4R,   [SiFive7VCQ, SiFive7VS]>;
let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 16)] in
  def : WriteRes<WriteVST8R,   [SiFive7VCQ, SiFive7VS]>;

// Segmented Loads and Stores
// Unit-stride segmented loads and stores are effectively converted into strided
// segment loads and stores. Strided segment loads and stores operate at up to
// one segment per cycle if the segment fits within one aligned memory beat.
// Indexed segment loads and stores operate at the same rate as strided ones,
// but they stall the machine until all addresses have been generated.
foreach mx = SchedMxList in {
  foreach eew = [8, 16, 32, 64] in {
    defvar Cycles = SiFive7GetCyclesSegmentedSeg2<mx>.c;
    defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
    // Does not chain so set latency high
    let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
      defm "" : LMULWriteResMX<"WriteVLSEG2e" # eew,   [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
      defm "" : LMULWriteResMX<"WriteVLSEGFF2e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
    }
    let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
    defm "" : LMULWriteResMX<"WriteVSSEG2e" # eew,   [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
    foreach nf=3-8 in {
      defvar Cycles = SiFive7GetCyclesSegmented<mx, eew, nf>.c;
      defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
      // Does not chain so set latency high
      let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
        defm "" : LMULWriteResMX<"WriteVLSEG" # nf # "e" # eew,   [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
        defm "" : LMULWriteResMX<"WriteVLSEGFF" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
      }
      let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
      defm "" : LMULWriteResMX<"WriteVSSEG" # nf # "e" # eew,   [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
    }
  }
}
foreach mx = SchedMxList in {
  foreach nf=2-8 in {
    foreach eew = [8, 16, 32, 64] in {
      defvar Cycles = SiFive7GetCyclesSegmented<mx, eew, nf>.c;
      defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
      // Does not chain so set latency high
      let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
        defm "" : LMULWriteResMX<"WriteVLSSEG" # nf # "e" # eew,  [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
        defm "" : LMULWriteResMX<"WriteVLUXSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
        defm "" : LMULWriteResMX<"WriteVLOXSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>;
      }
      let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
        defm "" : LMULWriteResMX<"WriteVSSSEG" # nf # "e" # eew,  [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
        defm "" : LMULWriteResMX<"WriteVSUXSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
        defm "" : LMULWriteResMX<"WriteVSOXSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>;
      }
    }
  }
}

// 11. Vector Integer Arithmetic Instructions
foreach mx = SchedMxList in {
  defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVIALUV",     [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIALUX",     [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIALUI",     [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVICALUV",    [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVICALUX",    [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVICALUI",    [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVShiftV",    [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVShiftX",    [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVShiftI",    [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIMinMaxV",  [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIMinMaxX",  [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIMulV",     [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIMulX",     [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIMulAddV",  [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIMulAddX",  [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIMergeV",   [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIMergeX",   [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIMergeI",   [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIMovV",     [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIMovX",     [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIMovI",     [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
  }
  // Mask results can't chain.
  let Latency = !add(Cycles, 3), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVICmpV",     [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVICmpX",     [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVICmpI",     [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
  }
}
foreach mx = SchedMxList in {
  defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVExtV",      [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
  }
}
foreach mx = SchedMxList in {
  foreach sew = SchedSEWSet<mx>.val in {
    defvar Cycles = !mul(SiFive7GetDivOrSqrtFactor<sew>.c,
                         !div(SiFive7GetCyclesOnePerElement<mx, sew>.c, 4));
    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
    let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVIDivV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVIDivX", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
    }
  }
}
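// Worked example: mx=M1, sew=32 gives Cycles = 28 * (16 / 4) = 112 for the
// vector integer divide writes above, i.e. Latency = 112 with
// ReleaseAtCycles = [1, 113] on the VA sequencer.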

// Widening
foreach mx = SchedMxListW in {
  defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
  let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVIWALUV",    [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIWALUX",    [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIWALUI",    [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIWMulV",    [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIWMulX",    [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIWMulAddV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIWMulAddX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
  }
}
// Narrowing
foreach mx = SchedMxListW in {
  defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
  let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVNShiftV",   [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVNShiftX",   [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVNShiftI",   [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
  }
}

// 12. Vector Fixed-Point Arithmetic Instructions
foreach mx = SchedMxList in {
  defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVSALUV",   [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSALUX",   [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSALUI",   [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVAALUV",   [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVAALUX",   [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSMulV",   [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSMulX",   [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSShiftV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSShiftX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSShiftI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
  }
}
// Narrowing
foreach mx = SchedMxListW in {
  defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
  let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVNClipV",  [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVNClipX",  [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVNClipI",  [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
  }
}

// 13. Vector Floating-Point Instructions
foreach mx = SchedMxListF in {
  foreach sew = SchedSEWSet<mx, isF=1>.val in {
    defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, isF=1>.c;
    let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVFALUV",  [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFALUF",  [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFMulV",  [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFMulF",  [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFRecpV",   [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
    }
    let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjV",   [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjF",   [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
    }
  }
}
foreach mx = SchedMxList in {
  defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVFCvtFToIV",  [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
  }
  let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVFClassV",    [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVFMergeV",    [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVFMovV",      [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
  }
  // Mask results can't chain.
  let Latency = !add(Cycles, 3), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVFCmpV",      [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVFCmpF",      [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
  }
}
foreach mx = SchedMxListF in {
  foreach sew = SchedSEWSet<mx, isF=1>.val in {
    defvar Cycles = !mul(SiFive7GetDivOrSqrtFactor<sew>.c,
                         !div(SiFive7GetCyclesOnePerElement<mx, sew>.c, 4));
    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
    let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVFSqrtV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFDivV",  [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFDivF",  [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
    }
  }
}
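// Worked example: mx=M1, sew=64 gives Cycles = 57 * (8 / 4) = 114 for the FP
// divide and square-root writes above, i.e. Latency = 114 with
// ReleaseAtCycles = [1, 115] on the VA sequencer.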

// Widening
foreach mx = SchedMxListW in {
  foreach sew = SchedSEWSet<mx, isF=0, isWidening=1>.val in {
    defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListW>.c;
    let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
    defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
  }
}
foreach mx = SchedMxListFW in {
  foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
    defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;
    let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVFWALUV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFWALUF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtFToFV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
    }
  }
  defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListFW>.c;
  let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
  defm "" : LMULWriteResMX<"WriteVFWCvtFToIV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
// Narrowing
foreach mx = SchedMxListW in {
  defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
  let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVFNCvtFToIV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
  }
}
foreach mx = SchedMxListFW in {
  foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
    defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c;
    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;
    let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtFToFV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
    }
  }
}

// 14. Vector Reduction Operations
foreach mx = SchedMxList in {
  foreach sew = SchedSEWSet<mx>.val in {
    defvar Cycles = SiFive7GetReductionCycles<mx, sew>.c;
    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
    let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVIRedV_From", [SiFive7VCQ, SiFive7VA],
                                     mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVIRedMinMaxV_From", [SiFive7VCQ, SiFive7VA],
                                     mx, sew, IsWorstCase>;
    }
  }
}

foreach mx = SchedMxListWRed in {
  foreach sew = SchedSEWSet<mx, 0, 1>.val in {
    defvar Cycles = SiFive7GetReductionCycles<mx, sew>.c;
    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListWRed>.c;
    let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
    defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From", [SiFive7VCQ, SiFive7VA],
                                   mx, sew, IsWorstCase>;
  }
}

foreach mx = SchedMxListF in {
  foreach sew = SchedSEWSet<mx, 1>.val in {
    defvar RedCycles = SiFive7GetReductionCycles<mx, sew>.c;
    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
    let Latency = RedCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, RedCycles)] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVFRedV_From", [SiFive7VCQ, SiFive7VA],
                                     mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFRedMinMaxV_From", [SiFive7VCQ, SiFive7VA],
                                     mx, sew, IsWorstCase>;
    }
    defvar OrdRedCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c;
    let Latency = OrdRedCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, OrdRedCycles)] in
    defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [SiFive7VCQ, SiFive7VA],
                                   mx, sew, IsWorstCase>;
  }
}

foreach mx = SchedMxListFWRed in {
  foreach sew = SchedSEWSet<mx, 1, 1>.val in {
    defvar RedCycles = SiFive7GetReductionCycles<mx, sew>.c;
    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFWRed, 1>.c;
    let Latency = RedCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, RedCycles)] in
    defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [SiFive7VCQ, SiFive7VA],
                                   mx, sew, IsWorstCase>;
    defvar OrdRedCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c;
    let Latency = OrdRedCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, OrdRedCycles)] in
    defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [SiFive7VCQ, SiFive7VA],
                                   mx, sew, IsWorstCase>;
  }
}

// 15. Vector Mask Instructions
foreach mx = SchedMxList in {
  defvar Cycles = SiFive7GetCyclesVMask<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVMALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVMPopV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVMFFSV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVMSFSV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
  }
}
foreach mx = SchedMxList in {
  defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVIotaV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIdxV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
  }
}

// 16. Vector Permutation Instructions
let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 1)] in {
  def : WriteRes<WriteVMovSX, [SiFive7VCQ, SiFive7VA]>;
  def : WriteRes<WriteVMovXS, [SiFive7VCQ, SiFive7VA]>;
  def : WriteRes<WriteVMovSF, [SiFive7VCQ, SiFive7VA]>;
  def : WriteRes<WriteVMovFS, [SiFive7VCQ, SiFive7VA]>;
}
foreach mx = SchedMxList in {
  defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVRGatherVX",    [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVRGatherVI",    [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
  }
}

foreach mx = SchedMxList in {
  foreach sew = SchedSEWSet<mx>.val in {
    defvar Cycles = SiFive7GetCyclesOnePerElement<mx, sew>.c;
    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
    let Latency = !add(Cycles, 3), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherVV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherEI16VV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVCompressV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
    }
  }
}

foreach mx = SchedMxList in {
  defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVSlideUpX",   [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSlideDownX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSlideI",     [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVISlide1X",   [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVFSlide1F",   [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
  }
}

// VMov*V is LMUL Aware
let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 2)] in
  def : WriteRes<WriteVMov1V,     [SiFive7VCQ, SiFive7VA]>;
let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 4)] in
  def : WriteRes<WriteVMov2V,     [SiFive7VCQ, SiFive7VA]>;
let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 8)] in
  def : WriteRes<WriteVMov4V,     [SiFive7VCQ, SiFive7VA]>;
let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 16)] in
  def : WriteRes<WriteVMov8V,     [SiFive7VCQ, SiFive7VA]>;

// Others
def : WriteRes<WriteCSR, [SiFive7PipeB]>;
def : WriteRes<WriteNop, []>;
let Latency = 3 in
  def : WriteRes<WriteRdVLENB, [SiFive7PipeB]>;

def : InstRW<[WriteIALU], (instrs COPY)>;

// VCIX
//
// In principle we don't know the latency of any VCIX instructions. But instead
// of taking the default of 1, which can lead to issues [1], we assume that they
// have a fairly high latency.
//
// [1] https://github.com/llvm/llvm-project/issues/83391
foreach mx = SchedMxList in {
  defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = !mul(Cycles, 10),
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVC_V_I",   [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVC_V_X",   [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVC_V_IV",  [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVC_V_VV",  [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVC_V_XV",  [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVC_V_IVV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVC_V_IVW", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVC_V_VVV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVC_V_VVW", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVC_V_XVV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVC_V_XVW", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    foreach f = ["FPR16", "FPR32", "FPR64"] in {
      defm "" : LMULWriteResMX<"WriteVC_V_" # f # "V",  [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
      defm "" : LMULWriteResMX<"WriteVC_V_" # f # "VV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
      defm "" : LMULWriteResMX<"WriteVC_V_" # f # "VW", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    }
    defm "" : LMULWriteResMX<"WriteVC_I",   [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVC_X",   [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVC_IV",  [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVC_VV",  [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVC_XV",  [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVC_IVV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVC_IVW", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVC_VVV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVC_VVW", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVC_XVV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVC_XVW", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    foreach f = ["FPR16", "FPR32", "FPR64"] in {
      defm "" : LMULWriteResMX<"WriteVC_" # f # "V",  [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
      defm "" : LMULWriteResMX<"WriteVC_" # f # "VV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
      defm "" : LMULWriteResMX<"WriteVC_" # f # "VW", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    }
  }
}

//===----------------------------------------------------------------------===//

// Bypass and advance
def : SiFive7AnyToGPRBypass<ReadJmp>;
def : SiFive7AnyToGPRBypass<ReadJalr>;
def : ReadAdvance<ReadCSR, 0>;
def : SiFive7AnyToGPRBypass<ReadStoreData>;
def : ReadAdvance<ReadMemBase, 0>;
def : SiFive7AnyToGPRBypass<ReadIALU>;
def : SiFive7AnyToGPRBypass<ReadIALU32>;
def : SiFive7AnyToGPRBypass<ReadShiftImm>;
def : SiFive7AnyToGPRBypass<ReadShiftImm32>;
def : SiFive7AnyToGPRBypass<ReadShiftReg>;
def : SiFive7AnyToGPRBypass<ReadShiftReg32>;
def : ReadAdvance<ReadIDiv, 0>;
def : ReadAdvance<ReadIDiv32, 0>;
def : ReadAdvance<ReadIRem, 0>;
def : ReadAdvance<ReadIRem32, 0>;
def : ReadAdvance<ReadIMul, 0>;
def : ReadAdvance<ReadIMul32, 0>;
def : ReadAdvance<ReadAtomicWA, 0>;
def : ReadAdvance<ReadAtomicWD, 0>;
def : ReadAdvance<ReadAtomicDA, 0>;
def : ReadAdvance<ReadAtomicDD, 0>;
def : ReadAdvance<ReadAtomicLDW, 0>;
def : ReadAdvance<ReadAtomicLDD, 0>;
def : ReadAdvance<ReadAtomicSTW, 0>;
def : ReadAdvance<ReadAtomicSTD, 0>;
def : ReadAdvance<ReadFStoreData, 0>;
def : ReadAdvance<ReadFMemBase, 0>;
def : ReadAdvance<ReadFAdd16, 0>;
def : ReadAdvance<ReadFAdd32, 0>;
def : ReadAdvance<ReadFAdd64, 0>;
def : ReadAdvance<ReadFMul16, 0>;
def : ReadAdvance<ReadFMA16, 0>;
def : ReadAdvance<ReadFMA16Addend, 0>;
def : ReadAdvance<ReadFMul32, 0>;
def : ReadAdvance<ReadFMul64, 0>;
def : ReadAdvance<ReadFMA32, 0>;
def : ReadAdvance<ReadFMA32Addend, 0>;
def : ReadAdvance<ReadFMA64, 0>;
def : ReadAdvance<ReadFMA64Addend, 0>;
def : ReadAdvance<ReadFDiv16, 0>;
def : ReadAdvance<ReadFDiv32, 0>;
def : ReadAdvance<ReadFDiv64, 0>;
def : ReadAdvance<ReadFSqrt16, 0>;
def : ReadAdvance<ReadFSqrt32, 0>;
def : ReadAdvance<ReadFSqrt64, 0>;
def : ReadAdvance<ReadFCmp16, 0>;
def : ReadAdvance<ReadFCmp32, 0>;
def : ReadAdvance<ReadFCmp64, 0>;
def : ReadAdvance<ReadFSGNJ16, 0>;
def : ReadAdvance<ReadFSGNJ32, 0>;
def : ReadAdvance<ReadFSGNJ64, 0>;
def : ReadAdvance<ReadFMinMax16, 0>;
def : ReadAdvance<ReadFMinMax32, 0>;
def : ReadAdvance<ReadFMinMax64, 0>;
def : ReadAdvance<ReadFCvtF16ToI32, 0>;
def : ReadAdvance<ReadFCvtF16ToI64, 0>;
def : ReadAdvance<ReadFCvtF32ToI32, 0>;
def : ReadAdvance<ReadFCvtF32ToI64, 0>;
def : ReadAdvance<ReadFCvtF64ToI32, 0>;
def : ReadAdvance<ReadFCvtF64ToI64, 0>;
def : ReadAdvance<ReadFCvtI32ToF16, 0>;
def : ReadAdvance<ReadFCvtI32ToF32, 0>;
def : ReadAdvance<ReadFCvtI32ToF64, 0>;
def : ReadAdvance<ReadFCvtI64ToF16, 0>;
def : ReadAdvance<ReadFCvtI64ToF32, 0>;
def : ReadAdvance<ReadFCvtI64ToF64, 0>;
def : ReadAdvance<ReadFCvtF32ToF64, 0>;
def : ReadAdvance<ReadFCvtF64ToF32, 0>;
def : ReadAdvance<ReadFCvtF16ToF32, 0>;
def : ReadAdvance<ReadFCvtF32ToF16, 0>;
def : ReadAdvance<ReadFCvtF16ToF64, 0>;
def : ReadAdvance<ReadFCvtF64ToF16, 0>;
def : ReadAdvance<ReadFMovF16ToI16, 0>;
def : ReadAdvance<ReadFMovI16ToF16, 0>;
def : ReadAdvance<ReadFMovF32ToI32, 0>;
def : ReadAdvance<ReadFMovI32ToF32, 0>;
def : ReadAdvance<ReadFMovF64ToI64, 0>;
def : ReadAdvance<ReadFMovI64ToF64, 0>;
def : ReadAdvance<ReadFClass16, 0>;
def : ReadAdvance<ReadFClass32, 0>;
def : ReadAdvance<ReadFClass64, 0>;

def : SiFive7AnyToGPRBypass<ReadSFBJmp, 0>;
def : SiFive7AnyToGPRBypass<ReadSFBALU, 0>;

// Bitmanip
def : SiFive7AnyToGPRBypass<ReadRotateImm>;
def : SiFive7AnyToGPRBypass<ReadRotateImm32>;
def : SiFive7AnyToGPRBypass<ReadRotateReg>;
def : SiFive7AnyToGPRBypass<ReadRotateReg32>;
def : SiFive7AnyToGPRBypass<ReadCLZ>;
def : SiFive7AnyToGPRBypass<ReadCLZ32>;
def : SiFive7AnyToGPRBypass<ReadCTZ>;
def : SiFive7AnyToGPRBypass<ReadCTZ32>;
def : ReadAdvance<ReadCPOP, 0>;
def : ReadAdvance<ReadCPOP32, 0>;
def : SiFive7AnyToGPRBypass<ReadORCB>;
def : SiFive7AnyToGPRBypass<ReadIMinMax>;
def : SiFive7AnyToGPRBypass<ReadREV8>;
def : SiFive7AnyToGPRBypass<ReadSHXADD>;
def : SiFive7AnyToGPRBypass<ReadSHXADD32>;
// Single-bit instructions
def : SiFive7AnyToGPRBypass<ReadSingleBit>;
def : SiFive7AnyToGPRBypass<ReadSingleBitImm>;

// 6. Configuration-Setting Instructions
def : ReadAdvance<ReadVSETVLI, 2>;
def : ReadAdvance<ReadVSETVL, 2>;

// 7. Vector Loads and Stores
def : ReadAdvance<ReadVLDX, 0>;
def : ReadAdvance<ReadVSTX, 0>;
defm "" : LMULReadAdvance<"ReadVSTEV", 0>;
defm "" : LMULReadAdvance<"ReadVSTM", 0>;
def : ReadAdvance<ReadVLDSX, 0>;
def : ReadAdvance<ReadVSTSX, 0>;
defm "" : LMULReadAdvance<"ReadVSTS8V", 0>;
defm "" : LMULReadAdvance<"ReadVSTS16V", 0>;
defm "" : LMULReadAdvance<"ReadVSTS32V", 0>;
defm "" : LMULReadAdvance<"ReadVSTS64V", 0>;
defm "" : LMULReadAdvance<"ReadVLDUXV", 0>;
defm "" : LMULReadAdvance<"ReadVLDOXV", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX8", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX16", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX32", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX64", 0>;
defm "" : LMULReadAdvance<"ReadVSTUXV", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX8V", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX16V", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX32V", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX64V", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX8", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX16", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX32", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX64", 0>;
defm "" : LMULReadAdvance<"ReadVSTOXV", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX8V", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX16V", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX32V", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX64V", 0>;
// LMUL Aware
def : ReadAdvance<ReadVST1R, 0>;
def : ReadAdvance<ReadVST2R, 0>;
def : ReadAdvance<ReadVST4R, 0>;
def : ReadAdvance<ReadVST8R, 0>;

1163// 12. Vector Integer Arithmetic Instructions
1164defm : LMULReadAdvance<"ReadVIALUV", 0>;
1165defm : LMULReadAdvance<"ReadVIALUX", 0>;
1166defm : LMULReadAdvanceW<"ReadVIWALUV", 0>;
1167defm : LMULReadAdvanceW<"ReadVIWALUX", 0>;
1168defm : LMULReadAdvance<"ReadVExtV", 0>;
1169defm : LMULReadAdvance<"ReadVICALUV", 0>;
1170defm : LMULReadAdvance<"ReadVICALUX", 0>;
1171defm : LMULReadAdvance<"ReadVShiftV", 0>;
1172defm : LMULReadAdvance<"ReadVShiftX", 0>;
1173defm : LMULReadAdvanceW<"ReadVNShiftV", 0>;
1174defm : LMULReadAdvanceW<"ReadVNShiftX", 0>;
1175defm : LMULReadAdvance<"ReadVICmpV", 0>;
1176defm : LMULReadAdvance<"ReadVICmpX", 0>;
1177defm : LMULReadAdvance<"ReadVIMinMaxV", 0>;
1178defm : LMULReadAdvance<"ReadVIMinMaxX", 0>;
1179defm : LMULReadAdvance<"ReadVIMulV", 0>;
1180defm : LMULReadAdvance<"ReadVIMulX", 0>;
1181defm : LMULSEWReadAdvance<"ReadVIDivV", 0>;
1182defm : LMULSEWReadAdvance<"ReadVIDivX", 0>;
1183defm : LMULReadAdvanceW<"ReadVIWMulV", 0>;
1184defm : LMULReadAdvanceW<"ReadVIWMulX", 0>;
1185defm : LMULReadAdvance<"ReadVIMulAddV", 0>;
1186defm : LMULReadAdvance<"ReadVIMulAddX", 0>;
1187defm : LMULReadAdvanceW<"ReadVIWMulAddV", 0>;
1188defm : LMULReadAdvanceW<"ReadVIWMulAddX", 0>;
1189defm : LMULReadAdvance<"ReadVIMergeV", 0>;
1190defm : LMULReadAdvance<"ReadVIMergeX", 0>;
1191defm : LMULReadAdvance<"ReadVIMovV", 0>;
1192defm : LMULReadAdvance<"ReadVIMovX", 0>;
1193
1194// 13. Vector Fixed-Point Arithmetic Instructions
1195defm "" : LMULReadAdvance<"ReadVSALUV", 0>;
1196defm "" : LMULReadAdvance<"ReadVSALUX", 0>;
1197defm "" : LMULReadAdvance<"ReadVAALUV", 0>;
1198defm "" : LMULReadAdvance<"ReadVAALUX", 0>;
1199defm "" : LMULReadAdvance<"ReadVSMulV", 0>;
1200defm "" : LMULReadAdvance<"ReadVSMulX", 0>;
1201defm "" : LMULReadAdvance<"ReadVSShiftV", 0>;
1202defm "" : LMULReadAdvance<"ReadVSShiftX", 0>;
1203defm "" : LMULReadAdvanceW<"ReadVNClipV", 0>;
1204defm "" : LMULReadAdvanceW<"ReadVNClipX", 0>;
1205
1206// 14. Vector Floating-Point Instructions
1207defm "" : LMULSEWReadAdvanceF<"ReadVFALUV", 0>;
1208defm "" : LMULSEWReadAdvanceF<"ReadVFALUF", 0>;
1209defm "" : LMULSEWReadAdvanceFW<"ReadVFWALUV", 0>;
1210defm "" : LMULSEWReadAdvanceFW<"ReadVFWALUF", 0>;
1211defm "" : LMULSEWReadAdvanceF<"ReadVFMulV", 0>;
1212defm "" : LMULSEWReadAdvanceF<"ReadVFMulF", 0>;
1213defm "" : LMULSEWReadAdvanceF<"ReadVFDivV", 0>;
1214defm "" : LMULSEWReadAdvanceF<"ReadVFDivF", 0>;
1215defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulV", 0>;
1216defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulF", 0>;
1217defm "" : LMULSEWReadAdvanceF<"ReadVFMulAddV", 0>;
1218defm "" : LMULSEWReadAdvanceF<"ReadVFMulAddF", 0>;
1219defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddV", 0>;
1220defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddF", 0>;
1221defm "" : LMULSEWReadAdvanceF<"ReadVFSqrtV", 0>;
1222defm "" : LMULSEWReadAdvanceF<"ReadVFRecpV", 0>;
1223defm "" : LMULSEWReadAdvanceF<"ReadVFMinMaxV", 0>;
1224defm "" : LMULSEWReadAdvanceF<"ReadVFMinMaxF", 0>;
1225defm "" : LMULSEWReadAdvanceF<"ReadVFSgnjV", 0>;
1226defm "" : LMULSEWReadAdvanceF<"ReadVFSgnjF", 0>;
1227defm "" : LMULReadAdvance<"ReadVFCmpV", 0>;
1228defm "" : LMULReadAdvance<"ReadVFCmpF", 0>;
1229defm "" : LMULReadAdvance<"ReadVFClassV", 0>;
1230defm "" : LMULReadAdvance<"ReadVFMergeV", 0>;
1231defm "" : LMULReadAdvance<"ReadVFMergeF", 0>;
1232defm "" : LMULReadAdvance<"ReadVFMovF", 0>;
1233defm "" : LMULSEWReadAdvanceF<"ReadVFCvtIToFV", 0>;
1234defm "" : LMULReadAdvance<"ReadVFCvtFToIV", 0>;
1235defm "" : LMULSEWReadAdvanceW<"ReadVFWCvtIToFV", 0>;
1236defm "" : LMULReadAdvanceFW<"ReadVFWCvtFToIV", 0>;
1237defm "" : LMULSEWReadAdvanceFW<"ReadVFWCvtFToFV", 0>;
1238defm "" : LMULSEWReadAdvanceFW<"ReadVFNCvtIToFV", 0>;
1239defm "" : LMULReadAdvanceW<"ReadVFNCvtFToIV", 0>;
1240defm "" : LMULSEWReadAdvanceFW<"ReadVFNCvtFToFV", 0>;
1241
1242// 15. Vector Reduction Operations
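// Reduction source reads are not split by LMUL; each SchedRead gets a single
// advance of 0.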
def : ReadAdvance<ReadVIRedV, 0>;
def : ReadAdvance<ReadVIRedV0, 0>;
def : ReadAdvance<ReadVIWRedV, 0>;
def : ReadAdvance<ReadVIWRedV0, 0>;
def : ReadAdvance<ReadVFRedV, 0>;
def : ReadAdvance<ReadVFRedV0, 0>;
def : ReadAdvance<ReadVFRedOV, 0>;
def : ReadAdvance<ReadVFRedOV0, 0>;
def : ReadAdvance<ReadVFWRedV, 0>;
def : ReadAdvance<ReadVFWRedV0, 0>;
def : ReadAdvance<ReadVFWRedOV, 0>;
def : ReadAdvance<ReadVFWRedOV0, 0>;

// 16. Vector Mask Instructions
defm "" : LMULReadAdvance<"ReadVMALUV", 0>;
defm "" : LMULReadAdvance<"ReadVMPopV", 0>;
defm "" : LMULReadAdvance<"ReadVMFFSV", 0>;
defm "" : LMULReadAdvance<"ReadVMSFSV", 0>;
defm "" : LMULReadAdvance<"ReadVIotaV", 0>;

// 17. Vector Permutation Instructions
def : ReadAdvance<ReadVMovXS, 0>;
def : ReadAdvance<ReadVMovSX_V, 0>;
def : ReadAdvance<ReadVMovSX_X, 0>;
def : ReadAdvance<ReadVMovFS, 0>;
def : ReadAdvance<ReadVMovSF_V, 0>;
def : ReadAdvance<ReadVMovSF_F, 0>;
defm "" : LMULReadAdvance<"ReadVISlideV", 0>;
defm "" : LMULReadAdvance<"ReadVISlideX", 0>;
defm "" : LMULReadAdvance<"ReadVFSlideV", 0>;
defm "" : LMULReadAdvance<"ReadVFSlideF", 0>;
defm "" : LMULSEWReadAdvance<"ReadVRGatherVV_data", 0>;
defm "" : LMULSEWReadAdvance<"ReadVRGatherVV_index", 0>;
defm "" : LMULSEWReadAdvance<"ReadVRGatherEI16VV_data", 0>;
defm "" : LMULSEWReadAdvance<"ReadVRGatherEI16VV_index", 0>;
defm "" : LMULReadAdvance<"ReadVRGatherVX_data", 0>;
defm "" : LMULReadAdvance<"ReadVRGatherVX_index", 0>;
defm "" : LMULReadAdvance<"ReadVRGatherVI_data", 0>;
defm "" : LMULSEWReadAdvance<"ReadVCompressV", 0>;
// LMUL Aware
def : ReadAdvance<ReadVMov1V, 0>;
def : ReadAdvance<ReadVMov2V, 0>;
def : ReadAdvance<ReadVMov4V, 0>;
def : ReadAdvance<ReadVMov8V, 0>;

// Others
def : ReadAdvance<ReadVMask, 0>;
def : ReadAdvance<ReadVMergeOp_WorstCase, 0>;
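// Define an advance of 0 for every per-LMUL and per-SEW variant of
// ReadVMergeOp, e.g. ReadVMergeOp_M1 and ReadVMergeOp_M1_E8 ... _E64
// (the exact SEW list comes from SchedSEWSet<mx>).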
foreach mx = SchedMxList in {
  def : ReadAdvance<!cast<SchedRead>("ReadVMergeOp_" # mx), 0>;
  foreach sew = SchedSEWSet<mx>.val in
    def : ReadAdvance<!cast<SchedRead>("ReadVMergeOp_" # mx  # "_E" # sew), 0>;
}

//===----------------------------------------------------------------------===//
// Unsupported extensions
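// Mark the scheduling classes of these extensions as unsupported so the model
// stays complete without per-instruction data for them.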
defm : UnsupportedSchedZabha;
defm : UnsupportedSchedZbc;
defm : UnsupportedSchedZbkb;
defm : UnsupportedSchedZbkx;
defm : UnsupportedSchedZfa;
defm : UnsupportedSchedZvk;
}
