xref: /freebsd/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td (revision 67be1e195acfaec99ce4fffeb17111ce085755f7)
1//==- RISCVSchedSiFive7.td - SiFive7 Scheduling Definitions --*- tablegen -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9//===----------------------------------------------------------------------===//
10
/// Worst-case LMUL predicate for the SiFive7.
/// `c` is set when `mx` equals the largest LMUL found in `MxList`
/// (LargestLMUL<MxList>.r). On the SiFive7 the largest LMUL is the worst
/// case; SEW plays no part in this variant of the predicate.
class SiFive7IsWorstCaseMX<string mx, list<string> MxList> {
  bit c = !eq(mx, LargestLMUL<MxList>.r);
}
18
/// Worst-case (LMUL, SEW) predicate for the SiFive7.
/// `c` is set only when both hold:
///   * `mx` is the largest LMUL in `MxList`, and
///   * `sew` equals SmallestSEW<mx, isF>.r, the smallest SEW for that LMUL.
/// `isF` is forwarded unchanged to SmallestSEW.
class SiFive7IsWorstCaseMXSEW<string mx, int sew, list<string> MxList,
                               bit isF = 0> {
  defvar IsLargestLMUL = !eq(mx, LargestLMUL<MxList>.r);
  defvar IsSmallestSEW = !eq(sew, SmallestSEW<mx, isF>.r);
  bit c = !and(IsLargestLMUL, IsSmallestSEW);
}
28
/// Default occupancy, counted in DLEN-wide parts, for LMUL `mx`.
/// Parts = (LMUL * VLEN) / DLEN; since DLEN = VLEN / 2 this is 2 * LMUL.
/// Every fractional LMUL maps to a single part.
class SiFive7GetCyclesDefault<string mx> {
  int c = !cond(
    // Fractional LMULs.
    !eq(mx, "MF8") : 1,
    !eq(mx, "MF4") : 1,
    !eq(mx, "MF2") : 1,
    // Integral LMULs: 2 * LMUL.
    !eq(mx, "M1")  : 2,
    !eq(mx, "M2")  : 4,
    !eq(mx, "M4")  : 8,
    !eq(mx, "M8")  : 16
  );
}
42
/// Occupancy for narrowing operations at LMUL `mx`.
/// The table has no M8 entry, so instantiating it with "M8" (or any other
/// unlisted string) leaves the !cond without a matching arm.
class SiFive7GetCyclesNarrowing<string mx> {
  int c = !cond(
    !eq(mx, "MF8") : 1,
    !eq(mx, "MF4") : 1,
    !eq(mx, "MF2") : 2,
    !eq(mx, "M1")  : 4,
    !eq(mx, "M2")  : 8,
    !eq(mx, "M4")  : 16
  );
}
53
/// Occupancy for mask operations at LMUL `mx`: 2 for M8, 1 for every other
/// listed LMUL.
class SiFive7GetCyclesVMask<string mx> {
  int c = !cond(
    !eq(mx, "M8")  : 2,
    !eq(mx, "M4")  : 1,
    !eq(mx, "M2")  : 1,
    !eq(mx, "M1")  : 1,
    !eq(mx, "MF2") : 1,
    !eq(mx, "MF4") : 1,
    !eq(mx, "MF8") : 1
  );
}
65
/// VLDM and VSTM can't read/write more than 2 DLENs of data:
/// 2 DLENs when LMUL=8, 1 DLEN for every other LMUL.
class SiFive7GetMaskLoadStoreCycles<string mx> {
  int c = !if(!eq(mx, "M8"), 2, 1);
}
74
// Occupancy of nf=2 segmented loads and stores, from the formula
// (2 * VLEN * LMUL) / DLEN = 4 * LMUL. The table bottoms out at 1 cycle for
// MF4 and MF8.
class SiFive7GetCyclesSegmentedSeg2<string mx> {
  int c = !cond(
    !eq(mx, "MF8") : 1,
    !eq(mx, "MF4") : 1,
    !eq(mx, "MF2") : 2,
    !eq(mx, "M1")  : 4,
    !eq(mx, "M2")  : 8,
    !eq(mx, "M4")  : 16,
    !eq(mx, "M8")  : 32
  );
}
88
// Occupancy of segmented loads and stores:
//   vl * ceil((SEW * nf) / DLEN)
// where SEW * nf is the size of one segment in bits and vl is taken at its
// upper bound for the given LMUL and SEW.
class SiFive7GetCyclesSegmented<string mx, int sew, int nf> {
  defvar VLenBits = 512;
  defvar DLenBits = 256;
  // Upper bound on vl: (VLEN * LMUL) / SEW.
  defvar MaxVL = !cond(
    !eq(mx, "MF8") : !div(!div(VLenBits, 8), sew),
    !eq(mx, "MF4") : !div(!div(VLenBits, 4), sew),
    !eq(mx, "MF2") : !div(!div(VLenBits, 2), sew),
    !eq(mx, "M1")  : !div(VLenBits, sew),
    !eq(mx, "M2")  : !div(!mul(VLenBits, 2), sew),
    !eq(mx, "M4")  : !div(!mul(VLenBits, 4), sew),
    !eq(mx, "M8")  : !div(!mul(VLenBits, 8), sew)
  );
  // ceil(x / y) is computed with integer ops as (x + y - 1) / y.
  defvar SegmentBits = !mul(sew, nf);
  defvar BeatsPerSegment =
      !div(!sub(!add(SegmentBits, DLenBits), 1), DLenBits);
  int c = !mul(MaxVL, BeatsPerSegment);
}
109
/// Cycle count for operations that process one element per cycle.
class SiFive7GetCyclesOnePerElement<string mx, int sew> {
  // FIXME: On SiFive7, VLEN is 512. Although a user can request the compiler
  // to use a different VLEN, this model will not make scheduling decisions
  // based on the user specified VLEN.
  // c = (VLEN / SEW) * LMUL
  // Note: for SEW up to 64 the smallest VLEN / SEW is 512 / 64 = 8, and the
  // largest divisor applied afterwards is 8 (the MF8 case), so c >= 1 and
  // the result never needs to be rounded up.
  //
  // Despite its name, this field holds the number of SEW-sized elements in
  // one 512-bit register (512 / sew), not VLEN itself.
  int VLEN = !div(512, sew);
  int c = !cond(
    !eq(mx, "MF8") : !div(VLEN, 8),
    !eq(mx, "MF4") : !div(VLEN, 4),
    !eq(mx, "MF2") : !div(VLEN, 2),
    !eq(mx, "M1")  : VLEN,
    !eq(mx, "M2")  : !mul(VLEN, 2),
    !eq(mx, "M4")  : !mul(VLEN, 4),
    !eq(mx, "M8")  : !mul(VLEN, 8)
  );
}
129
/// Per-SEW multiplier used when computing divide (and, going by the name,
/// square-root) occupancy.
class SiFive7GetDivOrSqrtFactor<int sew> {
  int c = !cond(
    // TODO: Add SchedSEWSetFP upstream and remove the SEW=8 case.
    !or(!eq(sew, 8), !eq(sew, 16)) : 15,
    !eq(sew, 32) : 28,
    !eq(sew, 64) : 57
  );
}
139
/// Reductions take approximately
///   VL * SEW / DLEN + 5 * (4 + log2(DLEN / SEW))
/// cycles.
class SiFive7GetReductionCycles<string mx, int sew> {
  defvar VLenBits = 512;
  defvar DLenBits = !div(VLenBits, 2);
  // With VL at its upper bound (VLEN * LMUL) / SEW, the first term
  // VL * SEW / DLEN reduces to 2 * LMUL. The table floors it at 1 for the
  // fractional LMULs.
  defvar DLenParts = !cond(
    !eq(mx, "MF8") : 1,
    !eq(mx, "MF4") : 1,
    !eq(mx, "MF2") : 1,
    !eq(mx, "M1")  : 2,
    !eq(mx, "M2")  : 4,
    !eq(mx, "M4")  : 8,
    !eq(mx, "M8")  : 16
  );
  defvar LogElemsPerDLEN = !logtwo(!div(DLenBits, sew));
  int c = !add(DLenParts, !mul(5, !add(4, LogElemsPerDLEN)));
}
161
/// Ordered reductions take approximately 6 * VL cycles, with VL taken at its
/// upper bound for the given LMUL and SEW.
class SiFive7GetOrderedReductionCycles<string mx, int sew> {
  defvar VLenBits = 512;
  // Upper bound on vl: (VLEN * LMUL) / SEW.
  defvar MaxVL = !cond(
    !eq(mx, "MF8") : !div(!div(VLenBits, 8), sew),
    !eq(mx, "MF4") : !div(!div(VLenBits, 4), sew),
    !eq(mx, "MF2") : !div(!div(VLenBits, 2), sew),
    !eq(mx, "M1")  : !div(VLenBits, sew),
    !eq(mx, "M2")  : !div(!mul(VLenBits, 2), sew),
    !eq(mx, "M4")  : !div(!mul(VLenBits, 4), sew),
    !eq(mx, "M8")  : !div(!mul(VLenBits, 8), sew)
  );
  int c = !mul(6, MaxVL);
}
177
/// ReadAdvance of `cycles` (default 2) for `read`, applied when the value is
/// produced by any of the scalar integer writes listed below.
class SiFive7AnyToGPRBypass<SchedRead read, int cycles = 2>
    : ReadAdvance<read, cycles, [
        // ALU and shifts
        WriteIALU, WriteIALU32,
        WriteShiftImm, WriteShiftImm32,
        WriteShiftReg, WriteShiftReg32,
        // Bit manipulation
        WriteSHXADD, WriteSHXADD32,
        WriteRotateImm, WriteRotateImm32,
        WriteRotateReg, WriteRotateReg32,
        WriteSingleBit, WriteSingleBitImm,
        WriteBEXT, WriteBEXTI,
        WriteCLZ, WriteCLZ32, WriteCTZ, WriteCTZ32,
        WriteCPOP, WriteCPOP32,
        WriteREV8, WriteORCB,
        // Short forward branch
        WriteSFB,
        // Multiply and divide
        WriteIMul, WriteIMul32,
        WriteIDiv, WriteIDiv32,
        // Integer loads
        WriteLDB, WriteLDH, WriteLDW, WriteLDD
      ]>;
193
// Machine model for the SiFive7, used for scheduling and for other
// instruction cost heuristics.
def SiFive7Model : SchedMachineModel {
  // SiFive7 is in-order, so the micro-op buffer is explicitly sized to zero.
  let MicroOpBufferSize = 0;
  // 2 micro-ops are dispatched per cycle.
  let IssueWidth = 2;
  let LoadLatency = 3;
  let MispredictPenalty = 3;
  let EnableIntervals = true;
  let CompleteModel = 0;
  let UnsupportedFeatures = [
    HasStdExtZbkb, HasStdExtZbkc, HasStdExtZbkx, HasStdExtZcmt,
    HasStdExtZknd, HasStdExtZkne, HasStdExtZknh, HasStdExtZksed,
    HasStdExtZksh, HasStdExtZkr
  ];
}
207
208// The SiFive7 microarchitecture has three pipelines: A, B, V.
209// Pipe A can handle memory, integer alu and vector operations.
210// Pipe B can handle integer alu, control flow, integer multiply and divide,
211// and floating point computation.
212// The V pipeline is modeled by the VCQ, VA, VL, and VS resources.
213let SchedModel = SiFive7Model in {
// All of the resources below are unbuffered (BufferSize = 0).
let BufferSize = 0 in {
def SiFive7PipeA : ProcResource<1>;
def SiFive7PipeB : ProcResource<1>;
def SiFive7IDiv  : ProcResource<1>; // Int Division
def SiFive7FDiv  : ProcResource<1>; // FP Division/Sqrt
def SiFive7VA    : ProcResource<1>; // Arithmetic sequencer
def SiFive7VL    : ProcResource<1>; // Load sequencer
def SiFive7VS    : ProcResource<1>; // Store sequencer
// Vector Command Queue.
// The VCQ accepts instructions from the A pipe and holds them until the
// vector unit is ready to dequeue them. The unit dequeues up to one
// instruction per cycle, in order, as soon as the sequencer for that type of
// instruction is available. This resource is meant to be used for 1 cycle by
// all vector instructions, to model that only one vector instruction may be
// dequeued at a time. The actual dequeueing into the sequencer is modeled by
// the VA, VL, and VS sequencer resources above. Each of them will only accept
// a single instruction at a time and remain busy for the number of cycles
// associated with that instruction.
def SiFive7VCQ   : ProcResource<1>;
}

// Either scalar pipe.
def SiFive7PipeAB : ProcResGroup<[SiFive7PipeA, SiFive7PipeB]>;
235
// Branching: every jump write uses pipe B with a latency of 3.
let Latency = 3 in {
  foreach write = [WriteJmp, WriteJal, WriteJalr] in
    def : WriteRes<write, [SiFive7PipeB]>;
}

// Short forward branch: two micro-ops, using both pipe A and pipe B.
let Latency = 3, NumMicroOps = 2 in
def : WriteRes<WriteSFB, [SiFive7PipeA, SiFive7PipeB]>;
248
// Integer arithmetic and logic: either scalar pipe, latency 3.
let Latency = 3 in {
  foreach write = [WriteIALU, WriteIALU32,
                   WriteShiftImm, WriteShiftImm32,
                   WriteShiftReg, WriteShiftReg32] in
    def : WriteRes<write, [SiFive7PipeAB]>;
}

// Integer multiplication: pipe B only, latency 3.
let Latency = 3 in {
  foreach write = [WriteIMul, WriteIMul32] in
    def : WriteRes<write, [SiFive7PipeB]>;
}

// Integer division: pipe B is released after 1 cycle, while the divider
// stays occupied until one cycle before the result latency.
let Latency = 66, ReleaseAtCycles = [1, 65] in
def : WriteRes<WriteIDiv, [SiFive7PipeB, SiFive7IDiv]>;
let Latency = 34, ReleaseAtCycles = [1, 33] in
def : WriteRes<WriteIDiv32, [SiFive7PipeB, SiFive7IDiv]>;
274
// Bitmanip (all latency 3)
let Latency = 3 in {
  // Rotates are in the late-B ALU.
  foreach write = [WriteRotateImm, WriteRotateImm32,
                   WriteRotateReg, WriteRotateReg32] in
    def : WriteRes<write, [SiFive7PipeB]>;

  // clz[w]/ctz[w] are in the late-B ALU.
  foreach write = [WriteCLZ, WriteCLZ32, WriteCTZ, WriteCTZ32] in
    def : WriteRes<write, [SiFive7PipeB]>;

  // cpop[w] look exactly like multiply.
  foreach write = [WriteCPOP, WriteCPOP32] in
    def : WriteRes<write, [SiFive7PipeB]>;

  // orc.b is in the late-B ALU.
  def : WriteRes<WriteORCB, [SiFive7PipeB]>;

  // rev8 is in the late-A and late-B ALUs.
  def : WriteRes<WriteREV8, [SiFive7PipeAB]>;

  // shNadd[.uw] is on the early-B and late-B ALUs.
  foreach write = [WriteSHXADD, WriteSHXADD32] in
    def : WriteRes<write, [SiFive7PipeB]>;
}

// Single-bit instructions (all latency 3)
// BEXT[I] instruction is available on all ALUs and the other instructions
// are only available on the SiFive7B pipe.
let Latency = 3 in {
  foreach write = [WriteSingleBit, WriteSingleBitImm] in
    def : WriteRes<write, [SiFive7PipeB]>;
  foreach write = [WriteBEXT, WriteBEXTI] in
    def : WriteRes<write, [SiFive7PipeAB]>;
}
313
// Memory: every scalar memory write below uses pipe A.
// Integer and FP stores keep the WriteRes default latency.
foreach write = [WriteSTB, WriteSTH, WriteSTW, WriteSTD,
                 WriteFST16, WriteFST32, WriteFST64] in
  def : WriteRes<write, [SiFive7PipeA]>;

// Integer loads.
let Latency = 3 in {
  foreach write = [WriteLDB, WriteLDH, WriteLDW, WriteLDD] in
    def : WriteRes<write, [SiFive7PipeA]>;
}

// FP loads.
let Latency = 2 in {
  foreach write = [WriteFLD16, WriteFLD32, WriteFLD64] in
    def : WriteRes<write, [SiFive7PipeA]>;
}

// Atomic memory
// Atomic stores keep the WriteRes default latency.
foreach write = [WriteAtomicSTW, WriteAtomicSTD] in
  def : WriteRes<write, [SiFive7PipeA]>;

let Latency = 3 in {
  foreach write = [WriteAtomicW, WriteAtomicD,
                   WriteAtomicLDW, WriteAtomicLDD] in
    def : WriteRes<write, [SiFive7PipeA]>;
}
346
// Scalar floating point arithmetic runs on pipe B. Divide and square root
// additionally hold the FP divider: pipe B is released after 1 cycle, the
// divider one cycle before the result latency.

// Half precision.
let Latency = 5 in {
  foreach write = [WriteFAdd16, WriteFMul16, WriteFMA16] in
    def : WriteRes<write, [SiFive7PipeB]>;
}
let Latency = 3 in {
  foreach write = [WriteFSGNJ16, WriteFMinMax16] in
    def : WriteRes<write, [SiFive7PipeB]>;
}
let Latency = 14, ReleaseAtCycles = [1, 13] in {
  foreach write = [WriteFDiv16, WriteFSqrt16] in
    def : WriteRes<write, [SiFive7PipeB, SiFive7FDiv]>;
}

// Single precision.
let Latency = 5 in {
  foreach write = [WriteFAdd32, WriteFMul32, WriteFMA32] in
    def : WriteRes<write, [SiFive7PipeB]>;
}
let Latency = 3 in {
  foreach write = [WriteFSGNJ32, WriteFMinMax32] in
    def : WriteRes<write, [SiFive7PipeB]>;
}
let Latency = 27, ReleaseAtCycles = [1, 26] in {
  foreach write = [WriteFDiv32, WriteFSqrt32] in
    def : WriteRes<write, [SiFive7PipeB, SiFive7FDiv]>;
}

// Double precision
let Latency = 7 in {
  foreach write = [WriteFAdd64, WriteFMul64, WriteFMA64] in
    def : WriteRes<write, [SiFive7PipeB]>;
}
let Latency = 3 in {
  foreach write = [WriteFSGNJ64, WriteFMinMax64] in
    def : WriteRes<write, [SiFive7PipeB]>;
}
let Latency = 56, ReleaseAtCycles = [1, 55] in {
  foreach write = [WriteFDiv64, WriteFSqrt64] in
    def : WriteRes<write, [SiFive7PipeB, SiFive7FDiv]>;
}
394
// Conversions, classification, comparison and FP<->integer moves: all on
// pipe B with a latency of 3.
let Latency = 3 in {
  // Conversions
  foreach write = [WriteFCvtI32ToF16, WriteFCvtI32ToF32, WriteFCvtI32ToF64,
                   WriteFCvtI64ToF16, WriteFCvtI64ToF32, WriteFCvtI64ToF64,
                   WriteFCvtF16ToI32, WriteFCvtF16ToI64,
                   WriteFCvtF16ToF32, WriteFCvtF16ToF64,
                   WriteFCvtF32ToI32, WriteFCvtF32ToI64,
                   WriteFCvtF32ToF16, WriteFCvtF32ToF64,
                   WriteFCvtF64ToI32, WriteFCvtF64ToI64,
                   WriteFCvtF64ToF16, WriteFCvtF64ToF32] in
    def : WriteRes<write, [SiFive7PipeB]>;

  // Classify, compare and register moves
  foreach write = [WriteFClass16, WriteFClass32, WriteFClass64,
                   WriteFCmp16, WriteFCmp32, WriteFCmp64,
                   WriteFMovI16ToF16, WriteFMovF16ToI16,
                   WriteFMovI32ToF32, WriteFMovF32ToI32,
                   WriteFMovI64ToF64, WriteFMovF64ToI64] in
    def : WriteRes<write, [SiFive7PipeB]>;
}
429
// 6. Configuration-Setting Instructions
// All three vset* writes use pipe A with a latency of 3.
let Latency = 3 in {
  foreach write = [WriteVSETVLI, WriteVSETIVLI, WriteVSETVL] in
    def : WriteRes<write, [SiFive7PipeA]>;
}
436
// 7. Vector Loads and Stores
// Unit-stride loads and stores can operate at the full bandwidth of the memory
// pipe. The memory pipe is DLEN bits wide on x280.
//
// Each write takes the VCQ for cycle [0, 1) and then its sequencer (VL for
// loads, VS for stores) for cycles [1, 1 + Occupancy).
foreach mx = SchedMxList in {
  defvar Occupancy = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Occupancy)] in {
    let Latency = 4 in {
      foreach name = ["WriteVLDE", "WriteVLDFF"] in
        defm "" : LMULWriteResMX<name, [SiFive7VCQ, SiFive7VL], mx, WorstCase>;
    }
    let Latency = 1 in
    defm "" : LMULWriteResMX<"WriteVSTE", [SiFive7VCQ, SiFive7VS], mx, WorstCase>;
  }
}

// Mask loads and stores use their own, smaller, occupancy table.
foreach mx = SchedMxList in {
  defvar Occupancy = SiFive7GetMaskLoadStoreCycles<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Occupancy)] in {
    let Latency = 4 in
    defm "" : LMULWriteResMX<"WriteVLDM", [SiFive7VCQ, SiFive7VL], mx, WorstCase>;
    let Latency = 1 in
    defm "" : LMULWriteResMX<"WriteVSTM", [SiFive7VCQ, SiFive7VS], mx, WorstCase>;
  }
}
459
// Strided loads and stores operate at one element per cycle and should be
// scheduled accordingly. Indexed loads and stores operate at one element per
// cycle, and they stall the machine until all addresses have been generated,
// so they cannot be scheduled. Indexed and strided loads and stores have LMUL
// specific suffixes, but since SEW is already encoded in the name of the
// resource, we do not need to use LMULSEWXXX constructors. However, we do
// use the SEW from the name to determine the number of Cycles.

// This predicate is true when the rs2 operand of vlse or vsse is x0, false
// otherwise.
def VLDSX0Pred : MCSchedPredicate<CheckRegOperand<3, X0>>;

foreach eew = [8, 16, 32, 64] in {
  // TODO: The MxLists need to be filtered by EEW. We only need to support
  // LMUL >= SEW_min/ELEN. Here, the smallest EEW prevents us from having MF8
  // since LMUL >= 16/64.
  defvar MxListForEEW = !cond(
    !eq(eew, 8)  : SchedMxList,
    !eq(eew, 16) : ["MF4", "MF2", "M1", "M2", "M4", "M8"],
    !eq(eew, 32) : ["MF2", "M1", "M2", "M4", "M8"],
    !eq(eew, 64) : ["M1", "M2", "M4", "M8"]
  );
  foreach mx = MxListForEEW in {
    // Occupancy used for vlse when VLDSX0Pred holds (rs2 is x0).
    defvar X0Occupancy = SiFive7GetCyclesDefault<mx>.c;
    // One cycle per element otherwise.
    defvar Occupancy = SiFive7GetCyclesOnePerElement<mx, eew>.c;
    defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
    defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS" # eew, VLDSX0Pred,
                                         [SiFive7VCQ, SiFive7VL],
                                         4, [0, 1], [1, !add(1, X0Occupancy)],
                                         !add(3, Occupancy),
                                         [0, 1], [1, !add(1, Occupancy)],
                                         mx, WorstCase>;
    let AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Occupancy)] in {
      let Latency = !add(3, Occupancy) in {
        foreach name = ["WriteVLDUX", "WriteVLDOX"] in
          defm "" : LMULWriteResMX<name # eew, [SiFive7VCQ, SiFive7VL], mx, WorstCase>;
      }
      let Latency = 1 in {
        foreach name = ["WriteVSTS", "WriteVSTUX", "WriteVSTOX"] in
          defm "" : LMULWriteResMX<name # eew, [SiFive7VCQ, SiFive7VS], mx, WorstCase>;
      }
    }
  }
}
543
// Whole-register loads and stores are LMUL aware: moving N registers keeps
// the sequencer busy for 2 * N cycles after the single VCQ cycle.
// VLD*R
foreach nregs = [1, 2, 4, 8] in {
  let Latency = 4, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, !mul(2, nregs))] in
  def : WriteRes<!cast<SchedWrite>("WriteVLD" # nregs # "R"),
                 [SiFive7VCQ, SiFive7VL]>;
}
// VST*R
foreach nregs = [1, 2, 4, 8] in {
  let Latency = 1, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, !mul(2, nregs))] in
  def : WriteRes<!cast<SchedWrite>("WriteVST" # nregs # "R"),
                 [SiFive7VCQ, SiFive7VS]>;
}
562
// Segmented Loads and Stores
// Unit-stride segmented loads and stores are effectively converted into strided
// segment loads and stores. Strided segment loads and stores operate at up to
// one segment per cycle if the segment fits within one aligned memory beat.
// Indexed segment loads and stores operate at the same rate as strided ones,
// but they stall the machine until all addresses have been generated.
foreach mx = SchedMxList in {
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  foreach eew = [8, 16, 32, 64] in {
    foreach nf = 2-8 in {
      // nf=2 has its own occupancy table; nf >= 3 uses the general formula.
      defvar Occupancy = !if(!eq(nf, 2),
                             SiFive7GetCyclesSegmentedSeg2<mx>.c,
                             SiFive7GetCyclesSegmented<mx, eew, nf>.c);
      let AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Occupancy)] in {
        // Does not chain so set latency high
        let Latency = !add(3, Occupancy) in {
          foreach name = ["WriteVLSEG", "WriteVLSEGFF"] in
            defm "" : LMULWriteResMX<name # nf # "e" # eew,
                                     [SiFive7VCQ, SiFive7VL], mx, WorstCase>;
        }
        let Latency = 1 in
        defm "" : LMULWriteResMX<"WriteVSSEG" # nf # "e" # eew,
                                 [SiFive7VCQ, SiFive7VS], mx, WorstCase>;
      }
    }
  }
}
// Strided and indexed segmented loads and stores, for every nf and EEW.
foreach mx = SchedMxList in {
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  foreach nf = 2-8 in {
    foreach eew = [8, 16, 32, 64] in {
      defvar Occupancy = SiFive7GetCyclesSegmented<mx, eew, nf>.c;
      let AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Occupancy)] in {
        // Does not chain so set latency high
        let Latency = !add(3, Occupancy) in {
          foreach name = ["WriteVLSSEG", "WriteVLUXSEG", "WriteVLOXSEG"] in
            defm "" : LMULWriteResMX<name # nf # "e" # eew,
                                     [SiFive7VCQ, SiFive7VL], mx, WorstCase>;
        }
        let Latency = 1 in {
          foreach name = ["WriteVSSSEG", "WriteVSUXSEG", "WriteVSOXSEG"] in
            defm "" : LMULWriteResMX<name # nf # "e" # eew,
                                     [SiFive7VCQ, SiFive7VS], mx, WorstCase>;
        }
      }
    }
  }
}
612
// 11. Vector Integer Arithmetic Instructions
// Every write takes the VCQ for one cycle and then the arithmetic sequencer
// for the default per-LMUL occupancy.
foreach mx = SchedMxList in {
  defvar Occupancy = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Occupancy)] in {
    let Latency = 4 in {
      foreach name = ["WriteVIALUV", "WriteVIALUX", "WriteVIALUI",
                      "WriteVICALUV", "WriteVICALUX", "WriteVICALUI",
                      "WriteVShiftV", "WriteVShiftX", "WriteVShiftI",
                      "WriteVIMinMaxV", "WriteVIMinMaxX",
                      "WriteVIMulV", "WriteVIMulX",
                      "WriteVIMulAddV", "WriteVIMulAddX",
                      "WriteVIMergeV", "WriteVIMergeX", "WriteVIMergeI",
                      "WriteVIMovV", "WriteVIMovX", "WriteVIMovI"] in
        defm "" : LMULWriteResMX<name, [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
    }
    // Mask results can't chain.
    let Latency = !add(Occupancy, 3) in {
      foreach name = ["WriteVICmpV", "WriteVICmpX", "WriteVICmpI"] in
        defm "" : LMULWriteResMX<name, [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
    }
  }
}
// Vector integer extension: same timing as the basic integer ALU writes.
foreach mx = SchedMxList in {
  defvar Occupancy = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 4, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, Occupancy)] in
  defm "" : LMULWriteResMX<"WriteVExtV", [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
}
// Vector integer divide: occupancy depends on both LMUL and SEW. It is the
// per-SEW divide factor times a quarter of the one-per-element cycle count,
// and the result latency equals that occupancy.
foreach mx = SchedMxList in {
  foreach sew = SchedSEWSet<mx>.val in {
    defvar QuarterElems = !div(SiFive7GetCyclesOnePerElement<mx, sew>.c, 4);
    defvar Occupancy = !mul(SiFive7GetDivOrSqrtFactor<sew>.c, QuarterElems);
    defvar WorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
    let Latency = Occupancy, AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, Occupancy)] in {
      foreach name = ["WriteVIDivV", "WriteVIDivX"] in
        defm "" : LMULSEWWriteResMXSEW<name, [SiFive7VCQ, SiFive7VA], mx, sew, WorstCase>;
    }
  }
}
665
666// Widening
// Widening integer ALU / multiply / multiply-add writes, one set per LMUL
// in SchedMxListW. Latency is 8; SiFive7VA is released at 1 + the default
// per-LMUL cycle count.
foreach mx = SchedMxListW in {
  defvar VACycles = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
  let Latency = 8,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(VACycles, 1)] in {
    foreach write = ["WriteVIWALUV", "WriteVIWALUX", "WriteVIWALUI",
                     "WriteVIWMulV", "WriteVIWMulX",
                     "WriteVIWMulAddV", "WriteVIWMulAddX"] in
      defm "" : LMULWriteResMX<write, [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
  }
}
680// Narrowing
// Narrowing shift writes, one set per LMUL in SchedMxListW. Latency is 8;
// SiFive7VA occupancy comes from the narrowing cycle table.
foreach mx = SchedMxListW in {
  defvar NarrowCycles = SiFive7GetCyclesNarrowing<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
  let Latency = 8,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(NarrowCycles, 1)] in {
    foreach write = ["WriteVNShiftV", "WriteVNShiftX", "WriteVNShiftI"] in
      defm "" : LMULWriteResMX<write, [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
  }
}
690
691// 12. Vector Fixed-Point Arithmetic Instructions
// Fixed-point writes (saturating ALU, averaging ALU, saturating multiply,
// scaling shift), one set per LMUL in SchedMxList. Latency is 8; SiFive7VA
// is released at 1 + the default per-LMUL cycle count.
foreach mx = SchedMxList in {
  defvar VACycles = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 8,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(VACycles, 1)] in {
    foreach write = ["WriteVSALUV", "WriteVSALUX", "WriteVSALUI",
                     "WriteVAALUV", "WriteVAALUX",
                     "WriteVSMulV", "WriteVSMulX",
                     "WriteVSShiftV", "WriteVSShiftX", "WriteVSShiftI"] in
      defm "" : LMULWriteResMX<write, [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
  }
}
708// Narrowing
// Narrowing clip writes, one set per LMUL in SchedMxListW. Latency is 8;
// SiFive7VA occupancy comes from the narrowing cycle table.
foreach mx = SchedMxListW in {
  defvar NarrowCycles = SiFive7GetCyclesNarrowing<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
  let Latency = 8,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(NarrowCycles, 1)] in {
    foreach write = ["WriteVNClipV", "WriteVNClipX", "WriteVNClipI"] in
      defm "" : LMULWriteResMX<write, [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
  }
}
718
719// 13. Vector Floating-Point Instructions
// Vector floating-point writes, one set per LMUL in SchedMxList. Every
// entry uses [SiFive7VCQ, SiFive7VA] with VA released at 1 + the default
// per-LMUL cycle count; the three groups differ only in latency.
foreach mx = SchedMxList in {
  defvar VACycles = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;

  // Latency-8 group: ALU, multiply, multiply-add, reciprocal, conversions.
  let Latency = 8,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(VACycles, 1)] in {
    foreach write = ["WriteVFALUV", "WriteVFALUF",
                     "WriteVFMulV", "WriteVFMulF",
                     "WriteVFMulAddV", "WriteVFMulAddF",
                     "WriteVFRecpV",
                     "WriteVFCvtIToFV", "WriteVFCvtFToIV"] in
      defm "" : LMULWriteResMX<write, [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
  }

  // Latency-4 group: sign-injection, min/max, classify, merge, move.
  let Latency = 4,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(VACycles, 1)] in {
    foreach write = ["WriteVFSgnjV", "WriteVFSgnjF",
                     "WriteVFMinMaxV", "WriteVFMinMaxF",
                     "WriteVFClassV", "WriteVFMergeV", "WriteVFMovV"] in
      defm "" : LMULWriteResMX<write, [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
  }

  // Mask results can't chain, so compare latency is the cycle count plus 3.
  let Latency = !add(VACycles, 3),
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(VACycles, 1)] in {
    foreach write = ["WriteVFCmpV", "WriteVFCmpF"] in
      defm "" : LMULWriteResMX<write, [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
  }
}
// FP square-root and divide writes, keyed by LMUL (SchedMxListF) and SEW.
// The cycle count is (one-per-element cycles / 4) multiplied by the
// div/sqrt factor for the SEW; it drives both latency and VA occupancy.
foreach mx = SchedMxListF in {
  foreach sew = SchedSEWSet<mx, isF=1>.val in {
    defvar ElemCycles = SiFive7GetCyclesOnePerElement<mx, sew>.c;
    defvar DivCycles = !mul(SiFive7GetDivOrSqrtFactor<sew>.c, !div(ElemCycles, 4));
    defvar WorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
    let Latency = DivCycles,
        AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(DivCycles, 1)] in {
      foreach write = ["WriteVFSqrtV", "WriteVFDivV", "WriteVFDivF"] in
        defm "" : LMULSEWWriteResMXSEW<write, [SiFive7VCQ, SiFive7VA], mx, sew, WorstCase>;
    }
  }
}
761
762// Widening
// WriteVFWCvtIToFV, one entry per LMUL in SchedMxListW. Latency is 8;
// SiFive7VA is released at 1 + the default per-LMUL cycle count.
foreach mx = SchedMxListW in {
  defvar VACycles = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
  let Latency = 8,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(VACycles, 1)] in {
    defm "" : LMULWriteResMX<"WriteVFWCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
  }
}
// FP widening writes, one set per LMUL in SchedMxListFW. Latency is 8;
// SiFive7VA is released at 1 + the default per-LMUL cycle count.
foreach mx = SchedMxListFW in {
  defvar VACycles = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListFW>.c;
  let Latency = 8,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(VACycles, 1)] in {
    foreach write = ["WriteVFWALUV", "WriteVFWMulV", "WriteVFWMulAddV",
                     "WriteVFWCvtFToIV", "WriteVFWCvtFToFV",
                     "WriteVFWMulAddF", "WriteVFWMulF", "WriteVFWALUF"] in
      defm "" : LMULWriteResMX<write, [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
  }
}
784// Narrowing
// WriteVFNCvtFToIV, one entry per LMUL in SchedMxListW. Latency is 8;
// SiFive7VA occupancy comes from the narrowing cycle table.
foreach mx = SchedMxListW in {
  defvar NarrowCycles = SiFive7GetCyclesNarrowing<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
  let Latency = 8,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(NarrowCycles, 1)] in {
    defm "" : LMULWriteResMX<"WriteVFNCvtFToIV", [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
  }
}
// FP narrowing conversions to floating point, one set per LMUL in
// SchedMxListFW. Latency is 8; SiFive7VA occupancy comes from the
// narrowing cycle table.
foreach mx = SchedMxListFW in {
  defvar NarrowCycles = SiFive7GetCyclesNarrowing<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListFW>.c;
  let Latency = 8,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(NarrowCycles, 1)] in {
    foreach write = ["WriteVFNCvtIToFV", "WriteVFNCvtFToFV"] in
      defm "" : LMULWriteResMX<write, [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
  }
}
800
801// 14. Vector Reduction Operations
// Integer reduction writes, keyed by LMUL and SEW. The reduction cycle
// count is used for both the latency and the SiFive7VA release cycle.
foreach mx = SchedMxList in {
  foreach sew = SchedSEWSet<mx>.val in {
    defvar RedCycles = SiFive7GetReductionCycles<mx, sew>.c;
    defvar WorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
    let Latency = RedCycles,
        AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(RedCycles, 1)] in {
      foreach write = ["WriteVIRedV_From", "WriteVIRedMinMaxV_From"] in
        defm "" : LMULSEWWriteResMXSEW<write, [SiFive7VCQ, SiFive7VA],
                                       mx, sew, WorstCase>;
    }
  }
}
814
// Widening integer reduction write, keyed by LMUL (SchedMxListWRed) and
// SEW. The reduction cycle count sets both latency and VA occupancy.
foreach mx = SchedMxListWRed in {
  foreach sew = SchedSEWSet<mx, 0, 1>.val in {
    defvar RedCycles = SiFive7GetReductionCycles<mx, sew>.c;
    defvar WorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListWRed>.c;
    let Latency = RedCycles,
        AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(RedCycles, 1)] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From", [SiFive7VCQ, SiFive7VA],
                                     mx, sew, WorstCase>;
    }
  }
}
824
// FP reduction writes, keyed by LMUL (SchedMxListF) and SEW.
// Unordered and min/max reductions use SiFive7GetReductionCycles; the
// ordered reduction uses SiFive7GetOrderedReductionCycles. In each case
// the cycle count sets both latency and the SiFive7VA release cycle.
foreach mx = SchedMxListF in {
  foreach sew = SchedSEWSet<mx, 1>.val in {
    defvar UnorderedCycles = SiFive7GetReductionCycles<mx, sew>.c;
    defvar WorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
    let Latency = UnorderedCycles,
        AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(UnorderedCycles, 1)] in {
      foreach write = ["WriteVFRedV_From", "WriteVFRedMinMaxV_From"] in
        defm "" : LMULSEWWriteResMXSEW<write, [SiFive7VCQ, SiFive7VA],
                                       mx, sew, WorstCase>;
    }

    defvar OrderedCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c;
    let Latency = OrderedCycles,
        AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(OrderedCycles, 1)] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [SiFive7VCQ, SiFive7VA],
                                     mx, sew, WorstCase>;
    }
  }
}
841
// FP widening reduction writes, keyed by LMUL (SchedMxListFWRed) and SEW.
// The unordered form uses SiFive7GetReductionCycles and the ordered form
// uses SiFive7GetOrderedReductionCycles, each for latency and VA occupancy.
foreach mx = SchedMxListFWRed in {
  foreach sew = SchedSEWSet<mx, 1, 1>.val in {
    defvar UnorderedCycles = SiFive7GetReductionCycles<mx, sew>.c;
    defvar WorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFWRed, 1>.c;
    let Latency = UnorderedCycles,
        AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(UnorderedCycles, 1)] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [SiFive7VCQ, SiFive7VA],
                                     mx, sew, WorstCase>;
    }

    defvar OrderedCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c;
    let Latency = OrderedCycles,
        AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(OrderedCycles, 1)] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [SiFive7VCQ, SiFive7VA],
                                     mx, sew, WorstCase>;
    }
  }
}
855
856// 15. Vector Mask Instructions
// Mask ALU / popcount / find-first-set / set-first writes, one set per
// LMUL in SchedMxList. Latency is 4; SiFive7VA occupancy comes from the
// dedicated mask cycle table (SiFive7GetCyclesVMask).
foreach mx = SchedMxList in {
  defvar MaskCycles = SiFive7GetCyclesVMask<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 4,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(MaskCycles, 1)] in {
    foreach write = ["WriteVMALUV", "WriteVMPopV", "WriteVMFFSV", "WriteVMSFSV"] in
      defm "" : LMULWriteResMX<write, [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
  }
}
// WriteVMIotV / WriteVMIdxV, one set per LMUL in SchedMxList. Latency is
// 4; unlike the other mask writes these use the default per-LMUL cycle
// count for SiFive7VA occupancy.
foreach mx = SchedMxList in {
  defvar VACycles = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 4,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(VACycles, 1)] in {
    foreach write = ["WriteVMIotV", "WriteVMIdxV"] in
      defm "" : LMULWriteResMX<write, [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
  }
}
875
876// 16. Vector Permutation Instructions
// Scalar <-> vector element moves. These are not LMUL-dependent: latency
// is 4 and SiFive7VA is released at cycle 2 (1 + 1).
let Latency = 4,
    AcquireAtCycles = [0, 1],
    ReleaseAtCycles = [1, 2] in {
  foreach write = [WriteVIMovVX, WriteVIMovXV, WriteVFMovVF, WriteVFMovFV] in
    def : WriteRes<write, [SiFive7VCQ, SiFive7VA]>;
}
// Register gather with scalar or immediate index, one set per LMUL in
// SchedMxList. Latency is 8; SiFive7VA is released at 1 + the default
// per-LMUL cycle count.
foreach mx = SchedMxList in {
  defvar VACycles = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 8,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(VACycles, 1)] in {
    foreach write = ["WriteVRGatherVX", "WriteVRGatherVI"] in
      defm "" : LMULWriteResMX<write, [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
  }
}
891
// Vector-indexed gather and compress, keyed by LMUL and SEW. Occupancy is
// the one-per-element cycle count; latency is that count plus 3.
foreach mx = SchedMxList in {
  foreach sew = SchedSEWSet<mx>.val in {
    defvar ElemCycles = SiFive7GetCyclesOnePerElement<mx, sew>.c;
    defvar WorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
    let Latency = !add(ElemCycles, 3),
        AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(ElemCycles, 1)] in {
      foreach write = ["WriteVRGatherVV", "WriteVCompressV"] in
        defm "" : LMULSEWWriteResMXSEW<write, [SiFive7VCQ, SiFive7VA], mx, sew, WorstCase>;
    }
  }
}
902
// Slide writes, one set per LMUL in SchedMxList. Latency is 4; SiFive7VA
// is released at 1 + the default per-LMUL cycle count.
foreach mx = SchedMxList in {
  defvar VACycles = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 4,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(VACycles, 1)] in {
    foreach write = ["WriteVISlideX", "WriteVISlideI",
                     "WriteVISlide1X", "WriteVFSlide1F"] in
      defm "" : LMULWriteResMX<write, [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
  }
}
913
914// VMov*V is LMUL Aware
// Whole-register moves WriteVMov{1,2,4,8}V. Latency is 4 for all of them.
// SiFive7VA is released at 1 + 2 * N (3, 5, 9, 17), i.e. the same 2/4/8/16
// occupancy that SiFive7GetCyclesDefault lists for M1/M2/M4/M8.
foreach nregs = [1, 2, 4, 8] in {
  let Latency = 4,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, !mul(2, nregs))] in {
    def : WriteRes<!cast<SchedWrite>("WriteVMov" # nregs # "V"),
                   [SiFive7VCQ, SiFive7VA]>;
  }
}
923
924// Others
// CSR accesses go through SiFive7PipeB with the default WriteRes fields.
def : WriteRes<WriteCSR, [SiFive7PipeB]>;

// A nop consumes no processor resources.
def : WriteRes<WriteNop, []>;

// Reading VLENB also uses SiFive7PipeB, with a latency of 3.
let Latency = 3 in {
  def : WriteRes<WriteRdVLENB, [SiFive7PipeB]>;
}

// Schedule the COPY pseudo like an integer ALU operation.
def : InstRW<[WriteIALU], (instrs COPY)>;
931
932//===----------------------------------------------------------------------===//
933
934// Bypass and advance
// Jump operands use the GPR bypass with its default cycle argument.
foreach rd = [ReadJmp, ReadJalr] in
  def : SiFive7AnyToGPRBypass<rd>;

// CSR and memory operands have no read advance.
foreach rd = [ReadCSR, ReadStoreData, ReadMemBase] in
  def : ReadAdvance<rd, 0>;

// Integer ALU and shift operands use the GPR bypass.
foreach rd = [ReadIALU, ReadIALU32,
              ReadShiftImm, ReadShiftImm32,
              ReadShiftReg, ReadShiftReg32] in
  def : SiFive7AnyToGPRBypass<rd>;

// Divide, multiply and atomic operands have no read advance.
foreach rd = [ReadIDiv, ReadIDiv32, ReadIMul, ReadIMul32,
              ReadAtomicWA, ReadAtomicWD, ReadAtomicDA, ReadAtomicDD,
              ReadAtomicLDW, ReadAtomicLDD, ReadAtomicSTW, ReadAtomicSTD] in
  def : ReadAdvance<rd, 0>;
// Scalar floating-point operands: every read below gets a read advance of
// 0. The list keeps the original definition order.
foreach rd = [ReadFStoreData, ReadFMemBase,
              ReadFAdd16, ReadFAdd32, ReadFAdd64,
              ReadFMul16, ReadFMA16, ReadFMA16Addend,
              ReadFMul32, ReadFMul64,
              ReadFMA32, ReadFMA32Addend,
              ReadFMA64, ReadFMA64Addend,
              ReadFDiv16, ReadFDiv32, ReadFDiv64,
              ReadFSqrt16, ReadFSqrt32, ReadFSqrt64,
              ReadFCmp16, ReadFCmp32, ReadFCmp64,
              ReadFSGNJ16, ReadFSGNJ32, ReadFSGNJ64,
              ReadFMinMax16, ReadFMinMax32, ReadFMinMax64,
              ReadFCvtF16ToI32, ReadFCvtF16ToI64,
              ReadFCvtF32ToI32, ReadFCvtF32ToI64,
              ReadFCvtF64ToI32, ReadFCvtF64ToI64,
              ReadFCvtI32ToF16, ReadFCvtI32ToF32, ReadFCvtI32ToF64,
              ReadFCvtI64ToF16, ReadFCvtI64ToF32, ReadFCvtI64ToF64,
              ReadFCvtF32ToF64, ReadFCvtF64ToF32,
              ReadFCvtF16ToF32, ReadFCvtF32ToF16,
              ReadFCvtF16ToF64, ReadFCvtF64ToF16,
              ReadFMovF16ToI16, ReadFMovI16ToF16,
              ReadFMovF32ToI32, ReadFMovI32ToF32,
              ReadFMovF64ToI64, ReadFMovI64ToF64,
              ReadFClass16, ReadFClass32, ReadFClass64] in
  def : ReadAdvance<rd, 0>;
1014
// Short-forward-branch reads: GPR bypass instantiated with an explicit
// second argument of 0 instead of the class default.
foreach rd = [ReadSFBJmp, ReadSFBALU] in
  def : SiFive7AnyToGPRBypass<rd, 0>;
1017
1018// Bitmanip
// Rotate and count-leading/trailing-zero operands use the GPR bypass.
foreach rd = [ReadRotateImm, ReadRotateImm32,
              ReadRotateReg, ReadRotateReg32,
              ReadCLZ, ReadCLZ32, ReadCTZ, ReadCTZ32] in
  def : SiFive7AnyToGPRBypass<rd>;

// Population-count operands have no read advance.
foreach rd = [ReadCPOP, ReadCPOP32] in
  def : ReadAdvance<rd, 0>;

// Byte-wise and shift-add operands use the GPR bypass.
foreach rd = [ReadORCB, ReadREV8, ReadSHXADD, ReadSHXADD32] in
  def : SiFive7AnyToGPRBypass<rd>;

// Single-bit instructions
foreach rd = [ReadSingleBit, ReadSingleBitImm] in
  def : SiFive7AnyToGPRBypass<rd>;
1036
1037// 6. Configuration-Setting Instructions
// Both vsetvl-style reads get a read advance of 2.
foreach rd = [ReadVSETVLI, ReadVSETVL] in
  def : ReadAdvance<rd, 2>;
1040
1041// 7. Vector Loads and Stores
// Vector load/store operands: every read in this section gets a read
// advance of 0. Plain reads use ReadAdvance directly; per-LMUL reads go
// through the LMULReadAdvance multiclass. Definition order is unchanged.

// Unit-stride address operands.
foreach rd = [ReadVLDX, ReadVSTX] in
  def : ReadAdvance<rd, 0>;
foreach rd = ["ReadVSTEV", "ReadVSTM"] in
  defm "" : LMULReadAdvance<rd, 0>;

// Strided address operands.
foreach rd = [ReadVLDSX, ReadVSTSX] in
  def : ReadAdvance<rd, 0>;

// Strided store data, indexed load indices, and indexed store operands.
foreach rd = ["ReadVSTS8V", "ReadVSTS16V", "ReadVSTS32V", "ReadVSTS64V",
              "ReadVLDUXV", "ReadVLDOXV",
              "ReadVSTUX8", "ReadVSTUX16", "ReadVSTUX32", "ReadVSTUX64",
              "ReadVSTUXV",
              "ReadVSTUX8V", "ReadVSTUX16V", "ReadVSTUX32V", "ReadVSTUX64V",
              "ReadVSTOX8", "ReadVSTOX16", "ReadVSTOX32", "ReadVSTOX64",
              "ReadVSTOXV",
              "ReadVSTOX8V", "ReadVSTOX16V", "ReadVSTOX32V", "ReadVSTOX64V"] in
  defm "" : LMULReadAdvance<rd, 0>;

// LMUL Aware
foreach rd = [ReadVST1R, ReadVST2R, ReadVST4R, ReadVST8R] in
  def : ReadAdvance<rd, 0>;
1077
// 11. Vector Integer Arithmetic Instructions
// Vector integer operands: all read advances are 0. The groups below
// alternate between the plain (LMULReadAdvance), widening/narrowing
// (LMULReadAdvanceW) and per-SEW (LMULSEWReadAdvance) multiclasses, in
// the original definition order.
foreach rd = ["ReadVIALUV", "ReadVIALUX"] in
  defm : LMULReadAdvance<rd, 0>;
foreach rd = ["ReadVIWALUV", "ReadVIWALUX"] in
  defm : LMULReadAdvanceW<rd, 0>;
foreach rd = ["ReadVExtV", "ReadVICALUV", "ReadVICALUX",
              "ReadVShiftV", "ReadVShiftX"] in
  defm : LMULReadAdvance<rd, 0>;
foreach rd = ["ReadVNShiftV", "ReadVNShiftX"] in
  defm : LMULReadAdvanceW<rd, 0>;
foreach rd = ["ReadVICmpV", "ReadVICmpX",
              "ReadVIMinMaxV", "ReadVIMinMaxX",
              "ReadVIMulV", "ReadVIMulX"] in
  defm : LMULReadAdvance<rd, 0>;
foreach rd = ["ReadVIDivV", "ReadVIDivX"] in
  defm : LMULSEWReadAdvance<rd, 0>;
foreach rd = ["ReadVIWMulV", "ReadVIWMulX"] in
  defm : LMULReadAdvanceW<rd, 0>;
foreach rd = ["ReadVIMulAddV", "ReadVIMulAddX"] in
  defm : LMULReadAdvance<rd, 0>;
foreach rd = ["ReadVIWMulAddV", "ReadVIWMulAddX"] in
  defm : LMULReadAdvanceW<rd, 0>;
foreach rd = ["ReadVIMergeV", "ReadVIMergeX", "ReadVIMovV", "ReadVIMovX"] in
  defm : LMULReadAdvance<rd, 0>;
1108
// 12. Vector Fixed-Point Arithmetic Instructions
// Fixed-point operands: all read advances are 0. The narrowing clip
// operands use the LMULReadAdvanceW multiclass; the rest use
// LMULReadAdvance.
foreach rd = ["ReadVSALUV", "ReadVSALUX",
              "ReadVAALUV", "ReadVAALUX",
              "ReadVSMulV", "ReadVSMulX",
              "ReadVSShiftV", "ReadVSShiftX"] in
  defm "" : LMULReadAdvance<rd, 0>;
foreach rd = ["ReadVNClipV", "ReadVNClipX"] in
  defm "" : LMULReadAdvanceW<rd, 0>;
1120
// 13. Vector Floating-Point Instructions
// Vector floating-point operands: all read advances are 0. Four
// multiclasses are used (LMULReadAdvance, LMULReadAdvanceW,
// LMULReadAdvanceFW, LMULSEWReadAdvanceF); the groups below keep the
// original definition order.
foreach rd = ["ReadVFALUV", "ReadVFALUF"] in
  defm "" : LMULReadAdvance<rd, 0>;
foreach rd = ["ReadVFWALUV", "ReadVFWALUF"] in
  defm "" : LMULReadAdvanceFW<rd, 0>;
foreach rd = ["ReadVFMulV", "ReadVFMulF"] in
  defm "" : LMULReadAdvance<rd, 0>;
foreach rd = ["ReadVFDivV", "ReadVFDivF"] in
  defm "" : LMULSEWReadAdvanceF<rd, 0>;
foreach rd = ["ReadVFWMulV", "ReadVFWMulF"] in
  defm "" : LMULReadAdvanceFW<rd, 0>;
foreach rd = ["ReadVFMulAddV", "ReadVFMulAddF"] in
  defm "" : LMULReadAdvance<rd, 0>;
foreach rd = ["ReadVFWMulAddV", "ReadVFWMulAddF"] in
  defm "" : LMULReadAdvanceFW<rd, 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFSqrtV", 0>;
foreach rd = ["ReadVFRecpV",
              "ReadVFMinMaxV", "ReadVFMinMaxF",
              "ReadVFSgnjV", "ReadVFSgnjF",
              "ReadVFCmpV", "ReadVFCmpF",
              "ReadVFClassV",
              "ReadVFMergeV", "ReadVFMergeF",
              "ReadVFMovF",
              "ReadVFCvtIToFV", "ReadVFCvtFToIV"] in
  defm "" : LMULReadAdvance<rd, 0>;

// Widening and narrowing conversions.
defm "" : LMULReadAdvanceW<"ReadVFWCvtIToFV", 0>;
foreach rd = ["ReadVFWCvtFToIV", "ReadVFWCvtFToFV", "ReadVFNCvtIToFV"] in
  defm "" : LMULReadAdvanceFW<rd, 0>;
defm "" : LMULReadAdvanceW<"ReadVFNCvtFToIV", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFNCvtFToFV", 0>;
1156
// 14. Vector Reduction Operations
// Reduction operands: each read and its "0"-suffixed companion gets a
// read advance of 0.
foreach rd = [ReadVIRedV, ReadVIRedV0,
              ReadVIWRedV, ReadVIWRedV0,
              ReadVFRedV, ReadVFRedV0,
              ReadVFRedOV, ReadVFRedOV0,
              ReadVFWRedV, ReadVFWRedV0,
              ReadVFWRedOV, ReadVFWRedOV0] in
  def : ReadAdvance<rd, 0>;
1170
// 15. Vector Mask Instructions
// Mask-instruction operands: per-LMUL read advances of 0.
foreach rd = ["ReadVMALUV", "ReadVMPopV", "ReadVMFFSV",
              "ReadVMSFSV", "ReadVMIotV"] in
  defm "" : LMULReadAdvance<rd, 0>;
1177
// 16. Vector Permutation Instructions
// Permutation operands: all read advances are 0. Definition order is
// unchanged.

// Scalar <-> vector element moves.
foreach rd = [ReadVIMovVX, ReadVIMovXV, ReadVIMovXX,
              ReadVFMovVF, ReadVFMovFV, ReadVFMovFX] in
  def : ReadAdvance<rd, 0>;

// Slides (per LMUL).
foreach rd = ["ReadVISlideV", "ReadVISlideX", "ReadVFSlideV", "ReadVFSlideF"] in
  defm "" : LMULReadAdvance<rd, 0>;

// Gathers and compress: the vector-indexed gather and compress reads are
// per LMUL and SEW, the scalar/immediate-indexed gather reads are per LMUL.
foreach rd = ["ReadVRGatherVV_data", "ReadVRGatherVV_index"] in
  defm "" : LMULSEWReadAdvance<rd, 0>;
foreach rd = ["ReadVRGatherVX_data", "ReadVRGatherVX_index",
              "ReadVRGatherVI_data"] in
  defm "" : LMULReadAdvance<rd, 0>;
defm "" : LMULSEWReadAdvance<"ReadVCompressV", 0>;

// LMUL Aware
foreach rd = [ReadVMov1V, ReadVMov2V, ReadVMov4V, ReadVMov8V] in
  def : ReadAdvance<rd, 0>;
1200
1201// Others
// Mask operand and merge operand reads: all read advances are 0.
def : ReadAdvance<ReadVMask, 0>;
def : ReadAdvance<ReadVMergeOp_WorstCase, 0>;

// ReadVMergeOp_<mx> for every LMUL, plus ReadVMergeOp_<mx>_E<sew> for
// every SEW in that LMUL's SchedSEWSet.
foreach mx = SchedMxList in {
  defvar MergeOpName = "ReadVMergeOp_" # mx;
  def : ReadAdvance<!cast<SchedRead>(MergeOpName), 0>;
  foreach sew = SchedSEWSet<mx>.val in {
    def : ReadAdvance<!cast<SchedRead>(MergeOpName # "_E" # sew), 0>;
  }
}
1209
1210//===----------------------------------------------------------------------===//
1211// Unsupported extensions
// Instantiate the UnsupportedSched* multiclasses for the extensions listed
// as unsupported: Zbc, Zbkb, Zbkx and Zfa.
1212defm : UnsupportedSchedZbc;
1213defm : UnsupportedSchedZbkb;
1214defm : UnsupportedSchedZbkx;
1215defm : UnsupportedSchedZfa;
1216}
1217