//==- RISCVSchedSiFive7.td - SiFive7 Scheduling Definitions --*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//

/// Worst-case LMUL predicate. `c` is true exactly when `mx` is the largest
/// LMUL in `MxList` (as reported by LargestLMUL); on the SiFive7 the largest
/// LMUL is the worst case.
class SiFive7IsWorstCaseMX<string mx, list<string> MxList> {
  bit c = !eq(mx, LargestLMUL<MxList>.r);
}

/// Worst-case (LMUL, SEW) predicate. `c` is true exactly when `mx` is the
/// largest LMUL in `MxList` and `sew` is the smallest SEW that SmallestSEW
/// reports for `mx` (`isF` is forwarded to SmallestSEW unchanged).
class SiFive7IsWorstCaseMXSEW<string mx, int sew, list<string> MxList,
                              bit isF = 0> {
  defvar IsLargestLMUL = !eq(mx, LargestLMUL<MxList>.r);
  defvar IsSmallestSEW = !eq(sew, SmallestSEW<mx, isF>.r);
  bit c = !and(IsLargestLMUL, IsSmallestSEW);
}

/// Number of DLEN parts = (LMUL * VLEN) / DLEN.
/// Since DLEN = VLEN / 2, Num DLEN parts = 2 * LMUL.
class SiFive7GetCyclesDefault<string mx> {
  // 2 * LMUL for integral LMULs; every fractional LMUL is charged 1 cycle.
  int c = !cond(
    !eq(mx, "M1") : 2,
    !eq(mx, "M2") : 4,
    !eq(mx, "M4") : 8,
    !eq(mx, "M8") : 16,
    !eq(mx, "MF2") : 1,
    !eq(mx, "MF4") : 1,
    !eq(mx, "MF8") : 1
  );
}

/// Cycle table used by the narrowing writes: 4 * LMUL for M1/M2/M4, 2 for
/// MF2, and 1 for MF4/MF8.
/// NOTE(review): there is no "M8" arm; the users visible in this file iterate
/// SchedMxListW, which presumably excludes M8 -- confirm.
class SiFive7GetCyclesNarrowing<string mx> {
  int c = !cond(
    !eq(mx, "M1") : 4,
    !eq(mx, "M2") : 8,
    !eq(mx, "M4") : 16,
    !eq(mx, "MF2") : 2,
    !eq(mx, "MF4") : 1,
    !eq(mx, "MF8") : 1
  );
}

/// Cycle table that yields 2 for M8 and 1 for every other LMUL.
class SiFive7GetCyclesVMask<string mx> {
  int c = !cond(
    !eq(mx, "M1") : 1,
    !eq(mx, "M2") : 1,
    !eq(mx, "M4") : 1,
    !eq(mx, "M8") : 2,
    !eq(mx, "MF2") : 1,
    !eq(mx, "MF4") : 1,
    !eq(mx, "MF8") : 1
  );
}

/// VLDM and VSTM can't read/write more than 2 DLENs of data.
/// 2 DLENs when LMUL=8. 1 DLEN for all other LMULs.
class SiFive7GetMaskLoadStoreCycles<string mx> {
  int c = !cond(
    !eq(mx, "M8") : 2,
    true : 1
  );
}

// Cycles for nf=2 segmented loads and stores are calculated using the
// formula (2 * VLEN * LMUL) / DLEN = 4 * LMUL
// (fractional LMULs below MF2 are charged 1 cycle).
class SiFive7GetCyclesSegmentedSeg2<string mx> {
  int c = !cond(
    !eq(mx, "M1") : 4,
    !eq(mx, "M2") : 8,
    !eq(mx, "M4") : 16,
    !eq(mx, "M8") : 32,
    !eq(mx, "MF2") : 2,
    !eq(mx, "MF4") : 1,
    !eq(mx, "MF8") : 1
  );
}

// Cycles for segmented loads and stores are calculated using the
// formula vl * ceil((SEW * nf) / DLEN), where SEW * nf is the segment size.
// vl is taken as its upper bound (VLEN * LMUL) / SEW with VLEN = 512.
class SiFive7GetCyclesSegmented<string mx, int sew, int nf> {
  defvar VLEN = 512;
  defvar DLEN = 256;
  // (VLEN * LMUL) / SEW
  defvar VLUpperBound = !cond(
    !eq(mx, "M1") : !div(VLEN, sew),
    !eq(mx, "M2") : !div(!mul(VLEN, 2), sew),
    !eq(mx, "M4") : !div(!mul(VLEN, 4), sew),
    !eq(mx, "M8") : !div(!mul(VLEN, 8), sew),
    !eq(mx, "MF2") : !div(!div(VLEN, 2), sew),
    !eq(mx, "MF4") : !div(!div(VLEN, 4), sew),
    !eq(mx, "MF8") : !div(!div(VLEN, 8), sew),
  );
  // We can calculate ceil(a/b) using (a + b - 1) / b.
  defvar a = !mul(sew, nf);
  defvar b = DLEN;
  int c = !mul(VLUpperBound, !div(!sub(!add(a, b), 1), b));
}

/// Cycle count for operations that process one element per cycle:
/// (512 / SEW) scaled by LMUL.
class SiFive7GetCyclesOnePerElement<string mx, int sew> {
  // FIXME: On SiFive7, VLEN is 512. Although a user can request the compiler
  // to use a different VLEN, this model will not make scheduling decisions
  // based on the user specified VLEN.
  // c = ceil(VLEN / SEW) * LMUL
  // Note: despite its name, the `VLEN` field below holds 512 / SEW, i.e. the
  // number of SEW-sized elements in one 512-bit register.
  // c >= 1 since the smallest value of that field is 512 / 64 = 8 (SEW=64),
  // and the largest division performed on it is in the MF8 case with
  // division by 8. Therefore, there is no need to ceil the result.
  int VLEN = !div(512, sew);
  int c = !cond(
    !eq(mx, "M1") : VLEN,
    !eq(mx, "M2") : !mul(VLEN, 2),
    !eq(mx, "M4") : !mul(VLEN, 4),
    !eq(mx, "M8") : !mul(VLEN, 8),
    !eq(mx, "MF2") : !div(VLEN, 2),
    !eq(mx, "MF4") : !div(VLEN, 4),
    !eq(mx, "MF8") : !div(VLEN, 8)
  );
}

/// Per-SEW multiplier used by the vector divide/sqrt cycle computations in
/// this file.
class SiFive7GetDivOrSqrtFactor<int sew> {
  int c = !cond(
    // TODO: Add SchedSEWSetFP upstream and remove the SEW=8 case.
    !eq(sew, 8) : 15,
    !eq(sew, 16) : 15,
    !eq(sew, 32) : 28,
    !eq(sew, 64) : 57
  );
}

/// Cycles for reductions take approximately VL*SEW/DLEN + 5(4 + log(DLEN/SEW))
/// cycles.
class SiFive7GetReductionCycles<string mx, int sew> {
  // VLUpperBound*SEW/DLEN is equivalent to 2*LMUL since
  // VLUpperBound=(VLEN*LMUL)/SEW.
  defvar VLEN = 512;
  defvar DLEN = !div(VLEN, 2);
  // Fractional LMULs are charged 1 rather than a fraction.
  defvar TwoTimesLMUL = !cond(
    !eq(mx, "M1") : 2,
    !eq(mx, "M2") : 4,
    !eq(mx, "M4") : 8,
    !eq(mx, "M8") : 16,
    !eq(mx, "MF2") : 1,
    !eq(mx, "MF4") : 1,
    !eq(mx, "MF8") : 1
  );
  int c = !add(
    TwoTimesLMUL,
    !mul(5, !add(4, !logtwo(!div(DLEN, sew))))
  );
}

/// Cycles for ordered reductions take approximately 6*VL cycles, with VL
/// taken as its upper bound (VLEN * LMUL) / SEW for VLEN = 512.
class SiFive7GetOrderedReductionCycles<string mx, int sew> {
  defvar VLEN = 512;
  // (VLEN * LMUL) / SEW
  defvar VLUpperBound = !cond(
    !eq(mx, "M1") : !div(VLEN, sew),
    !eq(mx, "M2") : !div(!mul(VLEN, 2), sew),
    !eq(mx, "M4") : !div(!mul(VLEN, 4), sew),
    !eq(mx, "M8") : !div(!mul(VLEN, 8), sew),
    !eq(mx, "MF2") : !div(!div(VLEN, 2), sew),
    !eq(mx, "MF4") : !div(!div(VLEN, 4), sew),
    !eq(mx, "MF8") : !div(!div(VLEN, 8), sew),
  );
  int c = !mul(6, VLUpperBound);
}

/// ReadAdvance of `cycles` (default 2) for `read`, applied only to the scalar
/// integer ALU, shift, bit-manipulation, multiply, divide and load writes
/// listed below.
class SiFive7AnyToGPRBypass<SchedRead read, int cycles = 2>
    : ReadAdvance<read, cycles, [WriteIALU, WriteIALU32,
                                 WriteShiftImm, WriteShiftImm32,
                                 WriteShiftReg, WriteShiftReg32,
                                 WriteSHXADD, WriteSHXADD32,
                                 WriteRotateImm, WriteRotateImm32,
                                 WriteRotateReg, WriteRotateReg32,
                                 WriteSingleBit, WriteSingleBitImm,
                                 WriteBEXT, WriteBEXTI,
                                 WriteCLZ, WriteCLZ32, WriteCTZ, WriteCTZ32,
                                 WriteCPOP, WriteCPOP32,
                                 WriteREV8, WriteORCB, WriteSFB,
                                 WriteIMul, WriteIMul32,
                                 WriteIDiv, WriteIDiv32,
                                 WriteLDB, WriteLDH, WriteLDW, WriteLDD]>;

// SiFive7 machine model for scheduling and other instruction cost heuristics.
def SiFive7Model : SchedMachineModel {
  let MicroOpBufferSize = 0; // Explicitly set to zero since SiFive7 is in-order.
  let IssueWidth = 2;        // 2 micro-ops are dispatched per cycle.
  let LoadLatency = 3;
  let MispredictPenalty = 3;
  let CompleteModel = 0;
  let EnableIntervals = true;
  let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasStdExtZbkx,
                             HasStdExtZcmt, HasStdExtZknd, HasStdExtZkne,
                             HasStdExtZknh, HasStdExtZksed, HasStdExtZksh,
                             HasStdExtZkr];
}

// The SiFive7 microarchitecture has three pipelines: A, B, V.
// Pipe A can handle memory, integer alu and vector operations.
// Pipe B can handle integer alu, control flow, integer multiply and divide,
// and floating point computation.
// The V pipeline is modeled by the VCQ, VA, VL, and VS resources.
let SchedModel = SiFive7Model in {
let BufferSize = 0 in {
def SiFive7PipeA : ProcResource<1>;
def SiFive7PipeB : ProcResource<1>;
def SiFive7IDiv : ProcResource<1>; // Int Division
def SiFive7FDiv : ProcResource<1>; // FP Division/Sqrt
def SiFive7VA : ProcResource<1>; // Arithmetic sequencer
def SiFive7VL : ProcResource<1>; // Load sequencer
def SiFive7VS : ProcResource<1>; // Store sequencer
// The VCQ accepts instructions from the A Pipe and holds them until the
// vector unit is ready to dequeue them. The unit dequeues up to one instruction
// per cycle, in order, as soon as the sequencer for that type of instruction is
// available. This resource is meant to be used for 1 cycle by all vector
// instructions, to model that only one vector instruction may be dequeued at a
// time. The actual dequeueing into the sequencer is modeled by the VA, VL, and
// VS sequencer resources below. Each of them will only accept a single
// instruction at a time and remain busy for the number of cycles associated
// with that instruction.
def SiFive7VCQ : ProcResource<1>; // Vector Command Queue
}

// Group used by writes that can issue on either pipe A or pipe B.
def SiFive7PipeAB : ProcResGroup<[SiFive7PipeA, SiFive7PipeB]>;

// Branching
let Latency = 3 in {
def : WriteRes<WriteJmp, [SiFive7PipeB]>;
def : WriteRes<WriteJal, [SiFive7PipeB]>;
def : WriteRes<WriteJalr, [SiFive7PipeB]>;
}

// Short forward branch: occupies both pipes and counts as two micro-ops.
def : WriteRes<WriteSFB, [SiFive7PipeA, SiFive7PipeB]> {
  let Latency = 3;
  let NumMicroOps = 2;
}

// Integer arithmetic and logic
let Latency = 3 in {
def : WriteRes<WriteIALU, [SiFive7PipeAB]>;
def : WriteRes<WriteIALU32, [SiFive7PipeAB]>;
def : WriteRes<WriteShiftImm, [SiFive7PipeAB]>;
def : WriteRes<WriteShiftImm32, [SiFive7PipeAB]>;
def : WriteRes<WriteShiftReg, [SiFive7PipeAB]>;
def : WriteRes<WriteShiftReg32, [SiFive7PipeAB]>;
}

// Integer multiplication
let Latency = 3 in {
def : WriteRes<WriteIMul, [SiFive7PipeB]>;
def : WriteRes<WriteIMul32, [SiFive7PipeB]>;
}

// Integer division
// Pipe B is released after 1 cycle; the divider resource is released one
// cycle before the result latency elapses.
def : WriteRes<WriteIDiv, [SiFive7PipeB, SiFive7IDiv]> {
  let Latency = 66;
  let ReleaseAtCycles = [1, 65];
}
def : WriteRes<WriteIDiv32, [SiFive7PipeB, SiFive7IDiv]> {
  let Latency = 34;
  let ReleaseAtCycles = [1, 33];
}

// Bitmanip
let Latency = 3 in {
// Rotates are in the late-B ALU.
def : WriteRes<WriteRotateImm, [SiFive7PipeB]>;
def : WriteRes<WriteRotateImm32, [SiFive7PipeB]>;
def : WriteRes<WriteRotateReg, [SiFive7PipeB]>;
def : WriteRes<WriteRotateReg32, [SiFive7PipeB]>;

// clz[w]/ctz[w] are in the late-B ALU.
def : WriteRes<WriteCLZ, [SiFive7PipeB]>;
def : WriteRes<WriteCLZ32, [SiFive7PipeB]>;
def : WriteRes<WriteCTZ, [SiFive7PipeB]>;
def : WriteRes<WriteCTZ32, [SiFive7PipeB]>;

// cpop[w] look exactly like multiply.
def : WriteRes<WriteCPOP, [SiFive7PipeB]>;
def : WriteRes<WriteCPOP32, [SiFive7PipeB]>;

// orc.b is in the late-B ALU.
def : WriteRes<WriteORCB, [SiFive7PipeB]>;

// rev8 is in the late-A and late-B ALUs.
def : WriteRes<WriteREV8, [SiFive7PipeAB]>;

// shNadd[.uw] is on the early-B and late-B ALUs.
def : WriteRes<WriteSHXADD, [SiFive7PipeB]>;
def : WriteRes<WriteSHXADD32, [SiFive7PipeB]>;
}

// Single-bit instructions
// BEXT[I] instruction is available on all ALUs and the other instructions
// are only available on the SiFive7B pipe.
let Latency = 3 in {
def : WriteRes<WriteSingleBit, [SiFive7PipeB]>;
def : WriteRes<WriteSingleBitImm, [SiFive7PipeB]>;
def : WriteRes<WriteBEXT, [SiFive7PipeAB]>;
def : WriteRes<WriteBEXTI, [SiFive7PipeAB]>;
}

// Memory
// Stores set no Latency here, so they keep WriteRes's default.
def : WriteRes<WriteSTB, [SiFive7PipeA]>;
def : WriteRes<WriteSTH, [SiFive7PipeA]>;
def : WriteRes<WriteSTW, [SiFive7PipeA]>;
def : WriteRes<WriteSTD, [SiFive7PipeA]>;
def : WriteRes<WriteFST16, [SiFive7PipeA]>;
def : WriteRes<WriteFST32, [SiFive7PipeA]>;
def : WriteRes<WriteFST64, [SiFive7PipeA]>;

// Integer loads.
let Latency = 3 in {
def : WriteRes<WriteLDB, [SiFive7PipeA]>;
def : WriteRes<WriteLDH, [SiFive7PipeA]>;
def : WriteRes<WriteLDW, [SiFive7PipeA]>;
def : WriteRes<WriteLDD, [SiFive7PipeA]>;
}

// Floating-point loads.
let Latency = 2 in {
def : WriteRes<WriteFLD16, [SiFive7PipeA]>;
def : WriteRes<WriteFLD32, [SiFive7PipeA]>;
def : WriteRes<WriteFLD64, [SiFive7PipeA]>;
}

// Atomic memory
def : WriteRes<WriteAtomicSTW, [SiFive7PipeA]>;
def : WriteRes<WriteAtomicSTD, [SiFive7PipeA]>;

let Latency = 3 in {
def : WriteRes<WriteAtomicW, [SiFive7PipeA]>;
def : WriteRes<WriteAtomicD, [SiFive7PipeA]>;
def : WriteRes<WriteAtomicLDW, [SiFive7PipeA]>;
def : WriteRes<WriteAtomicLDD, [SiFive7PipeA]>;
}

// Half precision.
// Add, multiply and fused multiply-add.
let Latency = 5 in
foreach w = [WriteFAdd16, WriteFMul16, WriteFMA16] in
  def : WriteRes<w, [SiFive7PipeB]>;

// Sign injection and min/max.
let Latency = 3 in
foreach w = [WriteFSGNJ16, WriteFMinMax16] in
  def : WriteRes<w, [SiFive7PipeB]>;

// Divide and square root: pipe B is released after one cycle, the FP divider
// one cycle before the result latency elapses.
let Latency = 14, ReleaseAtCycles = [1, 13] in
foreach w = [WriteFDiv16, WriteFSqrt16] in
  def : WriteRes<w, [SiFive7PipeB, SiFive7FDiv]>;

// Single precision.
let Latency = 5 in
foreach w = [WriteFAdd32, WriteFMul32, WriteFMA32] in
  def : WriteRes<w, [SiFive7PipeB]>;

let Latency = 3 in
foreach w = [WriteFSGNJ32, WriteFMinMax32] in
  def : WriteRes<w, [SiFive7PipeB]>;

let Latency = 27, ReleaseAtCycles = [1, 26] in
foreach w = [WriteFDiv32, WriteFSqrt32] in
  def : WriteRes<w, [SiFive7PipeB, SiFive7FDiv]>;

// Double precision
let Latency = 7 in
foreach w = [WriteFAdd64, WriteFMul64, WriteFMA64] in
  def : WriteRes<w, [SiFive7PipeB]>;

let Latency = 3 in
foreach w = [WriteFSGNJ64, WriteFMinMax64] in
  def : WriteRes<w, [SiFive7PipeB]>;

let Latency = 56, ReleaseAtCycles = [1, 55] in
foreach w = [WriteFDiv64, WriteFSqrt64] in
  def : WriteRes<w, [SiFive7PipeB, SiFive7FDiv]>;

// Conversions, classification, comparison and FP<->integer moves all issue
// on pipe B with the same latency.
let Latency = 3 in
foreach w = [WriteFCvtI32ToF16, WriteFCvtI32ToF32, WriteFCvtI32ToF64,
             WriteFCvtI64ToF16, WriteFCvtI64ToF32, WriteFCvtI64ToF64,
             WriteFCvtF16ToI32, WriteFCvtF16ToI64,
             WriteFCvtF16ToF32, WriteFCvtF16ToF64,
             WriteFCvtF32ToI32, WriteFCvtF32ToI64,
             WriteFCvtF32ToF16, WriteFCvtF32ToF64,
             WriteFCvtF64ToI32, WriteFCvtF64ToI64,
             WriteFCvtF64ToF16, WriteFCvtF64ToF32,
             WriteFClass16, WriteFClass32, WriteFClass64,
             WriteFCmp16, WriteFCmp32, WriteFCmp64,
             WriteFMovI16ToF16, WriteFMovF16ToI16,
             WriteFMovI32ToF32, WriteFMovF32ToI32,
             WriteFMovI64ToF64, WriteFMovF64ToI64] in
  def : WriteRes<w, [SiFive7PipeB]>;

// 6. Configuration-Setting Instructions
let Latency = 3 in
foreach w = [WriteVSETVLI, WriteVSETIVLI, WriteVSETVL] in
  def : WriteRes<w, [SiFive7PipeA]>;

// 7. Vector Loads and Stores
// Unit-stride loads and stores can operate at the full bandwidth of the memory
// pipe. The memory pipe is DLEN bits wide on x280.
foreach mx = SchedMxList in {
  defvar Beats = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  // Resource 0 (VCQ) is held over [0, 1); resource 1 (the load or store
  // sequencer) over [1, 1 + Beats).
  let AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Beats)] in {
    let Latency = 4 in
    foreach ld = ["WriteVLDE", "WriteVLDFF"] in
      defm "" : LMULWriteResMX<ld, [SiFive7VCQ, SiFive7VL], mx, WorstCase>;
    let Latency = 1 in
    defm "" : LMULWriteResMX<"WriteVSTE", [SiFive7VCQ, SiFive7VS], mx, WorstCase>;
  }
}

// Mask loads and stores use the capped cycle count from
// SiFive7GetMaskLoadStoreCycles instead of the default per-LMUL count.
foreach mx = SchedMxList in {
  defvar Beats = SiFive7GetMaskLoadStoreCycles<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Beats)] in {
    let Latency = 4 in
    defm "" : LMULWriteResMX<"WriteVLDM", [SiFive7VCQ, SiFive7VL], mx, WorstCase>;
    let Latency = 1 in
    defm "" : LMULWriteResMX<"WriteVSTM", [SiFive7VCQ, SiFive7VS], mx, WorstCase>;
  }
}

// Strided loads and stores operate at one element per cycle and should be
// scheduled accordingly. Indexed loads and stores operate at one element per
// cycle, and they stall the machine until all addresses have been generated,
// so they cannot be scheduled. Indexed and strided loads and stores have LMUL
// specific suffixes, but since SEW is already encoded in the name of the
// resource, we do not need to use LMULSEWXXX constructors. However, we do
// use the SEW from the name to determine the number of Cycles.

// This predicate is true when the rs2 operand of vlse or vsse is x0, false
// otherwise.
def VLDSX0Pred : MCSchedPredicate<CheckRegOperand<3, X0>>;

// SEW=8 strided and indexed accesses.
foreach mx = SchedMxList in {
  defvar X0Beats = SiFive7GetCyclesDefault<mx>.c;
  defvar ElemCycles = SiFive7GetCyclesOnePerElement<mx, 8>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  // Strided load: two (latency, acquire, release) triples selected by
  // VLDSX0Pred. The first uses the default per-LMUL cycle count, the second
  // the one-element-per-cycle count.
  defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS8", VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
                                       4, [0, 1], [1, !add(1, X0Beats)], !add(3, ElemCycles),
                                       [0, 1], [1, !add(1, ElemCycles)], mx, WorstCase>;
  let AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, ElemCycles)] in {
    // Indexed loads.
    let Latency = !add(3, ElemCycles) in
    foreach ld = ["WriteVLDUX8", "WriteVLDOX8"] in
      defm "" : LMULWriteResMX<ld, [SiFive7VCQ, SiFive7VL], mx, WorstCase>;
    // Strided and indexed stores.
    let Latency = 1 in
    foreach st = ["WriteVSTS8", "WriteVSTUX8", "WriteVSTOX8"] in
      defm "" : LMULWriteResMX<st, [SiFive7VCQ, SiFive7VS], mx, WorstCase>;
  }
}
// TODO: The MxLists need to be filtered by EEW. We only need to support
// LMUL >= SEW_min/ELEN. Here, the smallest EEW prevents us from having MF8
// since LMUL >= 16/64.
// SEW=16 strided and indexed accesses.
foreach mx = ["MF4", "MF2", "M1", "M2", "M4", "M8"] in {
  defvar X0Beats = SiFive7GetCyclesDefault<mx>.c;
  defvar ElemCycles = SiFive7GetCyclesOnePerElement<mx, 16>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS16", VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
                                       4, [0, 1], [1, !add(1, X0Beats)], !add(3, ElemCycles),
                                       [0, 1], [1, !add(1, ElemCycles)], mx, WorstCase>;
  let AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, ElemCycles)] in {
    let Latency = !add(3, ElemCycles) in
    foreach ld = ["WriteVLDUX16", "WriteVLDOX16"] in
      defm "" : LMULWriteResMX<ld, [SiFive7VCQ, SiFive7VL], mx, WorstCase>;
    let Latency = 1 in
    foreach st = ["WriteVSTS16", "WriteVSTUX16", "WriteVSTOX16"] in
      defm "" : LMULWriteResMX<st, [SiFive7VCQ, SiFive7VS], mx, WorstCase>;
  }
}
// SEW=32 strided and indexed accesses.
foreach mx = ["MF2", "M1", "M2", "M4", "M8"] in {
  defvar X0Beats = SiFive7GetCyclesDefault<mx>.c;
  defvar ElemCycles = SiFive7GetCyclesOnePerElement<mx, 32>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS32", VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
                                       4, [0, 1], [1, !add(1, X0Beats)], !add(3, ElemCycles),
                                       [0, 1], [1, !add(1, ElemCycles)], mx, WorstCase>;
  let AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, ElemCycles)] in {
    let Latency = !add(3, ElemCycles) in
    foreach ld = ["WriteVLDUX32", "WriteVLDOX32"] in
      defm "" : LMULWriteResMX<ld, [SiFive7VCQ, SiFive7VL], mx, WorstCase>;
    let Latency = 1 in
    foreach st = ["WriteVSTS32", "WriteVSTUX32", "WriteVSTOX32"] in
      defm "" : LMULWriteResMX<st, [SiFive7VCQ, SiFive7VS], mx, WorstCase>;
  }
}
// SEW=64 strided and indexed accesses.
foreach mx = ["M1", "M2", "M4", "M8"] in {
  defvar X0Beats = SiFive7GetCyclesDefault<mx>.c;
  defvar ElemCycles = SiFive7GetCyclesOnePerElement<mx, 64>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS64", VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
                                       4, [0, 1], [1, !add(1, X0Beats)], !add(3, ElemCycles),
                                       [0, 1], [1, !add(1, ElemCycles)], mx, WorstCase>;
  let AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, ElemCycles)] in {
    let Latency = !add(3, ElemCycles) in
    foreach ld = ["WriteVLDUX64", "WriteVLDOX64"] in
      defm "" : LMULWriteResMX<ld, [SiFive7VCQ, SiFive7VL], mx, WorstCase>;
    let Latency = 1 in
    foreach st = ["WriteVSTS64", "WriteVSTUX64", "WriteVSTOX64"] in
      defm "" : LMULWriteResMX<st, [SiFive7VCQ, SiFive7VS], mx, WorstCase>;
  }
}

// Whole-register loads and stores: the sequencer is held for 2, 4, 8 or 16
// cycles for the 1-, 2-, 4- and 8-register forms respectively.
let AcquireAtCycles = [0, 1] in {
  // VLD*R is LMUL aware
  let Latency = 4 in {
    let ReleaseAtCycles = [1, !add(1, 2)] in
    def : WriteRes<WriteVLD1R, [SiFive7VCQ, SiFive7VL]>;
    let ReleaseAtCycles = [1, !add(1, 4)] in
    def : WriteRes<WriteVLD2R, [SiFive7VCQ, SiFive7VL]>;
    let ReleaseAtCycles = [1, !add(1, 8)] in
    def : WriteRes<WriteVLD4R, [SiFive7VCQ, SiFive7VL]>;
    let ReleaseAtCycles = [1, !add(1, 16)] in
    def : WriteRes<WriteVLD8R, [SiFive7VCQ, SiFive7VL]>;
  }
  // VST*R is LMUL aware
  let Latency = 1 in {
    let ReleaseAtCycles = [1, !add(1, 2)] in
    def : WriteRes<WriteVST1R, [SiFive7VCQ, SiFive7VS]>;
    let ReleaseAtCycles = [1, !add(1, 4)] in
    def : WriteRes<WriteVST2R, [SiFive7VCQ, SiFive7VS]>;
    let ReleaseAtCycles = [1, !add(1, 8)] in
    def : WriteRes<WriteVST4R, [SiFive7VCQ, SiFive7VS]>;
    let ReleaseAtCycles = [1, !add(1, 16)] in
    def : WriteRes<WriteVST8R, [SiFive7VCQ, SiFive7VS]>;
  }
}

// Segmented Loads and Stores
// Unit-stride segmented loads and stores are effectively converted into strided
// segment loads and stores. Strided segment loads and stores operate at up to
// one segment per cycle if the segment fits within one aligned memory beat.
// Indexed segment loads and stores operate at the same rate as strided ones,
// but they stall the machine until all addresses have been generated.
foreach mx = SchedMxList in {
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  defvar Seg2Cycles = SiFive7GetCyclesSegmentedSeg2<mx>.c;
  foreach eew = [8, 16, 32, 64] in {
    // nf=2 unit-stride segments use the dedicated Seg2 cycle table.
    let AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Seg2Cycles)] in {
      // Does not chain so set latency high
      let Latency = !add(3, Seg2Cycles) in {
        defm "" : LMULWriteResMX<"WriteVLSEG2e" # eew, [SiFive7VCQ, SiFive7VL], mx, WorstCase>;
        defm "" : LMULWriteResMX<"WriteVLSEGFF2e" # eew, [SiFive7VCQ, SiFive7VL], mx, WorstCase>;
      }
      let Latency = 1 in
      defm "" : LMULWriteResMX<"WriteVSSEG2e" # eew, [SiFive7VCQ, SiFive7VS], mx, WorstCase>;
    }
    // nf=3..8 unit-stride segments use the general per-segment formula.
    foreach nf=3-8 in {
      defvar SegCycles = SiFive7GetCyclesSegmented<mx, eew, nf>.c;
      let AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, SegCycles)] in {
        // Does not chain so set latency high
        let Latency = !add(3, SegCycles) in {
          defm "" : LMULWriteResMX<"WriteVLSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, WorstCase>;
          defm "" : LMULWriteResMX<"WriteVLSEGFF" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, WorstCase>;
        }
        let Latency = 1 in
        defm "" : LMULWriteResMX<"WriteVSSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VS], mx, WorstCase>;
      }
    }
  }
}
// Strided and indexed segment accesses, all nf values.
foreach mx = SchedMxList in {
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  foreach nf=2-8 in {
    foreach eew = [8, 16, 32, 64] in {
      defvar SegCycles = SiFive7GetCyclesSegmented<mx, eew, nf>.c;
      let AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, SegCycles)] in {
        // Does not chain so set latency high
        let Latency = !add(3, SegCycles) in {
          defm "" : LMULWriteResMX<"WriteVLSSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, WorstCase>;
          defm "" : LMULWriteResMX<"WriteVLUXSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, WorstCase>;
          defm "" : LMULWriteResMX<"WriteVLOXSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, WorstCase>;
        }
        let Latency = 1 in {
          defm "" : LMULWriteResMX<"WriteVSSSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VS], mx, WorstCase>;
          defm "" : LMULWriteResMX<"WriteVSUXSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VS], mx, WorstCase>;
          defm "" : LMULWriteResMX<"WriteVSOXSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VS], mx, WorstCase>;
        }
      }
    }
  }
}

// 11.
// Vector Integer Arithmetic Instructions
foreach mx = SchedMxList in {
  defvar Beats = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Beats)] in {
    let Latency = 4 in
    foreach w = ["WriteVIALUV", "WriteVIALUX", "WriteVIALUI",
                 "WriteVICALUV", "WriteVICALUX", "WriteVICALUI",
                 "WriteVShiftV", "WriteVShiftX", "WriteVShiftI",
                 "WriteVIMinMaxV", "WriteVIMinMaxX",
                 "WriteVIMulV", "WriteVIMulX",
                 "WriteVIMulAddV", "WriteVIMulAddX",
                 "WriteVIMergeV", "WriteVIMergeX", "WriteVIMergeI",
                 "WriteVIMovV", "WriteVIMovX", "WriteVIMovI"] in
      defm "" : LMULWriteResMX<w, [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
    // Mask results can't chain.
    let Latency = !add(Beats, 3) in
    foreach w = ["WriteVICmpV", "WriteVICmpX", "WriteVICmpI"] in
      defm "" : LMULWriteResMX<w, [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
  }
}
// Integer extension.
foreach mx = SchedMxList in {
  defvar Beats = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Beats)] in
  defm "" : LMULWriteResMX<"WriteVExtV", [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
}
// Integer divide: the per-SEW factor times a quarter of the
// one-element-per-cycle count; latency equals the occupancy.
foreach mx = SchedMxList in {
  foreach sew = SchedSEWSet<mx>.val in {
    defvar DivCycles = !mul(SiFive7GetDivOrSqrtFactor<sew>.c,
                            !div(SiFive7GetCyclesOnePerElement<mx, sew>.c, 4));
    defvar WorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
    let Latency = DivCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, DivCycles)] in
    foreach w = ["WriteVIDivV", "WriteVIDivX"] in
      defm "" : LMULSEWWriteResMXSEW<w, [SiFive7VCQ, SiFive7VA], mx, sew, WorstCase>;
  }
}

// Widening
foreach mx = SchedMxListW in {
  defvar Beats = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
  let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Beats)] in
  foreach w = ["WriteVIWALUV", "WriteVIWALUX", "WriteVIWALUI",
               "WriteVIWMulV", "WriteVIWMulX",
               "WriteVIWMulAddV", "WriteVIWMulAddX"] in
    defm "" : LMULWriteResMX<w, [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
}
// Narrowing
foreach mx = SchedMxListW in {
  defvar Beats = SiFive7GetCyclesNarrowing<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
  let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Beats)] in
  foreach w = ["WriteVNShiftV", "WriteVNShiftX", "WriteVNShiftI"] in
    defm "" : LMULWriteResMX<w, [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
}

// 12. Vector Fixed-Point Arithmetic Instructions
foreach mx = SchedMxList in {
  defvar Beats = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Beats)] in
  foreach w = ["WriteVSALUV", "WriteVSALUX", "WriteVSALUI",
               "WriteVAALUV", "WriteVAALUX",
               "WriteVSMulV", "WriteVSMulX",
               "WriteVSShiftV", "WriteVSShiftX", "WriteVSShiftI"] in
    defm "" : LMULWriteResMX<w, [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
}
// Narrowing
foreach mx = SchedMxListW in {
  defvar Beats = SiFive7GetCyclesNarrowing<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
  let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Beats)] in
  foreach w = ["WriteVNClipV", "WriteVNClipX", "WriteVNClipI"] in
    defm "" : LMULWriteResMX<w, [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
}

// 13. Vector Floating-Point Instructions
foreach mx = SchedMxList in {
  defvar Beats = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Beats)] in {
    let Latency = 8 in
    foreach w = ["WriteVFALUV", "WriteVFALUF",
                 "WriteVFMulV", "WriteVFMulF",
                 "WriteVFMulAddV", "WriteVFMulAddF",
                 "WriteVFRecpV",
                 "WriteVFCvtIToFV", "WriteVFCvtFToIV"] in
      defm "" : LMULWriteResMX<w, [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
    let Latency = 4 in
    foreach w = ["WriteVFSgnjV", "WriteVFSgnjF",
                 "WriteVFMinMaxV", "WriteVFMinMaxF",
                 "WriteVFClassV", "WriteVFMergeV", "WriteVFMovV"] in
      defm "" : LMULWriteResMX<w, [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
    // Mask results can't chain.
    let Latency = !add(Beats, 3) in
    foreach w = ["WriteVFCmpV", "WriteVFCmpF"] in
      defm "" : LMULWriteResMX<w, [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
  }
}
// FP divide and square root: same cycle formula as the integer divide, over
// the FP LMUL list and FP SEW set.
foreach mx = SchedMxListF in {
  foreach sew = SchedSEWSet<mx, isF=1>.val in {
    defvar DivCycles = !mul(SiFive7GetDivOrSqrtFactor<sew>.c,
                            !div(SiFive7GetCyclesOnePerElement<mx, sew>.c, 4));
    defvar WorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
    let Latency = DivCycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, DivCycles)] in
    foreach w = ["WriteVFSqrtV", "WriteVFDivV", "WriteVFDivF"] in
      defm "" : LMULSEWWriteResMXSEW<w, [SiFive7VCQ, SiFive7VA], mx, sew, WorstCase>;
  }
}

// Widening
foreach mx = SchedMxListW in {
  defvar Beats = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
  let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Beats)] in
  defm "" : LMULWriteResMX<"WriteVFWCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, WorstCase>;
}
foreach mx = SchedMxListFW in {
  defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListFW>.c;
  let
Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { 774 defm "" : LMULWriteResMX<"WriteVFWALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 775 defm "" : LMULWriteResMX<"WriteVFWMulV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 776 defm "" : LMULWriteResMX<"WriteVFWMulAddV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 777 defm "" : LMULWriteResMX<"WriteVFWCvtFToIV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 778 defm "" : LMULWriteResMX<"WriteVFWCvtFToFV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 779 defm "" : LMULWriteResMX<"WriteVFWMulAddF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 780 defm "" : LMULWriteResMX<"WriteVFWMulF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 781 defm "" : LMULWriteResMX<"WriteVFWALUF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 782 } 783} 784// Narrowing 785foreach mx = SchedMxListW in { 786 defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c; 787 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c; 788 let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { 789 defm "" : LMULWriteResMX<"WriteVFNCvtFToIV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 790 } 791} 792foreach mx = SchedMxListFW in { 793 defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c; 794 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListFW>.c; 795 let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { 796 defm "" : LMULWriteResMX<"WriteVFNCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 797 defm "" : LMULWriteResMX<"WriteVFNCvtFToFV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 798 } 799} 800 801// 14. 
// Vector Reduction Operations
// Reductions are modelled per (LMUL, SEW) pair. The value returned by the
// cycle helper is used both as the latency and as the SiFive7VA occupancy
// (SiFive7VA is released at cycle 1 + that value).

// Integer reductions.
foreach mx = SchedMxList in
foreach sew = SchedSEWSet<mx>.val in {
  defvar RedCycles = SiFive7GetReductionCycles<mx, sew>.c;
  defvar Worst = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
  let Latency = RedCycles, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, RedCycles)] in {
    foreach wr = ["WriteVIRedV_From", "WriteVIRedMinMaxV_From"] in
      defm "" : LMULSEWWriteResMXSEW<wr, [SiFive7VCQ, SiFive7VA],
                                     mx, sew, Worst>;
  }
}

// Widening integer reductions.
foreach mx = SchedMxListWRed in
foreach sew = SchedSEWSet<mx, 0, 1>.val in {
  defvar RedCycles = SiFive7GetReductionCycles<mx, sew>.c;
  defvar Worst = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListWRed>.c;
  let Latency = RedCycles, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, RedCycles)] in {
    defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From", [SiFive7VCQ, SiFive7VA],
                                   mx, sew, Worst>;
  }
}

// Floating-point reductions; the ordered form uses its own cycle helper.
foreach mx = SchedMxListF in
foreach sew = SchedSEWSet<mx, 1>.val in {
  defvar UnordCycles = SiFive7GetReductionCycles<mx, sew>.c;
  defvar OrdCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c;
  defvar Worst = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
  let Latency = UnordCycles, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, UnordCycles)] in {
    foreach wr = ["WriteVFRedV_From", "WriteVFRedMinMaxV_From"] in
      defm "" : LMULSEWWriteResMXSEW<wr, [SiFive7VCQ, SiFive7VA],
                                     mx, sew, Worst>;
  }
  let Latency = OrdCycles, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, OrdCycles)] in {
    defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [SiFive7VCQ, SiFive7VA],
                                   mx, sew, Worst>;
  }
}

// Widening floating-point reductions (unordered and ordered).
foreach mx = SchedMxListFWRed in
foreach sew = SchedSEWSet<mx, 1, 1>.val in {
  defvar UnordCycles = SiFive7GetReductionCycles<mx, sew>.c;
  defvar OrdCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c;
  defvar Worst = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFWRed, 1>.c;
  let Latency = UnordCycles, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, UnordCycles)] in {
    defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [SiFive7VCQ, SiFive7VA],
                                   mx, sew, Worst>;
  }
  let Latency = OrdCycles, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, OrdCycles)] in {
    defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [SiFive7VCQ, SiFive7VA],
                                   mx, sew, Worst>;
  }
}

// 15. Vector Mask Instructions
// Mask ALU / popcount / find-first / set-first use the SiFive7GetCyclesVMask
// cycle count.
foreach mx = SchedMxList in {
  defvar MaskCycles = SiFive7GetCyclesVMask<mx>.c;
  defvar Worst = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 4, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, MaskCycles)] in {
    foreach wr = ["WriteVMALUV", "WriteVMPopV", "WriteVMFFSV", "WriteVMSFSV"] in
      defm "" : LMULWriteResMX<wr, [SiFive7VCQ, SiFive7VA], mx, Worst>;
  }
}
// WriteVMIotV and WriteVMIdxV use the default per-LMUL cycle count instead.
foreach mx = SchedMxList in {
  defvar VACycles = SiFive7GetCyclesDefault<mx>.c;
  defvar Worst = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 4, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, VACycles)] in {
    foreach wr = ["WriteVMIotV", "WriteVMIdxV"] in
      defm "" : LMULWriteResMX<wr, [SiFive7VCQ, SiFive7VA], mx, Worst>;
  }
}

// 16.
// Vector Permutation Instructions
// Scalar <-> vector element moves: latency 4, SiFive7VA released at cycle 2.
let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 1)] in {
  foreach wr = [WriteVIMovVX, WriteVIMovXV, WriteVFMovVF, WriteVFMovFV] in
    def : WriteRes<wr, [SiFive7VCQ, SiFive7VA]>;
}

// Gathers with a scalar or immediate index.
foreach mx = SchedMxList in {
  defvar VACycles = SiFive7GetCyclesDefault<mx>.c;
  defvar Worst = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 8, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, VACycles)] in {
    foreach wr = ["WriteVRGatherVX", "WriteVRGatherVI"] in
      defm "" : LMULWriteResMX<wr, [SiFive7VCQ, SiFive7VA], mx, Worst>;
  }
}

// Vector-indexed gather and compress use the one-per-element cycle count;
// latency is that count plus 3.
foreach mx = SchedMxList in
foreach sew = SchedSEWSet<mx>.val in {
  defvar ElemCycles = SiFive7GetCyclesOnePerElement<mx, sew>.c;
  defvar Worst = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
  let Latency = !add(ElemCycles, 3), AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, ElemCycles)] in {
    foreach wr = ["WriteVRGatherVV", "WriteVCompressV"] in
      defm "" : LMULSEWWriteResMXSEW<wr, [SiFive7VCQ, SiFive7VA], mx, sew, Worst>;
  }
}

// Slides.
foreach mx = SchedMxList in {
  defvar VACycles = SiFive7GetCyclesDefault<mx>.c;
  defvar Worst = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 4, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, VACycles)] in {
    foreach wr = ["WriteVISlideX", "WriteVISlideI",
                  "WriteVISlide1X", "WriteVFSlide1F"] in
      defm "" : LMULWriteResMX<wr, [SiFive7VCQ, SiFive7VA], mx, Worst>;
  }
}

// VMov*V is LMUL Aware
// WriteVMov<N>V keeps SiFive7VA until cycle 1 + 2*N for N in {1, 2, 4, 8}
// (i.e. 1 + 2, 1 + 4, 1 + 8 and 1 + 16).
foreach nregs = [1, 2, 4, 8] in {
  let Latency = 4, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, !mul(2, nregs))] in
  def : WriteRes<!cast<SchedWrite>("WriteVMov" # nregs # "V"),
                 [SiFive7VCQ, SiFive7VA]>;
}

// Others
def : WriteRes<WriteCSR, [SiFive7PipeB]>;
def : WriteRes<WriteNop, []>;
let Latency = 3 in {
  def : WriteRes<WriteRdVLENB, [SiFive7PipeB]>;
}

def : InstRW<[WriteIALU], (instrs COPY)>;

//===----------------------------------------------------------------------===//

// Bypass and advance
// Reads routed through SiFive7AnyToGPRBypass (helper defined outside this
// excerpt) use its default cycle argument; every plain ReadAdvance here is 0.
foreach rd = [ReadJmp, ReadJalr] in
  def : SiFive7AnyToGPRBypass<rd>;
foreach rd = [ReadCSR, ReadStoreData, ReadMemBase] in
  def : ReadAdvance<rd, 0>;
foreach rd = [ReadIALU, ReadIALU32,
              ReadShiftImm, ReadShiftImm32,
              ReadShiftReg, ReadShiftReg32] in
  def : SiFive7AnyToGPRBypass<rd>;
foreach rd = [ReadIDiv, ReadIDiv32, ReadIMul, ReadIMul32,
              ReadAtomicWA, ReadAtomicWD, ReadAtomicDA, ReadAtomicDD,
              ReadAtomicLDW, ReadAtomicLDD, ReadAtomicSTW, ReadAtomicSTD,
              ReadFStoreData, ReadFMemBase,
              ReadFAdd16, ReadFAdd32, ReadFAdd64] in
  def : ReadAdvance<rd, 0>;
// Scalar floating-point reads (multiply/FMA, divide/sqrt, compare, sign
// injection, min/max, conversions, moves, classify): ReadAdvance of 0.
foreach rd = [ReadFMul16, ReadFMA16, ReadFMA16Addend,
              ReadFMul32, ReadFMul64,
              ReadFMA32, ReadFMA32Addend,
              ReadFMA64, ReadFMA64Addend,
              ReadFDiv16, ReadFDiv32, ReadFDiv64,
              ReadFSqrt16, ReadFSqrt32, ReadFSqrt64,
              ReadFCmp16, ReadFCmp32, ReadFCmp64,
              ReadFSGNJ16, ReadFSGNJ32, ReadFSGNJ64,
              ReadFMinMax16, ReadFMinMax32, ReadFMinMax64,
              ReadFCvtF16ToI32, ReadFCvtF16ToI64,
              ReadFCvtF32ToI32, ReadFCvtF32ToI64,
              ReadFCvtF64ToI32, ReadFCvtF64ToI64,
              ReadFCvtI32ToF16, ReadFCvtI32ToF32, ReadFCvtI32ToF64,
              ReadFCvtI64ToF16, ReadFCvtI64ToF32, ReadFCvtI64ToF64,
              ReadFCvtF32ToF64, ReadFCvtF64ToF32,
              ReadFCvtF16ToF32, ReadFCvtF32ToF16,
              ReadFCvtF16ToF64, ReadFCvtF64ToF16,
              ReadFMovF16ToI16, ReadFMovI16ToF16,
              ReadFMovF32ToI32, ReadFMovI32ToF32,
              ReadFMovF64ToI64, ReadFMovI64ToF64,
              ReadFClass16] in
  def : ReadAdvance<rd, 0>;
foreach rd = [ReadFClass32, ReadFClass64] in
  def : ReadAdvance<rd, 0>;

// Short-forward-branch reads pass an explicit 0 as the second argument of
// SiFive7AnyToGPRBypass instead of relying on its default.
foreach rd = [ReadSFBJmp, ReadSFBALU] in
  def : SiFive7AnyToGPRBypass<rd, 0>;

// Bitmanip
foreach rd = [ReadRotateImm, ReadRotateImm32,
              ReadRotateReg, ReadRotateReg32,
              ReadCLZ, ReadCLZ32,
              ReadCTZ, ReadCTZ32] in
  def : SiFive7AnyToGPRBypass<rd>;
// The population-count reads are plain ReadAdvance entries, not bypasses.
foreach rd = [ReadCPOP, ReadCPOP32] in
  def : ReadAdvance<rd, 0>;
foreach rd = [ReadORCB, ReadREV8, ReadSHXADD, ReadSHXADD32] in
  def : SiFive7AnyToGPRBypass<rd>;
// Single-bit instructions
foreach rd = [ReadSingleBit, ReadSingleBitImm] in
  def : SiFive7AnyToGPRBypass<rd>;

// 6. Configuration-Setting Instructions
foreach rd = [ReadVSETVLI, ReadVSETVL] in
  def : ReadAdvance<rd, 2>;

// 7.
// Vector Loads and Stores
// Every vector read below is given a ReadAdvance of 0. Section numbers follow
// the chapter numbering of the RISC-V "V" 1.0 specification, matching the
// numbering used by the WriteRes definitions earlier in this file.
def : ReadAdvance<ReadVLDX, 0>;
def : ReadAdvance<ReadVSTX, 0>;
defm "" : LMULReadAdvance<"ReadVSTEV", 0>;
defm "" : LMULReadAdvance<"ReadVSTM", 0>;
def : ReadAdvance<ReadVLDSX, 0>;
def : ReadAdvance<ReadVSTSX, 0>;
defm "" : LMULReadAdvance<"ReadVSTS8V", 0>;
defm "" : LMULReadAdvance<"ReadVSTS16V", 0>;
defm "" : LMULReadAdvance<"ReadVSTS32V", 0>;
defm "" : LMULReadAdvance<"ReadVSTS64V", 0>;
defm "" : LMULReadAdvance<"ReadVLDUXV", 0>;
defm "" : LMULReadAdvance<"ReadVLDOXV", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX8", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX16", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX32", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX64", 0>;
defm "" : LMULReadAdvance<"ReadVSTUXV", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX8V", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX16V", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX32V", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX64V", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX8", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX16", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX32", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX64", 0>;
defm "" : LMULReadAdvance<"ReadVSTOXV", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX8V", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX16V", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX32V", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX64V", 0>;
// LMUL Aware
def : ReadAdvance<ReadVST1R, 0>;
def : ReadAdvance<ReadVST2R, 0>;
def : ReadAdvance<ReadVST4R, 0>;
def : ReadAdvance<ReadVST8R, 0>;

// 11. Vector Integer Arithmetic Instructions
defm "" : LMULReadAdvance<"ReadVIALUV", 0>;
defm "" : LMULReadAdvance<"ReadVIALUX", 0>;
defm "" : LMULReadAdvanceW<"ReadVIWALUV", 0>;
defm "" : LMULReadAdvanceW<"ReadVIWALUX", 0>;
defm "" : LMULReadAdvance<"ReadVExtV", 0>;
defm "" : LMULReadAdvance<"ReadVICALUV", 0>;
defm "" : LMULReadAdvance<"ReadVICALUX", 0>;
defm "" : LMULReadAdvance<"ReadVShiftV", 0>;
defm "" : LMULReadAdvance<"ReadVShiftX", 0>;
defm "" : LMULReadAdvanceW<"ReadVNShiftV", 0>;
defm "" : LMULReadAdvanceW<"ReadVNShiftX", 0>;
defm "" : LMULReadAdvance<"ReadVICmpV", 0>;
defm "" : LMULReadAdvance<"ReadVICmpX", 0>;
defm "" : LMULReadAdvance<"ReadVIMinMaxV", 0>;
defm "" : LMULReadAdvance<"ReadVIMinMaxX", 0>;
defm "" : LMULReadAdvance<"ReadVIMulV", 0>;
defm "" : LMULReadAdvance<"ReadVIMulX", 0>;
defm "" : LMULSEWReadAdvance<"ReadVIDivV", 0>;
defm "" : LMULSEWReadAdvance<"ReadVIDivX", 0>;
defm "" : LMULReadAdvanceW<"ReadVIWMulV", 0>;
defm "" : LMULReadAdvanceW<"ReadVIWMulX", 0>;
defm "" : LMULReadAdvance<"ReadVIMulAddV", 0>;
defm "" : LMULReadAdvance<"ReadVIMulAddX", 0>;
defm "" : LMULReadAdvanceW<"ReadVIWMulAddV", 0>;
defm "" : LMULReadAdvanceW<"ReadVIWMulAddX", 0>;
defm "" : LMULReadAdvance<"ReadVIMergeV", 0>;
defm "" : LMULReadAdvance<"ReadVIMergeX", 0>;
defm "" : LMULReadAdvance<"ReadVIMovV", 0>;
defm "" : LMULReadAdvance<"ReadVIMovX", 0>;

// 12. Vector Fixed-Point Arithmetic Instructions
defm "" : LMULReadAdvance<"ReadVSALUV", 0>;
defm "" : LMULReadAdvance<"ReadVSALUX", 0>;
defm "" : LMULReadAdvance<"ReadVAALUV", 0>;
defm "" : LMULReadAdvance<"ReadVAALUX", 0>;
defm "" : LMULReadAdvance<"ReadVSMulV", 0>;
defm "" : LMULReadAdvance<"ReadVSMulX", 0>;
defm "" : LMULReadAdvance<"ReadVSShiftV", 0>;
defm "" : LMULReadAdvance<"ReadVSShiftX", 0>;
defm "" : LMULReadAdvanceW<"ReadVNClipV", 0>;
defm "" : LMULReadAdvanceW<"ReadVNClipX", 0>;

// 13. Vector Floating-Point Instructions
defm "" : LMULReadAdvance<"ReadVFALUV", 0>;
defm "" : LMULReadAdvance<"ReadVFALUF", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWALUV", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWALUF", 0>;
defm "" : LMULReadAdvance<"ReadVFMulV", 0>;
defm "" : LMULReadAdvance<"ReadVFMulF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFDivV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFDivF", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWMulV", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWMulF", 0>;
defm "" : LMULReadAdvance<"ReadVFMulAddV", 0>;
defm "" : LMULReadAdvance<"ReadVFMulAddF", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWMulAddV", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWMulAddF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFSqrtV", 0>;
defm "" : LMULReadAdvance<"ReadVFRecpV", 0>;
defm "" : LMULReadAdvance<"ReadVFMinMaxV", 0>;
defm "" : LMULReadAdvance<"ReadVFMinMaxF", 0>;
defm "" : LMULReadAdvance<"ReadVFSgnjV", 0>;
defm "" : LMULReadAdvance<"ReadVFSgnjF", 0>;
defm "" : LMULReadAdvance<"ReadVFCmpV", 0>;
defm "" : LMULReadAdvance<"ReadVFCmpF", 0>;
defm "" : LMULReadAdvance<"ReadVFClassV", 0>;
defm "" : LMULReadAdvance<"ReadVFMergeV", 0>;
defm "" : LMULReadAdvance<"ReadVFMergeF", 0>;
defm "" : LMULReadAdvance<"ReadVFMovF", 0>;
defm "" : LMULReadAdvance<"ReadVFCvtIToFV", 0>;
defm "" : LMULReadAdvance<"ReadVFCvtFToIV", 0>;
defm "" : LMULReadAdvanceW<"ReadVFWCvtIToFV", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWCvtFToIV", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWCvtFToFV", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFNCvtIToFV", 0>;
defm "" : LMULReadAdvanceW<"ReadVFNCvtFToIV", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFNCvtFToFV", 0>;

// 14. Vector Reduction Operations
def : ReadAdvance<ReadVIRedV, 0>;
def : ReadAdvance<ReadVIRedV0, 0>;
def : ReadAdvance<ReadVIWRedV, 0>;
def : ReadAdvance<ReadVIWRedV0, 0>;
def : ReadAdvance<ReadVFRedV, 0>;
def : ReadAdvance<ReadVFRedV0, 0>;
def : ReadAdvance<ReadVFRedOV, 0>;
def : ReadAdvance<ReadVFRedOV0, 0>;
def : ReadAdvance<ReadVFWRedV, 0>;
def : ReadAdvance<ReadVFWRedV0, 0>;
def : ReadAdvance<ReadVFWRedOV, 0>;
def : ReadAdvance<ReadVFWRedOV0, 0>;

// 15. Vector Mask Instructions
defm "" : LMULReadAdvance<"ReadVMALUV", 0>;
defm "" : LMULReadAdvance<"ReadVMPopV", 0>;
defm "" : LMULReadAdvance<"ReadVMFFSV", 0>;
defm "" : LMULReadAdvance<"ReadVMSFSV", 0>;
defm "" : LMULReadAdvance<"ReadVMIotV", 0>;

// 16. Vector Permutation Instructions
def : ReadAdvance<ReadVIMovVX, 0>;
def : ReadAdvance<ReadVIMovXV, 0>;
def : ReadAdvance<ReadVIMovXX, 0>;
def : ReadAdvance<ReadVFMovVF, 0>;
def : ReadAdvance<ReadVFMovFV, 0>;
def : ReadAdvance<ReadVFMovFX, 0>;
defm "" : LMULReadAdvance<"ReadVISlideV", 0>;
defm "" : LMULReadAdvance<"ReadVISlideX", 0>;
defm "" : LMULReadAdvance<"ReadVFSlideV", 0>;
defm "" : LMULReadAdvance<"ReadVFSlideF", 0>;
defm "" : LMULSEWReadAdvance<"ReadVRGatherVV_data", 0>;
defm "" : LMULSEWReadAdvance<"ReadVRGatherVV_index", 0>;
defm "" : LMULReadAdvance<"ReadVRGatherVX_data", 0>;
defm "" : LMULReadAdvance<"ReadVRGatherVX_index", 0>;
defm "" : LMULReadAdvance<"ReadVRGatherVI_data", 0>;
defm "" : LMULSEWReadAdvance<"ReadVCompressV", 0>;
// LMUL Aware
def : ReadAdvance<ReadVMov1V, 0>;
def : ReadAdvance<ReadVMov2V, 0>;
def : ReadAdvance<ReadVMov4V, 0>;
def : ReadAdvance<ReadVMov8V, 0>;

// Others
def : ReadAdvance<ReadVMask, 0>;
def : ReadAdvance<ReadVMergeOp_WorstCase, 0>;
// One ReadVMergeOp entry per LMUL, plus one per (LMUL, SEW) pair; the record
// names are assembled as "ReadVMergeOp_<mx>" and "ReadVMergeOp_<mx>_E<sew>".
foreach mx = SchedMxList in {
  def : ReadAdvance<!cast<SchedRead>("ReadVMergeOp_" # mx), 0>;
  foreach sew = SchedSEWSet<mx>.val in
    def : ReadAdvance<!cast<SchedRead>("ReadVMergeOp_" # mx # "_E" # sew), 0>;
}

//===----------------------------------------------------------------------===//
// Unsupported extensions
defm : UnsupportedSchedZbc;
defm : UnsupportedSchedZbkb;
defm : UnsupportedSchedZbkx;
defm : UnsupportedSchedZfa;
}