//==- RISCVSchedSiFive7.td - SiFive7 Scheduling Definitions --*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//

/// c is true if mx has the worst case behavior compared to LMULs in MxList.
/// On the SiFive7, the worst case LMUL is the largest LMUL, so c is simply
/// "mx equals LargestLMUL<MxList>.r".
class SiFive7IsWorstCaseMX<string mx, list<string> MxList> {
  bit c = !eq(mx, LargestLMUL<MxList>.r);
}

/// c is true if mx and sew have the worst case behavior compared to LMULs in
/// MxList. On the SiFive7, the worst case LMUL is the largest LMUL
/// and the worst case sew is the smallest SEW for that LMUL, so c requires
/// both mx == LargestLMUL<MxList>.r and sew == SmallestSEW<mx, isF>.r.
class SiFive7IsWorstCaseMXSEW<string mx, int sew, list<string> MxList,
                               bit isF = 0> {
  bit c = !and(!eq(mx, LargestLMUL<MxList>.r),
               !eq(sew, SmallestSEW<mx, isF>.r));
}

/// Number of DLEN parts = (LMUL * VLEN) / DLEN.
/// Since DLEN = VLEN / 2, Num DLEN parts = 2 * LMUL.
class SiFive7GetCyclesDefault<string mx> {
  // c is 2 * LMUL for integral LMULs and 1 for every fractional LMUL.
  int c = !cond(
    !eq(mx, "MF8") : 1,
    !eq(mx, "MF4") : 1,
    !eq(mx, "MF2") : 1,
    !eq(mx, "M1") : 2,
    !eq(mx, "M2") : 4,
    !eq(mx, "M4") : 8,
    !eq(mx, "M8") : 16
  );
}

class SiFive7GetCyclesNarrowing<string mx> {
  // c is 4 * LMUL from MF2 upwards and 1 for MF4 and MF8.
  // There is deliberately no M8 arm in this table.
  int c = !cond(
    !eq(mx, "MF8") : 1,
    !eq(mx, "MF4") : 1,
    !eq(mx, "MF2") : 2,
    !eq(mx, "M1") : 4,
    !eq(mx, "M2") : 8,
    !eq(mx, "M4") : 16
  );
}

class SiFive7GetCyclesVMask<string mx> {
  // c is 2 for M8 and 1 for every other listed LMUL.
  int c = !cond(
    !eq(mx, "MF8") : 1,
    !eq(mx, "MF4") : 1,
    !eq(mx, "MF2") : 1,
    !eq(mx, "M1") : 1,
    !eq(mx, "M2") : 1,
    !eq(mx, "M4") : 1,
    !eq(mx, "M8") : 2
  );
}

/// VLDM and VSTM can't read/write more than 2 DLENs of data.
/// 2 DLENs when LMUL=8. 1 DLEN for all other LMULs.
class SiFive7GetMaskLoadStoreCycles<string mx> {
  int c = !if(!eq(mx, "M8"), 2, 1);
}

// Cycles for nf=2 segmented loads and stores are calculated using the
// formula (2 * VLEN * LMUL) / DLEN = 4 * LMUL
class SiFive7GetCyclesSegmentedSeg2<string mx> {
  int c = !cond(
    !eq(mx, "MF8") : 1,
    !eq(mx, "MF4") : 1,
    !eq(mx, "MF2") : 2,
    !eq(mx, "M1") : 4,
    !eq(mx, "M2") : 8,
    !eq(mx, "M4") : 16,
    !eq(mx, "M8") : 32
  );
}

// Cycles for segmented loads and stores are calculated using the
// formula vl * ceil((SEW * nf) / DLEN), where SEW * nf is the segment size.
class SiFive7GetCyclesSegmented<string mx, int sew, int nf> {
  defvar VLEN = 512;
  defvar DLEN = 256;
  // Upper bound on vl: (VLEN * LMUL) / SEW
  defvar MaxVL = !cond(
    !eq(mx, "MF8") : !div(!div(VLEN, 8), sew),
    !eq(mx, "MF4") : !div(!div(VLEN, 4), sew),
    !eq(mx, "MF2") : !div(!div(VLEN, 2), sew),
    !eq(mx, "M1") : !div(VLEN, sew),
    !eq(mx, "M2") : !div(!mul(VLEN, 2), sew),
    !eq(mx, "M4") : !div(!mul(VLEN, 4), sew),
    !eq(mx, "M8") : !div(!mul(VLEN, 8), sew)
  );
  // ceil(a / b) is computed as (a + b - 1) / b with a = segment size in bits
  // and b = DLEN.
  defvar SegmentBits = !mul(sew, nf);
  defvar BeatsPerSegment = !div(!sub(!add(SegmentBits, DLEN), 1), DLEN);
  int c = !mul(MaxVL, BeatsPerSegment);
}

class SiFive7GetCyclesOnePerElement<string mx, int sew> {
  // FIXME: On SiFive7, VLEN is 512. Although a user can request the compiler
  // to use a different VLEN, this model will not make scheduling decisions
  // based on the user specified VLEN.
  // c = ceil(VLEN / SEW) * LMUL
  // Note: the field below named VLEN actually holds 512 / sew, i.e. the
  // element count of one register. c >= 1 since its smallest value is
  // 512 / 64 = 8, and the largest division performed on it is in the MF8 case
  // with division by 8. Therefore, there is no need to ceil the result.
  int VLEN = !div(512, sew);
  int c = !cond(
    !eq(mx, "MF8") : !div(VLEN, 8),
    !eq(mx, "MF4") : !div(VLEN, 4),
    !eq(mx, "MF2") : !div(VLEN, 2),
    !eq(mx, "M1") : VLEN,
    !eq(mx, "M2") : !mul(VLEN, 2),
    !eq(mx, "M4") : !mul(VLEN, 4),
    !eq(mx, "M8") : !mul(VLEN, 8)
  );
}

class SiFive7GetDivOrSqrtFactor<int sew> {
  int c = !cond(
    // TODO: Add SchedSEWSetFP upstream and remove the SEW=8 case.
    !eq(sew, 8) : 15,
    !eq(sew, 16) : 15,
    !eq(sew, 32) : 28,
    !eq(sew, 64) : 57
  );
}

/// Cycles for reductions take approximately VL*SEW/DLEN + 5(4 + log(DLEN/SEW))
/// cycles.
class SiFive7GetReductionCycles<string mx, int sew> {
  // VLUpperBound*SEW/DLEN is equivalent to 2*LMUL since
  // VLUpperBound=(VLEN*LMUL)/SEW.
  defvar VLEN = 512;
  defvar DLEN = !div(VLEN, 2);
  defvar DoubleLMUL = !cond(
    !eq(mx, "MF8") : 1,
    !eq(mx, "MF4") : 1,
    !eq(mx, "MF2") : 1,
    !eq(mx, "M1") : 2,
    !eq(mx, "M2") : 4,
    !eq(mx, "M4") : 8,
    !eq(mx, "M8") : 16
  );
  // 5 * (4 + log2(DLEN / SEW))
  defvar TreeCycles = !mul(5, !add(4, !logtwo(!div(DLEN, sew))));
  int c = !add(DoubleLMUL, TreeCycles);
}

/// Cycles for ordered reductions take approximately 6*VL cycles
class SiFive7GetOrderedReductionCycles<string mx, int sew> {
  defvar VLEN = 512;
  // Upper bound on vl: (VLEN * LMUL) / SEW
  defvar MaxVL = !cond(
    !eq(mx, "MF8") : !div(!div(VLEN, 8), sew),
    !eq(mx, "MF4") : !div(!div(VLEN, 4), sew),
    !eq(mx, "MF2") : !div(!div(VLEN, 2), sew),
    !eq(mx, "M1") : !div(VLEN, sew),
    !eq(mx, "M2") : !div(!mul(VLEN, 2), sew),
    !eq(mx, "M4") : !div(!mul(VLEN, 4), sew),
    !eq(mx, "M8") : !div(!mul(VLEN, 8), sew)
  );
  int c = !mul(6, MaxVL);
}

// ReadAdvance of `cycles` (default 2) for `read`, valid only for the scalar
// integer, bitmanip and integer-load writes listed below.
class SiFive7AnyToGPRBypass<SchedRead read, int cycles = 2>
    : ReadAdvance<read, cycles,
                  [WriteIALU, WriteIALU32,
                   WriteShiftImm, WriteShiftImm32,
                   WriteShiftReg, WriteShiftReg32,
                   WriteSHXADD, WriteSHXADD32,
                   WriteRotateImm, WriteRotateImm32,
                   WriteRotateReg, WriteRotateReg32,
                   WriteSingleBit, WriteSingleBitImm,
                   WriteBEXT, WriteBEXTI,
                   WriteCLZ, WriteCLZ32, WriteCTZ, WriteCTZ32,
                   WriteCPOP, WriteCPOP32,
                   WriteREV8, WriteORCB, WriteIMinMax, WriteSFB,
                   WriteIMul, WriteIMul32,
                   WriteIDiv, WriteIDiv32,
                   WriteIRem, WriteIRem32,
                   WriteLDB, WriteLDH, WriteLDW, WriteLDD]>;

// SiFive7 machine model for scheduling and other instruction cost heuristics.
def SiFive7Model : SchedMachineModel {
  // Explicitly set to zero since SiFive7 is in-order.
  let MicroOpBufferSize = 0;
  // 2 micro-ops are dispatched per cycle.
  let IssueWidth = 2;
  let LoadLatency = 3;
  let MispredictPenalty = 3;
  let CompleteModel = 0;
  let EnableIntervals = true;
  let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasStdExtZbkx,
                             HasStdExtZcmt, HasStdExtZknd, HasStdExtZkne,
                             HasStdExtZknh, HasStdExtZksed, HasStdExtZksh,
                             HasStdExtZkr];
}

// The SiFive7 microarchitecture has three pipelines: A, B, V.
// Pipe A can handle memory, integer alu and vector operations.
// Pipe B can handle integer alu, control flow, integer multiply and divide,
// and floating point computation.
// The V pipeline is modeled by the VCQ, VA, VL, and VS resources.
let SchedModel = SiFive7Model in {
let BufferSize = 0 in {
def SiFive7PipeA : ProcResource<1>;
def SiFive7PipeB : ProcResource<1>;
def SiFive7IDiv : ProcResource<1>; // Int Division
def SiFive7FDiv : ProcResource<1>; // FP Division/Sqrt
def SiFive7VA : ProcResource<1>; // Arithmetic sequencer
def SiFive7VL : ProcResource<1>; // Load sequencer
def SiFive7VS : ProcResource<1>; // Store sequencer
// The VCQ accepts instructions from the A Pipe and holds them until the
// vector unit is ready to dequeue them. The unit dequeues up to one instruction
// per cycle, in order, as soon as the sequencer for that type of instruction is
// available. This resource is meant to be used for 1 cycle by all vector
// instructions, to model that only one vector instruction may be dequeued at a
// time. The actual dequeueing into the sequencer is modeled by the VA, VL, and
// VS sequencer resources below. Each of them will only accept a single
// instruction at a time and remain busy for the number of cycles associated
// with that instruction.
def SiFive7VCQ : ProcResource<1>; // Vector Command Queue
}

def SiFive7PipeAB : ProcResGroup<[SiFive7PipeA, SiFive7PipeB]>;

// Branching
let Latency = 3 in
foreach w = [WriteJmp, WriteJal, WriteJalr] in
  def : WriteRes<w, [SiFive7PipeB]>;

// Short forward branch: occupies both pipes and counts as two micro-ops.
let Latency = 3, NumMicroOps = 2 in
def : WriteRes<WriteSFB, [SiFive7PipeA, SiFive7PipeB]>;

// Integer arithmetic and logic
let Latency = 3 in
foreach w = [WriteIALU, WriteIALU32,
             WriteShiftImm, WriteShiftImm32,
             WriteShiftReg, WriteShiftReg32] in
  def : WriteRes<w, [SiFive7PipeAB]>;

// Integer multiplication
let Latency = 3 in
foreach w = [WriteIMul, WriteIMul32] in
  def : WriteRes<w, [SiFive7PipeB]>;

// Integer division
// Pipe B is released after one cycle; the divider stays busy for the rest of
// the operation.
let Latency = 66, ReleaseAtCycles = [1, 65] in
def : WriteRes<WriteIDiv, [SiFive7PipeB, SiFive7IDiv]>;
let Latency = 34, ReleaseAtCycles = [1, 33] in
def : WriteRes<WriteIDiv32, [SiFive7PipeB, SiFive7IDiv]>;

// Integer remainder
let Latency = 66, ReleaseAtCycles = [1, 65] in
def : WriteRes<WriteIRem, [SiFive7PipeB, SiFive7IDiv]>;
let Latency = 34, ReleaseAtCycles = [1, 33] in
def : WriteRes<WriteIRem32, [SiFive7PipeB, SiFive7IDiv]>;

// Bitmanip
let Latency = 3 in {
// Rotates are in the late-B ALU.
foreach w = [WriteRotateImm, WriteRotateImm32,
             WriteRotateReg, WriteRotateReg32] in
  def : WriteRes<w, [SiFive7PipeB]>;

// clz[w]/ctz[w] are in the late-B ALU.
foreach w = [WriteCLZ, WriteCLZ32, WriteCTZ, WriteCTZ32] in
  def : WriteRes<w, [SiFive7PipeB]>;

// cpop[w] look exactly like multiply.
foreach w = [WriteCPOP, WriteCPOP32] in
  def : WriteRes<w, [SiFive7PipeB]>;

// orc.b is in the late-B ALU.
def : WriteRes<WriteORCB, [SiFive7PipeB]>;

// min/max are in the late-B ALU
def : WriteRes<WriteIMinMax, [SiFive7PipeB]>;

// rev8 is in the late-A and late-B ALUs.
def : WriteRes<WriteREV8, [SiFive7PipeAB]>;

// shNadd[.uw] is on the early-B and late-B ALUs.
foreach w = [WriteSHXADD, WriteSHXADD32] in
  def : WriteRes<w, [SiFive7PipeB]>;
}

// Single-bit instructions
// BEXT[I] instruction is available on all ALUs and the other instructions
// are only available on the SiFive7B pipe.
let Latency = 3 in {
foreach w = [WriteSingleBit, WriteSingleBitImm] in
  def : WriteRes<w, [SiFive7PipeB]>;
foreach w = [WriteBEXT, WriteBEXTI] in
  def : WriteRes<w, [SiFive7PipeAB]>;
}

// Memory
// Stores (integer and FP) do not override Latency.
foreach w = [WriteSTB, WriteSTH, WriteSTW, WriteSTD,
             WriteFST16, WriteFST32, WriteFST64] in
  def : WriteRes<w, [SiFive7PipeA]>;

let Latency = 3 in
foreach w = [WriteLDB, WriteLDH, WriteLDW, WriteLDD] in
  def : WriteRes<w, [SiFive7PipeA]>;

let Latency = 2 in
foreach w = [WriteFLD16, WriteFLD32, WriteFLD64] in
  def : WriteRes<w, [SiFive7PipeA]>;

// Atomic memory
foreach w = [WriteAtomicSTW, WriteAtomicSTD] in
  def : WriteRes<w, [SiFive7PipeA]>;

// Atomic read-modify-write and load operations all issue on pipe A.
let Latency = 3 in
foreach w = [WriteAtomicW, WriteAtomicD, WriteAtomicLDW, WriteAtomicLDD] in
  def : WriteRes<w, [SiFive7PipeA]>;

// Half precision.
let Latency = 5 in
foreach w = [WriteFAdd16, WriteFMul16, WriteFMA16] in
  def : WriteRes<w, [SiFive7PipeB]>;
let Latency = 3 in
foreach w = [WriteFSGNJ16, WriteFMinMax16] in
  def : WriteRes<w, [SiFive7PipeB]>;

// Pipe B is released after one cycle; the FP divider holds the remainder.
let Latency = 14, ReleaseAtCycles = [1, 13] in
foreach w = [WriteFDiv16, WriteFSqrt16] in
  def : WriteRes<w, [SiFive7PipeB, SiFive7FDiv]>;

// Single precision.
let Latency = 5 in
foreach w = [WriteFAdd32, WriteFMul32, WriteFMA32] in
  def : WriteRes<w, [SiFive7PipeB]>;
let Latency = 3 in
foreach w = [WriteFSGNJ32, WriteFMinMax32] in
  def : WriteRes<w, [SiFive7PipeB]>;

let Latency = 27, ReleaseAtCycles = [1, 26] in
foreach w = [WriteFDiv32, WriteFSqrt32] in
  def : WriteRes<w, [SiFive7PipeB, SiFive7FDiv]>;

// Double precision
let Latency = 7 in
foreach w = [WriteFAdd64, WriteFMul64, WriteFMA64] in
  def : WriteRes<w, [SiFive7PipeB]>;
let Latency = 3 in
foreach w = [WriteFSGNJ64, WriteFMinMax64] in
  def : WriteRes<w, [SiFive7PipeB]>;

let Latency = 56, ReleaseAtCycles = [1, 55] in
foreach w = [WriteFDiv64, WriteFSqrt64] in
  def : WriteRes<w, [SiFive7PipeB, SiFive7FDiv]>;

// Conversions
let Latency = 3 in {
foreach w = [WriteFCvtI32ToF16, WriteFCvtI32ToF32, WriteFCvtI32ToF64,
             WriteFCvtI64ToF16, WriteFCvtI64ToF32, WriteFCvtI64ToF64,
             WriteFCvtF16ToI32, WriteFCvtF16ToI64,
             WriteFCvtF16ToF32, WriteFCvtF16ToF64,
             WriteFCvtF32ToI32, WriteFCvtF32ToI64,
             WriteFCvtF32ToF16, WriteFCvtF32ToF64,
             WriteFCvtF64ToI32, WriteFCvtF64ToI64,
             WriteFCvtF64ToF16, WriteFCvtF64ToF32] in
  def : WriteRes<w, [SiFive7PipeB]>;

// Classification, comparison and FP<->integer register moves.
foreach w = [WriteFClass16, WriteFClass32, WriteFClass64,
             WriteFCmp16, WriteFCmp32, WriteFCmp64,
             WriteFMovI16ToF16, WriteFMovF16ToI16,
             WriteFMovI32ToF32, WriteFMovF32ToI32,
             WriteFMovI64ToF64, WriteFMovF64ToI64] in
  def : WriteRes<w, [SiFive7PipeB]>;
}

// 6. Configuration-Setting Instructions
let Latency = 3 in
foreach w = [WriteVSETVLI, WriteVSETIVLI, WriteVSETVL] in
  def : WriteRes<w, [SiFive7PipeA]>;

// 7. Vector Loads and Stores
// Unit-stride loads and stores can operate at the full bandwidth of the memory
// pipe. The memory pipe is DLEN bits wide on x280.
// Unit-stride loads/stores: the VCQ is held for cycle [0, 1) and the load or
// store sequencer from cycle 1 until 1 + SiFive7GetCyclesDefault<mx>.c.
foreach mx = SchedMxList in {
  defvar Occupancy = SiFive7GetCyclesDefault<mx>.c;
  defvar Worst = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Occupancy)] in {
    let Latency = 4 in
    foreach wr = ["WriteVLDE", "WriteVLDFF"] in
      defm "" : LMULWriteResMX<wr, [SiFive7VCQ, SiFive7VL], mx, Worst>;
    let Latency = 1 in
    defm "" : LMULWriteResMX<"WriteVSTE", [SiFive7VCQ, SiFive7VS], mx, Worst>;
  }
}

// Mask loads/stores: same shape as above, but the sequencer occupancy comes
// from SiFive7GetMaskLoadStoreCycles<mx>.c.
foreach mx = SchedMxList in {
  defvar Occupancy = SiFive7GetMaskLoadStoreCycles<mx>.c;
  defvar Worst = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Occupancy)] in {
    let Latency = 4 in
    defm "" : LMULWriteResMX<"WriteVLDM", [SiFive7VCQ, SiFive7VL], mx, Worst>;
    let Latency = 1 in
    defm "" : LMULWriteResMX<"WriteVSTM", [SiFive7VCQ, SiFive7VS], mx, Worst>;
  }
}

// Strided loads and stores operate at one element per cycle and should be
// scheduled accordingly. Indexed loads and stores operate at one element per
// cycle, and they stall the machine until all addresses have been generated,
// so they cannot be scheduled. Indexed and strided loads and stores have LMUL
// specific suffixes, but since SEW is already encoded in the name of the
// resource, we do not need to use LMULSEWXXX constructors. However, we do
// use the SEW from the name to determine the number of Cycles.

// This predicate is true when the rs2 operand of vlse or vsse is x0, false
// otherwise.
// Holds when operand 3 of the instruction is the X0 register.
def VLDSX0Pred : MCSchedPredicate<CheckRegOperand<3, X0>>;

// EEW=8 strided and indexed loads/stores.
foreach mx = SchedMxList in {
  defvar UnitStrideCycles = SiFive7GetCyclesDefault<mx>.c;
  defvar PerElementCycles = SiFive7GetCyclesOnePerElement<mx, 8>.c;
  defvar Worst = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  // NOTE(review): presumably the first latency/acquire/release triple applies
  // when VLDSX0Pred holds and the second one otherwise -- confirm against the
  // definition of LMULWriteResMXVariant.
  defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS8", VLDSX0Pred,
                                       [SiFive7VCQ, SiFive7VL],
                                       4, [0, 1],
                                       [1, !add(1, UnitStrideCycles)],
                                       !add(3, PerElementCycles),
                                       [0, 1],
                                       [1, !add(1, PerElementCycles)],
                                       mx, Worst>;
  let AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, PerElementCycles)] in {
    let Latency = !add(3, PerElementCycles) in
    foreach wr = ["WriteVLDUX8", "WriteVLDOX8"] in
      defm "" : LMULWriteResMX<wr, [SiFive7VCQ, SiFive7VL], mx, Worst>;
    let Latency = 1 in
    foreach wr = ["WriteVSTS8", "WriteVSTUX8", "WriteVSTOX8"] in
      defm "" : LMULWriteResMX<wr, [SiFive7VCQ, SiFive7VS], mx, Worst>;
  }
}
// TODO: The MxLists need to be filtered by EEW. We only need to support
// LMUL >= SEW_min/ELEN. Here, the smallest EEW prevents us from having MF8
// since LMUL >= 16/64.
506foreach mx = ["MF4", "MF2", "M1", "M2", "M4", "M8"] in { 507 defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c; 508 defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 16>.c; 509 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c; 510 defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS16", VLDSX0Pred, [SiFive7VCQ, SiFive7VL], 511 4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles), 512 [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>; 513 let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { 514 defm "" : LMULWriteResMX<"WriteVLDUX16", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>; 515 defm "" : LMULWriteResMX<"WriteVLDOX16", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>; 516 } 517 let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { 518 defm "" : LMULWriteResMX<"WriteVSTS16", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>; 519 defm "" : LMULWriteResMX<"WriteVSTUX16", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>; 520 defm "" : LMULWriteResMX<"WriteVSTOX16", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>; 521 } 522} 523foreach mx = ["MF2", "M1", "M2", "M4", "M8"] in { 524 defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c; 525 defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 32>.c; 526 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c; 527 defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS32", VLDSX0Pred, [SiFive7VCQ, SiFive7VL], 528 4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles), 529 [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>; 530 let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { 531 defm "" : LMULWriteResMX<"WriteVLDUX32", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>; 532 defm "" : LMULWriteResMX<"WriteVLDOX32", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>; 533 } 534 let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { 535 defm "" : LMULWriteResMX<"WriteVSTS32", [SiFive7VCQ, SiFive7VS], mx, 
IsWorstCase>; 536 defm "" : LMULWriteResMX<"WriteVSTUX32", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>; 537 defm "" : LMULWriteResMX<"WriteVSTOX32", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>; 538 } 539} 540foreach mx = ["M1", "M2", "M4", "M8"] in { 541 defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c; 542 defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 64>.c; 543 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c; 544 defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS64", VLDSX0Pred, [SiFive7VCQ, SiFive7VL], 545 4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles), 546 [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>; 547 let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { 548 defm "" : LMULWriteResMX<"WriteVLDUX64", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>; 549 defm "" : LMULWriteResMX<"WriteVLDOX64", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>; 550 } 551 let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { 552 defm "" : LMULWriteResMX<"WriteVSTS64", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>; 553 defm "" : LMULWriteResMX<"WriteVSTUX64", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>; 554 defm "" : LMULWriteResMX<"WriteVSTOX64", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>; 555 } 556} 557 558// VLD*R is LMUL aware 559let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 2)] in 560 def : WriteRes<WriteVLD1R, [SiFive7VCQ, SiFive7VL]>; 561let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 4)] in 562 def : WriteRes<WriteVLD2R, [SiFive7VCQ, SiFive7VL]>; 563let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 8)] in 564 def : WriteRes<WriteVLD4R, [SiFive7VCQ, SiFive7VL]>; 565let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 16)] in 566 def : WriteRes<WriteVLD8R, [SiFive7VCQ, SiFive7VL]>; 567// VST*R is LMUL aware 568let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 2)] in 569 def : 
WriteRes<WriteVST1R, [SiFive7VCQ, SiFive7VS]>; 570let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 4)] in 571 def : WriteRes<WriteVST2R, [SiFive7VCQ, SiFive7VS]>; 572let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 8)] in 573 def : WriteRes<WriteVST4R, [SiFive7VCQ, SiFive7VS]>; 574let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 16)] in 575 def : WriteRes<WriteVST8R, [SiFive7VCQ, SiFive7VS]>; 576 577// Segmented Loads and Stores 578// Unit-stride segmented loads and stores are effectively converted into strided 579// segment loads and stores. Strided segment loads and stores operate at up to 580// one segment per cycle if the segment fits within one aligned memory beat. 581// Indexed segment loads and stores operate at the same rate as strided ones, 582// but they stall the machine until all addresses have been generated. 583foreach mx = SchedMxList in { 584 foreach eew = [8, 16, 32, 64] in { 585 defvar Cycles = SiFive7GetCyclesSegmentedSeg2<mx>.c; 586 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c; 587 // Does not chain so set latency high 588 let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { 589 defm "" : LMULWriteResMX<"WriteVLSEG2e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>; 590 defm "" : LMULWriteResMX<"WriteVLSEGFF2e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>; 591 } 592 let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in 593 defm "" : LMULWriteResMX<"WriteVSSEG2e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>; 594 foreach nf=3-8 in { 595 defvar Cycles = SiFive7GetCyclesSegmented<mx, eew, nf>.c; 596 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c; 597 // Does not chain so set latency high 598 let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { 599 defm "" : LMULWriteResMX<"WriteVLSEG" # nf # "e" # eew, [SiFive7VCQ, 
SiFive7VL], mx, IsWorstCase>; 600 defm "" : LMULWriteResMX<"WriteVLSEGFF" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>; 601 } 602 let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in 603 defm "" : LMULWriteResMX<"WriteVSSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>; 604 } 605 } 606} 607foreach mx = SchedMxList in { 608 foreach nf=2-8 in { 609 foreach eew = [8, 16, 32, 64] in { 610 defvar Cycles = SiFive7GetCyclesSegmented<mx, eew, nf>.c; 611 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c; 612 // Does not chain so set latency high 613 let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { 614 defm "" : LMULWriteResMX<"WriteVLSSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>; 615 defm "" : LMULWriteResMX<"WriteVLUXSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>; 616 defm "" : LMULWriteResMX<"WriteVLOXSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>; 617 } 618 let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { 619 defm "" : LMULWriteResMX<"WriteVSSSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>; 620 defm "" : LMULWriteResMX<"WriteVSUXSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>; 621 defm "" : LMULWriteResMX<"WriteVSOXSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>; 622 } 623 } 624 } 625} 626 627// 11. 
Vector Integer Arithmetic Instructions 628foreach mx = SchedMxList in { 629 defvar Cycles = SiFive7GetCyclesDefault<mx>.c; 630 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c; 631 let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { 632 defm "" : LMULWriteResMX<"WriteVIALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 633 defm "" : LMULWriteResMX<"WriteVIALUX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 634 defm "" : LMULWriteResMX<"WriteVIALUI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 635 defm "" : LMULWriteResMX<"WriteVICALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 636 defm "" : LMULWriteResMX<"WriteVICALUX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 637 defm "" : LMULWriteResMX<"WriteVICALUI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 638 defm "" : LMULWriteResMX<"WriteVShiftV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 639 defm "" : LMULWriteResMX<"WriteVShiftX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 640 defm "" : LMULWriteResMX<"WriteVShiftI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 641 defm "" : LMULWriteResMX<"WriteVIMinMaxV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 642 defm "" : LMULWriteResMX<"WriteVIMinMaxX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 643 defm "" : LMULWriteResMX<"WriteVIMulV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 644 defm "" : LMULWriteResMX<"WriteVIMulX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 645 defm "" : LMULWriteResMX<"WriteVIMulAddV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 646 defm "" : LMULWriteResMX<"WriteVIMulAddX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 647 defm "" : LMULWriteResMX<"WriteVIMergeV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 648 defm "" : LMULWriteResMX<"WriteVIMergeX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 649 defm "" : LMULWriteResMX<"WriteVIMergeI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 650 defm "" : LMULWriteResMX<"WriteVIMovV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 651 defm "" : 
LMULWriteResMX<"WriteVIMovX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIMovI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
  }
  // Mask results can't chain.
  let Latency = !add(Cycles, 3), AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, Cycles)] in {
    foreach wname = ["WriteVICmpV", "WriteVICmpX", "WriteVICmpI"] in
      defm "" : LMULWriteResMX<wname, [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
  }
}

// WriteVExtV: default per-LMUL cycle count with a fixed latency of 4.
foreach mx = SchedMxList in {
  defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 4, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVExtV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
  }
}

// Integer divide: the cycle count is SiFive7GetCyclesOnePerElement divided by
// 4 and scaled by SiFive7GetDivOrSqrtFactor for the SEW. The same value is
// used as the latency.
foreach mx = SchedMxList in {
  foreach sew = SchedSEWSet<mx>.val in {
    defvar Cycles = !mul(SiFive7GetDivOrSqrtFactor<sew>.c,
                         !div(SiFive7GetCyclesOnePerElement<mx, sew>.c, 4));
    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
    let Latency = Cycles, AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, Cycles)] in {
      foreach wname = ["WriteVIDivV", "WriteVIDivX"] in
        defm "" : LMULSEWWriteResMXSEW<wname, [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
    }
  }
}

// Widening
foreach mx = SchedMxListW in {
  defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
  let Latency = 8, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, Cycles)] in {
    foreach wname = ["WriteVIWALUV", "WriteVIWALUX", "WriteVIWALUI",
                     "WriteVIWMulV", "WriteVIWMulX",
                     "WriteVIWMulAddV", "WriteVIWMulAddX"] in
      defm "" : LMULWriteResMX<wname, [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
  }
}
// Narrowing
foreach mx = SchedMxListW in {
  defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
  let Latency = 8, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, Cycles)] in {
    foreach wname = ["WriteVNShiftV", "WriteVNShiftX", "WriteVNShiftI"] in
      defm "" : LMULWriteResMX<wname, [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
  }
}

// 12. Vector Fixed-Point Arithmetic Instructions
foreach mx = SchedMxList in {
  defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 8, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, Cycles)] in {
    foreach wname = ["WriteVSALUV", "WriteVSALUX", "WriteVSALUI",
                     "WriteVAALUV", "WriteVAALUX",
                     "WriteVSMulV", "WriteVSMulX",
                     "WriteVSShiftV", "WriteVSShiftX", "WriteVSShiftI"] in
      defm "" : LMULWriteResMX<wname, [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
  }
}
// Narrowing
foreach mx = SchedMxListW in {
  defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
  let Latency = 8, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, Cycles)] in {
    foreach wname = ["WriteVNClipV", "WriteVNClipX", "WriteVNClipI"] in
      defm "" : LMULWriteResMX<wname, [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
  }
}

// 13. Vector Floating-Point Instructions
foreach mx = SchedMxListF in {
  foreach sew = SchedSEWSet<mx, isF=1>.val in {
    defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, isF=1>.c;
    let Latency = 8, AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, Cycles)] in {
      foreach wname = ["WriteVFALUV", "WriteVFALUF",
                       "WriteVFMulV", "WriteVFMulF",
                       "WriteVFMulAddV", "WriteVFMulAddF",
                       "WriteVFRecpV", "WriteVFCvtIToFV"] in
        defm "" : LMULSEWWriteResMXSEW<wname, [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
    }
    let Latency = 4, AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, Cycles)] in {
      foreach wname = ["WriteVFMinMaxV", "WriteVFMinMaxF",
                       "WriteVFSgnjV", "WriteVFSgnjF"] in
        defm "" : LMULSEWWriteResMXSEW<wname, [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
    }
  }
}
foreach mx = SchedMxList in {
  defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 8, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVFCvtFToIV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
  }
  let Latency = 4, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, Cycles)] in {
    foreach wname = ["WriteVFClassV", "WriteVFMergeV", "WriteVFMovV"] in
      defm "" : LMULWriteResMX<wname, [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
  }
  // Mask results can't chain.
  let Latency = !add(Cycles, 3), AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, Cycles)] in {
    foreach wname = ["WriteVFCmpV", "WriteVFCmpF"] in
      defm "" : LMULWriteResMX<wname, [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
  }
}
// Floating-point sqrt/divide use the same cycle formula as integer divide,
// and the cycle count is again used as the latency.
foreach mx = SchedMxListF in {
  foreach sew = SchedSEWSet<mx, isF=1>.val in {
    defvar Cycles = !mul(SiFive7GetDivOrSqrtFactor<sew>.c,
                         !div(SiFive7GetCyclesOnePerElement<mx, sew>.c, 4));
    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, isF=1>.c;
    let Latency = Cycles, AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, Cycles)] in {
      foreach wname = ["WriteVFSqrtV", "WriteVFDivV", "WriteVFDivF"] in
        defm "" : LMULSEWWriteResMXSEW<wname, [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
    }
  }
}

// Widening
foreach mx = SchedMxListW in {
  foreach sew = SchedSEWSet<mx, isF=0, isWidening=1>.val in {
    defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListW>.c;
    let Latency = 8, AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, Cycles)] in
    defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
  }
}
foreach mx = SchedMxListFW in {
  foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
    defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;
    let Latency = 8, AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, Cycles)] in {
      foreach wname = ["WriteVFWALUV", "WriteVFWALUF",
                       "WriteVFWMulV", "WriteVFWMulF",
                       "WriteVFWMulAddV", "WriteVFWMulAddF",
                       "WriteVFWCvtFToFV"] in
        defm "" : LMULSEWWriteResMXSEW<wname, [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
    }
  }
  // WriteVFWCvtFToIV is keyed by LMUL only, so it is defined outside the
  // per-SEW loop.
  defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListFW>.c;
  let Latency = 8, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, Cycles)] in
  defm "" : LMULWriteResMX<"WriteVFWCvtFToIV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
// Narrowing
foreach mx = SchedMxListW in {
  defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
  let Latency = 8, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVFNCvtFToIV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
  }
}
foreach mx = SchedMxListFW in {
  foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
    defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c;
    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;
    let Latency = 8, AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, Cycles)] in {
      foreach wname = ["WriteVFNCvtIToFV", "WriteVFNCvtFToFV"] in
        defm "" : LMULSEWWriteResMXSEW<wname, [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
    }
  }
}

// 14.
// Vector Reduction Operations
foreach mx = SchedMxList in {
  foreach sew = SchedSEWSet<mx>.val in {
    // The reduction cycle count is used for both the latency and the
    // SiFive7VA release cycle.
    defvar Cycles = SiFive7GetReductionCycles<mx, sew>.c;
    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
    let Latency = Cycles, AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, Cycles)] in {
      foreach wname = ["WriteVIRedV_From", "WriteVIRedMinMaxV_From"] in
        defm "" : LMULSEWWriteResMXSEW<wname, [SiFive7VCQ, SiFive7VA],
                                       mx, sew, IsWorstCase>;
    }
  }
}

foreach mx = SchedMxListWRed in {
  foreach sew = SchedSEWSet<mx, isF=0, isWidening=1>.val in {
    defvar Cycles = SiFive7GetReductionCycles<mx, sew>.c;
    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListWRed>.c;
    let Latency = Cycles, AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, Cycles)] in
    defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From", [SiFive7VCQ, SiFive7VA],
                                   mx, sew, IsWorstCase>;
  }
}

foreach mx = SchedMxListF in {
  foreach sew = SchedSEWSet<mx, isF=1>.val in {
    defvar RedCycles = SiFive7GetReductionCycles<mx, sew>.c;
    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, isF=1>.c;
    let Latency = RedCycles, AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, RedCycles)] in {
      foreach wname = ["WriteVFRedV_From", "WriteVFRedMinMaxV_From"] in
        defm "" : LMULSEWWriteResMXSEW<wname, [SiFive7VCQ, SiFive7VA],
                                       mx, sew, IsWorstCase>;
    }
    // Ordered reductions take their cycle count from a separate helper.
    defvar OrdRedCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c;
    let Latency = OrdRedCycles, AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, OrdRedCycles)] in
    defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [SiFive7VCQ, SiFive7VA],
                                   mx, sew, IsWorstCase>;
  }
}

foreach mx = SchedMxListFWRed in {
  foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
    defvar RedCycles = SiFive7GetReductionCycles<mx, sew>.c;
    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFWRed, isF=1>.c;
    let Latency = RedCycles, AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, RedCycles)] in
    defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [SiFive7VCQ, SiFive7VA],
                                   mx, sew, IsWorstCase>;
    defvar OrdRedCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c;
    let Latency = OrdRedCycles, AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, OrdRedCycles)] in
    defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [SiFive7VCQ, SiFive7VA],
                                   mx, sew, IsWorstCase>;
  }
}

// 15. Vector Mask Instructions
foreach mx = SchedMxList in {
  // These writes use the SiFive7GetCyclesVMask cycle count.
  defvar Cycles = SiFive7GetCyclesVMask<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 4, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, Cycles)] in {
    foreach wname = ["WriteVMALUV", "WriteVMPopV", "WriteVMFFSV", "WriteVMSFSV"] in
      defm "" : LMULWriteResMX<wname, [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
  }
}
foreach mx = SchedMxList in {
  // WriteVIotaV and WriteVIdxV use the default per-LMUL cycle count instead.
  defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 4, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, Cycles)] in {
    foreach wname = ["WriteVIotaV", "WriteVIdxV"] in
      defm "" : LMULWriteResMX<wname, [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
  }
}

// 16.
// Vector Permutation Instructions
let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 1)] in {
  foreach wr = [WriteVMovSX, WriteVMovXS, WriteVMovSF, WriteVMovFS] in
    def : WriteRes<wr, [SiFive7VCQ, SiFive7VA]>;
}
foreach mx = SchedMxList in {
  defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 8, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, Cycles)] in {
    foreach wname = ["WriteVRGatherVX", "WriteVRGatherVI"] in
      defm "" : LMULWriteResMX<wname, [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
  }
}

foreach mx = SchedMxList in {
  foreach sew = SchedSEWSet<mx>.val in {
    // Cycle count comes from SiFive7GetCyclesOnePerElement; latency is that
    // count plus 3.
    defvar Cycles = SiFive7GetCyclesOnePerElement<mx, sew>.c;
    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
    let Latency = !add(Cycles, 3), AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, Cycles)] in {
      foreach wname = ["WriteVRGatherVV", "WriteVRGatherEI16VV", "WriteVCompressV"] in
        defm "" : LMULSEWWriteResMXSEW<wname, [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
    }
  }
}

foreach mx = SchedMxList in {
  defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 4, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, Cycles)] in {
    foreach wname = ["WriteVSlideUpX", "WriteVSlideDownX", "WriteVSlideI",
                     "WriteVISlide1X", "WriteVFSlide1F"] in
      defm "" : LMULWriteResMX<wname, [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
  }
}

// VMov*V is LMUL Aware
// WriteVMov<nr>V releases SiFive7VA at cycle 1 + 2 * nr, i.e. the same
// 2/4/8/16 cycle counts SiFive7GetCyclesDefault gives for M1/M2/M4/M8.
foreach nr = [1, 2, 4, 8] in {
  let Latency = 4, AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, !mul(2, nr))] in
  def : WriteRes<!cast<SchedWrite>("WriteVMov" # nr # "V"), [SiFive7VCQ, SiFive7VA]>;
}

// Others
def : WriteRes<WriteCSR, [SiFive7PipeB]>;
def : WriteRes<WriteNop, []>;
let Latency = 3 in
  def : WriteRes<WriteRdVLENB, [SiFive7PipeB]>;

def : InstRW<[WriteIALU], (instrs COPY)>;

// VCIX
//
// In principle we don't know the latency of any VCIX instructions. But instead
// of taking the default of 1, which can lead to issues [1], we assume that they
// have a fairly high latency.
//
// [1] https://github.com/llvm/llvm-project/issues/83391
foreach mx = SchedMxList in {
  defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  // Latency is ten times the default per-LMUL cycle count.
  let Latency = !mul(Cycles, 10),
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, Cycles)] in {
    // The "WriteVC_V_*" writes are emitted first, then the "WriteVC_*" writes;
    // both families share the same operand suffixes.
    foreach prefix = ["WriteVC_V_", "WriteVC_"] in {
      foreach sfx = ["I", "X", "IV", "VV", "XV", "IVV", "IVW",
                     "VVV", "VVW", "XVV", "XVW"] in
        defm "" : LMULWriteResMX<prefix # sfx, [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
      foreach f = ["FPR16", "FPR32", "FPR64"] in {
        foreach sfx = ["V", "VV", "VW"] in
          defm "" : LMULWriteResMX<prefix # f # sfx, [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
      }
    }
  }
}

//===----------------------------------------------------------------------===//

// Bypass and advance
def : SiFive7AnyToGPRBypass<ReadJmp>;
def : SiFive7AnyToGPRBypass<ReadJalr>;
def : ReadAdvance<ReadCSR, 0>;
def : SiFive7AnyToGPRBypass<ReadStoreData>;
def : ReadAdvance<ReadMemBase, 0>;
foreach rd = [ReadIALU, ReadIALU32,
              ReadShiftImm, ReadShiftImm32,
              ReadShiftReg, ReadShiftReg32] in
  def : SiFive7AnyToGPRBypass<rd>;
foreach rd = [ReadIDiv, ReadIDiv32, ReadIRem, ReadIRem32,
              ReadIMul, ReadIMul32,
              ReadAtomicWA, ReadAtomicWD, ReadAtomicDA, ReadAtomicDD,
              ReadAtomicLDW] in
  def : ReadAdvance<rd, 0>;
// Remaining atomic and scalar floating-point reads: advance of 0 cycles.
foreach rd = [ReadAtomicLDD, ReadAtomicSTW, ReadAtomicSTD,
              ReadFStoreData, ReadFMemBase,
              ReadFAdd16, ReadFAdd32, ReadFAdd64,
              ReadFMul16, ReadFMA16, ReadFMA16Addend,
              ReadFMul32, ReadFMul64,
              ReadFMA32, ReadFMA32Addend,
              ReadFMA64, ReadFMA64Addend,
              ReadFDiv16, ReadFDiv32, ReadFDiv64,
              ReadFSqrt16, ReadFSqrt32, ReadFSqrt64,
              ReadFCmp16, ReadFCmp32, ReadFCmp64,
              ReadFSGNJ16, ReadFSGNJ32, ReadFSGNJ64,
              ReadFMinMax16, ReadFMinMax32, ReadFMinMax64,
              ReadFCvtF16ToI32, ReadFCvtF16ToI64,
              ReadFCvtF32ToI32, ReadFCvtF32ToI64,
              ReadFCvtF64ToI32, ReadFCvtF64ToI64,
              ReadFCvtI32ToF16, ReadFCvtI32ToF32, ReadFCvtI32ToF64,
              ReadFCvtI64ToF16, ReadFCvtI64ToF32, ReadFCvtI64ToF64,
              ReadFCvtF32ToF64, ReadFCvtF64ToF32,
              ReadFCvtF16ToF32, ReadFCvtF32ToF16,
              ReadFCvtF16ToF64, ReadFCvtF64ToF16,
              ReadFMovF16ToI16, ReadFMovI16ToF16,
              ReadFMovF32ToI32, ReadFMovI32ToF32,
              ReadFMovF64ToI64, ReadFMovI64ToF64,
              ReadFClass16, ReadFClass32, ReadFClass64] in
  def : ReadAdvance<rd, 0>;

// The SFB reads pass an explicit 0 as the bypass's second argument.
def : SiFive7AnyToGPRBypass<ReadSFBJmp, 0>;
def : SiFive7AnyToGPRBypass<ReadSFBALU, 0>;

// Bitmanip
foreach rd = [ReadRotateImm, ReadRotateImm32,
              ReadRotateReg, ReadRotateReg32,
              ReadCLZ, ReadCLZ32, ReadCTZ, ReadCTZ32] in
  def : SiFive7AnyToGPRBypass<rd>;
def : ReadAdvance<ReadCPOP, 0>;
def : ReadAdvance<ReadCPOP32, 0>;
foreach rd = [ReadORCB, ReadIMinMax, ReadREV8, ReadSHXADD, ReadSHXADD32] in
  def : SiFive7AnyToGPRBypass<rd>;
// Single-bit instructions
def : SiFive7AnyToGPRBypass<ReadSingleBit>;
def : SiFive7AnyToGPRBypass<ReadSingleBitImm>;

// 6. Configuration-Setting Instructions
def : ReadAdvance<ReadVSETVLI, 2>;
def : ReadAdvance<ReadVSETVL, 2>;

// 7.
// Vector Loads and Stores
// Every read in this section gets an advance of 0 cycles.
def : ReadAdvance<ReadVLDX, 0>;
def : ReadAdvance<ReadVSTX, 0>;
defm "" : LMULReadAdvance<"ReadVSTEV", 0>;
defm "" : LMULReadAdvance<"ReadVSTM", 0>;
def : ReadAdvance<ReadVLDSX, 0>;
def : ReadAdvance<ReadVSTSX, 0>;
defm "" : LMULReadAdvance<"ReadVSTS8V", 0>;
defm "" : LMULReadAdvance<"ReadVSTS16V", 0>;
defm "" : LMULReadAdvance<"ReadVSTS32V", 0>;
defm "" : LMULReadAdvance<"ReadVSTS64V", 0>;
defm "" : LMULReadAdvance<"ReadVLDUXV", 0>;
defm "" : LMULReadAdvance<"ReadVLDOXV", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX8", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX16", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX32", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX64", 0>;
defm "" : LMULReadAdvance<"ReadVSTUXV", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX8V", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX16V", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX32V", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX64V", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX8", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX16", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX32", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX64", 0>;
defm "" : LMULReadAdvance<"ReadVSTOXV", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX8V", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX16V", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX32V", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX64V", 0>;
// LMUL Aware
def : ReadAdvance<ReadVST1R, 0>;
def : ReadAdvance<ReadVST2R, 0>;
def : ReadAdvance<ReadVST4R, 0>;
def : ReadAdvance<ReadVST8R, 0>;

// 11.
// Vector Integer Arithmetic Instructions
// Every read in this section gets an advance of 0 cycles.
defm : LMULReadAdvance<"ReadVIALUV", 0>;
defm : LMULReadAdvance<"ReadVIALUX", 0>;
defm : LMULReadAdvanceW<"ReadVIWALUV", 0>;
defm : LMULReadAdvanceW<"ReadVIWALUX", 0>;
defm : LMULReadAdvance<"ReadVExtV", 0>;
defm : LMULReadAdvance<"ReadVICALUV", 0>;
defm : LMULReadAdvance<"ReadVICALUX", 0>;
defm : LMULReadAdvance<"ReadVShiftV", 0>;
defm : LMULReadAdvance<"ReadVShiftX", 0>;
defm : LMULReadAdvanceW<"ReadVNShiftV", 0>;
defm : LMULReadAdvanceW<"ReadVNShiftX", 0>;
defm : LMULReadAdvance<"ReadVICmpV", 0>;
defm : LMULReadAdvance<"ReadVICmpX", 0>;
defm : LMULReadAdvance<"ReadVIMinMaxV", 0>;
defm : LMULReadAdvance<"ReadVIMinMaxX", 0>;
defm : LMULReadAdvance<"ReadVIMulV", 0>;
defm : LMULReadAdvance<"ReadVIMulX", 0>;
defm : LMULSEWReadAdvance<"ReadVIDivV", 0>;
defm : LMULSEWReadAdvance<"ReadVIDivX", 0>;
defm : LMULReadAdvanceW<"ReadVIWMulV", 0>;
defm : LMULReadAdvanceW<"ReadVIWMulX", 0>;
defm : LMULReadAdvance<"ReadVIMulAddV", 0>;
defm : LMULReadAdvance<"ReadVIMulAddX", 0>;
defm : LMULReadAdvanceW<"ReadVIWMulAddV", 0>;
defm : LMULReadAdvanceW<"ReadVIWMulAddX", 0>;
defm : LMULReadAdvance<"ReadVIMergeV", 0>;
defm : LMULReadAdvance<"ReadVIMergeX", 0>;
defm : LMULReadAdvance<"ReadVIMovV", 0>;
defm : LMULReadAdvance<"ReadVIMovX", 0>;

// 12. Vector Fixed-Point Arithmetic Instructions
defm "" : LMULReadAdvance<"ReadVSALUV", 0>;
defm "" : LMULReadAdvance<"ReadVSALUX", 0>;
defm "" : LMULReadAdvance<"ReadVAALUV", 0>;
defm "" : LMULReadAdvance<"ReadVAALUX", 0>;
defm "" : LMULReadAdvance<"ReadVSMulV", 0>;
defm "" : LMULReadAdvance<"ReadVSMulX", 0>;
defm "" : LMULReadAdvance<"ReadVSShiftV", 0>;
defm "" : LMULReadAdvance<"ReadVSShiftX", 0>;
defm "" : LMULReadAdvanceW<"ReadVNClipV", 0>;
defm "" : LMULReadAdvanceW<"ReadVNClipX", 0>;

// 13.
// Vector Floating-Point Instructions
// Every read in this section gets an advance of 0 cycles.
defm "" : LMULSEWReadAdvanceF<"ReadVFALUV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFALUF", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWALUV", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWALUF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFMulV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFMulF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFDivV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFDivF", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulV", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFMulAddV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFMulAddF", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddV", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFSqrtV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFRecpV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFMinMaxV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFMinMaxF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFSgnjV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFSgnjF", 0>;
defm "" : LMULReadAdvance<"ReadVFCmpV", 0>;
defm "" : LMULReadAdvance<"ReadVFCmpF", 0>;
defm "" : LMULReadAdvance<"ReadVFClassV", 0>;
defm "" : LMULReadAdvance<"ReadVFMergeV", 0>;
defm "" : LMULReadAdvance<"ReadVFMergeF", 0>;
defm "" : LMULReadAdvance<"ReadVFMovF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFCvtIToFV", 0>;
defm "" : LMULReadAdvance<"ReadVFCvtFToIV", 0>;
defm "" : LMULSEWReadAdvanceW<"ReadVFWCvtIToFV", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWCvtFToIV", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWCvtFToFV", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFNCvtIToFV", 0>;
defm "" : LMULReadAdvanceW<"ReadVFNCvtFToIV", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFNCvtFToFV", 0>;

// 14.
// Vector Reduction Operations
// Every read from here through the "Others" group gets an advance of 0 cycles.
def : ReadAdvance<ReadVIRedV, 0>;
def : ReadAdvance<ReadVIRedV0, 0>;
def : ReadAdvance<ReadVIWRedV, 0>;
def : ReadAdvance<ReadVIWRedV0, 0>;
def : ReadAdvance<ReadVFRedV, 0>;
def : ReadAdvance<ReadVFRedV0, 0>;
def : ReadAdvance<ReadVFRedOV, 0>;
def : ReadAdvance<ReadVFRedOV0, 0>;
def : ReadAdvance<ReadVFWRedV, 0>;
def : ReadAdvance<ReadVFWRedV0, 0>;
def : ReadAdvance<ReadVFWRedOV, 0>;
def : ReadAdvance<ReadVFWRedOV0, 0>;

// 15. Vector Mask Instructions
defm "" : LMULReadAdvance<"ReadVMALUV", 0>;
defm "" : LMULReadAdvance<"ReadVMPopV", 0>;
defm "" : LMULReadAdvance<"ReadVMFFSV", 0>;
defm "" : LMULReadAdvance<"ReadVMSFSV", 0>;
defm "" : LMULReadAdvance<"ReadVIotaV", 0>;

// 16. Vector Permutation Instructions
def : ReadAdvance<ReadVMovXS, 0>;
def : ReadAdvance<ReadVMovSX_V, 0>;
def : ReadAdvance<ReadVMovSX_X, 0>;
def : ReadAdvance<ReadVMovFS, 0>;
def : ReadAdvance<ReadVMovSF_V, 0>;
def : ReadAdvance<ReadVMovSF_F, 0>;
defm "" : LMULReadAdvance<"ReadVISlideV", 0>;
defm "" : LMULReadAdvance<"ReadVISlideX", 0>;
defm "" : LMULReadAdvance<"ReadVFSlideV", 0>;
defm "" : LMULReadAdvance<"ReadVFSlideF", 0>;
defm "" : LMULSEWReadAdvance<"ReadVRGatherVV_data", 0>;
defm "" : LMULSEWReadAdvance<"ReadVRGatherVV_index", 0>;
defm "" : LMULSEWReadAdvance<"ReadVRGatherEI16VV_data", 0>;
defm "" : LMULSEWReadAdvance<"ReadVRGatherEI16VV_index", 0>;
defm "" : LMULReadAdvance<"ReadVRGatherVX_data", 0>;
defm "" : LMULReadAdvance<"ReadVRGatherVX_index", 0>;
defm "" : LMULReadAdvance<"ReadVRGatherVI_data", 0>;
defm "" : LMULSEWReadAdvance<"ReadVCompressV", 0>;
// LMUL Aware
def : ReadAdvance<ReadVMov1V, 0>;
def : ReadAdvance<ReadVMov2V, 0>;
def : ReadAdvance<ReadVMov4V, 0>;
def : ReadAdvance<ReadVMov8V, 0>;

// Others
def : ReadAdvance<ReadVMask, 0>;
def : ReadAdvance<ReadVMergeOp_WorstCase, 0>;
// One ReadVMergeOp entry per LMUL, plus one per (LMUL, SEW) pair.
foreach mx = SchedMxList in {
  def : ReadAdvance<!cast<SchedRead>("ReadVMergeOp_" # mx), 0>;
  foreach sew = SchedSEWSet<mx>.val in
    def : ReadAdvance<!cast<SchedRead>("ReadVMergeOp_" # mx # "_E" # sew), 0>;
}

//===----------------------------------------------------------------------===//
// Unsupported extensions
defm : UnsupportedSchedZabha;
defm : UnsupportedSchedZbc;
defm : UnsupportedSchedZbkb;
defm : UnsupportedSchedZbkx;
defm : UnsupportedSchedZfa;
defm : UnsupportedSchedZvk;
}