//==- RISCVSchedSiFive7.td - SiFive7 Scheduling Definitions --*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//

/// Worst-case LMUL predicate: c is set exactly when mx equals the largest
/// LMUL found in MxList (as reported by LargestLMUL).
class SiFive7IsWorstCaseMX<string mx, list<string> MxList> {
  bit c = !eq(mx, LargestLMUL<MxList>.r);
}

/// Worst-case (LMUL, SEW) predicate: c is set exactly when mx equals the
/// largest LMUL found in MxList and sew equals the smallest SEW that
/// SmallestSEW reports for that mx (isF is forwarded to SmallestSEW as-is).
class SiFive7IsWorstCaseMXSEW<string mx, int sew, list<string> MxList,
                               bit isF = 0> {
  defvar IsLargestLMUL = !eq(mx, LargestLMUL<MxList>.r);
  defvar IsSmallestSEW = !eq(sew, SmallestSEW<mx, isF>.r);
  bit c = !and(IsLargestLMUL, IsSmallestSEW);
}

/// Number of DLEN parts = (LMUL * VLEN) / DLEN.
/// Since DLEN = VLEN / 2, Num DLEN parts = 2 * LMUL.
class SiFive7GetCyclesDefault<string mx> {
  int c = !cond(
    !eq(mx, "M1") : 2,
    !eq(mx, "M2") : 4,
    !eq(mx, "M4") : 8,
    !eq(mx, "M8") : 16,
    !eq(mx, "MF2") : 1,
    !eq(mx, "MF4") : 1,
    !eq(mx, "MF8") : 1
  );
}

/// Cycle count used by narrowing operations: twice the default count for
/// M1, M2, M4 and MF2, and 1 for MF4/MF8. There is no M8 entry.
class SiFive7GetCyclesNarrowing<string mx> {
  int c = !cond(
    !eq(mx, "M1") : 4,
    !eq(mx, "M2") : 8,
    !eq(mx, "M4") : 16,
    !eq(mx, "MF2") : 2,
    !eq(mx, "MF4") : 1,
    !eq(mx, "MF8") : 1
  );
}

/// Cycle count used by mask operations: 2 for M8, 1 for every other LMUL.
class SiFive7GetCyclesVMask<string mx> {
  int c = !cond(
    !eq(mx, "M1") : 1,
    !eq(mx, "M2") : 1,
    !eq(mx, "M4") : 1,
    !eq(mx, "M8") : 2,
    !eq(mx, "MF2") : 1,
    !eq(mx, "MF4") : 1,
    !eq(mx, "MF8") : 1
  );
}

/// VLDM and VSTM can't read/write more than 2 DLENs of data.
/// 2 DLENs when LMUL=8. 1 DLEN for all other LMULs.
class SiFive7GetMaskLoadStoreCycles<string mx> {
  int c = !cond(
    !eq(mx, "M8") : 2,
    true : 1
  );
}

// Cycles for nf=2 segmented loads and stores are calculated using the
// formula (2 * VLEN * LMUL) / DLEN = 4 * LMUL
class SiFive7GetCyclesSegmentedSeg2<string mx> {
  int c = !cond(
    !eq(mx, "M1") : 4,
    !eq(mx, "M2") : 8,
    !eq(mx, "M4") : 16,
    !eq(mx, "M8") : 32,
    !eq(mx, "MF2") : 2,
    !eq(mx, "MF4") : 1,
    !eq(mx, "MF8") : 1
  );
}

// Cycles for segmented loads and stores are calculated using the
// formula vl * ceil((SEW * nf) / DLEN), where SEW * nf is the segment size.
class SiFive7GetCyclesSegmented<string mx, int sew, int nf> {
  defvar VLEN = 512;
  defvar DLEN = 256;
  // (VLEN * LMUL) / SEW
  defvar VLUpperBound = !cond(
    !eq(mx, "M1") : !div(VLEN, sew),
    !eq(mx, "M2") : !div(!mul(VLEN, 2), sew),
    !eq(mx, "M4") : !div(!mul(VLEN, 4), sew),
    !eq(mx, "M8") : !div(!mul(VLEN, 8), sew),
    !eq(mx, "MF2") : !div(!div(VLEN, 2), sew),
    !eq(mx, "MF4") : !div(!div(VLEN, 4), sew),
    !eq(mx, "MF8") : !div(!div(VLEN, 8), sew)
  );
  // We can calculate ceil(a/b) using (a + b - 1) / b.
  defvar a = !mul(sew, nf);
  defvar b = DLEN;
  int c = !mul(VLUpperBound, !div(!sub(!add(a, b), 1), b));
}

class SiFive7GetCyclesOnePerElement<string mx, int sew> {
  // FIXME: On SiFive7, VLEN is 512. Although a user can request the compiler
  // to use a different VLEN, this model will not make scheduling decisions
  // based on the user specified VLEN.
  // c = ceil(VLEN / SEW) * LMUL
  // Note: c >= 1 since, for SEW <= 64, the smallest VLEN / SEW is
  // 512 / 64 = 8, and the largest division performed on it is in the MF8 case
  // with division by 8. Therefore, there is no need to ceil the result.
  // NOTE: despite its name, the VLEN field below holds 512 / SEW (the number
  // of elements in one register), not the register width itself.
  int VLEN = !div(512, sew);
  int c = !cond(
    !eq(mx, "M1")  : VLEN,
    !eq(mx, "M2")  : !mul(VLEN, 2),
    !eq(mx, "M4")  : !mul(VLEN, 4),
    !eq(mx, "M8")  : !mul(VLEN, 8),
    !eq(mx, "MF2") : !div(VLEN, 2),
    !eq(mx, "MF4") : !div(VLEN, 4),
    !eq(mx, "MF8") : !div(VLEN, 8)
  );
}

/// Per-SEW multiplier applied to the element-group count when computing
/// divide and square-root cycles.
class SiFive7GetDivOrSqrtFactor<int sew> {
  int c = !cond(
    // TODO: Add SchedSEWSetFP upstream and remove the SEW=8 case.
    !eq(sew, 8) : 15,
    !eq(sew, 16) : 15,
    !eq(sew, 32) : 28,
    !eq(sew, 64) : 57
  );
}

/// Cycles for reductions take approximately VL*SEW/DLEN + 5(4 + log(DLEN/SEW))
/// cycles.
class SiFive7GetReductionCycles<string mx, int sew> {
  // VLUpperBound*SEW/DLEN is equivalent to 2*LMUL since
  // VLUpperBound=(VLEN*LMUL)/SEW.
  defvar VLEN = 512;
  defvar DLEN = !div(VLEN, 2);
  defvar TwoTimesLMUL = !cond(
    !eq(mx, "M1") : 2,
    !eq(mx, "M2") : 4,
    !eq(mx, "M4") : 8,
    !eq(mx, "M8") : 16,
    !eq(mx, "MF2") : 1,
    !eq(mx, "MF4") : 1,
    !eq(mx, "MF8") : 1
  );
  // TwoTimesLMUL already is the VL*SEW/DLEN term of the formula, so it is
  // added as-is. Dividing it by DLEN again would always yield 0 (it is at
  // most 16 while DLEN is 256) and drop the LMUL-dependent part of the cost.
  int c = !add(
    TwoTimesLMUL,
    !mul(5, !add(4, !logtwo(!div(DLEN, sew))))
  );
}

/// Cycles for ordered reductions take approximately 5*VL cycles
class SiFive7GetOrderedReductionCycles<string mx, int sew> {
  defvar VLEN = 512;
  // (VLEN * LMUL) / SEW
  defvar VLUpperBound = !cond(
    !eq(mx, "M1") : !div(VLEN, sew),
    !eq(mx, "M2") : !div(!mul(VLEN, 2), sew),
    !eq(mx, "M4") : !div(!mul(VLEN, 4), sew),
    !eq(mx, "M8") : !div(!mul(VLEN, 8), sew),
    !eq(mx, "MF2") : !div(!div(VLEN, 2), sew),
    !eq(mx, "MF4") : !div(!div(VLEN, 4), sew),
    !eq(mx, "MF8") : !div(!div(VLEN, 8), sew)
  );
  int c = !mul(5, VLUpperBound);
}

/// ReadAdvance of `cycles` (default 2) for `read`, valid against the scalar
/// integer, bitmanip, multiply/divide and integer load writes listed below.
class SiFive7AnyToGPRBypass<SchedRead read, int cycles = 2>
    : ReadAdvance<read, cycles, [WriteIALU, WriteIALU32,
                                 WriteShiftImm, WriteShiftImm32,
                                 WriteShiftReg, WriteShiftReg32,
                                 WriteSHXADD, WriteSHXADD32,
                                 WriteRotateImm, WriteRotateImm32,
                                 WriteRotateReg, WriteRotateReg32,
                                 WriteCLZ, WriteCLZ32, WriteCTZ, WriteCTZ32,
                                 WriteCPOP, WriteCPOP32,
                                 WriteREV8, WriteORCB, WriteSFB,
                                 WriteIMul, WriteIMul32,
                                 WriteIDiv, WriteIDiv32,
                                 WriteLDB, WriteLDH, WriteLDW, WriteLDD]>;

// SiFive7 machine model for scheduling and other instruction cost heuristics.
def SiFive7Model : SchedMachineModel {
  let MicroOpBufferSize = 0; // Explicitly set to zero since SiFive7 is in-order.
  let IssueWidth = 2;        // 2 micro-ops are dispatched per cycle.
  let LoadLatency = 3;
  let MispredictPenalty = 3;
  let CompleteModel = 0;
  let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasStdExtZbkx,
                             HasStdExtZcmt, HasStdExtZknd, HasStdExtZkne,
                             HasStdExtZknh, HasStdExtZksed, HasStdExtZksh,
                             HasStdExtZkr];
}

// The SiFive7 microarchitecture has three pipelines: A, B, V.
// Pipe A can handle memory, integer alu and vector operations.
// Pipe B can handle integer alu, control flow, integer multiply and divide,
// and floating point computation.
// Pipe V can handle the V extension.
let SchedModel = SiFive7Model in {
let BufferSize = 0 in {
def SiFive7PipeA       : ProcResource<1>;
def SiFive7PipeB       : ProcResource<1>;
def SiFive7PipeV       : ProcResource<1>;
}

let BufferSize = 1 in {
def SiFive7IDiv        : ProcResource<1> { let Super = SiFive7PipeB; } // Int Division
def SiFive7FDiv        : ProcResource<1> { let Super = SiFive7PipeB; } // FP Division/Sqrt
def SiFive7VA          : ProcResource<1> { let Super = SiFive7PipeV; } // Arithmetic sequencer
def SiFive7VL          : ProcResource<1> { let Super = SiFive7PipeV; } // Load sequencer
def SiFive7VS          : ProcResource<1> { let Super = SiFive7PipeV; } // Store sequencer
}

def SiFive7PipeAB : ProcResGroup<[SiFive7PipeA, SiFive7PipeB]>;

// Branching
let Latency = 3 in {
def : WriteRes<WriteJmp, [SiFive7PipeB]>;
def : WriteRes<WriteJal, [SiFive7PipeB]>;
def : WriteRes<WriteJalr, [SiFive7PipeB]>;
}

// Short forward branch
def : WriteRes<WriteSFB, [SiFive7PipeA, SiFive7PipeB]> {
  let Latency = 3;
  let NumMicroOps = 2;
}

// Integer arithmetic and logic
let Latency = 3 in {
def : WriteRes<WriteIALU, [SiFive7PipeAB]>;
def : WriteRes<WriteIALU32, [SiFive7PipeAB]>;
def : WriteRes<WriteShiftImm, [SiFive7PipeAB]>;
def : WriteRes<WriteShiftImm32, [SiFive7PipeAB]>;
def : WriteRes<WriteShiftReg, [SiFive7PipeAB]>;
def : WriteRes<WriteShiftReg32, [SiFive7PipeAB]>;
}

// Integer multiplication
let Latency = 3 in
foreach write = [WriteIMul, WriteIMul32] in
  def : WriteRes<write, [SiFive7PipeB]>;

// Integer division: one cycle on pipe B, the remainder on the divider.
let Latency = 66, ResourceCycles = [1, 65] in
def : WriteRes<WriteIDiv, [SiFive7PipeB, SiFive7IDiv]>;
let Latency = 34, ResourceCycles = [1, 33] in
def : WriteRes<WriteIDiv32, [SiFive7PipeB, SiFive7IDiv]>;

// Bitmanip
let Latency = 3 in {
// Rotates are in the late-B ALU.
foreach write = [WriteRotateImm, WriteRotateImm32,
                 WriteRotateReg, WriteRotateReg32] in
  def : WriteRes<write, [SiFive7PipeB]>;

// clz[w]/ctz[w] are in the late-B ALU.
foreach write = [WriteCLZ, WriteCLZ32, WriteCTZ, WriteCTZ32] in
  def : WriteRes<write, [SiFive7PipeB]>;

// cpop[w] look exactly like multiply.
foreach write = [WriteCPOP, WriteCPOP32] in
  def : WriteRes<write, [SiFive7PipeB]>;

// orc.b is in the late-B ALU.
def : WriteRes<WriteORCB, [SiFive7PipeB]>;

// rev8 is in the late-A and late-B ALUs.
def : WriteRes<WriteREV8, [SiFive7PipeAB]>;

// shNadd[.uw] is on the early-B and late-B ALUs.
foreach write = [WriteSHXADD, WriteSHXADD32] in
  def : WriteRes<write, [SiFive7PipeB]>;
}

// Memory: integer and FP stores all go through pipe A.
foreach write = [WriteSTB, WriteSTH, WriteSTW, WriteSTD,
                 WriteFST16, WriteFST32, WriteFST64] in
  def : WriteRes<write, [SiFive7PipeA]>;

// Integer loads.
let Latency = 3 in
foreach write = [WriteLDB, WriteLDH, WriteLDW, WriteLDD] in
  def : WriteRes<write, [SiFive7PipeA]>;

// FP loads.
let Latency = 2 in
foreach write = [WriteFLD16, WriteFLD32, WriteFLD64] in
  def : WriteRes<write, [SiFive7PipeA]>;

// Atomic memory
foreach write = [WriteAtomicSTW, WriteAtomicSTD] in
  def : WriteRes<write, [SiFive7PipeA]>;

let Latency = 3 in
foreach write = [WriteAtomicW, WriteAtomicD,
                 WriteAtomicLDW, WriteAtomicLDD] in
  def : WriteRes<write, [SiFive7PipeA]>;

// Half precision.
let Latency = 5 in
foreach write = [WriteFAdd16, WriteFMul16, WriteFMA16] in
  def : WriteRes<write, [SiFive7PipeB]>;
let Latency = 3 in
foreach write = [WriteFSGNJ16, WriteFMinMax16] in
  def : WriteRes<write, [SiFive7PipeB]>;

let Latency = 14, ResourceCycles = [1, 13] in
foreach write = [WriteFDiv16, WriteFSqrt16] in
  def : WriteRes<write, [SiFive7PipeB, SiFive7FDiv]>;

// Single precision.
let Latency = 5 in
foreach write = [WriteFAdd32, WriteFMul32, WriteFMA32] in
  def : WriteRes<write, [SiFive7PipeB]>;
let Latency = 3 in
foreach write = [WriteFSGNJ32, WriteFMinMax32] in
  def : WriteRes<write, [SiFive7PipeB]>;

// One cycle on pipe B, the remainder on the FP divide/sqrt unit.
let Latency = 27, ResourceCycles = [1, 26] in
foreach write = [WriteFDiv32, WriteFSqrt32] in
  def : WriteRes<write, [SiFive7PipeB, SiFive7FDiv]>;

// Double precision
let Latency = 7 in
foreach write = [WriteFAdd64, WriteFMul64, WriteFMA64] in
  def : WriteRes<write, [SiFive7PipeB]>;
let Latency = 3 in
foreach write = [WriteFSGNJ64, WriteFMinMax64] in
  def : WriteRes<write, [SiFive7PipeB]>;

let Latency = 56, ResourceCycles = [1, 55] in
foreach write = [WriteFDiv64, WriteFSqrt64] in
  def : WriteRes<write, [SiFive7PipeB, SiFive7FDiv]>;

// Conversions
let Latency = 3 in {
foreach write = [WriteFCvtI32ToF16, WriteFCvtI32ToF32, WriteFCvtI32ToF64,
                 WriteFCvtI64ToF16, WriteFCvtI64ToF32, WriteFCvtI64ToF64,
                 WriteFCvtF16ToI32, WriteFCvtF16ToI64,
                 WriteFCvtF16ToF32, WriteFCvtF16ToF64,
                 WriteFCvtF32ToI32, WriteFCvtF32ToI64,
                 WriteFCvtF32ToF16, WriteFCvtF32ToF64,
                 WriteFCvtF64ToI32, WriteFCvtF64ToI64,
                 WriteFCvtF64ToF16, WriteFCvtF64ToF32] in
  def : WriteRes<write, [SiFive7PipeB]>;

// Classify, compare and GPR<->FPR moves share the same latency.
foreach write = [WriteFClass16, WriteFClass32, WriteFClass64,
                 WriteFCmp16, WriteFCmp32, WriteFCmp64,
                 WriteFMovI16ToF16, WriteFMovF16ToI16,
                 WriteFMovI32ToF32, WriteFMovF32ToI32,
                 WriteFMovI64ToF64, WriteFMovF64ToI64] in
  def : WriteRes<write, [SiFive7PipeB]>;
}

// 6. Configuration-Setting Instructions
let Latency = 3 in
foreach write = [WriteVSETVLI, WriteVSETIVLI, WriteVSETVL] in
  def : WriteRes<write, [SiFive7PipeA]>;

// 7. Vector Loads and Stores
// Unit-stride loads and stores can operate at the full bandwidth of the memory
// pipe. The memory pipe is DLEN bits wide on x280.
// Unit-stride loads/stores: occupancy is the default per-LMUL cycle count.
foreach mx = SchedMxList in {
  defvar Occupancy = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let ResourceCycles = [Occupancy] in {
    let Latency = 4 in
    foreach write = ["WriteVLDE", "WriteVLDFF"] in
      defm "" : LMULWriteResMX<write, [SiFive7VL], mx, WorstCase>;
    let Latency = 1 in
    defm "" : LMULWriteResMX<"WriteVSTE", [SiFive7VS], mx, WorstCase>;
  }
}

// Mask loads/stores: occupancy comes from SiFive7GetMaskLoadStoreCycles.
foreach mx = SchedMxList in {
  defvar Occupancy = SiFive7GetMaskLoadStoreCycles<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let ResourceCycles = [Occupancy] in {
    let Latency = 4 in
    defm "" : LMULWriteResMX<"WriteVLDM", [SiFive7VL], mx, WorstCase>;
    let Latency = 1 in
    defm "" : LMULWriteResMX<"WriteVSTM", [SiFive7VS], mx, WorstCase>;
  }
}

// Strided loads and stores operate at one element per cycle and should be
// scheduled accordingly. Indexed loads and stores operate at one element per
// cycle, and they stall the machine until all addresses have been generated,
// so they cannot be scheduled. Indexed and strided loads and stores have LMUL
// specific suffixes, but since SEW is already encoded in the name of the
// resource, we do not need to use LMULSEWXXX constructors. However, we do
// use the SEW from the name to determine the number of Cycles.
// One pass per element width; the width is pasted onto the resource name and
// also selects the one-element-per-cycle count.
foreach eew = [8, 16, 32, 64] in {
  foreach mx = SchedMxList in {
    defvar ElemCycles = SiFive7GetCyclesOnePerElement<mx, eew>.c;
    defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
    let Latency = !add(3, ElemCycles), ResourceCycles = [ElemCycles] in
    foreach write = ["WriteVLDS", "WriteVLDUX", "WriteVLDOX"] in
      defm "" : LMULWriteResMX<write # eew, [SiFive7VL], mx, WorstCase>;
    let Latency = 1, ResourceCycles = [ElemCycles] in
    foreach write = ["WriteVSTS", "WriteVSTUX", "WriteVSTOX"] in
      defm "" : LMULWriteResMX<write # eew, [SiFive7VS], mx, WorstCase>;
  }
}

// VLD*R is LMUL aware: occupancy is 2 cycles per register loaded.
foreach nregs = [1, 2, 4, 8] in
  let Latency = 4, ResourceCycles = [!mul(2, nregs)] in
  def : WriteRes<!cast<SchedWrite>("WriteVLD" # nregs # "R"), [SiFive7VL]>;
// VST*R is LMUL aware: occupancy is 2 cycles per register stored.
foreach nregs = [1, 2, 4, 8] in
  let Latency = 1, ResourceCycles = [!mul(2, nregs)] in
  def : WriteRes<!cast<SchedWrite>("WriteVST" # nregs # "R"), [SiFive7VS]>;

// Segmented Loads and Stores
// Unit-stride segmented loads and stores are effectively converted into strided
// segment loads and stores.
// Strided segment loads and stores operate at up to
// one segment per cycle if the segment fits within one aligned memory beat.
// Indexed segment loads and stores operate at the same rate as strided ones,
// but they stall the machine until all addresses have been generated.
foreach mx = SchedMxList in {
  foreach eew = [8, 16, 32, 64] in {
    foreach nf = 2-8 in {
      // nf=2 uses the dedicated two-field count; every other nf uses the
      // general per-segment formula.
      defvar SegCycles = !if(!eq(nf, 2),
                             SiFive7GetCyclesSegmentedSeg2<mx>.c,
                             SiFive7GetCyclesSegmented<mx, eew, nf>.c);
      defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
      // Does not chain so set latency high
      let Latency = !add(3, SegCycles), ResourceCycles = [SegCycles] in
      foreach write = ["WriteVLSEG", "WriteVLSEGFF"] in
        defm "" : LMULWriteResMX<write # nf # "e" # eew, [SiFive7VL], mx, WorstCase>;
      let Latency = 1, ResourceCycles = [SegCycles] in
      defm "" : LMULWriteResMX<"WriteVSSEG" # nf # "e" # eew, [SiFive7VS], mx, WorstCase>;
    }
  }
}
foreach mx = SchedMxList in {
  foreach nf = 2-8 in {
    foreach eew = [8, 16, 32, 64] in {
      defvar SegCycles = SiFive7GetCyclesSegmented<mx, eew, nf>.c;
      defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
      // Does not chain so set latency high
      let Latency = !add(3, SegCycles), ResourceCycles = [SegCycles] in
      foreach write = ["WriteVLSSEG", "WriteVLUXSEG", "WriteVLOXSEG"] in
        defm "" : LMULWriteResMX<write # nf # "e" # eew, [SiFive7VL], mx, WorstCase>;
      let Latency = 1, ResourceCycles = [SegCycles] in
      foreach write = ["WriteVSSSEG", "WriteVSUXSEG", "WriteVSOXSEG"] in
        defm "" : LMULWriteResMX<write # nf # "e" # eew, [SiFive7VS], mx, WorstCase>;
    }
  }
}

// 11. Vector Integer Arithmetic Instructions
foreach mx = SchedMxList in {
  defvar Occupancy = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 4, ResourceCycles = [Occupancy] in
  foreach write = ["WriteVIALUV", "WriteVIALUX", "WriteVIALUI",
                   "WriteVICALUV", "WriteVICALUX", "WriteVICALUI",
                   "WriteVShiftV", "WriteVShiftX", "WriteVShiftI",
                   "WriteVIMinMaxV", "WriteVIMinMaxX",
                   "WriteVIMulV", "WriteVIMulX",
                   "WriteVIMulAddV", "WriteVIMulAddX",
                   "WriteVIMergeV", "WriteVIMergeX", "WriteVIMergeI",
                   "WriteVIMovV", "WriteVIMovX", "WriteVIMovI"] in
    defm "" : LMULWriteResMX<write, [SiFive7VA], mx, WorstCase>;
  // Mask results can't chain.
  let Latency = !add(Occupancy, 3), ResourceCycles = [Occupancy] in
  foreach write = ["WriteVICmpV", "WriteVICmpX", "WriteVICmpI"] in
    defm "" : LMULWriteResMX<write, [SiFive7VA], mx, WorstCase>;
}
foreach mx = SchedMxList in {
  defvar Occupancy = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 4, ResourceCycles = [Occupancy] in
  defm "" : LMULWriteResMX<"WriteVExtV", [SiFive7VA], mx, WorstCase>;
}
foreach mx = SchedMxList in {
  foreach sew = SchedSEWSet<mx>.val in {
    // Per-SEW factor times a quarter of the one-element-per-cycle count.
    defvar Factor = SiFive7GetDivOrSqrtFactor<sew>.c;
    defvar ElemGroups = !div(SiFive7GetCyclesOnePerElement<mx, sew>.c, 4);
    defvar DivCycles = !mul(Factor, ElemGroups);
    defvar WorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
    let Latency = DivCycles, ResourceCycles = [DivCycles] in
    foreach write = ["WriteVIDivV", "WriteVIDivX"] in
      defm "" : LMULSEWWriteResMXSEW<write, [SiFive7VA], mx, sew, WorstCase>;
  }
}

// Widening
foreach mx = SchedMxListW in {
  defvar Occupancy = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
  let Latency = 8, ResourceCycles = [Occupancy] in
  foreach write = ["WriteVIWALUV", "WriteVIWALUX", "WriteVIWALUI",
                   "WriteVIWMulV", "WriteVIWMulX",
                   "WriteVIWMulAddV", "WriteVIWMulAddX"] in
    defm "" : LMULWriteResMX<write, [SiFive7VA], mx, WorstCase>;
}
// Narrowing
foreach mx = SchedMxListW in {
  defvar Occupancy = SiFive7GetCyclesNarrowing<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
  let Latency = 8, ResourceCycles = [Occupancy] in
  foreach write = ["WriteVNShiftV", "WriteVNShiftX", "WriteVNShiftI"] in
    defm "" : LMULWriteResMX<write, [SiFive7VA], mx, WorstCase>;
}

// 12. Vector Fixed-Point Arithmetic Instructions
foreach mx = SchedMxList in {
  defvar Occupancy = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 8, ResourceCycles = [Occupancy] in
  foreach write = ["WriteVSALUV", "WriteVSALUX", "WriteVSALUI",
                   "WriteVAALUV", "WriteVAALUX",
                   "WriteVSMulV", "WriteVSMulX",
                   "WriteVSShiftV", "WriteVSShiftX", "WriteVSShiftI"] in
    defm "" : LMULWriteResMX<write, [SiFive7VA], mx, WorstCase>;
}
// Narrowing
foreach mx = SchedMxListW in {
  defvar Occupancy = SiFive7GetCyclesNarrowing<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
  let Latency = 8, ResourceCycles = [Occupancy] in
  foreach write = ["WriteVNClipV", "WriteVNClipX", "WriteVNClipI"] in
    defm "" : LMULWriteResMX<write, [SiFive7VA], mx, WorstCase>;
}

// 13. Vector Floating-Point Instructions
foreach mx = SchedMxList in {
  defvar Occupancy = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 8, ResourceCycles = [Occupancy] in
  foreach write = ["WriteVFALUV", "WriteVFALUF",
                   "WriteVFMulV", "WriteVFMulF",
                   "WriteVFMulAddV", "WriteVFMulAddF",
                   "WriteVFRecpV",
                   "WriteVFCvtIToFV", "WriteVFCvtFToIV"] in
    defm "" : LMULWriteResMX<write, [SiFive7VA], mx, WorstCase>;
  let Latency = 4, ResourceCycles = [Occupancy] in
  foreach write = ["WriteVFSgnjV", "WriteVFSgnjF",
                   "WriteVFMinMaxV", "WriteVFMinMaxF",
                   "WriteVFClassV", "WriteVFMergeV", "WriteVFMovV"] in
    defm "" : LMULWriteResMX<write, [SiFive7VA], mx, WorstCase>;
  // Mask results can't chain.
  let Latency = !add(Occupancy, 3), ResourceCycles = [Occupancy] in
  foreach write = ["WriteVFCmpV", "WriteVFCmpF"] in
    defm "" : LMULWriteResMX<write, [SiFive7VA], mx, WorstCase>;
}
foreach mx = SchedMxListF in {
  foreach sew = SchedSEWSet<mx, isF=1>.val in {
    // Same cost shape as the integer divide above.
    defvar Factor = SiFive7GetDivOrSqrtFactor<sew>.c;
    defvar ElemGroups = !div(SiFive7GetCyclesOnePerElement<mx, sew>.c, 4);
    defvar DivCycles = !mul(Factor, ElemGroups);
    defvar WorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
    let Latency = DivCycles, ResourceCycles = [DivCycles] in
    foreach write = ["WriteVFSqrtV", "WriteVFDivV", "WriteVFDivF"] in
      defm "" : LMULSEWWriteResMXSEW<write, [SiFive7VA], mx, sew, WorstCase>;
  }
}

// Widening
foreach mx = SchedMxListW in {
  defvar Occupancy = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
  let Latency = 8, ResourceCycles = [Occupancy] in
  defm "" : LMULWriteResMX<"WriteVFWCvtIToFV", [SiFive7VA], mx, WorstCase>;
}
foreach mx = SchedMxListFW in {
  defvar Occupancy = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListFW>.c;
  let Latency = 8, ResourceCycles = [Occupancy] in
  foreach write = ["WriteVFWALUV", "WriteVFWMulV", "WriteVFWMulAddV",
                   "WriteVFWCvtFToIV", "WriteVFWCvtFToFV",
                   "WriteVFWMulAddF", "WriteVFWMulF", "WriteVFWALUF"] in
    defm "" : LMULWriteResMX<write, [SiFive7VA], mx, WorstCase>;
}
//
// Narrowing
foreach mx = SchedMxListW in {
  defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
  let Latency = 8, ResourceCycles = [Cycles] in {
    defm "" : LMULWriteResMX<"WriteVFNCvtFToIV", [SiFive7VA], mx, IsWorstCase>;
  }
}
foreach mx = SchedMxListFW in {
  defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListFW>.c;
  let Latency = 8, ResourceCycles = [Cycles] in {
    defm "" : LMULWriteResMX<"WriteVFNCvtIToFV", [SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVFNCvtFToFV", [SiFive7VA], mx, IsWorstCase>;
  }
}

// 14. Vector Reduction Operations
foreach mx = SchedMxList in {
  foreach sew = SchedSEWSet<mx>.val in {
    defvar Cycles = SiFive7GetReductionCycles<mx, sew>.c;
    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
    // The braces matter: an unbraced `let ... in` binds only to the single
    // statement that follows it, which would leave the min/max reduction
    // without the Latency/ResourceCycles set here.
    let Latency = Cycles, ResourceCycles = [Cycles] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVIRedV_From", [SiFive7VA],
                                     mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVIRedMinMaxV_From", [SiFive7VA],
                                     mx, sew, IsWorstCase>;
    }
  }
}

// Widening integer reductions.
foreach mx = SchedMxListWRed in {
  foreach sew = SchedSEWSet<mx, 0, 1>.val in {
    defvar Cycles = SiFive7GetReductionCycles<mx, sew>.c;
    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListWRed>.c;
    let Latency = Cycles, ResourceCycles = [Cycles] in
    defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From", [SiFive7VA],
                                   mx, sew, IsWorstCase>;
  }
}

// FP reductions: the unordered forms use the reduction-cycle count, the
// ordered form uses the ordered-reduction count.
foreach mx = SchedMxListF in {
  foreach sew = SchedSEWSet<mx, 1>.val in {
    defvar RedCycles = SiFive7GetReductionCycles<mx, sew>.c;
    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
    let Latency = RedCycles, ResourceCycles = [RedCycles] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVFRedV_From", [SiFive7VA],
                                     mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFRedMinMaxV_From", [SiFive7VA],
                                     mx, sew, IsWorstCase>;
    }
    defvar OrdRedCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c;
    let Latency = OrdRedCycles, ResourceCycles = [OrdRedCycles] in
    defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [SiFive7VA],
                                   mx, sew, IsWorstCase>;
  }
}

// Widening FP reductions, unordered then ordered.
foreach mx = SchedMxListFWRed in {
  foreach sew = SchedSEWSet<mx, 1, 1>.val in {
    defvar RedCycles = SiFive7GetReductionCycles<mx, sew>.c;
    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFWRed, 1>.c;
    let Latency = RedCycles, ResourceCycles = [RedCycles] in
    defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [SiFive7VA],
                                   mx, sew, IsWorstCase>;
    defvar OrdRedCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c;
    let Latency = OrdRedCycles, ResourceCycles = [OrdRedCycles] in
    defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [SiFive7VA],
                                   mx, sew, IsWorstCase>;
  }
}

// 15. Vector Mask Instructions
foreach mx = SchedMxList in {
  defvar Cycles = SiFive7GetCyclesVMask<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 4, ResourceCycles = [Cycles] in {
    defm "" : LMULWriteResMX<"WriteVMALUV", [SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVMPopV", [SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVMFFSV", [SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVMSFSV", [SiFive7VA], mx, IsWorstCase>;
  }
}
foreach mx = SchedMxList in {
  defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 4, ResourceCycles = [Cycles] in {
    defm "" : LMULWriteResMX<"WriteVMIotV", [SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVMIdxV", [SiFive7VA], mx, IsWorstCase>;
  }
}

// 16.
// Vector Permutation Instructions
// Scalar <-> vector element moves: latency 4, one VA cycle.
let Latency = 4, ResourceCycles = [1] in {
  foreach wr = [WriteVIMovVX, WriteVIMovXV, WriteVFMovVF, WriteVFMovFV] in
    def : WriteRes<wr, [SiFive7VA]>;
}

// Register gather with a scalar or immediate index: latency 8, default
// per-LMUL occupancy.
foreach mx = SchedMxList in {
  defvar Occupancy = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 8, ResourceCycles = [Occupancy] in {
    foreach wr = ["WriteVRGatherVX", "WriteVRGatherVI"] in
      defm "" : LMULWriteResMX<wr, [SiFive7VA], mx, WorstCase>;
  }
}

// Vector-indexed gather and compress: occupancy is
// SiFive7GetCyclesOnePerElement<mx, sew>, latency is that plus 3.
foreach mx = SchedMxList in {
  foreach sew = SchedSEWSet<mx>.val in {
    defvar ElemCycles = SiFive7GetCyclesOnePerElement<mx, sew>.c;
    defvar WorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
    let Latency = !add(ElemCycles, 3), ResourceCycles = [ElemCycles] in {
      foreach wr = ["WriteVRGatherVV", "WriteVCompressV"] in
        defm "" : LMULSEWWriteResMXSEW<wr, [SiFive7VA], mx, sew, WorstCase>;
    }
  }
}

// Slides: latency 4, default per-LMUL occupancy.
foreach mx = SchedMxList in {
  defvar Occupancy = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 4, ResourceCycles = [Occupancy] in {
    foreach wr = ["WriteVISlideX", "WriteVISlideI",
                  "WriteVISlide1X", "WriteVFSlide1F"] in
      defm "" : LMULWriteResMX<wr, [SiFive7VA], mx, WorstCase>;
  }
}

// VMov*V is LMUL Aware
// Whole-register moves: latency 4; occupancy is 2, 4, 8, 16 cycles for the
// 1-, 2-, 4- and 8-register forms.
let Latency = 4 in {
  let ResourceCycles = [2] in
  def : WriteRes<WriteVMov1V, [SiFive7VA]>;
  let ResourceCycles = [4] in
  def : WriteRes<WriteVMov2V, [SiFive7VA]>;
  let ResourceCycles = [8] in
  def : WriteRes<WriteVMov4V, [SiFive7VA]>;
  let ResourceCycles = [16] in
  def : WriteRes<WriteVMov8V, [SiFive7VA]>;
}

// Others
def : WriteRes<WriteCSR, [SiFive7PipeB]>;
def : WriteRes<WriteNop, []>;
def : WriteRes<WriteRdVLENB, [SiFive7PipeB]> { let Latency = 3; }

// COPY is scheduled as a plain integer ALU write.
def : InstRW<[WriteIALU], (instrs COPY)>;

//===----------------------------------------------------------------------===//

// Bypass and advance
// Reads that go through SiFive7AnyToGPRBypass with its default cycle count.
foreach rd = [ReadJmp, ReadJalr,
              ReadIALU, ReadIALU32,
              ReadShiftImm, ReadShiftImm32,
              ReadShiftReg, ReadShiftReg32] in
  def : SiFive7AnyToGPRBypass<rd>;

// Scalar integer, atomic and FP reads with a read advance of 0.
foreach rd = [ReadCSR, ReadStoreData, ReadMemBase,
              ReadIDiv, ReadIDiv32, ReadIMul, ReadIMul32,
              ReadAtomicWA, ReadAtomicWD, ReadAtomicDA, ReadAtomicDD,
              ReadAtomicLDW, ReadAtomicLDD, ReadAtomicSTW, ReadAtomicSTD,
              ReadFStoreData, ReadFMemBase,
              ReadFAdd16, ReadFAdd32, ReadFAdd64,
              ReadFMul16, ReadFMA16, ReadFMul32, ReadFMul64,
              ReadFMA32, ReadFMA64,
              ReadFDiv16, ReadFDiv32, ReadFDiv64,
              ReadFSqrt16, ReadFSqrt32, ReadFSqrt64,
              ReadFCmp16, ReadFCmp32, ReadFCmp64,
              ReadFSGNJ16, ReadFSGNJ32, ReadFSGNJ64,
              ReadFMinMax16, ReadFMinMax32, ReadFMinMax64,
              ReadFCvtF16ToI32, ReadFCvtF16ToI64,
              ReadFCvtF32ToI32, ReadFCvtF32ToI64,
              ReadFCvtF64ToI32, ReadFCvtF64ToI64,
              ReadFCvtI32ToF16, ReadFCvtI32ToF32, ReadFCvtI32ToF64,
              ReadFCvtI64ToF16, ReadFCvtI64ToF32, ReadFCvtI64ToF64,
              ReadFCvtF32ToF64, ReadFCvtF64ToF32,
              ReadFCvtF16ToF32, ReadFCvtF32ToF16,
              ReadFCvtF16ToF64, ReadFCvtF64ToF16,
              ReadFMovF16ToI16, ReadFMovI16ToF16,
              ReadFMovF32ToI32, ReadFMovI32ToF32,
              ReadFMovF64ToI64, ReadFMovI64ToF64,
              ReadFClass16, ReadFClass32, ReadFClass64] in
  def : ReadAdvance<rd, 0>;

// Short-forward-branch reads pass an explicit cycle count of 0 to the bypass
// class.
def : SiFive7AnyToGPRBypass<ReadSFBJmp, 0>;
def : SiFive7AnyToGPRBypass<ReadSFBALU, 0>;

// Bitmanip
foreach rd = [ReadRotateImm, ReadRotateImm32,
              ReadRotateReg, ReadRotateReg32,
              ReadCLZ, ReadCLZ32, ReadCTZ, ReadCTZ32] in
  def : SiFive7AnyToGPRBypass<rd>;
foreach rd = [ReadCPOP, ReadCPOP32] in
  def : ReadAdvance<rd, 0>;
// Bypassed reads: ReadORCB, ReadREV8, ReadSHXADD, ReadSHXADD32.
foreach rd = [ReadORCB, ReadREV8, ReadSHXADD, ReadSHXADD32] in
  def : SiFive7AnyToGPRBypass<rd>;

// 6. Configuration-Setting Instructions
// The vsetvl-family reads are the only ones here with a read advance of 2.
foreach rd = [ReadVSETVLI, ReadVSETVL] in
  def : ReadAdvance<rd, 2>;

// 7. Vector Loads and Stores
// Scalar address/stride operands.
foreach rd = [ReadVLDX, ReadVSTX, ReadVLDSX, ReadVSTSX] in
  def : ReadAdvance<rd, 0>;
// Per-LMUL vector operands of unit-stride, strided and indexed accesses.
foreach rd = ["ReadVSTEV", "ReadVSTM",
              "ReadVSTS8V", "ReadVSTS16V", "ReadVSTS32V", "ReadVSTS64V",
              "ReadVLDUXV", "ReadVLDOXV",
              "ReadVSTUX8", "ReadVSTUX16", "ReadVSTUX32", "ReadVSTUX64",
              "ReadVSTUXV",
              "ReadVSTUX8V", "ReadVSTUX16V", "ReadVSTUX32V", "ReadVSTUX64V",
              "ReadVSTOX8", "ReadVSTOX16", "ReadVSTOX32", "ReadVSTOX64",
              "ReadVSTOXV",
              "ReadVSTOX8V", "ReadVSTOX16V", "ReadVSTOX32V", "ReadVSTOX64V"] in
  defm "" : LMULReadAdvance<rd, 0>;
// LMUL Aware
foreach rd = [ReadVST1R, ReadVST2R, ReadVST4R, ReadVST8R] in
  def : ReadAdvance<rd, 0>;

// 12.
// Vector Integer Arithmetic Instructions
// Every read below has a read advance of 0; entries are grouped by the
// multiclass that expands them.
foreach rd = ["ReadVIALUV", "ReadVIALUX",
              "ReadVExtV",
              "ReadVICALUV", "ReadVICALUX",
              "ReadVShiftV", "ReadVShiftX",
              "ReadVICmpV", "ReadVICmpX",
              "ReadVIMinMaxV", "ReadVIMinMaxX",
              "ReadVIMulV", "ReadVIMulX",
              "ReadVIMulAddV", "ReadVIMulAddX",
              "ReadVIMergeV", "ReadVIMergeX",
              "ReadVIMovV", "ReadVIMovX"] in
  defm : LMULReadAdvance<rd, 0>;
// Widening / narrowing operands.
foreach rd = ["ReadVIWALUV", "ReadVIWALUX",
              "ReadVNShiftV", "ReadVNShiftX",
              "ReadVIWMulV", "ReadVIWMulX",
              "ReadVIWMulAddV", "ReadVIWMulAddX"] in
  defm : LMULReadAdvanceW<rd, 0>;
// Divide operands are tracked per (LMUL, SEW).
foreach rd = ["ReadVIDivV", "ReadVIDivX"] in
  defm : LMULSEWReadAdvance<rd, 0>;

// 13. Vector Fixed-Point Arithmetic Instructions
foreach rd = ["ReadVSALUV", "ReadVSALUX",
              "ReadVAALUV", "ReadVAALUX",
              "ReadVSMulV", "ReadVSMulX",
              "ReadVSShiftV", "ReadVSShiftX"] in
  defm "" : LMULReadAdvance<rd, 0>;
foreach rd = ["ReadVNClipV", "ReadVNClipX"] in
  defm "" : LMULReadAdvanceW<rd, 0>;

// 14.
// Vector Floating-Point Instructions
// Every read below has a read advance of 0; entries are grouped by the
// multiclass that expands them.
foreach rd = ["ReadVFALUV", "ReadVFALUF",
              "ReadVFMulV", "ReadVFMulF",
              "ReadVFMulAddV", "ReadVFMulAddF",
              "ReadVFRecpV",
              "ReadVFMinMaxV", "ReadVFMinMaxF",
              "ReadVFSgnjV", "ReadVFSgnjF",
              "ReadVFCmpV", "ReadVFCmpF",
              "ReadVFClassV",
              "ReadVFMergeV", "ReadVFMergeF",
              "ReadVFMovF",
              "ReadVFCvtIToFV", "ReadVFCvtFToIV"] in
  defm "" : LMULReadAdvance<rd, 0>;
// Operands expanded through LMULReadAdvanceFW.
foreach rd = ["ReadVFWALUV", "ReadVFWALUF",
              "ReadVFWMulV", "ReadVFWMulF",
              "ReadVFWMulAddV", "ReadVFWMulAddF",
              "ReadVFWCvtFToIV", "ReadVFWCvtFToFV",
              "ReadVFNCvtIToFV", "ReadVFNCvtFToFV"] in
  defm "" : LMULReadAdvanceFW<rd, 0>;
// Operands expanded through LMULReadAdvanceW.
foreach rd = ["ReadVFWCvtIToFV", "ReadVFNCvtFToIV"] in
  defm "" : LMULReadAdvanceW<rd, 0>;
// Divide and square-root operands are tracked per (LMUL, SEW).
foreach rd = ["ReadVFDivV", "ReadVFDivF", "ReadVFSqrtV"] in
  defm "" : LMULSEWReadAdvanceF<rd, 0>;

// 15.
// Vector Reduction Operations
// NOTE(review): the WriteRes part of this file labels reduction / mask /
// permutation as sections 14 / 15 / 16, while the read-advance headings here
// are numbered one higher (15 / 16 / 17) -- confirm which numbering is intended.
foreach rd = [ReadVIRedV, ReadVIRedV0,
              ReadVIWRedV, ReadVIWRedV0,
              ReadVFRedV, ReadVFRedV0,
              ReadVFRedOV, ReadVFRedOV0,
              ReadVFWRedV, ReadVFWRedV0,
              ReadVFWRedOV, ReadVFWRedOV0] in
  def : ReadAdvance<rd, 0>;

// 16. Vector Mask Instructions
foreach rd = ["ReadVMALUV", "ReadVMPopV", "ReadVMFFSV",
              "ReadVMSFSV", "ReadVMIotV"] in
  defm "" : LMULReadAdvance<rd, 0>;

// 17. Vector Permutation Instructions
foreach rd = [ReadVIMovVX, ReadVIMovXV, ReadVIMovXX,
              ReadVFMovVF, ReadVFMovFV, ReadVFMovFX] in
  def : ReadAdvance<rd, 0>;
foreach rd = ["ReadVISlideV", "ReadVISlideX",
              "ReadVFSlideV", "ReadVFSlideF",
              "ReadVRGatherVX_data", "ReadVRGatherVX_index",
              "ReadVRGatherVI_data"] in
  defm "" : LMULReadAdvance<rd, 0>;
// Vector-indexed gather and compress operands are tracked per (LMUL, SEW).
foreach rd = ["ReadVRGatherVV_data", "ReadVRGatherVV_index",
              "ReadVCompressV"] in
  defm "" : LMULSEWReadAdvance<rd, 0>;
// LMUL Aware
foreach rd = [ReadVMov1V, ReadVMov2V, ReadVMov4V, ReadVMov8V] in
  def : ReadAdvance<rd, 0>;

// Others
def : ReadAdvance<ReadVMask, 0>;

//===----------------------------------------------------------------------===//
// Unsupported extensions
defm : UnsupportedSchedZbc;
defm : UnsupportedSchedZbs;
defm : UnsupportedSchedZbkb;
defm : UnsupportedSchedZbkx;
defm : UnsupportedSchedZfa;
// Closes a scope opened before this chunk.
}