//===-- SISchedule.td - SI Scheduling definitions -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// MachineModel definitions for Southern Islands (SI)
//
//===----------------------------------------------------------------------===//

// Prolog code for the scheduling predicates in this file: it binds `TII` to
// the target's SIInstrInfo so that predicate bodies can call TII->...().
// The (void) cast keeps the variable from being reported as unused where a
// predicate does not reference it.
def : PredicateProlog<[{
  const SIInstrInfo *TII =
    static_cast<const SIInstrInfo*>(SchedModel->getInstrInfo());
  (void)TII;
}]>;

// Scheduling write types for the non-VALU instruction classes. Latency and
// resource usage are attached to them separately for each machine model.
def WriteBranch  : SchedWrite;
def WriteExport  : SchedWrite;
def WriteLDS     : SchedWrite;
def WriteSALU    : SchedWrite;
def WriteSMEM    : SchedWrite;
def WriteVMEM    : SchedWrite;
def WriteBarrier : SchedWrite;

// Scheduling read types; their ReadAdvance values are set per machine model.
def MIVGPRRead : SchedRead;
def MIMFMARead : SchedRead;

// Vector ALU instructions
def Write32Bit               : SchedWrite;
def WriteQuarterRate32       : SchedWrite;
def WriteFullOrQuarterRate32 : SchedWrite;

def WriteFloatFMA : SchedWrite;

// Slow quarter rate f64 instruction.
def WriteDouble : SchedWrite;

// Half rate f64 instruction (same as v_add_f64).
def WriteDoubleAdd : SchedWrite;

// Conversion to or from f64 instruction.
def WriteDoubleCvt : SchedWrite;

// Half rate 64-bit instructions.
def Write64Bit : SchedWrite;

// MAI multipass instructions.
def Write2PassMAI  : SchedWrite;
def Write8PassMAI  : SchedWrite;
def Write16PassMAI : SchedWrite;

// FIXME: Should there be a class for instructions which are VALU
// instructions and have VALU rates, but write to the SALU (i.e. VOPC
// instructions)

// Common base for every machine model defined in this file.
class SISchedMachineModel : SchedMachineModel {
  let CompleteModel = 0;
  // MicroOpBufferSize = 1 means that instructions will always be added to
  // the ready queue when they become available.  This exposes them
  // to the register pressure analysis.
  let MicroOpBufferSize = 1;
  let IssueWidth = 1;
  let PostRAScheduler = 1;

  // FIXME: Approximate 2 * branch cost.  Try to hack around bad
  // early-ifcvt heuristics. These need improvement to avoid the OOE
  // heuristics.
  //
  // Overridden with `let`, like the fields above, rather than re-declaring
  // the inherited field.
  let MispredictPenalty = 20;
}

def SIFullSpeedModel    : SISchedMachineModel;
def SIQuarterSpeedModel : SISchedMachineModel;
def GFX10SpeedModel     : SISchedMachineModel;

// Processor resources referenced by the write resources below.
// XXX: Are the resource counts correct?
def HWBranch : ProcResource<1> {
  let BufferSize = 1;
}
def HWExport : ProcResource<1> {
  let BufferSize = 7; // Taken from S_WAITCNT
}
def HWLGKM : ProcResource<1> {
  let BufferSize = 31; // Taken from S_WAITCNT
}
def HWSALU : ProcResource<1> {
  let BufferSize = 1;
}
def HWVMEM : ProcResource<1> {
  let BufferSize = 15; // Taken from S_WAITCNT
}
def HWVALU : ProcResource<1> {
  let BufferSize = 1;
}
def HWRC : ProcResource<1> { // Register destination cache
  let BufferSize = 1;
}

// WriteRes with the latency supplied as a template argument, so each
// binding below fits on one line.
class HWWriteRes<SchedWrite write, list<ProcResourceKind> resources,
                 int latency> : WriteRes<write, resources> {
  let Latency = latency;
}

// HWWriteRes for writes that use only the HWVALU resource.
class HWVALUWriteRes<SchedWrite write, int latency> :
  HWWriteRes<write, [HWVALU], latency>;

def PredMIReadVGPR : SchedPredicate<[{TII->hasVGPRUses(*MI)}]>;

// Resolves to MIVGPRRead when PredMIReadVGPR holds for the instruction and
// to ReadDefault otherwise.
def MIReadVGPR : SchedReadVariant<[
      SchedVar<PredMIReadVGPR, [MIVGPRRead]>,
      SchedVar<NoSchedPred, [ReadDefault]>]>;

// The latency numbers are taken from AMD Accelerated Parallel Processing
// guide. They may not be accurate.

// Write resources and read advances shared by the SIFullSpeedModel and
// SIQuarterSpeedModel scopes below, each of which instantiates this
// multiclass with `defm`.
//
// The latency values are 1 / (operations / cycle) / 4.
multiclass SICommonWriteRes {

  def : HWWriteRes<WriteBranch,  [HWBranch], 8>;
  def : HWWriteRes<WriteExport,  [HWExport], 4>;
  def : HWWriteRes<WriteLDS,     [HWLGKM],   5>; // Can be between 2 and 64
  def : HWWriteRes<WriteSALU,    [HWSALU],   1>;
  def : HWWriteRes<WriteSMEM,    [HWLGKM],   5>;
  def : HWWriteRes<WriteVMEM,    [HWVMEM],   80>;
  def : HWWriteRes<WriteBarrier, [HWBranch], 500>; // XXX: Guessed ???

  def : HWVALUWriteRes<Write32Bit,         1>;
  def : HWVALUWriteRes<Write64Bit,         2>;
  def : HWVALUWriteRes<WriteQuarterRate32, 4>;
  def : HWVALUWriteRes<Write2PassMAI,      2>;
  def : HWVALUWriteRes<Write8PassMAI,      8>;
  def : HWVALUWriteRes<Write16PassMAI,     16>;

  def : ReadAdvance<MIVGPRRead, -2>;
  def : InstRW<[Write64Bit, MIReadVGPR], (instregex "^V_ACCVGPR_WRITE_B32$")>;

  // Technically mfma reads can be from 0 to 4 cycles but that does not make
  // sense to model because its register setup is huge. In particular if we
  // properly model read advance as -2 for a vgpr read it will result in a
  // bad scheduling of acc writes before that mfma. To avoid it we would
  // need to consume 2 or 4 more vgprs to be initialized before the acc
  // write sequence. Just assume worst case here.
  def : ReadAdvance<MIMFMARead, -4>;

  def : InstRW<[Write2PassMAI,  MIMFMARead], (instregex "^V_MFMA_..._4X4X")>;
  def : InstRW<[Write8PassMAI,  MIMFMARead], (instregex "^V_MFMA_..._16X16X")>;
  def : InstRW<[Write16PassMAI, MIMFMARead], (instregex "^V_MFMA_..._32X32X")>;
}

// COPY is classified when the scheduling class is resolved: VGPR copies map
// to Write32Bit or Write64Bit depending on the size test below, and every
// other copy falls through to WriteSALU.
//
// NOTE(review): the threshold 32 reads like a bit width, but
// SIInstrInfo::getOpSize may report the operand size in bytes. If so, VGPR
// copies up to 256 bits are classified as Write32Bit. Confirm against
// SIInstrInfo before changing.
def PredIsVGPR32Copy : SchedPredicate<[{TII->isVGPRCopy(*MI) && TII->getOpSize(*MI, 0) <= 32}]>;
def PredIsVGPR64Copy : SchedPredicate<[{TII->isVGPRCopy(*MI) && TII->getOpSize(*MI, 0) > 32}]>;
def WriteCopy : SchedWriteVariant<[
    SchedVar<PredIsVGPR32Copy, [Write32Bit]>,
    SchedVar<PredIsVGPR64Copy, [Write64Bit]>,
    SchedVar<NoSchedPred, [WriteSALU]>]>;

let SchedModel = SIFullSpeedModel in {

defm : SICommonWriteRes;

def : HWVALUWriteRes<WriteFloatFMA,  1>;
def : HWVALUWriteRes<WriteDouble,    4>;
def : HWVALUWriteRes<WriteDoubleAdd, 2>;
def : HWVALUWriteRes<WriteDoubleCvt, 4>;

def : InstRW<[WriteCopy], (instrs COPY)>;

} // End SchedModel = SIFullSpeedModel

let SchedModel = SIQuarterSpeedModel in {

defm : SICommonWriteRes;

def : HWVALUWriteRes<WriteFloatFMA,  16>;
def : HWVALUWriteRes<WriteDouble,    16>;
def : HWVALUWriteRes<WriteDoubleAdd, 8>;
def : HWVALUWriteRes<WriteDoubleCvt, 4>;

def : InstRW<[WriteCopy], (instrs COPY)>;

} // End SchedModel = SIQuarterSpeedModel

// GFX10 does not instantiate SICommonWriteRes; it binds every write type
// itself, and most of them also use the HWRC resource.
let SchedModel = GFX10SpeedModel in {

// The latency values are 1 / (operations / cycle).
// Add 1 stall cycle for VGPR read.
def : HWWriteRes<Write32Bit,         [HWVALU, HWRC], 5>;
def : HWWriteRes<Write64Bit,         [HWVALU, HWRC], 9>;
def : HWWriteRes<WriteQuarterRate32, [HWVALU, HWRC], 17>;
def : HWWriteRes<WriteFloatFMA,      [HWVALU, HWRC], 5>;
def : HWWriteRes<WriteDouble,        [HWVALU, HWRC], 17>;
def : HWWriteRes<WriteDoubleAdd,     [HWVALU, HWRC], 17>;
def : HWWriteRes<WriteDoubleCvt,     [HWVALU, HWRC], 17>;

def : HWWriteRes<WriteBranch,  [HWBranch],       32>;
def : HWWriteRes<WriteExport,  [HWExport, HWRC], 16>;
def : HWWriteRes<WriteLDS,     [HWLGKM,   HWRC], 20>;
def : HWWriteRes<WriteSALU,    [HWSALU,   HWRC], 5>;
def : HWWriteRes<WriteSMEM,    [HWLGKM,   HWRC], 20>;
def : HWWriteRes<WriteVMEM,    [HWVMEM,   HWRC], 320>;
def : HWWriteRes<WriteBarrier, [HWBranch],       2000>;

def : InstRW<[WriteCopy], (instrs COPY)>;

} // End SchedModel = GFX10SpeedModel