//===-- SISchedule.td - SI Scheduling definitions -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// MachineModel definitions for Southern Islands (SI)
//
//===----------------------------------------------------------------------===//

// Prolog shared by the scheduling predicates in this file: it declares the
// `TII` pointer that the SchedPredicate code fragments below dereference.
// The (void) cast silences unused-variable warnings for predicates that do
// not use it.
def : PredicateProlog<[{
  const SIInstrInfo *TII =
    static_cast<const SIInstrInfo*>(SchedModel->getInstrInfo());
  (void)TII;
}]>;

//===----------------------------------------------------------------------===//
// Scheduling write/read types. These are only names; each machine model
// below binds them to resources and latencies.
//===----------------------------------------------------------------------===//

def WriteBranch  : SchedWrite;
def WriteExport  : SchedWrite;
def WriteLDS     : SchedWrite;
def WriteSALU    : SchedWrite;
def WriteSMEM    : SchedWrite;
def WriteVMEM    : SchedWrite;
def WriteBarrier : SchedWrite;

// Read types used with ReadAdvance in SICommonWriteRes.
def MIVGPRRead  : SchedRead;
def MIMFMARead  : SchedRead;

// Vector ALU instructions
def Write32Bit         : SchedWrite;
def WriteQuarterRate32 : SchedWrite;
// NOTE(review): no WriteRes for WriteFullOrQuarterRate32 is visible in this
// file for any model; the models are marked incomplete (CompleteModel = 0).
def WriteFullOrQuarterRate32 : SchedWrite;

def WriteFloatFMA   : SchedWrite;

// Slow quarter rate f64 instruction.
def WriteDouble : SchedWrite;

// half rate f64 instruction (same as v_add_f64)
def WriteDoubleAdd  : SchedWrite;

// Conversion to or from f64 instruction
def WriteDoubleCvt  : SchedWrite;

// Half rate 64-bit instructions.
def Write64Bit : SchedWrite;

// MAI multipass instructions.
def Write2PassMAI  : SchedWrite;
def Write8PassMAI  : SchedWrite;
def Write16PassMAI : SchedWrite;

// FIXME: Should there be a class for instructions which are VALU
// instructions and have VALU rates, but write to the SALU (i.e. VOPC
// instructions)

// Common base for all machine models defined in this file.
class SISchedMachineModel : SchedMachineModel {
  let CompleteModel = 0;
  // MicroOpBufferSize = 1 means that instructions will always be added to
  // the ready queue when they become available.  This exposes them
  // to the register pressure analysis.
  let MicroOpBufferSize = 1;
  let IssueWidth = 1;
  let PostRAScheduler = 1;

  // FIXME: Approximate 2 * branch cost.  Try to hack around bad
  // early-ifcvt heuristics. These need improvement to avoid the OOE
  // heuristics.
  //
  // Overridden with `let`, like the other inherited fields above, instead of
  // redeclaring the field with its type.
  let MispredictPenalty = 20;
}

def SIFullSpeedModel : SISchedMachineModel;
def SIQuarterSpeedModel : SISchedMachineModel;
def GFX10SpeedModel : SISchedMachineModel;

//===----------------------------------------------------------------------===//
// Processor resources.
//===----------------------------------------------------------------------===//

// XXX: Are the resource counts correct?
def HWBranch : ProcResource<1> {
  let BufferSize = 1;
}
def HWExport : ProcResource<1> {
  let BufferSize = 7; // Taken from S_WAITCNT
}
def HWLGKM   : ProcResource<1> {
  let BufferSize = 31;  // Taken from S_WAITCNT
}
def HWSALU   : ProcResource<1> {
  let BufferSize = 1;
}
def HWVMEM   : ProcResource<1> {
  let BufferSize = 15;  // Taken from S_WAITCNT
}
def HWVALU   : ProcResource<1> {
  let BufferSize = 1;
}
def HWRC   : ProcResource<1> { // Register destination cache
  let BufferSize = 1;
}

// WriteRes with the latency supplied as a template argument.
class HWWriteRes<SchedWrite write, list<ProcResourceKind> resources,
                 int latency> : WriteRes<write, resources> {
  let Latency = latency;
}

// HWWriteRes specialised for writes that only use the HWVALU resource.
class HWVALUWriteRes<SchedWrite write, int latency> :
  HWWriteRes<write, [HWVALU], latency>;

// True when TII reports that the instruction has VGPR uses.
def PredMIReadVGPR : SchedPredicate<[{TII->hasVGPRUses(*MI)}]>;

// Resolves to MIVGPRRead when the instruction has VGPR uses, otherwise to
// ReadDefault.
def MIReadVGPR : SchedReadVariant<[
      SchedVar<PredMIReadVGPR, [MIVGPRRead]>,
      SchedVar<NoSchedPred, [ReadDefault]>]>;

// The latency numbers are taken from AMD Accelerated Parallel Processing
// guide. They may not be accurate.

// The latency values are 1 / (operations / cycle) / 4.
//
// Write resources shared by SIFullSpeedModel and SIQuarterSpeedModel; each of
// those models instantiates this multiclass and then adds its own
// FMA/double-precision latencies.
multiclass SICommonWriteRes {

  def : HWWriteRes<WriteBranch,  [HWBranch], 8>;
  def : HWWriteRes<WriteExport,  [HWExport], 4>;
  def : HWWriteRes<WriteLDS,     [HWLGKM],   5>; // Can be between 2 and 64
  def : HWWriteRes<WriteSALU,    [HWSALU],   1>;
  def : HWWriteRes<WriteSMEM,    [HWLGKM],   5>;
  def : HWWriteRes<WriteVMEM,    [HWVMEM],   80>;
  def : HWWriteRes<WriteBarrier, [HWBranch], 500>; // XXX: Guessed ???

  def : HWVALUWriteRes<Write32Bit,         1>;
  def : HWVALUWriteRes<Write64Bit,         2>;
  def : HWVALUWriteRes<WriteQuarterRate32, 4>;
  def : HWVALUWriteRes<Write2PassMAI,      2>;
  def : HWVALUWriteRes<Write8PassMAI,      8>;
  def : HWVALUWriteRes<Write16PassMAI,    16>;

  def : ReadAdvance<MIVGPRRead, -2>;
  def : InstRW<[Write64Bit, MIReadVGPR], (instregex "^V_ACCVGPR_WRITE_B32$")>;

  // Technically mfma reads can be from 0 to 4 cycles but that does not make
  // sense to model because its register setup is huge. In particular if we
  // properly model read advance as -2 for a vgpr read it will result in a
  // bad scheduling of acc writes before that mfma. To avoid it we would
  // need to consume 2 or 4 more vgprs to be initialized before the acc
  // write sequence. Just assume worst case here.
  def : ReadAdvance<MIMFMARead, -4>;

  // MFMA instructions are matched by name; the pass count is selected from
  // the tile-shape component of the opcode name.
  def : InstRW<[Write2PassMAI,  MIMFMARead], (instregex "^V_MFMA_..._4X4X")>;
  def : InstRW<[Write8PassMAI,  MIMFMARead], (instregex "^V_MFMA_..._16X16X")>;
  def : InstRW<[Write16PassMAI, MIMFMARead], (instregex "^V_MFMA_..._32X32X")>;
}

// COPY is classified at schedule time: VGPR copies are split by the size of
// operand 0, everything else is treated as an SALU write.
//
// NOTE(review): the threshold of 32 assumes TII->getOpSize() reports a size in
// bits. If it reports bytes, the intended boundary would be 4 — confirm
// against SIInstrInfo::getOpSize before relying on the 32/64-bit split.
def PredIsVGPR32Copy : SchedPredicate<[{TII->isVGPRCopy(*MI) && TII->getOpSize(*MI, 0) <= 32}]>;
def PredIsVGPR64Copy : SchedPredicate<[{TII->isVGPRCopy(*MI) && TII->getOpSize(*MI, 0) > 32}]>;
def WriteCopy : SchedWriteVariant<[
    SchedVar<PredIsVGPR32Copy, [Write32Bit]>,
    SchedVar<PredIsVGPR64Copy, [Write64Bit]>,
    SchedVar<NoSchedPred, [WriteSALU]>]>;

let SchedModel = SIFullSpeedModel in {

defm : SICommonWriteRes;

def : HWVALUWriteRes<WriteFloatFMA,   1>;
def : HWVALUWriteRes<WriteDouble,     4>;
def : HWVALUWriteRes<WriteDoubleAdd,  2>;
def : HWVALUWriteRes<WriteDoubleCvt,  4>;

def : InstRW<[WriteCopy], (instrs COPY)>;

} // End SchedModel = SIFullSpeedModel

let SchedModel = SIQuarterSpeedModel in {

defm : SICommonWriteRes;

def : HWVALUWriteRes<WriteFloatFMA, 16>;
def : HWVALUWriteRes<WriteDouble,   16>;
def : HWVALUWriteRes<WriteDoubleAdd, 8>;
def : HWVALUWriteRes<WriteDoubleCvt, 4>;

def : InstRW<[WriteCopy], (instrs COPY)>;

}  // End SchedModel = SIQuarterSpeedModel

let SchedModel = GFX10SpeedModel in {

// The latency values are 1 / (operations / cycle).
// Add 1 stall cycle for VGPR read.
//
// Unlike the SI models, this model does not instantiate SICommonWriteRes;
// every write resource is listed explicitly, and most also occupy HWRC.
def : HWWriteRes<Write32Bit,         [HWVALU, HWRC],   5>;
def : HWWriteRes<Write64Bit,         [HWVALU, HWRC],   9>;
def : HWWriteRes<WriteQuarterRate32, [HWVALU, HWRC],   17>;
def : HWWriteRes<WriteFloatFMA,      [HWVALU, HWRC],   5>;
def : HWWriteRes<WriteDouble,        [HWVALU, HWRC],   17>;
def : HWWriteRes<WriteDoubleAdd,     [HWVALU, HWRC],   17>;
def : HWWriteRes<WriteDoubleCvt,     [HWVALU, HWRC],   17>;

def : HWWriteRes<WriteBranch,        [HWBranch],       32>;
def : HWWriteRes<WriteExport,        [HWExport, HWRC], 16>;
def : HWWriteRes<WriteLDS,           [HWLGKM,   HWRC], 20>;
def : HWWriteRes<WriteSALU,          [HWSALU,   HWRC], 5>;
def : HWWriteRes<WriteSMEM,          [HWLGKM,   HWRC], 20>;
def : HWWriteRes<WriteVMEM,          [HWVMEM,   HWRC], 320>;
def : HWWriteRes<WriteBarrier,       [HWBranch],       2000>;

def : InstRW<[WriteCopy], (instrs COPY)>;

}  // End SchedModel = GFX10SpeedModel