//===-- SISchedule.td - SI Scheduling definitions -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// MachineModel definitions for Southern Islands (SI)
//
//===----------------------------------------------------------------------===//

// Prolog code for scheduling predicates: declares `TII`, a pointer to the
// SIInstrInfo obtained from the scheduling model. The SchedPredicate code
// fragments in this file refer to it. The (void) cast keeps the variable
// from being reported as unused.
def : PredicateProlog<[{
  const SIInstrInfo *TII =
    static_cast<const SIInstrInfo*>(SchedModel->getInstrInfo());
  (void)TII;
}]>;

//===----------------------------------------------------------------------===//
// Scheduling write/read types shared by all models in this file.
//===----------------------------------------------------------------------===//

// Non-VALU writes.
def WriteBranch  : SchedWrite;
def WriteExport  : SchedWrite;
def WriteLDS     : SchedWrite;
def WriteSALU    : SchedWrite;
def WriteSMEM    : SchedWrite;
def WriteVMEM    : SchedWrite;
def WriteBarrier : SchedWrite;

// Reads: VGPR operand reads and MFMA operand reads.
def MIVGPRRead : SchedRead;
def MIMFMARead : SchedRead;

// Normal 16 or 32 bit VALU instructions.
def Write32Bit : SchedWrite;

// Conversion to or from F32 (but not converting F64 to or from F32).
def WriteFloatCvt : SchedWrite;

// F16 or F32 transcendental instructions (these are quarter rate).
def WriteTrans32 : SchedWrite;

// Other quarter rate VALU instructions.
def WriteQuarterRate32 : SchedWrite;

def WriteFloatFMA : SchedWrite;

// Slow quarter rate f64 instruction.
def WriteDouble : SchedWrite;

// Half rate f64 instruction (same as v_add_f64).
def WriteDoubleAdd : SchedWrite;

// Conversion to or from f64 instruction.
def WriteDoubleCvt : SchedWrite;

// F64 "transcendental" (actually only reciprocal and/or square root)
// instructions.
def WriteTrans64 : SchedWrite;

// Half rate 64-bit instructions.
def Write64Bit : SchedWrite;

// Integer multiplications.
def WriteIntMul : SchedWrite;

// MAI multipass instructions.
def Write2PassMAI   : SchedWrite;
def Write4PassMAI   : SchedWrite;
def Write8PassMAI   : SchedWrite;
def Write16PassMAI  : SchedWrite;
def Write4PassDGEMM : SchedWrite;
def Write8PassDGEMM : SchedWrite;

// FIXME: Should there be a class for instructions which are VALU
// instructions and have VALU rates, but write to the SALU (i.e. VOPC
// instructions)

// Common base for every machine model defined in this file. Each model below
// is a bare instance of this class; the per-model differences are expressed
// by the WriteRes/InstRW records bound to it via `let SchedModel = ... in`.
class SISchedMachineModel : SchedMachineModel {
  let CompleteModel = 1;

  // MicroOpBufferSize = 1 means that instructions will always be added to
  // the ready queue when they become available. This exposes them to the
  // register pressure analysis.
  let MicroOpBufferSize = 1;
  let IssueWidth = 1;
  let PostRAScheduler = 1;

  // FIXME: Approximate 2 * branch cost. Try to hack around bad
  // early-ifcvt heuristics. These need improvement to avoid the OOE
  // heuristics.
  let MispredictPenalty = 20;
}

def SIFullSpeedModel         : SISchedMachineModel;
def SIQuarterSpeedModel      : SISchedMachineModel;
def SIDPFullSpeedModel       : SISchedMachineModel;
def SIDPGFX940FullSpeedModel : SISchedMachineModel;
def GFX10SpeedModel          : SISchedMachineModel;
def GFX11SpeedModel          : SISchedMachineModel;

// XXX: Are the resource counts correct?
// Processor resources. Each is a single unit; all but HWXDL use a buffer
// size of 1.
let BufferSize = 1 in {
  def HWBranch    : ProcResource<1>;
  def HWExport    : ProcResource<1>;
  def HWLGKM      : ProcResource<1>;
  def HWSALU      : ProcResource<1>;
  def HWVMEM      : ProcResource<1>;
  def HWVALU      : ProcResource<1>;
  def HWTransVALU : ProcResource<1>; // Transcendental VALU
  def HWRC        : ProcResource<1>; // Register destination cache
} // End BufferSize = 1

def HWXDL : ProcResource<1> { // MFMA CU
  let BufferSize = 0;
}

// WriteRes with the latency supplied as a template argument.
//   write     - the SchedWrite being described.
//   resources - processor resources consumed by the write.
//   latency   - value assigned to the record's Latency field.
class HWWriteRes<SchedWrite write, list<ProcResourceKind> resources,
                 int latency> : WriteRes<write, resources> {
  let Latency = latency;
}

// Shorthand for an HWWriteRes that consumes only HWVALU.
class HWVALUWriteRes<SchedWrite write, int latency> :
  HWWriteRes<write, [HWVALU], latency>;

// True when TII reports that the instruction has VGPR uses.
def PredMIReadVGPR : SchedPredicate<[{TII->hasVGPRUses(*MI)}]>;

// Resolves to MIVGPRRead when the predicate above holds, and to ReadDefault
// otherwise.
def MIReadVGPR : SchedReadVariant<[
  SchedVar<PredMIReadVGPR, [MIVGPRRead]>,
  SchedVar<NoSchedPred, [ReadDefault]>]>;

// The latency numbers are taken from AMD Accelerated Parallel Processing
// guide. They may not be accurate.

// The latency values are 1 / (operations / cycle) / 4.
//
// Write resources and read advances shared by the SI* models. It is
// instantiated with `defm` inside each of those models' `let SchedModel`
// regions.
multiclass SICommonWriteRes {

  let RetireOOO = 1 in { // llvm-mca specific flag
    def : HWWriteRes<WriteBranch,  [HWBranch], 8>;
    def : HWWriteRes<WriteExport,  [HWExport], 4>;
    def : HWWriteRes<WriteLDS,     [HWLGKM],   5>; // Can be between 2 and 64
    def : HWWriteRes<WriteSALU,    [HWSALU],   1>;
    def : HWWriteRes<WriteSMEM,    [HWLGKM],   5>;
    def : HWWriteRes<WriteVMEM,    [HWVMEM],   80>;
    def : HWWriteRes<WriteBarrier, [HWBranch], 500>; // XXX: Guessed ???

    def : HWVALUWriteRes<Write32Bit,         1>;
    def : HWVALUWriteRes<WriteFloatCvt,      4>;
    def : HWVALUWriteRes<WriteTrans32,       4>;
    def : HWVALUWriteRes<WriteQuarterRate32, 4>;

    def : HWVALUWriteRes<Write4PassDGEMM,    4>;
    def : HWVALUWriteRes<Write8PassDGEMM,   16>;

    // MAI writes occupy HWXDL for as many cycles as their pass count, with
    // a latency of the same value.
    let ResourceCycles = [2] in
    def : HWWriteRes<Write2PassMAI,  [HWXDL], 2>;
    let ResourceCycles = [4] in
    def : HWWriteRes<Write4PassMAI,  [HWXDL], 4>;
    let ResourceCycles = [8] in
    def : HWWriteRes<Write8PassMAI,  [HWXDL], 8>;
    let ResourceCycles = [16] in
    def : HWWriteRes<Write16PassMAI, [HWXDL], 16>;
  } // End RetireOOO = 1

  def : ReadAdvance<MIVGPRRead, -2>;

  // Technically mfma reads can be from 0 to 4 cycles but that does not make
  // sense to model because its register setup is huge. In particular if we
  // properly model read advance as -2 for a vgpr read it will result in a
  // bad scheduling of acc writes before that mfma. To avoid it we would
  // need to consume 2 or 4 more vgprs to be initialized before the acc
  // write sequence. Just assume worst case here.
  def : ReadAdvance<MIMFMARead, -4>;
}

// COPY scheduling: a VGPR copy is classified by comparing the size of
// operand 0 against 32; anything that is not a VGPR copy is treated as a
// SALU write.
// NOTE(review): the comparison assumes TII->getOpSize() reports the size in
// the same unit as the literal 32 (presumably bits) - confirm against
// SIInstrInfo::getOpSize.
def PredIsVGPR32Copy : SchedPredicate<[{TII->isVGPRCopy(*MI) && TII->getOpSize(*MI, 0) <= 32}]>;
def PredIsVGPR64Copy : SchedPredicate<[{TII->isVGPRCopy(*MI) && TII->getOpSize(*MI, 0) > 32}]>;
def WriteCopy : SchedWriteVariant<[
  SchedVar<PredIsVGPR32Copy, [Write32Bit]>,
  SchedVar<PredIsVGPR64Copy, [Write64Bit]>,
  SchedVar<NoSchedPred,      [WriteSALU]>]>;

//===----------------------------------------------------------------------===//
// SIFullSpeedModel
//===----------------------------------------------------------------------===//

let SchedModel = SIFullSpeedModel in {

defm : SICommonWriteRes;

let RetireOOO = 1 in { // llvm-mca specific flag
def : HWVALUWriteRes<Write64Bit,     2>;
def : HWVALUWriteRes<WriteIntMul,    4>;
def : HWVALUWriteRes<WriteFloatFMA,  1>;
def : HWVALUWriteRes<WriteDouble,    4>;
def : HWVALUWriteRes<WriteDoubleAdd, 2>;
def : HWVALUWriteRes<WriteDoubleCvt, 4>;
def : HWVALUWriteRes<WriteTrans64,   4>;
} // End RetireOOO = 1

def : InstRW<[WriteCopy], (instrs COPY)>;

} // End SchedModel = SIFullSpeedModel

//===----------------------------------------------------------------------===//
// SIQuarterSpeedModel
//===----------------------------------------------------------------------===//

let SchedModel = SIQuarterSpeedModel in {

defm : SICommonWriteRes;

let RetireOOO = 1 in { // llvm-mca specific flag
def : HWVALUWriteRes<Write64Bit,     2>;
def : HWVALUWriteRes<WriteIntMul,    4>;
def : HWVALUWriteRes<WriteFloatFMA,  16>;
def : HWVALUWriteRes<WriteDouble,    16>;
def : HWVALUWriteRes<WriteDoubleAdd, 8>;
def : HWVALUWriteRes<WriteDoubleCvt, 4>;
def : HWVALUWriteRes<WriteTrans64,   16>;
} // End RetireOOO = 1

def : InstRW<[WriteCopy], (instrs COPY)>;
def : InstRW<[Write64Bit, MIReadVGPR], (instregex "^V_ACCVGPR_WRITE_B32_e64$")>;
def : InstRW<[Write2PassMAI,  MIMFMARead], (instregex "^V_MFMA_..._4X4X")>;
def : InstRW<[Write8PassMAI,  MIMFMARead], (instregex "^V_MFMA_..._16X16X")>;
def : InstRW<[Write16PassMAI, MIMFMARead], (instregex "^V_MFMA_..._32X32X")>;

} // End SchedModel = SIQuarterSpeedModel

//===----------------------------------------------------------------------===//
// SIDPFullSpeedModel
//===----------------------------------------------------------------------===//

let SchedModel = SIDPFullSpeedModel in {

defm : SICommonWriteRes;

let RetireOOO = 1 in { // llvm-mca specific flag
def : HWVALUWriteRes<WriteFloatFMA,  1>;
def : HWVALUWriteRes<WriteDouble,    1>;
def : HWVALUWriteRes<WriteDoubleAdd, 1>;
def : HWVALUWriteRes<WriteDoubleCvt, 1>;
def : HWVALUWriteRes<WriteTrans64,   4>;
def : HWVALUWriteRes<WriteIntMul,    1>;
def : HWVALUWriteRes<Write64Bit,     1>;
} // End RetireOOO = 1

def : InstRW<[WriteCopy], (instrs COPY)>;
def : InstRW<[Write64Bit], (instregex "^V_ACCVGPR_WRITE_B32_e64$")>;
def : InstRW<[Write2PassMAI,   MIMFMARead], (instregex "^V_MFMA_.32_4X4X")>;
def : InstRW<[Write8PassMAI,   MIMFMARead], (instregex "^V_MFMA_.32_16X16X")>;
def : InstRW<[Write16PassMAI,  MIMFMARead], (instregex "^V_MFMA_.32_32X32X")>;
def : InstRW<[Write4PassDGEMM, MIMFMARead], (instregex "^V_MFMA_.64_4X4X")>;
def : InstRW<[Write8PassDGEMM, MIMFMARead], (instregex "^V_MFMA_.64_16X16X")>;

} // End SchedModel = SIDPFullSpeedModel

//===----------------------------------------------------------------------===//
// SIDPGFX940FullSpeedModel
//===----------------------------------------------------------------------===//

let SchedModel = SIDPGFX940FullSpeedModel in {

defm : SICommonWriteRes;

// NOTE(review): unlike the other SI* models, these model-specific entries are
// not wrapped in `let RetireOOO = 1` - confirm whether that is intentional.
def : HWVALUWriteRes<WriteFloatFMA,  1>;
def : HWVALUWriteRes<WriteDouble,    1>;
def : HWVALUWriteRes<WriteDoubleAdd, 1>;
def : HWVALUWriteRes<WriteDoubleCvt, 1>;
def : HWVALUWriteRes<WriteTrans64,   4>;
def : HWVALUWriteRes<WriteIntMul,    1>;
def : HWVALUWriteRes<Write64Bit,     1>;

def : InstRW<[WriteCopy], (instrs COPY)>;
def : InstRW<[Write64Bit], (instregex "^V_ACCVGPR_WRITE_B32_e64$")>;
def : InstRW<[Write2PassMAI,  MIMFMARead], (instregex "^V_MFMA_.32_4X4X")>;

def : InstRW<[Write4PassMAI,  MIMFMARead], (instregex "^V_MFMA_.32_16X16X8X")>;
def : InstRW<[Write4PassMAI,  MIMFMARead], (instregex "^V_MFMA_.32_16X16X16")>;
def : InstRW<[Write4PassMAI,  MIMFMARead], (instregex "^V_MFMA_.32_16X16X32")>;
def : InstRW<[Write8PassMAI,  MIMFMARead], (instregex "^V_MFMA_.32_16X16X[14][FBI]")>;

def : InstRW<[Write8PassMAI,  MIMFMARead], (instregex "^V_MFMA_.32_32X32X4XF")>;
def : InstRW<[Write8PassMAI,  MIMFMARead], (instregex "^V_MFMA_.32_32X32X8")>;
def : InstRW<[Write8PassMAI,  MIMFMARead], (instregex "^V_MFMA_.32_32X32X16")>;
def : InstRW<[Write16PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_32X32X[124][FBI]")>;

def : InstRW<[Write4PassDGEMM, MIMFMARead], (instregex "^V_MFMA_.64_4X4X")>;
def : InstRW<[Write8PassDGEMM, MIMFMARead], (instregex "^V_MFMA_.64_16X16X")>;

def : InstRW<[Write4PassMAI,  MIMFMARead], (instregex "^V_SMFMAC_.32_16X16X")>;
def : InstRW<[Write8PassMAI,  MIMFMARead], (instregex "^V_SMFMAC_.32_32X32X")>;

} // End SchedModel = SIDPGFX940FullSpeedModel

//===----------------------------------------------------------------------===//
// GFX10SpeedModel
//===----------------------------------------------------------------------===//

let SchedModel = GFX10SpeedModel in {

// The latency values are 1 / (operations / cycle).
// Add 1 stall cycle for VGPR read.
let RetireOOO = 1 in { // llvm-mca specific flag
def : HWWriteRes<Write32Bit,         [HWVALU, HWRC],              5>;
def : HWWriteRes<WriteFloatCvt,      [HWVALU, HWRC],              5>;
def : HWWriteRes<Write64Bit,         [HWVALU, HWRC],              6>;
def : HWWriteRes<WriteTrans32,       [HWTransVALU, HWRC],         10>;
def : HWWriteRes<WriteQuarterRate32, [HWVALU, HWRC],              8>;
def : HWWriteRes<WriteFloatFMA,      [HWVALU, HWRC],              5>;
def : HWWriteRes<WriteDouble,        [HWVALU, HWRC],              22>;
def : HWWriteRes<WriteDoubleAdd,     [HWVALU, HWRC],              22>;
def : HWWriteRes<WriteDoubleCvt,     [HWVALU, HWRC],              22>;
def : HWWriteRes<WriteIntMul,        [HWVALU, HWRC],              8>;
def : HWWriteRes<WriteTrans64,       [HWVALU, HWTransVALU, HWRC], 24>;

def : HWWriteRes<WriteBranch,        [HWBranch],                  32>;
def : HWWriteRes<WriteExport,        [HWExport, HWRC],            16>;
def : HWWriteRes<WriteLDS,           [HWLGKM, HWRC],              20>;
def : HWWriteRes<WriteSALU,          [HWSALU, HWRC],              2>;
def : HWWriteRes<WriteSMEM,          [HWLGKM, HWRC],              20>;
def : HWWriteRes<WriteVMEM,          [HWVMEM, HWRC],              320>;
def : HWWriteRes<WriteBarrier,       [HWBranch],                  2000>;
} // End RetireOOO = 1

def : InstRW<[WriteCopy], (instrs COPY)>;

} // End SchedModel = GFX10SpeedModel

//===----------------------------------------------------------------------===//
// GFX11SpeedModel
//===----------------------------------------------------------------------===//

let SchedModel = GFX11SpeedModel in {

// NOTE(review): unlike GFX10SpeedModel, these entries are not wrapped in
// `let RetireOOO = 1`, and the transcendental writes use HWVALU rather than
// HWTransVALU - confirm whether both are intentional.
def : HWWriteRes<Write32Bit,         [HWVALU, HWRC],   5>;
def : HWWriteRes<WriteFloatCvt,      [HWVALU, HWRC],   5>;
def : HWWriteRes<Write64Bit,         [HWVALU, HWRC],   6>;
def : HWWriteRes<WriteTrans32,       [HWVALU, HWRC],   10>;
def : HWWriteRes<WriteQuarterRate32, [HWVALU, HWRC],   8>;
def : HWWriteRes<WriteFloatFMA,      [HWVALU, HWRC],   5>;
def : HWWriteRes<WriteDouble,        [HWVALU, HWRC],   38>;
def : HWWriteRes<WriteDoubleAdd,     [HWVALU, HWRC],   38>;
def : HWWriteRes<WriteDoubleCvt,     [HWVALU, HWRC],   38>;
def : HWWriteRes<WriteIntMul,        [HWVALU, HWRC],   8>;
def : HWWriteRes<WriteTrans64,       [HWVALU, HWRC],   40>;

def : HWWriteRes<WriteBranch,        [HWBranch],       32>;
def : HWWriteRes<WriteExport,        [HWExport, HWRC], 16>;
def : HWWriteRes<WriteLDS,           [HWLGKM, HWRC],   20>;
def : HWWriteRes<WriteSALU,          [HWSALU, HWRC],   2>;
def : HWWriteRes<WriteSMEM,          [HWLGKM, HWRC],   20>;
def : HWWriteRes<WriteVMEM,          [HWVMEM, HWRC],   320>;
def : HWWriteRes<WriteBarrier,       [HWBranch],       2000>;

def : InstRW<[WriteCopy], (instrs COPY)>;

} // End SchedModel = GFX11SpeedModel