//===-- SISchedule.td - SI Scheduling definitions -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// MachineModel definitions for Southern Islands (SI)
//
//===----------------------------------------------------------------------===//

// C++ fragment that declares a `TII` pointer to the target's SIInstrInfo.
// The SchedPredicate code fragments in this file call through `TII`; the
// (void) cast keeps the variable from being flagged as unused where a
// predicate does not reference it.
def : PredicateProlog<[{
  const SIInstrInfo *TII =
    static_cast<const SIInstrInfo*>(SchedModel->getInstrInfo());
  (void)TII;
}]>;

// Scheduling write types, one per instruction category. Latencies and
// processor resources are bound to them per machine model by the WriteRes
// records further down in this file.
def WriteBranch  : SchedWrite;
def WriteExport  : SchedWrite;
def WriteLDS     : SchedWrite;
def WriteSALU    : SchedWrite;
def WriteSMEM    : SchedWrite;
def WriteVMEM    : SchedWrite;
def WriteBarrier : SchedWrite;

// Scheduling read types; ReadAdvance records are attached to them in
// SICommonWriteRes.
def MIVGPRRead : SchedRead;
def MIMFMARead : SchedRead;

// Normal 16 or 32 bit VALU instructions
def Write32Bit    : SchedWrite;
// Conversion to or from F32 (but not converting F64 to or from F32)
def WriteFloatCvt : SchedWrite;
// F16 or F32 transcendental instructions (these are quarter rate)
def WriteTrans32       : SchedWrite;
// Other quarter rate VALU instructions
def WriteQuarterRate32 : SchedWrite;

def WriteFloatFMA : SchedWrite;

// Slow quarter rate f64 instruction.
def WriteDouble : SchedWrite;

// half rate f64 instruction (same as v_add_f64)
def WriteDoubleAdd : SchedWrite;

// Conversion to or from f64 instruction
def WriteDoubleCvt : SchedWrite;

// F64 "transcendental" (actually only reciprocal and/or square root)
// instructions
def WriteTrans64 : SchedWrite;

// Half rate 64-bit instructions.
def Write64Bit : SchedWrite;

// Integer multiplications.
def WriteIntMul : SchedWrite;

// MAI multipass instructions.
def Write2PassMAI   : SchedWrite;
def Write8PassMAI   : SchedWrite;
def Write16PassMAI  : SchedWrite;
def Write4PassDGEMM : SchedWrite;
def Write8PassDGEMM : SchedWrite;

// FIXME: Should there be a class for instructions which are VALU
// instructions and have VALU rates, but write to the SALU (i.e. VOPC
// instructions)

// Common settings shared by every machine model defined in this file.
class SISchedMachineModel : SchedMachineModel {
  let CompleteModel = 1;
  // MicroOpBufferSize = 1 means that instructions will always be added to
  // the ready queue when they become available. This exposes them
  // to the register pressure analysis.
  let MicroOpBufferSize = 1;
  let IssueWidth = 1;
  let PostRAScheduler = 1;

  // FIXME: Approximate 2 * branch cost. Try to hack around bad
  // early-ifcvt heuristics. These need improvement to avoid the OOE
  // heuristics.
  int MispredictPenalty = 20;
}

// The machine models. Each one gets its own latency table in a
// `let SchedModel = ... in { }` block later in this file.
def SIFullSpeedModel    : SISchedMachineModel;
def SIQuarterSpeedModel : SISchedMachineModel;
def SIDPFullSpeedModel  : SISchedMachineModel;
def GFX10SpeedModel     : SISchedMachineModel;

// XXX: Are the resource counts correct?
// Processor resources referenced by the WriteRes records below. Each is
// declared with a single unit (ProcResource<1>).
def HWBranch : ProcResource<1> {
  let BufferSize = 1;
}
def HWExport : ProcResource<1> {
  let BufferSize = 7; // Taken from S_WAITCNT
}
def HWLGKM : ProcResource<1> {
  let BufferSize = 31; // Taken from S_WAITCNT
}
def HWSALU : ProcResource<1> {
  let BufferSize = 1;
}
def HWVMEM : ProcResource<1> {
  let BufferSize = 15; // Taken from S_WAITCNT
}
def HWVALU : ProcResource<1> {
  let BufferSize = 1;
}
def HWTransVALU : ProcResource<1> { // Transcendental VALU
  let BufferSize = 1;
}
def HWRC : ProcResource<1> { // Register destination cache
  let BufferSize = 1;
}
def HWXDL : ProcResource<1> { // MFMA CU
  let BufferSize = 0;
}

// WriteRes wrapper that takes the latency as a template argument and
// assigns it to the Latency field.
//   write     - the SchedWrite being described
//   resources - processor resources used by that write
//   latency   - value stored in Latency
class HWWriteRes<SchedWrite write, list<ProcResourceKind> resources,
                 int latency> : WriteRes<write, resources> {
  let Latency = latency;
}

// HWWriteRes with the resource list fixed to [HWVALU].
class HWVALUWriteRes<SchedWrite write, int latency> :
  HWWriteRes<write, [HWVALU], latency>;

// Holds when TII->hasVGPRUses(*MI) returns true for the instruction.
def PredMIReadVGPR : SchedPredicate<[{TII->hasVGPRUses(*MI)}]>;

// Variant read: MIVGPRRead when PredMIReadVGPR holds, ReadDefault otherwise.
def MIReadVGPR : SchedReadVariant<[
      SchedVar<PredMIReadVGPR, [MIVGPRRead]>,
      SchedVar<NoSchedPred, [ReadDefault]>]>;

// The latency numbers are taken from AMD Accelerated Parallel Processing
// guide. They may not be accurate.

// Write resources and read advances shared by SIFullSpeedModel,
// SIQuarterSpeedModel and SIDPFullSpeedModel, each of which instantiates
// this multiclass with `defm`. GFX10SpeedModel does not use it and defines
// its own table instead.
//
// The latency values are 1 / (operations / cycle) / 4.
multiclass SICommonWriteRes {

  let RetireOOO = 1 in { // llvm-mca specific flag
    def : HWWriteRes<WriteBranch,  [HWBranch], 8>;
    def : HWWriteRes<WriteExport,  [HWExport], 4>;
    def : HWWriteRes<WriteLDS,     [HWLGKM],   5>; // Can be between 2 and 64
    def : HWWriteRes<WriteSALU,    [HWSALU],   1>;
    def : HWWriteRes<WriteSMEM,    [HWLGKM],   5>;
    def : HWWriteRes<WriteVMEM,    [HWVMEM],   80>;
    def : HWWriteRes<WriteBarrier, [HWBranch], 500>; // XXX: Guessed ???

    def : HWVALUWriteRes<Write32Bit,         1>;
    def : HWVALUWriteRes<WriteFloatCvt,      4>;
    def : HWVALUWriteRes<WriteTrans32,       4>;
    def : HWVALUWriteRes<WriteQuarterRate32, 4>;

    def : HWVALUWriteRes<Write4PassDGEMM, 4>;
    def : HWVALUWriteRes<Write8PassDGEMM, 16>;

    // For the MAI writes, ResourceCycles on HWXDL is set to the same value
    // as the latency.
    let ResourceCycles = [2] in
    def : HWWriteRes<Write2PassMAI,  [HWXDL], 2>;
    let ResourceCycles = [8] in
    def : HWWriteRes<Write8PassMAI,  [HWXDL], 8>;
    let ResourceCycles = [16] in
    def : HWWriteRes<Write16PassMAI, [HWXDL], 16>;
  } // End RetireOOO = 1

  def : ReadAdvance<MIVGPRRead, -2>;

  // Technically mfma reads can be from 0 to 4 cycles but that does not make
  // sense to model because its register setup is huge. In particular if we
  // properly model read advance as -2 for a vgpr read it will result in a
  // bad scheduling of acc writes before that mfma. To avoid it we would
  // need to consume 2 or 4 more vgprs to be initialized before the acc
  // write sequence. Just assume worst case here.
  def : ReadAdvance<MIMFMARead, -4>;
}

// Predicates that classify a VGPR copy by the size of operand 0
// (at most 32 vs. more than 32, as returned by TII->getOpSize).
def PredIsVGPR32Copy : SchedPredicate<[{TII->isVGPRCopy(*MI) && TII->getOpSize(*MI, 0) <= 32}]>;
def PredIsVGPR64Copy : SchedPredicate<[{TII->isVGPRCopy(*MI) && TII->getOpSize(*MI, 0) > 32}]>;

// Variant write used for COPY: Write32Bit or Write64Bit for the two VGPR
// copy cases above, WriteSALU for everything else.
def WriteCopy : SchedWriteVariant<[
    SchedVar<PredIsVGPR32Copy, [Write32Bit]>,
    SchedVar<PredIsVGPR64Copy, [Write64Bit]>,
    SchedVar<NoSchedPred, [WriteSALU]>]>;

let SchedModel = SIFullSpeedModel in {

defm : SICommonWriteRes;

let RetireOOO = 1 in { // llvm-mca specific flag
def : HWVALUWriteRes<Write64Bit,     2>;
def : HWVALUWriteRes<WriteIntMul,    4>;
def : HWVALUWriteRes<WriteFloatFMA,  1>;
def : HWVALUWriteRes<WriteDouble,    4>;
def : HWVALUWriteRes<WriteDoubleAdd, 2>;
def : HWVALUWriteRes<WriteDoubleCvt, 4>;
def : HWVALUWriteRes<WriteTrans64,   4>;
} // End RetireOOO = 1

def : InstRW<[WriteCopy], (instrs COPY)>;

} // End SchedModel = SIFullSpeedModel

let SchedModel = SIQuarterSpeedModel in {

defm : SICommonWriteRes;

let RetireOOO = 1 in { // llvm-mca specific flag
def : HWVALUWriteRes<Write64Bit,     2>;
def : HWVALUWriteRes<WriteIntMul,    4>;
def : HWVALUWriteRes<WriteFloatFMA,  16>;
def : HWVALUWriteRes<WriteDouble,    16>;
def : HWVALUWriteRes<WriteDoubleAdd, 8>;
def : HWVALUWriteRes<WriteDoubleCvt, 4>;
def : HWVALUWriteRes<WriteTrans64,   16>;
} // End RetireOOO = 1

def : InstRW<[WriteCopy], (instrs COPY)>;
def : InstRW<[Write64Bit, MIReadVGPR], (instregex "^V_ACCVGPR_WRITE_B32_e64$")>;
def : InstRW<[Write2PassMAI,  MIMFMARead], (instregex "^V_MFMA_..._4X4X")>;
def : InstRW<[Write8PassMAI,  MIMFMARead], (instregex "^V_MFMA_..._16X16X")>;
def : InstRW<[Write16PassMAI, MIMFMARead], (instregex "^V_MFMA_..._32X32X")>;

} // End SchedModel = SIQuarterSpeedModel

let SchedModel = SIDPFullSpeedModel in {

defm : SICommonWriteRes;

let RetireOOO = 1 in { // llvm-mca specific flag
def : HWVALUWriteRes<WriteFloatFMA,  1>;
def : HWVALUWriteRes<WriteDouble,    1>;
def : HWVALUWriteRes<WriteDoubleAdd, 1>;
def : HWVALUWriteRes<WriteDoubleCvt, 1>;
def : HWVALUWriteRes<WriteTrans64,   4>;
def : HWVALUWriteRes<WriteIntMul,    1>;
def : HWVALUWriteRes<Write64Bit,     1>;
} // End RetireOOO = 1

def : InstRW<[WriteCopy], (instrs COPY)>;
def : InstRW<[Write64Bit], (instregex "^V_ACCVGPR_WRITE_B32_e64$")>;
def : InstRW<[Write2PassMAI,   MIMFMARead], (instregex "^V_MFMA_.32_4X4X")>;
def : InstRW<[Write8PassMAI,   MIMFMARead], (instregex "^V_MFMA_.32_16X16X")>;
def : InstRW<[Write16PassMAI,  MIMFMARead], (instregex "^V_MFMA_.32_32X32X")>;
def : InstRW<[Write4PassDGEMM, MIMFMARead], (instregex "^V_MFMA_.64_4X4X")>;
def : InstRW<[Write8PassDGEMM, MIMFMARead], (instregex "^V_MFMA_.64_16X16X")>;

} // End SchedModel = SIDPFullSpeedModel

let SchedModel = GFX10SpeedModel in {

// The latency values are 1 / (operations / cycle).
// Add 1 stall cycle for VGPR read.
let RetireOOO = 1 in { // llvm-mca specific flag
def : HWWriteRes<Write32Bit,         [HWVALU, HWRC], 5>;
def : HWWriteRes<WriteFloatCvt,      [HWVALU, HWRC], 5>;
def : HWWriteRes<Write64Bit,         [HWVALU, HWRC], 6>;
def : HWWriteRes<WriteTrans32,       [HWTransVALU, HWRC], 10>;
def : HWWriteRes<WriteQuarterRate32, [HWVALU, HWRC], 8>;
def : HWWriteRes<WriteFloatFMA,      [HWVALU, HWRC], 5>;
def : HWWriteRes<WriteDouble,        [HWVALU, HWRC], 22>;
def : HWWriteRes<WriteDoubleAdd,     [HWVALU, HWRC], 22>;
def : HWWriteRes<WriteDoubleCvt,     [HWVALU, HWRC], 22>;
def : HWWriteRes<WriteIntMul,        [HWVALU, HWRC], 8>;
def : HWWriteRes<WriteTrans64,       [HWVALU, HWTransVALU, HWRC], 24>;

def : HWWriteRes<WriteBranch,  [HWBranch], 32>;
def : HWWriteRes<WriteExport,  [HWExport, HWRC], 16>;
def : HWWriteRes<WriteLDS,     [HWLGKM, HWRC], 20>;
def : HWWriteRes<WriteSALU,    [HWSALU, HWRC], 2>;
def : HWWriteRes<WriteSMEM,    [HWLGKM, HWRC], 20>;
def : HWWriteRes<WriteVMEM,    [HWVMEM, HWRC], 320>;
def : HWWriteRes<WriteBarrier, [HWBranch], 2000>;
} // End RetireOOO = 1

def : InstRW<[WriteCopy], (instrs COPY)>;

} // End SchedModel = GFX10SpeedModel