//===-- SISchedule.td - SI Scheduling definitions -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// MachineModel definitions for Southern Islands (SI)
//
//===----------------------------------------------------------------------===//

// C++ prolog shared by the scheduling predicates in this file. It declares
// the SIInstrInfo pointer `TII` that those predicates dereference; the void
// cast keeps `TII` referenced even where a predicate does not use it.
def : PredicateProlog<[{
  const SIInstrInfo *TII =
    static_cast<const SIInstrInfo*>(SchedModel->getInstrInfo());
  (void)TII;
}]>;

//===----------------------------------------------------------------------===//
// Scheduling write/read classes.
//===----------------------------------------------------------------------===//

def WriteBranch  : SchedWrite;
def WriteExport  : SchedWrite;
def WriteLDS     : SchedWrite;
def WriteSALU    : SchedWrite;
def WriteSMEM    : SchedWrite;
def WriteVMEM    : SchedWrite;
def WriteBarrier : SchedWrite;

// Read classes; SICommonWriteRes attaches a ReadAdvance to each of them.
def MIVGPRRead : SchedRead;
def MIMFMARead : SchedRead;

// Normal 16 or 32 bit VALU instructions
def Write32Bit : SchedWrite;
// Conversion to or from F32 (but not converting F64 to or from F32)
def WriteFloatCvt : SchedWrite;
// F16 or F32 transcendental instructions (these are quarter rate)
def WriteTrans32 : SchedWrite;
// Other quarter rate VALU instructions
def WriteQuarterRate32 : SchedWrite;

def WriteFloatFMA : SchedWrite;

// Slow quarter rate f64 instruction.
def WriteDouble : SchedWrite;

// half rate f64 instruction (same as v_add_f64)
def WriteDoubleAdd : SchedWrite;

// Conversion to or from f64 instruction
def WriteDoubleCvt : SchedWrite;

// F64 "transcendental" (actually only reciprocal and/or square root)
// instructions
def WriteTrans64 : SchedWrite;

// Half rate 64-bit instructions.
def Write64Bit : SchedWrite;

// mAI multipass instructions.
def Write2PassMAI  : SchedWrite;
def Write8PassMAI  : SchedWrite;
def Write16PassMAI : SchedWrite;

// FIXME: Should there be a class for instructions which are VALU
// instructions and have VALU rates, but write to the SALU (i.e. VOPC
// instructions)

//===----------------------------------------------------------------------===//
// Machine models.
//===----------------------------------------------------------------------===//

// Settings common to every machine model defined in this file.
class SISchedMachineModel : SchedMachineModel {
  let CompleteModel = 1;
  // MicroOpBufferSize = 1 means that instructions will always be added to
  // the ready queue when they become available. This exposes them
  // to the register pressure analysis.
  let MicroOpBufferSize = 1;
  let IssueWidth = 1;
  let PostRAScheduler = 1;

  // FIXME: Approximate 2 * branch cost. Try to hack around bad
  // early-ifcvt heuristics. These need improvement to avoid the OOE
  // heuristics.
  int MispredictPenalty = 20;
}

def SIFullSpeedModel    : SISchedMachineModel;
def SIQuarterSpeedModel : SISchedMachineModel;
def GFX10SpeedModel     : SISchedMachineModel;

//===----------------------------------------------------------------------===//
// Processor resources.
//===----------------------------------------------------------------------===//

// XXX: Are the resource counts correct?
def HWBranch : ProcResource<1> {
  let BufferSize = 1;
}
def HWExport : ProcResource<1> {
  let BufferSize = 7; // Taken from S_WAITCNT
}
def HWLGKM : ProcResource<1> {
  let BufferSize = 31; // Taken from S_WAITCNT
}
def HWSALU : ProcResource<1> {
  let BufferSize = 1;
}
def HWVMEM : ProcResource<1> {
  let BufferSize = 15; // Taken from S_WAITCNT
}
def HWVALU : ProcResource<1> {
  let BufferSize = 1;
}
def HWRC : ProcResource<1> { // Register destination cache
  let BufferSize = 1;
}

// Associates the SchedWrite `write` with the processor resources in
// `resources` and sets its Latency to `latency`.
class HWWriteRes<SchedWrite write, list<ProcResourceKind> resources,
                 int latency> : WriteRes<write, resources> {
  let Latency = latency;
}

// Shorthand for an HWWriteRes whose only resource is HWVALU.
class HWVALUWriteRes<SchedWrite write, int latency> :
  HWWriteRes<write, [HWVALU], latency>;

// Predicate that evaluates TII->hasVGPRUses(*MI).
def PredMIReadVGPR : SchedPredicate<[{TII->hasVGPRUses(*MI)}]>;

// Resolves to MIVGPRRead when PredMIReadVGPR holds and to ReadDefault
// otherwise.
def MIReadVGPR : SchedReadVariant<[
      SchedVar<PredMIReadVGPR, [MIVGPRRead]>,
      SchedVar<NoSchedPred, [ReadDefault]>]>;

// The latency numbers are taken from AMD Accelerated Parallel Processing
// guide. They may not be accurate.

// Write resources instantiated by both SIFullSpeedModel and
// SIQuarterSpeedModel.
// The latency values are 1 / (operations / cycle) / 4.
multiclass SICommonWriteRes {

  def : HWWriteRes<WriteBranch,  [HWBranch], 8>;
  def : HWWriteRes<WriteExport,  [HWExport], 4>;
  def : HWWriteRes<WriteLDS,     [HWLGKM],   5>; // Can be between 2 and 64
  def : HWWriteRes<WriteSALU,    [HWSALU],   1>;
  def : HWWriteRes<WriteSMEM,    [HWLGKM],   5>;
  def : HWWriteRes<WriteVMEM,    [HWVMEM],   80>;
  def : HWWriteRes<WriteBarrier, [HWBranch], 500>; // XXX: Guessed ???

  def : HWVALUWriteRes<Write32Bit,         1>;
  def : HWVALUWriteRes<Write64Bit,         2>;
  def : HWVALUWriteRes<WriteFloatCvt,      4>;
  def : HWVALUWriteRes<WriteTrans32,       4>;
  def : HWVALUWriteRes<WriteQuarterRate32, 4>;
  def : HWVALUWriteRes<Write2PassMAI,      2>;
  def : HWVALUWriteRes<Write8PassMAI,      8>;
  def : HWVALUWriteRes<Write16PassMAI,     16>;

  def : ReadAdvance<MIVGPRRead, -2>;
  def : InstRW<[Write64Bit, MIReadVGPR], (instregex "^V_ACCVGPR_WRITE_B32$")>;

  // Technically mfma reads can be from 0 to 4 cycles but that does not make
  // sense to model because its register setup is huge. In particular if we
  // properly model read advance as -2 for a vgpr read it will result in a
  // bad scheduling of acc writes before that mfma. To avoid it we would
  // need to consume 2 or 4 more vgprs to be initialized before the acc
  // write sequence. Just assume worst case here.
  def : ReadAdvance<MIMFMARead, -4>;

  def : InstRW<[Write2PassMAI,  MIMFMARead], (instregex "^V_MFMA_..._4X4X")>;
  def : InstRW<[Write8PassMAI,  MIMFMARead], (instregex "^V_MFMA_..._16X16X")>;
  def : InstRW<[Write16PassMAI, MIMFMARead], (instregex "^V_MFMA_..._32X32X")>;
}

// COPY scheduling: a VGPR copy is classed as Write32Bit or Write64Bit
// depending on how TII->getOpSize(*MI, 0) compares with 32; any other copy
// falls back to WriteSALU.
// NOTE(review): the unit returned by getOpSize is not visible in this file -
// confirm that it matches the 32 threshold used by the two predicates below.
def PredIsVGPR32Copy : SchedPredicate<[{TII->isVGPRCopy(*MI) && TII->getOpSize(*MI, 0) <= 32}]>;
def PredIsVGPR64Copy : SchedPredicate<[{TII->isVGPRCopy(*MI) && TII->getOpSize(*MI, 0) > 32}]>;
def WriteCopy : SchedWriteVariant<[
    SchedVar<PredIsVGPR32Copy, [Write32Bit]>,
    SchedVar<PredIsVGPR64Copy, [Write64Bit]>,
    SchedVar<NoSchedPred, [WriteSALU]>]>;

//===----------------------------------------------------------------------===//
// Per-model write resources.
//===----------------------------------------------------------------------===//

let SchedModel = SIFullSpeedModel in {

defm : SICommonWriteRes;

def : HWVALUWriteRes<WriteFloatFMA,  1>;
def : HWVALUWriteRes<WriteDouble,    4>;
def : HWVALUWriteRes<WriteDoubleAdd, 2>;
def : HWVALUWriteRes<WriteDoubleCvt, 4>;
def : HWVALUWriteRes<WriteTrans64,   4>;

def : InstRW<[WriteCopy], (instrs COPY)>;

} // End SchedModel = SIFullSpeedModel

let SchedModel = SIQuarterSpeedModel in {

defm : SICommonWriteRes;

def : HWVALUWriteRes<WriteFloatFMA,  16>;
def : HWVALUWriteRes<WriteDouble,    16>;
def : HWVALUWriteRes<WriteDoubleAdd, 8>;
def : HWVALUWriteRes<WriteDoubleCvt, 4>;
def : HWVALUWriteRes<WriteTrans64,   16>;

def : InstRW<[WriteCopy], (instrs COPY)>;

} // End SchedModel = SIQuarterSpeedModel

// This model does not instantiate SICommonWriteRes; it lists its write
// resources explicitly, and most of them also occupy HWRC.
let SchedModel = GFX10SpeedModel in {

// The latency values are 1 / (operations / cycle).
// Add 1 stall cycle for VGPR read.
def : HWWriteRes<Write32Bit,         [HWVALU, HWRC], 5>;
def : HWWriteRes<WriteFloatCvt,      [HWVALU, HWRC], 5>;
def : HWWriteRes<Write64Bit,         [HWVALU, HWRC], 6>;
def : HWWriteRes<WriteTrans32,       [HWVALU, HWRC], 10>;
def : HWWriteRes<WriteQuarterRate32, [HWVALU, HWRC], 8>;
def : HWWriteRes<WriteFloatFMA,      [HWVALU, HWRC], 5>;
def : HWWriteRes<WriteDouble,        [HWVALU, HWRC], 22>;
def : HWWriteRes<WriteDoubleAdd,     [HWVALU, HWRC], 22>;
def : HWWriteRes<WriteDoubleCvt,     [HWVALU, HWRC], 22>;
def : HWWriteRes<WriteTrans64,       [HWVALU, HWRC], 24>;

def : HWWriteRes<WriteBranch,  [HWBranch],       32>;
def : HWWriteRes<WriteExport,  [HWExport, HWRC], 16>;
def : HWWriteRes<WriteLDS,     [HWLGKM,   HWRC], 20>;
def : HWWriteRes<WriteSALU,    [HWSALU,   HWRC], 2>;
def : HWWriteRes<WriteSMEM,    [HWLGKM,   HWRC], 20>;
def : HWWriteRes<WriteVMEM,    [HWVMEM,   HWRC], 320>;
def : HWWriteRes<WriteBarrier, [HWBranch],       2000>;

def : InstRW<[WriteCopy], (instrs COPY)>;

} // End SchedModel = GFX10SpeedModel