//===-- SISchedule.td - SI Scheduling definitions -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// MachineModel definitions for Southern Islands (SI)
//
//===----------------------------------------------------------------------===//

// Code emitted ahead of every scheduling predicate: it makes a SIInstrInfo
// pointer named TII available to the predicate bodies in this file. The
// (void) cast keeps the variable "used" in predicates that never touch it.
def : PredicateProlog<[{
  const SIInstrInfo *TII =
    static_cast<const SIInstrInfo*>(SchedModel->getInstrInfo());
  (void)TII;
}]>;

//===----------------------------------------------------------------------===//
// Scheduling write/read classes. Latencies and resources are attached
// per machine model further down in this file.
//===----------------------------------------------------------------------===//

def WriteBranch  : SchedWrite;
def WriteExport  : SchedWrite;
def WriteLDS     : SchedWrite;
def WriteSALU    : SchedWrite;
def WriteSMEM    : SchedWrite;
def WriteVMEM    : SchedWrite;
def WriteBarrier : SchedWrite;

def MIVGPRRead  : SchedRead;
def MIMFMARead  : SchedRead;

// Normal 16 or 32 bit VALU instructions
def Write32Bit         : SchedWrite;
// Conversion to or from F32 (but not converting F64 to or from F32)
def WriteFloatCvt      : SchedWrite;
// F16 or F32 transcendental instructions (these are quarter rate)
def WriteTrans32       : SchedWrite;
// Other quarter rate VALU instructions
def WriteQuarterRate32 : SchedWrite;

def WriteFloatFMA   : SchedWrite;

// Slow quarter rate f64 instruction.
def WriteDouble     : SchedWrite;

// half rate f64 instruction (same as v_add_f64)
def WriteDoubleAdd  : SchedWrite;

// Conversion to or from f64 instruction
def WriteDoubleCvt  : SchedWrite;

// F64 "transcendental" (actually only reciprocal and/or square root)
// instructions
def WriteTrans64    : SchedWrite;

// Half rate 64-bit instructions.
def Write64Bit      : SchedWrite;

// Integer multiplications.
def WriteIntMul     : SchedWrite;

// mAI multipass instructions.
def Write2PassMAI   : SchedWrite;
def Write4PassMAI   : SchedWrite;
def Write8PassMAI   : SchedWrite;
def Write16PassMAI  : SchedWrite;
def Write4PassDGEMM : SchedWrite;
def Write8PassDGEMM : SchedWrite;

// Scalar float instructions
def WriteSFPU : SchedWrite;

// F16 or F32 pseudo scalar transcendental instructions
def WritePseudoScalarTrans : SchedWrite;

// FIXME: Should there be a class for instructions which are VALU
// instructions and have VALU rates, but write to the SALU (i.e. VOPC
// instructions)

// Settings shared by every machine model defined in this file.
class SISchedMachineModel : SchedMachineModel {
  let CompleteModel = 1;
  // MicroOpBufferSize = 1 means that instructions will always be added to
  // the ready queue when they become available. This exposes them
  // to the register pressure analysis.
  let MicroOpBufferSize = 1;
  let IssueWidth = 1;
  let PostRAScheduler = 1;

  // FIXME: Approximate 2 * branch cost. Try to hack around bad
  // early-ifcvt heuristics. These need improvement to avoid the OOE
  // heuristics.
  int MispredictPenalty = 20;
}

// One model record per speed/generation variant; each one is populated by a
// matching `let SchedModel = ... in { }` section later in the file.
def SIFullSpeedModel         : SISchedMachineModel;
def SIQuarterSpeedModel      : SISchedMachineModel;
def SIDPFullSpeedModel       : SISchedMachineModel;
def SIDPGFX940FullSpeedModel : SISchedMachineModel;
def GFX10SpeedModel          : SISchedMachineModel;
def GFX11SpeedModel          : SISchedMachineModel;
def GFX12SpeedModel          : SISchedMachineModel;

// XXX: Are the resource counts correct?
def HWBranch : ProcResource<1> {
  let BufferSize = 1;
}
def HWExport : ProcResource<1> {
  let BufferSize = 1;
}
def HWLGKM : ProcResource<1> {
  let BufferSize = 1;
}
def HWSALU : ProcResource<1> {
  let BufferSize = 1;
}
def HWVMEM : ProcResource<1> {
  let BufferSize = 1;
}
def HWVALU : ProcResource<1> {
  let BufferSize = 1;
}
def HWTransVALU : ProcResource<1> { // Transcendental VALU
  let BufferSize = 1;
}
def HWRC : ProcResource<1> { // Register destination cache
  let BufferSize = 1;
}
// Unlike the resources above, HWXDL is declared with BufferSize = 0.
def HWXDL : ProcResource<1> { // MFMA CU
  let BufferSize = 0;
}

// WriteRes with an explicit latency: binds `write` to `resources` and sets
// Latency to `latency`.
class HWWriteRes<SchedWrite write, list<ProcResourceKind> resources,
                 int latency> : WriteRes<write, resources> {
  let Latency = latency;
}

// HWWriteRes specialised to the single HWVALU resource.
class HWVALUWriteRes<SchedWrite write, int latency> :
  HWWriteRes<write, [HWVALU], latency>;

// Declares `write` with no resources and Unsupported = 1 for the model that
// is in scope where this class is instantiated.
class UnsupportedWriteRes<SchedWrite write> : WriteRes<write, []> {
  let Unsupported = 1;
}

def PredMIReadVGPR : SchedPredicate<[{TII->hasVGPRUses(*MI)}]>;

// Resolves to MIVGPRRead when PredMIReadVGPR holds, otherwise ReadDefault.
def MIReadVGPR : SchedReadVariant<[
      SchedVar<PredMIReadVGPR, [MIVGPRRead]>,
      SchedVar<NoSchedPred, [ReadDefault]>]>;

// The latency numbers are taken from AMD Accelerated Parallel Processing
// guide. They may not be accurate.

// The latency values are 1 / (operations / cycle) / 4.
// Write resources and read advances shared by the SI* models below; each of
// those models instantiates this multiclass with `defm : SICommonWriteRes;`.
multiclass SICommonWriteRes {

  let RetireOOO = 1 in { // llvm-mca specific flag
  def : HWWriteRes<WriteBranch,  [HWBranch], 8>;
  def : HWWriteRes<WriteExport,  [HWExport], 4>;
  def : HWWriteRes<WriteLDS,     [HWLGKM],   5>; // Can be between 2 and 64
  def : HWWriteRes<WriteSALU,    [HWSALU],   1>;
  def : HWWriteRes<WriteSMEM,    [HWLGKM],   5>;
  def : HWWriteRes<WriteVMEM,    [HWVMEM],   80>;
  def : HWWriteRes<WriteBarrier, [HWBranch], 500>; // XXX: Guessed ???

  def : HWVALUWriteRes<Write32Bit,         1>;
  def : HWVALUWriteRes<WriteFloatCvt,      4>;
  def : HWVALUWriteRes<WriteTrans32,       4>;
  def : HWVALUWriteRes<WriteQuarterRate32, 4>;

  let ReleaseAtCycles = [4] in
  def : HWVALUWriteRes<Write4PassDGEMM,    4>;
  let ReleaseAtCycles = [8] in
  def : HWVALUWriteRes<Write8PassDGEMM,    8>;

  let ReleaseAtCycles = [2] in
  def : HWWriteRes<Write2PassMAI,  [HWXDL], 2>;
  let ReleaseAtCycles = [4] in
  def : HWWriteRes<Write4PassMAI,  [HWXDL], 4>;
  let ReleaseAtCycles = [8] in
  def : HWWriteRes<Write8PassMAI,  [HWXDL], 8>;
  let ReleaseAtCycles = [16] in
  def : HWWriteRes<Write16PassMAI, [HWXDL], 16>;

  def : UnsupportedWriteRes<WriteSFPU>;
  def : UnsupportedWriteRes<WritePseudoScalarTrans>;
  } // End RetireOOO = 1

  def : ReadAdvance<MIVGPRRead, -2>;

  // Technically mfma reads can be from 0 to 4 cycles but that does not make
  // sense to model because its register setup is huge. In particular if we
  // properly model read advance as -2 for a vgpr read it will result in a
  // bad scheduling of acc writes before that mfma. To avoid it we would
  // need to consume 2 or 4 more vgprs to be initialized before the acc
  // write sequence. Just assume worst case here.
  def : ReadAdvance<MIMFMARead, -4>;
}

// COPY is classified by what it copies: a VGPR copy of at most 32 bits is
// Write32Bit, a wider VGPR copy is Write64Bit, and anything else is WriteSALU.
def PredIsVGPR32Copy : SchedPredicate<[{TII->isVGPRCopy(*MI) && TII->getOpSize(*MI, 0) <= 32}]>;
def PredIsVGPR64Copy : SchedPredicate<[{TII->isVGPRCopy(*MI) && TII->getOpSize(*MI, 0) > 32}]>;
def WriteCopy : SchedWriteVariant<[
    SchedVar<PredIsVGPR32Copy, [Write32Bit]>,
    SchedVar<PredIsVGPR64Copy, [Write64Bit]>,
    SchedVar<NoSchedPred, [WriteSALU]>]>;

let SchedModel = SIFullSpeedModel in {

defm : SICommonWriteRes;

let RetireOOO = 1 in { // llvm-mca specific flag
def : HWVALUWriteRes<Write64Bit,     2>;
def : HWVALUWriteRes<WriteIntMul,    4>;
def : HWVALUWriteRes<WriteFloatFMA,  1>;
def : HWVALUWriteRes<WriteDouble,    4>;
def : HWVALUWriteRes<WriteDoubleAdd, 2>;
def : HWVALUWriteRes<WriteDoubleCvt, 4>;
def : HWVALUWriteRes<WriteTrans64,   4>;
} // End RetireOOO = 1

def : InstRW<[WriteCopy], (instrs COPY)>;

} // End SchedModel = SIFullSpeedModel

let SchedModel = SIQuarterSpeedModel in {

defm : SICommonWriteRes;

let RetireOOO = 1 in { // llvm-mca specific flag
def : HWVALUWriteRes<Write64Bit,     2>;
def : HWVALUWriteRes<WriteIntMul,    4>;
def : HWVALUWriteRes<WriteFloatFMA,  16>;
def : HWVALUWriteRes<WriteDouble,    16>;
def : HWVALUWriteRes<WriteDoubleAdd, 8>;
def : HWVALUWriteRes<WriteDoubleCvt, 4>;
def : HWVALUWriteRes<WriteTrans64,   16>;
} // End RetireOOO = 1

def : InstRW<[WriteCopy], (instrs COPY)>;
def : InstRW<[Write64Bit, MIReadVGPR], (instregex "^V_ACCVGPR_WRITE_B32_e64$")>;
def : InstRW<[Write2PassMAI,  MIMFMARead], (instregex "^V_MFMA_..._4X4X")>;
def : InstRW<[Write8PassMAI,  MIMFMARead], (instregex "^V_MFMA_..._16X16X")>;
def : InstRW<[Write16PassMAI, MIMFMARead], (instregex "^V_MFMA_..._32X32X")>;

} // End SchedModel = SIQuarterSpeedModel

let SchedModel = SIDPFullSpeedModel in {

defm : SICommonWriteRes;

let RetireOOO = 1 in { // llvm-mca specific flag
def : HWVALUWriteRes<WriteFloatFMA,  1>;
def : HWVALUWriteRes<WriteDouble,    1>;
def : HWVALUWriteRes<WriteDoubleAdd, 1>;
def : HWVALUWriteRes<WriteDoubleCvt, 1>;
def : HWVALUWriteRes<WriteTrans64,   4>;
def : HWVALUWriteRes<WriteIntMul,    1>;
def : HWVALUWriteRes<Write64Bit,     1>;
} // End RetireOOO = 1

def : InstRW<[WriteCopy], (instrs COPY)>;
def : InstRW<[Write64Bit], (instregex "^V_ACCVGPR_WRITE_B32_e64$")>;
def : InstRW<[Write2PassMAI,   MIMFMARead], (instregex "^V_MFMA_.32_4X4X")>;
def : InstRW<[Write8PassMAI,   MIMFMARead], (instregex "^V_MFMA_.32_16X16X")>;
def : InstRW<[Write16PassMAI,  MIMFMARead], (instregex "^V_MFMA_.32_32X32X")>;
def : InstRW<[Write4PassDGEMM, MIMFMARead], (instregex "^V_MFMA_.64_4X4X")>;
def : InstRW<[Write8PassDGEMM, MIMFMARead], (instregex "^V_MFMA_.64_16X16X")>;

} // End SchedModel = SIDPFullSpeedModel

let SchedModel = SIDPGFX940FullSpeedModel in {

defm : SICommonWriteRes;

// NOTE(review): the SIFullSpeed, SIQuarterSpeed and SIDPFullSpeed sections
// wrap these per-model VALU entries in `let RetireOOO = 1 in { }`; this
// section does not. Confirm whether that is intentional.
def : HWVALUWriteRes<WriteFloatFMA,  1>;
def : HWVALUWriteRes<WriteDouble,    1>;
def : HWVALUWriteRes<WriteDoubleAdd, 1>;
def : HWVALUWriteRes<WriteDoubleCvt, 1>;
def : HWVALUWriteRes<WriteTrans64,   4>;
def : HWVALUWriteRes<WriteIntMul,    1>;
def : HWVALUWriteRes<Write64Bit,     1>;

def : InstRW<[WriteCopy], (instrs COPY)>;
def : InstRW<[Write64Bit], (instregex "^V_ACCVGPR_WRITE_B32_e64$")>;
def : InstRW<[Write2PassMAI,   MIMFMARead], (instregex "^V_MFMA_.32_4X4X")>;

def : InstRW<[Write4PassMAI,   MIMFMARead], (instregex "^V_MFMA_.32_16X16X8X")>;
def : InstRW<[Write4PassMAI,   MIMFMARead], (instregex "^V_MFMA_.32_16X16X16")>;
def : InstRW<[Write4PassMAI,   MIMFMARead], (instregex "^V_MFMA_.32_16X16X32")>;
def : InstRW<[Write8PassMAI,   MIMFMARead], (instregex "^V_MFMA_.32_16X16X[14][FBI]")>;

def : InstRW<[Write8PassMAI,   MIMFMARead], (instregex "^V_MFMA_.32_32X32X4XF")>;
def : InstRW<[Write8PassMAI,   MIMFMARead], (instregex "^V_MFMA_.32_32X32X8")>;
def : InstRW<[Write8PassMAI,   MIMFMARead], (instregex "^V_MFMA_.32_32X32X16")>;
def : InstRW<[Write16PassMAI,  MIMFMARead], (instregex "^V_MFMA_.32_32X32X[124][FBI]")>;

def : InstRW<[Write4PassDGEMM, MIMFMARead], (instregex "^V_MFMA_.64_4X4X")>;
def : InstRW<[Write8PassDGEMM, MIMFMARead], (instregex "^V_MFMA_.64_16X16X")>;

def : InstRW<[Write4PassMAI,   MIMFMARead], (instregex "^V_SMFMAC_.32_16X16X")>;
def : InstRW<[Write8PassMAI,   MIMFMARead], (instregex "^V_SMFMAC_.32_32X32X")>;

} // End SchedModel = SIDPGFX940FullSpeedModel

let SchedModel = GFX10SpeedModel in {

// The latency values are 1 / (operations / cycle).
// Add 1 stall cycle for VGPR read.
let RetireOOO = 1 in { // llvm-mca specific flag
def : HWWriteRes<Write32Bit,         [HWVALU, HWRC],              5>;
def : HWWriteRes<WriteFloatCvt,      [HWVALU, HWRC],              5>;
def : HWWriteRes<Write64Bit,         [HWVALU, HWRC],              6>;
def : HWWriteRes<WriteTrans32,       [HWTransVALU, HWRC],         10>;
def : HWWriteRes<WriteQuarterRate32, [HWVALU, HWRC],              8>;
def : HWWriteRes<WriteFloatFMA,      [HWVALU, HWRC],              5>;
def : HWWriteRes<WriteDouble,        [HWVALU, HWRC],              22>;
def : HWWriteRes<WriteDoubleAdd,     [HWVALU, HWRC],              22>;
def : HWWriteRes<WriteDoubleCvt,     [HWVALU, HWRC],              22>;
def : HWWriteRes<WriteIntMul,        [HWVALU, HWRC],              8>;
def : HWWriteRes<WriteTrans64,       [HWVALU, HWTransVALU, HWRC], 24>;

def : HWWriteRes<WriteBranch,        [HWBranch],                  32>;
def : HWWriteRes<WriteExport,        [HWExport, HWRC],            16>;
def : HWWriteRes<WriteLDS,           [HWLGKM,   HWRC],            20>;
def : HWWriteRes<WriteSALU,          [HWSALU,   HWRC],            2>;
def : HWWriteRes<WriteSMEM,          [HWLGKM,   HWRC],            20>;
def : HWWriteRes<WriteVMEM,          [HWVMEM,   HWRC],            320>;
def : HWWriteRes<WriteBarrier,       [HWBranch],                  2000>;

def : UnsupportedWriteRes<WriteSFPU>;
def : UnsupportedWriteRes<WritePseudoScalarTrans>;
} // End RetireOOO = 1

def : InstRW<[WriteCopy], (instrs COPY)>;

} // End SchedModel = GFX10SpeedModel

let SchedModel = GFX11SpeedModel in {

// The latency values are 1 / (operations / cycle).
// Add 1 stall cycle for VGPR read.
let RetireOOO = 1 in { // llvm-mca specific flag
def : HWWriteRes<Write32Bit,         [HWVALU, HWRC],              5>;
def : HWWriteRes<WriteFloatCvt,      [HWVALU, HWRC],              5>;
def : HWWriteRes<Write64Bit,         [HWVALU, HWRC],              6>;
def : HWWriteRes<WriteTrans32,       [HWTransVALU, HWRC],         10>;
def : HWWriteRes<WriteQuarterRate32, [HWVALU, HWRC],              8>;
def : HWWriteRes<WriteFloatFMA,      [HWVALU, HWRC],              5>;
def : HWWriteRes<WriteDouble,        [HWVALU, HWRC],              38>;
def : HWWriteRes<WriteDoubleAdd,     [HWVALU, HWRC],              38>;
def : HWWriteRes<WriteDoubleCvt,     [HWVALU, HWRC],              38>;
def : HWWriteRes<WriteIntMul,        [HWVALU, HWRC],              8>;
def : HWWriteRes<WriteTrans64,       [HWVALU, HWTransVALU, HWRC], 40>;

def : HWWriteRes<WriteBranch,        [HWBranch],                  32>;
def : HWWriteRes<WriteExport,        [HWExport, HWRC],            16>;
def : HWWriteRes<WriteLDS,           [HWLGKM,   HWRC],            20>;
def : HWWriteRes<WriteSALU,          [HWSALU,   HWRC],            2>;
def : HWWriteRes<WriteSFPU,          [HWSALU,   HWRC],            4>;
def : HWWriteRes<WriteSMEM,          [HWLGKM,   HWRC],            20>;
def : HWWriteRes<WriteVMEM,          [HWVMEM,   HWRC],            320>;
def : HWWriteRes<WriteBarrier,       [HWBranch],                  2000>;
} // End RetireOOO = 1

def : UnsupportedWriteRes<WritePseudoScalarTrans>;

def : InstRW<[WriteCopy], (instrs COPY)>;

} // End SchedModel = GFX11SpeedModel

let SchedModel = GFX12SpeedModel in {

// NOTE(review): the GFX10 and GFX11 sections wrap their entries in
// `let RetireOOO = 1 in { }` and route WriteTrans32/WriteTrans64 through
// HWTransVALU; this section does neither. Confirm both are intentional.
def : HWWriteRes<Write32Bit,             [HWVALU, HWRC],   5>;
def : HWWriteRes<WriteFloatCvt,          [HWVALU, HWRC],   5>;
def : HWWriteRes<Write64Bit,             [HWVALU, HWRC],   6>;
def : HWWriteRes<WriteTrans32,           [HWVALU, HWRC],   10>;
def : HWWriteRes<WriteQuarterRate32,     [HWVALU, HWRC],   8>;
def : HWWriteRes<WriteFloatFMA,          [HWVALU, HWRC],   5>;
def : HWWriteRes<WriteDouble,            [HWVALU, HWRC],   38>;
def : HWWriteRes<WriteDoubleAdd,         [HWVALU, HWRC],   38>;
def : HWWriteRes<WriteDoubleCvt,         [HWVALU, HWRC],   38>;
def : HWWriteRes<WriteIntMul,            [HWVALU, HWRC],   8>;
def : HWWriteRes<WriteTrans64,           [HWVALU, HWRC],   40>;
def : HWWriteRes<WritePseudoScalarTrans, [HWVALU, HWRC],   7>;

def : HWWriteRes<WriteBranch,            [HWBranch],       32>;
def : HWWriteRes<WriteExport,            [HWExport, HWRC], 16>;
def : HWWriteRes<WriteLDS,               [HWLGKM,   HWRC], 20>;
def : HWWriteRes<WriteSALU,              [HWSALU,   HWRC], 2>;
def : HWWriteRes<WriteSFPU,              [HWSALU,   HWRC], 4>;
def : HWWriteRes<WriteSMEM,              [HWLGKM,   HWRC], 20>;
def : HWWriteRes<WriteVMEM,              [HWVMEM,   HWRC], 320>;
def : HWWriteRes<WriteBarrier,           [HWBranch],       2000>;

def : InstRW<[WriteCopy], (instrs COPY)>;

} // End SchedModel = GFX12SpeedModel