//===-- SISchedule.td - SI Scheduling definitions -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// MachineModel definitions for Southern Islands (SI)
//
//===----------------------------------------------------------------------===//

// C++ prologue shared by the scheduling predicates in this file: it casts the
// scheduling model's instruction info to SIInstrInfo and binds it to TII.
// The (void) cast keeps TII referenced even when a predicate does not use it.
def : PredicateProlog<[{
  const SIInstrInfo *TII =
    static_cast<const SIInstrInfo*>(SchedModel->getInstrInfo());
  (void)TII;
}]>;

// Write classes for the non-VALU instruction categories. Latencies and
// processor resources are attached to them per machine model later in this
// file.
def WriteBranch : SchedWrite;
def WriteExport : SchedWrite;
def WriteLDS : SchedWrite;
def WriteSALU : SchedWrite;
def WriteSMEM : SchedWrite;
def WriteVMEM : SchedWrite;
def WriteBarrier : SchedWrite;

// Read classes; their ReadAdvance values are set in SICommonWriteRes.
def MIVGPRRead : SchedRead;
def MIMFMARead : SchedRead;

// Normal 16 or 32 bit VALU instructions
def Write32Bit : SchedWrite;
// Conversion to or from F32 (but not converting F64 to or from F32)
def WriteFloatCvt : SchedWrite;
// F16 or F32 transcendental instructions (these are quarter rate)
def WriteTrans32 : SchedWrite;
// Other quarter rate VALU instructions
def WriteQuarterRate32 : SchedWrite;

// FMA write class; its latency differs between SIFullSpeedModel (1) and
// SIQuarterSpeedModel (16).
def WriteFloatFMA : SchedWrite;

// Slow quarter rate f64 instruction.
def WriteDouble : SchedWrite;

// half rate f64 instruction (same as v_add_f64)
def WriteDoubleAdd : SchedWrite;

// Conversion to or from f64 instruction
def WriteDoubleCvt : SchedWrite;

// F64 "transcendental" (actually only reciprocal and/or square root)
// instructions
def WriteTrans64 : SchedWrite;

// Half rate 64-bit instructions.
def Write64Bit : SchedWrite;

// Integer multiplications.
def WriteIntMul : SchedWrite;

// MAI multipass instructions.
def Write2PassMAI : SchedWrite;
def Write4PassMAI : SchedWrite;
def Write8PassMAI : SchedWrite;
def Write16PassMAI : SchedWrite;
def Write4PassDGEMM : SchedWrite;
def Write8PassDGEMM : SchedWrite;

// Scalar float instructions
def WriteSFPU : SchedWrite;

// F16 or F32 pseudo scalar transcendental instructions
def WritePseudoScalarTrans : SchedWrite;

// FIXME: Should there be a class for instructions which are VALU
// instructions and have VALU rates, but write to the SALU (i.e. VOPC
// instructions)

// Common base for every machine model defined in this file.
class SISchedMachineModel : SchedMachineModel {
  let CompleteModel = 1;
  // MicroOpBufferSize = 1 means that instructions will always be added to
  // the ready queue when they become available. This exposes them
  // to the register pressure analysis.
  let MicroOpBufferSize = 1;
  let IssueWidth = 1;
  let PostRAScheduler = 1;

  // FIXME: Approximate 2 * branch cost. Try to hack around bad
  // early-ifcvt heuristics. These need improvement to avoid the OOE
  // heuristics.
  int MispredictPenalty = 20;
}

// One model record per speed class / generation; each is populated by the
// matching `let SchedModel = ... in { ... }` block below.
def SIFullSpeedModel : SISchedMachineModel;
def SIQuarterSpeedModel : SISchedMachineModel;
def SIDPFullSpeedModel : SISchedMachineModel;
def SIDPGFX940FullSpeedModel : SISchedMachineModel;
def GFX10SpeedModel : SISchedMachineModel;
def GFX11SpeedModel : SISchedMachineModel;
def GFX12SpeedModel : SISchedMachineModel;

// XXX: Are the resource counts correct?
def HWBranch : ProcResource<1> {
  let BufferSize = 1;
}
def HWExport : ProcResource<1> {
  let BufferSize = 1;
}
def HWLGKM : ProcResource<1> {
  let BufferSize = 1;
}
def HWSALU : ProcResource<1> {
  let BufferSize = 1;
}
def HWVMEM : ProcResource<1> {
  let BufferSize = 1;
}
def HWVALU : ProcResource<1> {
  let BufferSize = 1;
}
def HWTransVALU : ProcResource<1> { // Transcendental VALU
  let BufferSize = 1;
}
def HWRC : ProcResource<1> { // Register destination cache
  let BufferSize = 1;
}
def HWXDL : ProcResource<1> { // MFMA CU
  let BufferSize = 0;
}

// WriteRes with an explicit latency: binds `write` to `resources` and sets
// Latency to `latency`.
class HWWriteRes<SchedWrite write, list<ProcResourceKind> resources,
                 int latency> : WriteRes<write, resources> {
  let Latency = latency;
}

// Shorthand for an HWWriteRes whose only resource is HWVALU.
class HWVALUWriteRes<SchedWrite write, int latency> :
  HWWriteRes<write, [HWVALU], latency>;

// Marks `write` as unsupported in the enclosing model: no resources and
// Unsupported = 1.
class UnsupportedWriteRes<SchedWrite write> : WriteRes<write, []> {
  let Unsupported = 1;
}

// True when SIInstrInfo::hasVGPRUses reports VGPR uses for the instruction.
def PredMIReadVGPR : SchedPredicate<[{TII->hasVGPRUses(*MI)}]>;

// Read variant: MIVGPRRead when PredMIReadVGPR holds, ReadDefault otherwise.
def MIReadVGPR : SchedReadVariant<[
  SchedVar<PredMIReadVGPR, [MIVGPRRead]>,
  SchedVar<NoSchedPred, [ReadDefault]>]>;

// The latency numbers are taken from AMD Accelerated Parallel Processing
// guide. They may not be accurate.

// The latency values are 1 / (operations / cycle) / 4.
multiclass SICommonWriteRes {

  let RetireOOO = 1 in { // llvm-mca specific flag
    def : HWWriteRes<WriteBranch, [HWBranch], 8>;
    def : HWWriteRes<WriteExport, [HWExport], 4>;
    def : HWWriteRes<WriteLDS, [HWLGKM], 5>; // Can be between 2 and 64
    def : HWWriteRes<WriteSALU, [HWSALU], 1>;
    def : HWWriteRes<WriteSMEM, [HWLGKM], 5>;
    def : HWWriteRes<WriteVMEM, [HWVMEM], 80>;
    def : HWWriteRes<WriteBarrier, [HWBranch], 500>; // XXX: Guessed ???

    def : HWVALUWriteRes<Write32Bit, 1>;
    def : HWVALUWriteRes<WriteFloatCvt, 4>;
    def : HWVALUWriteRes<WriteTrans32, 4>;
    def : HWVALUWriteRes<WriteQuarterRate32, 4>;

    def : HWVALUWriteRes<Write4PassDGEMM, 4>;
    def : HWVALUWriteRes<Write8PassDGEMM, 16>;

    // Each MAI write holds HWXDL for as many cycles as its latency.
    let ReleaseAtCycles = [2] in
    def : HWWriteRes<Write2PassMAI, [HWXDL], 2>;
    let ReleaseAtCycles = [4] in
    def : HWWriteRes<Write4PassMAI, [HWXDL], 4>;
    let ReleaseAtCycles = [8] in
    def : HWWriteRes<Write8PassMAI, [HWXDL], 8>;
    let ReleaseAtCycles = [16] in
    def : HWWriteRes<Write16PassMAI, [HWXDL], 16>;

    def : UnsupportedWriteRes<WriteSFPU>;
    def : UnsupportedWriteRes<WritePseudoScalarTrans>;
  } // End RetireOOO = 1

  def : ReadAdvance<MIVGPRRead, -2>;

  // Technically mfma reads can be from 0 to 4 cycles but that does not make
  // sense to model because its register setup is huge. In particular if we
  // properly model read advance as -2 for a vgpr read it will result in a
  // bad scheduling of acc writes before that mfma. To avoid it we would
  // need to consume 2 or 4 more vgprs to be initialized before the acc
  // write sequence. Just assume worst case here.
  def : ReadAdvance<MIMFMARead, -4>;
}

// COPY scheduling: a VGPR copy of at most 32 bits is a Write32Bit, a wider
// VGPR copy is a Write64Bit, and anything else is treated as WriteSALU.
def PredIsVGPR32Copy : SchedPredicate<[{TII->isVGPRCopy(*MI) && TII->getOpSize(*MI, 0) <= 32}]>;
def PredIsVGPR64Copy : SchedPredicate<[{TII->isVGPRCopy(*MI) && TII->getOpSize(*MI, 0) > 32}]>;
def WriteCopy : SchedWriteVariant<[
  SchedVar<PredIsVGPR32Copy, [Write32Bit]>,
  SchedVar<PredIsVGPR64Copy, [Write64Bit]>,
  SchedVar<NoSchedPred, [WriteSALU]>]>;

let SchedModel = SIFullSpeedModel in {

defm : SICommonWriteRes;

let RetireOOO = 1 in { // llvm-mca specific flag
def : HWVALUWriteRes<Write64Bit, 2>;
def : HWVALUWriteRes<WriteIntMul, 4>;
def : HWVALUWriteRes<WriteFloatFMA, 1>;
def : HWVALUWriteRes<WriteDouble, 4>;
def : HWVALUWriteRes<WriteDoubleAdd, 2>;
def : HWVALUWriteRes<WriteDoubleCvt, 4>;
def : HWVALUWriteRes<WriteTrans64, 4>;
} // End RetireOOO = 1

def : InstRW<[WriteCopy], (instrs COPY)>;

} // End SchedModel = SIFullSpeedModel

let SchedModel = SIQuarterSpeedModel in {

defm : SICommonWriteRes;

let RetireOOO = 1 in { // llvm-mca specific flag
def : HWVALUWriteRes<Write64Bit, 2>;
def : HWVALUWriteRes<WriteIntMul, 4>;
def : HWVALUWriteRes<WriteFloatFMA, 16>;
def : HWVALUWriteRes<WriteDouble, 16>;
def : HWVALUWriteRes<WriteDoubleAdd, 8>;
def : HWVALUWriteRes<WriteDoubleCvt, 4>;
def : HWVALUWriteRes<WriteTrans64, 16>;
} // End RetireOOO = 1

def : InstRW<[WriteCopy], (instrs COPY)>;
def : InstRW<[Write64Bit, MIReadVGPR], (instregex "^V_ACCVGPR_WRITE_B32_e64$")>;
def : InstRW<[Write2PassMAI, MIMFMARead], (instregex "^V_MFMA_..._4X4X")>;
def : InstRW<[Write8PassMAI, MIMFMARead], (instregex "^V_MFMA_..._16X16X")>;
def : InstRW<[Write16PassMAI, MIMFMARead], (instregex "^V_MFMA_..._32X32X")>;

} // End SchedModel = SIQuarterSpeedModel

let SchedModel = SIDPFullSpeedModel in {

defm : SICommonWriteRes;

let RetireOOO = 1 in { // llvm-mca specific flag
def : HWVALUWriteRes<WriteFloatFMA, 1>;
def : HWVALUWriteRes<WriteDouble, 1>;
def : HWVALUWriteRes<WriteDoubleAdd, 1>;
def : HWVALUWriteRes<WriteDoubleCvt, 1>;
def : HWVALUWriteRes<WriteTrans64, 4>;
def : HWVALUWriteRes<WriteIntMul, 1>;
def : HWVALUWriteRes<Write64Bit, 1>;
} // End RetireOOO = 1

def : InstRW<[WriteCopy], (instrs COPY)>;
def : InstRW<[Write64Bit], (instregex "^V_ACCVGPR_WRITE_B32_e64$")>;
def : InstRW<[Write2PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_4X4X")>;
def : InstRW<[Write8PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_16X16X")>;
def : InstRW<[Write16PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_32X32X")>;
def : InstRW<[Write4PassDGEMM, MIMFMARead], (instregex "^V_MFMA_.64_4X4X")>;
def : InstRW<[Write8PassDGEMM, MIMFMARead], (instregex "^V_MFMA_.64_16X16X")>;

} // End SchedModel = SIDPFullSpeedModel

let SchedModel = SIDPGFX940FullSpeedModel in {

defm : SICommonWriteRes;

// NOTE(review): unlike the three models above, these write resources are not
// wrapped in `let RetireOOO = 1` - confirm whether that is intentional.
def : HWVALUWriteRes<WriteFloatFMA, 1>;
def : HWVALUWriteRes<WriteDouble, 1>;
def : HWVALUWriteRes<WriteDoubleAdd, 1>;
def : HWVALUWriteRes<WriteDoubleCvt, 1>;
def : HWVALUWriteRes<WriteTrans64, 4>;
def : HWVALUWriteRes<WriteIntMul, 1>;
def : HWVALUWriteRes<Write64Bit, 1>;

def : InstRW<[WriteCopy], (instrs COPY)>;
def : InstRW<[Write64Bit], (instregex "^V_ACCVGPR_WRITE_B32_e64$")>;
def : InstRW<[Write2PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_4X4X")>;

def : InstRW<[Write4PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_16X16X8X")>;
def : InstRW<[Write4PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_16X16X16")>;
def : InstRW<[Write4PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_16X16X32")>;
def : InstRW<[Write8PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_16X16X[14][FBI]")>;

def : InstRW<[Write8PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_32X32X4XF")>;
def : InstRW<[Write8PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_32X32X8")>;
def : InstRW<[Write8PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_32X32X16")>;
def : InstRW<[Write16PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_32X32X[124][FBI]")>;

def : InstRW<[Write4PassDGEMM, MIMFMARead], (instregex "^V_MFMA_.64_4X4X")>;
def : InstRW<[Write8PassDGEMM, MIMFMARead], (instregex "^V_MFMA_.64_16X16X")>;

def : InstRW<[Write4PassMAI, MIMFMARead], (instregex "^V_SMFMAC_.32_16X16X")>;
def : InstRW<[Write8PassMAI, MIMFMARead], (instregex "^V_SMFMAC_.32_32X32X")>;

} // End SchedModel = SIDPGFX940FullSpeedModel

let SchedModel = GFX10SpeedModel in {

// The latency values are 1 / (operations / cycle).
// Add 1 stall cycle for VGPR read.
let RetireOOO = 1 in { // llvm-mca specific flag
def : HWWriteRes<Write32Bit, [HWVALU, HWRC], 5>;
def : HWWriteRes<WriteFloatCvt, [HWVALU, HWRC], 5>;
def : HWWriteRes<Write64Bit, [HWVALU, HWRC], 6>;
def : HWWriteRes<WriteTrans32, [HWTransVALU, HWRC], 10>;
def : HWWriteRes<WriteQuarterRate32, [HWVALU, HWRC], 8>;
def : HWWriteRes<WriteFloatFMA, [HWVALU, HWRC], 5>;
def : HWWriteRes<WriteDouble, [HWVALU, HWRC], 22>;
def : HWWriteRes<WriteDoubleAdd, [HWVALU, HWRC], 22>;
def : HWWriteRes<WriteDoubleCvt, [HWVALU, HWRC], 22>;
def : HWWriteRes<WriteIntMul, [HWVALU, HWRC], 8>;
def : HWWriteRes<WriteTrans64, [HWVALU, HWTransVALU, HWRC], 24>;

def : HWWriteRes<WriteBranch, [HWBranch], 32>;
def : HWWriteRes<WriteExport, [HWExport, HWRC], 16>;
def : HWWriteRes<WriteLDS, [HWLGKM, HWRC], 20>;
def : HWWriteRes<WriteSALU, [HWSALU, HWRC], 2>;
def : HWWriteRes<WriteSMEM, [HWLGKM, HWRC], 20>;
def : HWWriteRes<WriteVMEM, [HWVMEM, HWRC], 320>;
def : HWWriteRes<WriteBarrier, [HWBranch], 2000>;

def : UnsupportedWriteRes<WriteSFPU>;
def : UnsupportedWriteRes<WritePseudoScalarTrans>;
} // End RetireOOO = 1

def : InstRW<[WriteCopy], (instrs COPY)>;

} // End SchedModel = GFX10SpeedModel

let SchedModel = GFX11SpeedModel in {

// The latency values are 1 / (operations / cycle).
// Add 1 stall cycle for VGPR read.
let RetireOOO = 1 in { // llvm-mca specific flag
def : HWWriteRes<Write32Bit, [HWVALU, HWRC], 5>;
def : HWWriteRes<WriteFloatCvt, [HWVALU, HWRC], 5>;
def : HWWriteRes<Write64Bit, [HWVALU, HWRC], 6>;
def : HWWriteRes<WriteTrans32, [HWTransVALU, HWRC], 10>;
def : HWWriteRes<WriteQuarterRate32, [HWVALU, HWRC], 8>;
def : HWWriteRes<WriteFloatFMA, [HWVALU, HWRC], 5>;
def : HWWriteRes<WriteDouble, [HWVALU, HWRC], 38>;
def : HWWriteRes<WriteDoubleAdd, [HWVALU, HWRC], 38>;
def : HWWriteRes<WriteDoubleCvt, [HWVALU, HWRC], 38>;
def : HWWriteRes<WriteIntMul, [HWVALU, HWRC], 8>;
def : HWWriteRes<WriteTrans64, [HWVALU, HWTransVALU, HWRC], 40>;

def : HWWriteRes<WriteBranch, [HWBranch], 32>;
def : HWWriteRes<WriteExport, [HWExport, HWRC], 16>;
def : HWWriteRes<WriteLDS, [HWLGKM, HWRC], 20>;
def : HWWriteRes<WriteSALU, [HWSALU, HWRC], 2>;
def : HWWriteRes<WriteSFPU, [HWSALU, HWRC], 4>;
def : HWWriteRes<WriteSMEM, [HWLGKM, HWRC], 20>;
def : HWWriteRes<WriteVMEM, [HWVMEM, HWRC], 320>;
def : HWWriteRes<WriteBarrier, [HWBranch], 2000>;
} // End RetireOOO = 1

def : UnsupportedWriteRes<WritePseudoScalarTrans>;

def : InstRW<[WriteCopy], (instrs COPY)>;

} // End SchedModel = GFX11SpeedModel

let SchedModel = GFX12SpeedModel in {

// NOTE(review): unlike GFX10/GFX11, these entries are not wrapped in
// `let RetireOOO = 1`, and the transcendental writes use HWVALU rather than
// HWTransVALU - confirm whether both differences are intentional.
def : HWWriteRes<Write32Bit, [HWVALU, HWRC], 5>;
def : HWWriteRes<WriteFloatCvt, [HWVALU, HWRC], 5>;
def : HWWriteRes<Write64Bit, [HWVALU, HWRC], 6>;
def : HWWriteRes<WriteTrans32, [HWVALU, HWRC], 10>;
def : HWWriteRes<WriteQuarterRate32, [HWVALU, HWRC], 8>;
def : HWWriteRes<WriteFloatFMA, [HWVALU, HWRC], 5>;
def : HWWriteRes<WriteDouble, [HWVALU, HWRC], 38>;
def : HWWriteRes<WriteDoubleAdd, [HWVALU, HWRC], 38>;
def : HWWriteRes<WriteDoubleCvt, [HWVALU, HWRC], 38>;
def : HWWriteRes<WriteIntMul, [HWVALU, HWRC], 8>;
def : HWWriteRes<WriteTrans64, [HWVALU, HWRC], 40>;
def : HWWriteRes<WritePseudoScalarTrans, [HWVALU, HWRC], 7>;

def : HWWriteRes<WriteBranch, [HWBranch], 32>;
def : HWWriteRes<WriteExport, [HWExport, HWRC], 16>;
def : HWWriteRes<WriteLDS, [HWLGKM, HWRC], 20>;
def : HWWriteRes<WriteSALU, [HWSALU, HWRC], 2>;
def : HWWriteRes<WriteSFPU, [HWSALU, HWRC], 4>;
def : HWWriteRes<WriteSMEM, [HWLGKM, HWRC], 20>;
def : HWWriteRes<WriteVMEM, [HWVMEM, HWRC], 320>;
def : HWWriteRes<WriteBarrier, [HWBranch], 2000>;

def : InstRW<[WriteCopy], (instrs COPY)>;

} // End SchedModel = GFX12SpeedModel