//===- SIInstrInfo.cpp - SI Instruction Information ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// SI Implementation of TargetInstrInfo.
//
//===----------------------------------------------------------------------===//

#include "SIInstrInfo.h"
#include "AMDGPU.h"
#include "AMDGPUInstrInfo.h"
#include "GCNHazardRecognizer.h"
#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

#define DEBUG_TYPE "si-instr-info"

#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenInstrInfo.inc"

namespace llvm {
namespace AMDGPU {
#define GET_D16ImageDimIntrinsics_IMPL
#define GET_ImageDimIntrinsicTable_IMPL
#define GET_RsrcIntrinsics_IMPL
#include "AMDGPUGenSearchableTables.inc"
}
}

// Must be at least 4 to be able to branch over minimum unconditional branch
// code. This is only for making it possible to write reasonably small tests
// for long branches.
static cl::opt<unsigned>
BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16),
                 cl::desc("Restrict range of branch instructions (DEBUG)"));

static cl::opt<bool> Fix16BitCopies(
  "amdgpu-fix-16-bit-physreg-copies",
  cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"),
  cl::init(true),
  cl::ReallyHidden);

SIInstrInfo::SIInstrInfo(const GCNSubtarget &ST)
  : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN),
    RI(ST), ST(ST) {
  SchedModel.init(&ST);
}

//===----------------------------------------------------------------------===//
// TargetInstrInfo callbacks
//===----------------------------------------------------------------------===//

static unsigned getNumOperandsNoGlue(SDNode *Node) {
  unsigned N = Node->getNumOperands();
  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
    --N;
  return N;
}

/// Returns true if both nodes have the same value for the given
/// operand \p Op, or if both nodes do not have this operand.
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode *N1, unsigned OpName) {
  unsigned Opc0 = N0->getMachineOpcode();
  unsigned Opc1 = N1->getMachineOpcode();

  int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName);
  int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName);

  if (Op0Idx == -1 && Op1Idx == -1)
    return true;

  if ((Op0Idx == -1 && Op1Idx != -1) ||
      (Op1Idx == -1 && Op0Idx != -1))
    return false;

  // getNamedOperandIdx returns the index for the MachineInstr's operands,
  // which includes the result as the first operand. We are indexing into the
  // MachineSDNode's operands, so we need to skip the result operand to get
  // the real index.
  --Op0Idx;
  --Op1Idx;

  return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx);
}

static bool canRemat(const MachineInstr &MI) {

  if (SIInstrInfo::isVOP1(MI) || SIInstrInfo::isVOP2(MI) ||
      SIInstrInfo::isVOP3(MI) || SIInstrInfo::isSDWA(MI) ||
      SIInstrInfo::isSALU(MI))
    return true;

  if (SIInstrInfo::isSMRD(MI)) {
    return !MI.memoperands_empty() &&
           llvm::all_of(MI.memoperands(), [](const MachineMemOperand *MMO) {
             return MMO->isLoad() && MMO->isInvariant();
           });
  }

  return false;
}

bool SIInstrInfo::isReallyTriviallyReMaterializable(
    const MachineInstr &MI) const {

  if (canRemat(MI)) {
    // Normally a VALU use of exec would block rematerialization, but an
    // implicit exec read is OK here since all VALU instructions have one.
    // We really want all of the generic logic for this except for this.

    // Another potential implicit use is the mode register. The core logic of
    // the RA will not attempt rematerialization if mode is set anywhere
    // in the function; otherwise it is safe since mode is not changed.

    // This differs from the generic method, which does not allow
    // rematerialization if there are virtual register uses. We allow this,
    // therefore this method includes SOP instructions as well.
    if (!MI.hasImplicitDef() &&
        MI.getNumImplicitOperands() == MI.getDesc().implicit_uses().size() &&
        !MI.mayRaiseFPException())
      return true;
  }

  return TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
}

// Returns true if the scalar result of a VALU instruction depends on exec.
static bool resultDependsOnExec(const MachineInstr &MI) {
  // Ignore comparisons which are only used masked with exec.
  // This allows some hoisting/sinking of VALU comparisons.
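  // Illustrative sketch of the intent (register roles are assumed): a V_CMP
  // whose SGPR result is consumed only by S_AND_SAVEEXEC or by S_AND with exec
  // is already masked by exec anyway, so it can be treated as exec-independent
  // and hoisted or sunk; the switch below whitelists exactly those users.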
  if (MI.isCompare()) {
    const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
    Register DstReg = MI.getOperand(0).getReg();
    if (!DstReg.isVirtual())
      return true;
    for (MachineInstr &Use : MRI.use_nodbg_instructions(DstReg)) {
      switch (Use.getOpcode()) {
      case AMDGPU::S_AND_SAVEEXEC_B32:
      case AMDGPU::S_AND_SAVEEXEC_B64:
        break;
      case AMDGPU::S_AND_B32:
      case AMDGPU::S_AND_B64:
        if (!Use.readsRegister(AMDGPU::EXEC))
          return true;
        break;
      default:
        return true;
      }
    }
    return false;
  }

  switch (MI.getOpcode()) {
  default:
    break;
  case AMDGPU::V_READFIRSTLANE_B32:
    return true;
  }

  return false;
}

bool SIInstrInfo::isIgnorableUse(const MachineOperand &MO) const {
  // Any implicit use of exec by VALU is not a real register read.
  return MO.getReg() == AMDGPU::EXEC && MO.isImplicit() &&
         isVALU(*MO.getParent()) && !resultDependsOnExec(*MO.getParent());
}

bool SIInstrInfo::isSafeToSink(MachineInstr &MI,
                               MachineBasicBlock *SuccToSinkTo,
                               MachineCycleInfo *CI) const {
  // Allow sinking if MI edits lane mask (divergent i1 in sgpr).
  if (MI.getOpcode() == AMDGPU::SI_IF_BREAK)
    return true;

  MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
  // Check if sinking of MI would create temporal divergent use.
  for (auto Op : MI.uses()) {
    if (Op.isReg() && Op.getReg().isVirtual() &&
        RI.isSGPRClass(MRI.getRegClass(Op.getReg()))) {
      MachineInstr *SgprDef = MRI.getVRegDef(Op.getReg());

      // SgprDef defined inside cycle
      MachineCycle *FromCycle = CI->getCycle(SgprDef->getParent());
      if (FromCycle == nullptr)
        continue;

      MachineCycle *ToCycle = CI->getCycle(SuccToSinkTo);
      // Check if there is a FromCycle that contains SgprDef's basic block but
      // does not contain SuccToSinkTo and also has divergent exit condition.
      while (FromCycle && !FromCycle->contains(ToCycle)) {
        // After structurize-cfg, there should be exactly one cycle exit.
        SmallVector<MachineBasicBlock *, 1> ExitBlocks;
        FromCycle->getExitBlocks(ExitBlocks);
        assert(ExitBlocks.size() == 1);
        assert(ExitBlocks[0]->getSinglePredecessor());

        // FromCycle has divergent exit condition.
        if (hasDivergentBranch(ExitBlocks[0]->getSinglePredecessor())) {
          return false;
        }

        FromCycle = FromCycle->getParentCycle();
      }
    }
  }

  return true;
}

bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
                                          int64_t &Offset0,
                                          int64_t &Offset1) const {
  if (!Load0->isMachineOpcode() || !Load1->isMachineOpcode())
    return false;

  unsigned Opc0 = Load0->getMachineOpcode();
  unsigned Opc1 = Load1->getMachineOpcode();

  // Make sure both are actually loads.
  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())
    return false;

  // A mayLoad instruction without a def is not a load. Likely a prefetch.
  if (!get(Opc0).getNumDefs() || !get(Opc1).getNumDefs())
    return false;

  if (isDS(Opc0) && isDS(Opc1)) {

    // FIXME: Handle this case:
    if (getNumOperandsNoGlue(Load0) != getNumOperandsNoGlue(Load1))
      return false;

    // Check base reg.
    if (Load0->getOperand(0) != Load1->getOperand(0))
      return false;

    // Skip read2 / write2 variants for simplicity.
    // TODO: We should report true if the used offsets are adjacent (excluded
    // st64 versions).
    int Offset0Idx = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int Offset1Idx = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
    if (Offset0Idx == -1 || Offset1Idx == -1)
      return false;

    // XXX - be careful of dataless loads
    // getNamedOperandIdx returns the index for MachineInstrs. Since they
    // include the output in the operand list, but SDNodes don't, we need to
    // subtract the index by one.
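    // Hypothetical illustration (operand layout assumed): for a single-def DS
    // load whose MachineInstr operands are (vdst, addr, offset), the named
    // "offset" index is 2, while the matching MachineSDNode operand list
    // (addr, offset, ...) uses index 1, hence the NumDefs adjustment below.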
    Offset0Idx -= get(Opc0).NumDefs;
    Offset1Idx -= get(Opc1).NumDefs;
    Offset0 = Load0->getConstantOperandVal(Offset0Idx);
    Offset1 = Load1->getConstantOperandVal(Offset1Idx);
    return true;
  }

  if (isSMRD(Opc0) && isSMRD(Opc1)) {
    // Skip time and cache invalidation instructions.
    if (!AMDGPU::hasNamedOperand(Opc0, AMDGPU::OpName::sbase) ||
        !AMDGPU::hasNamedOperand(Opc1, AMDGPU::OpName::sbase))
      return false;

    unsigned NumOps = getNumOperandsNoGlue(Load0);
    if (NumOps != getNumOperandsNoGlue(Load1))
      return false;

    // Check base reg.
    if (Load0->getOperand(0) != Load1->getOperand(0))
      return false;

    // Match register offsets, if both register and immediate offsets present.
    assert(NumOps == 4 || NumOps == 5);
    if (NumOps == 5 && Load0->getOperand(1) != Load1->getOperand(1))
      return false;

    const ConstantSDNode *Load0Offset =
        dyn_cast<ConstantSDNode>(Load0->getOperand(NumOps - 3));
    const ConstantSDNode *Load1Offset =
        dyn_cast<ConstantSDNode>(Load1->getOperand(NumOps - 3));

    if (!Load0Offset || !Load1Offset)
      return false;

    Offset0 = Load0Offset->getZExtValue();
    Offset1 = Load1Offset->getZExtValue();
    return true;
  }

  // MUBUF and MTBUF can access the same addresses.
  if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1))) {

    // MUBUF and MTBUF have vaddr at different indices.
    if (!nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::soffset) ||
        !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::vaddr) ||
        !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::srsrc))
      return false;

    int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);

    if (OffIdx0 == -1 || OffIdx1 == -1)
      return false;

    // getNamedOperandIdx returns the index for MachineInstrs. Since they
    // include the output in the operand list, but SDNodes don't, we need to
    // subtract the index by one.
    OffIdx0 -= get(Opc0).NumDefs;
    OffIdx1 -= get(Opc1).NumDefs;

    SDValue Off0 = Load0->getOperand(OffIdx0);
    SDValue Off1 = Load1->getOperand(OffIdx1);

    // The offset might be a FrameIndexSDNode.
    if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1))
      return false;

    Offset0 = cast<ConstantSDNode>(Off0)->getZExtValue();
    Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue();
    return true;
  }

  return false;
}

static bool isStride64(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::DS_READ2ST64_B32:
  case AMDGPU::DS_READ2ST64_B64:
  case AMDGPU::DS_WRITE2ST64_B32:
  case AMDGPU::DS_WRITE2ST64_B64:
    return true;
  default:
    return false;
  }
}

bool SIInstrInfo::getMemOperandsWithOffsetWidth(
    const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
    int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
    const TargetRegisterInfo *TRI) const {
  if (!LdSt.mayLoadOrStore())
    return false;

  unsigned Opc = LdSt.getOpcode();
  OffsetIsScalable = false;
  const MachineOperand *BaseOp, *OffsetOp;
  int DataOpIdx;

  if (isDS(LdSt)) {
    BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::addr);
    OffsetOp = getNamedOperand(LdSt, AMDGPU::OpName::offset);
    if (OffsetOp) {
      // Normal, single offset LDS instruction.
      if (!BaseOp) {
        // DS_CONSUME/DS_APPEND use M0 for the base address.
        // TODO: find the implicit use operand for M0 and use that as BaseOp?
        return false;
      }
      BaseOps.push_back(BaseOp);
      Offset = OffsetOp->getImm();
      // Get appropriate operand, and compute width accordingly.
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      if (DataOpIdx == -1)
        DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
      Width = getOpSize(LdSt, DataOpIdx);
    } else {
      // The 2 offset instructions use offset0 and offset1 instead. We can
      // treat these as a load with a single offset if the 2 offsets are
      // consecutive.
      // We will use this for some partially aligned loads.
      const MachineOperand *Offset0Op =
          getNamedOperand(LdSt, AMDGPU::OpName::offset0);
      const MachineOperand *Offset1Op =
          getNamedOperand(LdSt, AMDGPU::OpName::offset1);

      unsigned Offset0 = Offset0Op->getImm() & 0xff;
      unsigned Offset1 = Offset1Op->getImm() & 0xff;
      if (Offset0 + 1 != Offset1)
        return false;

      // Each of these offsets is in element sized units, so we need to convert
      // to bytes of the individual reads.

      unsigned EltSize;
      if (LdSt.mayLoad())
        EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, 0)) / 16;
      else {
        assert(LdSt.mayStore());
        int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
        EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, Data0Idx)) / 8;
      }

      if (isStride64(Opc))
        EltSize *= 64;

      BaseOps.push_back(BaseOp);
      Offset = EltSize * Offset0;
      // Get appropriate operand(s), and compute width accordingly.
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      if (DataOpIdx == -1) {
        DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
        Width = getOpSize(LdSt, DataOpIdx);
        DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
        Width += getOpSize(LdSt, DataOpIdx);
      } else {
        Width = getOpSize(LdSt, DataOpIdx);
      }
    }
    return true;
  }

  if (isMUBUF(LdSt) || isMTBUF(LdSt)) {
    const MachineOperand *RSrc = getNamedOperand(LdSt, AMDGPU::OpName::srsrc);
    if (!RSrc) // e.g. BUFFER_WBINVL1_VOL
      return false;
    BaseOps.push_back(RSrc);
    BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
    if (BaseOp && !BaseOp->isFI())
      BaseOps.push_back(BaseOp);
    const MachineOperand *OffsetImm =
        getNamedOperand(LdSt, AMDGPU::OpName::offset);
    Offset = OffsetImm->getImm();
    const MachineOperand *SOffset =
        getNamedOperand(LdSt, AMDGPU::OpName::soffset);
    if (SOffset) {
      if (SOffset->isReg())
        BaseOps.push_back(SOffset);
      else
        Offset += SOffset->getImm();
    }
    // Get appropriate operand, and compute width accordingly.
    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
    if (DataOpIdx == -1)
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
    if (DataOpIdx == -1) // LDS DMA
      return false;
    Width = getOpSize(LdSt, DataOpIdx);
    return true;
  }

  if (isMIMG(LdSt)) {
    int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
    BaseOps.push_back(&LdSt.getOperand(SRsrcIdx));
    int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
    if (VAddr0Idx >= 0) {
      // GFX10 possible NSA encoding.
      for (int I = VAddr0Idx; I < SRsrcIdx; ++I)
        BaseOps.push_back(&LdSt.getOperand(I));
    } else {
      BaseOps.push_back(getNamedOperand(LdSt, AMDGPU::OpName::vaddr));
    }
    Offset = 0;
    // Get appropriate operand, and compute width accordingly.
    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
    Width = getOpSize(LdSt, DataOpIdx);
    return true;
  }

  if (isSMRD(LdSt)) {
    BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::sbase);
    if (!BaseOp) // e.g. S_MEMTIME
      return false;
    BaseOps.push_back(BaseOp);
    OffsetOp = getNamedOperand(LdSt, AMDGPU::OpName::offset);
    Offset = OffsetOp ? OffsetOp->getImm() : 0;
    // Get appropriate operand, and compute width accordingly.
    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
    if (DataOpIdx == -1)
      return false;
    Width = getOpSize(LdSt, DataOpIdx);
    return true;
  }

  if (isFLAT(LdSt)) {
    // Instructions have either vaddr or saddr or both or none.
    BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
    if (BaseOp)
      BaseOps.push_back(BaseOp);
    BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::saddr);
    if (BaseOp)
      BaseOps.push_back(BaseOp);
    Offset = getNamedOperand(LdSt, AMDGPU::OpName::offset)->getImm();
    // Get appropriate operand, and compute width accordingly.
    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
    if (DataOpIdx == -1)
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
    if (DataOpIdx == -1) // LDS DMA
      return false;
    Width = getOpSize(LdSt, DataOpIdx);
    return true;
  }

  return false;
}

static bool memOpsHaveSameBasePtr(const MachineInstr &MI1,
                                  ArrayRef<const MachineOperand *> BaseOps1,
                                  const MachineInstr &MI2,
                                  ArrayRef<const MachineOperand *> BaseOps2) {
  // Only examine the first "base" operand of each instruction, on the
  // assumption that it represents the real base address of the memory access.
  // Other operands are typically offsets or indices from this base address.
  if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front()))
    return true;

  if (!MI1.hasOneMemOperand() || !MI2.hasOneMemOperand())
    return false;

  auto MO1 = *MI1.memoperands_begin();
  auto MO2 = *MI2.memoperands_begin();
  if (MO1->getAddrSpace() != MO2->getAddrSpace())
    return false;

  auto Base1 = MO1->getValue();
  auto Base2 = MO2->getValue();
  if (!Base1 || !Base2)
    return false;
  Base1 = getUnderlyingObject(Base1);
  Base2 = getUnderlyingObject(Base2);

  if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2))
    return false;

  return Base1 == Base2;
}

bool SIInstrInfo::shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
                                      int64_t Offset1, bool OffsetIsScalable1,
                                      ArrayRef<const MachineOperand *> BaseOps2,
                                      int64_t Offset2, bool OffsetIsScalable2,
                                      unsigned ClusterSize,
                                      unsigned NumBytes) const {
  // If the mem ops (to be clustered) do not have the same base ptr, then they
  // should not be clustered.
  if (!BaseOps1.empty() && !BaseOps2.empty()) {
    const MachineInstr &FirstLdSt = *BaseOps1.front()->getParent();
    const MachineInstr &SecondLdSt = *BaseOps2.front()->getParent();
    if (!memOpsHaveSameBasePtr(FirstLdSt, BaseOps1, SecondLdSt, BaseOps2))
      return false;
  } else if (!BaseOps1.empty() || !BaseOps2.empty()) {
    // If only one base op is empty, they do not have the same base ptr.
    return false;
  }

  // In order to avoid register pressure, on average, the number of DWORDS
  // loaded together by all clustered mem ops should not exceed 8. This is an
  // empirical value based on certain observations and performance related
  // experiments.
  // The good thing about this heuristic is that it avoids clustering of too
  // many sub-word loads, and also avoids clustering of wide loads. Below is
  // a brief summary of how the heuristic behaves for various `LoadSize`.
  // (1)  1 <= LoadSize <= 4:  cluster at max 8 mem ops
  // (2)  5 <= LoadSize <= 8:  cluster at max 4 mem ops
  // (3)  9 <= LoadSize <= 12: cluster at max 2 mem ops
  // (4) 13 <= LoadSize <= 16: cluster at max 2 mem ops
  // (5) LoadSize >= 17: do not cluster
  const unsigned LoadSize = NumBytes / ClusterSize;
  const unsigned NumDWORDs = ((LoadSize + 3) / 4) * ClusterSize;
  return NumDWORDs <= 8;
}

// FIXME: This behaves strangely. If, for example, you have 32 loads + stores,
// the first 16 loads will be interleaved with the stores, and the next 16 will
// be clustered as expected. It should really split into two 16-store batches.
//
// Loads are clustered until this returns false, rather than trying to schedule
// groups of stores. This also means we have to deal with saying different
// address space loads should be clustered, and ones which might cause bank
// conflicts.
//
// This might be deprecated so it might not be worth that much effort to fix.
bool SIInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
                                          int64_t Offset0, int64_t Offset1,
                                          unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");
  // If we have less than 16 loads in a row, and the offsets are within 64
  // bytes, then schedule together.

  // A cacheline is 64 bytes (for global memory).
  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
}

static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MI,
                              const DebugLoc &DL, MCRegister DestReg,
                              MCRegister SrcReg, bool KillSrc,
                              const char *Msg = "illegal VGPR to SGPR copy") {
  MachineFunction *MF = MBB.getParent();
  DiagnosticInfoUnsupported IllegalCopy(MF->getFunction(), Msg, DL, DS_Error);
  LLVMContext &C = MF->getFunction().getContext();
  C.diagnose(IllegalCopy);

  BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_ILLEGAL_COPY), DestReg)
    .addReg(SrcReg, getKillRegState(KillSrc));
}

/// Handle copying from SGPR to AGPR, or from AGPR to AGPR on GFX908. It is not
/// possible to have a direct copy in these cases on GFX908, so an intermediate
/// VGPR copy is required.
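/// A rough sketch of the expansion (register names are placeholders; the exact
/// first opcode depends on the source class, as selected in the body below):
///   v_mov_b32 vTmp, sSrc              ; or v_accvgpr_read_b32 vTmp, aSrc
///   v_accvgpr_write_b32 aDst, vTmp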
static void indirectCopyToAGPR(const SIInstrInfo &TII,
                               MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MI,
                               const DebugLoc &DL, MCRegister DestReg,
                               MCRegister SrcReg, bool KillSrc,
                               RegScavenger &RS, bool RegsOverlap,
                               Register ImpDefSuperReg = Register(),
                               Register ImpUseSuperReg = Register()) {
  assert((TII.getSubtarget().hasMAIInsts() &&
          !TII.getSubtarget().hasGFX90AInsts()) &&
         "Expected GFX908 subtarget.");

  assert((AMDGPU::SReg_32RegClass.contains(SrcReg) ||
          AMDGPU::AGPR_32RegClass.contains(SrcReg)) &&
         "Source register of the copy should be either an SGPR or an AGPR.");

  assert(AMDGPU::AGPR_32RegClass.contains(DestReg) &&
         "Destination register of the copy should be an AGPR.");

  const SIRegisterInfo &RI = TII.getRegisterInfo();

  // First try to find defining accvgpr_write to avoid temporary registers.
  // In the case of copies of overlapping AGPRs, we conservatively do not
  // reuse previous accvgpr_writes. Otherwise, we may incorrectly pick up
  // an accvgpr_write used for this same copy due to implicit-defs
  if (!RegsOverlap) {
    for (auto Def = MI, E = MBB.begin(); Def != E; ) {
      --Def;

      if (!Def->modifiesRegister(SrcReg, &RI))
        continue;

      if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
          Def->getOperand(0).getReg() != SrcReg)
        break;

      MachineOperand &DefOp = Def->getOperand(1);
      assert(DefOp.isReg() || DefOp.isImm());

      if (DefOp.isReg()) {
        bool SafeToPropagate = true;
        // Check that register source operand is not clobbered before MI.
        // Immediate operands are always safe to propagate.
        for (auto I = Def; I != MI && SafeToPropagate; ++I)
          if (I->modifiesRegister(DefOp.getReg(), &RI))
            SafeToPropagate = false;

        if (!SafeToPropagate)
          break;

        DefOp.setIsKill(false);
      }

      MachineInstrBuilder Builder =
        BuildMI(MBB, MI, DL, TII.get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), DestReg)
          .add(DefOp);
      if (ImpDefSuperReg)
        Builder.addReg(ImpDefSuperReg, RegState::Define | RegState::Implicit);

      if (ImpUseSuperReg) {
        Builder.addReg(ImpUseSuperReg,
                       getKillRegState(KillSrc) | RegState::Implicit);
      }

      return;
    }
  }

  RS.enterBasicBlockEnd(MBB);
  RS.backward(std::next(MI));

  // Ideally we want to have three registers for a long reg_sequence copy
  // to hide 2 waitstates between v_mov_b32 and accvgpr_write.
  unsigned MaxVGPRs = RI.getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
                                             *MBB.getParent());

  // Registers in the sequence are allocated contiguously so we can just
  // use register number to pick one of three round-robin temps.
  unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3;
  Register Tmp =
      MBB.getParent()->getInfo<SIMachineFunctionInfo>()->getVGPRForAGPRCopy();
  assert(MBB.getParent()->getRegInfo().isReserved(Tmp) &&
         "VGPR used for an intermediate copy should have been reserved.");

  // Only loop through if there are any free registers left. We don't want to
  // spill.
  while (RegNo--) {
    Register Tmp2 = RS.scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI,
                                                 /* RestoreAfter */ false, 0,
                                                 /* AllowSpill */ false);
    if (!Tmp2 || RI.getHWRegIndex(Tmp2) >= MaxVGPRs)
      break;
    Tmp = Tmp2;
    RS.setRegUsed(Tmp);
  }

  // Insert copy to temporary VGPR.
  unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
  if (AMDGPU::AGPR_32RegClass.contains(SrcReg)) {
    TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;
  } else {
    assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
  }

  MachineInstrBuilder UseBuilder = BuildMI(MBB, MI, DL, TII.get(TmpCopyOp), Tmp)
    .addReg(SrcReg, getKillRegState(KillSrc));
  if (ImpUseSuperReg) {
    UseBuilder.addReg(ImpUseSuperReg,
                      getKillRegState(KillSrc) | RegState::Implicit);
  }

  MachineInstrBuilder DefBuilder
    = BuildMI(MBB, MI, DL, TII.get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), DestReg)
      .addReg(Tmp, RegState::Kill);

  if (ImpDefSuperReg)
    DefBuilder.addReg(ImpDefSuperReg, RegState::Define | RegState::Implicit);
}

static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI, const DebugLoc &DL,
                           MCRegister DestReg, MCRegister SrcReg, bool KillSrc,
                           const TargetRegisterClass *RC, bool Forward) {
  const SIRegisterInfo &RI = TII.getRegisterInfo();
  ArrayRef<int16_t> BaseIndices = RI.getRegSplitParts(RC, 4);
  MachineBasicBlock::iterator I = MI;
  MachineInstr *FirstMI = nullptr, *LastMI = nullptr;

  for (unsigned Idx = 0; Idx < BaseIndices.size(); ++Idx) {
    int16_t SubIdx = BaseIndices[Idx];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
    unsigned Opcode = AMDGPU::S_MOV_B32;

    // Is SGPR aligned? If so try to combine with next.
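    // For example (hypothetical registers): copying s[4:5] <- s[6:7] can be
    // emitted as a single S_MOV_B64 because both pairs start on an even SGPR,
    // whereas s[5:6] <- s[7:8] would have to be split into two S_MOV_B32s.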
    bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
    bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
    if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) {
      // Can use SGPR64 copy
      unsigned Channel = RI.getChannelFromSubReg(SubIdx);
      SubIdx = RI.getSubRegFromChannel(Channel, 2);
      DestSubReg = RI.getSubReg(DestReg, SubIdx);
      SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
      assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
      Opcode = AMDGPU::S_MOV_B64;
      Idx++;
    }

    LastMI = BuildMI(MBB, I, DL, TII.get(Opcode), DestSubReg)
                 .addReg(SrcSubReg)
                 .addReg(SrcReg, RegState::Implicit);

    if (!FirstMI)
      FirstMI = LastMI;

    if (!Forward)
      I--;
  }

  assert(FirstMI && LastMI);
  if (!Forward)
    std::swap(FirstMI, LastMI);

  FirstMI->addOperand(
      MachineOperand::CreateReg(DestReg, true /*IsDef*/, true /*IsImp*/));

  if (KillSrc)
    LastMI->addRegisterKilled(SrcReg, &RI);
}

void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MI,
                              const DebugLoc &DL, MCRegister DestReg,
                              MCRegister SrcReg, bool KillSrc) const {
  const TargetRegisterClass *RC = RI.getPhysRegBaseClass(DestReg);
  unsigned Size = RI.getRegSizeInBits(*RC);
  const TargetRegisterClass *SrcRC = RI.getPhysRegBaseClass(SrcReg);
  unsigned SrcSize = RI.getRegSizeInBits(*SrcRC);

  // The rest of copyPhysReg assumes Src and Dst are the same size.
  // TODO-GFX11_16BIT: If all true 16 bit instruction patterns are completed,
  // can we remove Fix16BitCopies and this code block?
  if (Fix16BitCopies) {
    if (((Size == 16) != (SrcSize == 16))) {
      // Non-VGPR Src and Dst will later be expanded back to 32 bits.
      assert(ST.hasTrue16BitInsts());
      MCRegister &RegToFix = (Size == 32) ? DestReg : SrcReg;
      MCRegister SubReg = RI.getSubReg(RegToFix, AMDGPU::lo16);
      RegToFix = SubReg;

      if (DestReg == SrcReg) {
        // Identity copy. Insert empty bundle since ExpandPostRA expects an
        // instruction here.
        BuildMI(MBB, MI, DL, get(AMDGPU::BUNDLE));
        return;
      }
      RC = RI.getPhysRegBaseClass(DestReg);
      Size = RI.getRegSizeInBits(*RC);
      SrcRC = RI.getPhysRegBaseClass(SrcReg);
      SrcSize = RI.getRegSizeInBits(*SrcRC);
    }
  }

  if (RC == &AMDGPU::VGPR_32RegClass) {
    assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
           AMDGPU::SReg_32RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_32RegClass.contains(SrcReg));
    unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
                     AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32;
    BuildMI(MBB, MI, DL, get(Opc), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (RC == &AMDGPU::SReg_32_XM0RegClass ||
      RC == &AMDGPU::SReg_32RegClass) {
    if (SrcReg == AMDGPU::SCC) {
      BuildMI(MBB, MI, DL, get(AMDGPU::S_CSELECT_B32), DestReg)
          .addImm(1)
          .addImm(0);
      return;
    }

    if (DestReg == AMDGPU::VCC_LO) {
      if (AMDGPU::SReg_32RegClass.contains(SrcReg)) {
        BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), AMDGPU::VCC_LO)
          .addReg(SrcReg, getKillRegState(KillSrc));
      } else {
        // FIXME: Hack until VReg_1 removed.
        assert(AMDGPU::VGPR_32RegClass.contains(SrcReg));
        BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_U32_e32))
          .addImm(0)
          .addReg(SrcReg, getKillRegState(KillSrc));
      }

      return;
    }

    if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {
      reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
      return;
    }

    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (RC == &AMDGPU::SReg_64RegClass) {
    if (SrcReg == AMDGPU::SCC) {
      BuildMI(MBB, MI, DL, get(AMDGPU::S_CSELECT_B64), DestReg)
          .addImm(1)
          .addImm(0);
      return;
    }

    if (DestReg == AMDGPU::VCC) {
      if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
        BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC)
          .addReg(SrcReg, getKillRegState(KillSrc));
      } else {
        // FIXME: Hack until VReg_1 removed.
        assert(AMDGPU::VGPR_32RegClass.contains(SrcReg));
        BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_U32_e32))
          .addImm(0)
          .addReg(SrcReg, getKillRegState(KillSrc));
      }

      return;
    }

    if (!AMDGPU::SReg_64RegClass.contains(SrcReg)) {
      reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
      return;
    }

    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (DestReg == AMDGPU::SCC) {
    // Copying 64-bit or 32-bit sources to SCC barely makes sense,
    // but SelectionDAG emits such copies for i1 sources.
    if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
      // This copy can only be produced by patterns
      // with explicit SCC, which are known to be enabled
      // only for subtargets with S_CMP_LG_U64 present.
      assert(ST.hasScalarCompareEq64());
      BuildMI(MBB, MI, DL, get(AMDGPU::S_CMP_LG_U64))
          .addReg(SrcReg, getKillRegState(KillSrc))
          .addImm(0);
    } else {
      assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
      BuildMI(MBB, MI, DL, get(AMDGPU::S_CMP_LG_U32))
          .addReg(SrcReg, getKillRegState(KillSrc))
          .addImm(0);
    }

    return;
  }

  if (RC == &AMDGPU::AGPR_32RegClass) {
    if (AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
        (ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {
      BuildMI(MBB, MI, DL, get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
      return;
    }

    if (AMDGPU::AGPR_32RegClass.contains(SrcReg) && ST.hasGFX90AInsts()) {
      BuildMI(MBB, MI, DL, get(AMDGPU::V_ACCVGPR_MOV_B32), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
      return;
    }

    // FIXME: Pass should maintain scavenger to avoid scan through the block on
    // every AGPR spill.
    RegScavenger RS;
    const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
    indirectCopyToAGPR(*this, MBB, MI, DL, DestReg, SrcReg, KillSrc, RS,
                       Overlap);
    return;
  }

  if (Size == 16) {
    assert(AMDGPU::VGPR_16RegClass.contains(SrcReg) ||
           AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_LO16RegClass.contains(SrcReg));

    bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
    bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
    bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
    bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);
    bool DstLow = !AMDGPU::isHi(DestReg, RI);
    bool SrcLow = !AMDGPU::isHi(SrcReg, RI);
    MCRegister NewDestReg = RI.get32BitRegister(DestReg);
    MCRegister NewSrcReg = RI.get32BitRegister(SrcReg);

    if (IsSGPRDst) {
      if (!IsSGPRSrc) {
        reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
        return;
      }

      BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), NewDestReg)
        .addReg(NewSrcReg, getKillRegState(KillSrc));
      return;
    }

    if (IsAGPRDst || IsAGPRSrc) {
9755ffd83dbSDimitry Andric if (!DstLow || !SrcLow) { 9765ffd83dbSDimitry Andric reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc, 9775ffd83dbSDimitry Andric "Cannot use hi16 subreg with an AGPR!"); 9785ffd83dbSDimitry Andric } 9795ffd83dbSDimitry Andric 9805ffd83dbSDimitry Andric copyPhysReg(MBB, MI, DL, NewDestReg, NewSrcReg, KillSrc); 9815ffd83dbSDimitry Andric return; 9825ffd83dbSDimitry Andric } 9835ffd83dbSDimitry Andric 9845f757f3fSDimitry Andric if (ST.hasTrue16BitInsts()) { 9855f757f3fSDimitry Andric if (IsSGPRSrc) { 9865f757f3fSDimitry Andric assert(SrcLow); 9875f757f3fSDimitry Andric SrcReg = NewSrcReg; 9885f757f3fSDimitry Andric } 9895f757f3fSDimitry Andric // Use the smaller instruction encoding if possible. 9905f757f3fSDimitry Andric if (AMDGPU::VGPR_16_Lo128RegClass.contains(DestReg) && 9915f757f3fSDimitry Andric (IsSGPRSrc || AMDGPU::VGPR_16_Lo128RegClass.contains(SrcReg))) { 9925f757f3fSDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B16_t16_e32), DestReg) 9935f757f3fSDimitry Andric .addReg(SrcReg); 9945f757f3fSDimitry Andric } else { 9955f757f3fSDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B16_t16_e64), DestReg) 9965f757f3fSDimitry Andric .addImm(0) // src0_modifiers 9975f757f3fSDimitry Andric .addReg(SrcReg) 9985f757f3fSDimitry Andric .addImm(0); // op_sel 9995f757f3fSDimitry Andric } 10005f757f3fSDimitry Andric return; 10015f757f3fSDimitry Andric } 10025f757f3fSDimitry Andric 10035ffd83dbSDimitry Andric if (IsSGPRSrc && !ST.hasSDWAScalar()) { 10045ffd83dbSDimitry Andric if (!DstLow || !SrcLow) { 10055ffd83dbSDimitry Andric reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc, 10065ffd83dbSDimitry Andric "Cannot use hi16 subreg on VI!"); 10075ffd83dbSDimitry Andric } 10085ffd83dbSDimitry Andric 10095ffd83dbSDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), NewDestReg) 10105ffd83dbSDimitry Andric .addReg(NewSrcReg, getKillRegState(KillSrc)); 10115ffd83dbSDimitry Andric return; 10125ffd83dbSDimitry Andric } 10135ffd83dbSDimitry Andric 10145ffd83dbSDimitry Andric auto MIB = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_sdwa), NewDestReg) 10155ffd83dbSDimitry Andric .addImm(0) // src0_modifiers 10165ffd83dbSDimitry Andric .addReg(NewSrcReg) 10175ffd83dbSDimitry Andric .addImm(0) // clamp 10185ffd83dbSDimitry Andric .addImm(DstLow ? AMDGPU::SDWA::SdwaSel::WORD_0 10195ffd83dbSDimitry Andric : AMDGPU::SDWA::SdwaSel::WORD_1) 10205ffd83dbSDimitry Andric .addImm(AMDGPU::SDWA::DstUnused::UNUSED_PRESERVE) 10215ffd83dbSDimitry Andric .addImm(SrcLow ? AMDGPU::SDWA::SdwaSel::WORD_0 10225ffd83dbSDimitry Andric : AMDGPU::SDWA::SdwaSel::WORD_1) 10235ffd83dbSDimitry Andric .addReg(NewDestReg, RegState::Implicit | RegState::Undef); 10245ffd83dbSDimitry Andric // First implicit operand is $exec. 
10255ffd83dbSDimitry Andric MIB->tieOperands(0, MIB->getNumOperands() - 1); 10265ffd83dbSDimitry Andric return; 10275ffd83dbSDimitry Andric } 10285ffd83dbSDimitry Andric 1029fe6060f1SDimitry Andric if (RC == RI.getVGPR64Class() && (SrcRC == RC || RI.isSGPRClass(SrcRC))) { 103081ad6265SDimitry Andric if (ST.hasMovB64()) { 103181ad6265SDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_e32), DestReg) 103281ad6265SDimitry Andric .addReg(SrcReg, getKillRegState(KillSrc)); 103381ad6265SDimitry Andric return; 103481ad6265SDimitry Andric } 10355f757f3fSDimitry Andric if (ST.hasPkMovB32()) { 1036fe6060f1SDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), DestReg) 1037fe6060f1SDimitry Andric .addImm(SISrcMods::OP_SEL_1) 1038fe6060f1SDimitry Andric .addReg(SrcReg) 1039fe6060f1SDimitry Andric .addImm(SISrcMods::OP_SEL_0 | SISrcMods::OP_SEL_1) 1040fe6060f1SDimitry Andric .addReg(SrcReg) 1041fe6060f1SDimitry Andric .addImm(0) // op_sel_lo 1042fe6060f1SDimitry Andric .addImm(0) // op_sel_hi 1043fe6060f1SDimitry Andric .addImm(0) // neg_lo 1044fe6060f1SDimitry Andric .addImm(0) // neg_hi 1045fe6060f1SDimitry Andric .addImm(0) // clamp 1046fe6060f1SDimitry Andric .addReg(SrcReg, getKillRegState(KillSrc) | RegState::Implicit); 1047fe6060f1SDimitry Andric return; 1048fe6060f1SDimitry Andric } 1049fe6060f1SDimitry Andric } 1050fe6060f1SDimitry Andric 1051e8d8bef9SDimitry Andric const bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg); 10520b57cec5SDimitry Andric if (RI.isSGPRClass(RC)) { 1053fe6060f1SDimitry Andric if (!RI.isSGPRClass(SrcRC)) { 10540b57cec5SDimitry Andric reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc); 10550b57cec5SDimitry Andric return; 10560b57cec5SDimitry Andric } 105781ad6265SDimitry Andric const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg); 105881ad6265SDimitry Andric expandSGPRCopy(*this, MBB, MI, DL, DestReg, SrcReg, CanKillSuperReg, RC, 105981ad6265SDimitry Andric Forward); 1060e8d8bef9SDimitry Andric return; 10610b57cec5SDimitry Andric } 10620b57cec5SDimitry Andric 1063fe6060f1SDimitry Andric unsigned EltSize = 4; 1064e8d8bef9SDimitry Andric unsigned Opcode = AMDGPU::V_MOV_B32_e32; 10654824e7fdSDimitry Andric if (RI.isAGPRClass(RC)) { 10660eae32dcSDimitry Andric if (ST.hasGFX90AInsts() && RI.isAGPRClass(SrcRC)) 10670eae32dcSDimitry Andric Opcode = AMDGPU::V_ACCVGPR_MOV_B32; 106881ad6265SDimitry Andric else if (RI.hasVGPRs(SrcRC) || 106981ad6265SDimitry Andric (ST.hasGFX90AInsts() && RI.isSGPRClass(SrcRC))) 10700eae32dcSDimitry Andric Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64; 10710eae32dcSDimitry Andric else 10720eae32dcSDimitry Andric Opcode = AMDGPU::INSTRUCTION_LIST_END; 10734824e7fdSDimitry Andric } else if (RI.hasVGPRs(RC) && RI.isAGPRClass(SrcRC)) { 1074e8d8bef9SDimitry Andric Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64; 1075fe6060f1SDimitry Andric } else if ((Size % 64 == 0) && RI.hasVGPRs(RC) && 1076fe6060f1SDimitry Andric (RI.isProperlyAlignedRC(*RC) && 1077fe6060f1SDimitry Andric (SrcRC == RC || RI.isSGPRClass(SrcRC)))) { 1078fe6060f1SDimitry Andric // TODO: In 96-bit case, could do a 64-bit mov and then a 32-bit mov. 
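// Rough sketch of the selection below (subtarget-dependent): prefer a true
// 64-bit V_MOV_B64 where available, otherwise fall back to V_PK_MOV_B32.
// Either choice lets the split loop further down copy in 8-byte rather than
// 4-byte pieces by bumping EltSize to 8.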
107981ad6265SDimitry Andric if (ST.hasMovB64()) { 108081ad6265SDimitry Andric Opcode = AMDGPU::V_MOV_B64_e32; 108181ad6265SDimitry Andric EltSize = 8; 10825f757f3fSDimitry Andric } else if (ST.hasPkMovB32()) { 1083fe6060f1SDimitry Andric Opcode = AMDGPU::V_PK_MOV_B32; 1084fe6060f1SDimitry Andric EltSize = 8; 1085fe6060f1SDimitry Andric } 1086e8d8bef9SDimitry Andric } 1087e8d8bef9SDimitry Andric 1088e8d8bef9SDimitry Andric // For the cases where we need an intermediate instruction/temporary register 1089e8d8bef9SDimitry Andric // (destination is an AGPR), we need a scavenger. 1090e8d8bef9SDimitry Andric // 1091e8d8bef9SDimitry Andric // FIXME: The pass should maintain this for us so we don't have to re-scan the 1092e8d8bef9SDimitry Andric // whole block for every handled copy. 1093e8d8bef9SDimitry Andric std::unique_ptr<RegScavenger> RS; 1094e8d8bef9SDimitry Andric if (Opcode == AMDGPU::INSTRUCTION_LIST_END) 1095e8d8bef9SDimitry Andric RS.reset(new RegScavenger()); 1096e8d8bef9SDimitry Andric 1097fe6060f1SDimitry Andric ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RC, EltSize); 1098e8d8bef9SDimitry Andric 1099e8d8bef9SDimitry Andric // If there is an overlap, we can't kill the super-register on the last 1100e8d8bef9SDimitry Andric // instruction, since it will also kill the components made live by this def. 1101bdd1243dSDimitry Andric const bool Overlap = RI.regsOverlap(SrcReg, DestReg); 1102bdd1243dSDimitry Andric const bool CanKillSuperReg = KillSrc && !Overlap; 11030b57cec5SDimitry Andric 11040b57cec5SDimitry Andric for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) { 11050b57cec5SDimitry Andric unsigned SubIdx; 11060b57cec5SDimitry Andric if (Forward) 11070b57cec5SDimitry Andric SubIdx = SubIndices[Idx]; 11080b57cec5SDimitry Andric else 11090b57cec5SDimitry Andric SubIdx = SubIndices[SubIndices.size() - Idx - 1]; 11105f757f3fSDimitry Andric Register DestSubReg = RI.getSubReg(DestReg, SubIdx); 11115f757f3fSDimitry Andric Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx); 11125f757f3fSDimitry Andric assert(DestSubReg && SrcSubReg && "Failed to find subregs!"); 11130b57cec5SDimitry Andric 1114bdd1243dSDimitry Andric bool IsFirstSubreg = Idx == 0; 1115e8d8bef9SDimitry Andric bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1; 11160b57cec5SDimitry Andric 1117e8d8bef9SDimitry Andric if (Opcode == AMDGPU::INSTRUCTION_LIST_END) { 1118bdd1243dSDimitry Andric Register ImpDefSuper = IsFirstSubreg ? 
Register(DestReg) : Register(); 1119e8d8bef9SDimitry Andric Register ImpUseSuper = SrcReg; 11205f757f3fSDimitry Andric indirectCopyToAGPR(*this, MBB, MI, DL, DestSubReg, SrcSubReg, UseKill, 11215f757f3fSDimitry Andric *RS, Overlap, ImpDefSuper, ImpUseSuper); 1122fe6060f1SDimitry Andric } else if (Opcode == AMDGPU::V_PK_MOV_B32) { 1123fe6060f1SDimitry Andric MachineInstrBuilder MIB = 11245f757f3fSDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), DestSubReg) 1125fe6060f1SDimitry Andric .addImm(SISrcMods::OP_SEL_1) 1126fe6060f1SDimitry Andric .addReg(SrcSubReg) 1127fe6060f1SDimitry Andric .addImm(SISrcMods::OP_SEL_0 | SISrcMods::OP_SEL_1) 1128fe6060f1SDimitry Andric .addReg(SrcSubReg) 1129fe6060f1SDimitry Andric .addImm(0) // op_sel_lo 1130fe6060f1SDimitry Andric .addImm(0) // op_sel_hi 1131fe6060f1SDimitry Andric .addImm(0) // neg_lo 1132fe6060f1SDimitry Andric .addImm(0) // neg_hi 1133fe6060f1SDimitry Andric .addImm(0) // clamp 1134fe6060f1SDimitry Andric .addReg(SrcReg, getKillRegState(UseKill) | RegState::Implicit); 1135bdd1243dSDimitry Andric if (IsFirstSubreg) 1136fe6060f1SDimitry Andric MIB.addReg(DestReg, RegState::Define | RegState::Implicit); 1137e8d8bef9SDimitry Andric } else { 1138e8d8bef9SDimitry Andric MachineInstrBuilder Builder = 11395f757f3fSDimitry Andric BuildMI(MBB, MI, DL, get(Opcode), DestSubReg).addReg(SrcSubReg); 1140bdd1243dSDimitry Andric if (IsFirstSubreg) 11410b57cec5SDimitry Andric Builder.addReg(DestReg, RegState::Define | RegState::Implicit); 11420b57cec5SDimitry Andric 11430b57cec5SDimitry Andric Builder.addReg(SrcReg, getKillRegState(UseKill) | RegState::Implicit); 11440b57cec5SDimitry Andric } 11450b57cec5SDimitry Andric } 1146e8d8bef9SDimitry Andric } 11470b57cec5SDimitry Andric 11480b57cec5SDimitry Andric int SIInstrInfo::commuteOpcode(unsigned Opcode) const { 11490b57cec5SDimitry Andric int NewOpc; 11500b57cec5SDimitry Andric 11510b57cec5SDimitry Andric // Try to map original to commuted opcode 11520b57cec5SDimitry Andric NewOpc = AMDGPU::getCommuteRev(Opcode); 11530b57cec5SDimitry Andric if (NewOpc != -1) 11540b57cec5SDimitry Andric // Check if the commuted (REV) opcode exists on the target. 11550b57cec5SDimitry Andric return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1; 11560b57cec5SDimitry Andric 11570b57cec5SDimitry Andric // Try to map commuted to original opcode 11580b57cec5SDimitry Andric NewOpc = AMDGPU::getCommuteOrig(Opcode); 11590b57cec5SDimitry Andric if (NewOpc != -1) 11600b57cec5SDimitry Andric // Check if the original (non-REV) opcode exists on the target. 11610b57cec5SDimitry Andric return pseudoToMCOpcode(NewOpc) != -1 ? 
NewOpc : -1; 11620b57cec5SDimitry Andric 11630b57cec5SDimitry Andric return Opcode; 11640b57cec5SDimitry Andric } 11650b57cec5SDimitry Andric 11660b57cec5SDimitry Andric void SIInstrInfo::materializeImmediate(MachineBasicBlock &MBB, 11670b57cec5SDimitry Andric MachineBasicBlock::iterator MI, 1168bdd1243dSDimitry Andric const DebugLoc &DL, Register DestReg, 11690b57cec5SDimitry Andric int64_t Value) const { 11700b57cec5SDimitry Andric MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 11710b57cec5SDimitry Andric const TargetRegisterClass *RegClass = MRI.getRegClass(DestReg); 11720b57cec5SDimitry Andric if (RegClass == &AMDGPU::SReg_32RegClass || 11730b57cec5SDimitry Andric RegClass == &AMDGPU::SGPR_32RegClass || 11740b57cec5SDimitry Andric RegClass == &AMDGPU::SReg_32_XM0RegClass || 11750b57cec5SDimitry Andric RegClass == &AMDGPU::SReg_32_XM0_XEXECRegClass) { 11760b57cec5SDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg) 11770b57cec5SDimitry Andric .addImm(Value); 11780b57cec5SDimitry Andric return; 11790b57cec5SDimitry Andric } 11800b57cec5SDimitry Andric 11810b57cec5SDimitry Andric if (RegClass == &AMDGPU::SReg_64RegClass || 11820b57cec5SDimitry Andric RegClass == &AMDGPU::SGPR_64RegClass || 11830b57cec5SDimitry Andric RegClass == &AMDGPU::SReg_64_XEXECRegClass) { 11840b57cec5SDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg) 11850b57cec5SDimitry Andric .addImm(Value); 11860b57cec5SDimitry Andric return; 11870b57cec5SDimitry Andric } 11880b57cec5SDimitry Andric 11890b57cec5SDimitry Andric if (RegClass == &AMDGPU::VGPR_32RegClass) { 11900b57cec5SDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg) 11910b57cec5SDimitry Andric .addImm(Value); 11920b57cec5SDimitry Andric return; 11930b57cec5SDimitry Andric } 1194fe6060f1SDimitry Andric if (RegClass->hasSuperClassEq(&AMDGPU::VReg_64RegClass)) { 11950b57cec5SDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO), DestReg) 11960b57cec5SDimitry Andric .addImm(Value); 11970b57cec5SDimitry Andric return; 11980b57cec5SDimitry Andric } 11990b57cec5SDimitry Andric 12000b57cec5SDimitry Andric unsigned EltSize = 4; 12010b57cec5SDimitry Andric unsigned Opcode = AMDGPU::V_MOV_B32_e32; 12020b57cec5SDimitry Andric if (RI.isSGPRClass(RegClass)) { 12030b57cec5SDimitry Andric if (RI.getRegSizeInBits(*RegClass) > 32) { 12040b57cec5SDimitry Andric Opcode = AMDGPU::S_MOV_B64; 12050b57cec5SDimitry Andric EltSize = 8; 12060b57cec5SDimitry Andric } else { 12070b57cec5SDimitry Andric Opcode = AMDGPU::S_MOV_B32; 12080b57cec5SDimitry Andric EltSize = 4; 12090b57cec5SDimitry Andric } 12100b57cec5SDimitry Andric } 12110b57cec5SDimitry Andric 12120b57cec5SDimitry Andric ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RegClass, EltSize); 12130b57cec5SDimitry Andric for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) { 12140b57cec5SDimitry Andric int64_t IdxValue = Idx == 0 ? 
Value : 0; 12150b57cec5SDimitry Andric 12160b57cec5SDimitry Andric MachineInstrBuilder Builder = BuildMI(MBB, MI, DL, 12175ffd83dbSDimitry Andric get(Opcode), RI.getSubReg(DestReg, SubIndices[Idx])); 12180b57cec5SDimitry Andric Builder.addImm(IdxValue); 12190b57cec5SDimitry Andric } 12200b57cec5SDimitry Andric } 12210b57cec5SDimitry Andric 12220b57cec5SDimitry Andric const TargetRegisterClass * 12230b57cec5SDimitry Andric SIInstrInfo::getPreferredSelectRegClass(unsigned Size) const { 12240b57cec5SDimitry Andric return &AMDGPU::VGPR_32RegClass; 12250b57cec5SDimitry Andric } 12260b57cec5SDimitry Andric 12270b57cec5SDimitry Andric void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB, 12280b57cec5SDimitry Andric MachineBasicBlock::iterator I, 12295ffd83dbSDimitry Andric const DebugLoc &DL, Register DstReg, 12300b57cec5SDimitry Andric ArrayRef<MachineOperand> Cond, 12315ffd83dbSDimitry Andric Register TrueReg, 12325ffd83dbSDimitry Andric Register FalseReg) const { 12330b57cec5SDimitry Andric MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 12340b57cec5SDimitry Andric const TargetRegisterClass *BoolXExecRC = 12350b57cec5SDimitry Andric RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID); 12360b57cec5SDimitry Andric assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass && 12370b57cec5SDimitry Andric "Not a VGPR32 reg"); 12380b57cec5SDimitry Andric 12390b57cec5SDimitry Andric if (Cond.size() == 1) { 12408bcb0991SDimitry Andric Register SReg = MRI.createVirtualRegister(BoolXExecRC); 12410b57cec5SDimitry Andric BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg) 12420b57cec5SDimitry Andric .add(Cond[0]); 12430b57cec5SDimitry Andric BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) 12440b57cec5SDimitry Andric .addImm(0) 12450b57cec5SDimitry Andric .addReg(FalseReg) 12460b57cec5SDimitry Andric .addImm(0) 12470b57cec5SDimitry Andric .addReg(TrueReg) 12480b57cec5SDimitry Andric .addReg(SReg); 12490b57cec5SDimitry Andric } else if (Cond.size() == 2) { 12500b57cec5SDimitry Andric assert(Cond[0].isImm() && "Cond[0] is not an immediate"); 12510b57cec5SDimitry Andric switch (Cond[0].getImm()) { 12520b57cec5SDimitry Andric case SIInstrInfo::SCC_TRUE: { 12538bcb0991SDimitry Andric Register SReg = MRI.createVirtualRegister(BoolXExecRC); 12540b57cec5SDimitry Andric BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32 12550b57cec5SDimitry Andric : AMDGPU::S_CSELECT_B64), SReg) 1256480093f4SDimitry Andric .addImm(1) 12570b57cec5SDimitry Andric .addImm(0); 12580b57cec5SDimitry Andric BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) 12590b57cec5SDimitry Andric .addImm(0) 12600b57cec5SDimitry Andric .addReg(FalseReg) 12610b57cec5SDimitry Andric .addImm(0) 12620b57cec5SDimitry Andric .addReg(TrueReg) 12630b57cec5SDimitry Andric .addReg(SReg); 12640b57cec5SDimitry Andric break; 12650b57cec5SDimitry Andric } 12660b57cec5SDimitry Andric case SIInstrInfo::SCC_FALSE: { 12678bcb0991SDimitry Andric Register SReg = MRI.createVirtualRegister(BoolXExecRC); 12680b57cec5SDimitry Andric BuildMI(MBB, I, DL, get(ST.isWave32() ? 
AMDGPU::S_CSELECT_B32 12690b57cec5SDimitry Andric : AMDGPU::S_CSELECT_B64), SReg) 12700b57cec5SDimitry Andric .addImm(0) 1271480093f4SDimitry Andric .addImm(1); 12720b57cec5SDimitry Andric BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) 12730b57cec5SDimitry Andric .addImm(0) 12740b57cec5SDimitry Andric .addReg(FalseReg) 12750b57cec5SDimitry Andric .addImm(0) 12760b57cec5SDimitry Andric .addReg(TrueReg) 12770b57cec5SDimitry Andric .addReg(SReg); 12780b57cec5SDimitry Andric break; 12790b57cec5SDimitry Andric } 12800b57cec5SDimitry Andric case SIInstrInfo::VCCNZ: { 12810b57cec5SDimitry Andric MachineOperand RegOp = Cond[1]; 12820b57cec5SDimitry Andric RegOp.setImplicit(false); 12838bcb0991SDimitry Andric Register SReg = MRI.createVirtualRegister(BoolXExecRC); 12840b57cec5SDimitry Andric BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg) 12850b57cec5SDimitry Andric .add(RegOp); 12860b57cec5SDimitry Andric BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) 12870b57cec5SDimitry Andric .addImm(0) 12880b57cec5SDimitry Andric .addReg(FalseReg) 12890b57cec5SDimitry Andric .addImm(0) 12900b57cec5SDimitry Andric .addReg(TrueReg) 12910b57cec5SDimitry Andric .addReg(SReg); 12920b57cec5SDimitry Andric break; 12930b57cec5SDimitry Andric } 12940b57cec5SDimitry Andric case SIInstrInfo::VCCZ: { 12950b57cec5SDimitry Andric MachineOperand RegOp = Cond[1]; 12960b57cec5SDimitry Andric RegOp.setImplicit(false); 12978bcb0991SDimitry Andric Register SReg = MRI.createVirtualRegister(BoolXExecRC); 12980b57cec5SDimitry Andric BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg) 12990b57cec5SDimitry Andric .add(RegOp); 13000b57cec5SDimitry Andric BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) 13010b57cec5SDimitry Andric .addImm(0) 13020b57cec5SDimitry Andric .addReg(TrueReg) 13030b57cec5SDimitry Andric .addImm(0) 13040b57cec5SDimitry Andric .addReg(FalseReg) 13050b57cec5SDimitry Andric .addReg(SReg); 13060b57cec5SDimitry Andric break; 13070b57cec5SDimitry Andric } 13080b57cec5SDimitry Andric case SIInstrInfo::EXECNZ: { 13098bcb0991SDimitry Andric Register SReg = MRI.createVirtualRegister(BoolXExecRC); 13108bcb0991SDimitry Andric Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC()); 13110b57cec5SDimitry Andric BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 13120b57cec5SDimitry Andric : AMDGPU::S_OR_SAVEEXEC_B64), SReg2) 13130b57cec5SDimitry Andric .addImm(0); 13140b57cec5SDimitry Andric BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32 13150b57cec5SDimitry Andric : AMDGPU::S_CSELECT_B64), SReg) 1316480093f4SDimitry Andric .addImm(1) 13170b57cec5SDimitry Andric .addImm(0); 13180b57cec5SDimitry Andric BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) 13190b57cec5SDimitry Andric .addImm(0) 13200b57cec5SDimitry Andric .addReg(FalseReg) 13210b57cec5SDimitry Andric .addImm(0) 13220b57cec5SDimitry Andric .addReg(TrueReg) 13230b57cec5SDimitry Andric .addReg(SReg); 13240b57cec5SDimitry Andric break; 13250b57cec5SDimitry Andric } 13260b57cec5SDimitry Andric case SIInstrInfo::EXECZ: { 13278bcb0991SDimitry Andric Register SReg = MRI.createVirtualRegister(BoolXExecRC); 13288bcb0991SDimitry Andric Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC()); 13290b57cec5SDimitry Andric BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 13300b57cec5SDimitry Andric : AMDGPU::S_OR_SAVEEXEC_B64), SReg2) 13310b57cec5SDimitry Andric .addImm(0); 13320b57cec5SDimitry Andric BuildMI(MBB, I, DL, get(ST.isWave32() ? 
AMDGPU::S_CSELECT_B32 13330b57cec5SDimitry Andric : AMDGPU::S_CSELECT_B64), SReg) 13340b57cec5SDimitry Andric .addImm(0) 1335480093f4SDimitry Andric .addImm(1); 13360b57cec5SDimitry Andric BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) 13370b57cec5SDimitry Andric .addImm(0) 13380b57cec5SDimitry Andric .addReg(FalseReg) 13390b57cec5SDimitry Andric .addImm(0) 13400b57cec5SDimitry Andric .addReg(TrueReg) 13410b57cec5SDimitry Andric .addReg(SReg); 13420b57cec5SDimitry Andric llvm_unreachable("Unhandled branch predicate EXECZ"); 13430b57cec5SDimitry Andric break; 13440b57cec5SDimitry Andric } 13450b57cec5SDimitry Andric default: 13460b57cec5SDimitry Andric llvm_unreachable("invalid branch predicate"); 13470b57cec5SDimitry Andric } 13480b57cec5SDimitry Andric } else { 13490b57cec5SDimitry Andric llvm_unreachable("Can only handle Cond size 1 or 2"); 13500b57cec5SDimitry Andric } 13510b57cec5SDimitry Andric } 13520b57cec5SDimitry Andric 13535ffd83dbSDimitry Andric Register SIInstrInfo::insertEQ(MachineBasicBlock *MBB, 13540b57cec5SDimitry Andric MachineBasicBlock::iterator I, 13550b57cec5SDimitry Andric const DebugLoc &DL, 13565ffd83dbSDimitry Andric Register SrcReg, int Value) const { 13570b57cec5SDimitry Andric MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 13588bcb0991SDimitry Andric Register Reg = MRI.createVirtualRegister(RI.getBoolRC()); 13590b57cec5SDimitry Andric BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_EQ_I32_e64), Reg) 13600b57cec5SDimitry Andric .addImm(Value) 13610b57cec5SDimitry Andric .addReg(SrcReg); 13620b57cec5SDimitry Andric 13630b57cec5SDimitry Andric return Reg; 13640b57cec5SDimitry Andric } 13650b57cec5SDimitry Andric 13665ffd83dbSDimitry Andric Register SIInstrInfo::insertNE(MachineBasicBlock *MBB, 13670b57cec5SDimitry Andric MachineBasicBlock::iterator I, 13680b57cec5SDimitry Andric const DebugLoc &DL, 13695ffd83dbSDimitry Andric Register SrcReg, int Value) const { 13700b57cec5SDimitry Andric MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 13718bcb0991SDimitry Andric Register Reg = MRI.createVirtualRegister(RI.getBoolRC()); 13720b57cec5SDimitry Andric BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_NE_I32_e64), Reg) 13730b57cec5SDimitry Andric .addImm(Value) 13740b57cec5SDimitry Andric .addReg(SrcReg); 13750b57cec5SDimitry Andric 13760b57cec5SDimitry Andric return Reg; 13770b57cec5SDimitry Andric } 13780b57cec5SDimitry Andric 13790b57cec5SDimitry Andric unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const { 13800b57cec5SDimitry Andric 13814824e7fdSDimitry Andric if (RI.isAGPRClass(DstRC)) 13820b57cec5SDimitry Andric return AMDGPU::COPY; 13835f757f3fSDimitry Andric if (RI.getRegSizeInBits(*DstRC) == 16) { 13845f757f3fSDimitry Andric // Assume hi bits are unneeded. Only _e64 true16 instructions are legal 13855f757f3fSDimitry Andric // before RA. 13865f757f3fSDimitry Andric return RI.isSGPRClass(DstRC) ? AMDGPU::COPY : AMDGPU::V_MOV_B16_t16_e64; 13875f757f3fSDimitry Andric } else if (RI.getRegSizeInBits(*DstRC) == 32) { 13880b57cec5SDimitry Andric return RI.isSGPRClass(DstRC) ? 
AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32; 13890b57cec5SDimitry Andric } else if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC)) { 13900b57cec5SDimitry Andric return AMDGPU::S_MOV_B64; 13910b57cec5SDimitry Andric } else if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC)) { 13920b57cec5SDimitry Andric return AMDGPU::V_MOV_B64_PSEUDO; 13930b57cec5SDimitry Andric } 13940b57cec5SDimitry Andric return AMDGPU::COPY; 13950b57cec5SDimitry Andric } 13960b57cec5SDimitry Andric 1397e8d8bef9SDimitry Andric const MCInstrDesc & 1398e8d8bef9SDimitry Andric SIInstrInfo::getIndirectGPRIDXPseudo(unsigned VecSize, 1399e8d8bef9SDimitry Andric bool IsIndirectSrc) const { 1400e8d8bef9SDimitry Andric if (IsIndirectSrc) { 14015ffd83dbSDimitry Andric if (VecSize <= 32) // 4 bytes 1402e8d8bef9SDimitry Andric return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1); 14035ffd83dbSDimitry Andric if (VecSize <= 64) // 8 bytes 1404e8d8bef9SDimitry Andric return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2); 14055ffd83dbSDimitry Andric if (VecSize <= 96) // 12 bytes 1406e8d8bef9SDimitry Andric return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3); 14075ffd83dbSDimitry Andric if (VecSize <= 128) // 16 bytes 1408e8d8bef9SDimitry Andric return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4); 14095ffd83dbSDimitry Andric if (VecSize <= 160) // 20 bytes 1410e8d8bef9SDimitry Andric return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5); 14115ffd83dbSDimitry Andric if (VecSize <= 256) // 32 bytes 1412e8d8bef9SDimitry Andric return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8); 1413bdd1243dSDimitry Andric if (VecSize <= 288) // 36 bytes 1414bdd1243dSDimitry Andric return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9); 1415bdd1243dSDimitry Andric if (VecSize <= 320) // 40 bytes 1416bdd1243dSDimitry Andric return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10); 1417bdd1243dSDimitry Andric if (VecSize <= 352) // 44 bytes 1418bdd1243dSDimitry Andric return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11); 1419bdd1243dSDimitry Andric if (VecSize <= 384) // 48 bytes 1420bdd1243dSDimitry Andric return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12); 14215ffd83dbSDimitry Andric if (VecSize <= 512) // 64 bytes 1422e8d8bef9SDimitry Andric return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16); 14235ffd83dbSDimitry Andric if (VecSize <= 1024) // 128 bytes 1424e8d8bef9SDimitry Andric return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32); 14255ffd83dbSDimitry Andric 1426e8d8bef9SDimitry Andric llvm_unreachable("unsupported size for IndirectRegReadGPRIDX pseudos"); 14275ffd83dbSDimitry Andric } 14285ffd83dbSDimitry Andric 14295ffd83dbSDimitry Andric if (VecSize <= 32) // 4 bytes 1430e8d8bef9SDimitry Andric return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1); 14315ffd83dbSDimitry Andric if (VecSize <= 64) // 8 bytes 1432e8d8bef9SDimitry Andric return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2); 14335ffd83dbSDimitry Andric if (VecSize <= 96) // 12 bytes 1434e8d8bef9SDimitry Andric return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3); 14355ffd83dbSDimitry Andric if (VecSize <= 128) // 16 bytes 1436e8d8bef9SDimitry Andric return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4); 14375ffd83dbSDimitry Andric if (VecSize <= 160) // 20 bytes 1438e8d8bef9SDimitry Andric return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5); 14395ffd83dbSDimitry Andric if (VecSize <= 256) // 32 bytes 1440e8d8bef9SDimitry Andric return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8); 1441bdd1243dSDimitry Andric if (VecSize 
<= 288) // 36 bytes 1442bdd1243dSDimitry Andric return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9); 1443bdd1243dSDimitry Andric if (VecSize <= 320) // 40 bytes 1444bdd1243dSDimitry Andric return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10); 1445bdd1243dSDimitry Andric if (VecSize <= 352) // 44 bytes 1446bdd1243dSDimitry Andric return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11); 1447bdd1243dSDimitry Andric if (VecSize <= 384) // 48 bytes 1448bdd1243dSDimitry Andric return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12); 14495ffd83dbSDimitry Andric if (VecSize <= 512) // 64 bytes 1450e8d8bef9SDimitry Andric return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16); 14515ffd83dbSDimitry Andric if (VecSize <= 1024) // 128 bytes 1452e8d8bef9SDimitry Andric return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32); 14535ffd83dbSDimitry Andric 1454e8d8bef9SDimitry Andric llvm_unreachable("unsupported size for IndirectRegWriteGPRIDX pseudos"); 14555ffd83dbSDimitry Andric } 14565ffd83dbSDimitry Andric 1457e8d8bef9SDimitry Andric static unsigned getIndirectVGPRWriteMovRelPseudoOpc(unsigned VecSize) { 1458e8d8bef9SDimitry Andric if (VecSize <= 32) // 4 bytes 1459e8d8bef9SDimitry Andric return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1; 14605ffd83dbSDimitry Andric if (VecSize <= 64) // 8 bytes 1461e8d8bef9SDimitry Andric return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2; 1462e8d8bef9SDimitry Andric if (VecSize <= 96) // 12 bytes 1463e8d8bef9SDimitry Andric return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3; 14645ffd83dbSDimitry Andric if (VecSize <= 128) // 16 bytes 1465e8d8bef9SDimitry Andric return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4; 1466e8d8bef9SDimitry Andric if (VecSize <= 160) // 20 bytes 1467e8d8bef9SDimitry Andric return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5; 14685ffd83dbSDimitry Andric if (VecSize <= 256) // 32 bytes 1469e8d8bef9SDimitry Andric return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8; 1470bdd1243dSDimitry Andric if (VecSize <= 288) // 36 bytes 1471bdd1243dSDimitry Andric return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9; 1472bdd1243dSDimitry Andric if (VecSize <= 320) // 40 bytes 1473bdd1243dSDimitry Andric return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10; 1474bdd1243dSDimitry Andric if (VecSize <= 352) // 44 bytes 1475bdd1243dSDimitry Andric return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11; 1476bdd1243dSDimitry Andric if (VecSize <= 384) // 48 bytes 1477bdd1243dSDimitry Andric return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12; 14785ffd83dbSDimitry Andric if (VecSize <= 512) // 64 bytes 1479e8d8bef9SDimitry Andric return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16; 14805ffd83dbSDimitry Andric if (VecSize <= 1024) // 128 bytes 1481e8d8bef9SDimitry Andric return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32; 14825ffd83dbSDimitry Andric 14835ffd83dbSDimitry Andric llvm_unreachable("unsupported size for IndirectRegWrite pseudos"); 14845ffd83dbSDimitry Andric } 14855ffd83dbSDimitry Andric 1486e8d8bef9SDimitry Andric static unsigned getIndirectSGPRWriteMovRelPseudo32(unsigned VecSize) { 1487e8d8bef9SDimitry Andric if (VecSize <= 32) // 4 bytes 1488e8d8bef9SDimitry Andric return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1; 1489e8d8bef9SDimitry Andric if (VecSize <= 64) // 8 bytes 1490e8d8bef9SDimitry Andric return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2; 1491e8d8bef9SDimitry Andric if (VecSize <= 96) // 12 bytes 1492e8d8bef9SDimitry Andric return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3; 1493e8d8bef9SDimitry Andric if (VecSize <= 128) // 16 bytes 
1494e8d8bef9SDimitry Andric return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4; 1495e8d8bef9SDimitry Andric if (VecSize <= 160) // 20 bytes 1496e8d8bef9SDimitry Andric return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5; 1497e8d8bef9SDimitry Andric if (VecSize <= 256) // 32 bytes 1498e8d8bef9SDimitry Andric return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8; 149906c3fb27SDimitry Andric if (VecSize <= 288) // 36 bytes 150006c3fb27SDimitry Andric return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9; 150106c3fb27SDimitry Andric if (VecSize <= 320) // 40 bytes 150206c3fb27SDimitry Andric return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10; 150306c3fb27SDimitry Andric if (VecSize <= 352) // 44 bytes 150406c3fb27SDimitry Andric return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11; 150506c3fb27SDimitry Andric if (VecSize <= 384) // 48 bytes 150606c3fb27SDimitry Andric return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12; 1507e8d8bef9SDimitry Andric if (VecSize <= 512) // 64 bytes 1508e8d8bef9SDimitry Andric return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16; 1509e8d8bef9SDimitry Andric if (VecSize <= 1024) // 128 bytes 1510e8d8bef9SDimitry Andric return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32; 1511e8d8bef9SDimitry Andric 1512e8d8bef9SDimitry Andric llvm_unreachable("unsupported size for IndirectRegWrite pseudos"); 1513e8d8bef9SDimitry Andric } 1514e8d8bef9SDimitry Andric 1515e8d8bef9SDimitry Andric static unsigned getIndirectSGPRWriteMovRelPseudo64(unsigned VecSize) { 1516e8d8bef9SDimitry Andric if (VecSize <= 64) // 8 bytes 1517e8d8bef9SDimitry Andric return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1; 1518e8d8bef9SDimitry Andric if (VecSize <= 128) // 16 bytes 1519e8d8bef9SDimitry Andric return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2; 1520e8d8bef9SDimitry Andric if (VecSize <= 256) // 32 bytes 1521e8d8bef9SDimitry Andric return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4; 1522e8d8bef9SDimitry Andric if (VecSize <= 512) // 64 bytes 1523e8d8bef9SDimitry Andric return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8; 1524e8d8bef9SDimitry Andric if (VecSize <= 1024) // 128 bytes 1525e8d8bef9SDimitry Andric return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16; 1526e8d8bef9SDimitry Andric 1527e8d8bef9SDimitry Andric llvm_unreachable("unsupported size for IndirectRegWrite pseudos"); 1528e8d8bef9SDimitry Andric } 1529e8d8bef9SDimitry Andric 1530e8d8bef9SDimitry Andric const MCInstrDesc & 1531e8d8bef9SDimitry Andric SIInstrInfo::getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, 1532e8d8bef9SDimitry Andric bool IsSGPR) const { 15335ffd83dbSDimitry Andric if (IsSGPR) { 15345ffd83dbSDimitry Andric switch (EltSize) { 15355ffd83dbSDimitry Andric case 32: 1536e8d8bef9SDimitry Andric return get(getIndirectSGPRWriteMovRelPseudo32(VecSize)); 15375ffd83dbSDimitry Andric case 64: 1538e8d8bef9SDimitry Andric return get(getIndirectSGPRWriteMovRelPseudo64(VecSize)); 15395ffd83dbSDimitry Andric default: 15405ffd83dbSDimitry Andric llvm_unreachable("invalid reg indexing elt size"); 15415ffd83dbSDimitry Andric } 15425ffd83dbSDimitry Andric } 15435ffd83dbSDimitry Andric 15445ffd83dbSDimitry Andric assert(EltSize == 32 && "invalid reg indexing elt size"); 1545e8d8bef9SDimitry Andric return get(getIndirectVGPRWriteMovRelPseudoOpc(VecSize)); 15465ffd83dbSDimitry Andric } 15475ffd83dbSDimitry Andric 15480b57cec5SDimitry Andric static unsigned getSGPRSpillSaveOpcode(unsigned Size) { 15490b57cec5SDimitry Andric switch (Size) { 15500b57cec5SDimitry Andric case 4: 15510b57cec5SDimitry Andric return AMDGPU::SI_SPILL_S32_SAVE; 
15520b57cec5SDimitry Andric case 8: 15530b57cec5SDimitry Andric return AMDGPU::SI_SPILL_S64_SAVE; 15540b57cec5SDimitry Andric case 12: 15550b57cec5SDimitry Andric return AMDGPU::SI_SPILL_S96_SAVE; 15560b57cec5SDimitry Andric case 16: 15570b57cec5SDimitry Andric return AMDGPU::SI_SPILL_S128_SAVE; 15580b57cec5SDimitry Andric case 20: 15590b57cec5SDimitry Andric return AMDGPU::SI_SPILL_S160_SAVE; 15605ffd83dbSDimitry Andric case 24: 15615ffd83dbSDimitry Andric return AMDGPU::SI_SPILL_S192_SAVE; 1562fe6060f1SDimitry Andric case 28: 1563fe6060f1SDimitry Andric return AMDGPU::SI_SPILL_S224_SAVE; 15640b57cec5SDimitry Andric case 32: 15650b57cec5SDimitry Andric return AMDGPU::SI_SPILL_S256_SAVE; 1566bdd1243dSDimitry Andric case 36: 1567bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_S288_SAVE; 1568bdd1243dSDimitry Andric case 40: 1569bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_S320_SAVE; 1570bdd1243dSDimitry Andric case 44: 1571bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_S352_SAVE; 1572bdd1243dSDimitry Andric case 48: 1573bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_S384_SAVE; 15740b57cec5SDimitry Andric case 64: 15750b57cec5SDimitry Andric return AMDGPU::SI_SPILL_S512_SAVE; 15760b57cec5SDimitry Andric case 128: 15770b57cec5SDimitry Andric return AMDGPU::SI_SPILL_S1024_SAVE; 15780b57cec5SDimitry Andric default: 15790b57cec5SDimitry Andric llvm_unreachable("unknown register size"); 15800b57cec5SDimitry Andric } 15810b57cec5SDimitry Andric } 15820b57cec5SDimitry Andric 15830b57cec5SDimitry Andric static unsigned getVGPRSpillSaveOpcode(unsigned Size) { 15840b57cec5SDimitry Andric switch (Size) { 15850b57cec5SDimitry Andric case 4: 15860b57cec5SDimitry Andric return AMDGPU::SI_SPILL_V32_SAVE; 15870b57cec5SDimitry Andric case 8: 15880b57cec5SDimitry Andric return AMDGPU::SI_SPILL_V64_SAVE; 15890b57cec5SDimitry Andric case 12: 15900b57cec5SDimitry Andric return AMDGPU::SI_SPILL_V96_SAVE; 15910b57cec5SDimitry Andric case 16: 15920b57cec5SDimitry Andric return AMDGPU::SI_SPILL_V128_SAVE; 15930b57cec5SDimitry Andric case 20: 15940b57cec5SDimitry Andric return AMDGPU::SI_SPILL_V160_SAVE; 15955ffd83dbSDimitry Andric case 24: 15965ffd83dbSDimitry Andric return AMDGPU::SI_SPILL_V192_SAVE; 1597fe6060f1SDimitry Andric case 28: 1598fe6060f1SDimitry Andric return AMDGPU::SI_SPILL_V224_SAVE; 15990b57cec5SDimitry Andric case 32: 16000b57cec5SDimitry Andric return AMDGPU::SI_SPILL_V256_SAVE; 1601bdd1243dSDimitry Andric case 36: 1602bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_V288_SAVE; 1603bdd1243dSDimitry Andric case 40: 1604bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_V320_SAVE; 1605bdd1243dSDimitry Andric case 44: 1606bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_V352_SAVE; 1607bdd1243dSDimitry Andric case 48: 1608bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_V384_SAVE; 16090b57cec5SDimitry Andric case 64: 16100b57cec5SDimitry Andric return AMDGPU::SI_SPILL_V512_SAVE; 16110b57cec5SDimitry Andric case 128: 16120b57cec5SDimitry Andric return AMDGPU::SI_SPILL_V1024_SAVE; 16130b57cec5SDimitry Andric default: 16140b57cec5SDimitry Andric llvm_unreachable("unknown register size"); 16150b57cec5SDimitry Andric } 16160b57cec5SDimitry Andric } 16170b57cec5SDimitry Andric 16180b57cec5SDimitry Andric static unsigned getAGPRSpillSaveOpcode(unsigned Size) { 16190b57cec5SDimitry Andric switch (Size) { 16200b57cec5SDimitry Andric case 4: 16210b57cec5SDimitry Andric return AMDGPU::SI_SPILL_A32_SAVE; 16220b57cec5SDimitry Andric case 8: 16230b57cec5SDimitry Andric return AMDGPU::SI_SPILL_A64_SAVE; 
1624e8d8bef9SDimitry Andric case 12: 1625e8d8bef9SDimitry Andric return AMDGPU::SI_SPILL_A96_SAVE; 16260b57cec5SDimitry Andric case 16: 16270b57cec5SDimitry Andric return AMDGPU::SI_SPILL_A128_SAVE; 1628e8d8bef9SDimitry Andric case 20: 1629e8d8bef9SDimitry Andric return AMDGPU::SI_SPILL_A160_SAVE; 1630e8d8bef9SDimitry Andric case 24: 1631e8d8bef9SDimitry Andric return AMDGPU::SI_SPILL_A192_SAVE; 1632fe6060f1SDimitry Andric case 28: 1633fe6060f1SDimitry Andric return AMDGPU::SI_SPILL_A224_SAVE; 1634e8d8bef9SDimitry Andric case 32: 1635e8d8bef9SDimitry Andric return AMDGPU::SI_SPILL_A256_SAVE; 1636bdd1243dSDimitry Andric case 36: 1637bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_A288_SAVE; 1638bdd1243dSDimitry Andric case 40: 1639bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_A320_SAVE; 1640bdd1243dSDimitry Andric case 44: 1641bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_A352_SAVE; 1642bdd1243dSDimitry Andric case 48: 1643bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_A384_SAVE; 16440b57cec5SDimitry Andric case 64: 16450b57cec5SDimitry Andric return AMDGPU::SI_SPILL_A512_SAVE; 16460b57cec5SDimitry Andric case 128: 16470b57cec5SDimitry Andric return AMDGPU::SI_SPILL_A1024_SAVE; 16480b57cec5SDimitry Andric default: 16490b57cec5SDimitry Andric llvm_unreachable("unknown register size"); 16500b57cec5SDimitry Andric } 16510b57cec5SDimitry Andric } 16520b57cec5SDimitry Andric 16530eae32dcSDimitry Andric static unsigned getAVSpillSaveOpcode(unsigned Size) { 16540eae32dcSDimitry Andric switch (Size) { 16550eae32dcSDimitry Andric case 4: 16560eae32dcSDimitry Andric return AMDGPU::SI_SPILL_AV32_SAVE; 16570eae32dcSDimitry Andric case 8: 16580eae32dcSDimitry Andric return AMDGPU::SI_SPILL_AV64_SAVE; 16590eae32dcSDimitry Andric case 12: 16600eae32dcSDimitry Andric return AMDGPU::SI_SPILL_AV96_SAVE; 16610eae32dcSDimitry Andric case 16: 16620eae32dcSDimitry Andric return AMDGPU::SI_SPILL_AV128_SAVE; 16630eae32dcSDimitry Andric case 20: 16640eae32dcSDimitry Andric return AMDGPU::SI_SPILL_AV160_SAVE; 16650eae32dcSDimitry Andric case 24: 16660eae32dcSDimitry Andric return AMDGPU::SI_SPILL_AV192_SAVE; 16670eae32dcSDimitry Andric case 28: 16680eae32dcSDimitry Andric return AMDGPU::SI_SPILL_AV224_SAVE; 16690eae32dcSDimitry Andric case 32: 16700eae32dcSDimitry Andric return AMDGPU::SI_SPILL_AV256_SAVE; 1671bdd1243dSDimitry Andric case 36: 1672bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_AV288_SAVE; 1673bdd1243dSDimitry Andric case 40: 1674bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_AV320_SAVE; 1675bdd1243dSDimitry Andric case 44: 1676bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_AV352_SAVE; 1677bdd1243dSDimitry Andric case 48: 1678bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_AV384_SAVE; 16790eae32dcSDimitry Andric case 64: 16800eae32dcSDimitry Andric return AMDGPU::SI_SPILL_AV512_SAVE; 16810eae32dcSDimitry Andric case 128: 16820eae32dcSDimitry Andric return AMDGPU::SI_SPILL_AV1024_SAVE; 16830eae32dcSDimitry Andric default: 16840eae32dcSDimitry Andric llvm_unreachable("unknown register size"); 16850eae32dcSDimitry Andric } 16860eae32dcSDimitry Andric } 16870eae32dcSDimitry Andric 16885f757f3fSDimitry Andric static unsigned getWWMRegSpillSaveOpcode(unsigned Size, 16895f757f3fSDimitry Andric bool IsVectorSuperClass) { 169006c3fb27SDimitry Andric // Currently, there is only 32-bit WWM register spills needed. 
169106c3fb27SDimitry Andric if (Size != 4) 169206c3fb27SDimitry Andric llvm_unreachable("unknown wwm register spill size"); 169306c3fb27SDimitry Andric 16945f757f3fSDimitry Andric if (IsVectorSuperClass) 16955f757f3fSDimitry Andric return AMDGPU::SI_SPILL_WWM_AV32_SAVE; 16965f757f3fSDimitry Andric 169706c3fb27SDimitry Andric return AMDGPU::SI_SPILL_WWM_V32_SAVE; 169806c3fb27SDimitry Andric } 169906c3fb27SDimitry Andric 170006c3fb27SDimitry Andric static unsigned getVectorRegSpillSaveOpcode(Register Reg, 170106c3fb27SDimitry Andric const TargetRegisterClass *RC, 170206c3fb27SDimitry Andric unsigned Size, 170306c3fb27SDimitry Andric const SIRegisterInfo &TRI, 170406c3fb27SDimitry Andric const SIMachineFunctionInfo &MFI) { 17055f757f3fSDimitry Andric bool IsVectorSuperClass = TRI.isVectorSuperClass(RC); 17065f757f3fSDimitry Andric 170706c3fb27SDimitry Andric // Choose the right opcode if spilling a WWM register. 170806c3fb27SDimitry Andric if (MFI.checkFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG)) 17095f757f3fSDimitry Andric return getWWMRegSpillSaveOpcode(Size, IsVectorSuperClass); 171006c3fb27SDimitry Andric 17115f757f3fSDimitry Andric if (IsVectorSuperClass) 171206c3fb27SDimitry Andric return getAVSpillSaveOpcode(Size); 171306c3fb27SDimitry Andric 171406c3fb27SDimitry Andric return TRI.isAGPRClass(RC) ? getAGPRSpillSaveOpcode(Size) 171506c3fb27SDimitry Andric : getVGPRSpillSaveOpcode(Size); 171606c3fb27SDimitry Andric } 171706c3fb27SDimitry Andric 1718bdd1243dSDimitry Andric void SIInstrInfo::storeRegToStackSlot( 1719bdd1243dSDimitry Andric MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, 1720bdd1243dSDimitry Andric bool isKill, int FrameIndex, const TargetRegisterClass *RC, 1721bdd1243dSDimitry Andric const TargetRegisterInfo *TRI, Register VReg) const { 17220b57cec5SDimitry Andric MachineFunction *MF = MBB.getParent(); 17230b57cec5SDimitry Andric SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); 17240b57cec5SDimitry Andric MachineFrameInfo &FrameInfo = MF->getFrameInfo(); 17250b57cec5SDimitry Andric const DebugLoc &DL = MBB.findDebugLoc(MI); 17260b57cec5SDimitry Andric 17270b57cec5SDimitry Andric MachinePointerInfo PtrInfo 17280b57cec5SDimitry Andric = MachinePointerInfo::getFixedStack(*MF, FrameIndex); 17295ffd83dbSDimitry Andric MachineMemOperand *MMO = MF->getMachineMemOperand( 17305ffd83dbSDimitry Andric PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FrameIndex), 17315ffd83dbSDimitry Andric FrameInfo.getObjectAlign(FrameIndex)); 17320b57cec5SDimitry Andric unsigned SpillSize = TRI->getSpillSize(*RC); 17330b57cec5SDimitry Andric 17344824e7fdSDimitry Andric MachineRegisterInfo &MRI = MF->getRegInfo(); 17350b57cec5SDimitry Andric if (RI.isSGPRClass(RC)) { 17360b57cec5SDimitry Andric MFI->setHasSpilledSGPRs(); 1737480093f4SDimitry Andric assert(SrcReg != AMDGPU::M0 && "m0 should not be spilled"); 17385ffd83dbSDimitry Andric assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI && 17395ffd83dbSDimitry Andric SrcReg != AMDGPU::EXEC && "exec should not be spilled"); 17400b57cec5SDimitry Andric 17410b57cec5SDimitry Andric // We are only allowed to create one new instruction when spilling 17420b57cec5SDimitry Andric // registers, so we need to use pseudo instruction for spilling SGPRs. 
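// (The SI_SPILL_S*_SAVE pseudo is expanded later in the pipeline; when
// spillSGPRToVGPR() is enabled the slot is tagged SGPRSpill below and the
// spill is typically rewritten to lane writes into a VGPR rather than a real
// memory store.)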
17430b57cec5SDimitry Andric const MCInstrDesc &OpDesc = get(getSGPRSpillSaveOpcode(SpillSize)); 17440b57cec5SDimitry Andric 17450b57cec5SDimitry Andric // The SGPR spill/restore instructions only work on number sgprs, so we need 17460b57cec5SDimitry Andric // to make sure we are using the correct register class. 1747e8d8bef9SDimitry Andric if (SrcReg.isVirtual() && SpillSize == 4) { 17485ffd83dbSDimitry Andric MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0_XEXECRegClass); 17490b57cec5SDimitry Andric } 17500b57cec5SDimitry Andric 17518bcb0991SDimitry Andric BuildMI(MBB, MI, DL, OpDesc) 17520b57cec5SDimitry Andric .addReg(SrcReg, getKillRegState(isKill)) // data 17530b57cec5SDimitry Andric .addFrameIndex(FrameIndex) // addr 17540b57cec5SDimitry Andric .addMemOperand(MMO) 17550b57cec5SDimitry Andric .addReg(MFI->getStackPtrOffsetReg(), RegState::Implicit); 1756e8d8bef9SDimitry Andric 17570b57cec5SDimitry Andric if (RI.spillSGPRToVGPR()) 17580b57cec5SDimitry Andric FrameInfo.setStackID(FrameIndex, TargetStackID::SGPRSpill); 17590b57cec5SDimitry Andric return; 17600b57cec5SDimitry Andric } 17610b57cec5SDimitry Andric 176206c3fb27SDimitry Andric unsigned Opcode = getVectorRegSpillSaveOpcode(VReg ? VReg : SrcReg, RC, 176306c3fb27SDimitry Andric SpillSize, RI, *MFI); 17640b57cec5SDimitry Andric MFI->setHasSpilledVGPRs(); 17650b57cec5SDimitry Andric 1766e8d8bef9SDimitry Andric BuildMI(MBB, MI, DL, get(Opcode)) 1767e8d8bef9SDimitry Andric .addReg(SrcReg, getKillRegState(isKill)) // data 17680b57cec5SDimitry Andric .addFrameIndex(FrameIndex) // addr 17690b57cec5SDimitry Andric .addReg(MFI->getStackPtrOffsetReg()) // scratch_offset 17700b57cec5SDimitry Andric .addImm(0) // offset 17710b57cec5SDimitry Andric .addMemOperand(MMO); 17720b57cec5SDimitry Andric } 17730b57cec5SDimitry Andric 17740b57cec5SDimitry Andric static unsigned getSGPRSpillRestoreOpcode(unsigned Size) { 17750b57cec5SDimitry Andric switch (Size) { 17760b57cec5SDimitry Andric case 4: 17770b57cec5SDimitry Andric return AMDGPU::SI_SPILL_S32_RESTORE; 17780b57cec5SDimitry Andric case 8: 17790b57cec5SDimitry Andric return AMDGPU::SI_SPILL_S64_RESTORE; 17800b57cec5SDimitry Andric case 12: 17810b57cec5SDimitry Andric return AMDGPU::SI_SPILL_S96_RESTORE; 17820b57cec5SDimitry Andric case 16: 17830b57cec5SDimitry Andric return AMDGPU::SI_SPILL_S128_RESTORE; 17840b57cec5SDimitry Andric case 20: 17850b57cec5SDimitry Andric return AMDGPU::SI_SPILL_S160_RESTORE; 17865ffd83dbSDimitry Andric case 24: 17875ffd83dbSDimitry Andric return AMDGPU::SI_SPILL_S192_RESTORE; 1788fe6060f1SDimitry Andric case 28: 1789fe6060f1SDimitry Andric return AMDGPU::SI_SPILL_S224_RESTORE; 17900b57cec5SDimitry Andric case 32: 17910b57cec5SDimitry Andric return AMDGPU::SI_SPILL_S256_RESTORE; 1792bdd1243dSDimitry Andric case 36: 1793bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_S288_RESTORE; 1794bdd1243dSDimitry Andric case 40: 1795bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_S320_RESTORE; 1796bdd1243dSDimitry Andric case 44: 1797bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_S352_RESTORE; 1798bdd1243dSDimitry Andric case 48: 1799bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_S384_RESTORE; 18000b57cec5SDimitry Andric case 64: 18010b57cec5SDimitry Andric return AMDGPU::SI_SPILL_S512_RESTORE; 18020b57cec5SDimitry Andric case 128: 18030b57cec5SDimitry Andric return AMDGPU::SI_SPILL_S1024_RESTORE; 18040b57cec5SDimitry Andric default: 18050b57cec5SDimitry Andric llvm_unreachable("unknown register size"); 18060b57cec5SDimitry Andric } 18070b57cec5SDimitry Andric } 
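// This helper and the restore helpers that follow mirror the *SpillSaveOpcode
// functions above: the argument is the spill size in bytes, i.e.
// TRI->getSpillSize(*RC), so for example an 8-byte SGPR class maps to
// SI_SPILL_S64_RESTORE and a 128-byte (1024-bit) class maps to
// SI_SPILL_S1024_RESTORE; any size without a dedicated pseudo is a hard error.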
18080b57cec5SDimitry Andric 18090b57cec5SDimitry Andric static unsigned getVGPRSpillRestoreOpcode(unsigned Size) { 18100b57cec5SDimitry Andric switch (Size) { 18110b57cec5SDimitry Andric case 4: 18120b57cec5SDimitry Andric return AMDGPU::SI_SPILL_V32_RESTORE; 18130b57cec5SDimitry Andric case 8: 18140b57cec5SDimitry Andric return AMDGPU::SI_SPILL_V64_RESTORE; 18150b57cec5SDimitry Andric case 12: 18160b57cec5SDimitry Andric return AMDGPU::SI_SPILL_V96_RESTORE; 18170b57cec5SDimitry Andric case 16: 18180b57cec5SDimitry Andric return AMDGPU::SI_SPILL_V128_RESTORE; 18190b57cec5SDimitry Andric case 20: 18200b57cec5SDimitry Andric return AMDGPU::SI_SPILL_V160_RESTORE; 18215ffd83dbSDimitry Andric case 24: 18225ffd83dbSDimitry Andric return AMDGPU::SI_SPILL_V192_RESTORE; 1823fe6060f1SDimitry Andric case 28: 1824fe6060f1SDimitry Andric return AMDGPU::SI_SPILL_V224_RESTORE; 18250b57cec5SDimitry Andric case 32: 18260b57cec5SDimitry Andric return AMDGPU::SI_SPILL_V256_RESTORE; 1827bdd1243dSDimitry Andric case 36: 1828bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_V288_RESTORE; 1829bdd1243dSDimitry Andric case 40: 1830bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_V320_RESTORE; 1831bdd1243dSDimitry Andric case 44: 1832bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_V352_RESTORE; 1833bdd1243dSDimitry Andric case 48: 1834bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_V384_RESTORE; 18350b57cec5SDimitry Andric case 64: 18360b57cec5SDimitry Andric return AMDGPU::SI_SPILL_V512_RESTORE; 18370b57cec5SDimitry Andric case 128: 18380b57cec5SDimitry Andric return AMDGPU::SI_SPILL_V1024_RESTORE; 18390b57cec5SDimitry Andric default: 18400b57cec5SDimitry Andric llvm_unreachable("unknown register size"); 18410b57cec5SDimitry Andric } 18420b57cec5SDimitry Andric } 18430b57cec5SDimitry Andric 18440b57cec5SDimitry Andric static unsigned getAGPRSpillRestoreOpcode(unsigned Size) { 18450b57cec5SDimitry Andric switch (Size) { 18460b57cec5SDimitry Andric case 4: 18470b57cec5SDimitry Andric return AMDGPU::SI_SPILL_A32_RESTORE; 18480b57cec5SDimitry Andric case 8: 18490b57cec5SDimitry Andric return AMDGPU::SI_SPILL_A64_RESTORE; 1850e8d8bef9SDimitry Andric case 12: 1851e8d8bef9SDimitry Andric return AMDGPU::SI_SPILL_A96_RESTORE; 18520b57cec5SDimitry Andric case 16: 18530b57cec5SDimitry Andric return AMDGPU::SI_SPILL_A128_RESTORE; 1854e8d8bef9SDimitry Andric case 20: 1855e8d8bef9SDimitry Andric return AMDGPU::SI_SPILL_A160_RESTORE; 1856e8d8bef9SDimitry Andric case 24: 1857e8d8bef9SDimitry Andric return AMDGPU::SI_SPILL_A192_RESTORE; 1858fe6060f1SDimitry Andric case 28: 1859fe6060f1SDimitry Andric return AMDGPU::SI_SPILL_A224_RESTORE; 1860e8d8bef9SDimitry Andric case 32: 1861e8d8bef9SDimitry Andric return AMDGPU::SI_SPILL_A256_RESTORE; 1862bdd1243dSDimitry Andric case 36: 1863bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_A288_RESTORE; 1864bdd1243dSDimitry Andric case 40: 1865bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_A320_RESTORE; 1866bdd1243dSDimitry Andric case 44: 1867bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_A352_RESTORE; 1868bdd1243dSDimitry Andric case 48: 1869bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_A384_RESTORE; 18700b57cec5SDimitry Andric case 64: 18710b57cec5SDimitry Andric return AMDGPU::SI_SPILL_A512_RESTORE; 18720b57cec5SDimitry Andric case 128: 18730b57cec5SDimitry Andric return AMDGPU::SI_SPILL_A1024_RESTORE; 18740b57cec5SDimitry Andric default: 18750b57cec5SDimitry Andric llvm_unreachable("unknown register size"); 18760b57cec5SDimitry Andric } 18770b57cec5SDimitry Andric } 18780b57cec5SDimitry 
Andric 18790eae32dcSDimitry Andric static unsigned getAVSpillRestoreOpcode(unsigned Size) { 18800eae32dcSDimitry Andric switch (Size) { 18810eae32dcSDimitry Andric case 4: 18820eae32dcSDimitry Andric return AMDGPU::SI_SPILL_AV32_RESTORE; 18830eae32dcSDimitry Andric case 8: 18840eae32dcSDimitry Andric return AMDGPU::SI_SPILL_AV64_RESTORE; 18850eae32dcSDimitry Andric case 12: 18860eae32dcSDimitry Andric return AMDGPU::SI_SPILL_AV96_RESTORE; 18870eae32dcSDimitry Andric case 16: 18880eae32dcSDimitry Andric return AMDGPU::SI_SPILL_AV128_RESTORE; 18890eae32dcSDimitry Andric case 20: 18900eae32dcSDimitry Andric return AMDGPU::SI_SPILL_AV160_RESTORE; 18910eae32dcSDimitry Andric case 24: 18920eae32dcSDimitry Andric return AMDGPU::SI_SPILL_AV192_RESTORE; 18930eae32dcSDimitry Andric case 28: 18940eae32dcSDimitry Andric return AMDGPU::SI_SPILL_AV224_RESTORE; 18950eae32dcSDimitry Andric case 32: 18960eae32dcSDimitry Andric return AMDGPU::SI_SPILL_AV256_RESTORE; 1897bdd1243dSDimitry Andric case 36: 1898bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_AV288_RESTORE; 1899bdd1243dSDimitry Andric case 40: 1900bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_AV320_RESTORE; 1901bdd1243dSDimitry Andric case 44: 1902bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_AV352_RESTORE; 1903bdd1243dSDimitry Andric case 48: 1904bdd1243dSDimitry Andric return AMDGPU::SI_SPILL_AV384_RESTORE; 19050eae32dcSDimitry Andric case 64: 19060eae32dcSDimitry Andric return AMDGPU::SI_SPILL_AV512_RESTORE; 19070eae32dcSDimitry Andric case 128: 19080eae32dcSDimitry Andric return AMDGPU::SI_SPILL_AV1024_RESTORE; 19090eae32dcSDimitry Andric default: 19100eae32dcSDimitry Andric llvm_unreachable("unknown register size"); 19110eae32dcSDimitry Andric } 19120eae32dcSDimitry Andric } 19130eae32dcSDimitry Andric 19145f757f3fSDimitry Andric static unsigned getWWMRegSpillRestoreOpcode(unsigned Size, 19155f757f3fSDimitry Andric bool IsVectorSuperClass) { 191606c3fb27SDimitry Andric // Currently, there is only 32-bit WWM register spills needed. 191706c3fb27SDimitry Andric if (Size != 4) 191806c3fb27SDimitry Andric llvm_unreachable("unknown wwm register spill size"); 191906c3fb27SDimitry Andric 19205f757f3fSDimitry Andric if (IsVectorSuperClass) 19215f757f3fSDimitry Andric return AMDGPU::SI_SPILL_WWM_AV32_RESTORE; 19225f757f3fSDimitry Andric 192306c3fb27SDimitry Andric return AMDGPU::SI_SPILL_WWM_V32_RESTORE; 192406c3fb27SDimitry Andric } 192506c3fb27SDimitry Andric 192606c3fb27SDimitry Andric static unsigned 192706c3fb27SDimitry Andric getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC, 192806c3fb27SDimitry Andric unsigned Size, const SIRegisterInfo &TRI, 192906c3fb27SDimitry Andric const SIMachineFunctionInfo &MFI) { 19305f757f3fSDimitry Andric bool IsVectorSuperClass = TRI.isVectorSuperClass(RC); 19315f757f3fSDimitry Andric 193206c3fb27SDimitry Andric // Choose the right opcode if restoring a WWM register. 193306c3fb27SDimitry Andric if (MFI.checkFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG)) 19345f757f3fSDimitry Andric return getWWMRegSpillRestoreOpcode(Size, IsVectorSuperClass); 193506c3fb27SDimitry Andric 19365f757f3fSDimitry Andric if (IsVectorSuperClass) 193706c3fb27SDimitry Andric return getAVSpillRestoreOpcode(Size); 193806c3fb27SDimitry Andric 193906c3fb27SDimitry Andric return TRI.isAGPRClass(RC) ? 
getAGPRSpillRestoreOpcode(Size) 194006c3fb27SDimitry Andric : getVGPRSpillRestoreOpcode(Size); 194106c3fb27SDimitry Andric } 194206c3fb27SDimitry Andric 19430b57cec5SDimitry Andric void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, 19440b57cec5SDimitry Andric MachineBasicBlock::iterator MI, 19455ffd83dbSDimitry Andric Register DestReg, int FrameIndex, 19460b57cec5SDimitry Andric const TargetRegisterClass *RC, 1947bdd1243dSDimitry Andric const TargetRegisterInfo *TRI, 1948bdd1243dSDimitry Andric Register VReg) const { 19490b57cec5SDimitry Andric MachineFunction *MF = MBB.getParent(); 19500b57cec5SDimitry Andric SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); 19510b57cec5SDimitry Andric MachineFrameInfo &FrameInfo = MF->getFrameInfo(); 19520b57cec5SDimitry Andric const DebugLoc &DL = MBB.findDebugLoc(MI); 19530b57cec5SDimitry Andric unsigned SpillSize = TRI->getSpillSize(*RC); 19540b57cec5SDimitry Andric 19550b57cec5SDimitry Andric MachinePointerInfo PtrInfo 19560b57cec5SDimitry Andric = MachinePointerInfo::getFixedStack(*MF, FrameIndex); 19570b57cec5SDimitry Andric 19580b57cec5SDimitry Andric MachineMemOperand *MMO = MF->getMachineMemOperand( 19595ffd83dbSDimitry Andric PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FrameIndex), 19605ffd83dbSDimitry Andric FrameInfo.getObjectAlign(FrameIndex)); 19610b57cec5SDimitry Andric 19620b57cec5SDimitry Andric if (RI.isSGPRClass(RC)) { 19630b57cec5SDimitry Andric MFI->setHasSpilledSGPRs(); 1964480093f4SDimitry Andric assert(DestReg != AMDGPU::M0 && "m0 should not be reloaded into"); 19655ffd83dbSDimitry Andric assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI && 19665ffd83dbSDimitry Andric DestReg != AMDGPU::EXEC && "exec should not be spilled"); 19670b57cec5SDimitry Andric 19680b57cec5SDimitry Andric // FIXME: Maybe this should not include a memoperand because it will be 19690b57cec5SDimitry Andric // lowered to non-memory instructions. 19700b57cec5SDimitry Andric const MCInstrDesc &OpDesc = get(getSGPRSpillRestoreOpcode(SpillSize)); 19715ffd83dbSDimitry Andric if (DestReg.isVirtual() && SpillSize == 4) { 19720b57cec5SDimitry Andric MachineRegisterInfo &MRI = MF->getRegInfo(); 19735ffd83dbSDimitry Andric MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0_XEXECRegClass); 19740b57cec5SDimitry Andric } 19750b57cec5SDimitry Andric 19760b57cec5SDimitry Andric if (RI.spillSGPRToVGPR()) 19770b57cec5SDimitry Andric FrameInfo.setStackID(FrameIndex, TargetStackID::SGPRSpill); 19788bcb0991SDimitry Andric BuildMI(MBB, MI, DL, OpDesc, DestReg) 19790b57cec5SDimitry Andric .addFrameIndex(FrameIndex) // addr 19800b57cec5SDimitry Andric .addMemOperand(MMO) 19810b57cec5SDimitry Andric .addReg(MFI->getStackPtrOffsetReg(), RegState::Implicit); 1982e8d8bef9SDimitry Andric 19830b57cec5SDimitry Andric return; 19840b57cec5SDimitry Andric } 19850b57cec5SDimitry Andric 198606c3fb27SDimitry Andric unsigned Opcode = getVectorRegSpillRestoreOpcode(VReg ? 
VReg : DestReg, RC, 198706c3fb27SDimitry Andric SpillSize, RI, *MFI); 1988e8d8bef9SDimitry Andric BuildMI(MBB, MI, DL, get(Opcode), DestReg) 1989e8d8bef9SDimitry Andric .addFrameIndex(FrameIndex) // vaddr 19900b57cec5SDimitry Andric .addReg(MFI->getStackPtrOffsetReg()) // scratch_offset 19910b57cec5SDimitry Andric .addImm(0) // offset 19920b57cec5SDimitry Andric .addMemOperand(MMO); 19930b57cec5SDimitry Andric } 19940b57cec5SDimitry Andric 19950b57cec5SDimitry Andric void SIInstrInfo::insertNoop(MachineBasicBlock &MBB, 19960b57cec5SDimitry Andric MachineBasicBlock::iterator MI) const { 1997e8d8bef9SDimitry Andric insertNoops(MBB, MI, 1); 1998e8d8bef9SDimitry Andric } 1999e8d8bef9SDimitry Andric 2000e8d8bef9SDimitry Andric void SIInstrInfo::insertNoops(MachineBasicBlock &MBB, 2001e8d8bef9SDimitry Andric MachineBasicBlock::iterator MI, 2002e8d8bef9SDimitry Andric unsigned Quantity) const { 2003e8d8bef9SDimitry Andric DebugLoc DL = MBB.findDebugLoc(MI); 2004e8d8bef9SDimitry Andric while (Quantity > 0) { 2005e8d8bef9SDimitry Andric unsigned Arg = std::min(Quantity, 8u); 2006e8d8bef9SDimitry Andric Quantity -= Arg; 2007e8d8bef9SDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::S_NOP)).addImm(Arg - 1); 2008e8d8bef9SDimitry Andric } 20090b57cec5SDimitry Andric } 20100b57cec5SDimitry Andric 20110b57cec5SDimitry Andric void SIInstrInfo::insertReturn(MachineBasicBlock &MBB) const { 20120b57cec5SDimitry Andric auto MF = MBB.getParent(); 20130b57cec5SDimitry Andric SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>(); 20140b57cec5SDimitry Andric 20150b57cec5SDimitry Andric assert(Info->isEntryFunction()); 20160b57cec5SDimitry Andric 20170b57cec5SDimitry Andric if (MBB.succ_empty()) { 20180b57cec5SDimitry Andric bool HasNoTerminator = MBB.getFirstTerminator() == MBB.end(); 20190b57cec5SDimitry Andric if (HasNoTerminator) { 20200b57cec5SDimitry Andric if (Info->returnsVoid()) { 20210b57cec5SDimitry Andric BuildMI(MBB, MBB.end(), DebugLoc(), get(AMDGPU::S_ENDPGM)).addImm(0); 20220b57cec5SDimitry Andric } else { 20230b57cec5SDimitry Andric BuildMI(MBB, MBB.end(), DebugLoc(), get(AMDGPU::SI_RETURN_TO_EPILOG)); 20240b57cec5SDimitry Andric } 20250b57cec5SDimitry Andric } 20260b57cec5SDimitry Andric } 20270b57cec5SDimitry Andric } 20280b57cec5SDimitry Andric 20290b57cec5SDimitry Andric unsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) { 20300b57cec5SDimitry Andric switch (MI.getOpcode()) { 2031349cc55cSDimitry Andric default: 2032349cc55cSDimitry Andric if (MI.isMetaInstruction()) 2033349cc55cSDimitry Andric return 0; 2034349cc55cSDimitry Andric return 1; // FIXME: Do wait states equal cycles? 20350b57cec5SDimitry Andric 20360b57cec5SDimitry Andric case AMDGPU::S_NOP: 20370b57cec5SDimitry Andric return MI.getOperand(0).getImm() + 1; 2038349cc55cSDimitry Andric // SI_RETURN_TO_EPILOG is a fallthrough to code outside of the function. The 2039349cc55cSDimitry Andric // hazard, even if one exists, won't really be visible. Should we handle it?
20400b57cec5SDimitry Andric } 20410b57cec5SDimitry Andric } 20420b57cec5SDimitry Andric 20430b57cec5SDimitry Andric bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { 2044fe6060f1SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 20450b57cec5SDimitry Andric MachineBasicBlock &MBB = *MI.getParent(); 20460b57cec5SDimitry Andric DebugLoc DL = MBB.findDebugLoc(MI); 20470b57cec5SDimitry Andric switch (MI.getOpcode()) { 20480b57cec5SDimitry Andric default: return TargetInstrInfo::expandPostRAPseudo(MI); 20490b57cec5SDimitry Andric case AMDGPU::S_MOV_B64_term: 20500b57cec5SDimitry Andric // This is only a terminator to get the correct spill code placement during 20510b57cec5SDimitry Andric // register allocation. 20520b57cec5SDimitry Andric MI.setDesc(get(AMDGPU::S_MOV_B64)); 20530b57cec5SDimitry Andric break; 20540b57cec5SDimitry Andric 20550b57cec5SDimitry Andric case AMDGPU::S_MOV_B32_term: 20560b57cec5SDimitry Andric // This is only a terminator to get the correct spill code placement during 20570b57cec5SDimitry Andric // register allocation. 20580b57cec5SDimitry Andric MI.setDesc(get(AMDGPU::S_MOV_B32)); 20590b57cec5SDimitry Andric break; 20600b57cec5SDimitry Andric 20610b57cec5SDimitry Andric case AMDGPU::S_XOR_B64_term: 20620b57cec5SDimitry Andric // This is only a terminator to get the correct spill code placement during 20630b57cec5SDimitry Andric // register allocation. 20640b57cec5SDimitry Andric MI.setDesc(get(AMDGPU::S_XOR_B64)); 20650b57cec5SDimitry Andric break; 20660b57cec5SDimitry Andric 20670b57cec5SDimitry Andric case AMDGPU::S_XOR_B32_term: 20680b57cec5SDimitry Andric // This is only a terminator to get the correct spill code placement during 20690b57cec5SDimitry Andric // register allocation. 20700b57cec5SDimitry Andric MI.setDesc(get(AMDGPU::S_XOR_B32)); 20710b57cec5SDimitry Andric break; 2072e8d8bef9SDimitry Andric case AMDGPU::S_OR_B64_term: 2073e8d8bef9SDimitry Andric // This is only a terminator to get the correct spill code placement during 2074e8d8bef9SDimitry Andric // register allocation. 2075e8d8bef9SDimitry Andric MI.setDesc(get(AMDGPU::S_OR_B64)); 2076e8d8bef9SDimitry Andric break; 20770b57cec5SDimitry Andric case AMDGPU::S_OR_B32_term: 20780b57cec5SDimitry Andric // This is only a terminator to get the correct spill code placement during 20790b57cec5SDimitry Andric // register allocation. 20800b57cec5SDimitry Andric MI.setDesc(get(AMDGPU::S_OR_B32)); 20810b57cec5SDimitry Andric break; 20820b57cec5SDimitry Andric 20830b57cec5SDimitry Andric case AMDGPU::S_ANDN2_B64_term: 20840b57cec5SDimitry Andric // This is only a terminator to get the correct spill code placement during 20850b57cec5SDimitry Andric // register allocation. 20860b57cec5SDimitry Andric MI.setDesc(get(AMDGPU::S_ANDN2_B64)); 20870b57cec5SDimitry Andric break; 20880b57cec5SDimitry Andric 20890b57cec5SDimitry Andric case AMDGPU::S_ANDN2_B32_term: 20900b57cec5SDimitry Andric // This is only a terminator to get the correct spill code placement during 20910b57cec5SDimitry Andric // register allocation. 20920b57cec5SDimitry Andric MI.setDesc(get(AMDGPU::S_ANDN2_B32)); 20930b57cec5SDimitry Andric break; 20940b57cec5SDimitry Andric 2095fe6060f1SDimitry Andric case AMDGPU::S_AND_B64_term: 2096fe6060f1SDimitry Andric // This is only a terminator to get the correct spill code placement during 2097fe6060f1SDimitry Andric // register allocation. 
2098fe6060f1SDimitry Andric MI.setDesc(get(AMDGPU::S_AND_B64)); 2099fe6060f1SDimitry Andric break; 2100fe6060f1SDimitry Andric 2101fe6060f1SDimitry Andric case AMDGPU::S_AND_B32_term: 2102fe6060f1SDimitry Andric // This is only a terminator to get the correct spill code placement during 2103fe6060f1SDimitry Andric // register allocation. 2104fe6060f1SDimitry Andric MI.setDesc(get(AMDGPU::S_AND_B32)); 2105fe6060f1SDimitry Andric break; 2106fe6060f1SDimitry Andric 210706c3fb27SDimitry Andric case AMDGPU::S_AND_SAVEEXEC_B64_term: 210806c3fb27SDimitry Andric // This is only a terminator to get the correct spill code placement during 210906c3fb27SDimitry Andric // register allocation. 211006c3fb27SDimitry Andric MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B64)); 211106c3fb27SDimitry Andric break; 211206c3fb27SDimitry Andric 211306c3fb27SDimitry Andric case AMDGPU::S_AND_SAVEEXEC_B32_term: 211406c3fb27SDimitry Andric // This is only a terminator to get the correct spill code placement during 211506c3fb27SDimitry Andric // register allocation. 211606c3fb27SDimitry Andric MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B32)); 211706c3fb27SDimitry Andric break; 211806c3fb27SDimitry Andric 21195f757f3fSDimitry Andric case AMDGPU::SI_SPILL_S32_TO_VGPR: 21205f757f3fSDimitry Andric MI.setDesc(get(AMDGPU::V_WRITELANE_B32)); 21215f757f3fSDimitry Andric break; 21225f757f3fSDimitry Andric 21235f757f3fSDimitry Andric case AMDGPU::SI_RESTORE_S32_FROM_VGPR: 21245f757f3fSDimitry Andric MI.setDesc(get(AMDGPU::V_READLANE_B32)); 21255f757f3fSDimitry Andric break; 21265f757f3fSDimitry Andric 21270b57cec5SDimitry Andric case AMDGPU::V_MOV_B64_PSEUDO: { 21288bcb0991SDimitry Andric Register Dst = MI.getOperand(0).getReg(); 21298bcb0991SDimitry Andric Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0); 21308bcb0991SDimitry Andric Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1); 21310b57cec5SDimitry Andric 21320b57cec5SDimitry Andric const MachineOperand &SrcOp = MI.getOperand(1); 21330b57cec5SDimitry Andric // FIXME: Will this work for 64-bit floating point immediates? 
21340b57cec5SDimitry Andric assert(!SrcOp.isFPImm()); 213581ad6265SDimitry Andric if (ST.hasMovB64()) { 213681ad6265SDimitry Andric MI.setDesc(get(AMDGPU::V_MOV_B64_e32)); 2137bdd1243dSDimitry Andric if (SrcOp.isReg() || isInlineConstant(MI, 1) || 2138bdd1243dSDimitry Andric isUInt<32>(SrcOp.getImm())) 213981ad6265SDimitry Andric break; 214081ad6265SDimitry Andric } 21410b57cec5SDimitry Andric if (SrcOp.isImm()) { 21420b57cec5SDimitry Andric APInt Imm(64, SrcOp.getImm()); 2143fe6060f1SDimitry Andric APInt Lo(32, Imm.getLoBits(32).getZExtValue()); 2144fe6060f1SDimitry Andric APInt Hi(32, Imm.getHiBits(32).getZExtValue()); 21455f757f3fSDimitry Andric if (ST.hasPkMovB32() && Lo == Hi && isInlineConstant(Lo)) { 2146fe6060f1SDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), Dst) 2147fe6060f1SDimitry Andric .addImm(SISrcMods::OP_SEL_1) 2148fe6060f1SDimitry Andric .addImm(Lo.getSExtValue()) 2149fe6060f1SDimitry Andric .addImm(SISrcMods::OP_SEL_1) 2150fe6060f1SDimitry Andric .addImm(Lo.getSExtValue()) 2151fe6060f1SDimitry Andric .addImm(0) // op_sel_lo 2152fe6060f1SDimitry Andric .addImm(0) // op_sel_hi 2153fe6060f1SDimitry Andric .addImm(0) // neg_lo 2154fe6060f1SDimitry Andric .addImm(0) // neg_hi 2155fe6060f1SDimitry Andric .addImm(0); // clamp 2156fe6060f1SDimitry Andric } else { 21570b57cec5SDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo) 2158fe6060f1SDimitry Andric .addImm(Lo.getSExtValue()) 21590b57cec5SDimitry Andric .addReg(Dst, RegState::Implicit | RegState::Define); 21600b57cec5SDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi) 2161fe6060f1SDimitry Andric .addImm(Hi.getSExtValue()) 21620b57cec5SDimitry Andric .addReg(Dst, RegState::Implicit | RegState::Define); 2163fe6060f1SDimitry Andric } 21640b57cec5SDimitry Andric } else { 21650b57cec5SDimitry Andric assert(SrcOp.isReg()); 21665f757f3fSDimitry Andric if (ST.hasPkMovB32() && 2167fe6060f1SDimitry Andric !RI.isAGPR(MBB.getParent()->getRegInfo(), SrcOp.getReg())) { 2168fe6060f1SDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), Dst) 2169fe6060f1SDimitry Andric .addImm(SISrcMods::OP_SEL_1) // src0_mod 2170fe6060f1SDimitry Andric .addReg(SrcOp.getReg()) 2171fe6060f1SDimitry Andric .addImm(SISrcMods::OP_SEL_0 | SISrcMods::OP_SEL_1) // src1_mod 2172fe6060f1SDimitry Andric .addReg(SrcOp.getReg()) 2173fe6060f1SDimitry Andric .addImm(0) // op_sel_lo 2174fe6060f1SDimitry Andric .addImm(0) // op_sel_hi 2175fe6060f1SDimitry Andric .addImm(0) // neg_lo 2176fe6060f1SDimitry Andric .addImm(0) // neg_hi 2177fe6060f1SDimitry Andric .addImm(0); // clamp 2178fe6060f1SDimitry Andric } else { 21790b57cec5SDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo) 21800b57cec5SDimitry Andric .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub0)) 21810b57cec5SDimitry Andric .addReg(Dst, RegState::Implicit | RegState::Define); 21820b57cec5SDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi) 21830b57cec5SDimitry Andric .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub1)) 21840b57cec5SDimitry Andric .addReg(Dst, RegState::Implicit | RegState::Define); 21850b57cec5SDimitry Andric } 2186fe6060f1SDimitry Andric } 21870b57cec5SDimitry Andric MI.eraseFromParent(); 21880b57cec5SDimitry Andric break; 21890b57cec5SDimitry Andric } 21908bcb0991SDimitry Andric case AMDGPU::V_MOV_B64_DPP_PSEUDO: { 21918bcb0991SDimitry Andric expandMovDPP64(MI); 21928bcb0991SDimitry Andric break; 21938bcb0991SDimitry Andric } 2194fe6060f1SDimitry Andric case AMDGPU::S_MOV_B64_IMM_PSEUDO: { 
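// Added descriptive note: a 64-bit scalar immediate that fits in 32 bits or is
// an inline constant is kept as a single S_MOV_B64 below; anything else is
// split into two S_MOV_B32s that write the low and high halves of the
// destination.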
2195fe6060f1SDimitry Andric const MachineOperand &SrcOp = MI.getOperand(1); 2196fe6060f1SDimitry Andric assert(!SrcOp.isFPImm()); 2197fe6060f1SDimitry Andric APInt Imm(64, SrcOp.getImm()); 2198fe6060f1SDimitry Andric if (Imm.isIntN(32) || isInlineConstant(Imm)) { 2199fe6060f1SDimitry Andric MI.setDesc(get(AMDGPU::S_MOV_B64)); 2200fe6060f1SDimitry Andric break; 2201fe6060f1SDimitry Andric } 2202fe6060f1SDimitry Andric 2203fe6060f1SDimitry Andric Register Dst = MI.getOperand(0).getReg(); 2204fe6060f1SDimitry Andric Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0); 2205fe6060f1SDimitry Andric Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1); 2206fe6060f1SDimitry Andric 2207fe6060f1SDimitry Andric APInt Lo(32, Imm.getLoBits(32).getZExtValue()); 2208fe6060f1SDimitry Andric APInt Hi(32, Imm.getHiBits(32).getZExtValue()); 2209fe6060f1SDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DstLo) 2210fe6060f1SDimitry Andric .addImm(Lo.getSExtValue()) 2211fe6060f1SDimitry Andric .addReg(Dst, RegState::Implicit | RegState::Define); 2212fe6060f1SDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DstHi) 2213fe6060f1SDimitry Andric .addImm(Hi.getSExtValue()) 2214fe6060f1SDimitry Andric .addReg(Dst, RegState::Implicit | RegState::Define); 2215fe6060f1SDimitry Andric MI.eraseFromParent(); 2216fe6060f1SDimitry Andric break; 2217fe6060f1SDimitry Andric } 22180b57cec5SDimitry Andric case AMDGPU::V_SET_INACTIVE_B32: { 22190b57cec5SDimitry Andric unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64; 22200b57cec5SDimitry Andric unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; 222181ad6265SDimitry Andric // FIXME: We may possibly optimize the COPY once we find ways to make LLVM 222281ad6265SDimitry Andric // optimizations (mainly Register Coalescer) aware of WWM register liveness. 222381ad6265SDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), MI.getOperand(0).getReg()) 222481ad6265SDimitry Andric .add(MI.getOperand(1)); 2225fe6060f1SDimitry Andric auto FirstNot = BuildMI(MBB, MI, DL, get(NotOpc), Exec).addReg(Exec); 2226fe6060f1SDimitry Andric FirstNot->addRegisterDead(AMDGPU::SCC, TRI); // SCC is overwritten 22270b57cec5SDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), MI.getOperand(0).getReg()) 22280b57cec5SDimitry Andric .add(MI.getOperand(2)); 22290b57cec5SDimitry Andric BuildMI(MBB, MI, DL, get(NotOpc), Exec) 22300b57cec5SDimitry Andric .addReg(Exec); 22310b57cec5SDimitry Andric MI.eraseFromParent(); 22320b57cec5SDimitry Andric break; 22330b57cec5SDimitry Andric } 22340b57cec5SDimitry Andric case AMDGPU::V_SET_INACTIVE_B64: { 22350b57cec5SDimitry Andric unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64; 22360b57cec5SDimitry Andric unsigned Exec = ST.isWave32() ? 
AMDGPU::EXEC_LO : AMDGPU::EXEC; 223781ad6265SDimitry Andric MachineInstr *Copy = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO), 223881ad6265SDimitry Andric MI.getOperand(0).getReg()) 223981ad6265SDimitry Andric .add(MI.getOperand(1)); 224081ad6265SDimitry Andric expandPostRAPseudo(*Copy); 2241fe6060f1SDimitry Andric auto FirstNot = BuildMI(MBB, MI, DL, get(NotOpc), Exec).addReg(Exec); 2242fe6060f1SDimitry Andric FirstNot->addRegisterDead(AMDGPU::SCC, TRI); // SCC is overwritten 224381ad6265SDimitry Andric Copy = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO), 22440b57cec5SDimitry Andric MI.getOperand(0).getReg()) 22450b57cec5SDimitry Andric .add(MI.getOperand(2)); 22460b57cec5SDimitry Andric expandPostRAPseudo(*Copy); 22470b57cec5SDimitry Andric BuildMI(MBB, MI, DL, get(NotOpc), Exec) 22480b57cec5SDimitry Andric .addReg(Exec); 22490b57cec5SDimitry Andric MI.eraseFromParent(); 22500b57cec5SDimitry Andric break; 22510b57cec5SDimitry Andric } 2252e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1: 2253e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2: 2254e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3: 2255e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4: 2256e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5: 2257e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8: 2258bdd1243dSDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9: 2259bdd1243dSDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10: 2260bdd1243dSDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11: 2261bdd1243dSDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12: 2262e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16: 2263e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32: 2264e8d8bef9SDimitry Andric case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1: 2265e8d8bef9SDimitry Andric case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2: 2266e8d8bef9SDimitry Andric case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3: 2267e8d8bef9SDimitry Andric case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4: 2268e8d8bef9SDimitry Andric case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5: 2269e8d8bef9SDimitry Andric case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8: 227006c3fb27SDimitry Andric case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9: 227106c3fb27SDimitry Andric case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10: 227206c3fb27SDimitry Andric case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11: 227306c3fb27SDimitry Andric case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12: 2274e8d8bef9SDimitry Andric case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16: 2275e8d8bef9SDimitry Andric case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32: 2276e8d8bef9SDimitry Andric case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1: 2277e8d8bef9SDimitry Andric case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2: 2278e8d8bef9SDimitry Andric case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4: 2279e8d8bef9SDimitry Andric case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8: 2280e8d8bef9SDimitry Andric case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: { 22815ffd83dbSDimitry Andric const TargetRegisterClass *EltRC = getOpRegClass(MI, 2); 22825ffd83dbSDimitry Andric 22835ffd83dbSDimitry Andric unsigned Opc; 22845ffd83dbSDimitry Andric if (RI.hasVGPRs(EltRC)) { 2285e8d8bef9SDimitry Andric Opc = AMDGPU::V_MOVRELD_B32_e32; 22865ffd83dbSDimitry Andric } else { 2287e8d8bef9SDimitry Andric Opc = 
RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64 2288e8d8bef9SDimitry Andric : AMDGPU::S_MOVRELD_B32; 22895ffd83dbSDimitry Andric } 22905ffd83dbSDimitry Andric 22915ffd83dbSDimitry Andric const MCInstrDesc &OpDesc = get(Opc); 22928bcb0991SDimitry Andric Register VecReg = MI.getOperand(0).getReg(); 22930b57cec5SDimitry Andric bool IsUndef = MI.getOperand(1).isUndef(); 22945ffd83dbSDimitry Andric unsigned SubReg = MI.getOperand(3).getImm(); 22950b57cec5SDimitry Andric assert(VecReg == MI.getOperand(1).getReg()); 22960b57cec5SDimitry Andric 22975ffd83dbSDimitry Andric MachineInstrBuilder MIB = 22985ffd83dbSDimitry Andric BuildMI(MBB, MI, DL, OpDesc) 22990b57cec5SDimitry Andric .addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef) 23000b57cec5SDimitry Andric .add(MI.getOperand(2)) 23010b57cec5SDimitry Andric .addReg(VecReg, RegState::ImplicitDefine) 23025ffd83dbSDimitry Andric .addReg(VecReg, RegState::Implicit | (IsUndef ? RegState::Undef : 0)); 23030b57cec5SDimitry Andric 23040b57cec5SDimitry Andric const int ImpDefIdx = 2305bdd1243dSDimitry Andric OpDesc.getNumOperands() + OpDesc.implicit_uses().size(); 23060b57cec5SDimitry Andric const int ImpUseIdx = ImpDefIdx + 1; 23075ffd83dbSDimitry Andric MIB->tieOperands(ImpDefIdx, ImpUseIdx); 23080b57cec5SDimitry Andric MI.eraseFromParent(); 23090b57cec5SDimitry Andric break; 23100b57cec5SDimitry Andric } 2311e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1: 2312e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2: 2313e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3: 2314e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4: 2315e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5: 2316e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8: 2317bdd1243dSDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9: 2318bdd1243dSDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10: 2319bdd1243dSDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11: 2320bdd1243dSDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12: 2321e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16: 2322e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: { 2323e8d8bef9SDimitry Andric assert(ST.useVGPRIndexMode()); 2324e8d8bef9SDimitry Andric Register VecReg = MI.getOperand(0).getReg(); 2325e8d8bef9SDimitry Andric bool IsUndef = MI.getOperand(1).isUndef(); 2326e8d8bef9SDimitry Andric Register Idx = MI.getOperand(3).getReg(); 2327e8d8bef9SDimitry Andric Register SubReg = MI.getOperand(4).getImm(); 2328e8d8bef9SDimitry Andric 2329e8d8bef9SDimitry Andric MachineInstr *SetOn = BuildMI(MBB, MI, DL, get(AMDGPU::S_SET_GPR_IDX_ON)) 2330e8d8bef9SDimitry Andric .addReg(Idx) 2331e8d8bef9SDimitry Andric .addImm(AMDGPU::VGPRIndexMode::DST_ENABLE); 2332e8d8bef9SDimitry Andric SetOn->getOperand(3).setIsUndef(); 2333e8d8bef9SDimitry Andric 2334349cc55cSDimitry Andric const MCInstrDesc &OpDesc = get(AMDGPU::V_MOV_B32_indirect_write); 2335e8d8bef9SDimitry Andric MachineInstrBuilder MIB = 2336e8d8bef9SDimitry Andric BuildMI(MBB, MI, DL, OpDesc) 2337e8d8bef9SDimitry Andric .addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef) 2338e8d8bef9SDimitry Andric .add(MI.getOperand(2)) 2339e8d8bef9SDimitry Andric .addReg(VecReg, RegState::ImplicitDefine) 2340e8d8bef9SDimitry Andric .addReg(VecReg, 2341e8d8bef9SDimitry Andric RegState::Implicit | (IsUndef ? 
RegState::Undef : 0)); 2342e8d8bef9SDimitry Andric 2343bdd1243dSDimitry Andric const int ImpDefIdx = 2344bdd1243dSDimitry Andric OpDesc.getNumOperands() + OpDesc.implicit_uses().size(); 2345e8d8bef9SDimitry Andric const int ImpUseIdx = ImpDefIdx + 1; 2346e8d8bef9SDimitry Andric MIB->tieOperands(ImpDefIdx, ImpUseIdx); 2347e8d8bef9SDimitry Andric 2348e8d8bef9SDimitry Andric MachineInstr *SetOff = BuildMI(MBB, MI, DL, get(AMDGPU::S_SET_GPR_IDX_OFF)); 2349e8d8bef9SDimitry Andric 2350e8d8bef9SDimitry Andric finalizeBundle(MBB, SetOn->getIterator(), std::next(SetOff->getIterator())); 2351e8d8bef9SDimitry Andric 2352e8d8bef9SDimitry Andric MI.eraseFromParent(); 2353e8d8bef9SDimitry Andric break; 2354e8d8bef9SDimitry Andric } 2355e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1: 2356e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2: 2357e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3: 2358e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4: 2359e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5: 2360e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8: 2361bdd1243dSDimitry Andric case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9: 2362bdd1243dSDimitry Andric case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10: 2363bdd1243dSDimitry Andric case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11: 2364bdd1243dSDimitry Andric case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12: 2365e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16: 2366e8d8bef9SDimitry Andric case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: { 2367e8d8bef9SDimitry Andric assert(ST.useVGPRIndexMode()); 2368e8d8bef9SDimitry Andric Register Dst = MI.getOperand(0).getReg(); 2369e8d8bef9SDimitry Andric Register VecReg = MI.getOperand(1).getReg(); 2370e8d8bef9SDimitry Andric bool IsUndef = MI.getOperand(1).isUndef(); 2371e8d8bef9SDimitry Andric Register Idx = MI.getOperand(2).getReg(); 2372e8d8bef9SDimitry Andric Register SubReg = MI.getOperand(3).getImm(); 2373e8d8bef9SDimitry Andric 2374e8d8bef9SDimitry Andric MachineInstr *SetOn = BuildMI(MBB, MI, DL, get(AMDGPU::S_SET_GPR_IDX_ON)) 2375e8d8bef9SDimitry Andric .addReg(Idx) 2376e8d8bef9SDimitry Andric .addImm(AMDGPU::VGPRIndexMode::SRC0_ENABLE); 2377e8d8bef9SDimitry Andric SetOn->getOperand(3).setIsUndef(); 2378e8d8bef9SDimitry Andric 2379349cc55cSDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_indirect_read)) 2380e8d8bef9SDimitry Andric .addDef(Dst) 2381e8d8bef9SDimitry Andric .addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef) 2382349cc55cSDimitry Andric .addReg(VecReg, RegState::Implicit | (IsUndef ? 
RegState::Undef : 0)); 2383e8d8bef9SDimitry Andric 2384e8d8bef9SDimitry Andric MachineInstr *SetOff = BuildMI(MBB, MI, DL, get(AMDGPU::S_SET_GPR_IDX_OFF)); 2385e8d8bef9SDimitry Andric 2386e8d8bef9SDimitry Andric finalizeBundle(MBB, SetOn->getIterator(), std::next(SetOff->getIterator())); 2387e8d8bef9SDimitry Andric 2388e8d8bef9SDimitry Andric MI.eraseFromParent(); 2389e8d8bef9SDimitry Andric break; 2390e8d8bef9SDimitry Andric } 23910b57cec5SDimitry Andric case AMDGPU::SI_PC_ADD_REL_OFFSET: { 23920b57cec5SDimitry Andric MachineFunction &MF = *MBB.getParent(); 23938bcb0991SDimitry Andric Register Reg = MI.getOperand(0).getReg(); 23948bcb0991SDimitry Andric Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0); 23958bcb0991SDimitry Andric Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1); 23965f757f3fSDimitry Andric MachineOperand OpLo = MI.getOperand(1); 23975f757f3fSDimitry Andric MachineOperand OpHi = MI.getOperand(2); 23980b57cec5SDimitry Andric 23990b57cec5SDimitry Andric // Create a bundle so these instructions won't be re-ordered by the 24000b57cec5SDimitry Andric // post-RA scheduler. 24010b57cec5SDimitry Andric MIBundleBuilder Bundler(MBB, MI); 24020b57cec5SDimitry Andric Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_GETPC_B64), Reg)); 24030b57cec5SDimitry Andric 24045f757f3fSDimitry Andric // What we want here is an offset from the value returned by s_getpc (which 24055f757f3fSDimitry Andric // is the address of the s_add_u32 instruction) to the global variable, but 24065f757f3fSDimitry Andric // since the encoding of $symbol starts 4 bytes after the start of the 24075f757f3fSDimitry Andric // s_add_u32 instruction, we end up with an offset that is 4 bytes too 24085f757f3fSDimitry Andric // small. This requires us to add 4 to the global variable offset in order 24095f757f3fSDimitry Andric // to compute the correct address. Similarly for the s_addc_u32 instruction, 24105f757f3fSDimitry Andric // the encoding of $symbol starts 12 bytes after the start of the s_add_u32 24115f757f3fSDimitry Andric // instruction. 24120b57cec5SDimitry Andric 24135f757f3fSDimitry Andric if (OpLo.isGlobal()) 24145f757f3fSDimitry Andric OpLo.setOffset(OpLo.getOffset() + 4); 24155f757f3fSDimitry Andric Bundler.append( 24165f757f3fSDimitry Andric BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo).addReg(RegLo).add(OpLo)); 24170b57cec5SDimitry Andric 24185f757f3fSDimitry Andric if (OpHi.isGlobal()) 24195f757f3fSDimitry Andric OpHi.setOffset(OpHi.getOffset() + 12); 24205f757f3fSDimitry Andric Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADDC_U32), RegHi) 24215f757f3fSDimitry Andric .addReg(RegHi) 24225f757f3fSDimitry Andric .add(OpHi)); 24235f757f3fSDimitry Andric 24240b57cec5SDimitry Andric finalizeBundle(MBB, Bundler.begin()); 24250b57cec5SDimitry Andric 24260b57cec5SDimitry Andric MI.eraseFromParent(); 24270b57cec5SDimitry Andric break; 24280b57cec5SDimitry Andric } 2429fe6060f1SDimitry Andric case AMDGPU::ENTER_STRICT_WWM: { 24300b57cec5SDimitry Andric // This only gets its own opcode so that SIPreAllocateWWMRegs can tell when 2431fe6060f1SDimitry Andric // Whole Wave Mode is entered. 24320b57cec5SDimitry Andric MI.setDesc(get(ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 24330b57cec5SDimitry Andric : AMDGPU::S_OR_SAVEEXEC_B64)); 24340b57cec5SDimitry Andric break; 24350b57cec5SDimitry Andric } 2436fe6060f1SDimitry Andric case AMDGPU::ENTER_STRICT_WQM: { 24370b57cec5SDimitry Andric // This only gets its own opcode so that SIPreAllocateWWMRegs can tell when 2438fe6060f1SDimitry Andric // STRICT_WQM is entered. 
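// Added descriptive note: the expansion copies the current exec mask into the
// destination register and then applies s_wqm to exec; the EXIT_STRICT_* cases
// below lower to a plain s_mov so the saved mask can be restored.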
2439fe6060f1SDimitry Andric const unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; 2440fe6060f1SDimitry Andric const unsigned WQMOp = ST.isWave32() ? AMDGPU::S_WQM_B32 : AMDGPU::S_WQM_B64; 2441fe6060f1SDimitry Andric const unsigned MovOp = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; 2442fe6060f1SDimitry Andric BuildMI(MBB, MI, DL, get(MovOp), MI.getOperand(0).getReg()).addReg(Exec); 2443fe6060f1SDimitry Andric BuildMI(MBB, MI, DL, get(WQMOp), Exec).addReg(Exec); 2444fe6060f1SDimitry Andric 2445fe6060f1SDimitry Andric MI.eraseFromParent(); 2446fe6060f1SDimitry Andric break; 2447fe6060f1SDimitry Andric } 2448fe6060f1SDimitry Andric case AMDGPU::EXIT_STRICT_WWM: 2449fe6060f1SDimitry Andric case AMDGPU::EXIT_STRICT_WQM: { 2450fe6060f1SDimitry Andric // This only gets its own opcode so that SIPreAllocateWWMRegs can tell when 2451fe6060f1SDimitry Andric // WWM/STRICT_WQM is exited. 24520b57cec5SDimitry Andric MI.setDesc(get(ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64)); 24530b57cec5SDimitry Andric break; 24540b57cec5SDimitry Andric } 2455bdd1243dSDimitry Andric case AMDGPU::ENTER_PSEUDO_WM: 2456bdd1243dSDimitry Andric case AMDGPU::EXIT_PSEUDO_WM: { 2457bdd1243dSDimitry Andric // These do nothing. 2458bdd1243dSDimitry Andric MI.eraseFromParent(); 2459bdd1243dSDimitry Andric break; 2460bdd1243dSDimitry Andric } 246181ad6265SDimitry Andric case AMDGPU::SI_RETURN: { 246281ad6265SDimitry Andric const MachineFunction *MF = MBB.getParent(); 246381ad6265SDimitry Andric const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>(); 246481ad6265SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 246581ad6265SDimitry Andric // Hiding the return address use with SI_RETURN may lead to extra kills in 246681ad6265SDimitry Andric // the function and missing live-ins. We are fine in practice because callee 246781ad6265SDimitry Andric // saved register handling ensures the register value is restored before 246881ad6265SDimitry Andric // RET, but we need the undef flag here to appease the MachineVerifier 246981ad6265SDimitry Andric // liveness checks. 247081ad6265SDimitry Andric MachineInstrBuilder MIB = 247181ad6265SDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::S_SETPC_B64_return)) 247281ad6265SDimitry Andric .addReg(TRI->getReturnAddressReg(*MF), RegState::Undef); 247381ad6265SDimitry Andric 247481ad6265SDimitry Andric MIB.copyImplicitOps(MI); 247581ad6265SDimitry Andric MI.eraseFromParent(); 247681ad6265SDimitry Andric break; 247781ad6265SDimitry Andric } 24780b57cec5SDimitry Andric } 24790b57cec5SDimitry Andric return true; 24800b57cec5SDimitry Andric } 24810b57cec5SDimitry Andric 24825f757f3fSDimitry Andric void SIInstrInfo::reMaterialize(MachineBasicBlock &MBB, 24835f757f3fSDimitry Andric MachineBasicBlock::iterator I, Register DestReg, 24845f757f3fSDimitry Andric unsigned SubIdx, const MachineInstr &Orig, 24855f757f3fSDimitry Andric const TargetRegisterInfo &RI) const { 24865f757f3fSDimitry Andric 24875f757f3fSDimitry Andric // Try shrinking the instruction to remat only the part needed for the current 24885f757f3fSDimitry Andric // context. 24895f757f3fSDimitry Andric // TODO: Handle more cases.
24905f757f3fSDimitry Andric unsigned Opcode = Orig.getOpcode(); 24915f757f3fSDimitry Andric switch (Opcode) { 24925f757f3fSDimitry Andric case AMDGPU::S_LOAD_DWORDX16_IMM: 24935f757f3fSDimitry Andric case AMDGPU::S_LOAD_DWORDX8_IMM: { 24945f757f3fSDimitry Andric if (SubIdx != 0) 24955f757f3fSDimitry Andric break; 24965f757f3fSDimitry Andric 24975f757f3fSDimitry Andric if (I == MBB.end()) 24985f757f3fSDimitry Andric break; 24995f757f3fSDimitry Andric 25005f757f3fSDimitry Andric if (I->isBundled()) 25015f757f3fSDimitry Andric break; 25025f757f3fSDimitry Andric 25035f757f3fSDimitry Andric // Look for a single use of the register that is also a subreg. 25045f757f3fSDimitry Andric Register RegToFind = Orig.getOperand(0).getReg(); 25055f757f3fSDimitry Andric MachineOperand *UseMO = nullptr; 25065f757f3fSDimitry Andric for (auto &CandMO : I->operands()) { 25075f757f3fSDimitry Andric if (!CandMO.isReg() || CandMO.getReg() != RegToFind || CandMO.isDef()) 25085f757f3fSDimitry Andric continue; 25095f757f3fSDimitry Andric if (UseMO) { 25105f757f3fSDimitry Andric UseMO = nullptr; 25115f757f3fSDimitry Andric break; 25125f757f3fSDimitry Andric } 25135f757f3fSDimitry Andric UseMO = &CandMO; 25145f757f3fSDimitry Andric } 25155f757f3fSDimitry Andric if (!UseMO || UseMO->getSubReg() == AMDGPU::NoSubRegister) 25165f757f3fSDimitry Andric break; 25175f757f3fSDimitry Andric 25185f757f3fSDimitry Andric unsigned Offset = RI.getSubRegIdxOffset(UseMO->getSubReg()); 25195f757f3fSDimitry Andric unsigned SubregSize = RI.getSubRegIdxSize(UseMO->getSubReg()); 25205f757f3fSDimitry Andric 25215f757f3fSDimitry Andric MachineFunction *MF = MBB.getParent(); 25225f757f3fSDimitry Andric MachineRegisterInfo &MRI = MF->getRegInfo(); 25235f757f3fSDimitry Andric assert(MRI.use_nodbg_empty(DestReg) && "DestReg should have no users yet."); 25245f757f3fSDimitry Andric 25255f757f3fSDimitry Andric unsigned NewOpcode = -1; 25265f757f3fSDimitry Andric if (SubregSize == 256) 25275f757f3fSDimitry Andric NewOpcode = AMDGPU::S_LOAD_DWORDX8_IMM; 25285f757f3fSDimitry Andric else if (SubregSize == 128) 25295f757f3fSDimitry Andric NewOpcode = AMDGPU::S_LOAD_DWORDX4_IMM; 25305f757f3fSDimitry Andric else 25315f757f3fSDimitry Andric break; 25325f757f3fSDimitry Andric 25335f757f3fSDimitry Andric const MCInstrDesc &TID = get(NewOpcode); 25345f757f3fSDimitry Andric const TargetRegisterClass *NewRC = 25355f757f3fSDimitry Andric RI.getAllocatableClass(getRegClass(TID, 0, &RI, *MF)); 25365f757f3fSDimitry Andric MRI.setRegClass(DestReg, NewRC); 25375f757f3fSDimitry Andric 25385f757f3fSDimitry Andric UseMO->setReg(DestReg); 25395f757f3fSDimitry Andric UseMO->setSubReg(AMDGPU::NoSubRegister); 25405f757f3fSDimitry Andric 25415f757f3fSDimitry Andric // Use a smaller load with the desired size, possibly with updated offset. 
25425f757f3fSDimitry Andric MachineInstr *MI = MF->CloneMachineInstr(&Orig); 25435f757f3fSDimitry Andric MI->setDesc(TID); 25445f757f3fSDimitry Andric MI->getOperand(0).setReg(DestReg); 25455f757f3fSDimitry Andric MI->getOperand(0).setSubReg(AMDGPU::NoSubRegister); 25465f757f3fSDimitry Andric if (Offset) { 25475f757f3fSDimitry Andric MachineOperand *OffsetMO = getNamedOperand(*MI, AMDGPU::OpName::offset); 25485f757f3fSDimitry Andric int64_t FinalOffset = OffsetMO->getImm() + Offset / 8; 25495f757f3fSDimitry Andric OffsetMO->setImm(FinalOffset); 25505f757f3fSDimitry Andric } 25515f757f3fSDimitry Andric SmallVector<MachineMemOperand *> NewMMOs; 25525f757f3fSDimitry Andric for (const MachineMemOperand *MemOp : Orig.memoperands()) 25535f757f3fSDimitry Andric NewMMOs.push_back(MF->getMachineMemOperand(MemOp, MemOp->getPointerInfo(), 25545f757f3fSDimitry Andric SubregSize / 8)); 25555f757f3fSDimitry Andric MI->setMemRefs(*MF, NewMMOs); 25565f757f3fSDimitry Andric 25575f757f3fSDimitry Andric MBB.insert(I, MI); 25585f757f3fSDimitry Andric return; 25595f757f3fSDimitry Andric } 25605f757f3fSDimitry Andric 25615f757f3fSDimitry Andric default: 25625f757f3fSDimitry Andric break; 25635f757f3fSDimitry Andric } 25645f757f3fSDimitry Andric 25655f757f3fSDimitry Andric TargetInstrInfo::reMaterialize(MBB, I, DestReg, SubIdx, Orig, RI); 25665f757f3fSDimitry Andric } 25675f757f3fSDimitry Andric 25688bcb0991SDimitry Andric std::pair<MachineInstr*, MachineInstr*> 25698bcb0991SDimitry Andric SIInstrInfo::expandMovDPP64(MachineInstr &MI) const { 25708bcb0991SDimitry Andric assert (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO); 25718bcb0991SDimitry Andric 257281ad6265SDimitry Andric if (ST.hasMovB64() && 25735f757f3fSDimitry Andric AMDGPU::isLegalDPALU_DPPControl( 257481ad6265SDimitry Andric getNamedOperand(MI, AMDGPU::OpName::dpp_ctrl)->getImm())) { 257581ad6265SDimitry Andric MI.setDesc(get(AMDGPU::V_MOV_B64_dpp)); 2576bdd1243dSDimitry Andric return std::pair(&MI, nullptr); 257781ad6265SDimitry Andric } 257881ad6265SDimitry Andric 25798bcb0991SDimitry Andric MachineBasicBlock &MBB = *MI.getParent(); 25808bcb0991SDimitry Andric DebugLoc DL = MBB.findDebugLoc(MI); 25818bcb0991SDimitry Andric MachineFunction *MF = MBB.getParent(); 25828bcb0991SDimitry Andric MachineRegisterInfo &MRI = MF->getRegInfo(); 25838bcb0991SDimitry Andric Register Dst = MI.getOperand(0).getReg(); 25848bcb0991SDimitry Andric unsigned Part = 0; 25858bcb0991SDimitry Andric MachineInstr *Split[2]; 25868bcb0991SDimitry Andric 25878bcb0991SDimitry Andric for (auto Sub : { AMDGPU::sub0, AMDGPU::sub1 }) { 25888bcb0991SDimitry Andric auto MovDPP = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_dpp)); 25898bcb0991SDimitry Andric if (Dst.isPhysical()) { 25908bcb0991SDimitry Andric MovDPP.addDef(RI.getSubReg(Dst, Sub)); 25918bcb0991SDimitry Andric } else { 25928bcb0991SDimitry Andric assert(MRI.isSSA()); 25938bcb0991SDimitry Andric auto Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 25948bcb0991SDimitry Andric MovDPP.addDef(Tmp); 25958bcb0991SDimitry Andric } 25968bcb0991SDimitry Andric 25978bcb0991SDimitry Andric for (unsigned I = 1; I <= 2; ++I) { // old and src operands. 
25988bcb0991SDimitry Andric const MachineOperand &SrcOp = MI.getOperand(I); 25998bcb0991SDimitry Andric assert(!SrcOp.isFPImm()); 26008bcb0991SDimitry Andric if (SrcOp.isImm()) { 26018bcb0991SDimitry Andric APInt Imm(64, SrcOp.getImm()); 26028bcb0991SDimitry Andric Imm.ashrInPlace(Part * 32); 26038bcb0991SDimitry Andric MovDPP.addImm(Imm.getLoBits(32).getZExtValue()); 26048bcb0991SDimitry Andric } else { 26058bcb0991SDimitry Andric assert(SrcOp.isReg()); 26068bcb0991SDimitry Andric Register Src = SrcOp.getReg(); 26078bcb0991SDimitry Andric if (Src.isPhysical()) 26088bcb0991SDimitry Andric MovDPP.addReg(RI.getSubReg(Src, Sub)); 26098bcb0991SDimitry Andric else 26108bcb0991SDimitry Andric MovDPP.addReg(Src, SrcOp.isUndef() ? RegState::Undef : 0, Sub); 26118bcb0991SDimitry Andric } 26128bcb0991SDimitry Andric } 26138bcb0991SDimitry Andric 2614bdd1243dSDimitry Andric for (const MachineOperand &MO : llvm::drop_begin(MI.explicit_operands(), 3)) 2615bdd1243dSDimitry Andric MovDPP.addImm(MO.getImm()); 26168bcb0991SDimitry Andric 26178bcb0991SDimitry Andric Split[Part] = MovDPP; 26188bcb0991SDimitry Andric ++Part; 26198bcb0991SDimitry Andric } 26208bcb0991SDimitry Andric 26218bcb0991SDimitry Andric if (Dst.isVirtual()) 26228bcb0991SDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::REG_SEQUENCE), Dst) 26238bcb0991SDimitry Andric .addReg(Split[0]->getOperand(0).getReg()) 26248bcb0991SDimitry Andric .addImm(AMDGPU::sub0) 26258bcb0991SDimitry Andric .addReg(Split[1]->getOperand(0).getReg()) 26268bcb0991SDimitry Andric .addImm(AMDGPU::sub1); 26278bcb0991SDimitry Andric 26288bcb0991SDimitry Andric MI.eraseFromParent(); 2629bdd1243dSDimitry Andric return std::pair(Split[0], Split[1]); 26308bcb0991SDimitry Andric } 26318bcb0991SDimitry Andric 26325f757f3fSDimitry Andric std::optional<DestSourcePair> 26335f757f3fSDimitry Andric SIInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const { 26345f757f3fSDimitry Andric if (MI.getOpcode() == AMDGPU::WWM_COPY) 26355f757f3fSDimitry Andric return DestSourcePair{MI.getOperand(0), MI.getOperand(1)}; 26365f757f3fSDimitry Andric 26375f757f3fSDimitry Andric return std::nullopt; 26385f757f3fSDimitry Andric } 26395f757f3fSDimitry Andric 26400b57cec5SDimitry Andric bool SIInstrInfo::swapSourceModifiers(MachineInstr &MI, 26410b57cec5SDimitry Andric MachineOperand &Src0, 26420b57cec5SDimitry Andric unsigned Src0OpName, 26430b57cec5SDimitry Andric MachineOperand &Src1, 26440b57cec5SDimitry Andric unsigned Src1OpName) const { 26450b57cec5SDimitry Andric MachineOperand *Src0Mods = getNamedOperand(MI, Src0OpName); 26460b57cec5SDimitry Andric if (!Src0Mods) 26470b57cec5SDimitry Andric return false; 26480b57cec5SDimitry Andric 26490b57cec5SDimitry Andric MachineOperand *Src1Mods = getNamedOperand(MI, Src1OpName); 26500b57cec5SDimitry Andric assert(Src1Mods && 26510b57cec5SDimitry Andric "All commutable instructions have both src0 and src1 modifiers"); 26520b57cec5SDimitry Andric 26530b57cec5SDimitry Andric int Src0ModsVal = Src0Mods->getImm(); 26540b57cec5SDimitry Andric int Src1ModsVal = Src1Mods->getImm(); 26550b57cec5SDimitry Andric 26560b57cec5SDimitry Andric Src1Mods->setImm(Src0ModsVal); 26570b57cec5SDimitry Andric Src0Mods->setImm(Src1ModsVal); 26580b57cec5SDimitry Andric return true; 26590b57cec5SDimitry Andric } 26600b57cec5SDimitry Andric 26610b57cec5SDimitry Andric static MachineInstr *swapRegAndNonRegOperand(MachineInstr &MI, 26620b57cec5SDimitry Andric MachineOperand &RegOp, 26630b57cec5SDimitry Andric MachineOperand &NonRegOp) { 26648bcb0991SDimitry Andric Register Reg 
= RegOp.getReg(); 26650b57cec5SDimitry Andric unsigned SubReg = RegOp.getSubReg(); 26660b57cec5SDimitry Andric bool IsKill = RegOp.isKill(); 26670b57cec5SDimitry Andric bool IsDead = RegOp.isDead(); 26680b57cec5SDimitry Andric bool IsUndef = RegOp.isUndef(); 26690b57cec5SDimitry Andric bool IsDebug = RegOp.isDebug(); 26700b57cec5SDimitry Andric 26710b57cec5SDimitry Andric if (NonRegOp.isImm()) 26720b57cec5SDimitry Andric RegOp.ChangeToImmediate(NonRegOp.getImm()); 26730b57cec5SDimitry Andric else if (NonRegOp.isFI()) 26740b57cec5SDimitry Andric RegOp.ChangeToFrameIndex(NonRegOp.getIndex()); 26755ffd83dbSDimitry Andric else if (NonRegOp.isGlobal()) { 26765ffd83dbSDimitry Andric RegOp.ChangeToGA(NonRegOp.getGlobal(), NonRegOp.getOffset(), 26775ffd83dbSDimitry Andric NonRegOp.getTargetFlags()); 26785ffd83dbSDimitry Andric } else 26790b57cec5SDimitry Andric return nullptr; 26800b57cec5SDimitry Andric 26815ffd83dbSDimitry Andric // Make sure we don't reinterpret a subreg index in the target flags. 26825ffd83dbSDimitry Andric RegOp.setTargetFlags(NonRegOp.getTargetFlags()); 26835ffd83dbSDimitry Andric 26840b57cec5SDimitry Andric NonRegOp.ChangeToRegister(Reg, false, false, IsKill, IsDead, IsUndef, IsDebug); 26850b57cec5SDimitry Andric NonRegOp.setSubReg(SubReg); 26860b57cec5SDimitry Andric 26870b57cec5SDimitry Andric return &MI; 26880b57cec5SDimitry Andric } 26890b57cec5SDimitry Andric 26900b57cec5SDimitry Andric MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, 26910b57cec5SDimitry Andric unsigned Src0Idx, 26920b57cec5SDimitry Andric unsigned Src1Idx) const { 26930b57cec5SDimitry Andric assert(!NewMI && "this should never be used"); 26940b57cec5SDimitry Andric 26950b57cec5SDimitry Andric unsigned Opc = MI.getOpcode(); 26960b57cec5SDimitry Andric int CommutedOpcode = commuteOpcode(Opc); 26970b57cec5SDimitry Andric if (CommutedOpcode == -1) 26980b57cec5SDimitry Andric return nullptr; 26990b57cec5SDimitry Andric 27005f757f3fSDimitry Andric if (Src0Idx > Src1Idx) 27015f757f3fSDimitry Andric std::swap(Src0Idx, Src1Idx); 27025f757f3fSDimitry Andric 27030b57cec5SDimitry Andric assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) == 27040b57cec5SDimitry Andric static_cast<int>(Src0Idx) && 27050b57cec5SDimitry Andric AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) == 27060b57cec5SDimitry Andric static_cast<int>(Src1Idx) && 27070b57cec5SDimitry Andric "inconsistency with findCommutedOpIndices"); 27080b57cec5SDimitry Andric 27090b57cec5SDimitry Andric MachineOperand &Src0 = MI.getOperand(Src0Idx); 27100b57cec5SDimitry Andric MachineOperand &Src1 = MI.getOperand(Src1Idx); 27110b57cec5SDimitry Andric 27120b57cec5SDimitry Andric MachineInstr *CommutedMI = nullptr; 27130b57cec5SDimitry Andric if (Src0.isReg() && Src1.isReg()) { 27140b57cec5SDimitry Andric if (isOperandLegal(MI, Src1Idx, &Src0)) { 27150b57cec5SDimitry Andric // Be sure to copy the source modifiers to the right place. 27160b57cec5SDimitry Andric CommutedMI 27170b57cec5SDimitry Andric = TargetInstrInfo::commuteInstructionImpl(MI, NewMI, Src0Idx, Src1Idx); 27180b57cec5SDimitry Andric } 27190b57cec5SDimitry Andric 27200b57cec5SDimitry Andric } else if (Src0.isReg() && !Src1.isReg()) { 27210b57cec5SDimitry Andric // src0 should always be able to support any operand type, so no need to 27220b57cec5SDimitry Andric // check operand legality. 
27230b57cec5SDimitry Andric CommutedMI = swapRegAndNonRegOperand(MI, Src0, Src1); 27240b57cec5SDimitry Andric } else if (!Src0.isReg() && Src1.isReg()) { 27250b57cec5SDimitry Andric if (isOperandLegal(MI, Src1Idx, &Src0)) 27260b57cec5SDimitry Andric CommutedMI = swapRegAndNonRegOperand(MI, Src1, Src0); 27270b57cec5SDimitry Andric } else { 27280b57cec5SDimitry Andric // FIXME: Found two non registers to commute. This does happen. 27290b57cec5SDimitry Andric return nullptr; 27300b57cec5SDimitry Andric } 27310b57cec5SDimitry Andric 27320b57cec5SDimitry Andric if (CommutedMI) { 27330b57cec5SDimitry Andric swapSourceModifiers(MI, Src0, AMDGPU::OpName::src0_modifiers, 27340b57cec5SDimitry Andric Src1, AMDGPU::OpName::src1_modifiers); 27350b57cec5SDimitry Andric 27360b57cec5SDimitry Andric CommutedMI->setDesc(get(CommutedOpcode)); 27370b57cec5SDimitry Andric } 27380b57cec5SDimitry Andric 27390b57cec5SDimitry Andric return CommutedMI; 27400b57cec5SDimitry Andric } 27410b57cec5SDimitry Andric 27420b57cec5SDimitry Andric // This needs to be implemented because the source modifiers may be inserted 27430b57cec5SDimitry Andric // between the true commutable operands, and the base 27440b57cec5SDimitry Andric // TargetInstrInfo::commuteInstruction uses it. 27458bcb0991SDimitry Andric bool SIInstrInfo::findCommutedOpIndices(const MachineInstr &MI, 27468bcb0991SDimitry Andric unsigned &SrcOpIdx0, 27470b57cec5SDimitry Andric unsigned &SrcOpIdx1) const { 27480b57cec5SDimitry Andric return findCommutedOpIndices(MI.getDesc(), SrcOpIdx0, SrcOpIdx1); 27490b57cec5SDimitry Andric } 27500b57cec5SDimitry Andric 2751bdd1243dSDimitry Andric bool SIInstrInfo::findCommutedOpIndices(const MCInstrDesc &Desc, 2752bdd1243dSDimitry Andric unsigned &SrcOpIdx0, 27530b57cec5SDimitry Andric unsigned &SrcOpIdx1) const { 27540b57cec5SDimitry Andric if (!Desc.isCommutable()) 27550b57cec5SDimitry Andric return false; 27560b57cec5SDimitry Andric 27570b57cec5SDimitry Andric unsigned Opc = Desc.getOpcode(); 27580b57cec5SDimitry Andric int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 27590b57cec5SDimitry Andric if (Src0Idx == -1) 27600b57cec5SDimitry Andric return false; 27610b57cec5SDimitry Andric 27620b57cec5SDimitry Andric int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); 27630b57cec5SDimitry Andric if (Src1Idx == -1) 27640b57cec5SDimitry Andric return false; 27650b57cec5SDimitry Andric 27660b57cec5SDimitry Andric return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx); 27670b57cec5SDimitry Andric } 27680b57cec5SDimitry Andric 27690b57cec5SDimitry Andric bool SIInstrInfo::isBranchOffsetInRange(unsigned BranchOp, 27700b57cec5SDimitry Andric int64_t BrOffset) const { 27710b57cec5SDimitry Andric // BranchRelaxation should never have to check s_setpc_b64 because its dest 27720b57cec5SDimitry Andric // block is unanalyzable. 27730b57cec5SDimitry Andric assert(BranchOp != AMDGPU::S_SETPC_B64); 27740b57cec5SDimitry Andric 27750b57cec5SDimitry Andric // Convert to dwords. 27760b57cec5SDimitry Andric BrOffset /= 4; 27770b57cec5SDimitry Andric 27780b57cec5SDimitry Andric // The branch instructions do PC += signext(SIMM16 * 4) + 4, so the offset is 27790b57cec5SDimitry Andric // from the next instruction. 
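// Added descriptive note: with the 16-bit SIMM16 encoding this permits dword
// offsets in [-32768, 32767], i.e. branch targets within roughly +/-128 KiB of
// the instruction following the branch.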
27800b57cec5SDimitry Andric BrOffset -= 1; 27810b57cec5SDimitry Andric 27820b57cec5SDimitry Andric return isIntN(BranchOffsetBits, BrOffset); 27830b57cec5SDimitry Andric } 27840b57cec5SDimitry Andric 27855f757f3fSDimitry Andric MachineBasicBlock * 27865f757f3fSDimitry Andric SIInstrInfo::getBranchDestBlock(const MachineInstr &MI) const { 27870b57cec5SDimitry Andric return MI.getOperand(0).getMBB(); 27880b57cec5SDimitry Andric } 27890b57cec5SDimitry Andric 2790bdd1243dSDimitry Andric bool SIInstrInfo::hasDivergentBranch(const MachineBasicBlock *MBB) const { 2791bdd1243dSDimitry Andric for (const MachineInstr &MI : MBB->terminators()) { 2792bdd1243dSDimitry Andric if (MI.getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO || 2793bdd1243dSDimitry Andric MI.getOpcode() == AMDGPU::SI_IF || MI.getOpcode() == AMDGPU::SI_ELSE || 2794bdd1243dSDimitry Andric MI.getOpcode() == AMDGPU::SI_LOOP) 2795bdd1243dSDimitry Andric return true; 2796bdd1243dSDimitry Andric } 2797bdd1243dSDimitry Andric return false; 2798bdd1243dSDimitry Andric } 2799bdd1243dSDimitry Andric 2800349cc55cSDimitry Andric void SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, 28010b57cec5SDimitry Andric MachineBasicBlock &DestBB, 2802349cc55cSDimitry Andric MachineBasicBlock &RestoreBB, 2803349cc55cSDimitry Andric const DebugLoc &DL, int64_t BrOffset, 28040b57cec5SDimitry Andric RegScavenger *RS) const { 28050b57cec5SDimitry Andric assert(RS && "RegScavenger required for long branching"); 28060b57cec5SDimitry Andric assert(MBB.empty() && 28070b57cec5SDimitry Andric "new block should be inserted for expanding unconditional branch"); 28080b57cec5SDimitry Andric assert(MBB.pred_size() == 1); 2809349cc55cSDimitry Andric assert(RestoreBB.empty() && 2810349cc55cSDimitry Andric "restore block should be inserted for restoring clobbered registers"); 28110b57cec5SDimitry Andric 28120b57cec5SDimitry Andric MachineFunction *MF = MBB.getParent(); 28130b57cec5SDimitry Andric MachineRegisterInfo &MRI = MF->getRegInfo(); 281406c3fb27SDimitry Andric const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); 28150b57cec5SDimitry Andric 28160b57cec5SDimitry Andric // FIXME: Virtual register workaround for RegScavenger not working with empty 28170b57cec5SDimitry Andric // blocks. 28188bcb0991SDimitry Andric Register PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); 28190b57cec5SDimitry Andric 28200b57cec5SDimitry Andric auto I = MBB.end(); 28210b57cec5SDimitry Andric 28220b57cec5SDimitry Andric // We need to compute the offset relative to the instruction immediately after 28230b57cec5SDimitry Andric // s_getpc_b64. Insert pc arithmetic code before last terminator. 
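// Added descriptive note: the branch distance is not known here, so the low
// and high halves of the offset are referenced through temporary MC symbols
// and are given their values at the end of this function as the difference
// between the destination label and the post_getpc label.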
28240b57cec5SDimitry Andric MachineInstr *GetPC = BuildMI(MBB, I, DL, get(AMDGPU::S_GETPC_B64), PCReg); 28250b57cec5SDimitry Andric 2826fe6060f1SDimitry Andric auto &MCCtx = MF->getContext(); 2827fe6060f1SDimitry Andric MCSymbol *PostGetPCLabel = 2828fe6060f1SDimitry Andric MCCtx.createTempSymbol("post_getpc", /*AlwaysAddSuffix=*/true); 2829fe6060f1SDimitry Andric GetPC->setPostInstrSymbol(*MF, PostGetPCLabel); 2830fe6060f1SDimitry Andric 2831fe6060f1SDimitry Andric MCSymbol *OffsetLo = 2832fe6060f1SDimitry Andric MCCtx.createTempSymbol("offset_lo", /*AlwaysAddSuffix=*/true); 2833fe6060f1SDimitry Andric MCSymbol *OffsetHi = 2834fe6060f1SDimitry Andric MCCtx.createTempSymbol("offset_hi", /*AlwaysAddSuffix=*/true); 28350b57cec5SDimitry Andric BuildMI(MBB, I, DL, get(AMDGPU::S_ADD_U32)) 28360b57cec5SDimitry Andric .addReg(PCReg, RegState::Define, AMDGPU::sub0) 28370b57cec5SDimitry Andric .addReg(PCReg, 0, AMDGPU::sub0) 2838fe6060f1SDimitry Andric .addSym(OffsetLo, MO_FAR_BRANCH_OFFSET); 28390b57cec5SDimitry Andric BuildMI(MBB, I, DL, get(AMDGPU::S_ADDC_U32)) 28400b57cec5SDimitry Andric .addReg(PCReg, RegState::Define, AMDGPU::sub1) 28410b57cec5SDimitry Andric .addReg(PCReg, 0, AMDGPU::sub1) 2842fe6060f1SDimitry Andric .addSym(OffsetHi, MO_FAR_BRANCH_OFFSET); 28430b57cec5SDimitry Andric 28440b57cec5SDimitry Andric // Insert the indirect branch after the other terminator. 28450b57cec5SDimitry Andric BuildMI(&MBB, DL, get(AMDGPU::S_SETPC_B64)) 28460b57cec5SDimitry Andric .addReg(PCReg); 28470b57cec5SDimitry Andric 28480b57cec5SDimitry Andric // If a spill is needed for the pc register pair, we need to insert a spill 28490b57cec5SDimitry Andric // restore block right before the destination block, and insert a short branch 28500b57cec5SDimitry Andric // into the old destination block's fallthrough predecessor. 28510b57cec5SDimitry Andric // e.g.: 28520b57cec5SDimitry Andric // 28530b57cec5SDimitry Andric // s_cbranch_scc0 skip_long_branch: 28540b57cec5SDimitry Andric // 28550b57cec5SDimitry Andric // long_branch_bb: 28560b57cec5SDimitry Andric // spill s[8:9] 28570b57cec5SDimitry Andric // s_getpc_b64 s[8:9] 28580b57cec5SDimitry Andric // s_add_u32 s8, s8, restore_bb 28590b57cec5SDimitry Andric // s_addc_u32 s9, s9, 0 28600b57cec5SDimitry Andric // s_setpc_b64 s[8:9] 28610b57cec5SDimitry Andric // 28620b57cec5SDimitry Andric // skip_long_branch: 28630b57cec5SDimitry Andric // foo; 28640b57cec5SDimitry Andric // 28650b57cec5SDimitry Andric // ..... 
28660b57cec5SDimitry Andric // 28670b57cec5SDimitry Andric // dest_bb_fallthrough_predecessor: 28680b57cec5SDimitry Andric // bar; 28690b57cec5SDimitry Andric // s_branch dest_bb 28700b57cec5SDimitry Andric // 28710b57cec5SDimitry Andric // restore_bb: 28720b57cec5SDimitry Andric // restore s[8:9] 28730b57cec5SDimitry Andric // fallthrough dest_bb 28740b57cec5SDimitry Andric /// 28750b57cec5SDimitry Andric // dest_bb: 28760b57cec5SDimitry Andric // buzz; 28770b57cec5SDimitry Andric 287806c3fb27SDimitry Andric Register LongBranchReservedReg = MFI->getLongBranchReservedReg(); 287906c3fb27SDimitry Andric Register Scav; 288006c3fb27SDimitry Andric 288106c3fb27SDimitry Andric // If we've previously reserved a register for long branches 288206c3fb27SDimitry Andric // avoid running the scavenger and just use those registers 288306c3fb27SDimitry Andric if (LongBranchReservedReg) { 288406c3fb27SDimitry Andric RS->enterBasicBlock(MBB); 288506c3fb27SDimitry Andric Scav = LongBranchReservedReg; 288606c3fb27SDimitry Andric } else { 28870b57cec5SDimitry Andric RS->enterBasicBlockEnd(MBB); 288806c3fb27SDimitry Andric Scav = RS->scavengeRegisterBackwards( 2889349cc55cSDimitry Andric AMDGPU::SReg_64RegClass, MachineBasicBlock::iterator(GetPC), 2890349cc55cSDimitry Andric /* RestoreAfter */ false, 0, /* AllowSpill */ false); 289106c3fb27SDimitry Andric } 2892349cc55cSDimitry Andric if (Scav) { 2893349cc55cSDimitry Andric RS->setRegUsed(Scav); 28940b57cec5SDimitry Andric MRI.replaceRegWith(PCReg, Scav); 28950b57cec5SDimitry Andric MRI.clearVirtRegs(); 2896349cc55cSDimitry Andric } else { 2897349cc55cSDimitry Andric // As SGPR needs VGPR to be spilled, we reuse the slot of temporary VGPR for 2898349cc55cSDimitry Andric // SGPR spill. 2899349cc55cSDimitry Andric const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>(); 2900349cc55cSDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 2901349cc55cSDimitry Andric TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS); 2902349cc55cSDimitry Andric MRI.replaceRegWith(PCReg, AMDGPU::SGPR0_SGPR1); 2903349cc55cSDimitry Andric MRI.clearVirtRegs(); 2904349cc55cSDimitry Andric } 29050b57cec5SDimitry Andric 2906349cc55cSDimitry Andric MCSymbol *DestLabel = Scav ? DestBB.getSymbol() : RestoreBB.getSymbol(); 2907fe6060f1SDimitry Andric // Now, the distance could be defined. 2908fe6060f1SDimitry Andric auto *Offset = MCBinaryExpr::createSub( 2909349cc55cSDimitry Andric MCSymbolRefExpr::create(DestLabel, MCCtx), 2910fe6060f1SDimitry Andric MCSymbolRefExpr::create(PostGetPCLabel, MCCtx), MCCtx); 2911fe6060f1SDimitry Andric // Add offset assignments. 
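// Added descriptive note: OffsetLo is assigned the low 32 bits of the label
// difference and OffsetHi the value arithmetically shifted right by 32,
// matching the s_add_u32/s_addc_u32 pair emitted above.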
2912fe6060f1SDimitry Andric auto *Mask = MCConstantExpr::create(0xFFFFFFFFULL, MCCtx); 2913fe6060f1SDimitry Andric OffsetLo->setVariableValue(MCBinaryExpr::createAnd(Offset, Mask, MCCtx)); 2914fe6060f1SDimitry Andric auto *ShAmt = MCConstantExpr::create(32, MCCtx); 2915fe6060f1SDimitry Andric OffsetHi->setVariableValue(MCBinaryExpr::createAShr(Offset, ShAmt, MCCtx)); 29160b57cec5SDimitry Andric } 29170b57cec5SDimitry Andric 29180b57cec5SDimitry Andric unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) { 29190b57cec5SDimitry Andric switch (Cond) { 29200b57cec5SDimitry Andric case SIInstrInfo::SCC_TRUE: 29210b57cec5SDimitry Andric return AMDGPU::S_CBRANCH_SCC1; 29220b57cec5SDimitry Andric case SIInstrInfo::SCC_FALSE: 29230b57cec5SDimitry Andric return AMDGPU::S_CBRANCH_SCC0; 29240b57cec5SDimitry Andric case SIInstrInfo::VCCNZ: 29250b57cec5SDimitry Andric return AMDGPU::S_CBRANCH_VCCNZ; 29260b57cec5SDimitry Andric case SIInstrInfo::VCCZ: 29270b57cec5SDimitry Andric return AMDGPU::S_CBRANCH_VCCZ; 29280b57cec5SDimitry Andric case SIInstrInfo::EXECNZ: 29290b57cec5SDimitry Andric return AMDGPU::S_CBRANCH_EXECNZ; 29300b57cec5SDimitry Andric case SIInstrInfo::EXECZ: 29310b57cec5SDimitry Andric return AMDGPU::S_CBRANCH_EXECZ; 29320b57cec5SDimitry Andric default: 29330b57cec5SDimitry Andric llvm_unreachable("invalid branch predicate"); 29340b57cec5SDimitry Andric } 29350b57cec5SDimitry Andric } 29360b57cec5SDimitry Andric 29370b57cec5SDimitry Andric SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) { 29380b57cec5SDimitry Andric switch (Opcode) { 29390b57cec5SDimitry Andric case AMDGPU::S_CBRANCH_SCC0: 29400b57cec5SDimitry Andric return SCC_FALSE; 29410b57cec5SDimitry Andric case AMDGPU::S_CBRANCH_SCC1: 29420b57cec5SDimitry Andric return SCC_TRUE; 29430b57cec5SDimitry Andric case AMDGPU::S_CBRANCH_VCCNZ: 29440b57cec5SDimitry Andric return VCCNZ; 29450b57cec5SDimitry Andric case AMDGPU::S_CBRANCH_VCCZ: 29460b57cec5SDimitry Andric return VCCZ; 29470b57cec5SDimitry Andric case AMDGPU::S_CBRANCH_EXECNZ: 29480b57cec5SDimitry Andric return EXECNZ; 29490b57cec5SDimitry Andric case AMDGPU::S_CBRANCH_EXECZ: 29500b57cec5SDimitry Andric return EXECZ; 29510b57cec5SDimitry Andric default: 29520b57cec5SDimitry Andric return INVALID_BR; 29530b57cec5SDimitry Andric } 29540b57cec5SDimitry Andric } 29550b57cec5SDimitry Andric 29560b57cec5SDimitry Andric bool SIInstrInfo::analyzeBranchImpl(MachineBasicBlock &MBB, 29570b57cec5SDimitry Andric MachineBasicBlock::iterator I, 29580b57cec5SDimitry Andric MachineBasicBlock *&TBB, 29590b57cec5SDimitry Andric MachineBasicBlock *&FBB, 29600b57cec5SDimitry Andric SmallVectorImpl<MachineOperand> &Cond, 29610b57cec5SDimitry Andric bool AllowModify) const { 29620b57cec5SDimitry Andric if (I->getOpcode() == AMDGPU::S_BRANCH) { 29630b57cec5SDimitry Andric // Unconditional Branch 29640b57cec5SDimitry Andric TBB = I->getOperand(0).getMBB(); 29650b57cec5SDimitry Andric return false; 29660b57cec5SDimitry Andric } 29670b57cec5SDimitry Andric 29680b57cec5SDimitry Andric MachineBasicBlock *CondBB = nullptr; 29690b57cec5SDimitry Andric 29700b57cec5SDimitry Andric if (I->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) { 29710b57cec5SDimitry Andric CondBB = I->getOperand(1).getMBB(); 29720b57cec5SDimitry Andric Cond.push_back(I->getOperand(0)); 29730b57cec5SDimitry Andric } else { 29740b57cec5SDimitry Andric BranchPredicate Pred = getBranchPredicate(I->getOpcode()); 29750b57cec5SDimitry Andric if (Pred == INVALID_BR) 29760b57cec5SDimitry 
Andric return true; 29770b57cec5SDimitry Andric 29780b57cec5SDimitry Andric CondBB = I->getOperand(0).getMBB(); 29790b57cec5SDimitry Andric Cond.push_back(MachineOperand::CreateImm(Pred)); 29800b57cec5SDimitry Andric Cond.push_back(I->getOperand(1)); // Save the branch register. 29810b57cec5SDimitry Andric } 29820b57cec5SDimitry Andric ++I; 29830b57cec5SDimitry Andric 29840b57cec5SDimitry Andric if (I == MBB.end()) { 29850b57cec5SDimitry Andric // Conditional branch followed by fall-through. 29860b57cec5SDimitry Andric TBB = CondBB; 29870b57cec5SDimitry Andric return false; 29880b57cec5SDimitry Andric } 29890b57cec5SDimitry Andric 29900b57cec5SDimitry Andric if (I->getOpcode() == AMDGPU::S_BRANCH) { 29910b57cec5SDimitry Andric TBB = CondBB; 29920b57cec5SDimitry Andric FBB = I->getOperand(0).getMBB(); 29930b57cec5SDimitry Andric return false; 29940b57cec5SDimitry Andric } 29950b57cec5SDimitry Andric 29960b57cec5SDimitry Andric return true; 29970b57cec5SDimitry Andric } 29980b57cec5SDimitry Andric 29990b57cec5SDimitry Andric bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, 30000b57cec5SDimitry Andric MachineBasicBlock *&FBB, 30010b57cec5SDimitry Andric SmallVectorImpl<MachineOperand> &Cond, 30020b57cec5SDimitry Andric bool AllowModify) const { 30030b57cec5SDimitry Andric MachineBasicBlock::iterator I = MBB.getFirstTerminator(); 30040b57cec5SDimitry Andric auto E = MBB.end(); 30050b57cec5SDimitry Andric if (I == E) 30060b57cec5SDimitry Andric return false; 30070b57cec5SDimitry Andric 30080b57cec5SDimitry Andric // Skip over the instructions that are artificially terminators for special 30090b57cec5SDimitry Andric // exec management. 3010fe6060f1SDimitry Andric while (I != E && !I->isBranch() && !I->isReturn()) { 30110b57cec5SDimitry Andric switch (I->getOpcode()) { 30120b57cec5SDimitry Andric case AMDGPU::S_MOV_B64_term: 30130b57cec5SDimitry Andric case AMDGPU::S_XOR_B64_term: 3014e8d8bef9SDimitry Andric case AMDGPU::S_OR_B64_term: 30150b57cec5SDimitry Andric case AMDGPU::S_ANDN2_B64_term: 3016fe6060f1SDimitry Andric case AMDGPU::S_AND_B64_term: 301706c3fb27SDimitry Andric case AMDGPU::S_AND_SAVEEXEC_B64_term: 30180b57cec5SDimitry Andric case AMDGPU::S_MOV_B32_term: 30190b57cec5SDimitry Andric case AMDGPU::S_XOR_B32_term: 30200b57cec5SDimitry Andric case AMDGPU::S_OR_B32_term: 30210b57cec5SDimitry Andric case AMDGPU::S_ANDN2_B32_term: 3022fe6060f1SDimitry Andric case AMDGPU::S_AND_B32_term: 302306c3fb27SDimitry Andric case AMDGPU::S_AND_SAVEEXEC_B32_term: 30240b57cec5SDimitry Andric break; 30250b57cec5SDimitry Andric case AMDGPU::SI_IF: 30260b57cec5SDimitry Andric case AMDGPU::SI_ELSE: 30270b57cec5SDimitry Andric case AMDGPU::SI_KILL_I1_TERMINATOR: 30280b57cec5SDimitry Andric case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR: 30290b57cec5SDimitry Andric // FIXME: It's messy that these need to be considered here at all. 
30300b57cec5SDimitry Andric return true; 30310b57cec5SDimitry Andric default: 30320b57cec5SDimitry Andric llvm_unreachable("unexpected non-branch terminator inst"); 30330b57cec5SDimitry Andric } 30340b57cec5SDimitry Andric 30350b57cec5SDimitry Andric ++I; 30360b57cec5SDimitry Andric } 30370b57cec5SDimitry Andric 30380b57cec5SDimitry Andric if (I == E) 30390b57cec5SDimitry Andric return false; 30400b57cec5SDimitry Andric 30410b57cec5SDimitry Andric return analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify); 30420b57cec5SDimitry Andric } 30430b57cec5SDimitry Andric 30440b57cec5SDimitry Andric unsigned SIInstrInfo::removeBranch(MachineBasicBlock &MBB, 30450b57cec5SDimitry Andric int *BytesRemoved) const { 30460b57cec5SDimitry Andric unsigned Count = 0; 30470b57cec5SDimitry Andric unsigned RemovedSize = 0; 3048349cc55cSDimitry Andric for (MachineInstr &MI : llvm::make_early_inc_range(MBB.terminators())) { 3049349cc55cSDimitry Andric // Skip over artificial terminators when removing instructions. 3050349cc55cSDimitry Andric if (MI.isBranch() || MI.isReturn()) { 3051349cc55cSDimitry Andric RemovedSize += getInstSizeInBytes(MI); 3052349cc55cSDimitry Andric MI.eraseFromParent(); 30530b57cec5SDimitry Andric ++Count; 3054349cc55cSDimitry Andric } 30550b57cec5SDimitry Andric } 30560b57cec5SDimitry Andric 30570b57cec5SDimitry Andric if (BytesRemoved) 30580b57cec5SDimitry Andric *BytesRemoved = RemovedSize; 30590b57cec5SDimitry Andric 30600b57cec5SDimitry Andric return Count; 30610b57cec5SDimitry Andric } 30620b57cec5SDimitry Andric 30630b57cec5SDimitry Andric // Copy the flags onto the implicit condition register operand. 30640b57cec5SDimitry Andric static void preserveCondRegFlags(MachineOperand &CondReg, 30650b57cec5SDimitry Andric const MachineOperand &OrigCond) { 30660b57cec5SDimitry Andric CondReg.setIsUndef(OrigCond.isUndef()); 30670b57cec5SDimitry Andric CondReg.setIsKill(OrigCond.isKill()); 30680b57cec5SDimitry Andric } 30690b57cec5SDimitry Andric 30700b57cec5SDimitry Andric unsigned SIInstrInfo::insertBranch(MachineBasicBlock &MBB, 30710b57cec5SDimitry Andric MachineBasicBlock *TBB, 30720b57cec5SDimitry Andric MachineBasicBlock *FBB, 30730b57cec5SDimitry Andric ArrayRef<MachineOperand> Cond, 30740b57cec5SDimitry Andric const DebugLoc &DL, 30750b57cec5SDimitry Andric int *BytesAdded) const { 30760b57cec5SDimitry Andric if (!FBB && Cond.empty()) { 30770b57cec5SDimitry Andric BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH)) 30780b57cec5SDimitry Andric .addMBB(TBB); 30790b57cec5SDimitry Andric if (BytesAdded) 3080e8d8bef9SDimitry Andric *BytesAdded = ST.hasOffset3fBug() ? 
8 : 4; 30810b57cec5SDimitry Andric return 1; 30820b57cec5SDimitry Andric } 30830b57cec5SDimitry Andric 30840b57cec5SDimitry Andric if(Cond.size() == 1 && Cond[0].isReg()) { 30850b57cec5SDimitry Andric BuildMI(&MBB, DL, get(AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO)) 30860b57cec5SDimitry Andric .add(Cond[0]) 30870b57cec5SDimitry Andric .addMBB(TBB); 30880b57cec5SDimitry Andric return 1; 30890b57cec5SDimitry Andric } 30900b57cec5SDimitry Andric 30910b57cec5SDimitry Andric assert(TBB && Cond[0].isImm()); 30920b57cec5SDimitry Andric 30930b57cec5SDimitry Andric unsigned Opcode 30940b57cec5SDimitry Andric = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm())); 30950b57cec5SDimitry Andric 30960b57cec5SDimitry Andric if (!FBB) { 30970b57cec5SDimitry Andric MachineInstr *CondBr = 30980b57cec5SDimitry Andric BuildMI(&MBB, DL, get(Opcode)) 30990b57cec5SDimitry Andric .addMBB(TBB); 31000b57cec5SDimitry Andric 31010b57cec5SDimitry Andric // Copy the flags onto the implicit condition register operand. 31020b57cec5SDimitry Andric preserveCondRegFlags(CondBr->getOperand(1), Cond[1]); 31035ffd83dbSDimitry Andric fixImplicitOperands(*CondBr); 31040b57cec5SDimitry Andric 31050b57cec5SDimitry Andric if (BytesAdded) 3106e8d8bef9SDimitry Andric *BytesAdded = ST.hasOffset3fBug() ? 8 : 4; 31070b57cec5SDimitry Andric return 1; 31080b57cec5SDimitry Andric } 31090b57cec5SDimitry Andric 31100b57cec5SDimitry Andric assert(TBB && FBB); 31110b57cec5SDimitry Andric 31120b57cec5SDimitry Andric MachineInstr *CondBr = 31130b57cec5SDimitry Andric BuildMI(&MBB, DL, get(Opcode)) 31140b57cec5SDimitry Andric .addMBB(TBB); 3115fe6060f1SDimitry Andric fixImplicitOperands(*CondBr); 31160b57cec5SDimitry Andric BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH)) 31170b57cec5SDimitry Andric .addMBB(FBB); 31180b57cec5SDimitry Andric 31190b57cec5SDimitry Andric MachineOperand &CondReg = CondBr->getOperand(1); 31200b57cec5SDimitry Andric CondReg.setIsUndef(Cond[1].isUndef()); 31210b57cec5SDimitry Andric CondReg.setIsKill(Cond[1].isKill()); 31220b57cec5SDimitry Andric 31230b57cec5SDimitry Andric if (BytesAdded) 3124e8d8bef9SDimitry Andric *BytesAdded = ST.hasOffset3fBug() ? 
16 : 8; 31250b57cec5SDimitry Andric 31260b57cec5SDimitry Andric return 2; 31270b57cec5SDimitry Andric } 31280b57cec5SDimitry Andric 31290b57cec5SDimitry Andric bool SIInstrInfo::reverseBranchCondition( 31300b57cec5SDimitry Andric SmallVectorImpl<MachineOperand> &Cond) const { 31310b57cec5SDimitry Andric if (Cond.size() != 2) { 31320b57cec5SDimitry Andric return true; 31330b57cec5SDimitry Andric } 31340b57cec5SDimitry Andric 31350b57cec5SDimitry Andric if (Cond[0].isImm()) { 31360b57cec5SDimitry Andric Cond[0].setImm(-Cond[0].getImm()); 31370b57cec5SDimitry Andric return false; 31380b57cec5SDimitry Andric } 31390b57cec5SDimitry Andric 31400b57cec5SDimitry Andric return true; 31410b57cec5SDimitry Andric } 31420b57cec5SDimitry Andric 31430b57cec5SDimitry Andric bool SIInstrInfo::canInsertSelect(const MachineBasicBlock &MBB, 31440b57cec5SDimitry Andric ArrayRef<MachineOperand> Cond, 31455ffd83dbSDimitry Andric Register DstReg, Register TrueReg, 31465ffd83dbSDimitry Andric Register FalseReg, int &CondCycles, 31470b57cec5SDimitry Andric int &TrueCycles, int &FalseCycles) const { 31480b57cec5SDimitry Andric switch (Cond[0].getImm()) { 31490b57cec5SDimitry Andric case VCCNZ: 31500b57cec5SDimitry Andric case VCCZ: { 31510b57cec5SDimitry Andric const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 31520b57cec5SDimitry Andric const TargetRegisterClass *RC = MRI.getRegClass(TrueReg); 3153e8d8bef9SDimitry Andric if (MRI.getRegClass(FalseReg) != RC) 3154e8d8bef9SDimitry Andric return false; 31550b57cec5SDimitry Andric 315606c3fb27SDimitry Andric int NumInsts = AMDGPU::getRegBitWidth(*RC) / 32; 31570b57cec5SDimitry Andric CondCycles = TrueCycles = FalseCycles = NumInsts; // ??? 31580b57cec5SDimitry Andric 31590b57cec5SDimitry Andric // Limit to equal cost for branch vs. N v_cndmask_b32s. 31600b57cec5SDimitry Andric return RI.hasVGPRs(RC) && NumInsts <= 6; 31610b57cec5SDimitry Andric } 31620b57cec5SDimitry Andric case SCC_TRUE: 31630b57cec5SDimitry Andric case SCC_FALSE: { 31640b57cec5SDimitry Andric // FIXME: We could insert for VGPRs if we could replace the original compare 31650b57cec5SDimitry Andric // with a vector one. 31660b57cec5SDimitry Andric const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 31670b57cec5SDimitry Andric const TargetRegisterClass *RC = MRI.getRegClass(TrueReg); 3168e8d8bef9SDimitry Andric if (MRI.getRegClass(FalseReg) != RC) 3169e8d8bef9SDimitry Andric return false; 31700b57cec5SDimitry Andric 317106c3fb27SDimitry Andric int NumInsts = AMDGPU::getRegBitWidth(*RC) / 32; 31720b57cec5SDimitry Andric 31730b57cec5SDimitry Andric // Multiples of 8 can do s_cselect_b64 31740b57cec5SDimitry Andric if (NumInsts % 2 == 0) 31750b57cec5SDimitry Andric NumInsts /= 2; 31760b57cec5SDimitry Andric 31770b57cec5SDimitry Andric CondCycles = TrueCycles = FalseCycles = NumInsts; // ??? 
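// E.g. a 128-bit SGPR select reports a cost of 2 here (two s_cselect_b64),
// where the VCC path above would need 4 v_cndmask_b32.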
31780b57cec5SDimitry Andric return RI.isSGPRClass(RC); 31790b57cec5SDimitry Andric } 31800b57cec5SDimitry Andric default: 31810b57cec5SDimitry Andric return false; 31820b57cec5SDimitry Andric } 31830b57cec5SDimitry Andric } 31840b57cec5SDimitry Andric 31850b57cec5SDimitry Andric void SIInstrInfo::insertSelect(MachineBasicBlock &MBB, 31860b57cec5SDimitry Andric MachineBasicBlock::iterator I, const DebugLoc &DL, 31875ffd83dbSDimitry Andric Register DstReg, ArrayRef<MachineOperand> Cond, 31885ffd83dbSDimitry Andric Register TrueReg, Register FalseReg) const { 31890b57cec5SDimitry Andric BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm()); 31900b57cec5SDimitry Andric if (Pred == VCCZ || Pred == SCC_FALSE) { 31910b57cec5SDimitry Andric Pred = static_cast<BranchPredicate>(-Pred); 31920b57cec5SDimitry Andric std::swap(TrueReg, FalseReg); 31930b57cec5SDimitry Andric } 31940b57cec5SDimitry Andric 31950b57cec5SDimitry Andric MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 31960b57cec5SDimitry Andric const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg); 31970b57cec5SDimitry Andric unsigned DstSize = RI.getRegSizeInBits(*DstRC); 31980b57cec5SDimitry Andric 31990b57cec5SDimitry Andric if (DstSize == 32) { 32005ffd83dbSDimitry Andric MachineInstr *Select; 32015ffd83dbSDimitry Andric if (Pred == SCC_TRUE) { 32025ffd83dbSDimitry Andric Select = BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B32), DstReg) 32035ffd83dbSDimitry Andric .addReg(TrueReg) 32045ffd83dbSDimitry Andric .addReg(FalseReg); 32055ffd83dbSDimitry Andric } else { 32060b57cec5SDimitry Andric // Instruction's operands are backwards from what is expected. 32075ffd83dbSDimitry Andric Select = BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e32), DstReg) 32080b57cec5SDimitry Andric .addReg(FalseReg) 32090b57cec5SDimitry Andric .addReg(TrueReg); 32105ffd83dbSDimitry Andric } 32110b57cec5SDimitry Andric 32120b57cec5SDimitry Andric preserveCondRegFlags(Select->getOperand(3), Cond[1]); 32130b57cec5SDimitry Andric return; 32140b57cec5SDimitry Andric } 32150b57cec5SDimitry Andric 32160b57cec5SDimitry Andric if (DstSize == 64 && Pred == SCC_TRUE) { 32170b57cec5SDimitry Andric MachineInstr *Select = 32180b57cec5SDimitry Andric BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), DstReg) 32195ffd83dbSDimitry Andric .addReg(TrueReg) 32205ffd83dbSDimitry Andric .addReg(FalseReg); 32210b57cec5SDimitry Andric 32220b57cec5SDimitry Andric preserveCondRegFlags(Select->getOperand(3), Cond[1]); 32230b57cec5SDimitry Andric return; 32240b57cec5SDimitry Andric } 32250b57cec5SDimitry Andric 32260b57cec5SDimitry Andric static const int16_t Sub0_15[] = { 32270b57cec5SDimitry Andric AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 32280b57cec5SDimitry Andric AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, 32290b57cec5SDimitry Andric AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11, 32300b57cec5SDimitry Andric AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, 32310b57cec5SDimitry Andric }; 32320b57cec5SDimitry Andric 32330b57cec5SDimitry Andric static const int16_t Sub0_15_64[] = { 32340b57cec5SDimitry Andric AMDGPU::sub0_sub1, AMDGPU::sub2_sub3, 32350b57cec5SDimitry Andric AMDGPU::sub4_sub5, AMDGPU::sub6_sub7, 32360b57cec5SDimitry Andric AMDGPU::sub8_sub9, AMDGPU::sub10_sub11, 32370b57cec5SDimitry Andric AMDGPU::sub12_sub13, AMDGPU::sub14_sub15, 32380b57cec5SDimitry Andric }; 32390b57cec5SDimitry Andric 32400b57cec5SDimitry Andric unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32; 32410b57cec5SDimitry Andric const 
TargetRegisterClass *EltRC = &AMDGPU::VGPR_32RegClass; 32420b57cec5SDimitry Andric const int16_t *SubIndices = Sub0_15; 32430b57cec5SDimitry Andric int NElts = DstSize / 32; 32440b57cec5SDimitry Andric 32450b57cec5SDimitry Andric // 64-bit select is only available for SALU. 32460b57cec5SDimitry Andric // TODO: Split 96-bit into 64-bit and 32-bit, not 3x 32-bit. 32470b57cec5SDimitry Andric if (Pred == SCC_TRUE) { 32480b57cec5SDimitry Andric if (NElts % 2) { 32490b57cec5SDimitry Andric SelOp = AMDGPU::S_CSELECT_B32; 32500b57cec5SDimitry Andric EltRC = &AMDGPU::SGPR_32RegClass; 32510b57cec5SDimitry Andric } else { 32520b57cec5SDimitry Andric SelOp = AMDGPU::S_CSELECT_B64; 32530b57cec5SDimitry Andric EltRC = &AMDGPU::SGPR_64RegClass; 32540b57cec5SDimitry Andric SubIndices = Sub0_15_64; 32550b57cec5SDimitry Andric NElts /= 2; 32560b57cec5SDimitry Andric } 32570b57cec5SDimitry Andric } 32580b57cec5SDimitry Andric 32590b57cec5SDimitry Andric MachineInstrBuilder MIB = BuildMI( 32600b57cec5SDimitry Andric MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg); 32610b57cec5SDimitry Andric 32620b57cec5SDimitry Andric I = MIB->getIterator(); 32630b57cec5SDimitry Andric 32645ffd83dbSDimitry Andric SmallVector<Register, 8> Regs; 32650b57cec5SDimitry Andric for (int Idx = 0; Idx != NElts; ++Idx) { 32668bcb0991SDimitry Andric Register DstElt = MRI.createVirtualRegister(EltRC); 32670b57cec5SDimitry Andric Regs.push_back(DstElt); 32680b57cec5SDimitry Andric 32690b57cec5SDimitry Andric unsigned SubIdx = SubIndices[Idx]; 32700b57cec5SDimitry Andric 32715ffd83dbSDimitry Andric MachineInstr *Select; 32725ffd83dbSDimitry Andric if (SelOp == AMDGPU::V_CNDMASK_B32_e32) { 32735ffd83dbSDimitry Andric Select = 32740b57cec5SDimitry Andric BuildMI(MBB, I, DL, get(SelOp), DstElt) 32750b57cec5SDimitry Andric .addReg(FalseReg, 0, SubIdx) 32760b57cec5SDimitry Andric .addReg(TrueReg, 0, SubIdx); 32775ffd83dbSDimitry Andric } else { 32785ffd83dbSDimitry Andric Select = 32795ffd83dbSDimitry Andric BuildMI(MBB, I, DL, get(SelOp), DstElt) 32805ffd83dbSDimitry Andric .addReg(TrueReg, 0, SubIdx) 32815ffd83dbSDimitry Andric .addReg(FalseReg, 0, SubIdx); 32825ffd83dbSDimitry Andric } 32835ffd83dbSDimitry Andric 32840b57cec5SDimitry Andric preserveCondRegFlags(Select->getOperand(3), Cond[1]); 32850b57cec5SDimitry Andric fixImplicitOperands(*Select); 32860b57cec5SDimitry Andric 32870b57cec5SDimitry Andric MIB.addReg(DstElt) 32880b57cec5SDimitry Andric .addImm(SubIdx); 32890b57cec5SDimitry Andric } 32900b57cec5SDimitry Andric } 32910b57cec5SDimitry Andric 3292349cc55cSDimitry Andric bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) { 32930b57cec5SDimitry Andric switch (MI.getOpcode()) { 32940b57cec5SDimitry Andric case AMDGPU::V_MOV_B32_e32: 32950b57cec5SDimitry Andric case AMDGPU::V_MOV_B32_e64: 3296349cc55cSDimitry Andric case AMDGPU::V_MOV_B64_PSEUDO: 329781ad6265SDimitry Andric case AMDGPU::V_MOV_B64_e32: 329881ad6265SDimitry Andric case AMDGPU::V_MOV_B64_e64: 32990b57cec5SDimitry Andric case AMDGPU::S_MOV_B32: 33000b57cec5SDimitry Andric case AMDGPU::S_MOV_B64: 33015f757f3fSDimitry Andric case AMDGPU::S_MOV_B64_IMM_PSEUDO: 33020b57cec5SDimitry Andric case AMDGPU::COPY: 33035f757f3fSDimitry Andric case AMDGPU::WWM_COPY: 3304e8d8bef9SDimitry Andric case AMDGPU::V_ACCVGPR_WRITE_B32_e64: 3305e8d8bef9SDimitry Andric case AMDGPU::V_ACCVGPR_READ_B32_e64: 3306fe6060f1SDimitry Andric case AMDGPU::V_ACCVGPR_MOV_B32: 33070b57cec5SDimitry Andric return true; 33080b57cec5SDimitry Andric default: 33090b57cec5SDimitry Andric return false; 
33100b57cec5SDimitry Andric } 33110b57cec5SDimitry Andric } 33120b57cec5SDimitry Andric 331381ad6265SDimitry Andric static constexpr unsigned ModifierOpNames[] = { 331481ad6265SDimitry Andric AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers, 331581ad6265SDimitry Andric AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp, 3316bdd1243dSDimitry Andric AMDGPU::OpName::omod, AMDGPU::OpName::op_sel}; 33170b57cec5SDimitry Andric 331881ad6265SDimitry Andric void SIInstrInfo::removeModOperands(MachineInstr &MI) const { 33190b57cec5SDimitry Andric unsigned Opc = MI.getOpcode(); 3320bdd1243dSDimitry Andric for (unsigned Name : reverse(ModifierOpNames)) { 3321bdd1243dSDimitry Andric int Idx = AMDGPU::getNamedOperandIdx(Opc, Name); 3322bdd1243dSDimitry Andric if (Idx >= 0) 3323bdd1243dSDimitry Andric MI.removeOperand(Idx); 3324bdd1243dSDimitry Andric } 33250b57cec5SDimitry Andric } 33260b57cec5SDimitry Andric 33270b57cec5SDimitry Andric bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, 33285ffd83dbSDimitry Andric Register Reg, MachineRegisterInfo *MRI) const { 33290b57cec5SDimitry Andric if (!MRI->hasOneNonDBGUse(Reg)) 33300b57cec5SDimitry Andric return false; 33310b57cec5SDimitry Andric 33320b57cec5SDimitry Andric switch (DefMI.getOpcode()) { 33330b57cec5SDimitry Andric default: 33340b57cec5SDimitry Andric return false; 33355f757f3fSDimitry Andric case AMDGPU::V_MOV_B64_e32: 33360b57cec5SDimitry Andric case AMDGPU::S_MOV_B64: 33375f757f3fSDimitry Andric case AMDGPU::V_MOV_B64_PSEUDO: 33385f757f3fSDimitry Andric case AMDGPU::S_MOV_B64_IMM_PSEUDO: 33390b57cec5SDimitry Andric case AMDGPU::V_MOV_B32_e32: 33400b57cec5SDimitry Andric case AMDGPU::S_MOV_B32: 3341e8d8bef9SDimitry Andric case AMDGPU::V_ACCVGPR_WRITE_B32_e64: 33420b57cec5SDimitry Andric break; 33430b57cec5SDimitry Andric } 33440b57cec5SDimitry Andric 33450b57cec5SDimitry Andric const MachineOperand *ImmOp = getNamedOperand(DefMI, AMDGPU::OpName::src0); 33460b57cec5SDimitry Andric assert(ImmOp); 33470b57cec5SDimitry Andric // FIXME: We could handle FrameIndex values here. 
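// The getImmFor lambda defined below extracts the piece of the (up to
// 64-bit) immediate that a sub-register use actually reads, e.g. a use of
// sub1 sees Hi_32(Imm) and a use of lo16 sees the sign-extended low 16 bits.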
33480b57cec5SDimitry Andric if (!ImmOp->isImm()) 33490b57cec5SDimitry Andric return false; 33500b57cec5SDimitry Andric 33515f757f3fSDimitry Andric auto getImmFor = [ImmOp](const MachineOperand &UseOp) -> int64_t { 33525f757f3fSDimitry Andric int64_t Imm = ImmOp->getImm(); 33535f757f3fSDimitry Andric switch (UseOp.getSubReg()) { 33545f757f3fSDimitry Andric default: 33555f757f3fSDimitry Andric return Imm; 33565f757f3fSDimitry Andric case AMDGPU::sub0: 33575f757f3fSDimitry Andric return Lo_32(Imm); 33585f757f3fSDimitry Andric case AMDGPU::sub1: 33595f757f3fSDimitry Andric return Hi_32(Imm); 33605f757f3fSDimitry Andric case AMDGPU::lo16: 33615f757f3fSDimitry Andric return APInt(16, Imm).getSExtValue(); 33625f757f3fSDimitry Andric case AMDGPU::hi16: 33635f757f3fSDimitry Andric return APInt(32, Imm).ashr(16).getSExtValue(); 33645f757f3fSDimitry Andric case AMDGPU::sub1_lo16: 33655f757f3fSDimitry Andric return APInt(16, Hi_32(Imm)).getSExtValue(); 33665f757f3fSDimitry Andric case AMDGPU::sub1_hi16: 33675f757f3fSDimitry Andric return APInt(32, Hi_32(Imm)).ashr(16).getSExtValue(); 33685f757f3fSDimitry Andric } 33695f757f3fSDimitry Andric }; 33705f757f3fSDimitry Andric 33715f757f3fSDimitry Andric assert(!DefMI.getOperand(0).getSubReg() && "Expected SSA form"); 33725f757f3fSDimitry Andric 33730b57cec5SDimitry Andric unsigned Opc = UseMI.getOpcode(); 33740b57cec5SDimitry Andric if (Opc == AMDGPU::COPY) { 33755f757f3fSDimitry Andric assert(!UseMI.getOperand(0).getSubReg() && "Expected SSA form"); 33765ffd83dbSDimitry Andric 33775f757f3fSDimitry Andric Register DstReg = UseMI.getOperand(0).getReg(); 33785f757f3fSDimitry Andric unsigned OpSize = getOpSize(UseMI, 0); 33795f757f3fSDimitry Andric bool Is16Bit = OpSize == 2; 33805f757f3fSDimitry Andric bool Is64Bit = OpSize == 8; 33815f757f3fSDimitry Andric bool isVGPRCopy = RI.isVGPR(*MRI, DstReg); 33825f757f3fSDimitry Andric unsigned NewOpc = isVGPRCopy ? Is64Bit ? AMDGPU::V_MOV_B64_PSEUDO 33835f757f3fSDimitry Andric : AMDGPU::V_MOV_B32_e32 33845f757f3fSDimitry Andric : Is64Bit ? AMDGPU::S_MOV_B64_IMM_PSEUDO 33855f757f3fSDimitry Andric : AMDGPU::S_MOV_B32; 33865f757f3fSDimitry Andric APInt Imm(Is64Bit ? 
64 : 32, getImmFor(UseMI.getOperand(1))); 33875ffd83dbSDimitry Andric 33885ffd83dbSDimitry Andric if (RI.isAGPR(*MRI, DstReg)) { 33895f757f3fSDimitry Andric if (Is64Bit || !isInlineConstant(Imm)) 33900b57cec5SDimitry Andric return false; 3391e8d8bef9SDimitry Andric NewOpc = AMDGPU::V_ACCVGPR_WRITE_B32_e64; 33920b57cec5SDimitry Andric } 33935ffd83dbSDimitry Andric 33945ffd83dbSDimitry Andric if (Is16Bit) { 33955ffd83dbSDimitry Andric if (isVGPRCopy) 33965ffd83dbSDimitry Andric return false; // Do not clobber vgpr_hi16 33975ffd83dbSDimitry Andric 33984824e7fdSDimitry Andric if (DstReg.isVirtual() && UseMI.getOperand(0).getSubReg() != AMDGPU::lo16) 33995ffd83dbSDimitry Andric return false; 34005ffd83dbSDimitry Andric 34015ffd83dbSDimitry Andric UseMI.getOperand(0).setSubReg(0); 34025ffd83dbSDimitry Andric if (DstReg.isPhysical()) { 34035ffd83dbSDimitry Andric DstReg = RI.get32BitRegister(DstReg); 34045ffd83dbSDimitry Andric UseMI.getOperand(0).setReg(DstReg); 34055ffd83dbSDimitry Andric } 34065ffd83dbSDimitry Andric assert(UseMI.getOperand(1).getReg().isVirtual()); 34075ffd83dbSDimitry Andric } 34085ffd83dbSDimitry Andric 340906c3fb27SDimitry Andric const MCInstrDesc &NewMCID = get(NewOpc); 341006c3fb27SDimitry Andric if (DstReg.isPhysical() && 341106c3fb27SDimitry Andric !RI.getRegClass(NewMCID.operands()[0].RegClass)->contains(DstReg)) 341206c3fb27SDimitry Andric return false; 341306c3fb27SDimitry Andric 341406c3fb27SDimitry Andric UseMI.setDesc(NewMCID); 34155ffd83dbSDimitry Andric UseMI.getOperand(1).ChangeToImmediate(Imm.getSExtValue()); 34160b57cec5SDimitry Andric UseMI.addImplicitDefUseOperands(*UseMI.getParent()->getParent()); 34170b57cec5SDimitry Andric return true; 34180b57cec5SDimitry Andric } 34190b57cec5SDimitry Andric 3420e8d8bef9SDimitry Andric if (Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 || 3421e8d8bef9SDimitry Andric Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 || 3422e8d8bef9SDimitry Andric Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 || 3423bdd1243dSDimitry Andric Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 || 3424bdd1243dSDimitry Andric Opc == AMDGPU::V_FMAC_F16_t16_e64) { 34250b57cec5SDimitry Andric // Don't fold if we are using source or output modifiers. The new VOP2 34260b57cec5SDimitry Andric // instructions don't have them. 34270b57cec5SDimitry Andric if (hasAnyModifiersSet(UseMI)) 34280b57cec5SDimitry Andric return false; 34290b57cec5SDimitry Andric 34300b57cec5SDimitry Andric // If this is a free constant, there's no reason to do this. 34310b57cec5SDimitry Andric // TODO: We could fold this here instead of letting SIFoldOperands do it 34320b57cec5SDimitry Andric // later. 34330b57cec5SDimitry Andric MachineOperand *Src0 = getNamedOperand(UseMI, AMDGPU::OpName::src0); 34340b57cec5SDimitry Andric 34350b57cec5SDimitry Andric // Any src operand can be used for the legality check. 
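// What the rest of this block builds toward, as a rough sketch (register
// names and operand lists are abbreviated/illustrative):
//   %k = V_MOV_B32_e32 0x41200000
//   %d = V_MAC_F32_e64 %k, %b, %acc        ; no modifiers set
// can become
//   %d = V_MADMK_F32 %b, 0x41200000, %acc
// The V_MADAK / V_FMAAK forms are used instead when the constant feeds src2.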
34360b57cec5SDimitry Andric if (isInlineConstant(UseMI, *Src0, *ImmOp)) 34370b57cec5SDimitry Andric return false; 34380b57cec5SDimitry Andric 3439e8d8bef9SDimitry Andric bool IsF32 = Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 || 3440e8d8bef9SDimitry Andric Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64; 3441bdd1243dSDimitry Andric bool IsFMA = 3442bdd1243dSDimitry Andric Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 || 3443bdd1243dSDimitry Andric Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 || 3444bdd1243dSDimitry Andric Opc == AMDGPU::V_FMAC_F16_t16_e64; 34450b57cec5SDimitry Andric MachineOperand *Src1 = getNamedOperand(UseMI, AMDGPU::OpName::src1); 34460b57cec5SDimitry Andric MachineOperand *Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2); 34470b57cec5SDimitry Andric 34480b57cec5SDimitry Andric // Multiplied part is the constant: Use v_madmk_{f16, f32}. 34495f757f3fSDimitry Andric if ((Src0->isReg() && Src0->getReg() == Reg) || 34505f757f3fSDimitry Andric (Src1->isReg() && Src1->getReg() == Reg)) { 34515f757f3fSDimitry Andric MachineOperand *RegSrc = 34525f757f3fSDimitry Andric Src1->isReg() && Src1->getReg() == Reg ? Src0 : Src1; 34535f757f3fSDimitry Andric if (!RegSrc->isReg()) 34545f757f3fSDimitry Andric return false; 34555f757f3fSDimitry Andric if (RI.isSGPRClass(MRI->getRegClass(RegSrc->getReg())) && 34565f757f3fSDimitry Andric ST.getConstantBusLimit(Opc) < 2) 34570b57cec5SDimitry Andric return false; 34580b57cec5SDimitry Andric 34590b57cec5SDimitry Andric if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg()))) 34600b57cec5SDimitry Andric return false; 34610b57cec5SDimitry Andric 34625f757f3fSDimitry Andric // If src2 is also a literal constant then we have to choose which one to 34635f757f3fSDimitry Andric // fold. In general it is better to choose madak so that the other literal 34645f757f3fSDimitry Andric // can be materialized in an sgpr instead of a vgpr: 34655f757f3fSDimitry Andric // s_mov_b32 s0, literal 34665f757f3fSDimitry Andric // v_madak_f32 v0, s0, v0, literal 34675f757f3fSDimitry Andric // Instead of: 34685f757f3fSDimitry Andric // v_mov_b32 v1, literal 34695f757f3fSDimitry Andric // v_madmk_f32 v0, v0, literal, v1 34705f757f3fSDimitry Andric MachineInstr *Def = MRI->getUniqueVRegDef(Src2->getReg()); 34715f757f3fSDimitry Andric if (Def && Def->isMoveImmediate() && 34725f757f3fSDimitry Andric !isInlineConstant(Def->getOperand(1))) 34735f757f3fSDimitry Andric return false; 34745f757f3fSDimitry Andric 34750b57cec5SDimitry Andric unsigned NewOpc = 3476bdd1243dSDimitry Andric IsFMA ? (IsF32 ? AMDGPU::V_FMAMK_F32 3477bdd1243dSDimitry Andric : ST.hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_t16 3478bdd1243dSDimitry Andric : AMDGPU::V_FMAMK_F16) 34790b57cec5SDimitry Andric : (IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16); 34800b57cec5SDimitry Andric if (pseudoToMCOpcode(NewOpc) == -1) 34810b57cec5SDimitry Andric return false; 34820b57cec5SDimitry Andric 34835f757f3fSDimitry Andric // V_FMAMK_F16_t16 takes VGPR_32_Lo128 operands, so the rewrite 34845f757f3fSDimitry Andric // would also require restricting their register classes. For now 34855f757f3fSDimitry Andric // just bail out. 34865f757f3fSDimitry Andric if (NewOpc == AMDGPU::V_FMAMK_F16_t16) 34875f757f3fSDimitry Andric return false; 34880b57cec5SDimitry Andric 34895f757f3fSDimitry Andric const int64_t Imm = getImmFor(RegSrc == Src1 ? 
*Src0 : *Src1); 34900b57cec5SDimitry Andric 34910b57cec5SDimitry Andric // FIXME: This would be a lot easier if we could return a new instruction 34920b57cec5SDimitry Andric // instead of having to modify in place. 34930b57cec5SDimitry Andric 34945f757f3fSDimitry Andric Register SrcReg = RegSrc->getReg(); 34955f757f3fSDimitry Andric unsigned SrcSubReg = RegSrc->getSubReg(); 34965f757f3fSDimitry Andric Src0->setReg(SrcReg); 34975f757f3fSDimitry Andric Src0->setSubReg(SrcSubReg); 34985f757f3fSDimitry Andric Src0->setIsKill(RegSrc->isKill()); 34990b57cec5SDimitry Andric 3500bdd1243dSDimitry Andric if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 || 3501bdd1243dSDimitry Andric Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 || 35020b57cec5SDimitry Andric Opc == AMDGPU::V_FMAC_F16_e64) 35030b57cec5SDimitry Andric UseMI.untieRegOperand( 35040b57cec5SDimitry Andric AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); 35050b57cec5SDimitry Andric 35060b57cec5SDimitry Andric Src1->ChangeToImmediate(Imm); 35070b57cec5SDimitry Andric 35080b57cec5SDimitry Andric removeModOperands(UseMI); 35090b57cec5SDimitry Andric UseMI.setDesc(get(NewOpc)); 35100b57cec5SDimitry Andric 351181ad6265SDimitry Andric bool DeleteDef = MRI->use_nodbg_empty(Reg); 35120b57cec5SDimitry Andric if (DeleteDef) 35130b57cec5SDimitry Andric DefMI.eraseFromParent(); 35140b57cec5SDimitry Andric 35150b57cec5SDimitry Andric return true; 35160b57cec5SDimitry Andric } 35170b57cec5SDimitry Andric 35180b57cec5SDimitry Andric // Added part is the constant: Use v_madak_{f16, f32}. 35190b57cec5SDimitry Andric if (Src2->isReg() && Src2->getReg() == Reg) { 35205f757f3fSDimitry Andric if (ST.getConstantBusLimit(Opc) < 2) { 35210b57cec5SDimitry Andric // Not allowed to use constant bus for another operand. 35220b57cec5SDimitry Andric // We can however allow an inline immediate as src0. 35230b57cec5SDimitry Andric bool Src0Inlined = false; 35240b57cec5SDimitry Andric if (Src0->isReg()) { 35250b57cec5SDimitry Andric // Try to inline constant if possible. 35260b57cec5SDimitry Andric // If the Def moves immediate and the use is single 35270b57cec5SDimitry Andric // We are saving VGPR here. 
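// (Roughly why: the madak form will carry a literal, which already occupies
// the single constant-bus slot when getConstantBusLimit(Opc) < 2, so an SGPR
// in src0/src1 would be a second constant-bus read unless it can be replaced
// by an inline immediate first.)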
35280b57cec5SDimitry Andric MachineInstr *Def = MRI->getUniqueVRegDef(Src0->getReg()); 35290b57cec5SDimitry Andric if (Def && Def->isMoveImmediate() && 35300b57cec5SDimitry Andric isInlineConstant(Def->getOperand(1)) && 35310b57cec5SDimitry Andric MRI->hasOneUse(Src0->getReg())) { 35320b57cec5SDimitry Andric Src0->ChangeToImmediate(Def->getOperand(1).getImm()); 35330b57cec5SDimitry Andric Src0Inlined = true; 35345f757f3fSDimitry Andric } else if (ST.getConstantBusLimit(Opc) <= 1 && 35355f757f3fSDimitry Andric RI.isSGPRReg(*MRI, Src0->getReg())) { 35360b57cec5SDimitry Andric return false; 35375f757f3fSDimitry Andric } 35380b57cec5SDimitry Andric // VGPR is okay as Src0 - fallthrough 35390b57cec5SDimitry Andric } 35400b57cec5SDimitry Andric 35410b57cec5SDimitry Andric if (Src1->isReg() && !Src0Inlined) { 35420b57cec5SDimitry Andric // We have one slot for inlinable constant so far - try to fill it 35430b57cec5SDimitry Andric MachineInstr *Def = MRI->getUniqueVRegDef(Src1->getReg()); 35440b57cec5SDimitry Andric if (Def && Def->isMoveImmediate() && 35450b57cec5SDimitry Andric isInlineConstant(Def->getOperand(1)) && 35465f757f3fSDimitry Andric MRI->hasOneUse(Src1->getReg()) && commuteInstruction(UseMI)) 35470b57cec5SDimitry Andric Src0->ChangeToImmediate(Def->getOperand(1).getImm()); 35485f757f3fSDimitry Andric else if (RI.isSGPRReg(*MRI, Src1->getReg())) 35490b57cec5SDimitry Andric return false; 35500b57cec5SDimitry Andric // VGPR is okay as Src1 - fallthrough 35510b57cec5SDimitry Andric } 35525f757f3fSDimitry Andric } 35530b57cec5SDimitry Andric 35540b57cec5SDimitry Andric unsigned NewOpc = 3555bdd1243dSDimitry Andric IsFMA ? (IsF32 ? AMDGPU::V_FMAAK_F32 3556bdd1243dSDimitry Andric : ST.hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_t16 3557bdd1243dSDimitry Andric : AMDGPU::V_FMAAK_F16) 35580b57cec5SDimitry Andric : (IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16); 35590b57cec5SDimitry Andric if (pseudoToMCOpcode(NewOpc) == -1) 35600b57cec5SDimitry Andric return false; 35610b57cec5SDimitry Andric 35625f757f3fSDimitry Andric // V_FMAAK_F16_t16 takes VGPR_32_Lo128 operands, so the rewrite 35635f757f3fSDimitry Andric // would also require restricting their register classes. For now 35645f757f3fSDimitry Andric // just bail out. 35655f757f3fSDimitry Andric if (NewOpc == AMDGPU::V_FMAAK_F16_t16) 35665f757f3fSDimitry Andric return false; 35670b57cec5SDimitry Andric 35680b57cec5SDimitry Andric // FIXME: This would be a lot easier if we could return a new instruction 35690b57cec5SDimitry Andric // instead of having to modify in place. 35700b57cec5SDimitry Andric 3571bdd1243dSDimitry Andric if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 || 3572bdd1243dSDimitry Andric Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 || 35730b57cec5SDimitry Andric Opc == AMDGPU::V_FMAC_F16_e64) 35740b57cec5SDimitry Andric UseMI.untieRegOperand( 35750b57cec5SDimitry Andric AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); 35760b57cec5SDimitry Andric 35770b57cec5SDimitry Andric // ChangingToImmediate adds Src2 back to the instruction. 35785f757f3fSDimitry Andric Src2->ChangeToImmediate(getImmFor(*Src2)); 35790b57cec5SDimitry Andric 35800b57cec5SDimitry Andric // These come before src2. 35810b57cec5SDimitry Andric removeModOperands(UseMI); 35820b57cec5SDimitry Andric UseMI.setDesc(get(NewOpc)); 35830b57cec5SDimitry Andric // It might happen that UseMI was commuted 35840b57cec5SDimitry Andric // and we now have SGPR as SRC1. 
If so, the inlined
35850b57cec5SDimitry Andric // constant together with the SGPR would be illegal.
35860b57cec5SDimitry Andric legalizeOperands(UseMI);
35870b57cec5SDimitry Andric
358881ad6265SDimitry Andric bool DeleteDef = MRI->use_nodbg_empty(Reg);
35890b57cec5SDimitry Andric if (DeleteDef)
35900b57cec5SDimitry Andric DefMI.eraseFromParent();
35910b57cec5SDimitry Andric
35920b57cec5SDimitry Andric return true;
35930b57cec5SDimitry Andric }
35940b57cec5SDimitry Andric }
35950b57cec5SDimitry Andric
35960b57cec5SDimitry Andric return false;
35970b57cec5SDimitry Andric }
35980b57cec5SDimitry Andric
35995ffd83dbSDimitry Andric static bool
36005ffd83dbSDimitry Andric memOpsHaveSameBaseOperands(ArrayRef<const MachineOperand *> BaseOps1,
36015ffd83dbSDimitry Andric ArrayRef<const MachineOperand *> BaseOps2) {
36025ffd83dbSDimitry Andric if (BaseOps1.size() != BaseOps2.size())
36035ffd83dbSDimitry Andric return false;
36045ffd83dbSDimitry Andric for (size_t I = 0, E = BaseOps1.size(); I < E; ++I) {
36055ffd83dbSDimitry Andric if (!BaseOps1[I]->isIdenticalTo(*BaseOps2[I]))
36065ffd83dbSDimitry Andric return false;
36075ffd83dbSDimitry Andric }
36085ffd83dbSDimitry Andric return true;
36095ffd83dbSDimitry Andric }
36105ffd83dbSDimitry Andric
36110b57cec5SDimitry Andric static bool offsetsDoNotOverlap(int WidthA, int OffsetA,
36120b57cec5SDimitry Andric int WidthB, int OffsetB) {
36130b57cec5SDimitry Andric int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
36140b57cec5SDimitry Andric int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
36150b57cec5SDimitry Andric int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
36160b57cec5SDimitry Andric return LowOffset + LowWidth <= HighOffset;
36170b57cec5SDimitry Andric }
36180b57cec5SDimitry Andric
36190b57cec5SDimitry Andric bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
36200b57cec5SDimitry Andric const MachineInstr &MIb) const {
36215ffd83dbSDimitry Andric SmallVector<const MachineOperand *, 4> BaseOps0, BaseOps1;
36220b57cec5SDimitry Andric int64_t Offset0, Offset1;
36235ffd83dbSDimitry Andric unsigned Dummy0, Dummy1;
36245ffd83dbSDimitry Andric bool Offset0IsScalable, Offset1IsScalable;
36255ffd83dbSDimitry Andric if (!getMemOperandsWithOffsetWidth(MIa, BaseOps0, Offset0, Offset0IsScalable,
36265ffd83dbSDimitry Andric Dummy0, &RI) ||
36275ffd83dbSDimitry Andric !getMemOperandsWithOffsetWidth(MIb, BaseOps1, Offset1, Offset1IsScalable,
36285ffd83dbSDimitry Andric Dummy1, &RI))
36295ffd83dbSDimitry Andric return false;
36300b57cec5SDimitry Andric
36315ffd83dbSDimitry Andric if (!memOpsHaveSameBaseOperands(BaseOps0, BaseOps1))
36320b57cec5SDimitry Andric return false;
36330b57cec5SDimitry Andric
36340b57cec5SDimitry Andric if (!MIa.hasOneMemOperand() || !MIb.hasOneMemOperand()) {
36350b57cec5SDimitry Andric // FIXME: Handle ds_read2 / ds_write2.
36360b57cec5SDimitry Andric return false; 36370b57cec5SDimitry Andric } 36385ffd83dbSDimitry Andric unsigned Width0 = MIa.memoperands().front()->getSize(); 36395ffd83dbSDimitry Andric unsigned Width1 = MIb.memoperands().front()->getSize(); 36405ffd83dbSDimitry Andric return offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1); 36410b57cec5SDimitry Andric } 36420b57cec5SDimitry Andric 36430b57cec5SDimitry Andric bool SIInstrInfo::areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, 36448bcb0991SDimitry Andric const MachineInstr &MIb) const { 3645480093f4SDimitry Andric assert(MIa.mayLoadOrStore() && 36460b57cec5SDimitry Andric "MIa must load from or modify a memory location"); 3647480093f4SDimitry Andric assert(MIb.mayLoadOrStore() && 36480b57cec5SDimitry Andric "MIb must load from or modify a memory location"); 36490b57cec5SDimitry Andric 36500b57cec5SDimitry Andric if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects()) 36510b57cec5SDimitry Andric return false; 36520b57cec5SDimitry Andric 36530b57cec5SDimitry Andric // XXX - Can we relax this between address spaces? 36540b57cec5SDimitry Andric if (MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef()) 36550b57cec5SDimitry Andric return false; 36560b57cec5SDimitry Andric 3657cb14a3feSDimitry Andric if (isLDSDMA(MIa) || isLDSDMA(MIb)) 3658cb14a3feSDimitry Andric return false; 3659cb14a3feSDimitry Andric 36600b57cec5SDimitry Andric // TODO: Should we check the address space from the MachineMemOperand? That 36610b57cec5SDimitry Andric // would allow us to distinguish objects we know don't alias based on the 36620b57cec5SDimitry Andric // underlying address space, even if it was lowered to a different one, 36630b57cec5SDimitry Andric // e.g. private accesses lowered to use MUBUF instructions on a scratch 36640b57cec5SDimitry Andric // buffer. 
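// The per-class checks below are pairwise: two accesses of the same kind
// (DS/DS, buffer/buffer, SMRD/SMRD, FLAT/FLAT) compare their base operands
// plus offset/width via checkInstOffsetsDoNotOverlap, a few cross-class
// pairs are known to touch disjoint memory, and anything unhandled
// conservatively returns false (may alias).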
36650b57cec5SDimitry Andric if (isDS(MIa)) { 36660b57cec5SDimitry Andric if (isDS(MIb)) 36670b57cec5SDimitry Andric return checkInstOffsetsDoNotOverlap(MIa, MIb); 36680b57cec5SDimitry Andric 36690b57cec5SDimitry Andric return !isFLAT(MIb) || isSegmentSpecificFLAT(MIb); 36700b57cec5SDimitry Andric } 36710b57cec5SDimitry Andric 36720b57cec5SDimitry Andric if (isMUBUF(MIa) || isMTBUF(MIa)) { 36730b57cec5SDimitry Andric if (isMUBUF(MIb) || isMTBUF(MIb)) 36740b57cec5SDimitry Andric return checkInstOffsetsDoNotOverlap(MIa, MIb); 36750b57cec5SDimitry Andric 36765f757f3fSDimitry Andric if (isFLAT(MIb)) 36775f757f3fSDimitry Andric return isFLATScratch(MIb); 36785f757f3fSDimitry Andric 36795f757f3fSDimitry Andric return !isSMRD(MIb); 36800b57cec5SDimitry Andric } 36810b57cec5SDimitry Andric 36820b57cec5SDimitry Andric if (isSMRD(MIa)) { 36830b57cec5SDimitry Andric if (isSMRD(MIb)) 36840b57cec5SDimitry Andric return checkInstOffsetsDoNotOverlap(MIa, MIb); 36850b57cec5SDimitry Andric 36865f757f3fSDimitry Andric if (isFLAT(MIb)) 36875f757f3fSDimitry Andric return isFLATScratch(MIb); 36885f757f3fSDimitry Andric 36895f757f3fSDimitry Andric return !isMUBUF(MIb) && !isMTBUF(MIb); 36900b57cec5SDimitry Andric } 36910b57cec5SDimitry Andric 36920b57cec5SDimitry Andric if (isFLAT(MIa)) { 36935f757f3fSDimitry Andric if (isFLAT(MIb)) { 36945f757f3fSDimitry Andric if ((isFLATScratch(MIa) && isFLATGlobal(MIb)) || 36955f757f3fSDimitry Andric (isFLATGlobal(MIa) && isFLATScratch(MIb))) 36965f757f3fSDimitry Andric return true; 36975f757f3fSDimitry Andric 36980b57cec5SDimitry Andric return checkInstOffsetsDoNotOverlap(MIa, MIb); 36995f757f3fSDimitry Andric } 37000b57cec5SDimitry Andric 37010b57cec5SDimitry Andric return false; 37020b57cec5SDimitry Andric } 37030b57cec5SDimitry Andric 37040b57cec5SDimitry Andric return false; 37050b57cec5SDimitry Andric } 37060b57cec5SDimitry Andric 3707349cc55cSDimitry Andric static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI, 37080eae32dcSDimitry Andric int64_t &Imm, MachineInstr **DefMI = nullptr) { 3709349cc55cSDimitry Andric if (Reg.isPhysical()) 3710349cc55cSDimitry Andric return false; 3711349cc55cSDimitry Andric auto *Def = MRI.getUniqueVRegDef(Reg); 3712349cc55cSDimitry Andric if (Def && SIInstrInfo::isFoldableCopy(*Def) && Def->getOperand(1).isImm()) { 3713349cc55cSDimitry Andric Imm = Def->getOperand(1).getImm(); 37140eae32dcSDimitry Andric if (DefMI) 37150eae32dcSDimitry Andric *DefMI = Def; 3716349cc55cSDimitry Andric return true; 3717349cc55cSDimitry Andric } 3718349cc55cSDimitry Andric return false; 3719349cc55cSDimitry Andric } 3720349cc55cSDimitry Andric 37210eae32dcSDimitry Andric static bool getFoldableImm(const MachineOperand *MO, int64_t &Imm, 37220eae32dcSDimitry Andric MachineInstr **DefMI = nullptr) { 37230b57cec5SDimitry Andric if (!MO->isReg()) 37240b57cec5SDimitry Andric return false; 37250b57cec5SDimitry Andric const MachineFunction *MF = MO->getParent()->getParent()->getParent(); 37260b57cec5SDimitry Andric const MachineRegisterInfo &MRI = MF->getRegInfo(); 37270eae32dcSDimitry Andric return getFoldableImm(MO->getReg(), MRI, Imm, DefMI); 37280b57cec5SDimitry Andric } 37290b57cec5SDimitry Andric 3730e8d8bef9SDimitry Andric static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI, 3731e8d8bef9SDimitry Andric MachineInstr &NewMI) { 3732e8d8bef9SDimitry Andric if (LV) { 3733e8d8bef9SDimitry Andric unsigned NumOps = MI.getNumOperands(); 3734e8d8bef9SDimitry Andric for (unsigned I = 1; I < NumOps; ++I) { 3735e8d8bef9SDimitry Andric 
MachineOperand &Op = MI.getOperand(I); 3736e8d8bef9SDimitry Andric if (Op.isReg() && Op.isKill()) 3737e8d8bef9SDimitry Andric LV->replaceKillInstruction(Op.getReg(), MI, NewMI); 3738e8d8bef9SDimitry Andric } 3739e8d8bef9SDimitry Andric } 3740e8d8bef9SDimitry Andric } 3741e8d8bef9SDimitry Andric 3742349cc55cSDimitry Andric MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, 3743349cc55cSDimitry Andric LiveVariables *LV, 3744349cc55cSDimitry Andric LiveIntervals *LIS) const { 374504eeddc0SDimitry Andric MachineBasicBlock &MBB = *MI.getParent(); 374681ad6265SDimitry Andric unsigned Opc = MI.getOpcode(); 374704eeddc0SDimitry Andric 374881ad6265SDimitry Andric // Handle MFMA. 374981ad6265SDimitry Andric int NewMFMAOpc = AMDGPU::getMFMAEarlyClobberOp(Opc); 375004eeddc0SDimitry Andric if (NewMFMAOpc != -1) { 375181ad6265SDimitry Andric MachineInstrBuilder MIB = 375281ad6265SDimitry Andric BuildMI(MBB, MI, MI.getDebugLoc(), get(NewMFMAOpc)); 375304eeddc0SDimitry Andric for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) 375404eeddc0SDimitry Andric MIB.add(MI.getOperand(I)); 375504eeddc0SDimitry Andric updateLiveVariables(LV, MI, *MIB); 375604eeddc0SDimitry Andric if (LIS) 375704eeddc0SDimitry Andric LIS->ReplaceMachineInstrInMaps(MI, *MIB); 375804eeddc0SDimitry Andric return MIB; 375904eeddc0SDimitry Andric } 376004eeddc0SDimitry Andric 376181ad6265SDimitry Andric if (SIInstrInfo::isWMMA(MI)) { 376281ad6265SDimitry Andric unsigned NewOpc = AMDGPU::mapWMMA2AddrTo3AddrOpcode(MI.getOpcode()); 376381ad6265SDimitry Andric MachineInstrBuilder MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc)) 376481ad6265SDimitry Andric .setMIFlags(MI.getFlags()); 376581ad6265SDimitry Andric for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) 376681ad6265SDimitry Andric MIB->addOperand(MI.getOperand(I)); 376781ad6265SDimitry Andric 376881ad6265SDimitry Andric updateLiveVariables(LV, MI, *MIB); 376981ad6265SDimitry Andric if (LIS) 377081ad6265SDimitry Andric LIS->ReplaceMachineInstrInMaps(MI, *MIB); 377181ad6265SDimitry Andric 377281ad6265SDimitry Andric return MIB; 377381ad6265SDimitry Andric } 377481ad6265SDimitry Andric 3775bdd1243dSDimitry Andric assert(Opc != AMDGPU::V_FMAC_F16_t16_e32 && 3776bdd1243dSDimitry Andric "V_FMAC_F16_t16_e32 is not supported and not expected to be present " 3777bdd1243dSDimitry Andric "pre-RA"); 3778bdd1243dSDimitry Andric 377981ad6265SDimitry Andric // Handle MAC/FMAC. 
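// A rough sketch of the two-address to three-address rewrite handled below
// (virtual register names are illustrative; the exact opcode depends on the
// source opcode and subtarget):
//   %d = V_MAC_F32_e32 %a, %b, %d(tied)
// becomes
//   %d = V_MAD_F32_e64 0, %a, 0, %b, 0, %dacc, 0, 0
// or a V_MADAK / V_MADMK / V_FMAAK / V_FMAMK form when one source is a
// foldable immediate.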
378081ad6265SDimitry Andric bool IsF16 = Opc == AMDGPU::V_MAC_F16_e32 || Opc == AMDGPU::V_MAC_F16_e64 || 3781bdd1243dSDimitry Andric Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 || 3782bdd1243dSDimitry Andric Opc == AMDGPU::V_FMAC_F16_t16_e64; 378381ad6265SDimitry Andric bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64 || 378481ad6265SDimitry Andric Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 || 378581ad6265SDimitry Andric Opc == AMDGPU::V_FMAC_LEGACY_F32_e64 || 378681ad6265SDimitry Andric Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 || 3787bdd1243dSDimitry Andric Opc == AMDGPU::V_FMAC_F16_t16_e64 || 378881ad6265SDimitry Andric Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64; 378981ad6265SDimitry Andric bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64; 379081ad6265SDimitry Andric bool IsLegacy = Opc == AMDGPU::V_MAC_LEGACY_F32_e32 || 379181ad6265SDimitry Andric Opc == AMDGPU::V_MAC_LEGACY_F32_e64 || 379281ad6265SDimitry Andric Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 || 379381ad6265SDimitry Andric Opc == AMDGPU::V_FMAC_LEGACY_F32_e64; 379481ad6265SDimitry Andric bool Src0Literal = false; 379581ad6265SDimitry Andric 379681ad6265SDimitry Andric switch (Opc) { 379781ad6265SDimitry Andric default: 379881ad6265SDimitry Andric return nullptr; 379981ad6265SDimitry Andric case AMDGPU::V_MAC_F16_e64: 380081ad6265SDimitry Andric case AMDGPU::V_FMAC_F16_e64: 3801bdd1243dSDimitry Andric case AMDGPU::V_FMAC_F16_t16_e64: 380281ad6265SDimitry Andric case AMDGPU::V_MAC_F32_e64: 380381ad6265SDimitry Andric case AMDGPU::V_MAC_LEGACY_F32_e64: 380481ad6265SDimitry Andric case AMDGPU::V_FMAC_F32_e64: 380581ad6265SDimitry Andric case AMDGPU::V_FMAC_LEGACY_F32_e64: 380681ad6265SDimitry Andric case AMDGPU::V_FMAC_F64_e64: 380781ad6265SDimitry Andric break; 380881ad6265SDimitry Andric case AMDGPU::V_MAC_F16_e32: 380981ad6265SDimitry Andric case AMDGPU::V_FMAC_F16_e32: 381081ad6265SDimitry Andric case AMDGPU::V_MAC_F32_e32: 381181ad6265SDimitry Andric case AMDGPU::V_MAC_LEGACY_F32_e32: 381281ad6265SDimitry Andric case AMDGPU::V_FMAC_F32_e32: 381381ad6265SDimitry Andric case AMDGPU::V_FMAC_LEGACY_F32_e32: 381481ad6265SDimitry Andric case AMDGPU::V_FMAC_F64_e32: { 381581ad6265SDimitry Andric int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), 381681ad6265SDimitry Andric AMDGPU::OpName::src0); 381781ad6265SDimitry Andric const MachineOperand *Src0 = &MI.getOperand(Src0Idx); 381881ad6265SDimitry Andric if (!Src0->isReg() && !Src0->isImm()) 381981ad6265SDimitry Andric return nullptr; 382081ad6265SDimitry Andric 382181ad6265SDimitry Andric if (Src0->isImm() && !isInlineConstant(MI, Src0Idx, *Src0)) 382281ad6265SDimitry Andric Src0Literal = true; 382381ad6265SDimitry Andric 382481ad6265SDimitry Andric break; 382581ad6265SDimitry Andric } 382681ad6265SDimitry Andric } 382781ad6265SDimitry Andric 382881ad6265SDimitry Andric MachineInstrBuilder MIB; 38290b57cec5SDimitry Andric const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst); 38300b57cec5SDimitry Andric const MachineOperand *Src0 = getNamedOperand(MI, AMDGPU::OpName::src0); 38310b57cec5SDimitry Andric const MachineOperand *Src0Mods = 38320b57cec5SDimitry Andric getNamedOperand(MI, AMDGPU::OpName::src0_modifiers); 38330b57cec5SDimitry Andric const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1); 38340b57cec5SDimitry Andric const MachineOperand *Src1Mods = 38350b57cec5SDimitry Andric getNamedOperand(MI, AMDGPU::OpName::src1_modifiers); 
38360b57cec5SDimitry Andric const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2); 383781ad6265SDimitry Andric const MachineOperand *Src2Mods = 383881ad6265SDimitry Andric getNamedOperand(MI, AMDGPU::OpName::src2_modifiers); 38390b57cec5SDimitry Andric const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp); 38400b57cec5SDimitry Andric const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod); 3841bdd1243dSDimitry Andric const MachineOperand *OpSel = getNamedOperand(MI, AMDGPU::OpName::op_sel); 38420b57cec5SDimitry Andric 384381ad6265SDimitry Andric if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsF64 && 384481ad6265SDimitry Andric !IsLegacy && 38450b57cec5SDimitry Andric // If we have an SGPR input, we will violate the constant bus restriction. 3846e8d8bef9SDimitry Andric (ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() || 3847349cc55cSDimitry Andric !RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) { 38480eae32dcSDimitry Andric MachineInstr *DefMI; 3849753f127fSDimitry Andric const auto killDef = [&]() -> void { 38500eae32dcSDimitry Andric const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 38510eae32dcSDimitry Andric // The only user is the instruction which will be killed. 3852753f127fSDimitry Andric Register DefReg = DefMI->getOperand(0).getReg(); 3853753f127fSDimitry Andric if (!MRI.hasOneNonDBGUse(DefReg)) 38540eae32dcSDimitry Andric return; 38550eae32dcSDimitry Andric // We cannot just remove the DefMI here, calling pass will crash. 38560eae32dcSDimitry Andric DefMI->setDesc(get(AMDGPU::IMPLICIT_DEF)); 38570eae32dcSDimitry Andric for (unsigned I = DefMI->getNumOperands() - 1; I != 0; --I) 385881ad6265SDimitry Andric DefMI->removeOperand(I); 3859753f127fSDimitry Andric if (LV) 3860753f127fSDimitry Andric LV->getVarInfo(DefReg).AliveBlocks.clear(); 38610eae32dcSDimitry Andric }; 38620eae32dcSDimitry Andric 3863349cc55cSDimitry Andric int64_t Imm; 386481ad6265SDimitry Andric if (!Src0Literal && getFoldableImm(Src2, Imm, &DefMI)) { 38650b57cec5SDimitry Andric unsigned NewOpc = 3866bdd1243dSDimitry Andric IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_t16 3867bdd1243dSDimitry Andric : AMDGPU::V_FMAAK_F16) 3868bdd1243dSDimitry Andric : AMDGPU::V_FMAAK_F32) 38690b57cec5SDimitry Andric : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32); 3870e8d8bef9SDimitry Andric if (pseudoToMCOpcode(NewOpc) != -1) { 3871349cc55cSDimitry Andric MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc)) 38720b57cec5SDimitry Andric .add(*Dst) 38730b57cec5SDimitry Andric .add(*Src0) 38740b57cec5SDimitry Andric .add(*Src1) 38750b57cec5SDimitry Andric .addImm(Imm); 3876e8d8bef9SDimitry Andric updateLiveVariables(LV, MI, *MIB); 3877349cc55cSDimitry Andric if (LIS) 3878349cc55cSDimitry Andric LIS->ReplaceMachineInstrInMaps(MI, *MIB); 38790eae32dcSDimitry Andric killDef(); 3880e8d8bef9SDimitry Andric return MIB; 38810b57cec5SDimitry Andric } 3882e8d8bef9SDimitry Andric } 3883bdd1243dSDimitry Andric unsigned NewOpc = 3884bdd1243dSDimitry Andric IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_t16 3885bdd1243dSDimitry Andric : AMDGPU::V_FMAMK_F16) 3886bdd1243dSDimitry Andric : AMDGPU::V_FMAMK_F32) 38870b57cec5SDimitry Andric : (IsF16 ? 
AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32); 388881ad6265SDimitry Andric if (!Src0Literal && getFoldableImm(Src1, Imm, &DefMI)) { 3889e8d8bef9SDimitry Andric if (pseudoToMCOpcode(NewOpc) != -1) { 3890349cc55cSDimitry Andric MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc)) 38910b57cec5SDimitry Andric .add(*Dst) 38920b57cec5SDimitry Andric .add(*Src0) 38930b57cec5SDimitry Andric .addImm(Imm) 38940b57cec5SDimitry Andric .add(*Src2); 3895e8d8bef9SDimitry Andric updateLiveVariables(LV, MI, *MIB); 3896349cc55cSDimitry Andric if (LIS) 3897349cc55cSDimitry Andric LIS->ReplaceMachineInstrInMaps(MI, *MIB); 38980eae32dcSDimitry Andric killDef(); 3899e8d8bef9SDimitry Andric return MIB; 3900e8d8bef9SDimitry Andric } 39010b57cec5SDimitry Andric } 390281ad6265SDimitry Andric if (Src0Literal || getFoldableImm(Src0, Imm, &DefMI)) { 390381ad6265SDimitry Andric if (Src0Literal) { 390481ad6265SDimitry Andric Imm = Src0->getImm(); 390581ad6265SDimitry Andric DefMI = nullptr; 390681ad6265SDimitry Andric } 39070b57cec5SDimitry Andric if (pseudoToMCOpcode(NewOpc) != -1 && 3908e8d8bef9SDimitry Andric isOperandLegal( 3909e8d8bef9SDimitry Andric MI, AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::src0), 3910e8d8bef9SDimitry Andric Src1)) { 3911349cc55cSDimitry Andric MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc)) 39120b57cec5SDimitry Andric .add(*Dst) 39130b57cec5SDimitry Andric .add(*Src1) 39140b57cec5SDimitry Andric .addImm(Imm) 39150b57cec5SDimitry Andric .add(*Src2); 3916e8d8bef9SDimitry Andric updateLiveVariables(LV, MI, *MIB); 3917349cc55cSDimitry Andric if (LIS) 3918349cc55cSDimitry Andric LIS->ReplaceMachineInstrInMaps(MI, *MIB); 391981ad6265SDimitry Andric if (DefMI) 39200eae32dcSDimitry Andric killDef(); 3921e8d8bef9SDimitry Andric return MIB; 3922e8d8bef9SDimitry Andric } 39230b57cec5SDimitry Andric } 39240b57cec5SDimitry Andric } 39250b57cec5SDimitry Andric 392681ad6265SDimitry Andric // VOP2 mac/fmac with a literal operand cannot be converted to VOP3 mad/fma 3927bdd1243dSDimitry Andric // if VOP3 does not allow a literal operand. 3928bdd1243dSDimitry Andric if (Src0Literal && !ST.hasVOP3Literal()) 392981ad6265SDimitry Andric return nullptr; 393081ad6265SDimitry Andric 393181ad6265SDimitry Andric unsigned NewOpc = IsFMA ? IsF16 ? AMDGPU::V_FMA_F16_gfx9_e64 3932fe6060f1SDimitry Andric : IsF64 ? AMDGPU::V_FMA_F64_e64 393381ad6265SDimitry Andric : IsLegacy 393481ad6265SDimitry Andric ? AMDGPU::V_FMA_LEGACY_F32_e64 393581ad6265SDimitry Andric : AMDGPU::V_FMA_F32_e64 393681ad6265SDimitry Andric : IsF16 ? AMDGPU::V_MAD_F16_e64 393781ad6265SDimitry Andric : IsLegacy ? AMDGPU::V_MAD_LEGACY_F32_e64 393881ad6265SDimitry Andric : AMDGPU::V_MAD_F32_e64; 39390b57cec5SDimitry Andric if (pseudoToMCOpcode(NewOpc) == -1) 39400b57cec5SDimitry Andric return nullptr; 39410b57cec5SDimitry Andric 3942349cc55cSDimitry Andric MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc)) 39430b57cec5SDimitry Andric .add(*Dst) 39440b57cec5SDimitry Andric .addImm(Src0Mods ? Src0Mods->getImm() : 0) 39450b57cec5SDimitry Andric .add(*Src0) 39460b57cec5SDimitry Andric .addImm(Src1Mods ? Src1Mods->getImm() : 0) 39470b57cec5SDimitry Andric .add(*Src1) 394881ad6265SDimitry Andric .addImm(Src2Mods ? Src2Mods->getImm() : 0) 39490b57cec5SDimitry Andric .add(*Src2) 39500b57cec5SDimitry Andric .addImm(Clamp ? Clamp->getImm() : 0) 39510b57cec5SDimitry Andric .addImm(Omod ? Omod->getImm() : 0); 3952bdd1243dSDimitry Andric if (AMDGPU::hasNamedOperand(NewOpc, AMDGPU::OpName::op_sel)) 3953bdd1243dSDimitry Andric MIB.addImm(OpSel ? 
OpSel->getImm() : 0); 3954e8d8bef9SDimitry Andric updateLiveVariables(LV, MI, *MIB); 3955349cc55cSDimitry Andric if (LIS) 3956349cc55cSDimitry Andric LIS->ReplaceMachineInstrInMaps(MI, *MIB); 3957e8d8bef9SDimitry Andric return MIB; 39580b57cec5SDimitry Andric } 39590b57cec5SDimitry Andric 39600b57cec5SDimitry Andric // It's not generally safe to move VALU instructions across these since it will 39610b57cec5SDimitry Andric // start using the register as a base index rather than directly. 39620b57cec5SDimitry Andric // XXX - Why isn't hasSideEffects sufficient for these? 39630b57cec5SDimitry Andric static bool changesVGPRIndexingMode(const MachineInstr &MI) { 39640b57cec5SDimitry Andric switch (MI.getOpcode()) { 39650b57cec5SDimitry Andric case AMDGPU::S_SET_GPR_IDX_ON: 39660b57cec5SDimitry Andric case AMDGPU::S_SET_GPR_IDX_MODE: 39670b57cec5SDimitry Andric case AMDGPU::S_SET_GPR_IDX_OFF: 39680b57cec5SDimitry Andric return true; 39690b57cec5SDimitry Andric default: 39700b57cec5SDimitry Andric return false; 39710b57cec5SDimitry Andric } 39720b57cec5SDimitry Andric } 39730b57cec5SDimitry Andric 39740b57cec5SDimitry Andric bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI, 39750b57cec5SDimitry Andric const MachineBasicBlock *MBB, 39760b57cec5SDimitry Andric const MachineFunction &MF) const { 39775ffd83dbSDimitry Andric // Skipping the check for SP writes in the base implementation. The reason it 39785ffd83dbSDimitry Andric // was added was apparently due to compile time concerns. 39795ffd83dbSDimitry Andric // 39805ffd83dbSDimitry Andric // TODO: Do we really want this barrier? It triggers unnecessary hazard nops 39815ffd83dbSDimitry Andric // but is probably avoidable. 39825ffd83dbSDimitry Andric 39835ffd83dbSDimitry Andric // Copied from base implementation. 39845ffd83dbSDimitry Andric // Terminators and labels can't be scheduled around. 39855ffd83dbSDimitry Andric if (MI.isTerminator() || MI.isPosition()) 39865ffd83dbSDimitry Andric return true; 39875ffd83dbSDimitry Andric 39885ffd83dbSDimitry Andric // INLINEASM_BR can jump to another block 39895ffd83dbSDimitry Andric if (MI.getOpcode() == TargetOpcode::INLINEASM_BR) 39905ffd83dbSDimitry Andric return true; 39910b57cec5SDimitry Andric 399281ad6265SDimitry Andric if (MI.getOpcode() == AMDGPU::SCHED_BARRIER && MI.getOperand(0).getImm() == 0) 399381ad6265SDimitry Andric return true; 399481ad6265SDimitry Andric 39950b57cec5SDimitry Andric // Target-independent instructions do not have an implicit-use of EXEC, even 39960b57cec5SDimitry Andric // when they operate on VGPRs. Treating EXEC modifications as scheduling 39970b57cec5SDimitry Andric // boundaries prevents incorrect movements of such instructions. 
39985ffd83dbSDimitry Andric return MI.modifiesRegister(AMDGPU::EXEC, &RI) || 39990b57cec5SDimitry Andric MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 || 40000b57cec5SDimitry Andric MI.getOpcode() == AMDGPU::S_SETREG_B32 || 4001bdd1243dSDimitry Andric MI.getOpcode() == AMDGPU::S_SETPRIO || 40020b57cec5SDimitry Andric changesVGPRIndexingMode(MI); 40030b57cec5SDimitry Andric } 40040b57cec5SDimitry Andric 40050b57cec5SDimitry Andric bool SIInstrInfo::isAlwaysGDS(uint16_t Opcode) const { 40065f757f3fSDimitry Andric return Opcode == AMDGPU::DS_ORDERED_COUNT || isGWS(Opcode); 40070b57cec5SDimitry Andric } 40080b57cec5SDimitry Andric 40095ffd83dbSDimitry Andric bool SIInstrInfo::modifiesModeRegister(const MachineInstr &MI) { 40105ffd83dbSDimitry Andric // Skip the full operand and register alias search modifiesRegister 40115ffd83dbSDimitry Andric // does. There's only a handful of instructions that touch this, it's only an 40125ffd83dbSDimitry Andric // implicit def, and doesn't alias any other registers. 4013bdd1243dSDimitry Andric return is_contained(MI.getDesc().implicit_defs(), AMDGPU::MODE); 40145ffd83dbSDimitry Andric } 40155ffd83dbSDimitry Andric 40160b57cec5SDimitry Andric bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const { 40170b57cec5SDimitry Andric unsigned Opcode = MI.getOpcode(); 40180b57cec5SDimitry Andric 40190b57cec5SDimitry Andric if (MI.mayStore() && isSMRD(MI)) 40200b57cec5SDimitry Andric return true; // scalar store or atomic 40210b57cec5SDimitry Andric 40220b57cec5SDimitry Andric // This will terminate the function when other lanes may need to continue. 40230b57cec5SDimitry Andric if (MI.isReturn()) 40240b57cec5SDimitry Andric return true; 40250b57cec5SDimitry Andric 40260b57cec5SDimitry Andric // These instructions cause shader I/O that may cause hardware lockups 40270b57cec5SDimitry Andric // when executed with an empty EXEC mask. 40280b57cec5SDimitry Andric // 40290b57cec5SDimitry Andric // Note: exp with VM = DONE = 0 is automatically skipped by hardware when 40300b57cec5SDimitry Andric // EXEC = 0, but checking for that case here seems not worth it 40310b57cec5SDimitry Andric // given the typical code patterns. 40320b57cec5SDimitry Andric if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT || 4033e8d8bef9SDimitry Andric isEXP(Opcode) || 40340b57cec5SDimitry Andric Opcode == AMDGPU::DS_ORDERED_COUNT || Opcode == AMDGPU::S_TRAP || 40350b57cec5SDimitry Andric Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_BARRIER) 40360b57cec5SDimitry Andric return true; 40370b57cec5SDimitry Andric 40380b57cec5SDimitry Andric if (MI.isCall() || MI.isInlineAsm()) 40390b57cec5SDimitry Andric return true; // conservative assumption 40400b57cec5SDimitry Andric 40415ffd83dbSDimitry Andric // A mode change is a scalar operation that influences vector instructions. 40425ffd83dbSDimitry Andric if (modifiesModeRegister(MI)) 40435ffd83dbSDimitry Andric return true; 40445ffd83dbSDimitry Andric 40450b57cec5SDimitry Andric // These are like SALU instructions in terms of effects, so it's questionable 40460b57cec5SDimitry Andric // whether we should return true for those. 40470b57cec5SDimitry Andric // 40480b57cec5SDimitry Andric // However, executing them with EXEC = 0 causes them to operate on undefined 40490b57cec5SDimitry Andric // data, which we avoid by returning true here. 
4050e8d8bef9SDimitry Andric if (Opcode == AMDGPU::V_READFIRSTLANE_B32 || 40515f757f3fSDimitry Andric Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 || 40525f757f3fSDimitry Andric Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR || 40535f757f3fSDimitry Andric Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR) 40540b57cec5SDimitry Andric return true; 40550b57cec5SDimitry Andric 40560b57cec5SDimitry Andric return false; 40570b57cec5SDimitry Andric } 40580b57cec5SDimitry Andric 40590b57cec5SDimitry Andric bool SIInstrInfo::mayReadEXEC(const MachineRegisterInfo &MRI, 40600b57cec5SDimitry Andric const MachineInstr &MI) const { 40610b57cec5SDimitry Andric if (MI.isMetaInstruction()) 40620b57cec5SDimitry Andric return false; 40630b57cec5SDimitry Andric 40640b57cec5SDimitry Andric // This won't read exec if this is an SGPR->SGPR copy. 40650b57cec5SDimitry Andric if (MI.isCopyLike()) { 40660b57cec5SDimitry Andric if (!RI.isSGPRReg(MRI, MI.getOperand(0).getReg())) 40670b57cec5SDimitry Andric return true; 40680b57cec5SDimitry Andric 40690b57cec5SDimitry Andric // Make sure this isn't copying exec as a normal operand 40700b57cec5SDimitry Andric return MI.readsRegister(AMDGPU::EXEC, &RI); 40710b57cec5SDimitry Andric } 40720b57cec5SDimitry Andric 40730b57cec5SDimitry Andric // Make a conservative assumption about the callee. 40740b57cec5SDimitry Andric if (MI.isCall()) 40750b57cec5SDimitry Andric return true; 40760b57cec5SDimitry Andric 40770b57cec5SDimitry Andric // Be conservative with any unhandled generic opcodes. 40780b57cec5SDimitry Andric if (!isTargetSpecificOpcode(MI.getOpcode())) 40790b57cec5SDimitry Andric return true; 40800b57cec5SDimitry Andric 40810b57cec5SDimitry Andric return !isSALU(MI) || MI.readsRegister(AMDGPU::EXEC, &RI); 40820b57cec5SDimitry Andric } 40830b57cec5SDimitry Andric 40840b57cec5SDimitry Andric bool SIInstrInfo::isInlineConstant(const APInt &Imm) const { 40850b57cec5SDimitry Andric switch (Imm.getBitWidth()) { 40860b57cec5SDimitry Andric case 1: // This likely will be a condition code mask. 40870b57cec5SDimitry Andric return true; 40880b57cec5SDimitry Andric 40890b57cec5SDimitry Andric case 32: 40900b57cec5SDimitry Andric return AMDGPU::isInlinableLiteral32(Imm.getSExtValue(), 40910b57cec5SDimitry Andric ST.hasInv2PiInlineImm()); 40920b57cec5SDimitry Andric case 64: 40930b57cec5SDimitry Andric return AMDGPU::isInlinableLiteral64(Imm.getSExtValue(), 40940b57cec5SDimitry Andric ST.hasInv2PiInlineImm()); 40950b57cec5SDimitry Andric case 16: 40960b57cec5SDimitry Andric return ST.has16BitInsts() && 40970b57cec5SDimitry Andric AMDGPU::isInlinableLiteral16(Imm.getSExtValue(), 40980b57cec5SDimitry Andric ST.hasInv2PiInlineImm()); 40990b57cec5SDimitry Andric default: 41000b57cec5SDimitry Andric llvm_unreachable("invalid bitwidth"); 41010b57cec5SDimitry Andric } 41020b57cec5SDimitry Andric } 41030b57cec5SDimitry Andric 41040b57cec5SDimitry Andric bool SIInstrInfo::isInlineConstant(const MachineOperand &MO, 41050b57cec5SDimitry Andric uint8_t OperandType) const { 4106bdd1243dSDimitry Andric assert(!MO.isReg() && "isInlineConstant called on register operand!"); 41075f757f3fSDimitry Andric if (!MO.isImm()) 41080b57cec5SDimitry Andric return false; 41090b57cec5SDimitry Andric 41100b57cec5SDimitry Andric // MachineOperand provides no way to tell the true operand size, since it only 41110b57cec5SDimitry Andric // records a 64-bit value. 
We need to know the size to determine if a 32-bit 41120b57cec5SDimitry Andric // floating point immediate bit pattern is legal for an integer immediate. It 41130b57cec5SDimitry Andric // would be for any 32-bit integer operand, but would not be for a 64-bit one. 41140b57cec5SDimitry Andric 41150b57cec5SDimitry Andric int64_t Imm = MO.getImm(); 41160b57cec5SDimitry Andric switch (OperandType) { 41170b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_IMM_INT32: 41180b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_IMM_FP32: 4119349cc55cSDimitry Andric case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 41200b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_INLINE_C_INT32: 41210b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_INLINE_C_FP32: 4122fe6060f1SDimitry Andric case AMDGPU::OPERAND_REG_IMM_V2FP32: 4123fe6060f1SDimitry Andric case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 4124fe6060f1SDimitry Andric case AMDGPU::OPERAND_REG_IMM_V2INT32: 4125fe6060f1SDimitry Andric case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 41260b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 41275f757f3fSDimitry Andric case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 41285f757f3fSDimitry Andric case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: { 41290b57cec5SDimitry Andric int32_t Trunc = static_cast<int32_t>(Imm); 41300b57cec5SDimitry Andric return AMDGPU::isInlinableLiteral32(Trunc, ST.hasInv2PiInlineImm()); 41310b57cec5SDimitry Andric } 41320b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_IMM_INT64: 41330b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_IMM_FP64: 41340b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_INLINE_C_INT64: 41350b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_INLINE_C_FP64: 4136fe6060f1SDimitry Andric case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 41370b57cec5SDimitry Andric return AMDGPU::isInlinableLiteral64(MO.getImm(), 41380b57cec5SDimitry Andric ST.hasInv2PiInlineImm()); 41390b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_IMM_INT16: 41400b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_INLINE_C_INT16: 41410b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 41425ffd83dbSDimitry Andric // We would expect inline immediates to not be concerned with an integer/fp 41435ffd83dbSDimitry Andric // distinction. However, in the case of 16-bit integer operations, the 41445ffd83dbSDimitry Andric // "floating point" values appear to not work. It seems to read only the low 16 bits 41455ffd83dbSDimitry Andric // of 32-bit immediates, which happens to always work for the integer 41465ffd83dbSDimitry Andric // values. 41475ffd83dbSDimitry Andric // 41485ffd83dbSDimitry Andric // See llvm bugzilla 46302. 41495ffd83dbSDimitry Andric // 41505ffd83dbSDimitry Andric // TODO: Theoretically we could use op-sel to use the high bits of the 41515ffd83dbSDimitry Andric // 32-bit FP values.
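// Consequently, 16-bit integer operand types only accept the integer inline
// constants (-16..64) below, not the floating-point inline constants.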
41525ffd83dbSDimitry Andric return AMDGPU::isInlinableIntLiteral(Imm); 41535ffd83dbSDimitry Andric case AMDGPU::OPERAND_REG_IMM_V2INT16: 41545ffd83dbSDimitry Andric case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 41555ffd83dbSDimitry Andric case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 41565f757f3fSDimitry Andric return (isInt<16>(Imm) || isUInt<16>(Imm)) && 41575f757f3fSDimitry Andric AMDGPU::isInlinableIntLiteral((int16_t)Imm); 41585ffd83dbSDimitry Andric case AMDGPU::OPERAND_REG_IMM_FP16: 4159349cc55cSDimitry Andric case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 41605ffd83dbSDimitry Andric case AMDGPU::OPERAND_REG_INLINE_C_FP16: 41615f757f3fSDimitry Andric case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 41625f757f3fSDimitry Andric case AMDGPU::OPERAND_REG_IMM_V2FP16: 41635f757f3fSDimitry Andric case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 41645f757f3fSDimitry Andric case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 41650b57cec5SDimitry Andric if (isInt<16>(Imm) || isUInt<16>(Imm)) { 41660b57cec5SDimitry Andric // A few special case instructions have 16-bit operands on subtargets 41670b57cec5SDimitry Andric // where 16-bit instructions are not legal. 41680b57cec5SDimitry Andric // TODO: Do the 32-bit immediates work? We shouldn't really need to handle 41690b57cec5SDimitry Andric // constants in these cases 41700b57cec5SDimitry Andric int16_t Trunc = static_cast<int16_t>(Imm); 41710b57cec5SDimitry Andric return ST.has16BitInsts() && 41720b57cec5SDimitry Andric AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm()); 41730b57cec5SDimitry Andric } 41740b57cec5SDimitry Andric 41750b57cec5SDimitry Andric return false; 41760b57cec5SDimitry Andric } 4177349cc55cSDimitry Andric case AMDGPU::OPERAND_KIMM32: 4178349cc55cSDimitry Andric case AMDGPU::OPERAND_KIMM16: 4179349cc55cSDimitry Andric return false; 41805f757f3fSDimitry Andric case AMDGPU::OPERAND_INPUT_MODS: 41815f757f3fSDimitry Andric case MCOI::OPERAND_IMMEDIATE: 41825f757f3fSDimitry Andric // Always embedded in the instruction for free. 41835f757f3fSDimitry Andric return true; 41845f757f3fSDimitry Andric case MCOI::OPERAND_UNKNOWN: 41855f757f3fSDimitry Andric case MCOI::OPERAND_REGISTER: 41865f757f3fSDimitry Andric case MCOI::OPERAND_PCREL: 41875f757f3fSDimitry Andric case MCOI::OPERAND_GENERIC_0: 41885f757f3fSDimitry Andric case MCOI::OPERAND_GENERIC_1: 41895f757f3fSDimitry Andric case MCOI::OPERAND_GENERIC_2: 41905f757f3fSDimitry Andric case MCOI::OPERAND_GENERIC_3: 41915f757f3fSDimitry Andric case MCOI::OPERAND_GENERIC_4: 41925f757f3fSDimitry Andric case MCOI::OPERAND_GENERIC_5: 41935f757f3fSDimitry Andric // Just ignore anything else. 
41945f757f3fSDimitry Andric return true; 41950b57cec5SDimitry Andric default: 41965f757f3fSDimitry Andric llvm_unreachable("invalid operand type"); 41970b57cec5SDimitry Andric } 41980b57cec5SDimitry Andric } 41990b57cec5SDimitry Andric 42000b57cec5SDimitry Andric static bool compareMachineOp(const MachineOperand &Op0, 42010b57cec5SDimitry Andric const MachineOperand &Op1) { 42020b57cec5SDimitry Andric if (Op0.getType() != Op1.getType()) 42030b57cec5SDimitry Andric return false; 42040b57cec5SDimitry Andric 42050b57cec5SDimitry Andric switch (Op0.getType()) { 42060b57cec5SDimitry Andric case MachineOperand::MO_Register: 42070b57cec5SDimitry Andric return Op0.getReg() == Op1.getReg(); 42080b57cec5SDimitry Andric case MachineOperand::MO_Immediate: 42090b57cec5SDimitry Andric return Op0.getImm() == Op1.getImm(); 42100b57cec5SDimitry Andric default: 42110b57cec5SDimitry Andric llvm_unreachable("Didn't expect to be comparing these operand types"); 42120b57cec5SDimitry Andric } 42130b57cec5SDimitry Andric } 42140b57cec5SDimitry Andric 42150b57cec5SDimitry Andric bool SIInstrInfo::isImmOperandLegal(const MachineInstr &MI, unsigned OpNo, 42160b57cec5SDimitry Andric const MachineOperand &MO) const { 42170b57cec5SDimitry Andric const MCInstrDesc &InstDesc = MI.getDesc(); 4218bdd1243dSDimitry Andric const MCOperandInfo &OpInfo = InstDesc.operands()[OpNo]; 42190b57cec5SDimitry Andric 42200b57cec5SDimitry Andric assert(MO.isImm() || MO.isTargetIndex() || MO.isFI() || MO.isGlobal()); 42210b57cec5SDimitry Andric 42220b57cec5SDimitry Andric if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE) 42230b57cec5SDimitry Andric return true; 42240b57cec5SDimitry Andric 42250b57cec5SDimitry Andric if (OpInfo.RegClass < 0) 42260b57cec5SDimitry Andric return false; 42270b57cec5SDimitry Andric 42288bcb0991SDimitry Andric if (MO.isImm() && isInlineConstant(MO, OpInfo)) { 42298bcb0991SDimitry Andric if (isMAI(MI) && ST.hasMFMAInlineLiteralBug() && 42308bcb0991SDimitry Andric OpNo ==(unsigned)AMDGPU::getNamedOperandIdx(MI.getOpcode(), 42318bcb0991SDimitry Andric AMDGPU::OpName::src2)) 42328bcb0991SDimitry Andric return false; 42330b57cec5SDimitry Andric return RI.opCanUseInlineConstant(OpInfo.OperandType); 42348bcb0991SDimitry Andric } 42350b57cec5SDimitry Andric 42360b57cec5SDimitry Andric if (!RI.opCanUseLiteralConstant(OpInfo.OperandType)) 42370b57cec5SDimitry Andric return false; 42380b57cec5SDimitry Andric 42390b57cec5SDimitry Andric if (!isVOP3(MI) || !AMDGPU::isSISrcOperand(InstDesc, OpNo)) 42400b57cec5SDimitry Andric return true; 42410b57cec5SDimitry Andric 42420b57cec5SDimitry Andric return ST.hasVOP3Literal(); 42430b57cec5SDimitry Andric } 42440b57cec5SDimitry Andric 42450b57cec5SDimitry Andric bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const { 4246fe6060f1SDimitry Andric // GFX90A does not have V_MUL_LEGACY_F32_e32. 4247fe6060f1SDimitry Andric if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.hasGFX90AInsts()) 4248fe6060f1SDimitry Andric return false; 4249fe6060f1SDimitry Andric 42500b57cec5SDimitry Andric int Op32 = AMDGPU::getVOPe32(Opcode); 42510b57cec5SDimitry Andric if (Op32 == -1) 42520b57cec5SDimitry Andric return false; 42530b57cec5SDimitry Andric 42540b57cec5SDimitry Andric return pseudoToMCOpcode(Op32) != -1; 42550b57cec5SDimitry Andric } 42560b57cec5SDimitry Andric 42570b57cec5SDimitry Andric bool SIInstrInfo::hasModifiers(unsigned Opcode) const { 42580b57cec5SDimitry Andric // The src0_modifier operand is present on all instructions 42590b57cec5SDimitry Andric // that have modifiers. 
42600b57cec5SDimitry Andric 4261bdd1243dSDimitry Andric return AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src0_modifiers); 42620b57cec5SDimitry Andric } 42630b57cec5SDimitry Andric 42640b57cec5SDimitry Andric bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI, 42650b57cec5SDimitry Andric unsigned OpName) const { 42660b57cec5SDimitry Andric const MachineOperand *Mods = getNamedOperand(MI, OpName); 42670b57cec5SDimitry Andric return Mods && Mods->getImm(); 42680b57cec5SDimitry Andric } 42690b57cec5SDimitry Andric 42700b57cec5SDimitry Andric bool SIInstrInfo::hasAnyModifiersSet(const MachineInstr &MI) const { 427181ad6265SDimitry Andric return any_of(ModifierOpNames, 427281ad6265SDimitry Andric [&](unsigned Name) { return hasModifiersSet(MI, Name); }); 42730b57cec5SDimitry Andric } 42740b57cec5SDimitry Andric 42750b57cec5SDimitry Andric bool SIInstrInfo::canShrink(const MachineInstr &MI, 42760b57cec5SDimitry Andric const MachineRegisterInfo &MRI) const { 42770b57cec5SDimitry Andric const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2); 42780b57cec5SDimitry Andric // Can't shrink instruction with three operands. 42790b57cec5SDimitry Andric if (Src2) { 42800b57cec5SDimitry Andric switch (MI.getOpcode()) { 42810b57cec5SDimitry Andric default: return false; 42820b57cec5SDimitry Andric 42830b57cec5SDimitry Andric case AMDGPU::V_ADDC_U32_e64: 42840b57cec5SDimitry Andric case AMDGPU::V_SUBB_U32_e64: 42850b57cec5SDimitry Andric case AMDGPU::V_SUBBREV_U32_e64: { 42860b57cec5SDimitry Andric const MachineOperand *Src1 42870b57cec5SDimitry Andric = getNamedOperand(MI, AMDGPU::OpName::src1); 42880b57cec5SDimitry Andric if (!Src1->isReg() || !RI.isVGPR(MRI, Src1->getReg())) 42890b57cec5SDimitry Andric return false; 42900b57cec5SDimitry Andric // Additional verification is needed for sdst/src2. 42910b57cec5SDimitry Andric return true; 42920b57cec5SDimitry Andric } 42930b57cec5SDimitry Andric case AMDGPU::V_MAC_F16_e64: 4294349cc55cSDimitry Andric case AMDGPU::V_MAC_F32_e64: 4295349cc55cSDimitry Andric case AMDGPU::V_MAC_LEGACY_F32_e64: 42960b57cec5SDimitry Andric case AMDGPU::V_FMAC_F16_e64: 4297bdd1243dSDimitry Andric case AMDGPU::V_FMAC_F16_t16_e64: 4298349cc55cSDimitry Andric case AMDGPU::V_FMAC_F32_e64: 4299fe6060f1SDimitry Andric case AMDGPU::V_FMAC_F64_e64: 4300349cc55cSDimitry Andric case AMDGPU::V_FMAC_LEGACY_F32_e64: 43010b57cec5SDimitry Andric if (!Src2->isReg() || !RI.isVGPR(MRI, Src2->getReg()) || 43020b57cec5SDimitry Andric hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers)) 43030b57cec5SDimitry Andric return false; 43040b57cec5SDimitry Andric break; 43050b57cec5SDimitry Andric 43060b57cec5SDimitry Andric case AMDGPU::V_CNDMASK_B32_e64: 43070b57cec5SDimitry Andric break; 43080b57cec5SDimitry Andric } 43090b57cec5SDimitry Andric } 43100b57cec5SDimitry Andric 43110b57cec5SDimitry Andric const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1); 43120b57cec5SDimitry Andric if (Src1 && (!Src1->isReg() || !RI.isVGPR(MRI, Src1->getReg()) || 43130b57cec5SDimitry Andric hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers))) 43140b57cec5SDimitry Andric return false; 43150b57cec5SDimitry Andric 43160b57cec5SDimitry Andric // We don't need to check src0, all input types are legal, so just make sure 43170b57cec5SDimitry Andric // src0 isn't using any modifiers. 
43180b57cec5SDimitry Andric if (hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers)) 43190b57cec5SDimitry Andric return false; 43200b57cec5SDimitry Andric 43210b57cec5SDimitry Andric // Can it be shrunk to a valid 32 bit opcode? 43220b57cec5SDimitry Andric if (!hasVALU32BitEncoding(MI.getOpcode())) 43230b57cec5SDimitry Andric return false; 43240b57cec5SDimitry Andric 43250b57cec5SDimitry Andric // Check output modifiers 43260b57cec5SDimitry Andric return !hasModifiersSet(MI, AMDGPU::OpName::omod) && 43270b57cec5SDimitry Andric !hasModifiersSet(MI, AMDGPU::OpName::clamp); 43280b57cec5SDimitry Andric } 43290b57cec5SDimitry Andric 43300b57cec5SDimitry Andric // Set VCC operand with all flags from \p Orig, except for setting it as 43310b57cec5SDimitry Andric // implicit. 43320b57cec5SDimitry Andric static void copyFlagsToImplicitVCC(MachineInstr &MI, 43330b57cec5SDimitry Andric const MachineOperand &Orig) { 43340b57cec5SDimitry Andric 43350b57cec5SDimitry Andric for (MachineOperand &Use : MI.implicit_operands()) { 43365ffd83dbSDimitry Andric if (Use.isUse() && 43375ffd83dbSDimitry Andric (Use.getReg() == AMDGPU::VCC || Use.getReg() == AMDGPU::VCC_LO)) { 43380b57cec5SDimitry Andric Use.setIsUndef(Orig.isUndef()); 43390b57cec5SDimitry Andric Use.setIsKill(Orig.isKill()); 43400b57cec5SDimitry Andric return; 43410b57cec5SDimitry Andric } 43420b57cec5SDimitry Andric } 43430b57cec5SDimitry Andric } 43440b57cec5SDimitry Andric 43450b57cec5SDimitry Andric MachineInstr *SIInstrInfo::buildShrunkInst(MachineInstr &MI, 43460b57cec5SDimitry Andric unsigned Op32) const { 434781ad6265SDimitry Andric MachineBasicBlock *MBB = MI.getParent(); 43480b57cec5SDimitry Andric MachineInstrBuilder Inst32 = 43495ffd83dbSDimitry Andric BuildMI(*MBB, MI, MI.getDebugLoc(), get(Op32)) 43505ffd83dbSDimitry Andric .setMIFlags(MI.getFlags()); 43510b57cec5SDimitry Andric 43520b57cec5SDimitry Andric // Add the dst operand if the 32-bit encoding also has an explicit $vdst. 43530b57cec5SDimitry Andric // For VOPC instructions, this is replaced by an implicit def of vcc. 4354bdd1243dSDimitry Andric if (AMDGPU::hasNamedOperand(Op32, AMDGPU::OpName::vdst)) { 43550b57cec5SDimitry Andric // dst 43560b57cec5SDimitry Andric Inst32.add(MI.getOperand(0)); 4357bdd1243dSDimitry Andric } else if (AMDGPU::hasNamedOperand(Op32, AMDGPU::OpName::sdst)) { 435881ad6265SDimitry Andric // VOPCX instructions won't be writing to an explicit dst, so this should 435981ad6265SDimitry Andric // not fail for these instructions. 
43600b57cec5SDimitry Andric assert(((MI.getOperand(0).getReg() == AMDGPU::VCC) || 43610b57cec5SDimitry Andric (MI.getOperand(0).getReg() == AMDGPU::VCC_LO)) && 43620b57cec5SDimitry Andric "Unexpected case"); 43630b57cec5SDimitry Andric } 43640b57cec5SDimitry Andric 43650b57cec5SDimitry Andric Inst32.add(*getNamedOperand(MI, AMDGPU::OpName::src0)); 43660b57cec5SDimitry Andric 43670b57cec5SDimitry Andric const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1); 43680b57cec5SDimitry Andric if (Src1) 43690b57cec5SDimitry Andric Inst32.add(*Src1); 43700b57cec5SDimitry Andric 43710b57cec5SDimitry Andric const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2); 43720b57cec5SDimitry Andric 43730b57cec5SDimitry Andric if (Src2) { 43740b57cec5SDimitry Andric int Op32Src2Idx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2); 43750b57cec5SDimitry Andric if (Op32Src2Idx != -1) { 43760b57cec5SDimitry Andric Inst32.add(*Src2); 43770b57cec5SDimitry Andric } else { 43780b57cec5SDimitry Andric // In the case of V_CNDMASK_B32_e32, the explicit operand src2 is 4379e8d8bef9SDimitry Andric // replaced with an implicit read of vcc or vcc_lo. The implicit read 4380e8d8bef9SDimitry Andric // of vcc was already added during the initial BuildMI, but we 4381e8d8bef9SDimitry Andric // 1) may need to change vcc to vcc_lo to preserve the original register 4382e8d8bef9SDimitry Andric // 2) have to preserve the original flags. 4383e8d8bef9SDimitry Andric fixImplicitOperands(*Inst32); 43840b57cec5SDimitry Andric copyFlagsToImplicitVCC(*Inst32, *Src2); 43850b57cec5SDimitry Andric } 43860b57cec5SDimitry Andric } 43870b57cec5SDimitry Andric 43880b57cec5SDimitry Andric return Inst32; 43890b57cec5SDimitry Andric } 43900b57cec5SDimitry Andric 43910b57cec5SDimitry Andric bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI, 43920b57cec5SDimitry Andric const MachineOperand &MO, 43930b57cec5SDimitry Andric const MCOperandInfo &OpInfo) const { 43940b57cec5SDimitry Andric // Literal constants use the constant bus. 43950b57cec5SDimitry Andric if (!MO.isReg()) 4396bdd1243dSDimitry Andric return !isInlineConstant(MO, OpInfo); 43970b57cec5SDimitry Andric 43980b57cec5SDimitry Andric if (!MO.isUse()) 43990b57cec5SDimitry Andric return false; 44000b57cec5SDimitry Andric 4401e8d8bef9SDimitry Andric if (MO.getReg().isVirtual()) 44020b57cec5SDimitry Andric return RI.isSGPRClass(MRI.getRegClass(MO.getReg())); 44030b57cec5SDimitry Andric 44040b57cec5SDimitry Andric // Null is free 440581ad6265SDimitry Andric if (MO.getReg() == AMDGPU::SGPR_NULL || MO.getReg() == AMDGPU::SGPR_NULL64) 44060b57cec5SDimitry Andric return false; 44070b57cec5SDimitry Andric 44080b57cec5SDimitry Andric // SGPRs use the constant bus 44090b57cec5SDimitry Andric if (MO.isImplicit()) { 44100b57cec5SDimitry Andric return MO.getReg() == AMDGPU::M0 || 44110b57cec5SDimitry Andric MO.getReg() == AMDGPU::VCC || 44120b57cec5SDimitry Andric MO.getReg() == AMDGPU::VCC_LO; 44130b57cec5SDimitry Andric } else { 44140b57cec5SDimitry Andric return AMDGPU::SReg_32RegClass.contains(MO.getReg()) || 44150b57cec5SDimitry Andric AMDGPU::SReg_64RegClass.contains(MO.getReg()); 44160b57cec5SDimitry Andric } 44170b57cec5SDimitry Andric } 44180b57cec5SDimitry Andric 44195ffd83dbSDimitry Andric static Register findImplicitSGPRRead(const MachineInstr &MI) { 44200b57cec5SDimitry Andric for (const MachineOperand &MO : MI.implicit_operands()) { 44210b57cec5SDimitry Andric // We only care about reads. 
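// Any implicit SGPR read found here (VCC, M0, FLAT_SCR) is counted against
// the constant bus limit in verifyInstruction() below.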
44220b57cec5SDimitry Andric if (MO.isDef()) 44230b57cec5SDimitry Andric continue; 44240b57cec5SDimitry Andric 44250b57cec5SDimitry Andric switch (MO.getReg()) { 44260b57cec5SDimitry Andric case AMDGPU::VCC: 44270b57cec5SDimitry Andric case AMDGPU::VCC_LO: 44280b57cec5SDimitry Andric case AMDGPU::VCC_HI: 44290b57cec5SDimitry Andric case AMDGPU::M0: 44300b57cec5SDimitry Andric case AMDGPU::FLAT_SCR: 44310b57cec5SDimitry Andric return MO.getReg(); 44320b57cec5SDimitry Andric 44330b57cec5SDimitry Andric default: 44340b57cec5SDimitry Andric break; 44350b57cec5SDimitry Andric } 44360b57cec5SDimitry Andric } 44370b57cec5SDimitry Andric 4438bdd1243dSDimitry Andric return Register(); 44390b57cec5SDimitry Andric } 44400b57cec5SDimitry Andric 44410b57cec5SDimitry Andric static bool shouldReadExec(const MachineInstr &MI) { 44420b57cec5SDimitry Andric if (SIInstrInfo::isVALU(MI)) { 44430b57cec5SDimitry Andric switch (MI.getOpcode()) { 44440b57cec5SDimitry Andric case AMDGPU::V_READLANE_B32: 44455f757f3fSDimitry Andric case AMDGPU::SI_RESTORE_S32_FROM_VGPR: 44460b57cec5SDimitry Andric case AMDGPU::V_WRITELANE_B32: 44475f757f3fSDimitry Andric case AMDGPU::SI_SPILL_S32_TO_VGPR: 44480b57cec5SDimitry Andric return false; 44490b57cec5SDimitry Andric } 44500b57cec5SDimitry Andric 44510b57cec5SDimitry Andric return true; 44520b57cec5SDimitry Andric } 44530b57cec5SDimitry Andric 44548bcb0991SDimitry Andric if (MI.isPreISelOpcode() || 44558bcb0991SDimitry Andric SIInstrInfo::isGenericOpcode(MI.getOpcode()) || 44560b57cec5SDimitry Andric SIInstrInfo::isSALU(MI) || 44570b57cec5SDimitry Andric SIInstrInfo::isSMRD(MI)) 44580b57cec5SDimitry Andric return false; 44590b57cec5SDimitry Andric 44600b57cec5SDimitry Andric return true; 44610b57cec5SDimitry Andric } 44620b57cec5SDimitry Andric 44630b57cec5SDimitry Andric static bool isSubRegOf(const SIRegisterInfo &TRI, 44640b57cec5SDimitry Andric const MachineOperand &SuperVec, 44650b57cec5SDimitry Andric const MachineOperand &SubReg) { 4466e8d8bef9SDimitry Andric if (SubReg.getReg().isPhysical()) 44670b57cec5SDimitry Andric return TRI.isSubRegister(SuperVec.getReg(), SubReg.getReg()); 44680b57cec5SDimitry Andric 44690b57cec5SDimitry Andric return SubReg.getSubReg() != AMDGPU::NoSubRegister && 44700b57cec5SDimitry Andric SubReg.getReg() == SuperVec.getReg(); 44710b57cec5SDimitry Andric } 44720b57cec5SDimitry Andric 44730b57cec5SDimitry Andric bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, 44740b57cec5SDimitry Andric StringRef &ErrInfo) const { 44750b57cec5SDimitry Andric uint16_t Opcode = MI.getOpcode(); 44760b57cec5SDimitry Andric if (SIInstrInfo::isGenericOpcode(MI.getOpcode())) 44770b57cec5SDimitry Andric return true; 44780b57cec5SDimitry Andric 44790b57cec5SDimitry Andric const MachineFunction *MF = MI.getParent()->getParent(); 44800b57cec5SDimitry Andric const MachineRegisterInfo &MRI = MF->getRegInfo(); 44810b57cec5SDimitry Andric 44820b57cec5SDimitry Andric int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 44830b57cec5SDimitry Andric int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 44840b57cec5SDimitry Andric int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 4485753f127fSDimitry Andric int Src3Idx = -1; 4486753f127fSDimitry Andric if (Src0Idx == -1) { 4487753f127fSDimitry Andric // VOPD V_DUAL_* instructions use different operand names. 
4488753f127fSDimitry Andric Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0X); 4489753f127fSDimitry Andric Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1X); 4490753f127fSDimitry Andric Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0Y); 4491753f127fSDimitry Andric Src3Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1Y); 4492753f127fSDimitry Andric } 44930b57cec5SDimitry Andric 44940b57cec5SDimitry Andric // Make sure the number of operands is correct. 44950b57cec5SDimitry Andric const MCInstrDesc &Desc = get(Opcode); 44960b57cec5SDimitry Andric if (!Desc.isVariadic() && 44970b57cec5SDimitry Andric Desc.getNumOperands() != MI.getNumExplicitOperands()) { 44980b57cec5SDimitry Andric ErrInfo = "Instruction has wrong number of operands."; 44990b57cec5SDimitry Andric return false; 45000b57cec5SDimitry Andric } 45010b57cec5SDimitry Andric 45020b57cec5SDimitry Andric if (MI.isInlineAsm()) { 45030b57cec5SDimitry Andric // Verify register classes for inlineasm constraints. 45040b57cec5SDimitry Andric for (unsigned I = InlineAsm::MIOp_FirstOperand, E = MI.getNumOperands(); 45050b57cec5SDimitry Andric I != E; ++I) { 45060b57cec5SDimitry Andric const TargetRegisterClass *RC = MI.getRegClassConstraint(I, this, &RI); 45070b57cec5SDimitry Andric if (!RC) 45080b57cec5SDimitry Andric continue; 45090b57cec5SDimitry Andric 45100b57cec5SDimitry Andric const MachineOperand &Op = MI.getOperand(I); 45110b57cec5SDimitry Andric if (!Op.isReg()) 45120b57cec5SDimitry Andric continue; 45130b57cec5SDimitry Andric 45148bcb0991SDimitry Andric Register Reg = Op.getReg(); 4515e8d8bef9SDimitry Andric if (!Reg.isVirtual() && !RC->contains(Reg)) { 45160b57cec5SDimitry Andric ErrInfo = "inlineasm operand has incorrect register class."; 45170b57cec5SDimitry Andric return false; 45180b57cec5SDimitry Andric } 45190b57cec5SDimitry Andric } 45200b57cec5SDimitry Andric 45210b57cec5SDimitry Andric return true; 45220b57cec5SDimitry Andric } 45230b57cec5SDimitry Andric 45245f757f3fSDimitry Andric if (isImage(MI) && MI.memoperands_empty() && MI.mayLoadOrStore()) { 45255f757f3fSDimitry Andric ErrInfo = "missing memory operand from image instruction."; 45265ffd83dbSDimitry Andric return false; 45275ffd83dbSDimitry Andric } 45285ffd83dbSDimitry Andric 45290b57cec5SDimitry Andric // Make sure the register classes are correct. 45300b57cec5SDimitry Andric for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) { 4531fe6060f1SDimitry Andric const MachineOperand &MO = MI.getOperand(i); 4532fe6060f1SDimitry Andric if (MO.isFPImm()) { 45330b57cec5SDimitry Andric ErrInfo = "FPImm Machine Operands are not supported. 
ISel should bitcast " 45340b57cec5SDimitry Andric "all fp values to integers."; 45350b57cec5SDimitry Andric return false; 45360b57cec5SDimitry Andric } 45370b57cec5SDimitry Andric 4538bdd1243dSDimitry Andric int RegClass = Desc.operands()[i].RegClass; 45390b57cec5SDimitry Andric 4540bdd1243dSDimitry Andric switch (Desc.operands()[i].OperandType) { 45410b57cec5SDimitry Andric case MCOI::OPERAND_REGISTER: 45420b57cec5SDimitry Andric if (MI.getOperand(i).isImm() || MI.getOperand(i).isGlobal()) { 45430b57cec5SDimitry Andric ErrInfo = "Illegal immediate value for operand."; 45440b57cec5SDimitry Andric return false; 45450b57cec5SDimitry Andric } 45460b57cec5SDimitry Andric break; 45470b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_IMM_INT32: 45480b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_IMM_FP32: 4549349cc55cSDimitry Andric case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 455081ad6265SDimitry Andric case AMDGPU::OPERAND_REG_IMM_V2FP32: 45510b57cec5SDimitry Andric break; 45520b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_INLINE_C_INT32: 45530b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_INLINE_C_FP32: 45540b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_INLINE_C_INT64: 45550b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_INLINE_C_FP64: 45560b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_INLINE_C_INT16: 45570b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_INLINE_C_FP16: 45580b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 45590b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 45600b57cec5SDimitry Andric case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 4561fe6060f1SDimitry Andric case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 4562fe6060f1SDimitry Andric case AMDGPU::OPERAND_REG_INLINE_AC_FP64: { 45630b57cec5SDimitry Andric if (!MO.isReg() && (!MO.isImm() || !isInlineConstant(MI, i))) { 45640b57cec5SDimitry Andric ErrInfo = "Illegal immediate value for operand."; 45650b57cec5SDimitry Andric return false; 45660b57cec5SDimitry Andric } 45670b57cec5SDimitry Andric break; 45680b57cec5SDimitry Andric } 45695f757f3fSDimitry Andric case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: 45705f757f3fSDimitry Andric if (!MI.getOperand(i).isImm() || !isInlineConstant(MI, i)) { 45715f757f3fSDimitry Andric ErrInfo = "Expected inline constant for operand."; 45725f757f3fSDimitry Andric return false; 45735f757f3fSDimitry Andric } 45745f757f3fSDimitry Andric break; 45750b57cec5SDimitry Andric case MCOI::OPERAND_IMMEDIATE: 45760b57cec5SDimitry Andric case AMDGPU::OPERAND_KIMM32: 45770b57cec5SDimitry Andric // Check if this operand is an immediate. 45780b57cec5SDimitry Andric // FrameIndex operands will be replaced by immediates, so they are 45790b57cec5SDimitry Andric // allowed. 45800b57cec5SDimitry Andric if (!MI.getOperand(i).isImm() && !MI.getOperand(i).isFI()) { 45810b57cec5SDimitry Andric ErrInfo = "Expected immediate, but got non-immediate"; 45820b57cec5SDimitry Andric return false; 45830b57cec5SDimitry Andric } 4584bdd1243dSDimitry Andric [[fallthrough]]; 45850b57cec5SDimitry Andric default: 45860b57cec5SDimitry Andric continue; 45870b57cec5SDimitry Andric } 45880b57cec5SDimitry Andric 4589fe6060f1SDimitry Andric if (!MO.isReg()) 4590fe6060f1SDimitry Andric continue; 4591fe6060f1SDimitry Andric Register Reg = MO.getReg(); 4592fe6060f1SDimitry Andric if (!Reg) 45930b57cec5SDimitry Andric continue; 45940b57cec5SDimitry Andric 4595fe6060f1SDimitry Andric // FIXME: Ideally we would have separate instruction definitions with the 4596fe6060f1SDimitry Andric // aligned register constraint. 
4597fe6060f1SDimitry Andric // FIXME: We do not verify inline asm operands, but custom inline asm 4598fe6060f1SDimitry Andric // verification is broken anyway 4599fe6060f1SDimitry Andric if (ST.needsAlignedVGPRs()) { 4600fe6060f1SDimitry Andric const TargetRegisterClass *RC = RI.getRegClassForReg(MRI, Reg); 46014824e7fdSDimitry Andric if (RI.hasVectorRegisters(RC) && MO.getSubReg()) { 4602fe6060f1SDimitry Andric const TargetRegisterClass *SubRC = 4603bdd1243dSDimitry Andric RI.getSubRegisterClass(RC, MO.getSubReg()); 4604fe6060f1SDimitry Andric RC = RI.getCompatibleSubRegClass(RC, SubRC, MO.getSubReg()); 4605fe6060f1SDimitry Andric if (RC) 4606fe6060f1SDimitry Andric RC = SubRC; 4607fe6060f1SDimitry Andric } 4608fe6060f1SDimitry Andric 4609fe6060f1SDimitry Andric // Check that this is the aligned version of the class. 4610fe6060f1SDimitry Andric if (!RC || !RI.isProperlyAlignedRC(*RC)) { 4611fe6060f1SDimitry Andric ErrInfo = "Subtarget requires even aligned vector registers"; 4612fe6060f1SDimitry Andric return false; 4613fe6060f1SDimitry Andric } 4614fe6060f1SDimitry Andric } 4615fe6060f1SDimitry Andric 46160b57cec5SDimitry Andric if (RegClass != -1) { 4617fe6060f1SDimitry Andric if (Reg.isVirtual()) 46180b57cec5SDimitry Andric continue; 46190b57cec5SDimitry Andric 46200b57cec5SDimitry Andric const TargetRegisterClass *RC = RI.getRegClass(RegClass); 46210b57cec5SDimitry Andric if (!RC->contains(Reg)) { 46220b57cec5SDimitry Andric ErrInfo = "Operand has incorrect register class."; 46230b57cec5SDimitry Andric return false; 46240b57cec5SDimitry Andric } 46250b57cec5SDimitry Andric } 46260b57cec5SDimitry Andric } 46270b57cec5SDimitry Andric 46280b57cec5SDimitry Andric // Verify SDWA 46290b57cec5SDimitry Andric if (isSDWA(MI)) { 46300b57cec5SDimitry Andric if (!ST.hasSDWA()) { 46310b57cec5SDimitry Andric ErrInfo = "SDWA is not supported on this target"; 46320b57cec5SDimitry Andric return false; 46330b57cec5SDimitry Andric } 46340b57cec5SDimitry Andric 46350b57cec5SDimitry Andric int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 46360b57cec5SDimitry Andric 463781ad6265SDimitry Andric for (int OpIdx : {DstIdx, Src0Idx, Src1Idx, Src2Idx}) { 46380b57cec5SDimitry Andric if (OpIdx == -1) 46390b57cec5SDimitry Andric continue; 46400b57cec5SDimitry Andric const MachineOperand &MO = MI.getOperand(OpIdx); 46410b57cec5SDimitry Andric 46420b57cec5SDimitry Andric if (!ST.hasSDWAScalar()) { 46430b57cec5SDimitry Andric // Only VGPRS on VI 46440b57cec5SDimitry Andric if (!MO.isReg() || !RI.hasVGPRs(RI.getRegClassForReg(MRI, MO.getReg()))) { 46450b57cec5SDimitry Andric ErrInfo = "Only VGPRs allowed as operands in SDWA instructions on VI"; 46460b57cec5SDimitry Andric return false; 46470b57cec5SDimitry Andric } 46480b57cec5SDimitry Andric } else { 46490b57cec5SDimitry Andric // No immediates on GFX9 46500b57cec5SDimitry Andric if (!MO.isReg()) { 4651e8d8bef9SDimitry Andric ErrInfo = 4652e8d8bef9SDimitry Andric "Only reg allowed as operands in SDWA instructions on GFX9+"; 46530b57cec5SDimitry Andric return false; 46540b57cec5SDimitry Andric } 46550b57cec5SDimitry Andric } 46560b57cec5SDimitry Andric } 46570b57cec5SDimitry Andric 46580b57cec5SDimitry Andric if (!ST.hasSDWAOmod()) { 46590b57cec5SDimitry Andric // No omod allowed on VI 46600b57cec5SDimitry Andric const MachineOperand *OMod = getNamedOperand(MI, AMDGPU::OpName::omod); 46610b57cec5SDimitry Andric if (OMod != nullptr && 46620b57cec5SDimitry Andric (!OMod->isImm() || OMod->getImm() != 0)) { 46630b57cec5SDimitry Andric ErrInfo = "OMod 
not allowed in SDWA instructions on VI"; 46640b57cec5SDimitry Andric return false; 46650b57cec5SDimitry Andric } 46660b57cec5SDimitry Andric } 46670b57cec5SDimitry Andric 46680b57cec5SDimitry Andric uint16_t BasicOpcode = AMDGPU::getBasicFromSDWAOp(Opcode); 46690b57cec5SDimitry Andric if (isVOPC(BasicOpcode)) { 46700b57cec5SDimitry Andric if (!ST.hasSDWASdst() && DstIdx != -1) { 46710b57cec5SDimitry Andric // Only vcc allowed as dst on VI for VOPC 46720b57cec5SDimitry Andric const MachineOperand &Dst = MI.getOperand(DstIdx); 46730b57cec5SDimitry Andric if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) { 46740b57cec5SDimitry Andric ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI"; 46750b57cec5SDimitry Andric return false; 46760b57cec5SDimitry Andric } 46770b57cec5SDimitry Andric } else if (!ST.hasSDWAOutModsVOPC()) { 46780b57cec5SDimitry Andric // No clamp allowed on GFX9 for VOPC 46790b57cec5SDimitry Andric const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp); 46800b57cec5SDimitry Andric if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) { 46810b57cec5SDimitry Andric ErrInfo = "Clamp not allowed in VOPC SDWA instructions on VI"; 46820b57cec5SDimitry Andric return false; 46830b57cec5SDimitry Andric } 46840b57cec5SDimitry Andric 46850b57cec5SDimitry Andric // No omod allowed on GFX9 for VOPC 46860b57cec5SDimitry Andric const MachineOperand *OMod = getNamedOperand(MI, AMDGPU::OpName::omod); 46870b57cec5SDimitry Andric if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) { 46880b57cec5SDimitry Andric ErrInfo = "OMod not allowed in VOPC SDWA instructions on VI"; 46890b57cec5SDimitry Andric return false; 46900b57cec5SDimitry Andric } 46910b57cec5SDimitry Andric } 46920b57cec5SDimitry Andric } 46930b57cec5SDimitry Andric 46940b57cec5SDimitry Andric const MachineOperand *DstUnused = getNamedOperand(MI, AMDGPU::OpName::dst_unused); 46950b57cec5SDimitry Andric if (DstUnused && DstUnused->isImm() && 46960b57cec5SDimitry Andric DstUnused->getImm() == AMDGPU::SDWA::UNUSED_PRESERVE) { 46970b57cec5SDimitry Andric const MachineOperand &Dst = MI.getOperand(DstIdx); 46980b57cec5SDimitry Andric if (!Dst.isReg() || !Dst.isTied()) { 46990b57cec5SDimitry Andric ErrInfo = "Dst register should have tied register"; 47000b57cec5SDimitry Andric return false; 47010b57cec5SDimitry Andric } 47020b57cec5SDimitry Andric 47030b57cec5SDimitry Andric const MachineOperand &TiedMO = 47040b57cec5SDimitry Andric MI.getOperand(MI.findTiedOperandIdx(DstIdx)); 47050b57cec5SDimitry Andric if (!TiedMO.isReg() || !TiedMO.isImplicit() || !TiedMO.isUse()) { 47060b57cec5SDimitry Andric ErrInfo = 47070b57cec5SDimitry Andric "Dst register should be tied to implicit use of preserved register"; 47080b57cec5SDimitry Andric return false; 4709e8d8bef9SDimitry Andric } else if (TiedMO.getReg().isPhysical() && 47100b57cec5SDimitry Andric Dst.getReg() != TiedMO.getReg()) { 47110b57cec5SDimitry Andric ErrInfo = "Dst register should use same physical register as preserved"; 47120b57cec5SDimitry Andric return false; 47130b57cec5SDimitry Andric } 47140b57cec5SDimitry Andric } 47150b57cec5SDimitry Andric } 47160b57cec5SDimitry Andric 47175f757f3fSDimitry Andric // Verify MIMG / VIMAGE / VSAMPLE 47185f757f3fSDimitry Andric if (isImage(MI.getOpcode()) && !MI.mayStore()) { 47190b57cec5SDimitry Andric // Ensure that the return type used is large enough for all the options 47200b57cec5SDimitry Andric // being used TFE/LWE require an extra result register. 
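// The required register count below is popcount(dmask) (always 4 for
// gather4), halved and rounded up for packed D16, plus one extra register
// when TFE or LWE is set.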
47210b57cec5SDimitry Andric const MachineOperand *DMask = getNamedOperand(MI, AMDGPU::OpName::dmask); 47220b57cec5SDimitry Andric if (DMask) { 47230b57cec5SDimitry Andric uint64_t DMaskImm = DMask->getImm(); 47240b57cec5SDimitry Andric uint32_t RegCount = 4725bdd1243dSDimitry Andric isGather4(MI.getOpcode()) ? 4 : llvm::popcount(DMaskImm); 47260b57cec5SDimitry Andric const MachineOperand *TFE = getNamedOperand(MI, AMDGPU::OpName::tfe); 47270b57cec5SDimitry Andric const MachineOperand *LWE = getNamedOperand(MI, AMDGPU::OpName::lwe); 47280b57cec5SDimitry Andric const MachineOperand *D16 = getNamedOperand(MI, AMDGPU::OpName::d16); 47290b57cec5SDimitry Andric 47300b57cec5SDimitry Andric // Adjust for packed 16 bit values 47310b57cec5SDimitry Andric if (D16 && D16->getImm() && !ST.hasUnpackedD16VMem()) 473206c3fb27SDimitry Andric RegCount = divideCeil(RegCount, 2); 47330b57cec5SDimitry Andric 47340b57cec5SDimitry Andric // Adjust if using LWE or TFE 47350b57cec5SDimitry Andric if ((LWE && LWE->getImm()) || (TFE && TFE->getImm())) 47360b57cec5SDimitry Andric RegCount += 1; 47370b57cec5SDimitry Andric 47380b57cec5SDimitry Andric const uint32_t DstIdx = 47390b57cec5SDimitry Andric AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata); 47400b57cec5SDimitry Andric const MachineOperand &Dst = MI.getOperand(DstIdx); 47410b57cec5SDimitry Andric if (Dst.isReg()) { 47420b57cec5SDimitry Andric const TargetRegisterClass *DstRC = getOpRegClass(MI, DstIdx); 47430b57cec5SDimitry Andric uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32; 47440b57cec5SDimitry Andric if (RegCount > DstSize) { 474506c3fb27SDimitry Andric ErrInfo = "Image instruction returns too many registers for dst " 47460b57cec5SDimitry Andric "register class"; 47470b57cec5SDimitry Andric return false; 47480b57cec5SDimitry Andric } 47490b57cec5SDimitry Andric } 47500b57cec5SDimitry Andric } 47510b57cec5SDimitry Andric } 47520b57cec5SDimitry Andric 47530b57cec5SDimitry Andric // Verify VOP*. Ignore multiple sgpr operands on writelane. 475481ad6265SDimitry Andric if (isVALU(MI) && Desc.getOpcode() != AMDGPU::V_WRITELANE_B32) { 47550b57cec5SDimitry Andric unsigned ConstantBusCount = 0; 4756fe6060f1SDimitry Andric bool UsesLiteral = false; 4757fe6060f1SDimitry Andric const MachineOperand *LiteralVal = nullptr; 47580b57cec5SDimitry Andric 475981ad6265SDimitry Andric int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm); 476081ad6265SDimitry Andric if (ImmIdx != -1) { 47610b57cec5SDimitry Andric ++ConstantBusCount; 476281ad6265SDimitry Andric UsesLiteral = true; 476381ad6265SDimitry Andric LiteralVal = &MI.getOperand(ImmIdx); 476481ad6265SDimitry Andric } 47650b57cec5SDimitry Andric 47665ffd83dbSDimitry Andric SmallVector<Register, 2> SGPRsUsed; 4767e8d8bef9SDimitry Andric Register SGPRUsed; 47680b57cec5SDimitry Andric 476981ad6265SDimitry Andric // Only look at the true operands. Only a real operand can use the constant 477081ad6265SDimitry Andric // bus, and we don't want to check pseudo-operands like the source modifier 477181ad6265SDimitry Andric // flags. 
4772753f127fSDimitry Andric for (int OpIdx : {Src0Idx, Src1Idx, Src2Idx, Src3Idx}) { 47730b57cec5SDimitry Andric if (OpIdx == -1) 4774753f127fSDimitry Andric continue; 47750b57cec5SDimitry Andric const MachineOperand &MO = MI.getOperand(OpIdx); 4776bdd1243dSDimitry Andric if (usesConstantBus(MRI, MO, MI.getDesc().operands()[OpIdx])) { 47770b57cec5SDimitry Andric if (MO.isReg()) { 47780b57cec5SDimitry Andric SGPRUsed = MO.getReg(); 4779bdd1243dSDimitry Andric if (!llvm::is_contained(SGPRsUsed, SGPRUsed)) { 47800b57cec5SDimitry Andric ++ConstantBusCount; 47810b57cec5SDimitry Andric SGPRsUsed.push_back(SGPRUsed); 47820b57cec5SDimitry Andric } 47830b57cec5SDimitry Andric } else { 4784fe6060f1SDimitry Andric if (!UsesLiteral) { 47850b57cec5SDimitry Andric ++ConstantBusCount; 4786fe6060f1SDimitry Andric UsesLiteral = true; 4787fe6060f1SDimitry Andric LiteralVal = &MO; 4788fe6060f1SDimitry Andric } else if (!MO.isIdenticalTo(*LiteralVal)) { 478981ad6265SDimitry Andric assert(isVOP2(MI) || isVOP3(MI)); 479081ad6265SDimitry Andric ErrInfo = "VOP2/VOP3 instruction uses more than one literal"; 4791fe6060f1SDimitry Andric return false; 4792fe6060f1SDimitry Andric } 47930b57cec5SDimitry Andric } 47940b57cec5SDimitry Andric } 47950b57cec5SDimitry Andric } 4796e8d8bef9SDimitry Andric 4797e8d8bef9SDimitry Andric SGPRUsed = findImplicitSGPRRead(MI); 4798bdd1243dSDimitry Andric if (SGPRUsed) { 479981ad6265SDimitry Andric // Implicit uses may safely overlap true operands 4800e8d8bef9SDimitry Andric if (llvm::all_of(SGPRsUsed, [this, SGPRUsed](unsigned SGPR) { 4801e8d8bef9SDimitry Andric return !RI.regsOverlap(SGPRUsed, SGPR); 4802e8d8bef9SDimitry Andric })) { 4803e8d8bef9SDimitry Andric ++ConstantBusCount; 4804e8d8bef9SDimitry Andric SGPRsUsed.push_back(SGPRUsed); 4805e8d8bef9SDimitry Andric } 4806e8d8bef9SDimitry Andric } 4807e8d8bef9SDimitry Andric 48080b57cec5SDimitry Andric // v_writelane_b32 is an exception from constant bus restriction: 48090b57cec5SDimitry Andric // vsrc0 can be sgpr, const or m0 and lane select sgpr, m0 or inline-const 48100b57cec5SDimitry Andric if (ConstantBusCount > ST.getConstantBusLimit(Opcode) && 48110b57cec5SDimitry Andric Opcode != AMDGPU::V_WRITELANE_B32) { 48120b57cec5SDimitry Andric ErrInfo = "VOP* instruction violates constant bus restriction"; 48130b57cec5SDimitry Andric return false; 48140b57cec5SDimitry Andric } 48150b57cec5SDimitry Andric 4816fe6060f1SDimitry Andric if (isVOP3(MI) && UsesLiteral && !ST.hasVOP3Literal()) { 48170b57cec5SDimitry Andric ErrInfo = "VOP3 instruction uses literal"; 48180b57cec5SDimitry Andric return false; 48190b57cec5SDimitry Andric } 48200b57cec5SDimitry Andric } 48210b57cec5SDimitry Andric 48228bcb0991SDimitry Andric // Special case for writelane - this can break the multiple constant bus rule, 48238bcb0991SDimitry Andric // but still can't use more than one SGPR register 48248bcb0991SDimitry Andric if (Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) { 48258bcb0991SDimitry Andric unsigned SGPRCount = 0; 4826bdd1243dSDimitry Andric Register SGPRUsed; 48278bcb0991SDimitry Andric 482881ad6265SDimitry Andric for (int OpIdx : {Src0Idx, Src1Idx}) { 48298bcb0991SDimitry Andric if (OpIdx == -1) 48308bcb0991SDimitry Andric break; 48318bcb0991SDimitry Andric 48328bcb0991SDimitry Andric const MachineOperand &MO = MI.getOperand(OpIdx); 48338bcb0991SDimitry Andric 4834bdd1243dSDimitry Andric if (usesConstantBus(MRI, MO, MI.getDesc().operands()[OpIdx])) { 48358bcb0991SDimitry Andric if (MO.isReg() && MO.getReg() != AMDGPU::M0) { 48368bcb0991SDimitry 
Andric if (MO.getReg() != SGPRUsed) 48378bcb0991SDimitry Andric ++SGPRCount; 48388bcb0991SDimitry Andric SGPRUsed = MO.getReg(); 48398bcb0991SDimitry Andric } 48408bcb0991SDimitry Andric } 48418bcb0991SDimitry Andric if (SGPRCount > ST.getConstantBusLimit(Opcode)) { 48428bcb0991SDimitry Andric ErrInfo = "WRITELANE instruction violates constant bus restriction"; 48438bcb0991SDimitry Andric return false; 48448bcb0991SDimitry Andric } 48458bcb0991SDimitry Andric } 48468bcb0991SDimitry Andric } 48478bcb0991SDimitry Andric 48480b57cec5SDimitry Andric // Verify misc. restrictions on specific instructions. 4849e8d8bef9SDimitry Andric if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 || 4850e8d8bef9SDimitry Andric Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) { 48510b57cec5SDimitry Andric const MachineOperand &Src0 = MI.getOperand(Src0Idx); 48520b57cec5SDimitry Andric const MachineOperand &Src1 = MI.getOperand(Src1Idx); 48530b57cec5SDimitry Andric const MachineOperand &Src2 = MI.getOperand(Src2Idx); 48540b57cec5SDimitry Andric if (Src0.isReg() && Src1.isReg() && Src2.isReg()) { 48550b57cec5SDimitry Andric if (!compareMachineOp(Src0, Src1) && 48560b57cec5SDimitry Andric !compareMachineOp(Src0, Src2)) { 48570b57cec5SDimitry Andric ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2"; 48580b57cec5SDimitry Andric return false; 48590b57cec5SDimitry Andric } 48600b57cec5SDimitry Andric } 4861e8d8bef9SDimitry Andric if ((getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm() & 4862e8d8bef9SDimitry Andric SISrcMods::ABS) || 4863e8d8bef9SDimitry Andric (getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)->getImm() & 4864e8d8bef9SDimitry Andric SISrcMods::ABS) || 4865e8d8bef9SDimitry Andric (getNamedOperand(MI, AMDGPU::OpName::src2_modifiers)->getImm() & 4866e8d8bef9SDimitry Andric SISrcMods::ABS)) { 4867e8d8bef9SDimitry Andric ErrInfo = "ABS not allowed in VOP3B instructions"; 4868e8d8bef9SDimitry Andric return false; 4869e8d8bef9SDimitry Andric } 48700b57cec5SDimitry Andric } 48710b57cec5SDimitry Andric 48720b57cec5SDimitry Andric if (isSOP2(MI) || isSOPC(MI)) { 48730b57cec5SDimitry Andric const MachineOperand &Src0 = MI.getOperand(Src0Idx); 48740b57cec5SDimitry Andric const MachineOperand &Src1 = MI.getOperand(Src1Idx); 48750b57cec5SDimitry Andric 487681ad6265SDimitry Andric if (!Src0.isReg() && !Src1.isReg() && 4877bdd1243dSDimitry Andric !isInlineConstant(Src0, Desc.operands()[Src0Idx]) && 4878bdd1243dSDimitry Andric !isInlineConstant(Src1, Desc.operands()[Src1Idx]) && 487981ad6265SDimitry Andric !Src0.isIdenticalTo(Src1)) { 48800b57cec5SDimitry Andric ErrInfo = "SOP2/SOPC instruction requires too many immediate constants"; 48810b57cec5SDimitry Andric return false; 48820b57cec5SDimitry Andric } 48830b57cec5SDimitry Andric } 48840b57cec5SDimitry Andric 48850b57cec5SDimitry Andric if (isSOPK(MI)) { 48860b57cec5SDimitry Andric auto Op = getNamedOperand(MI, AMDGPU::OpName::simm16); 48870b57cec5SDimitry Andric if (Desc.isBranch()) { 48880b57cec5SDimitry Andric if (!Op->isMBB()) { 48890b57cec5SDimitry Andric ErrInfo = "invalid branch target for SOPK instruction"; 48900b57cec5SDimitry Andric return false; 48910b57cec5SDimitry Andric } 48920b57cec5SDimitry Andric } else { 48930b57cec5SDimitry Andric uint64_t Imm = Op->getImm(); 48940b57cec5SDimitry Andric if (sopkIsZext(MI)) { 48950b57cec5SDimitry Andric if (!isUInt<16>(Imm)) { 48960b57cec5SDimitry Andric ErrInfo = "invalid immediate for SOPK instruction"; 48970b57cec5SDimitry Andric return false; 48980b57cec5SDimitry Andric } 
48990b57cec5SDimitry Andric } else { 49000b57cec5SDimitry Andric if (!isInt<16>(Imm)) { 49010b57cec5SDimitry Andric ErrInfo = "invalid immediate for SOPK instruction"; 49020b57cec5SDimitry Andric return false; 49030b57cec5SDimitry Andric } 49040b57cec5SDimitry Andric } 49050b57cec5SDimitry Andric } 49060b57cec5SDimitry Andric } 49070b57cec5SDimitry Andric 49080b57cec5SDimitry Andric if (Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 || 49090b57cec5SDimitry Andric Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 || 49100b57cec5SDimitry Andric Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 || 49110b57cec5SDimitry Andric Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) { 49120b57cec5SDimitry Andric const bool IsDst = Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 || 49130b57cec5SDimitry Andric Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64; 49140b57cec5SDimitry Andric 4915bdd1243dSDimitry Andric const unsigned StaticNumOps = 4916bdd1243dSDimitry Andric Desc.getNumOperands() + Desc.implicit_uses().size(); 49170b57cec5SDimitry Andric const unsigned NumImplicitOps = IsDst ? 2 : 1; 49180b57cec5SDimitry Andric 49190b57cec5SDimitry Andric // Allow additional implicit operands. This allows a fixup done by the post 49200b57cec5SDimitry Andric // RA scheduler where the main implicit operand is killed and implicit-defs 49210b57cec5SDimitry Andric // are added for sub-registers that remain live after this instruction. 49220b57cec5SDimitry Andric if (MI.getNumOperands() < StaticNumOps + NumImplicitOps) { 49230b57cec5SDimitry Andric ErrInfo = "missing implicit register operands"; 49240b57cec5SDimitry Andric return false; 49250b57cec5SDimitry Andric } 49260b57cec5SDimitry Andric 49270b57cec5SDimitry Andric const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst); 49280b57cec5SDimitry Andric if (IsDst) { 49290b57cec5SDimitry Andric if (!Dst->isUse()) { 49300b57cec5SDimitry Andric ErrInfo = "v_movreld_b32 vdst should be a use operand"; 49310b57cec5SDimitry Andric return false; 49320b57cec5SDimitry Andric } 49330b57cec5SDimitry Andric 49340b57cec5SDimitry Andric unsigned UseOpIdx; 49350b57cec5SDimitry Andric if (!MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) || 49360b57cec5SDimitry Andric UseOpIdx != StaticNumOps + 1) { 49370b57cec5SDimitry Andric ErrInfo = "movrel implicit operands should be tied"; 49380b57cec5SDimitry Andric return false; 49390b57cec5SDimitry Andric } 49400b57cec5SDimitry Andric } 49410b57cec5SDimitry Andric 49420b57cec5SDimitry Andric const MachineOperand &Src0 = MI.getOperand(Src0Idx); 49430b57cec5SDimitry Andric const MachineOperand &ImpUse 49440b57cec5SDimitry Andric = MI.getOperand(StaticNumOps + NumImplicitOps - 1); 49450b57cec5SDimitry Andric if (!ImpUse.isReg() || !ImpUse.isUse() || 49460b57cec5SDimitry Andric !isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) { 49470b57cec5SDimitry Andric ErrInfo = "src0 should be subreg of implicit vector use"; 49480b57cec5SDimitry Andric return false; 49490b57cec5SDimitry Andric } 49500b57cec5SDimitry Andric } 49510b57cec5SDimitry Andric 49520b57cec5SDimitry Andric // Make sure we aren't losing exec uses in the td files. This mostly requires 49530b57cec5SDimitry Andric // being careful when using let Uses to try to add other use registers. 
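// shouldReadExec() above already excludes SALU, SMRD, the lane access
// opcodes and generic/pre-ISel opcodes from this check.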
49540b57cec5SDimitry Andric if (shouldReadExec(MI)) { 49550b57cec5SDimitry Andric if (!MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) { 49560b57cec5SDimitry Andric ErrInfo = "VALU instruction does not implicitly read exec mask"; 49570b57cec5SDimitry Andric return false; 49580b57cec5SDimitry Andric } 49590b57cec5SDimitry Andric } 49600b57cec5SDimitry Andric 49610b57cec5SDimitry Andric if (isSMRD(MI)) { 496281ad6265SDimitry Andric if (MI.mayStore() && 496381ad6265SDimitry Andric ST.getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS) { 49640b57cec5SDimitry Andric // The register offset form of scalar stores may only use m0 as the 49650b57cec5SDimitry Andric // soffset register. 496681ad6265SDimitry Andric const MachineOperand *Soff = getNamedOperand(MI, AMDGPU::OpName::soffset); 49670b57cec5SDimitry Andric if (Soff && Soff->getReg() != AMDGPU::M0) { 49680b57cec5SDimitry Andric ErrInfo = "scalar stores must use m0 as offset register"; 49690b57cec5SDimitry Andric return false; 49700b57cec5SDimitry Andric } 49710b57cec5SDimitry Andric } 49720b57cec5SDimitry Andric } 49730b57cec5SDimitry Andric 4974e8d8bef9SDimitry Andric if (isFLAT(MI) && !ST.hasFlatInstOffsets()) { 49750b57cec5SDimitry Andric const MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset); 49760b57cec5SDimitry Andric if (Offset->getImm() != 0) { 49770b57cec5SDimitry Andric ErrInfo = "subtarget does not support offsets in flat instructions"; 49780b57cec5SDimitry Andric return false; 49790b57cec5SDimitry Andric } 49800b57cec5SDimitry Andric } 49810b57cec5SDimitry Andric 4982cb14a3feSDimitry Andric if (isDS(MI) && !ST.hasGDS()) { 4983cb14a3feSDimitry Andric const MachineOperand *GDSOp = getNamedOperand(MI, AMDGPU::OpName::gds); 4984cb14a3feSDimitry Andric if (GDSOp && GDSOp->getImm() != 0) { 4985cb14a3feSDimitry Andric ErrInfo = "GDS is not supported on this subtarget"; 4986cb14a3feSDimitry Andric return false; 4987cb14a3feSDimitry Andric } 4988cb14a3feSDimitry Andric } 4989cb14a3feSDimitry Andric 49905f757f3fSDimitry Andric if (isImage(MI)) { 49910b57cec5SDimitry Andric const MachineOperand *DimOp = getNamedOperand(MI, AMDGPU::OpName::dim); 49920b57cec5SDimitry Andric if (DimOp) { 49930b57cec5SDimitry Andric int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode, 49940b57cec5SDimitry Andric AMDGPU::OpName::vaddr0); 49955f757f3fSDimitry Andric int RSrcOpName = 49965f757f3fSDimitry Andric isMIMG(MI) ? 
AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc; 49975f757f3fSDimitry Andric int RsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, RSrcOpName); 49980b57cec5SDimitry Andric const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opcode); 49990b57cec5SDimitry Andric const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 50000b57cec5SDimitry Andric AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 50010b57cec5SDimitry Andric const AMDGPU::MIMGDimInfo *Dim = 50020b57cec5SDimitry Andric AMDGPU::getMIMGDimInfoByEncoding(DimOp->getImm()); 50030b57cec5SDimitry Andric 50040b57cec5SDimitry Andric if (!Dim) { 50050b57cec5SDimitry Andric ErrInfo = "dim is out of range"; 50060b57cec5SDimitry Andric return false; 50070b57cec5SDimitry Andric } 50080b57cec5SDimitry Andric 50095ffd83dbSDimitry Andric bool IsA16 = false; 50105ffd83dbSDimitry Andric if (ST.hasR128A16()) { 50115ffd83dbSDimitry Andric const MachineOperand *R128A16 = getNamedOperand(MI, AMDGPU::OpName::r128); 50125ffd83dbSDimitry Andric IsA16 = R128A16->getImm() != 0; 5013bdd1243dSDimitry Andric } else if (ST.hasA16()) { 50145ffd83dbSDimitry Andric const MachineOperand *A16 = getNamedOperand(MI, AMDGPU::OpName::a16); 50155ffd83dbSDimitry Andric IsA16 = A16->getImm() != 0; 50165ffd83dbSDimitry Andric } 50175ffd83dbSDimitry Andric 50185f757f3fSDimitry Andric bool IsNSA = RsrcIdx - VAddr0Idx > 1; 50195ffd83dbSDimitry Andric 5020fe6060f1SDimitry Andric unsigned AddrWords = 5021fe6060f1SDimitry Andric AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, ST.hasG16()); 50220b57cec5SDimitry Andric 50230b57cec5SDimitry Andric unsigned VAddrWords; 50240b57cec5SDimitry Andric if (IsNSA) { 50255f757f3fSDimitry Andric VAddrWords = RsrcIdx - VAddr0Idx; 50265f757f3fSDimitry Andric if (ST.hasPartialNSAEncoding() && 50275f757f3fSDimitry Andric AddrWords > ST.getNSAMaxSize(isVSAMPLE(MI))) { 50285f757f3fSDimitry Andric unsigned LastVAddrIdx = RsrcIdx - 1; 502906c3fb27SDimitry Andric VAddrWords += getOpSize(MI, LastVAddrIdx) / 4 - 1; 503006c3fb27SDimitry Andric } 50310b57cec5SDimitry Andric } else { 503206c3fb27SDimitry Andric VAddrWords = getOpSize(MI, VAddr0Idx) / 4; 5033bdd1243dSDimitry Andric if (AddrWords > 12) 50340b57cec5SDimitry Andric AddrWords = 16; 50350b57cec5SDimitry Andric } 50360b57cec5SDimitry Andric 50370b57cec5SDimitry Andric if (VAddrWords != AddrWords) { 50385ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << "bad vaddr size, expected " << AddrWords 50395ffd83dbSDimitry Andric << " but got " << VAddrWords << "\n"); 50400b57cec5SDimitry Andric ErrInfo = "bad vaddr size"; 50410b57cec5SDimitry Andric return false; 50420b57cec5SDimitry Andric } 50430b57cec5SDimitry Andric } 50440b57cec5SDimitry Andric } 50450b57cec5SDimitry Andric 50460b57cec5SDimitry Andric const MachineOperand *DppCt = getNamedOperand(MI, AMDGPU::OpName::dpp_ctrl); 50470b57cec5SDimitry Andric if (DppCt) { 50480b57cec5SDimitry Andric using namespace AMDGPU::DPP; 50490b57cec5SDimitry Andric 50500b57cec5SDimitry Andric unsigned DC = DppCt->getImm(); 50510b57cec5SDimitry Andric if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 || 50520b57cec5SDimitry Andric DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST || 50530b57cec5SDimitry Andric (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) || 50540b57cec5SDimitry Andric (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) || 50550b57cec5SDimitry Andric (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) || 50560b57cec5SDimitry Andric (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= 
DppCtrl::DPP_UNUSED7_LAST) || 50570b57cec5SDimitry Andric (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) { 50580b57cec5SDimitry Andric ErrInfo = "Invalid dpp_ctrl value"; 50590b57cec5SDimitry Andric return false; 50600b57cec5SDimitry Andric } 50610b57cec5SDimitry Andric if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 && 50620b57cec5SDimitry Andric ST.getGeneration() >= AMDGPUSubtarget::GFX10) { 50630b57cec5SDimitry Andric ErrInfo = "Invalid dpp_ctrl value: " 50640b57cec5SDimitry Andric "wavefront shifts are not supported on GFX10+"; 50650b57cec5SDimitry Andric return false; 50660b57cec5SDimitry Andric } 50670b57cec5SDimitry Andric if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 && 50680b57cec5SDimitry Andric ST.getGeneration() >= AMDGPUSubtarget::GFX10) { 50690b57cec5SDimitry Andric ErrInfo = "Invalid dpp_ctrl value: " 50708bcb0991SDimitry Andric "broadcasts are not supported on GFX10+"; 50710b57cec5SDimitry Andric return false; 50720b57cec5SDimitry Andric } 50730b57cec5SDimitry Andric if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST && 50740b57cec5SDimitry Andric ST.getGeneration() < AMDGPUSubtarget::GFX10) { 5075fe6060f1SDimitry Andric if (DC >= DppCtrl::ROW_NEWBCAST_FIRST && 5076fe6060f1SDimitry Andric DC <= DppCtrl::ROW_NEWBCAST_LAST && 5077fe6060f1SDimitry Andric !ST.hasGFX90AInsts()) { 5078fe6060f1SDimitry Andric ErrInfo = "Invalid dpp_ctrl value: " 5079fe6060f1SDimitry Andric "row_newbroadcast/row_share is not supported before " 5080fe6060f1SDimitry Andric "GFX90A/GFX10"; 5081fe6060f1SDimitry Andric return false; 5082fe6060f1SDimitry Andric } else if (DC > DppCtrl::ROW_NEWBCAST_LAST || !ST.hasGFX90AInsts()) { 50830b57cec5SDimitry Andric ErrInfo = "Invalid dpp_ctrl value: " 50840b57cec5SDimitry Andric "row_share and row_xmask are not supported before GFX10"; 50850b57cec5SDimitry Andric return false; 50860b57cec5SDimitry Andric } 50870b57cec5SDimitry Andric } 50880b57cec5SDimitry Andric 5089fe6060f1SDimitry Andric if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO && 50905f757f3fSDimitry Andric !AMDGPU::isLegalDPALU_DPPControl(DC) && AMDGPU::isDPALU_DPP(Desc)) { 5091fe6060f1SDimitry Andric ErrInfo = "Invalid dpp_ctrl value: " 50925f757f3fSDimitry Andric "DP ALU dpp only support row_newbcast"; 5093fe6060f1SDimitry Andric return false; 5094fe6060f1SDimitry Andric } 5095fe6060f1SDimitry Andric } 5096fe6060f1SDimitry Andric 5097fe6060f1SDimitry Andric if ((MI.mayStore() || MI.mayLoad()) && !isVGPRSpill(MI)) { 5098fe6060f1SDimitry Andric const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst); 5099fe6060f1SDimitry Andric uint16_t DataNameIdx = isDS(Opcode) ? 
AMDGPU::OpName::data0 5100fe6060f1SDimitry Andric : AMDGPU::OpName::vdata; 5101fe6060f1SDimitry Andric const MachineOperand *Data = getNamedOperand(MI, DataNameIdx); 5102fe6060f1SDimitry Andric const MachineOperand *Data2 = getNamedOperand(MI, AMDGPU::OpName::data1); 5103fe6060f1SDimitry Andric if (Data && !Data->isReg()) 5104fe6060f1SDimitry Andric Data = nullptr; 5105fe6060f1SDimitry Andric 5106fe6060f1SDimitry Andric if (ST.hasGFX90AInsts()) { 5107fe6060f1SDimitry Andric if (Dst && Data && 5108fe6060f1SDimitry Andric (RI.isAGPR(MRI, Dst->getReg()) != RI.isAGPR(MRI, Data->getReg()))) { 5109fe6060f1SDimitry Andric ErrInfo = "Invalid register class: " 5110fe6060f1SDimitry Andric "vdata and vdst should be both VGPR or AGPR"; 5111fe6060f1SDimitry Andric return false; 5112fe6060f1SDimitry Andric } 5113fe6060f1SDimitry Andric if (Data && Data2 && 5114fe6060f1SDimitry Andric (RI.isAGPR(MRI, Data->getReg()) != RI.isAGPR(MRI, Data2->getReg()))) { 5115fe6060f1SDimitry Andric ErrInfo = "Invalid register class: " 5116fe6060f1SDimitry Andric "both data operands should be VGPR or AGPR"; 5117fe6060f1SDimitry Andric return false; 5118fe6060f1SDimitry Andric } 5119fe6060f1SDimitry Andric } else { 5120fe6060f1SDimitry Andric if ((Dst && RI.isAGPR(MRI, Dst->getReg())) || 5121fe6060f1SDimitry Andric (Data && RI.isAGPR(MRI, Data->getReg())) || 5122fe6060f1SDimitry Andric (Data2 && RI.isAGPR(MRI, Data2->getReg()))) { 5123fe6060f1SDimitry Andric ErrInfo = "Invalid register class: " 5124fe6060f1SDimitry Andric "agpr loads and stores not supported on this GPU"; 5125fe6060f1SDimitry Andric return false; 5126fe6060f1SDimitry Andric } 5127fe6060f1SDimitry Andric } 5128fe6060f1SDimitry Andric } 5129fe6060f1SDimitry Andric 513081ad6265SDimitry Andric if (ST.needsAlignedVGPRs()) { 513181ad6265SDimitry Andric const auto isAlignedReg = [&MI, &MRI, this](unsigned OpName) -> bool { 513281ad6265SDimitry Andric const MachineOperand *Op = getNamedOperand(MI, OpName); 513381ad6265SDimitry Andric if (!Op) 513481ad6265SDimitry Andric return true; 5135fe6060f1SDimitry Andric Register Reg = Op->getReg(); 513681ad6265SDimitry Andric if (Reg.isPhysical()) 513781ad6265SDimitry Andric return !(RI.getHWRegIndex(Reg) & 1); 5138fe6060f1SDimitry Andric const TargetRegisterClass &RC = *MRI.getRegClass(Reg); 513981ad6265SDimitry Andric return RI.getRegSizeInBits(RC) > 32 && RI.isProperlyAlignedRC(RC) && 5140fe6060f1SDimitry Andric !(RI.getChannelFromSubReg(Op->getSubReg()) & 1); 514181ad6265SDimitry Andric }; 5142fe6060f1SDimitry Andric 514381ad6265SDimitry Andric if (MI.getOpcode() == AMDGPU::DS_GWS_INIT || 514481ad6265SDimitry Andric MI.getOpcode() == AMDGPU::DS_GWS_SEMA_BR || 514581ad6265SDimitry Andric MI.getOpcode() == AMDGPU::DS_GWS_BARRIER) { 514681ad6265SDimitry Andric 514781ad6265SDimitry Andric if (!isAlignedReg(AMDGPU::OpName::data0)) { 5148fe6060f1SDimitry Andric ErrInfo = "Subtarget requires even aligned vector registers " 5149fe6060f1SDimitry Andric "for DS_GWS instructions"; 5150fe6060f1SDimitry Andric return false; 5151fe6060f1SDimitry Andric } 5152fe6060f1SDimitry Andric } 5153fe6060f1SDimitry Andric 515481ad6265SDimitry Andric if (isMIMG(MI)) { 515581ad6265SDimitry Andric if (!isAlignedReg(AMDGPU::OpName::vaddr)) { 515681ad6265SDimitry Andric ErrInfo = "Subtarget requires even aligned vector registers " 515781ad6265SDimitry Andric "for vaddr operand of image instructions"; 515881ad6265SDimitry Andric return false; 515981ad6265SDimitry Andric } 516081ad6265SDimitry Andric } 516181ad6265SDimitry Andric } 
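// For example, on subtargets with this restriction a DS_GWS_BARRIER whose
// data0 operand lives in the odd physical register v3 is rejected, while v2
// is accepted, since the hardware register index must be even.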
516281ad6265SDimitry Andric 516381ad6265SDimitry Andric if (MI.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && 516481ad6265SDimitry Andric !ST.hasGFX90AInsts()) { 516581ad6265SDimitry Andric const MachineOperand *Src = getNamedOperand(MI, AMDGPU::OpName::src0); 516681ad6265SDimitry Andric if (Src->isReg() && RI.isSGPRReg(MRI, Src->getReg())) { 516781ad6265SDimitry Andric ErrInfo = "Invalid register class: " 516881ad6265SDimitry Andric "v_accvgpr_write with an SGPR is not supported on this GPU"; 516981ad6265SDimitry Andric return false; 517081ad6265SDimitry Andric } 517181ad6265SDimitry Andric } 517281ad6265SDimitry Andric 517304eeddc0SDimitry Andric if (Desc.getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS) { 517404eeddc0SDimitry Andric const MachineOperand &SrcOp = MI.getOperand(1); 517504eeddc0SDimitry Andric if (!SrcOp.isReg() || SrcOp.getReg().isVirtual()) { 517604eeddc0SDimitry Andric ErrInfo = "pseudo expects only physical SGPRs"; 517704eeddc0SDimitry Andric return false; 517804eeddc0SDimitry Andric } 517904eeddc0SDimitry Andric } 518004eeddc0SDimitry Andric 51810b57cec5SDimitry Andric return true; 51820b57cec5SDimitry Andric } 51830b57cec5SDimitry Andric 51845f757f3fSDimitry Andric // It is more readable to list mapped opcodes on the same line. 51855f757f3fSDimitry Andric // clang-format off 51865f757f3fSDimitry Andric 51870b57cec5SDimitry Andric unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const { 51880b57cec5SDimitry Andric switch (MI.getOpcode()) { 51890b57cec5SDimitry Andric default: return AMDGPU::INSTRUCTION_LIST_END; 51900b57cec5SDimitry Andric case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE; 51910b57cec5SDimitry Andric case AMDGPU::COPY: return AMDGPU::COPY; 51920b57cec5SDimitry Andric case AMDGPU::PHI: return AMDGPU::PHI; 51930b57cec5SDimitry Andric case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG; 51940b57cec5SDimitry Andric case AMDGPU::WQM: return AMDGPU::WQM; 51958bcb0991SDimitry Andric case AMDGPU::SOFT_WQM: return AMDGPU::SOFT_WQM; 5196fe6060f1SDimitry Andric case AMDGPU::STRICT_WWM: return AMDGPU::STRICT_WWM; 5197fe6060f1SDimitry Andric case AMDGPU::STRICT_WQM: return AMDGPU::STRICT_WQM; 51980b57cec5SDimitry Andric case AMDGPU::S_MOV_B32: { 51990b57cec5SDimitry Andric const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 52000b57cec5SDimitry Andric return MI.getOperand(1).isReg() || 52010b57cec5SDimitry Andric RI.isAGPR(MRI, MI.getOperand(0).getReg()) ? 52020b57cec5SDimitry Andric AMDGPU::COPY : AMDGPU::V_MOV_B32_e32; 52030b57cec5SDimitry Andric } 52040b57cec5SDimitry Andric case AMDGPU::S_ADD_I32: 5205e8d8bef9SDimitry Andric return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32; 52060b57cec5SDimitry Andric case AMDGPU::S_ADDC_U32: 52070b57cec5SDimitry Andric return AMDGPU::V_ADDC_U32_e32; 52080b57cec5SDimitry Andric case AMDGPU::S_SUB_I32: 5209e8d8bef9SDimitry Andric return ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32; 52100b57cec5SDimitry Andric // FIXME: These are not consistently handled, and selected when the carry is 52110b57cec5SDimitry Andric // used. 
52120b57cec5SDimitry Andric case AMDGPU::S_ADD_U32: 5213e8d8bef9SDimitry Andric return AMDGPU::V_ADD_CO_U32_e32; 52140b57cec5SDimitry Andric case AMDGPU::S_SUB_U32: 5215e8d8bef9SDimitry Andric return AMDGPU::V_SUB_CO_U32_e32; 52160b57cec5SDimitry Andric case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32; 5217e8d8bef9SDimitry Andric case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_U32_e64; 5218e8d8bef9SDimitry Andric case AMDGPU::S_MUL_HI_U32: return AMDGPU::V_MUL_HI_U32_e64; 5219e8d8bef9SDimitry Andric case AMDGPU::S_MUL_HI_I32: return AMDGPU::V_MUL_HI_I32_e64; 52200b57cec5SDimitry Andric case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64; 52210b57cec5SDimitry Andric case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e64; 52220b57cec5SDimitry Andric case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e64; 52230b57cec5SDimitry Andric case AMDGPU::S_XNOR_B32: 52240b57cec5SDimitry Andric return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END; 52250b57cec5SDimitry Andric case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e64; 52260b57cec5SDimitry Andric case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e64; 52270b57cec5SDimitry Andric case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e64; 52280b57cec5SDimitry Andric case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e64; 52290b57cec5SDimitry Andric case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32; 5230e8d8bef9SDimitry Andric case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64_e64; 52310b57cec5SDimitry Andric case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32; 5232e8d8bef9SDimitry Andric case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64_e64; 52330b57cec5SDimitry Andric case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32; 5234e8d8bef9SDimitry Andric case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64_e64; 5235e8d8bef9SDimitry Andric case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32_e64; 5236e8d8bef9SDimitry Andric case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32_e64; 5237e8d8bef9SDimitry Andric case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32_e64; 5238e8d8bef9SDimitry Andric case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32_e64; 52390b57cec5SDimitry Andric case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64; 52400b57cec5SDimitry Andric case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32; 52410b57cec5SDimitry Andric case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32; 52420b57cec5SDimitry Andric case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32; 5243349cc55cSDimitry Andric case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e64; 5244349cc55cSDimitry Andric case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e64; 5245349cc55cSDimitry Andric case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e64; 5246349cc55cSDimitry Andric case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e64; 5247349cc55cSDimitry Andric case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e64; 5248349cc55cSDimitry Andric case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e64; 5249349cc55cSDimitry Andric case AMDGPU::S_CMP_EQ_U32: return AMDGPU::V_CMP_EQ_U32_e64; 5250349cc55cSDimitry Andric case AMDGPU::S_CMP_LG_U32: return AMDGPU::V_CMP_NE_U32_e64; 5251349cc55cSDimitry Andric case AMDGPU::S_CMP_GT_U32: return AMDGPU::V_CMP_GT_U32_e64; 5252349cc55cSDimitry Andric case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e64; 5253349cc55cSDimitry Andric case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e64; 5254349cc55cSDimitry Andric case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e64; 5255349cc55cSDimitry 
Andric case AMDGPU::S_CMP_EQ_U64: return AMDGPU::V_CMP_EQ_U64_e64; 5256349cc55cSDimitry Andric case AMDGPU::S_CMP_LG_U64: return AMDGPU::V_CMP_NE_U64_e64; 52570b57cec5SDimitry Andric case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64; 52580b57cec5SDimitry Andric case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32; 52590b57cec5SDimitry Andric case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32; 52600b57cec5SDimitry Andric case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64; 52610b57cec5SDimitry Andric case AMDGPU::S_CBRANCH_SCC0: return AMDGPU::S_CBRANCH_VCCZ; 52620b57cec5SDimitry Andric case AMDGPU::S_CBRANCH_SCC1: return AMDGPU::S_CBRANCH_VCCNZ; 52635f757f3fSDimitry Andric case AMDGPU::S_CVT_F32_I32: return AMDGPU::V_CVT_F32_I32_e64; 52645f757f3fSDimitry Andric case AMDGPU::S_CVT_F32_U32: return AMDGPU::V_CVT_F32_U32_e64; 52655f757f3fSDimitry Andric case AMDGPU::S_CVT_I32_F32: return AMDGPU::V_CVT_I32_F32_e64; 52665f757f3fSDimitry Andric case AMDGPU::S_CVT_U32_F32: return AMDGPU::V_CVT_U32_F32_e64; 52675f757f3fSDimitry Andric case AMDGPU::S_CVT_F32_F16: return AMDGPU::V_CVT_F32_F16_t16_e64; 52685f757f3fSDimitry Andric case AMDGPU::S_CVT_HI_F32_F16: return AMDGPU::V_CVT_F32_F16_t16_e64; 52695f757f3fSDimitry Andric case AMDGPU::S_CVT_F16_F32: return AMDGPU::V_CVT_F16_F32_t16_e64; 52705f757f3fSDimitry Andric case AMDGPU::S_CEIL_F32: return AMDGPU::V_CEIL_F32_e64; 52715f757f3fSDimitry Andric case AMDGPU::S_FLOOR_F32: return AMDGPU::V_FLOOR_F32_e64; 52725f757f3fSDimitry Andric case AMDGPU::S_TRUNC_F32: return AMDGPU::V_TRUNC_F32_e64; 52735f757f3fSDimitry Andric case AMDGPU::S_RNDNE_F32: return AMDGPU::V_RNDNE_F32_e64; 52745f757f3fSDimitry Andric case AMDGPU::S_CEIL_F16: return AMDGPU::V_CEIL_F16_t16_e64; 52755f757f3fSDimitry Andric case AMDGPU::S_FLOOR_F16: return AMDGPU::V_FLOOR_F16_t16_e64; 52765f757f3fSDimitry Andric case AMDGPU::S_TRUNC_F16: return AMDGPU::V_TRUNC_F16_t16_e64; 52775f757f3fSDimitry Andric case AMDGPU::S_RNDNE_F16: return AMDGPU::V_RNDNE_F16_t16_e64; 52785f757f3fSDimitry Andric case AMDGPU::S_ADD_F32: return AMDGPU::V_ADD_F32_e64; 52795f757f3fSDimitry Andric case AMDGPU::S_SUB_F32: return AMDGPU::V_SUB_F32_e64; 52805f757f3fSDimitry Andric case AMDGPU::S_MIN_F32: return AMDGPU::V_MIN_F32_e64; 52815f757f3fSDimitry Andric case AMDGPU::S_MAX_F32: return AMDGPU::V_MAX_F32_e64; 52825f757f3fSDimitry Andric case AMDGPU::S_MINIMUM_F32: return AMDGPU::V_MINIMUM_F32_e64; 52835f757f3fSDimitry Andric case AMDGPU::S_MAXIMUM_F32: return AMDGPU::V_MAXIMUM_F32_e64; 52845f757f3fSDimitry Andric case AMDGPU::S_MUL_F32: return AMDGPU::V_MUL_F32_e64; 52855f757f3fSDimitry Andric case AMDGPU::S_ADD_F16: return AMDGPU::V_ADD_F16_fake16_e64; 52865f757f3fSDimitry Andric case AMDGPU::S_SUB_F16: return AMDGPU::V_SUB_F16_fake16_e64; 52875f757f3fSDimitry Andric case AMDGPU::S_MIN_F16: return AMDGPU::V_MIN_F16_fake16_e64; 52885f757f3fSDimitry Andric case AMDGPU::S_MAX_F16: return AMDGPU::V_MAX_F16_fake16_e64; 52895f757f3fSDimitry Andric case AMDGPU::S_MINIMUM_F16: return AMDGPU::V_MINIMUM_F16_e64; 52905f757f3fSDimitry Andric case AMDGPU::S_MAXIMUM_F16: return AMDGPU::V_MAXIMUM_F16_e64; 52915f757f3fSDimitry Andric case AMDGPU::S_MUL_F16: return AMDGPU::V_MUL_F16_fake16_e64; 52925f757f3fSDimitry Andric case AMDGPU::S_CVT_PK_RTZ_F16_F32: return AMDGPU::V_CVT_PKRTZ_F16_F32_e64; 52935f757f3fSDimitry Andric case AMDGPU::S_FMAC_F32: return AMDGPU::V_FMAC_F32_e64; 52945f757f3fSDimitry Andric case AMDGPU::S_FMAC_F16: return AMDGPU::V_FMAC_F16_t16_e64; 52955f757f3fSDimitry Andric case 
AMDGPU::S_FMAMK_F32: return AMDGPU::V_FMAMK_F32; 52965f757f3fSDimitry Andric case AMDGPU::S_FMAAK_F32: return AMDGPU::V_FMAAK_F32; 52975f757f3fSDimitry Andric case AMDGPU::S_CMP_LT_F32: return AMDGPU::V_CMP_LT_F32_e64; 52985f757f3fSDimitry Andric case AMDGPU::S_CMP_EQ_F32: return AMDGPU::V_CMP_EQ_F32_e64; 52995f757f3fSDimitry Andric case AMDGPU::S_CMP_LE_F32: return AMDGPU::V_CMP_LE_F32_e64; 53005f757f3fSDimitry Andric case AMDGPU::S_CMP_GT_F32: return AMDGPU::V_CMP_GT_F32_e64; 53015f757f3fSDimitry Andric case AMDGPU::S_CMP_LG_F32: return AMDGPU::V_CMP_LG_F32_e64; 53025f757f3fSDimitry Andric case AMDGPU::S_CMP_GE_F32: return AMDGPU::V_CMP_GE_F32_e64; 53035f757f3fSDimitry Andric case AMDGPU::S_CMP_O_F32: return AMDGPU::V_CMP_O_F32_e64; 53045f757f3fSDimitry Andric case AMDGPU::S_CMP_U_F32: return AMDGPU::V_CMP_U_F32_e64; 53055f757f3fSDimitry Andric case AMDGPU::S_CMP_NGE_F32: return AMDGPU::V_CMP_NGE_F32_e64; 53065f757f3fSDimitry Andric case AMDGPU::S_CMP_NLG_F32: return AMDGPU::V_CMP_NLG_F32_e64; 53075f757f3fSDimitry Andric case AMDGPU::S_CMP_NGT_F32: return AMDGPU::V_CMP_NGT_F32_e64; 53085f757f3fSDimitry Andric case AMDGPU::S_CMP_NLE_F32: return AMDGPU::V_CMP_NLE_F32_e64; 53095f757f3fSDimitry Andric case AMDGPU::S_CMP_NEQ_F32: return AMDGPU::V_CMP_NEQ_F32_e64; 53105f757f3fSDimitry Andric case AMDGPU::S_CMP_NLT_F32: return AMDGPU::V_CMP_NLT_F32_e64; 53115f757f3fSDimitry Andric case AMDGPU::S_CMP_LT_F16: return AMDGPU::V_CMP_LT_F16_t16_e64; 53125f757f3fSDimitry Andric case AMDGPU::S_CMP_EQ_F16: return AMDGPU::V_CMP_EQ_F16_t16_e64; 53135f757f3fSDimitry Andric case AMDGPU::S_CMP_LE_F16: return AMDGPU::V_CMP_LE_F16_t16_e64; 53145f757f3fSDimitry Andric case AMDGPU::S_CMP_GT_F16: return AMDGPU::V_CMP_GT_F16_t16_e64; 53155f757f3fSDimitry Andric case AMDGPU::S_CMP_LG_F16: return AMDGPU::V_CMP_LG_F16_t16_e64; 53165f757f3fSDimitry Andric case AMDGPU::S_CMP_GE_F16: return AMDGPU::V_CMP_GE_F16_t16_e64; 53175f757f3fSDimitry Andric case AMDGPU::S_CMP_O_F16: return AMDGPU::V_CMP_O_F16_t16_e64; 53185f757f3fSDimitry Andric case AMDGPU::S_CMP_U_F16: return AMDGPU::V_CMP_U_F16_t16_e64; 53195f757f3fSDimitry Andric case AMDGPU::S_CMP_NGE_F16: return AMDGPU::V_CMP_NGE_F16_t16_e64; 53205f757f3fSDimitry Andric case AMDGPU::S_CMP_NLG_F16: return AMDGPU::V_CMP_NLG_F16_t16_e64; 53215f757f3fSDimitry Andric case AMDGPU::S_CMP_NGT_F16: return AMDGPU::V_CMP_NGT_F16_t16_e64; 53225f757f3fSDimitry Andric case AMDGPU::S_CMP_NLE_F16: return AMDGPU::V_CMP_NLE_F16_t16_e64; 53235f757f3fSDimitry Andric case AMDGPU::S_CMP_NEQ_F16: return AMDGPU::V_CMP_NEQ_F16_t16_e64; 53245f757f3fSDimitry Andric case AMDGPU::S_CMP_NLT_F16: return AMDGPU::V_CMP_NLT_F16_t16_e64; 53255f757f3fSDimitry Andric case AMDGPU::V_S_EXP_F32_e64: return AMDGPU::V_EXP_F32_e64; 53265f757f3fSDimitry Andric case AMDGPU::V_S_EXP_F16_e64: return AMDGPU::V_EXP_F16_t16_e64; 53275f757f3fSDimitry Andric case AMDGPU::V_S_LOG_F32_e64: return AMDGPU::V_LOG_F32_e64; 53285f757f3fSDimitry Andric case AMDGPU::V_S_LOG_F16_e64: return AMDGPU::V_LOG_F16_t16_e64; 53295f757f3fSDimitry Andric case AMDGPU::V_S_RCP_F32_e64: return AMDGPU::V_RCP_F32_e64; 53305f757f3fSDimitry Andric case AMDGPU::V_S_RCP_F16_e64: return AMDGPU::V_RCP_F16_t16_e64; 53315f757f3fSDimitry Andric case AMDGPU::V_S_RSQ_F32_e64: return AMDGPU::V_RSQ_F32_e64; 53325f757f3fSDimitry Andric case AMDGPU::V_S_RSQ_F16_e64: return AMDGPU::V_RSQ_F16_t16_e64; 53335f757f3fSDimitry Andric case AMDGPU::V_S_SQRT_F32_e64: return AMDGPU::V_SQRT_F32_e64; 53345f757f3fSDimitry Andric case AMDGPU::V_S_SQRT_F16_e64: return 
AMDGPU::V_SQRT_F16_t16_e64; 53350b57cec5SDimitry Andric } 53360b57cec5SDimitry Andric llvm_unreachable( 53370b57cec5SDimitry Andric "Unexpected scalar opcode without corresponding vector one!"); 53380b57cec5SDimitry Andric } 53390b57cec5SDimitry Andric 53405f757f3fSDimitry Andric // clang-format on 53415f757f3fSDimitry Andric 534206c3fb27SDimitry Andric void SIInstrInfo::insertScratchExecCopy(MachineFunction &MF, 534306c3fb27SDimitry Andric MachineBasicBlock &MBB, 534406c3fb27SDimitry Andric MachineBasicBlock::iterator MBBI, 534506c3fb27SDimitry Andric const DebugLoc &DL, Register Reg, 53465f757f3fSDimitry Andric bool IsSCCLive, 53475f757f3fSDimitry Andric SlotIndexes *Indexes) const { 534806c3fb27SDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 534906c3fb27SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 535006c3fb27SDimitry Andric bool IsWave32 = ST.isWave32(); 535106c3fb27SDimitry Andric if (IsSCCLive) { 535206c3fb27SDimitry Andric // Insert two move instructions, one to save the original value of EXEC and 535306c3fb27SDimitry Andric // the other to turn on all bits in EXEC. This is required as we can't use 535406c3fb27SDimitry Andric // the single instruction S_OR_SAVEEXEC that clobbers SCC. 535506c3fb27SDimitry Andric unsigned MovOpc = IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; 535606c3fb27SDimitry Andric MCRegister Exec = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC; 53575f757f3fSDimitry Andric auto StoreExecMI = BuildMI(MBB, MBBI, DL, TII->get(MovOpc), Reg) 53585f757f3fSDimitry Andric .addReg(Exec, RegState::Kill); 53595f757f3fSDimitry Andric auto FlipExecMI = BuildMI(MBB, MBBI, DL, TII->get(MovOpc), Exec).addImm(-1); 53605f757f3fSDimitry Andric if (Indexes) { 53615f757f3fSDimitry Andric Indexes->insertMachineInstrInMaps(*StoreExecMI); 53625f757f3fSDimitry Andric Indexes->insertMachineInstrInMaps(*FlipExecMI); 53635f757f3fSDimitry Andric } 536406c3fb27SDimitry Andric } else { 536506c3fb27SDimitry Andric const unsigned OrSaveExec = 536606c3fb27SDimitry Andric IsWave32 ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64; 536706c3fb27SDimitry Andric auto SaveExec = 536806c3fb27SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), Reg).addImm(-1); 536906c3fb27SDimitry Andric SaveExec->getOperand(3).setIsDead(); // Mark SCC as dead. 53705f757f3fSDimitry Andric if (Indexes) 53715f757f3fSDimitry Andric Indexes->insertMachineInstrInMaps(*SaveExec); 537206c3fb27SDimitry Andric } 537306c3fb27SDimitry Andric } 537406c3fb27SDimitry Andric 537506c3fb27SDimitry Andric void SIInstrInfo::restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, 537606c3fb27SDimitry Andric MachineBasicBlock::iterator MBBI, 53775f757f3fSDimitry Andric const DebugLoc &DL, Register Reg, 53785f757f3fSDimitry Andric SlotIndexes *Indexes) const { 537906c3fb27SDimitry Andric unsigned ExecMov = isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; 538006c3fb27SDimitry Andric MCRegister Exec = isWave32() ? 
AMDGPU::EXEC_LO : AMDGPU::EXEC; 53815f757f3fSDimitry Andric auto ExecRestoreMI = 538206c3fb27SDimitry Andric BuildMI(MBB, MBBI, DL, get(ExecMov), Exec).addReg(Reg, RegState::Kill); 53835f757f3fSDimitry Andric if (Indexes) 53845f757f3fSDimitry Andric Indexes->insertMachineInstrInMaps(*ExecRestoreMI); 538506c3fb27SDimitry Andric } 538606c3fb27SDimitry Andric 538781ad6265SDimitry Andric static const TargetRegisterClass * 538881ad6265SDimitry Andric adjustAllocatableRegClass(const GCNSubtarget &ST, const SIRegisterInfo &RI, 5389fe6060f1SDimitry Andric const MachineRegisterInfo &MRI, 539081ad6265SDimitry Andric const MCInstrDesc &TID, unsigned RCID, 5391fe6060f1SDimitry Andric bool IsAllocatable) { 5392fe6060f1SDimitry Andric if ((IsAllocatable || !ST.hasGFX90AInsts() || !MRI.reservedRegsFrozen()) && 53930eae32dcSDimitry Andric (((TID.mayLoad() || TID.mayStore()) && 53940eae32dcSDimitry Andric !(TID.TSFlags & SIInstrFlags::VGPRSpill)) || 5395fe6060f1SDimitry Andric (TID.TSFlags & (SIInstrFlags::DS | SIInstrFlags::MIMG)))) { 5396fe6060f1SDimitry Andric switch (RCID) { 539781ad6265SDimitry Andric case AMDGPU::AV_32RegClassID: 539881ad6265SDimitry Andric RCID = AMDGPU::VGPR_32RegClassID; 539981ad6265SDimitry Andric break; 540081ad6265SDimitry Andric case AMDGPU::AV_64RegClassID: 540181ad6265SDimitry Andric RCID = AMDGPU::VReg_64RegClassID; 540281ad6265SDimitry Andric break; 540381ad6265SDimitry Andric case AMDGPU::AV_96RegClassID: 540481ad6265SDimitry Andric RCID = AMDGPU::VReg_96RegClassID; 540581ad6265SDimitry Andric break; 540681ad6265SDimitry Andric case AMDGPU::AV_128RegClassID: 540781ad6265SDimitry Andric RCID = AMDGPU::VReg_128RegClassID; 540881ad6265SDimitry Andric break; 540981ad6265SDimitry Andric case AMDGPU::AV_160RegClassID: 541081ad6265SDimitry Andric RCID = AMDGPU::VReg_160RegClassID; 541181ad6265SDimitry Andric break; 541281ad6265SDimitry Andric case AMDGPU::AV_512RegClassID: 541381ad6265SDimitry Andric RCID = AMDGPU::VReg_512RegClassID; 541481ad6265SDimitry Andric break; 5415fe6060f1SDimitry Andric default: 5416fe6060f1SDimitry Andric break; 5417fe6060f1SDimitry Andric } 5418fe6060f1SDimitry Andric } 541981ad6265SDimitry Andric 542081ad6265SDimitry Andric return RI.getProperlyAlignedRC(RI.getRegClass(RCID)); 5421fe6060f1SDimitry Andric } 5422fe6060f1SDimitry Andric 5423fe6060f1SDimitry Andric const TargetRegisterClass *SIInstrInfo::getRegClass(const MCInstrDesc &TID, 5424fe6060f1SDimitry Andric unsigned OpNum, const TargetRegisterInfo *TRI, 5425fe6060f1SDimitry Andric const MachineFunction &MF) 5426fe6060f1SDimitry Andric const { 5427fe6060f1SDimitry Andric if (OpNum >= TID.getNumOperands()) 5428fe6060f1SDimitry Andric return nullptr; 5429bdd1243dSDimitry Andric auto RegClass = TID.operands()[OpNum].RegClass; 5430fe6060f1SDimitry Andric bool IsAllocatable = false; 5431fe6060f1SDimitry Andric if (TID.TSFlags & (SIInstrFlags::DS | SIInstrFlags::FLAT)) { 5432fe6060f1SDimitry Andric // vdst and vdata should be both VGPR or AGPR, same for the DS instructions 543381ad6265SDimitry Andric // with two data operands. Request register class constrained to VGPR only 5434fe6060f1SDimitry Andric // of both operands present as Machine Copy Propagation can not check this 5435fe6060f1SDimitry Andric // constraint and possibly other passes too. 5436fe6060f1SDimitry Andric // 5437fe6060f1SDimitry Andric // The check is limited to FLAT and DS because atomics in non-flat encoding 5438fe6060f1SDimitry Andric // have their vdst and vdata tied to be the same register. 
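// For example, DS_WRITE2_B32 carries both a data0 and a data1 operand; if one
// of them were later rewritten to an AGPR while the other stayed in a VGPR,
// the instruction could not be encoded, so a plain VGPR class is reported
// here instead of the AV superclass.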
5439fe6060f1SDimitry Andric const int VDstIdx = AMDGPU::getNamedOperandIdx(TID.Opcode, 5440fe6060f1SDimitry Andric AMDGPU::OpName::vdst); 5441fe6060f1SDimitry Andric const int DataIdx = AMDGPU::getNamedOperandIdx(TID.Opcode, 5442fe6060f1SDimitry Andric (TID.TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 5443fe6060f1SDimitry Andric : AMDGPU::OpName::vdata); 5444fe6060f1SDimitry Andric if (DataIdx != -1) { 5445bdd1243dSDimitry Andric IsAllocatable = VDstIdx != -1 || AMDGPU::hasNamedOperand( 5446bdd1243dSDimitry Andric TID.Opcode, AMDGPU::OpName::data1); 5447fe6060f1SDimitry Andric } 5448fe6060f1SDimitry Andric } 544981ad6265SDimitry Andric return adjustAllocatableRegClass(ST, RI, MF.getRegInfo(), TID, RegClass, 5450fe6060f1SDimitry Andric IsAllocatable); 5451fe6060f1SDimitry Andric } 5452fe6060f1SDimitry Andric 54530b57cec5SDimitry Andric const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI, 54540b57cec5SDimitry Andric unsigned OpNo) const { 54550b57cec5SDimitry Andric const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 54560b57cec5SDimitry Andric const MCInstrDesc &Desc = get(MI.getOpcode()); 54570b57cec5SDimitry Andric if (MI.isVariadic() || OpNo >= Desc.getNumOperands() || 5458bdd1243dSDimitry Andric Desc.operands()[OpNo].RegClass == -1) { 54598bcb0991SDimitry Andric Register Reg = MI.getOperand(OpNo).getReg(); 54600b57cec5SDimitry Andric 5461e8d8bef9SDimitry Andric if (Reg.isVirtual()) 54620b57cec5SDimitry Andric return MRI.getRegClass(Reg); 5463bdd1243dSDimitry Andric return RI.getPhysRegBaseClass(Reg); 54640b57cec5SDimitry Andric } 54650b57cec5SDimitry Andric 5466bdd1243dSDimitry Andric unsigned RCID = Desc.operands()[OpNo].RegClass; 546781ad6265SDimitry Andric return adjustAllocatableRegClass(ST, RI, MRI, Desc, RCID, true); 54680b57cec5SDimitry Andric } 54690b57cec5SDimitry Andric 54700b57cec5SDimitry Andric void SIInstrInfo::legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const { 54710b57cec5SDimitry Andric MachineBasicBlock::iterator I = MI; 54720b57cec5SDimitry Andric MachineBasicBlock *MBB = MI.getParent(); 54730b57cec5SDimitry Andric MachineOperand &MO = MI.getOperand(OpIdx); 54740b57cec5SDimitry Andric MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 5475bdd1243dSDimitry Andric unsigned RCID = get(MI.getOpcode()).operands()[OpIdx].RegClass; 54760b57cec5SDimitry Andric const TargetRegisterClass *RC = RI.getRegClass(RCID); 5477e8d8bef9SDimitry Andric unsigned Size = RI.getRegSizeInBits(*RC); 54780b57cec5SDimitry Andric unsigned Opcode = (Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO : AMDGPU::V_MOV_B32_e32; 54790b57cec5SDimitry Andric if (MO.isReg()) 54800b57cec5SDimitry Andric Opcode = AMDGPU::COPY; 54810b57cec5SDimitry Andric else if (RI.isSGPRClass(RC)) 54820b57cec5SDimitry Andric Opcode = (Size == 64) ? 
AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32; 54830b57cec5SDimitry Andric 54840b57cec5SDimitry Andric const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC); 54858bcb0991SDimitry Andric Register Reg = MRI.createVirtualRegister(VRC); 54860b57cec5SDimitry Andric DebugLoc DL = MBB->findDebugLoc(I); 54870b57cec5SDimitry Andric BuildMI(*MI.getParent(), I, DL, get(Opcode), Reg).add(MO); 54880b57cec5SDimitry Andric MO.ChangeToRegister(Reg, false); 54890b57cec5SDimitry Andric } 54900b57cec5SDimitry Andric 54915f757f3fSDimitry Andric unsigned SIInstrInfo::buildExtractSubReg( 54925f757f3fSDimitry Andric MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, 54935f757f3fSDimitry Andric const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, 54945f757f3fSDimitry Andric unsigned SubIdx, const TargetRegisterClass *SubRC) const { 54950b57cec5SDimitry Andric MachineBasicBlock *MBB = MI->getParent(); 54960b57cec5SDimitry Andric DebugLoc DL = MI->getDebugLoc(); 54978bcb0991SDimitry Andric Register SubReg = MRI.createVirtualRegister(SubRC); 54980b57cec5SDimitry Andric 54990b57cec5SDimitry Andric if (SuperReg.getSubReg() == AMDGPU::NoSubRegister) { 55000b57cec5SDimitry Andric BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg) 55010b57cec5SDimitry Andric .addReg(SuperReg.getReg(), 0, SubIdx); 55020b57cec5SDimitry Andric return SubReg; 55030b57cec5SDimitry Andric } 55040b57cec5SDimitry Andric 55050b57cec5SDimitry Andric // Just in case the super register is itself a sub-register, copy it to a new 55060b57cec5SDimitry Andric // value so we don't need to worry about merging its subreg index with the 55070b57cec5SDimitry Andric // SubIdx passed to this function. The register coalescer should be able to 55080b57cec5SDimitry Andric // eliminate this extra copy. 
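// e.g. when asked for sub0 of a use of %10.sub2_sub3, first copy
// %10.sub2_sub3 into a fresh register of SuperRC and then take sub0 of that
// copy, instead of composing the two subregister indices directly.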
55098bcb0991SDimitry Andric Register NewSuperReg = MRI.createVirtualRegister(SuperRC); 55100b57cec5SDimitry Andric 55110b57cec5SDimitry Andric BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg) 55120b57cec5SDimitry Andric .addReg(SuperReg.getReg(), 0, SuperReg.getSubReg()); 55130b57cec5SDimitry Andric 55140b57cec5SDimitry Andric BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg) 55150b57cec5SDimitry Andric .addReg(NewSuperReg, 0, SubIdx); 55160b57cec5SDimitry Andric 55170b57cec5SDimitry Andric return SubReg; 55180b57cec5SDimitry Andric } 55190b57cec5SDimitry Andric 55200b57cec5SDimitry Andric MachineOperand SIInstrInfo::buildExtractSubRegOrImm( 55215f757f3fSDimitry Andric MachineBasicBlock::iterator MII, MachineRegisterInfo &MRI, 55225f757f3fSDimitry Andric const MachineOperand &Op, const TargetRegisterClass *SuperRC, 55235f757f3fSDimitry Andric unsigned SubIdx, const TargetRegisterClass *SubRC) const { 55240b57cec5SDimitry Andric if (Op.isImm()) { 55250b57cec5SDimitry Andric if (SubIdx == AMDGPU::sub0) 55260b57cec5SDimitry Andric return MachineOperand::CreateImm(static_cast<int32_t>(Op.getImm())); 55270b57cec5SDimitry Andric if (SubIdx == AMDGPU::sub1) 55280b57cec5SDimitry Andric return MachineOperand::CreateImm(static_cast<int32_t>(Op.getImm() >> 32)); 55290b57cec5SDimitry Andric 55300b57cec5SDimitry Andric llvm_unreachable("Unhandled register index for immediate"); 55310b57cec5SDimitry Andric } 55320b57cec5SDimitry Andric 55330b57cec5SDimitry Andric unsigned SubReg = buildExtractSubReg(MII, MRI, Op, SuperRC, 55340b57cec5SDimitry Andric SubIdx, SubRC); 55350b57cec5SDimitry Andric return MachineOperand::CreateReg(SubReg, false); 55360b57cec5SDimitry Andric } 55370b57cec5SDimitry Andric 55380b57cec5SDimitry Andric // Change the order of operands from (0, 1, 2) to (0, 2, 1) 55390b57cec5SDimitry Andric void SIInstrInfo::swapOperands(MachineInstr &Inst) const { 55400b57cec5SDimitry Andric assert(Inst.getNumExplicitOperands() == 3); 55410b57cec5SDimitry Andric MachineOperand Op1 = Inst.getOperand(1); 554281ad6265SDimitry Andric Inst.removeOperand(1); 55430b57cec5SDimitry Andric Inst.addOperand(Op1); 55440b57cec5SDimitry Andric } 55450b57cec5SDimitry Andric 55460b57cec5SDimitry Andric bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI, 55470b57cec5SDimitry Andric const MCOperandInfo &OpInfo, 55480b57cec5SDimitry Andric const MachineOperand &MO) const { 55490b57cec5SDimitry Andric if (!MO.isReg()) 55500b57cec5SDimitry Andric return false; 55510b57cec5SDimitry Andric 55528bcb0991SDimitry Andric Register Reg = MO.getReg(); 55530b57cec5SDimitry Andric 5554480093f4SDimitry Andric const TargetRegisterClass *DRC = RI.getRegClass(OpInfo.RegClass); 5555e8d8bef9SDimitry Andric if (Reg.isPhysical()) 5556e8d8bef9SDimitry Andric return DRC->contains(Reg); 5557e8d8bef9SDimitry Andric 5558e8d8bef9SDimitry Andric const TargetRegisterClass *RC = MRI.getRegClass(Reg); 5559e8d8bef9SDimitry Andric 5560480093f4SDimitry Andric if (MO.getSubReg()) { 5561480093f4SDimitry Andric const MachineFunction *MF = MO.getParent()->getParent()->getParent(); 5562480093f4SDimitry Andric const TargetRegisterClass *SuperRC = RI.getLargestLegalSuperClass(RC, *MF); 5563480093f4SDimitry Andric if (!SuperRC) 5564480093f4SDimitry Andric return false; 55650b57cec5SDimitry Andric 5566480093f4SDimitry Andric DRC = RI.getMatchingSuperRegClass(SuperRC, DRC, MO.getSubReg()); 5567480093f4SDimitry Andric if (!DRC) 5568480093f4SDimitry Andric return false; 5569480093f4SDimitry Andric } 5570480093f4SDimitry Andric 
return RC->hasSuperClassEq(DRC); 55710b57cec5SDimitry Andric } 55720b57cec5SDimitry Andric 55730b57cec5SDimitry Andric bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI, 55740b57cec5SDimitry Andric const MCOperandInfo &OpInfo, 55750b57cec5SDimitry Andric const MachineOperand &MO) const { 55760b57cec5SDimitry Andric if (MO.isReg()) 55770b57cec5SDimitry Andric return isLegalRegOperand(MRI, OpInfo, MO); 55780b57cec5SDimitry Andric 55790b57cec5SDimitry Andric // Handle non-register types that are treated like immediates. 55800b57cec5SDimitry Andric assert(MO.isImm() || MO.isTargetIndex() || MO.isFI() || MO.isGlobal()); 55810b57cec5SDimitry Andric return true; 55820b57cec5SDimitry Andric } 55830b57cec5SDimitry Andric 55840b57cec5SDimitry Andric bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx, 55850b57cec5SDimitry Andric const MachineOperand *MO) const { 55860b57cec5SDimitry Andric const MachineFunction &MF = *MI.getParent()->getParent(); 55870b57cec5SDimitry Andric const MachineRegisterInfo &MRI = MF.getRegInfo(); 55880b57cec5SDimitry Andric const MCInstrDesc &InstDesc = MI.getDesc(); 5589bdd1243dSDimitry Andric const MCOperandInfo &OpInfo = InstDesc.operands()[OpIdx]; 55900b57cec5SDimitry Andric const TargetRegisterClass *DefinedRC = 55910b57cec5SDimitry Andric OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr; 55920b57cec5SDimitry Andric if (!MO) 55930b57cec5SDimitry Andric MO = &MI.getOperand(OpIdx); 55940b57cec5SDimitry Andric 55950b57cec5SDimitry Andric int ConstantBusLimit = ST.getConstantBusLimit(MI.getOpcode()); 559681ad6265SDimitry Andric int LiteralLimit = !isVOP3(MI) || ST.hasVOP3Literal() ? 1 : 0; 55970b57cec5SDimitry Andric if (isVALU(MI) && usesConstantBus(MRI, *MO, OpInfo)) { 5598bdd1243dSDimitry Andric if (!MO->isReg() && !isInlineConstant(*MO, OpInfo) && !LiteralLimit--) 55990b57cec5SDimitry Andric return false; 56000b57cec5SDimitry Andric 56010b57cec5SDimitry Andric SmallDenseSet<RegSubRegPair> SGPRsUsed; 56020b57cec5SDimitry Andric if (MO->isReg()) 56030b57cec5SDimitry Andric SGPRsUsed.insert(RegSubRegPair(MO->getReg(), MO->getSubReg())); 56040b57cec5SDimitry Andric 56050b57cec5SDimitry Andric for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { 56060b57cec5SDimitry Andric if (i == OpIdx) 56070b57cec5SDimitry Andric continue; 56080b57cec5SDimitry Andric const MachineOperand &Op = MI.getOperand(i); 56090b57cec5SDimitry Andric if (Op.isReg()) { 56100b57cec5SDimitry Andric RegSubRegPair SGPR(Op.getReg(), Op.getSubReg()); 56110b57cec5SDimitry Andric if (!SGPRsUsed.count(SGPR) && 5612bdd1243dSDimitry Andric // FIXME: This can access off the end of the operands() array. 
5613bdd1243dSDimitry Andric usesConstantBus(MRI, Op, InstDesc.operands().begin()[i])) { 56140b57cec5SDimitry Andric if (--ConstantBusLimit <= 0) 56150b57cec5SDimitry Andric return false; 56160b57cec5SDimitry Andric SGPRsUsed.insert(SGPR); 56170b57cec5SDimitry Andric } 56185f757f3fSDimitry Andric } else if (AMDGPU::isSISrcOperand(InstDesc, i) && 56195f757f3fSDimitry Andric !isInlineConstant(Op, InstDesc.operands()[i])) { 562081ad6265SDimitry Andric if (!LiteralLimit--) 56210b57cec5SDimitry Andric return false; 56220b57cec5SDimitry Andric if (--ConstantBusLimit <= 0) 56230b57cec5SDimitry Andric return false; 56240b57cec5SDimitry Andric } 56250b57cec5SDimitry Andric } 56260b57cec5SDimitry Andric } 56270b57cec5SDimitry Andric 56280b57cec5SDimitry Andric if (MO->isReg()) { 5629fcaf7f86SDimitry Andric if (!DefinedRC) 5630fcaf7f86SDimitry Andric return OpInfo.OperandType == MCOI::OPERAND_UNKNOWN; 5631fe6060f1SDimitry Andric if (!isLegalRegOperand(MRI, OpInfo, *MO)) 5632fe6060f1SDimitry Andric return false; 5633fe6060f1SDimitry Andric bool IsAGPR = RI.isAGPR(MRI, MO->getReg()); 5634fe6060f1SDimitry Andric if (IsAGPR && !ST.hasMAIInsts()) 5635fe6060f1SDimitry Andric return false; 5636fe6060f1SDimitry Andric unsigned Opc = MI.getOpcode(); 5637fe6060f1SDimitry Andric if (IsAGPR && 5638fe6060f1SDimitry Andric (!ST.hasGFX90AInsts() || !MRI.reservedRegsFrozen()) && 5639fe6060f1SDimitry Andric (MI.mayLoad() || MI.mayStore() || isDS(Opc) || isMIMG(Opc))) 5640fe6060f1SDimitry Andric return false; 5641fe6060f1SDimitry Andric // Atomics should have both vdst and vdata either vgpr or agpr. 5642fe6060f1SDimitry Andric const int VDstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst); 5643fe6060f1SDimitry Andric const int DataIdx = AMDGPU::getNamedOperandIdx(Opc, 5644fe6060f1SDimitry Andric isDS(Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata); 5645fe6060f1SDimitry Andric if ((int)OpIdx == VDstIdx && DataIdx != -1 && 5646fe6060f1SDimitry Andric MI.getOperand(DataIdx).isReg() && 5647fe6060f1SDimitry Andric RI.isAGPR(MRI, MI.getOperand(DataIdx).getReg()) != IsAGPR) 5648fe6060f1SDimitry Andric return false; 5649fe6060f1SDimitry Andric if ((int)OpIdx == DataIdx) { 5650fe6060f1SDimitry Andric if (VDstIdx != -1 && 5651fe6060f1SDimitry Andric RI.isAGPR(MRI, MI.getOperand(VDstIdx).getReg()) != IsAGPR) 5652fe6060f1SDimitry Andric return false; 5653fe6060f1SDimitry Andric // DS instructions with 2 src operands also must have tied RC. 
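// (e.g. for ds_write2_b32, data0 and data1 must both be VGPRs or both be
// AGPRs.)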
5654fe6060f1SDimitry Andric const int Data1Idx = AMDGPU::getNamedOperandIdx(Opc, 5655fe6060f1SDimitry Andric AMDGPU::OpName::data1); 5656fe6060f1SDimitry Andric if (Data1Idx != -1 && MI.getOperand(Data1Idx).isReg() && 5657fe6060f1SDimitry Andric RI.isAGPR(MRI, MI.getOperand(Data1Idx).getReg()) != IsAGPR) 5658fe6060f1SDimitry Andric return false; 5659fe6060f1SDimitry Andric } 566081ad6265SDimitry Andric if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts() && 5661fe6060f1SDimitry Andric (int)OpIdx == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) && 5662fe6060f1SDimitry Andric RI.isSGPRReg(MRI, MO->getReg())) 5663fe6060f1SDimitry Andric return false; 5664fe6060f1SDimitry Andric return true; 56650b57cec5SDimitry Andric } 56660b57cec5SDimitry Andric 56675f757f3fSDimitry Andric if (MO->isImm()) { 56685f757f3fSDimitry Andric uint64_t Imm = MO->getImm(); 56695f757f3fSDimitry Andric bool Is64BitFPOp = OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_FP64; 56705f757f3fSDimitry Andric bool Is64BitOp = Is64BitFPOp || 56715f757f3fSDimitry Andric OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_INT64 || 56725f757f3fSDimitry Andric OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_V2INT32 || 56735f757f3fSDimitry Andric OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_V2FP32; 56745f757f3fSDimitry Andric if (Is64BitOp && 56755f757f3fSDimitry Andric !AMDGPU::isInlinableLiteral64(Imm, ST.hasInv2PiInlineImm())) { 56765f757f3fSDimitry Andric if (!AMDGPU::isValid32BitLiteral(Imm, Is64BitFPOp)) 56775f757f3fSDimitry Andric return false; 56785f757f3fSDimitry Andric 56795f757f3fSDimitry Andric // FIXME: We can use sign extended 64-bit literals, but only for signed 56805f757f3fSDimitry Andric // operands. At the moment we do not know if an operand is signed. 56815f757f3fSDimitry Andric // Such operand will be encoded as its low 32 bits and then either 56825f757f3fSDimitry Andric // correctly sign extended or incorrectly zero extended by HW. 56835f757f3fSDimitry Andric if (!Is64BitFPOp && (int32_t)Imm < 0) 56845f757f3fSDimitry Andric return false; 56855f757f3fSDimitry Andric } 56865f757f3fSDimitry Andric } 56875f757f3fSDimitry Andric 56880b57cec5SDimitry Andric // Handle non-register types that are treated like immediates. 56890b57cec5SDimitry Andric assert(MO->isImm() || MO->isTargetIndex() || MO->isFI() || MO->isGlobal()); 56900b57cec5SDimitry Andric 56910b57cec5SDimitry Andric if (!DefinedRC) { 56920b57cec5SDimitry Andric // This operand expects an immediate. 
56930b57cec5SDimitry Andric return true; 56940b57cec5SDimitry Andric } 56950b57cec5SDimitry Andric 56960b57cec5SDimitry Andric return isImmOperandLegal(MI, OpIdx, *MO); 56970b57cec5SDimitry Andric } 56980b57cec5SDimitry Andric 56990b57cec5SDimitry Andric void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI, 57000b57cec5SDimitry Andric MachineInstr &MI) const { 57010b57cec5SDimitry Andric unsigned Opc = MI.getOpcode(); 57020b57cec5SDimitry Andric const MCInstrDesc &InstrDesc = get(Opc); 57030b57cec5SDimitry Andric 57040b57cec5SDimitry Andric int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 57050b57cec5SDimitry Andric MachineOperand &Src0 = MI.getOperand(Src0Idx); 57060b57cec5SDimitry Andric 57070b57cec5SDimitry Andric int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); 57080b57cec5SDimitry Andric MachineOperand &Src1 = MI.getOperand(Src1Idx); 57090b57cec5SDimitry Andric 57100b57cec5SDimitry Andric // If there is an implicit SGPR use such as VCC use for v_addc_u32/v_subb_u32 57110b57cec5SDimitry Andric // we need to only have one constant bus use before GFX10. 5712bdd1243dSDimitry Andric bool HasImplicitSGPR = findImplicitSGPRRead(MI); 5713bdd1243dSDimitry Andric if (HasImplicitSGPR && ST.getConstantBusLimit(Opc) <= 1 && Src0.isReg() && 5714bdd1243dSDimitry Andric RI.isSGPRReg(MRI, Src0.getReg())) 57150b57cec5SDimitry Andric legalizeOpWithMove(MI, Src0Idx); 57160b57cec5SDimitry Andric 57170b57cec5SDimitry Andric // Special case: V_WRITELANE_B32 accepts only immediate or SGPR operands for 57180b57cec5SDimitry Andric // both the value to write (src0) and lane select (src1). Fix up non-SGPR 57190b57cec5SDimitry Andric // src0/src1 with V_READFIRSTLANE. 57200b57cec5SDimitry Andric if (Opc == AMDGPU::V_WRITELANE_B32) { 57210b57cec5SDimitry Andric const DebugLoc &DL = MI.getDebugLoc(); 57220b57cec5SDimitry Andric if (Src0.isReg() && RI.isVGPR(MRI, Src0.getReg())) { 57238bcb0991SDimitry Andric Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); 57240b57cec5SDimitry Andric BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg) 57250b57cec5SDimitry Andric .add(Src0); 57260b57cec5SDimitry Andric Src0.ChangeToRegister(Reg, false); 57270b57cec5SDimitry Andric } 57280b57cec5SDimitry Andric if (Src1.isReg() && RI.isVGPR(MRI, Src1.getReg())) { 57298bcb0991SDimitry Andric Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); 57300b57cec5SDimitry Andric const DebugLoc &DL = MI.getDebugLoc(); 57310b57cec5SDimitry Andric BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg) 57320b57cec5SDimitry Andric .add(Src1); 57330b57cec5SDimitry Andric Src1.ChangeToRegister(Reg, false); 57340b57cec5SDimitry Andric } 57350b57cec5SDimitry Andric return; 57360b57cec5SDimitry Andric } 57370b57cec5SDimitry Andric 57380b57cec5SDimitry Andric // No VOP2 instructions support AGPRs. 57390b57cec5SDimitry Andric if (Src0.isReg() && RI.isAGPR(MRI, Src0.getReg())) 57400b57cec5SDimitry Andric legalizeOpWithMove(MI, Src0Idx); 57410b57cec5SDimitry Andric 57420b57cec5SDimitry Andric if (Src1.isReg() && RI.isAGPR(MRI, Src1.getReg())) 57430b57cec5SDimitry Andric legalizeOpWithMove(MI, Src1Idx); 57440b57cec5SDimitry Andric 57455f757f3fSDimitry Andric // Special case: V_FMAC_F32 and V_FMAC_F16 have src2. 
57465f757f3fSDimitry Andric if (Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F16_e32) { 57475f757f3fSDimitry Andric int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 57485f757f3fSDimitry Andric if (!RI.isVGPR(MRI, MI.getOperand(Src2Idx).getReg())) 57495f757f3fSDimitry Andric legalizeOpWithMove(MI, Src2Idx); 57505f757f3fSDimitry Andric } 57515f757f3fSDimitry Andric 57520b57cec5SDimitry Andric // VOP2 src0 instructions support all operand types, so we don't need to check 57530b57cec5SDimitry Andric // their legality. If src1 is already legal, we don't need to do anything. 5754bdd1243dSDimitry Andric if (isLegalRegOperand(MRI, InstrDesc.operands()[Src1Idx], Src1)) 57550b57cec5SDimitry Andric return; 57560b57cec5SDimitry Andric 57570b57cec5SDimitry Andric // Special case: V_READLANE_B32 accepts only immediate or SGPR operands for 57580b57cec5SDimitry Andric // lane select. Fix up using V_READFIRSTLANE, since we assume that the lane 57590b57cec5SDimitry Andric // select is uniform. 57600b57cec5SDimitry Andric if (Opc == AMDGPU::V_READLANE_B32 && Src1.isReg() && 57610b57cec5SDimitry Andric RI.isVGPR(MRI, Src1.getReg())) { 57628bcb0991SDimitry Andric Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); 57630b57cec5SDimitry Andric const DebugLoc &DL = MI.getDebugLoc(); 57640b57cec5SDimitry Andric BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg) 57650b57cec5SDimitry Andric .add(Src1); 57660b57cec5SDimitry Andric Src1.ChangeToRegister(Reg, false); 57670b57cec5SDimitry Andric return; 57680b57cec5SDimitry Andric } 57690b57cec5SDimitry Andric 57700b57cec5SDimitry Andric // We do not use commuteInstruction here because it is too aggressive and will 57710b57cec5SDimitry Andric // commute if it is possible. We only want to commute here if it improves 57720b57cec5SDimitry Andric // legality. This can be called a fairly large number of times so don't waste 57730b57cec5SDimitry Andric // compile time pointlessly swapping and checking legality again. 57740b57cec5SDimitry Andric if (HasImplicitSGPR || !MI.isCommutable()) { 57750b57cec5SDimitry Andric legalizeOpWithMove(MI, Src1Idx); 57760b57cec5SDimitry Andric return; 57770b57cec5SDimitry Andric } 57780b57cec5SDimitry Andric 57790b57cec5SDimitry Andric // If src0 can be used as src1, commuting will make the operands legal. 57800b57cec5SDimitry Andric // Otherwise we have to give up and insert a move. 57810b57cec5SDimitry Andric // 57820b57cec5SDimitry Andric // TODO: Other immediate-like operand kinds could be commuted if there was a 57830b57cec5SDimitry Andric // MachineOperand::ChangeTo* for them. 
57840b57cec5SDimitry Andric if ((!Src1.isImm() && !Src1.isReg()) || 5785bdd1243dSDimitry Andric !isLegalRegOperand(MRI, InstrDesc.operands()[Src1Idx], Src0)) { 57860b57cec5SDimitry Andric legalizeOpWithMove(MI, Src1Idx); 57870b57cec5SDimitry Andric return; 57880b57cec5SDimitry Andric } 57890b57cec5SDimitry Andric 57900b57cec5SDimitry Andric int CommutedOpc = commuteOpcode(MI); 57910b57cec5SDimitry Andric if (CommutedOpc == -1) { 57920b57cec5SDimitry Andric legalizeOpWithMove(MI, Src1Idx); 57930b57cec5SDimitry Andric return; 57940b57cec5SDimitry Andric } 57950b57cec5SDimitry Andric 57960b57cec5SDimitry Andric MI.setDesc(get(CommutedOpc)); 57970b57cec5SDimitry Andric 57988bcb0991SDimitry Andric Register Src0Reg = Src0.getReg(); 57990b57cec5SDimitry Andric unsigned Src0SubReg = Src0.getSubReg(); 58000b57cec5SDimitry Andric bool Src0Kill = Src0.isKill(); 58010b57cec5SDimitry Andric 58020b57cec5SDimitry Andric if (Src1.isImm()) 58030b57cec5SDimitry Andric Src0.ChangeToImmediate(Src1.getImm()); 58040b57cec5SDimitry Andric else if (Src1.isReg()) { 58050b57cec5SDimitry Andric Src0.ChangeToRegister(Src1.getReg(), false, false, Src1.isKill()); 58060b57cec5SDimitry Andric Src0.setSubReg(Src1.getSubReg()); 58070b57cec5SDimitry Andric } else 58080b57cec5SDimitry Andric llvm_unreachable("Should only have register or immediate operands"); 58090b57cec5SDimitry Andric 58100b57cec5SDimitry Andric Src1.ChangeToRegister(Src0Reg, false, false, Src0Kill); 58110b57cec5SDimitry Andric Src1.setSubReg(Src0SubReg); 58120b57cec5SDimitry Andric fixImplicitOperands(MI); 58130b57cec5SDimitry Andric } 58140b57cec5SDimitry Andric 58150b57cec5SDimitry Andric // Legalize VOP3 operands. All operand types are supported for any operand 58160b57cec5SDimitry Andric // but only one literal constant and only starting from GFX10. 
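// Before GFX10 a VOP3 encoding cannot carry a literal at all and, in general,
// may read only one SGPR via the constant bus; GFX10 allows one literal and
// two constant-bus reads (a literal also consumes a constant-bus slot), which
// is what the LiteralLimit/ConstantBusLimit bookkeeping below models.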
58170b57cec5SDimitry Andric void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI, 58180b57cec5SDimitry Andric MachineInstr &MI) const { 58190b57cec5SDimitry Andric unsigned Opc = MI.getOpcode(); 58200b57cec5SDimitry Andric 58210b57cec5SDimitry Andric int VOP3Idx[3] = { 58220b57cec5SDimitry Andric AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0), 58230b57cec5SDimitry Andric AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1), 58240b57cec5SDimitry Andric AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2) 58250b57cec5SDimitry Andric }; 58260b57cec5SDimitry Andric 5827e8d8bef9SDimitry Andric if (Opc == AMDGPU::V_PERMLANE16_B32_e64 || 5828e8d8bef9SDimitry Andric Opc == AMDGPU::V_PERMLANEX16_B32_e64) { 58290b57cec5SDimitry Andric // src1 and src2 must be scalar 58300b57cec5SDimitry Andric MachineOperand &Src1 = MI.getOperand(VOP3Idx[1]); 58310b57cec5SDimitry Andric MachineOperand &Src2 = MI.getOperand(VOP3Idx[2]); 58320b57cec5SDimitry Andric const DebugLoc &DL = MI.getDebugLoc(); 58330b57cec5SDimitry Andric if (Src1.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src1.getReg()))) { 58348bcb0991SDimitry Andric Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); 58350b57cec5SDimitry Andric BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg) 58360b57cec5SDimitry Andric .add(Src1); 58370b57cec5SDimitry Andric Src1.ChangeToRegister(Reg, false); 58380b57cec5SDimitry Andric } 58390b57cec5SDimitry Andric if (Src2.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src2.getReg()))) { 58408bcb0991SDimitry Andric Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); 58410b57cec5SDimitry Andric BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg) 58420b57cec5SDimitry Andric .add(Src2); 58430b57cec5SDimitry Andric Src2.ChangeToRegister(Reg, false); 58440b57cec5SDimitry Andric } 58450b57cec5SDimitry Andric } 58460b57cec5SDimitry Andric 58470b57cec5SDimitry Andric // Find the one SGPR operand we are allowed to use. 58480b57cec5SDimitry Andric int ConstantBusLimit = ST.getConstantBusLimit(Opc); 58490b57cec5SDimitry Andric int LiteralLimit = ST.hasVOP3Literal() ? 
1 : 0; 58500b57cec5SDimitry Andric SmallDenseSet<unsigned> SGPRsUsed; 5851e8d8bef9SDimitry Andric Register SGPRReg = findUsedSGPR(MI, VOP3Idx); 5852bdd1243dSDimitry Andric if (SGPRReg) { 58530b57cec5SDimitry Andric SGPRsUsed.insert(SGPRReg); 58540b57cec5SDimitry Andric --ConstantBusLimit; 58550b57cec5SDimitry Andric } 58560b57cec5SDimitry Andric 58570eae32dcSDimitry Andric for (int Idx : VOP3Idx) { 58580b57cec5SDimitry Andric if (Idx == -1) 58590b57cec5SDimitry Andric break; 58600b57cec5SDimitry Andric MachineOperand &MO = MI.getOperand(Idx); 58610b57cec5SDimitry Andric 58620b57cec5SDimitry Andric if (!MO.isReg()) { 5863bdd1243dSDimitry Andric if (isInlineConstant(MO, get(Opc).operands()[Idx])) 58640b57cec5SDimitry Andric continue; 58650b57cec5SDimitry Andric 58660b57cec5SDimitry Andric if (LiteralLimit > 0 && ConstantBusLimit > 0) { 58670b57cec5SDimitry Andric --LiteralLimit; 58680b57cec5SDimitry Andric --ConstantBusLimit; 58690b57cec5SDimitry Andric continue; 58700b57cec5SDimitry Andric } 58710b57cec5SDimitry Andric 58720b57cec5SDimitry Andric --LiteralLimit; 58730b57cec5SDimitry Andric --ConstantBusLimit; 58740b57cec5SDimitry Andric legalizeOpWithMove(MI, Idx); 58750b57cec5SDimitry Andric continue; 58760b57cec5SDimitry Andric } 58770b57cec5SDimitry Andric 5878349cc55cSDimitry Andric if (RI.hasAGPRs(RI.getRegClassForReg(MRI, MO.getReg())) && 58790b57cec5SDimitry Andric !isOperandLegal(MI, Idx, &MO)) { 58800b57cec5SDimitry Andric legalizeOpWithMove(MI, Idx); 58810b57cec5SDimitry Andric continue; 58820b57cec5SDimitry Andric } 58830b57cec5SDimitry Andric 5884349cc55cSDimitry Andric if (!RI.isSGPRClass(RI.getRegClassForReg(MRI, MO.getReg()))) 58850b57cec5SDimitry Andric continue; // VGPRs are legal 58860b57cec5SDimitry Andric 58870b57cec5SDimitry Andric // We can use one SGPR in each VOP3 instruction prior to GFX10 58880b57cec5SDimitry Andric // and two starting from GFX10. 58890b57cec5SDimitry Andric if (SGPRsUsed.count(MO.getReg())) 58900b57cec5SDimitry Andric continue; 58910b57cec5SDimitry Andric if (ConstantBusLimit > 0) { 58920b57cec5SDimitry Andric SGPRsUsed.insert(MO.getReg()); 58930b57cec5SDimitry Andric --ConstantBusLimit; 58940b57cec5SDimitry Andric continue; 58950b57cec5SDimitry Andric } 58960b57cec5SDimitry Andric 58970b57cec5SDimitry Andric // If we make it this far, then the operand is not legal and we must 58980b57cec5SDimitry Andric // legalize it. 58990b57cec5SDimitry Andric legalizeOpWithMove(MI, Idx); 59000b57cec5SDimitry Andric } 59015f757f3fSDimitry Andric 59025f757f3fSDimitry Andric // Special case: V_FMAC_F32 and V_FMAC_F16 have src2 tied to vdst. 
59035f757f3fSDimitry Andric if ((Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_e64) &&
59045f757f3fSDimitry Andric !RI.isVGPR(MRI, MI.getOperand(VOP3Idx[2]).getReg()))
59055f757f3fSDimitry Andric legalizeOpWithMove(MI, VOP3Idx[2]);
59060b57cec5SDimitry Andric }
59070b57cec5SDimitry Andric
59085ffd83dbSDimitry Andric Register SIInstrInfo::readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI,
59090b57cec5SDimitry Andric MachineRegisterInfo &MRI) const {
59100b57cec5SDimitry Andric const TargetRegisterClass *VRC = MRI.getRegClass(SrcReg);
59110b57cec5SDimitry Andric const TargetRegisterClass *SRC = RI.getEquivalentSGPRClass(VRC);
59128bcb0991SDimitry Andric Register DstReg = MRI.createVirtualRegister(SRC);
59130b57cec5SDimitry Andric unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
59140b57cec5SDimitry Andric
59150b57cec5SDimitry Andric if (RI.hasAGPRs(VRC)) {
59160b57cec5SDimitry Andric VRC = RI.getEquivalentVGPRClass(VRC);
59178bcb0991SDimitry Andric Register NewSrcReg = MRI.createVirtualRegister(VRC);
59180b57cec5SDimitry Andric BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
59190b57cec5SDimitry Andric get(TargetOpcode::COPY), NewSrcReg)
59200b57cec5SDimitry Andric .addReg(SrcReg);
59210b57cec5SDimitry Andric SrcReg = NewSrcReg;
59220b57cec5SDimitry Andric }
59230b57cec5SDimitry Andric
59240b57cec5SDimitry Andric if (SubRegs == 1) {
59250b57cec5SDimitry Andric BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
59260b57cec5SDimitry Andric get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
59270b57cec5SDimitry Andric .addReg(SrcReg);
59280b57cec5SDimitry Andric return DstReg;
59290b57cec5SDimitry Andric }
59300b57cec5SDimitry Andric
5931bdd1243dSDimitry Andric SmallVector<Register, 8> SRegs;
59320b57cec5SDimitry Andric for (unsigned i = 0; i < SubRegs; ++i) {
59338bcb0991SDimitry Andric Register SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
59340b57cec5SDimitry Andric BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
59350b57cec5SDimitry Andric get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
59360b57cec5SDimitry Andric .addReg(SrcReg, 0, RI.getSubRegFromChannel(i));
59370b57cec5SDimitry Andric SRegs.push_back(SGPR);
59380b57cec5SDimitry Andric }
59390b57cec5SDimitry Andric
59400b57cec5SDimitry Andric MachineInstrBuilder MIB =
59410b57cec5SDimitry Andric BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
59420b57cec5SDimitry Andric get(AMDGPU::REG_SEQUENCE), DstReg);
59430b57cec5SDimitry Andric for (unsigned i = 0; i < SubRegs; ++i) {
59440b57cec5SDimitry Andric MIB.addReg(SRegs[i]);
59450b57cec5SDimitry Andric MIB.addImm(RI.getSubRegFromChannel(i));
59460b57cec5SDimitry Andric }
59470b57cec5SDimitry Andric return DstReg;
59480b57cec5SDimitry Andric }
59490b57cec5SDimitry Andric
59500b57cec5SDimitry Andric void SIInstrInfo::legalizeOperandsSMRD(MachineRegisterInfo &MRI,
59510b57cec5SDimitry Andric MachineInstr &MI) const {
59520b57cec5SDimitry Andric
59530b57cec5SDimitry Andric // If the pointer is stored in VGPRs, then we need to move it to
59540b57cec5SDimitry Andric // SGPRs using v_readfirstlane. This is safe because we only select
59550b57cec5SDimitry Andric // loads with uniform pointers to SMRD instructions so we know the
59560b57cec5SDimitry Andric // pointer value is uniform.
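// For illustration only (virtual register numbers are made up), a 64-bit
// pointer that was selected into a VGPR pair ends up being rewritten by the
// readfirstlane-per-32-bit-piece pattern of readlaneVGPRToSGPR above:
//
//   %vptr:vreg_64 = ...
//   %lo:sgpr_32   = V_READFIRSTLANE_B32 %vptr.sub0
//   %hi:sgpr_32   = V_READFIRSTLANE_B32 %vptr.sub1
//   %sptr:sgpr_64 = REG_SEQUENCE %lo, %subreg.sub0, %hi, %subreg.sub1
//   ...           = S_LOAD_DWORD_IMM %sptr, 0, 0
//
// This is only a sketch of the intended MIR shape, not output from a test.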
59570b57cec5SDimitry Andric MachineOperand *SBase = getNamedOperand(MI, AMDGPU::OpName::sbase); 59580b57cec5SDimitry Andric if (SBase && !RI.isSGPRClass(MRI.getRegClass(SBase->getReg()))) { 5959e8d8bef9SDimitry Andric Register SGPR = readlaneVGPRToSGPR(SBase->getReg(), MI, MRI); 59600b57cec5SDimitry Andric SBase->setReg(SGPR); 59610b57cec5SDimitry Andric } 596281ad6265SDimitry Andric MachineOperand *SOff = getNamedOperand(MI, AMDGPU::OpName::soffset); 59630b57cec5SDimitry Andric if (SOff && !RI.isSGPRClass(MRI.getRegClass(SOff->getReg()))) { 5964e8d8bef9SDimitry Andric Register SGPR = readlaneVGPRToSGPR(SOff->getReg(), MI, MRI); 59650b57cec5SDimitry Andric SOff->setReg(SGPR); 59660b57cec5SDimitry Andric } 59670b57cec5SDimitry Andric } 59680b57cec5SDimitry Andric 5969fe6060f1SDimitry Andric bool SIInstrInfo::moveFlatAddrToVGPR(MachineInstr &Inst) const { 5970fe6060f1SDimitry Andric unsigned Opc = Inst.getOpcode(); 5971fe6060f1SDimitry Andric int OldSAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr); 5972fe6060f1SDimitry Andric if (OldSAddrIdx < 0) 5973fe6060f1SDimitry Andric return false; 5974fe6060f1SDimitry Andric 5975fe6060f1SDimitry Andric assert(isSegmentSpecificFLAT(Inst)); 5976fe6060f1SDimitry Andric 5977fe6060f1SDimitry Andric int NewOpc = AMDGPU::getGlobalVaddrOp(Opc); 5978fe6060f1SDimitry Andric if (NewOpc < 0) 5979fe6060f1SDimitry Andric NewOpc = AMDGPU::getFlatScratchInstSVfromSS(Opc); 5980fe6060f1SDimitry Andric if (NewOpc < 0) 5981fe6060f1SDimitry Andric return false; 5982fe6060f1SDimitry Andric 5983fe6060f1SDimitry Andric MachineRegisterInfo &MRI = Inst.getMF()->getRegInfo(); 5984fe6060f1SDimitry Andric MachineOperand &SAddr = Inst.getOperand(OldSAddrIdx); 5985fe6060f1SDimitry Andric if (RI.isSGPRReg(MRI, SAddr.getReg())) 5986fe6060f1SDimitry Andric return false; 5987fe6060f1SDimitry Andric 5988fe6060f1SDimitry Andric int NewVAddrIdx = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vaddr); 5989fe6060f1SDimitry Andric if (NewVAddrIdx < 0) 5990fe6060f1SDimitry Andric return false; 5991fe6060f1SDimitry Andric 5992fe6060f1SDimitry Andric int OldVAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr); 5993fe6060f1SDimitry Andric 5994fe6060f1SDimitry Andric // Check vaddr, it shall be zero or absent. 5995fe6060f1SDimitry Andric MachineInstr *VAddrDef = nullptr; 5996fe6060f1SDimitry Andric if (OldVAddrIdx >= 0) { 5997fe6060f1SDimitry Andric MachineOperand &VAddr = Inst.getOperand(OldVAddrIdx); 5998fe6060f1SDimitry Andric VAddrDef = MRI.getUniqueVRegDef(VAddr.getReg()); 5999fe6060f1SDimitry Andric if (!VAddrDef || VAddrDef->getOpcode() != AMDGPU::V_MOV_B32_e32 || 6000fe6060f1SDimitry Andric !VAddrDef->getOperand(1).isImm() || 6001fe6060f1SDimitry Andric VAddrDef->getOperand(1).getImm() != 0) 6002fe6060f1SDimitry Andric return false; 6003fe6060f1SDimitry Andric } 6004fe6060f1SDimitry Andric 6005fe6060f1SDimitry Andric const MCInstrDesc &NewDesc = get(NewOpc); 6006fe6060f1SDimitry Andric Inst.setDesc(NewDesc); 6007fe6060f1SDimitry Andric 600881ad6265SDimitry Andric // Callers expect iterator to be valid after this call, so modify the 6009fe6060f1SDimitry Andric // instruction in place. 6010fe6060f1SDimitry Andric if (OldVAddrIdx == NewVAddrIdx) { 6011fe6060f1SDimitry Andric MachineOperand &NewVAddr = Inst.getOperand(NewVAddrIdx); 6012fe6060f1SDimitry Andric // Clear use list from the old vaddr holding a zero register. 
6013fe6060f1SDimitry Andric MRI.removeRegOperandFromUseList(&NewVAddr); 6014fe6060f1SDimitry Andric MRI.moveOperands(&NewVAddr, &SAddr, 1); 601581ad6265SDimitry Andric Inst.removeOperand(OldSAddrIdx); 6016fe6060f1SDimitry Andric // Update the use list with the pointer we have just moved from vaddr to 601781ad6265SDimitry Andric // saddr position. Otherwise new vaddr will be missing from the use list. 6018fe6060f1SDimitry Andric MRI.removeRegOperandFromUseList(&NewVAddr); 6019fe6060f1SDimitry Andric MRI.addRegOperandToUseList(&NewVAddr); 6020fe6060f1SDimitry Andric } else { 6021fe6060f1SDimitry Andric assert(OldSAddrIdx == NewVAddrIdx); 6022fe6060f1SDimitry Andric 6023fe6060f1SDimitry Andric if (OldVAddrIdx >= 0) { 6024fe6060f1SDimitry Andric int NewVDstIn = AMDGPU::getNamedOperandIdx(NewOpc, 6025fe6060f1SDimitry Andric AMDGPU::OpName::vdst_in); 6026fe6060f1SDimitry Andric 602781ad6265SDimitry Andric // removeOperand doesn't try to fixup tied operand indexes at it goes, so 6028fe6060f1SDimitry Andric // it asserts. Untie the operands for now and retie them afterwards. 6029fe6060f1SDimitry Andric if (NewVDstIn != -1) { 6030fe6060f1SDimitry Andric int OldVDstIn = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in); 6031fe6060f1SDimitry Andric Inst.untieRegOperand(OldVDstIn); 6032fe6060f1SDimitry Andric } 6033fe6060f1SDimitry Andric 603481ad6265SDimitry Andric Inst.removeOperand(OldVAddrIdx); 6035fe6060f1SDimitry Andric 6036fe6060f1SDimitry Andric if (NewVDstIn != -1) { 6037fe6060f1SDimitry Andric int NewVDst = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst); 6038fe6060f1SDimitry Andric Inst.tieOperands(NewVDst, NewVDstIn); 6039fe6060f1SDimitry Andric } 6040fe6060f1SDimitry Andric } 6041fe6060f1SDimitry Andric } 6042fe6060f1SDimitry Andric 6043fe6060f1SDimitry Andric if (VAddrDef && MRI.use_nodbg_empty(VAddrDef->getOperand(0).getReg())) 6044fe6060f1SDimitry Andric VAddrDef->eraseFromParent(); 6045fe6060f1SDimitry Andric 6046fe6060f1SDimitry Andric return true; 6047fe6060f1SDimitry Andric } 6048fe6060f1SDimitry Andric 6049e8d8bef9SDimitry Andric // FIXME: Remove this when SelectionDAG is obsoleted. 6050e8d8bef9SDimitry Andric void SIInstrInfo::legalizeOperandsFLAT(MachineRegisterInfo &MRI, 6051e8d8bef9SDimitry Andric MachineInstr &MI) const { 6052e8d8bef9SDimitry Andric if (!isSegmentSpecificFLAT(MI)) 6053e8d8bef9SDimitry Andric return; 6054e8d8bef9SDimitry Andric 6055e8d8bef9SDimitry Andric // Fixup SGPR operands in VGPRs. We only select these when the DAG divergence 6056e8d8bef9SDimitry Andric // thinks they are uniform, so a readfirstlane should be valid. 
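// As a rough sketch (opcode names and operand order below are only
// illustrative), a global load whose saddr operand was assigned a VGPR pair
// is handled in one of two ways by the code that follows: either the address
// is folded into the vaddr form of the instruction,
//
//   GLOBAL_LOAD_DWORD_SADDR saddr=%vgpr_pair, vaddr=0, off
//     --> GLOBAL_LOAD_DWORD vaddr=%vgpr_pair, off      (moveFlatAddrToVGPR)
//
// or, when no such rewrite is possible, the pointer is readfirstlane'd back
// into an SGPR pair and the saddr form is kept.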
6057e8d8bef9SDimitry Andric MachineOperand *SAddr = getNamedOperand(MI, AMDGPU::OpName::saddr); 6058e8d8bef9SDimitry Andric if (!SAddr || RI.isSGPRClass(MRI.getRegClass(SAddr->getReg()))) 6059e8d8bef9SDimitry Andric return; 6060e8d8bef9SDimitry Andric 6061fe6060f1SDimitry Andric if (moveFlatAddrToVGPR(MI)) 6062fe6060f1SDimitry Andric return; 6063fe6060f1SDimitry Andric 6064e8d8bef9SDimitry Andric Register ToSGPR = readlaneVGPRToSGPR(SAddr->getReg(), MI, MRI); 6065e8d8bef9SDimitry Andric SAddr->setReg(ToSGPR); 6066e8d8bef9SDimitry Andric } 6067e8d8bef9SDimitry Andric 60680b57cec5SDimitry Andric void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB, 60690b57cec5SDimitry Andric MachineBasicBlock::iterator I, 60700b57cec5SDimitry Andric const TargetRegisterClass *DstRC, 60710b57cec5SDimitry Andric MachineOperand &Op, 60720b57cec5SDimitry Andric MachineRegisterInfo &MRI, 60730b57cec5SDimitry Andric const DebugLoc &DL) const { 60748bcb0991SDimitry Andric Register OpReg = Op.getReg(); 60750b57cec5SDimitry Andric unsigned OpSubReg = Op.getSubReg(); 60760b57cec5SDimitry Andric 60770b57cec5SDimitry Andric const TargetRegisterClass *OpRC = RI.getSubClassWithSubReg( 60780b57cec5SDimitry Andric RI.getRegClassForReg(MRI, OpReg), OpSubReg); 60790b57cec5SDimitry Andric 60800b57cec5SDimitry Andric // Check if operand is already the correct register class. 60810b57cec5SDimitry Andric if (DstRC == OpRC) 60820b57cec5SDimitry Andric return; 60830b57cec5SDimitry Andric 60848bcb0991SDimitry Andric Register DstReg = MRI.createVirtualRegister(DstRC); 6085349cc55cSDimitry Andric auto Copy = BuildMI(InsertMBB, I, DL, get(AMDGPU::COPY), DstReg).add(Op); 60860b57cec5SDimitry Andric 60870b57cec5SDimitry Andric Op.setReg(DstReg); 60880b57cec5SDimitry Andric Op.setSubReg(0); 60890b57cec5SDimitry Andric 60900b57cec5SDimitry Andric MachineInstr *Def = MRI.getVRegDef(OpReg); 60910b57cec5SDimitry Andric if (!Def) 60920b57cec5SDimitry Andric return; 60930b57cec5SDimitry Andric 60940b57cec5SDimitry Andric // Try to eliminate the copy if it is copying an immediate value. 60958bcb0991SDimitry Andric if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass) 60960b57cec5SDimitry Andric FoldImmediate(*Copy, *Def, OpReg, &MRI); 60978bcb0991SDimitry Andric 60988bcb0991SDimitry Andric bool ImpDef = Def->isImplicitDef(); 60998bcb0991SDimitry Andric while (!ImpDef && Def && Def->isCopy()) { 61008bcb0991SDimitry Andric if (Def->getOperand(1).getReg().isPhysical()) 61018bcb0991SDimitry Andric break; 61028bcb0991SDimitry Andric Def = MRI.getUniqueVRegDef(Def->getOperand(1).getReg()); 61038bcb0991SDimitry Andric ImpDef = Def && Def->isImplicitDef(); 61048bcb0991SDimitry Andric } 61058bcb0991SDimitry Andric if (!RI.isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) && 61068bcb0991SDimitry Andric !ImpDef) 6107349cc55cSDimitry Andric Copy.addReg(AMDGPU::EXEC, RegState::Implicit); 61080b57cec5SDimitry Andric } 61090b57cec5SDimitry Andric 61100b57cec5SDimitry Andric // Emit the actual waterfall loop, executing the wrapped instruction for each 611106c3fb27SDimitry Andric // unique value of \p ScalarOps across all lanes. In the best case we execute 1 61120b57cec5SDimitry Andric // iteration, in the worst case we execute 64 (once per lane). 
611306c3fb27SDimitry Andric static void emitLoadScalarOpsFromVGPRLoop( 611406c3fb27SDimitry Andric const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock &OrigBB, 611506c3fb27SDimitry Andric MachineBasicBlock &LoopBB, MachineBasicBlock &BodyBB, const DebugLoc &DL, 611606c3fb27SDimitry Andric ArrayRef<MachineOperand *> ScalarOps) { 61170b57cec5SDimitry Andric MachineFunction &MF = *OrigBB.getParent(); 61180b57cec5SDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 61190b57cec5SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 61200b57cec5SDimitry Andric unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; 61210b57cec5SDimitry Andric unsigned SaveExecOpc = 61220b57cec5SDimitry Andric ST.isWave32() ? AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64; 61230b57cec5SDimitry Andric unsigned XorTermOpc = 61240b57cec5SDimitry Andric ST.isWave32() ? AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term; 61250b57cec5SDimitry Andric unsigned AndOpc = 61260b57cec5SDimitry Andric ST.isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64; 61270b57cec5SDimitry Andric const auto *BoolXExecRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID); 61280b57cec5SDimitry Andric 61290b57cec5SDimitry Andric MachineBasicBlock::iterator I = LoopBB.begin(); 61300b57cec5SDimitry Andric 6131e8d8bef9SDimitry Andric SmallVector<Register, 8> ReadlanePieces; 6132bdd1243dSDimitry Andric Register CondReg; 6133e8d8bef9SDimitry Andric 613406c3fb27SDimitry Andric for (MachineOperand *ScalarOp : ScalarOps) { 613506c3fb27SDimitry Andric unsigned RegSize = TRI->getRegSizeInBits(ScalarOp->getReg(), MRI); 6136e8d8bef9SDimitry Andric unsigned NumSubRegs = RegSize / 32; 613706c3fb27SDimitry Andric Register VScalarOp = ScalarOp->getReg(); 613806c3fb27SDimitry Andric 613906c3fb27SDimitry Andric if (NumSubRegs == 1) { 614006c3fb27SDimitry Andric Register CurReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); 614106c3fb27SDimitry Andric 614206c3fb27SDimitry Andric BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurReg) 614306c3fb27SDimitry Andric .addReg(VScalarOp); 614406c3fb27SDimitry Andric 614506c3fb27SDimitry Andric Register NewCondReg = MRI.createVirtualRegister(BoolXExecRC); 614606c3fb27SDimitry Andric 614706c3fb27SDimitry Andric BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U32_e64), NewCondReg) 614806c3fb27SDimitry Andric .addReg(CurReg) 614906c3fb27SDimitry Andric .addReg(VScalarOp); 615006c3fb27SDimitry Andric 615106c3fb27SDimitry Andric // Combine the comparison results with AND. 615206c3fb27SDimitry Andric if (!CondReg) // First. 615306c3fb27SDimitry Andric CondReg = NewCondReg; 615406c3fb27SDimitry Andric else { // If not the first, we create an AND. 615506c3fb27SDimitry Andric Register AndReg = MRI.createVirtualRegister(BoolXExecRC); 615606c3fb27SDimitry Andric BuildMI(LoopBB, I, DL, TII.get(AndOpc), AndReg) 615706c3fb27SDimitry Andric .addReg(CondReg) 615806c3fb27SDimitry Andric .addReg(NewCondReg); 615906c3fb27SDimitry Andric CondReg = AndReg; 616006c3fb27SDimitry Andric } 616106c3fb27SDimitry Andric 616206c3fb27SDimitry Andric // Update ScalarOp operand to use the SGPR ScalarOp. 
616306c3fb27SDimitry Andric ScalarOp->setReg(CurReg); 616406c3fb27SDimitry Andric ScalarOp->setIsKill(); 616506c3fb27SDimitry Andric } else { 616606c3fb27SDimitry Andric unsigned VScalarOpUndef = getUndefRegState(ScalarOp->isUndef()); 616706c3fb27SDimitry Andric assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 && 616806c3fb27SDimitry Andric "Unhandled register size"); 61690b57cec5SDimitry Andric 6170e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < NumSubRegs; Idx += 2) { 6171e8d8bef9SDimitry Andric Register CurRegLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); 6172e8d8bef9SDimitry Andric Register CurRegHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); 6173e8d8bef9SDimitry Andric 6174e8d8bef9SDimitry Andric // Read the next variant <- also loop target. 6175e8d8bef9SDimitry Andric BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegLo) 617606c3fb27SDimitry Andric .addReg(VScalarOp, VScalarOpUndef, TRI->getSubRegFromChannel(Idx)); 6177e8d8bef9SDimitry Andric 6178e8d8bef9SDimitry Andric // Read the next variant <- also loop target. 6179e8d8bef9SDimitry Andric BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegHi) 618006c3fb27SDimitry Andric .addReg(VScalarOp, VScalarOpUndef, 618106c3fb27SDimitry Andric TRI->getSubRegFromChannel(Idx + 1)); 6182e8d8bef9SDimitry Andric 6183e8d8bef9SDimitry Andric ReadlanePieces.push_back(CurRegLo); 6184e8d8bef9SDimitry Andric ReadlanePieces.push_back(CurRegHi); 6185e8d8bef9SDimitry Andric 6186e8d8bef9SDimitry Andric // Comparison is to be done as 64-bit. 6187e8d8bef9SDimitry Andric Register CurReg = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass); 6188e8d8bef9SDimitry Andric BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), CurReg) 6189e8d8bef9SDimitry Andric .addReg(CurRegLo) 61900b57cec5SDimitry Andric .addImm(AMDGPU::sub0) 6191e8d8bef9SDimitry Andric .addReg(CurRegHi) 6192e8d8bef9SDimitry Andric .addImm(AMDGPU::sub1); 6193e8d8bef9SDimitry Andric 6194e8d8bef9SDimitry Andric Register NewCondReg = MRI.createVirtualRegister(BoolXExecRC); 619506c3fb27SDimitry Andric auto Cmp = BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U64_e64), 619606c3fb27SDimitry Andric NewCondReg) 6197e8d8bef9SDimitry Andric .addReg(CurReg); 6198e8d8bef9SDimitry Andric if (NumSubRegs <= 2) 619906c3fb27SDimitry Andric Cmp.addReg(VScalarOp); 6200e8d8bef9SDimitry Andric else 620106c3fb27SDimitry Andric Cmp.addReg(VScalarOp, VScalarOpUndef, 620206c3fb27SDimitry Andric TRI->getSubRegFromChannel(Idx, 2)); 6203e8d8bef9SDimitry Andric 620481ad6265SDimitry Andric // Combine the comparison results with AND. 6205bdd1243dSDimitry Andric if (!CondReg) // First. 6206e8d8bef9SDimitry Andric CondReg = NewCondReg; 6207e8d8bef9SDimitry Andric else { // If not the first, we create an AND. 6208e8d8bef9SDimitry Andric Register AndReg = MRI.createVirtualRegister(BoolXExecRC); 6209e8d8bef9SDimitry Andric BuildMI(LoopBB, I, DL, TII.get(AndOpc), AndReg) 6210e8d8bef9SDimitry Andric .addReg(CondReg) 6211e8d8bef9SDimitry Andric .addReg(NewCondReg); 6212e8d8bef9SDimitry Andric CondReg = AndReg; 6213e8d8bef9SDimitry Andric } 6214e8d8bef9SDimitry Andric } // End for loop. 6215e8d8bef9SDimitry Andric 621606c3fb27SDimitry Andric auto SScalarOpRC = 621706c3fb27SDimitry Andric TRI->getEquivalentSGPRClass(MRI.getRegClass(VScalarOp)); 621806c3fb27SDimitry Andric Register SScalarOp = MRI.createVirtualRegister(SScalarOpRC); 6219e8d8bef9SDimitry Andric 622006c3fb27SDimitry Andric // Build scalar ScalarOp. 
622106c3fb27SDimitry Andric auto Merge = 622206c3fb27SDimitry Andric BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), SScalarOp); 6223e8d8bef9SDimitry Andric unsigned Channel = 0; 6224e8d8bef9SDimitry Andric for (Register Piece : ReadlanePieces) { 622506c3fb27SDimitry Andric Merge.addReg(Piece).addImm(TRI->getSubRegFromChannel(Channel++)); 6226e8d8bef9SDimitry Andric } 62270b57cec5SDimitry Andric 622806c3fb27SDimitry Andric // Update ScalarOp operand to use the SGPR ScalarOp. 622906c3fb27SDimitry Andric ScalarOp->setReg(SScalarOp); 623006c3fb27SDimitry Andric ScalarOp->setIsKill(); 623106c3fb27SDimitry Andric } 623206c3fb27SDimitry Andric } 62330b57cec5SDimitry Andric 6234e8d8bef9SDimitry Andric Register SaveExec = MRI.createVirtualRegister(BoolXExecRC); 6235e8d8bef9SDimitry Andric MRI.setSimpleHint(SaveExec, CondReg); 62360b57cec5SDimitry Andric 62370b57cec5SDimitry Andric // Update EXEC to matching lanes, saving original to SaveExec. 62380b57cec5SDimitry Andric BuildMI(LoopBB, I, DL, TII.get(SaveExecOpc), SaveExec) 6239e8d8bef9SDimitry Andric .addReg(CondReg, RegState::Kill); 62400b57cec5SDimitry Andric 62410b57cec5SDimitry Andric // The original instruction is here; we insert the terminators after it. 624281ad6265SDimitry Andric I = BodyBB.end(); 62430b57cec5SDimitry Andric 62440b57cec5SDimitry Andric // Update EXEC, switch all done bits to 0 and all todo bits to 1. 624581ad6265SDimitry Andric BuildMI(BodyBB, I, DL, TII.get(XorTermOpc), Exec) 62460b57cec5SDimitry Andric .addReg(Exec) 62470b57cec5SDimitry Andric .addReg(SaveExec); 6248e8d8bef9SDimitry Andric 624981ad6265SDimitry Andric BuildMI(BodyBB, I, DL, TII.get(AMDGPU::SI_WATERFALL_LOOP)).addMBB(&LoopBB); 62500b57cec5SDimitry Andric } 62510b57cec5SDimitry Andric 625206c3fb27SDimitry Andric // Build a waterfall loop around \p MI, replacing the VGPR \p ScalarOp register 62530b57cec5SDimitry Andric // with SGPRs by iterating over all unique values across all lanes. 6254e8d8bef9SDimitry Andric // Returns the loop basic block that now contains \p MI. 6255e8d8bef9SDimitry Andric static MachineBasicBlock * 625606c3fb27SDimitry Andric loadMBUFScalarOperandsFromVGPR(const SIInstrInfo &TII, MachineInstr &MI, 625706c3fb27SDimitry Andric ArrayRef<MachineOperand *> ScalarOps, 625806c3fb27SDimitry Andric MachineDominatorTree *MDT, 6259e8d8bef9SDimitry Andric MachineBasicBlock::iterator Begin = nullptr, 6260e8d8bef9SDimitry Andric MachineBasicBlock::iterator End = nullptr) { 62610b57cec5SDimitry Andric MachineBasicBlock &MBB = *MI.getParent(); 62620b57cec5SDimitry Andric MachineFunction &MF = *MBB.getParent(); 62630b57cec5SDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 62640b57cec5SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 62650b57cec5SDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo(); 6266e8d8bef9SDimitry Andric if (!Begin.isValid()) 6267e8d8bef9SDimitry Andric Begin = &MI; 6268e8d8bef9SDimitry Andric if (!End.isValid()) { 6269e8d8bef9SDimitry Andric End = &MI; 6270e8d8bef9SDimitry Andric ++End; 6271e8d8bef9SDimitry Andric } 62720b57cec5SDimitry Andric const DebugLoc &DL = MI.getDebugLoc(); 62730b57cec5SDimitry Andric unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; 62740b57cec5SDimitry Andric unsigned MovExecOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; 62750b57cec5SDimitry Andric const auto *BoolXExecRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID); 62760b57cec5SDimitry Andric 62775f757f3fSDimitry Andric // Save SCC. 
Waterfall Loop may overwrite SCC. 62785f757f3fSDimitry Andric Register SaveSCCReg; 62795f757f3fSDimitry Andric bool SCCNotDead = (MBB.computeRegisterLiveness(TRI, AMDGPU::SCC, MI, 30) != 62805f757f3fSDimitry Andric MachineBasicBlock::LQR_Dead); 62815f757f3fSDimitry Andric if (SCCNotDead) { 62825f757f3fSDimitry Andric SaveSCCReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); 62835f757f3fSDimitry Andric BuildMI(MBB, Begin, DL, TII.get(AMDGPU::S_CSELECT_B32), SaveSCCReg) 62845f757f3fSDimitry Andric .addImm(1) 62855f757f3fSDimitry Andric .addImm(0); 62865f757f3fSDimitry Andric } 62875f757f3fSDimitry Andric 62888bcb0991SDimitry Andric Register SaveExec = MRI.createVirtualRegister(BoolXExecRC); 62890b57cec5SDimitry Andric 62900b57cec5SDimitry Andric // Save the EXEC mask 6291e8d8bef9SDimitry Andric BuildMI(MBB, Begin, DL, TII.get(MovExecOpc), SaveExec).addReg(Exec); 62920b57cec5SDimitry Andric 62930b57cec5SDimitry Andric // Killed uses in the instruction we are waterfalling around will be 62940b57cec5SDimitry Andric // incorrect due to the added control-flow. 6295e8d8bef9SDimitry Andric MachineBasicBlock::iterator AfterMI = MI; 6296e8d8bef9SDimitry Andric ++AfterMI; 6297e8d8bef9SDimitry Andric for (auto I = Begin; I != AfterMI; I++) { 629806c3fb27SDimitry Andric for (auto &MO : I->all_uses()) 62990b57cec5SDimitry Andric MRI.clearKillFlags(MO.getReg()); 63000b57cec5SDimitry Andric } 63010b57cec5SDimitry Andric 63020b57cec5SDimitry Andric // To insert the loop we need to split the block. Move everything after this 63030b57cec5SDimitry Andric // point to a new block, and insert a new empty block between the two. 63040b57cec5SDimitry Andric MachineBasicBlock *LoopBB = MF.CreateMachineBasicBlock(); 630581ad6265SDimitry Andric MachineBasicBlock *BodyBB = MF.CreateMachineBasicBlock(); 63060b57cec5SDimitry Andric MachineBasicBlock *RemainderBB = MF.CreateMachineBasicBlock(); 63070b57cec5SDimitry Andric MachineFunction::iterator MBBI(MBB); 63080b57cec5SDimitry Andric ++MBBI; 63090b57cec5SDimitry Andric 63100b57cec5SDimitry Andric MF.insert(MBBI, LoopBB); 631181ad6265SDimitry Andric MF.insert(MBBI, BodyBB); 63120b57cec5SDimitry Andric MF.insert(MBBI, RemainderBB); 63130b57cec5SDimitry Andric 631481ad6265SDimitry Andric LoopBB->addSuccessor(BodyBB); 631581ad6265SDimitry Andric BodyBB->addSuccessor(LoopBB); 631681ad6265SDimitry Andric BodyBB->addSuccessor(RemainderBB); 63170b57cec5SDimitry Andric 631881ad6265SDimitry Andric // Move Begin to MI to the BodyBB, and the remainder of the block to 6319e8d8bef9SDimitry Andric // RemainderBB. 63200b57cec5SDimitry Andric RemainderBB->transferSuccessorsAndUpdatePHIs(&MBB); 6321e8d8bef9SDimitry Andric RemainderBB->splice(RemainderBB->begin(), &MBB, End, MBB.end()); 632281ad6265SDimitry Andric BodyBB->splice(BodyBB->begin(), &MBB, Begin, MBB.end()); 63230b57cec5SDimitry Andric 63240b57cec5SDimitry Andric MBB.addSuccessor(LoopBB); 63250b57cec5SDimitry Andric 63260b57cec5SDimitry Andric // Update dominators. We know that MBB immediately dominates LoopBB, that 632781ad6265SDimitry Andric // LoopBB immediately dominates BodyBB, and BodyBB immediately dominates 632881ad6265SDimitry Andric // RemainderBB. RemainderBB immediately dominates all of the successors 632981ad6265SDimitry Andric // transferred to it from MBB that MBB used to properly dominate. 
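// The resulting control flow looks roughly like this; the back edge from
// BodyBB re-runs the readfirstlane/compare sequence for the lanes that did
// not match on the previous iteration:
//
//   MBB --> LoopBB --> BodyBB --> RemainderBB
//              ^          |
//              +----------+
//
// LoopBB holds the readfirstlane/V_CMP/S_AND_SAVEEXEC sequence, BodyBB holds
// the waterfalled instruction(s) plus the S_XOR*_term and SI_WATERFALL_LOOP
// terminators, and RemainderBB restores SCC and EXEC.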
63300b57cec5SDimitry Andric if (MDT) { 63310b57cec5SDimitry Andric MDT->addNewBlock(LoopBB, &MBB); 633281ad6265SDimitry Andric MDT->addNewBlock(BodyBB, LoopBB); 633381ad6265SDimitry Andric MDT->addNewBlock(RemainderBB, BodyBB); 63340b57cec5SDimitry Andric for (auto &Succ : RemainderBB->successors()) { 6335480093f4SDimitry Andric if (MDT->properlyDominates(&MBB, Succ)) { 63360b57cec5SDimitry Andric MDT->changeImmediateDominator(Succ, RemainderBB); 63370b57cec5SDimitry Andric } 63380b57cec5SDimitry Andric } 63390b57cec5SDimitry Andric } 63400b57cec5SDimitry Andric 634106c3fb27SDimitry Andric emitLoadScalarOpsFromVGPRLoop(TII, MRI, MBB, *LoopBB, *BodyBB, DL, ScalarOps); 63420b57cec5SDimitry Andric 63430b57cec5SDimitry Andric MachineBasicBlock::iterator First = RemainderBB->begin(); 63445f757f3fSDimitry Andric // Restore SCC 63455f757f3fSDimitry Andric if (SCCNotDead) { 63465f757f3fSDimitry Andric BuildMI(*RemainderBB, First, DL, TII.get(AMDGPU::S_CMP_LG_U32)) 63475f757f3fSDimitry Andric .addReg(SaveSCCReg, RegState::Kill) 63485f757f3fSDimitry Andric .addImm(0); 63495f757f3fSDimitry Andric } 63505f757f3fSDimitry Andric 63515f757f3fSDimitry Andric // Restore the EXEC mask 63520b57cec5SDimitry Andric BuildMI(*RemainderBB, First, DL, TII.get(MovExecOpc), Exec).addReg(SaveExec); 635381ad6265SDimitry Andric return BodyBB; 63540b57cec5SDimitry Andric } 63550b57cec5SDimitry Andric 63560b57cec5SDimitry Andric // Extract pointer from Rsrc and return a zero-value Rsrc replacement. 63570b57cec5SDimitry Andric static std::tuple<unsigned, unsigned> 63580b57cec5SDimitry Andric extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc) { 63590b57cec5SDimitry Andric MachineBasicBlock &MBB = *MI.getParent(); 63600b57cec5SDimitry Andric MachineFunction &MF = *MBB.getParent(); 63610b57cec5SDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo(); 63620b57cec5SDimitry Andric 63630b57cec5SDimitry Andric // Extract the ptr from the resource descriptor. 
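// The replacement descriptor built below has the shape (illustrative only)
//
//   NewSRsrc = { 0, 0, RSRC_DATA_FORMAT[31:0], RSRC_DATA_FORMAT[63:32] }
//
// i.e. a null base pointer with just the default data-format bits set, while
// the real 64-bit base is returned separately in RsrcPtr.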
63640b57cec5SDimitry Andric unsigned RsrcPtr = 63650b57cec5SDimitry Andric TII.buildExtractSubReg(MI, MRI, Rsrc, &AMDGPU::VReg_128RegClass, 63660b57cec5SDimitry Andric AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass); 63670b57cec5SDimitry Andric 63680b57cec5SDimitry Andric // Create an empty resource descriptor 63698bcb0991SDimitry Andric Register Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); 63708bcb0991SDimitry Andric Register SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); 63718bcb0991SDimitry Andric Register SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); 63728bcb0991SDimitry Andric Register NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass); 63730b57cec5SDimitry Andric uint64_t RsrcDataFormat = TII.getDefaultRsrcDataFormat(); 63740b57cec5SDimitry Andric 63750b57cec5SDimitry Andric // Zero64 = 0 63760b57cec5SDimitry Andric BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::S_MOV_B64), Zero64) 63770b57cec5SDimitry Andric .addImm(0); 63780b57cec5SDimitry Andric 63790b57cec5SDimitry Andric // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0} 63800b57cec5SDimitry Andric BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), SRsrcFormatLo) 63810b57cec5SDimitry Andric .addImm(RsrcDataFormat & 0xFFFFFFFF); 63820b57cec5SDimitry Andric 63830b57cec5SDimitry Andric // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32} 63840b57cec5SDimitry Andric BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), SRsrcFormatHi) 63850b57cec5SDimitry Andric .addImm(RsrcDataFormat >> 32); 63860b57cec5SDimitry Andric 63870b57cec5SDimitry Andric // NewSRsrc = {Zero64, SRsrcFormat} 63880b57cec5SDimitry Andric BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::REG_SEQUENCE), NewSRsrc) 63890b57cec5SDimitry Andric .addReg(Zero64) 63900b57cec5SDimitry Andric .addImm(AMDGPU::sub0_sub1) 63910b57cec5SDimitry Andric .addReg(SRsrcFormatLo) 63920b57cec5SDimitry Andric .addImm(AMDGPU::sub2) 63930b57cec5SDimitry Andric .addReg(SRsrcFormatHi) 63940b57cec5SDimitry Andric .addImm(AMDGPU::sub3); 63950b57cec5SDimitry Andric 6396bdd1243dSDimitry Andric return std::tuple(RsrcPtr, NewSRsrc); 63970b57cec5SDimitry Andric } 63980b57cec5SDimitry Andric 6399e8d8bef9SDimitry Andric MachineBasicBlock * 6400e8d8bef9SDimitry Andric SIInstrInfo::legalizeOperands(MachineInstr &MI, 64010b57cec5SDimitry Andric MachineDominatorTree *MDT) const { 64020b57cec5SDimitry Andric MachineFunction &MF = *MI.getParent()->getParent(); 64030b57cec5SDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo(); 6404e8d8bef9SDimitry Andric MachineBasicBlock *CreatedBB = nullptr; 64050b57cec5SDimitry Andric 64060b57cec5SDimitry Andric // Legalize VOP2 64070b57cec5SDimitry Andric if (isVOP2(MI) || isVOPC(MI)) { 64080b57cec5SDimitry Andric legalizeOperandsVOP2(MRI, MI); 6409e8d8bef9SDimitry Andric return CreatedBB; 64100b57cec5SDimitry Andric } 64110b57cec5SDimitry Andric 64120b57cec5SDimitry Andric // Legalize VOP3 64130b57cec5SDimitry Andric if (isVOP3(MI)) { 64140b57cec5SDimitry Andric legalizeOperandsVOP3(MRI, MI); 6415e8d8bef9SDimitry Andric return CreatedBB; 64160b57cec5SDimitry Andric } 64170b57cec5SDimitry Andric 64180b57cec5SDimitry Andric // Legalize SMRD 64190b57cec5SDimitry Andric if (isSMRD(MI)) { 64200b57cec5SDimitry Andric legalizeOperandsSMRD(MRI, MI); 6421e8d8bef9SDimitry Andric return CreatedBB; 6422e8d8bef9SDimitry Andric } 6423e8d8bef9SDimitry Andric 6424e8d8bef9SDimitry Andric // Legalize FLAT 6425e8d8bef9SDimitry Andric if (isFLAT(MI)) { 6426e8d8bef9SDimitry Andric 
legalizeOperandsFLAT(MRI, MI);
6427e8d8bef9SDimitry Andric return CreatedBB;
64280b57cec5SDimitry Andric }
64290b57cec5SDimitry Andric
64300b57cec5SDimitry Andric // Legalize REG_SEQUENCE and PHI
64310b57cec5SDimitry Andric // The register class of the operands must be the same type as the register
64320b57cec5SDimitry Andric // class of the output.
64330b57cec5SDimitry Andric if (MI.getOpcode() == AMDGPU::PHI) {
64340b57cec5SDimitry Andric const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr;
64350b57cec5SDimitry Andric for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
6436e8d8bef9SDimitry Andric if (!MI.getOperand(i).isReg() || !MI.getOperand(i).getReg().isVirtual())
64370b57cec5SDimitry Andric continue;
64380b57cec5SDimitry Andric const TargetRegisterClass *OpRC =
64390b57cec5SDimitry Andric MRI.getRegClass(MI.getOperand(i).getReg());
64400b57cec5SDimitry Andric if (RI.hasVectorRegisters(OpRC)) {
64410b57cec5SDimitry Andric VRC = OpRC;
64420b57cec5SDimitry Andric } else {
64430b57cec5SDimitry Andric SRC = OpRC;
64440b57cec5SDimitry Andric }
64450b57cec5SDimitry Andric }
64460b57cec5SDimitry Andric
64470b57cec5SDimitry Andric // If any of the operands are VGPR registers, then they all must be VGPRs;
64480b57cec5SDimitry Andric // otherwise we will create illegal VGPR->SGPR copies when legalizing
64490b57cec5SDimitry Andric // them.
64500b57cec5SDimitry Andric if (VRC || !RI.isSGPRClass(getOpRegClass(MI, 0))) {
64510b57cec5SDimitry Andric if (!VRC) {
64520b57cec5SDimitry Andric assert(SRC);
64538bcb0991SDimitry Andric if (getOpRegClass(MI, 0) == &AMDGPU::VReg_1RegClass) {
64548bcb0991SDimitry Andric VRC = &AMDGPU::VReg_1RegClass;
64558bcb0991SDimitry Andric } else
64564824e7fdSDimitry Andric VRC = RI.isAGPRClass(getOpRegClass(MI, 0))
64578bcb0991SDimitry Andric ? RI.getEquivalentAGPRClass(SRC)
64580b57cec5SDimitry Andric : RI.getEquivalentVGPRClass(SRC);
64598bcb0991SDimitry Andric } else {
64604824e7fdSDimitry Andric VRC = RI.isAGPRClass(getOpRegClass(MI, 0))
64618bcb0991SDimitry Andric ? RI.getEquivalentAGPRClass(VRC)
64628bcb0991SDimitry Andric : RI.getEquivalentVGPRClass(VRC);
64630b57cec5SDimitry Andric }
64640b57cec5SDimitry Andric RC = VRC;
64650b57cec5SDimitry Andric } else {
64660b57cec5SDimitry Andric RC = SRC;
64670b57cec5SDimitry Andric }
64680b57cec5SDimitry Andric
64690b57cec5SDimitry Andric // Update all the operands so they have the same type.
64700b57cec5SDimitry Andric for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
64710b57cec5SDimitry Andric MachineOperand &Op = MI.getOperand(I);
6472e8d8bef9SDimitry Andric if (!Op.isReg() || !Op.getReg().isVirtual())
64730b57cec5SDimitry Andric continue;
64740b57cec5SDimitry Andric
64750b57cec5SDimitry Andric // MI is a PHI instruction.
64760b57cec5SDimitry Andric MachineBasicBlock *InsertBB = MI.getOperand(I + 1).getMBB();
64770b57cec5SDimitry Andric MachineBasicBlock::iterator Insert = InsertBB->getFirstTerminator();
64780b57cec5SDimitry Andric
64790b57cec5SDimitry Andric // Avoid creating no-op copies with the same src and dst reg class. These
64800b57cec5SDimitry Andric // confuse some of the machine passes.
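// As a concrete, made-up example, a PHI such as
//
//   %r = PHI %a:sgpr_32, %bb.0, %b:vgpr_32, %bb.1
//
// has its SGPR input given a VGPR copy so that all incoming values share the
// vector class:
//
//   %a.v:vgpr_32 = COPY %a            (inserted before %bb.0's terminator)
//   %r = PHI %a.v, %bb.0, %b, %bb.1
//
// The COPY is placed in the predecessor block, at its first terminator,
// which is what the legalizeGenericOperand call below does.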
64810b57cec5SDimitry Andric legalizeGenericOperand(*InsertBB, Insert, RC, Op, MRI, MI.getDebugLoc()); 64820b57cec5SDimitry Andric } 64830b57cec5SDimitry Andric } 64840b57cec5SDimitry Andric 64850b57cec5SDimitry Andric // REG_SEQUENCE doesn't really require operand legalization, but if one has a 64860b57cec5SDimitry Andric // VGPR dest type and SGPR sources, insert copies so all operands are 64870b57cec5SDimitry Andric // VGPRs. This seems to help operand folding / the register coalescer. 64880b57cec5SDimitry Andric if (MI.getOpcode() == AMDGPU::REG_SEQUENCE) { 64890b57cec5SDimitry Andric MachineBasicBlock *MBB = MI.getParent(); 64900b57cec5SDimitry Andric const TargetRegisterClass *DstRC = getOpRegClass(MI, 0); 64910b57cec5SDimitry Andric if (RI.hasVGPRs(DstRC)) { 64920b57cec5SDimitry Andric // Update all the operands so they are VGPR register classes. These may 64930b57cec5SDimitry Andric // not be the same register class because REG_SEQUENCE supports mixing 64940b57cec5SDimitry Andric // subregister index types e.g. sub0_sub1 + sub2 + sub3 64950b57cec5SDimitry Andric for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) { 64960b57cec5SDimitry Andric MachineOperand &Op = MI.getOperand(I); 6497e8d8bef9SDimitry Andric if (!Op.isReg() || !Op.getReg().isVirtual()) 64980b57cec5SDimitry Andric continue; 64990b57cec5SDimitry Andric 65000b57cec5SDimitry Andric const TargetRegisterClass *OpRC = MRI.getRegClass(Op.getReg()); 65010b57cec5SDimitry Andric const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(OpRC); 65020b57cec5SDimitry Andric if (VRC == OpRC) 65030b57cec5SDimitry Andric continue; 65040b57cec5SDimitry Andric 65050b57cec5SDimitry Andric legalizeGenericOperand(*MBB, MI, VRC, Op, MRI, MI.getDebugLoc()); 65060b57cec5SDimitry Andric Op.setIsKill(); 65070b57cec5SDimitry Andric } 65080b57cec5SDimitry Andric } 65090b57cec5SDimitry Andric 6510e8d8bef9SDimitry Andric return CreatedBB; 65110b57cec5SDimitry Andric } 65120b57cec5SDimitry Andric 65130b57cec5SDimitry Andric // Legalize INSERT_SUBREG 65140b57cec5SDimitry Andric // src0 must have the same register class as dst 65150b57cec5SDimitry Andric if (MI.getOpcode() == AMDGPU::INSERT_SUBREG) { 65168bcb0991SDimitry Andric Register Dst = MI.getOperand(0).getReg(); 65178bcb0991SDimitry Andric Register Src0 = MI.getOperand(1).getReg(); 65180b57cec5SDimitry Andric const TargetRegisterClass *DstRC = MRI.getRegClass(Dst); 65190b57cec5SDimitry Andric const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0); 65200b57cec5SDimitry Andric if (DstRC != Src0RC) { 65210b57cec5SDimitry Andric MachineBasicBlock *MBB = MI.getParent(); 65220b57cec5SDimitry Andric MachineOperand &Op = MI.getOperand(1); 65230b57cec5SDimitry Andric legalizeGenericOperand(*MBB, MI, DstRC, Op, MRI, MI.getDebugLoc()); 65240b57cec5SDimitry Andric } 6525e8d8bef9SDimitry Andric return CreatedBB; 65260b57cec5SDimitry Andric } 65270b57cec5SDimitry Andric 65280b57cec5SDimitry Andric // Legalize SI_INIT_M0 65290b57cec5SDimitry Andric if (MI.getOpcode() == AMDGPU::SI_INIT_M0) { 65300b57cec5SDimitry Andric MachineOperand &Src = MI.getOperand(0); 65310b57cec5SDimitry Andric if (Src.isReg() && RI.hasVectorRegisters(MRI.getRegClass(Src.getReg()))) 65320b57cec5SDimitry Andric Src.setReg(readlaneVGPRToSGPR(Src.getReg(), MI, MRI)); 6533e8d8bef9SDimitry Andric return CreatedBB; 65340b57cec5SDimitry Andric } 65350b57cec5SDimitry Andric 65365f757f3fSDimitry Andric // Legalize S_BITREPLICATE, S_QUADMASK and S_WQM 65375f757f3fSDimitry Andric if (MI.getOpcode() == 
AMDGPU::S_BITREPLICATE_B64_B32 || 65385f757f3fSDimitry Andric MI.getOpcode() == AMDGPU::S_QUADMASK_B32 || 65395f757f3fSDimitry Andric MI.getOpcode() == AMDGPU::S_QUADMASK_B64 || 65405f757f3fSDimitry Andric MI.getOpcode() == AMDGPU::S_WQM_B32 || 65415f757f3fSDimitry Andric MI.getOpcode() == AMDGPU::S_WQM_B64) { 65425f757f3fSDimitry Andric MachineOperand &Src = MI.getOperand(1); 65435f757f3fSDimitry Andric if (Src.isReg() && RI.hasVectorRegisters(MRI.getRegClass(Src.getReg()))) 65445f757f3fSDimitry Andric Src.setReg(readlaneVGPRToSGPR(Src.getReg(), MI, MRI)); 65455f757f3fSDimitry Andric return CreatedBB; 65465f757f3fSDimitry Andric } 65475f757f3fSDimitry Andric 65485f757f3fSDimitry Andric // Legalize MIMG/VIMAGE/VSAMPLE and MUBUF/MTBUF for shaders. 65490b57cec5SDimitry Andric // 65500b57cec5SDimitry Andric // Shaders only generate MUBUF/MTBUF instructions via intrinsics or via 65510b57cec5SDimitry Andric // scratch memory access. In both cases, the legalization never involves 65520b57cec5SDimitry Andric // conversion to the addr64 form. 65535f757f3fSDimitry Andric if (isImage(MI) || (AMDGPU::isGraphics(MF.getFunction().getCallingConv()) && 65540b57cec5SDimitry Andric (isMUBUF(MI) || isMTBUF(MI)))) { 65555f757f3fSDimitry Andric int RSrcOpName = (isVIMAGE(MI) || isVSAMPLE(MI)) ? AMDGPU::OpName::rsrc 65565f757f3fSDimitry Andric : AMDGPU::OpName::srsrc; 65575f757f3fSDimitry Andric MachineOperand *SRsrc = getNamedOperand(MI, RSrcOpName); 6558e8d8bef9SDimitry Andric if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg()))) 655906c3fb27SDimitry Andric CreatedBB = loadMBUFScalarOperandsFromVGPR(*this, MI, {SRsrc}, MDT); 65600b57cec5SDimitry Andric 65615f757f3fSDimitry Andric int SampOpName = isMIMG(MI) ? AMDGPU::OpName::ssamp : AMDGPU::OpName::samp; 65625f757f3fSDimitry Andric MachineOperand *SSamp = getNamedOperand(MI, SampOpName); 6563e8d8bef9SDimitry Andric if (SSamp && !RI.isSGPRClass(MRI.getRegClass(SSamp->getReg()))) 656406c3fb27SDimitry Andric CreatedBB = loadMBUFScalarOperandsFromVGPR(*this, MI, {SSamp}, MDT); 6565e8d8bef9SDimitry Andric 6566e8d8bef9SDimitry Andric return CreatedBB; 65670b57cec5SDimitry Andric } 6568e8d8bef9SDimitry Andric 6569e8d8bef9SDimitry Andric // Legalize SI_CALL 6570e8d8bef9SDimitry Andric if (MI.getOpcode() == AMDGPU::SI_CALL_ISEL) { 6571e8d8bef9SDimitry Andric MachineOperand *Dest = &MI.getOperand(0); 6572e8d8bef9SDimitry Andric if (!RI.isSGPRClass(MRI.getRegClass(Dest->getReg()))) { 6573e8d8bef9SDimitry Andric // Move everything between ADJCALLSTACKUP and ADJCALLSTACKDOWN and 6574e8d8bef9SDimitry Andric // following copies, we also need to move copies from and to physical 6575e8d8bef9SDimitry Andric // registers into the loop block. 
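// Schematically (operands are elided), the range handed to the waterfall
// loop for a call through a VGPR spans the whole call sequence, not just the
// SI_CALL_ISEL itself:
//
//   ADJCALLSTACKUP ...          <- Start
//   COPY ...                    (argument copies into physical registers)
//   SI_CALL_ISEL %vgpr_callee, ...
//   ADJCALLSTACKDOWN ...
//   COPY ...                    <- End extends past these return-value copies
//
// so everything between the frame markers is re-executed once per unique
// callee value.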
6576e8d8bef9SDimitry Andric unsigned FrameSetupOpcode = getCallFrameSetupOpcode(); 6577e8d8bef9SDimitry Andric unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode(); 6578e8d8bef9SDimitry Andric 6579e8d8bef9SDimitry Andric // Also move the copies to physical registers into the loop block 6580e8d8bef9SDimitry Andric MachineBasicBlock &MBB = *MI.getParent(); 6581e8d8bef9SDimitry Andric MachineBasicBlock::iterator Start(&MI); 6582e8d8bef9SDimitry Andric while (Start->getOpcode() != FrameSetupOpcode) 6583e8d8bef9SDimitry Andric --Start; 6584e8d8bef9SDimitry Andric MachineBasicBlock::iterator End(&MI); 6585e8d8bef9SDimitry Andric while (End->getOpcode() != FrameDestroyOpcode) 6586e8d8bef9SDimitry Andric ++End; 6587e8d8bef9SDimitry Andric // Also include following copies of the return value 6588e8d8bef9SDimitry Andric ++End; 6589e8d8bef9SDimitry Andric while (End != MBB.end() && End->isCopy() && End->getOperand(1).isReg() && 6590e8d8bef9SDimitry Andric MI.definesRegister(End->getOperand(1).getReg())) 6591e8d8bef9SDimitry Andric ++End; 659206c3fb27SDimitry Andric CreatedBB = 659306c3fb27SDimitry Andric loadMBUFScalarOperandsFromVGPR(*this, MI, {Dest}, MDT, Start, End); 6594e8d8bef9SDimitry Andric } 65950b57cec5SDimitry Andric } 65960b57cec5SDimitry Andric 65975f757f3fSDimitry Andric // Legalize s_sleep_var. 65985f757f3fSDimitry Andric if (MI.getOpcode() == AMDGPU::S_SLEEP_VAR) { 65995f757f3fSDimitry Andric const DebugLoc &DL = MI.getDebugLoc(); 66005f757f3fSDimitry Andric Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); 66015f757f3fSDimitry Andric int Src0Idx = 66025f757f3fSDimitry Andric AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0); 66035f757f3fSDimitry Andric MachineOperand &Src0 = MI.getOperand(Src0Idx); 66045f757f3fSDimitry Andric BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg) 66055f757f3fSDimitry Andric .add(Src0); 66065f757f3fSDimitry Andric Src0.ChangeToRegister(Reg, false); 66075f757f3fSDimitry Andric return nullptr; 66085f757f3fSDimitry Andric } 66095f757f3fSDimitry Andric 661006c3fb27SDimitry Andric // Legalize MUBUF instructions. 661106c3fb27SDimitry Andric bool isSoffsetLegal = true; 661206c3fb27SDimitry Andric int SoffsetIdx = 661306c3fb27SDimitry Andric AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::soffset); 661406c3fb27SDimitry Andric if (SoffsetIdx != -1) { 661506c3fb27SDimitry Andric MachineOperand *Soffset = &MI.getOperand(SoffsetIdx); 66165f757f3fSDimitry Andric if (Soffset->isReg() && Soffset->getReg().isVirtual() && 661706c3fb27SDimitry Andric !RI.isSGPRClass(MRI.getRegClass(Soffset->getReg()))) { 661806c3fb27SDimitry Andric isSoffsetLegal = false; 661906c3fb27SDimitry Andric } 662006c3fb27SDimitry Andric } 662106c3fb27SDimitry Andric 662206c3fb27SDimitry Andric bool isRsrcLegal = true; 66230b57cec5SDimitry Andric int RsrcIdx = 66240b57cec5SDimitry Andric AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc); 66250b57cec5SDimitry Andric if (RsrcIdx != -1) { 66260b57cec5SDimitry Andric MachineOperand *Rsrc = &MI.getOperand(RsrcIdx); 662706c3fb27SDimitry Andric if (Rsrc->isReg() && !RI.isSGPRClass(MRI.getRegClass(Rsrc->getReg()))) { 662806c3fb27SDimitry Andric isRsrcLegal = false; 662906c3fb27SDimitry Andric } 66300b57cec5SDimitry Andric } 66310b57cec5SDimitry Andric 663206c3fb27SDimitry Andric // The operands are legal. 
663306c3fb27SDimitry Andric if (isRsrcLegal && isSoffsetLegal) 663406c3fb27SDimitry Andric return CreatedBB; 663506c3fb27SDimitry Andric 663606c3fb27SDimitry Andric if (!isRsrcLegal) { 663706c3fb27SDimitry Andric // Legalize a VGPR Rsrc 66380b57cec5SDimitry Andric // 66390b57cec5SDimitry Andric // If the instruction is _ADDR64, we can avoid a waterfall by extracting 66400b57cec5SDimitry Andric // the base pointer from the VGPR Rsrc, adding it to the VAddr, then using 66410b57cec5SDimitry Andric // a zero-value SRsrc. 66420b57cec5SDimitry Andric // 66430b57cec5SDimitry Andric // If the instruction is _OFFSET (both idxen and offen disabled), and we 66440b57cec5SDimitry Andric // support ADDR64 instructions, we can convert to ADDR64 and do the same as 66450b57cec5SDimitry Andric // above. 66460b57cec5SDimitry Andric // 66470b57cec5SDimitry Andric // Otherwise we are on non-ADDR64 hardware, and/or we have 66480b57cec5SDimitry Andric // idxen/offen/bothen and we fall back to a waterfall loop. 66490b57cec5SDimitry Andric 665006c3fb27SDimitry Andric MachineOperand *Rsrc = &MI.getOperand(RsrcIdx); 66510b57cec5SDimitry Andric MachineBasicBlock &MBB = *MI.getParent(); 66520b57cec5SDimitry Andric 66530b57cec5SDimitry Andric MachineOperand *VAddr = getNamedOperand(MI, AMDGPU::OpName::vaddr); 66540b57cec5SDimitry Andric if (VAddr && AMDGPU::getIfAddr64Inst(MI.getOpcode()) != -1) { 66550b57cec5SDimitry Andric // This is already an ADDR64 instruction so we need to add the pointer 66560b57cec5SDimitry Andric // extracted from the resource descriptor to the current value of VAddr. 66578bcb0991SDimitry Andric Register NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 66588bcb0991SDimitry Andric Register NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 66598bcb0991SDimitry Andric Register NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); 66600b57cec5SDimitry Andric 66610b57cec5SDimitry Andric const auto *BoolXExecRC = RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID); 66628bcb0991SDimitry Andric Register CondReg0 = MRI.createVirtualRegister(BoolXExecRC); 66638bcb0991SDimitry Andric Register CondReg1 = MRI.createVirtualRegister(BoolXExecRC); 66640b57cec5SDimitry Andric 66650b57cec5SDimitry Andric unsigned RsrcPtr, NewSRsrc; 66660b57cec5SDimitry Andric std::tie(RsrcPtr, NewSRsrc) = extractRsrcPtr(*this, MI, *Rsrc); 66670b57cec5SDimitry Andric 66680b57cec5SDimitry Andric // NewVaddrLo = RsrcPtr:sub0 + VAddr:sub0 66690b57cec5SDimitry Andric const DebugLoc &DL = MI.getDebugLoc(); 6670e8d8bef9SDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_CO_U32_e64), NewVAddrLo) 66710b57cec5SDimitry Andric .addDef(CondReg0) 66720b57cec5SDimitry Andric .addReg(RsrcPtr, 0, AMDGPU::sub0) 66730b57cec5SDimitry Andric .addReg(VAddr->getReg(), 0, AMDGPU::sub0) 66740b57cec5SDimitry Andric .addImm(0); 66750b57cec5SDimitry Andric 66760b57cec5SDimitry Andric // NewVaddrHi = RsrcPtr:sub1 + VAddr:sub1 66770b57cec5SDimitry Andric BuildMI(MBB, MI, DL, get(AMDGPU::V_ADDC_U32_e64), NewVAddrHi) 66780b57cec5SDimitry Andric .addDef(CondReg1, RegState::Dead) 66790b57cec5SDimitry Andric .addReg(RsrcPtr, 0, AMDGPU::sub1) 66800b57cec5SDimitry Andric .addReg(VAddr->getReg(), 0, AMDGPU::sub1) 66810b57cec5SDimitry Andric .addReg(CondReg0, RegState::Kill) 66820b57cec5SDimitry Andric .addImm(0); 66830b57cec5SDimitry Andric 66840b57cec5SDimitry Andric // NewVaddr = {NewVaddrHi, NewVaddrLo} 66850b57cec5SDimitry Andric BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr) 66860b57cec5SDimitry 
Andric .addReg(NewVAddrLo) 66870b57cec5SDimitry Andric .addImm(AMDGPU::sub0) 66880b57cec5SDimitry Andric .addReg(NewVAddrHi) 66890b57cec5SDimitry Andric .addImm(AMDGPU::sub1); 66900b57cec5SDimitry Andric 66910b57cec5SDimitry Andric VAddr->setReg(NewVAddr); 66920b57cec5SDimitry Andric Rsrc->setReg(NewSRsrc); 66930b57cec5SDimitry Andric } else if (!VAddr && ST.hasAddr64()) { 66940b57cec5SDimitry Andric // This instructions is the _OFFSET variant, so we need to convert it to 66950b57cec5SDimitry Andric // ADDR64. 6696e8d8bef9SDimitry Andric assert(ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS && 66970b57cec5SDimitry Andric "FIXME: Need to emit flat atomics here"); 66980b57cec5SDimitry Andric 66990b57cec5SDimitry Andric unsigned RsrcPtr, NewSRsrc; 67000b57cec5SDimitry Andric std::tie(RsrcPtr, NewSRsrc) = extractRsrcPtr(*this, MI, *Rsrc); 67010b57cec5SDimitry Andric 67028bcb0991SDimitry Andric Register NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); 67030b57cec5SDimitry Andric MachineOperand *VData = getNamedOperand(MI, AMDGPU::OpName::vdata); 67040b57cec5SDimitry Andric MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset); 67050b57cec5SDimitry Andric MachineOperand *SOffset = getNamedOperand(MI, AMDGPU::OpName::soffset); 67060b57cec5SDimitry Andric unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI.getOpcode()); 67070b57cec5SDimitry Andric 670881ad6265SDimitry Andric // Atomics with return have an additional tied operand and are 67090b57cec5SDimitry Andric // missing some of the special bits. 67100b57cec5SDimitry Andric MachineOperand *VDataIn = getNamedOperand(MI, AMDGPU::OpName::vdata_in); 67110b57cec5SDimitry Andric MachineInstr *Addr64; 67120b57cec5SDimitry Andric 67130b57cec5SDimitry Andric if (!VDataIn) { 67140b57cec5SDimitry Andric // Regular buffer load / store. 67150b57cec5SDimitry Andric MachineInstrBuilder MIB = 67160b57cec5SDimitry Andric BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode)) 67170b57cec5SDimitry Andric .add(*VData) 67180b57cec5SDimitry Andric .addReg(NewVAddr) 67190b57cec5SDimitry Andric .addReg(NewSRsrc) 67200b57cec5SDimitry Andric .add(*SOffset) 67210b57cec5SDimitry Andric .add(*Offset); 67220b57cec5SDimitry Andric 6723fe6060f1SDimitry Andric if (const MachineOperand *CPol = 6724fe6060f1SDimitry Andric getNamedOperand(MI, AMDGPU::OpName::cpol)) { 6725fe6060f1SDimitry Andric MIB.addImm(CPol->getImm()); 67260b57cec5SDimitry Andric } 67270b57cec5SDimitry Andric 67280b57cec5SDimitry Andric if (const MachineOperand *TFE = 67290b57cec5SDimitry Andric getNamedOperand(MI, AMDGPU::OpName::tfe)) { 67300b57cec5SDimitry Andric MIB.addImm(TFE->getImm()); 67310b57cec5SDimitry Andric } 67320b57cec5SDimitry Andric 67338bcb0991SDimitry Andric MIB.addImm(getNamedImmOperand(MI, AMDGPU::OpName::swz)); 67348bcb0991SDimitry Andric 67350b57cec5SDimitry Andric MIB.cloneMemRefs(MI); 67360b57cec5SDimitry Andric Addr64 = MIB; 67370b57cec5SDimitry Andric } else { 67380b57cec5SDimitry Andric // Atomics with return. 
67390b57cec5SDimitry Andric Addr64 = BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode))
67400b57cec5SDimitry Andric .add(*VData)
67410b57cec5SDimitry Andric .add(*VDataIn)
67420b57cec5SDimitry Andric .addReg(NewVAddr)
67430b57cec5SDimitry Andric .addReg(NewSRsrc)
67440b57cec5SDimitry Andric .add(*SOffset)
67450b57cec5SDimitry Andric .add(*Offset)
6746fe6060f1SDimitry Andric .addImm(getNamedImmOperand(MI, AMDGPU::OpName::cpol))
67470b57cec5SDimitry Andric .cloneMemRefs(MI);
67480b57cec5SDimitry Andric }
67490b57cec5SDimitry Andric
67500b57cec5SDimitry Andric MI.removeFromParent();
67510b57cec5SDimitry Andric
67520b57cec5SDimitry Andric // NewVaddr = {NewVaddrHi, NewVaddrLo}
67530b57cec5SDimitry Andric BuildMI(MBB, Addr64, Addr64->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
67540b57cec5SDimitry Andric NewVAddr)
67550b57cec5SDimitry Andric .addReg(RsrcPtr, 0, AMDGPU::sub0)
67560b57cec5SDimitry Andric .addImm(AMDGPU::sub0)
67570b57cec5SDimitry Andric .addReg(RsrcPtr, 0, AMDGPU::sub1)
67580b57cec5SDimitry Andric .addImm(AMDGPU::sub1);
67590b57cec5SDimitry Andric } else {
676006c3fb27SDimitry Andric // Legalize a VGPR Rsrc and soffset together.
676106c3fb27SDimitry Andric if (!isSoffsetLegal) {
676206c3fb27SDimitry Andric MachineOperand *Soffset = getNamedOperand(MI, AMDGPU::OpName::soffset);
676306c3fb27SDimitry Andric CreatedBB =
676406c3fb27SDimitry Andric loadMBUFScalarOperandsFromVGPR(*this, MI, {Rsrc, Soffset}, MDT);
6765e8d8bef9SDimitry Andric return CreatedBB;
67660b57cec5SDimitry Andric }
676706c3fb27SDimitry Andric CreatedBB = loadMBUFScalarOperandsFromVGPR(*this, MI, {Rsrc}, MDT);
676806c3fb27SDimitry Andric return CreatedBB;
676906c3fb27SDimitry Andric }
677006c3fb27SDimitry Andric }
677106c3fb27SDimitry Andric
677206c3fb27SDimitry Andric // Legalize a VGPR soffset.
677306c3fb27SDimitry Andric if (!isSoffsetLegal) {
677406c3fb27SDimitry Andric MachineOperand *Soffset = getNamedOperand(MI, AMDGPU::OpName::soffset);
677506c3fb27SDimitry Andric CreatedBB = loadMBUFScalarOperandsFromVGPR(*this, MI, {Soffset}, MDT);
677606c3fb27SDimitry Andric return CreatedBB;
67770b57cec5SDimitry Andric }
6778e8d8bef9SDimitry Andric return CreatedBB;
67790b57cec5SDimitry Andric }
67800b57cec5SDimitry Andric
678106c3fb27SDimitry Andric void SIInstrWorklist::insert(MachineInstr *MI) {
678206c3fb27SDimitry Andric InstrList.insert(MI);
678306c3fb27SDimitry Andric // Add MBUF instructions to the deferred list.
678406c3fb27SDimitry Andric int RsrcIdx =
678506c3fb27SDimitry Andric AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::srsrc);
678606c3fb27SDimitry Andric if (RsrcIdx != -1) {
678706c3fb27SDimitry Andric DeferredList.insert(MI);
678806c3fb27SDimitry Andric }
678906c3fb27SDimitry Andric }
679006c3fb27SDimitry Andric
679106c3fb27SDimitry Andric bool SIInstrWorklist::isDeferred(MachineInstr *MI) {
679206c3fb27SDimitry Andric return DeferredList.contains(MI);
679306c3fb27SDimitry Andric }
679406c3fb27SDimitry Andric
679506c3fb27SDimitry Andric void SIInstrInfo::moveToVALU(SIInstrWorklist &Worklist,
67960b57cec5SDimitry Andric MachineDominatorTree *MDT) const {
67970b57cec5SDimitry Andric
67980b57cec5SDimitry Andric while (!Worklist.empty()) {
679906c3fb27SDimitry Andric MachineInstr &Inst = *Worklist.top();
680006c3fb27SDimitry Andric Worklist.erase_top();
680106c3fb27SDimitry Andric // Skip MachineInstr in the deferred list.
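// Instructions carrying an srsrc operand were also added to DeferredList by
// SIInstrWorklist::insert above; they are skipped here and handled by the
// loop over getDeferredList() below, once the rest of the worklist has been
// drained.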
680206c3fb27SDimitry Andric if (Worklist.isDeferred(&Inst)) 680306c3fb27SDimitry Andric continue; 680406c3fb27SDimitry Andric moveToVALUImpl(Worklist, MDT, Inst); 680506c3fb27SDimitry Andric } 68060b57cec5SDimitry Andric 680706c3fb27SDimitry Andric // Deferred list of instructions will be processed once 680806c3fb27SDimitry Andric // all the MachineInstr in the worklist are done. 680906c3fb27SDimitry Andric for (MachineInstr *Inst : Worklist.getDeferredList()) { 681006c3fb27SDimitry Andric moveToVALUImpl(Worklist, MDT, *Inst); 681106c3fb27SDimitry Andric assert(Worklist.empty() && 681206c3fb27SDimitry Andric "Deferred MachineInstr are not supposed to re-populate worklist"); 681306c3fb27SDimitry Andric } 681406c3fb27SDimitry Andric } 681506c3fb27SDimitry Andric 681606c3fb27SDimitry Andric void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist, 681706c3fb27SDimitry Andric MachineDominatorTree *MDT, 681806c3fb27SDimitry Andric MachineInstr &Inst) const { 681906c3fb27SDimitry Andric 682006c3fb27SDimitry Andric MachineBasicBlock *MBB = Inst.getParent(); 682106c3fb27SDimitry Andric if (!MBB) 682206c3fb27SDimitry Andric return; 682306c3fb27SDimitry Andric MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 68240b57cec5SDimitry Andric unsigned Opcode = Inst.getOpcode(); 68250b57cec5SDimitry Andric unsigned NewOpcode = getVALUOp(Inst); 68260b57cec5SDimitry Andric // Handle some special cases 68270b57cec5SDimitry Andric switch (Opcode) { 68280b57cec5SDimitry Andric default: 68290b57cec5SDimitry Andric break; 68300b57cec5SDimitry Andric case AMDGPU::S_ADD_U64_PSEUDO: 68315f757f3fSDimitry Andric NewOpcode = AMDGPU::V_ADD_U64_PSEUDO; 68325f757f3fSDimitry Andric break; 68330b57cec5SDimitry Andric case AMDGPU::S_SUB_U64_PSEUDO: 68345f757f3fSDimitry Andric NewOpcode = AMDGPU::V_SUB_U64_PSEUDO; 68355f757f3fSDimitry Andric break; 68360b57cec5SDimitry Andric case AMDGPU::S_ADD_I32: 6837e8d8bef9SDimitry Andric case AMDGPU::S_SUB_I32: { 68380b57cec5SDimitry Andric // FIXME: The u32 versions currently selected use the carry. 
6839e8d8bef9SDimitry Andric bool Changed; 684006c3fb27SDimitry Andric MachineBasicBlock *CreatedBBTmp = nullptr; 6841e8d8bef9SDimitry Andric std::tie(Changed, CreatedBBTmp) = moveScalarAddSub(Worklist, Inst, MDT); 6842e8d8bef9SDimitry Andric if (Changed) 684306c3fb27SDimitry Andric return; 68440b57cec5SDimitry Andric 68450b57cec5SDimitry Andric // Default handling 68460b57cec5SDimitry Andric break; 6847e8d8bef9SDimitry Andric } 68480b57cec5SDimitry Andric case AMDGPU::S_AND_B64: 68490b57cec5SDimitry Andric splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT); 68500b57cec5SDimitry Andric Inst.eraseFromParent(); 685106c3fb27SDimitry Andric return; 68520b57cec5SDimitry Andric 68530b57cec5SDimitry Andric case AMDGPU::S_OR_B64: 68540b57cec5SDimitry Andric splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT); 68550b57cec5SDimitry Andric Inst.eraseFromParent(); 685606c3fb27SDimitry Andric return; 68570b57cec5SDimitry Andric 68580b57cec5SDimitry Andric case AMDGPU::S_XOR_B64: 68590b57cec5SDimitry Andric splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT); 68600b57cec5SDimitry Andric Inst.eraseFromParent(); 686106c3fb27SDimitry Andric return; 68620b57cec5SDimitry Andric 68630b57cec5SDimitry Andric case AMDGPU::S_NAND_B64: 68640b57cec5SDimitry Andric splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT); 68650b57cec5SDimitry Andric Inst.eraseFromParent(); 686606c3fb27SDimitry Andric return; 68670b57cec5SDimitry Andric 68680b57cec5SDimitry Andric case AMDGPU::S_NOR_B64: 68690b57cec5SDimitry Andric splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT); 68700b57cec5SDimitry Andric Inst.eraseFromParent(); 687106c3fb27SDimitry Andric return; 68720b57cec5SDimitry Andric 68730b57cec5SDimitry Andric case AMDGPU::S_XNOR_B64: 68740b57cec5SDimitry Andric if (ST.hasDLInsts()) 68750b57cec5SDimitry Andric splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT); 68760b57cec5SDimitry Andric else 68770b57cec5SDimitry Andric splitScalar64BitXnor(Worklist, Inst, MDT); 68780b57cec5SDimitry Andric Inst.eraseFromParent(); 687906c3fb27SDimitry Andric return; 68800b57cec5SDimitry Andric 68810b57cec5SDimitry Andric case AMDGPU::S_ANDN2_B64: 68820b57cec5SDimitry Andric splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT); 68830b57cec5SDimitry Andric Inst.eraseFromParent(); 688406c3fb27SDimitry Andric return; 68850b57cec5SDimitry Andric 68860b57cec5SDimitry Andric case AMDGPU::S_ORN2_B64: 68870b57cec5SDimitry Andric splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT); 68880b57cec5SDimitry Andric Inst.eraseFromParent(); 688906c3fb27SDimitry Andric return; 68900b57cec5SDimitry Andric 6891fe6060f1SDimitry Andric case AMDGPU::S_BREV_B64: 6892fe6060f1SDimitry Andric splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_BREV_B32, true); 6893fe6060f1SDimitry Andric Inst.eraseFromParent(); 689406c3fb27SDimitry Andric return; 6895fe6060f1SDimitry Andric 68960b57cec5SDimitry Andric case AMDGPU::S_NOT_B64: 68970b57cec5SDimitry Andric splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32); 68980b57cec5SDimitry Andric Inst.eraseFromParent(); 689906c3fb27SDimitry Andric return; 69000b57cec5SDimitry Andric 69010b57cec5SDimitry Andric case AMDGPU::S_BCNT1_I32_B64: 69020b57cec5SDimitry Andric splitScalar64BitBCNT(Worklist, Inst); 69030b57cec5SDimitry Andric Inst.eraseFromParent(); 690406c3fb27SDimitry Andric return; 69050b57cec5SDimitry Andric 69060b57cec5SDimitry Andric case AMDGPU::S_BFE_I64: 69070b57cec5SDimitry Andric 
splitScalar64BitBFE(Worklist, Inst); 69080b57cec5SDimitry Andric Inst.eraseFromParent(); 690906c3fb27SDimitry Andric return; 69100b57cec5SDimitry Andric 6911cb14a3feSDimitry Andric case AMDGPU::S_FLBIT_I32_B64: 6912cb14a3feSDimitry Andric splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBH_U32_e32); 6913cb14a3feSDimitry Andric Inst.eraseFromParent(); 6914cb14a3feSDimitry Andric return; 6915cb14a3feSDimitry Andric case AMDGPU::S_FF1_I32_B64: 6916cb14a3feSDimitry Andric splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBL_B32_e32); 6917cb14a3feSDimitry Andric Inst.eraseFromParent(); 6918cb14a3feSDimitry Andric return; 6919cb14a3feSDimitry Andric 69200b57cec5SDimitry Andric case AMDGPU::S_LSHL_B32: 69210b57cec5SDimitry Andric if (ST.hasOnlyRevVALUShifts()) { 69220b57cec5SDimitry Andric NewOpcode = AMDGPU::V_LSHLREV_B32_e64; 69230b57cec5SDimitry Andric swapOperands(Inst); 69240b57cec5SDimitry Andric } 69250b57cec5SDimitry Andric break; 69260b57cec5SDimitry Andric case AMDGPU::S_ASHR_I32: 69270b57cec5SDimitry Andric if (ST.hasOnlyRevVALUShifts()) { 69280b57cec5SDimitry Andric NewOpcode = AMDGPU::V_ASHRREV_I32_e64; 69290b57cec5SDimitry Andric swapOperands(Inst); 69300b57cec5SDimitry Andric } 69310b57cec5SDimitry Andric break; 69320b57cec5SDimitry Andric case AMDGPU::S_LSHR_B32: 69330b57cec5SDimitry Andric if (ST.hasOnlyRevVALUShifts()) { 69340b57cec5SDimitry Andric NewOpcode = AMDGPU::V_LSHRREV_B32_e64; 69350b57cec5SDimitry Andric swapOperands(Inst); 69360b57cec5SDimitry Andric } 69370b57cec5SDimitry Andric break; 69380b57cec5SDimitry Andric case AMDGPU::S_LSHL_B64: 69390b57cec5SDimitry Andric if (ST.hasOnlyRevVALUShifts()) { 69405f757f3fSDimitry Andric NewOpcode = ST.getGeneration() >= AMDGPUSubtarget::GFX12 69415f757f3fSDimitry Andric ? AMDGPU::V_LSHLREV_B64_pseudo_e64 69425f757f3fSDimitry Andric : AMDGPU::V_LSHLREV_B64_e64; 69430b57cec5SDimitry Andric swapOperands(Inst); 69440b57cec5SDimitry Andric } 69450b57cec5SDimitry Andric break; 69460b57cec5SDimitry Andric case AMDGPU::S_ASHR_I64: 69470b57cec5SDimitry Andric if (ST.hasOnlyRevVALUShifts()) { 6948e8d8bef9SDimitry Andric NewOpcode = AMDGPU::V_ASHRREV_I64_e64; 69490b57cec5SDimitry Andric swapOperands(Inst); 69500b57cec5SDimitry Andric } 69510b57cec5SDimitry Andric break; 69520b57cec5SDimitry Andric case AMDGPU::S_LSHR_B64: 69530b57cec5SDimitry Andric if (ST.hasOnlyRevVALUShifts()) { 6954e8d8bef9SDimitry Andric NewOpcode = AMDGPU::V_LSHRREV_B64_e64; 69550b57cec5SDimitry Andric swapOperands(Inst); 69560b57cec5SDimitry Andric } 69570b57cec5SDimitry Andric break; 69580b57cec5SDimitry Andric 69590b57cec5SDimitry Andric case AMDGPU::S_ABS_I32: 69600b57cec5SDimitry Andric lowerScalarAbs(Worklist, Inst); 69610b57cec5SDimitry Andric Inst.eraseFromParent(); 696206c3fb27SDimitry Andric return; 69630b57cec5SDimitry Andric 69640b57cec5SDimitry Andric case AMDGPU::S_CBRANCH_SCC0: 6965349cc55cSDimitry Andric case AMDGPU::S_CBRANCH_SCC1: { 69660b57cec5SDimitry Andric // Clear unused bits of vcc 6967349cc55cSDimitry Andric Register CondReg = Inst.getOperand(1).getReg(); 6968349cc55cSDimitry Andric bool IsSCC = CondReg == AMDGPU::SCC; 6969349cc55cSDimitry Andric Register VCC = RI.getVCC(); 6970349cc55cSDimitry Andric Register EXEC = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; 6971349cc55cSDimitry Andric unsigned Opc = ST.isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64; 6972349cc55cSDimitry Andric BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(Opc), VCC) 6973349cc55cSDimitry Andric .addReg(EXEC) 6974349cc55cSDimitry Andric .addReg(IsSCC ? 
VCC : CondReg); 697581ad6265SDimitry Andric Inst.removeOperand(1); 697606c3fb27SDimitry Andric } break; 69770b57cec5SDimitry Andric 69780b57cec5SDimitry Andric case AMDGPU::S_BFE_U64: 69790b57cec5SDimitry Andric case AMDGPU::S_BFM_B64: 69800b57cec5SDimitry Andric llvm_unreachable("Moving this op to VALU not implemented"); 69810b57cec5SDimitry Andric 69820b57cec5SDimitry Andric case AMDGPU::S_PACK_LL_B32_B16: 69830b57cec5SDimitry Andric case AMDGPU::S_PACK_LH_B32_B16: 698481ad6265SDimitry Andric case AMDGPU::S_PACK_HL_B32_B16: 69850b57cec5SDimitry Andric case AMDGPU::S_PACK_HH_B32_B16: 69860b57cec5SDimitry Andric movePackToVALU(Worklist, MRI, Inst); 69870b57cec5SDimitry Andric Inst.eraseFromParent(); 698806c3fb27SDimitry Andric return; 69890b57cec5SDimitry Andric 69900b57cec5SDimitry Andric case AMDGPU::S_XNOR_B32: 69910b57cec5SDimitry Andric lowerScalarXnor(Worklist, Inst); 69920b57cec5SDimitry Andric Inst.eraseFromParent(); 699306c3fb27SDimitry Andric return; 69940b57cec5SDimitry Andric 69950b57cec5SDimitry Andric case AMDGPU::S_NAND_B32: 69960b57cec5SDimitry Andric splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32); 69970b57cec5SDimitry Andric Inst.eraseFromParent(); 699806c3fb27SDimitry Andric return; 69990b57cec5SDimitry Andric 70000b57cec5SDimitry Andric case AMDGPU::S_NOR_B32: 70010b57cec5SDimitry Andric splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32); 70020b57cec5SDimitry Andric Inst.eraseFromParent(); 700306c3fb27SDimitry Andric return; 70040b57cec5SDimitry Andric 70050b57cec5SDimitry Andric case AMDGPU::S_ANDN2_B32: 70060b57cec5SDimitry Andric splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32); 70070b57cec5SDimitry Andric Inst.eraseFromParent(); 700806c3fb27SDimitry Andric return; 70090b57cec5SDimitry Andric 70100b57cec5SDimitry Andric case AMDGPU::S_ORN2_B32: 70110b57cec5SDimitry Andric splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32); 70120b57cec5SDimitry Andric Inst.eraseFromParent(); 701306c3fb27SDimitry Andric return; 70145ffd83dbSDimitry Andric 70155ffd83dbSDimitry Andric // TODO: remove as soon as everything is ready 70165ffd83dbSDimitry Andric // to replace VGPR to SGPR copy with V_READFIRSTLANEs. 70175ffd83dbSDimitry Andric // S_ADD/SUB_CO_PSEUDO as well as S_UADDO/USUBO_PSEUDO 70185ffd83dbSDimitry Andric // can only be selected from the uniform SDNode. 70195ffd83dbSDimitry Andric case AMDGPU::S_ADD_CO_PSEUDO: 70205ffd83dbSDimitry Andric case AMDGPU::S_SUB_CO_PSEUDO: { 70215ffd83dbSDimitry Andric unsigned Opc = (Inst.getOpcode() == AMDGPU::S_ADD_CO_PSEUDO) 70225ffd83dbSDimitry Andric ? 
AMDGPU::V_ADDC_U32_e64 70235ffd83dbSDimitry Andric : AMDGPU::V_SUBB_U32_e64; 70245ffd83dbSDimitry Andric const auto *CarryRC = RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID); 70255ffd83dbSDimitry Andric 70265ffd83dbSDimitry Andric Register CarryInReg = Inst.getOperand(4).getReg(); 70275ffd83dbSDimitry Andric if (!MRI.constrainRegClass(CarryInReg, CarryRC)) { 70285ffd83dbSDimitry Andric Register NewCarryReg = MRI.createVirtualRegister(CarryRC); 702906c3fb27SDimitry Andric BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(AMDGPU::COPY), NewCarryReg) 70305ffd83dbSDimitry Andric .addReg(CarryInReg); 70315ffd83dbSDimitry Andric } 70325ffd83dbSDimitry Andric 70335ffd83dbSDimitry Andric Register CarryOutReg = Inst.getOperand(1).getReg(); 70345ffd83dbSDimitry Andric 70355ffd83dbSDimitry Andric Register DestReg = MRI.createVirtualRegister(RI.getEquivalentVGPRClass( 70365ffd83dbSDimitry Andric MRI.getRegClass(Inst.getOperand(0).getReg()))); 70375ffd83dbSDimitry Andric MachineInstr *CarryOp = 70385ffd83dbSDimitry Andric BuildMI(*MBB, &Inst, Inst.getDebugLoc(), get(Opc), DestReg) 70395ffd83dbSDimitry Andric .addReg(CarryOutReg, RegState::Define) 70405ffd83dbSDimitry Andric .add(Inst.getOperand(2)) 70415ffd83dbSDimitry Andric .add(Inst.getOperand(3)) 70425ffd83dbSDimitry Andric .addReg(CarryInReg) 70435ffd83dbSDimitry Andric .addImm(0); 704406c3fb27SDimitry Andric legalizeOperands(*CarryOp); 70455ffd83dbSDimitry Andric MRI.replaceRegWith(Inst.getOperand(0).getReg(), DestReg); 70465ffd83dbSDimitry Andric addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist); 70475ffd83dbSDimitry Andric Inst.eraseFromParent(); 70485ffd83dbSDimitry Andric } 704906c3fb27SDimitry Andric return; 70505ffd83dbSDimitry Andric case AMDGPU::S_UADDO_PSEUDO: 70515ffd83dbSDimitry Andric case AMDGPU::S_USUBO_PSEUDO: { 70525ffd83dbSDimitry Andric const DebugLoc &DL = Inst.getDebugLoc(); 70535ffd83dbSDimitry Andric MachineOperand &Dest0 = Inst.getOperand(0); 70545ffd83dbSDimitry Andric MachineOperand &Dest1 = Inst.getOperand(1); 70555ffd83dbSDimitry Andric MachineOperand &Src0 = Inst.getOperand(2); 70565ffd83dbSDimitry Andric MachineOperand &Src1 = Inst.getOperand(3); 70575ffd83dbSDimitry Andric 70585ffd83dbSDimitry Andric unsigned Opc = (Inst.getOpcode() == AMDGPU::S_UADDO_PSEUDO) 7059e8d8bef9SDimitry Andric ? 
AMDGPU::V_ADD_CO_U32_e64 7060e8d8bef9SDimitry Andric : AMDGPU::V_SUB_CO_U32_e64; 70615ffd83dbSDimitry Andric const TargetRegisterClass *NewRC = 70625ffd83dbSDimitry Andric RI.getEquivalentVGPRClass(MRI.getRegClass(Dest0.getReg())); 70635ffd83dbSDimitry Andric Register DestReg = MRI.createVirtualRegister(NewRC); 70645ffd83dbSDimitry Andric MachineInstr *NewInstr = BuildMI(*MBB, &Inst, DL, get(Opc), DestReg) 70655ffd83dbSDimitry Andric .addReg(Dest1.getReg(), RegState::Define) 70665ffd83dbSDimitry Andric .add(Src0) 70675ffd83dbSDimitry Andric .add(Src1) 70685ffd83dbSDimitry Andric .addImm(0); // clamp bit 70695ffd83dbSDimitry Andric 707006c3fb27SDimitry Andric legalizeOperands(*NewInstr, MDT); 70715ffd83dbSDimitry Andric MRI.replaceRegWith(Dest0.getReg(), DestReg); 70725ffd83dbSDimitry Andric addUsersToMoveToVALUWorklist(NewInstr->getOperand(0).getReg(), MRI, 70735ffd83dbSDimitry Andric Worklist); 70745ffd83dbSDimitry Andric Inst.eraseFromParent(); 70755ffd83dbSDimitry Andric } 707606c3fb27SDimitry Andric return; 70775ffd83dbSDimitry Andric 70785ffd83dbSDimitry Andric case AMDGPU::S_CSELECT_B32: 7079349cc55cSDimitry Andric case AMDGPU::S_CSELECT_B64: 708004eeddc0SDimitry Andric lowerSelect(Worklist, Inst, MDT); 7081349cc55cSDimitry Andric Inst.eraseFromParent(); 708206c3fb27SDimitry Andric return; 7083349cc55cSDimitry Andric case AMDGPU::S_CMP_EQ_I32: 7084349cc55cSDimitry Andric case AMDGPU::S_CMP_LG_I32: 7085349cc55cSDimitry Andric case AMDGPU::S_CMP_GT_I32: 7086349cc55cSDimitry Andric case AMDGPU::S_CMP_GE_I32: 7087349cc55cSDimitry Andric case AMDGPU::S_CMP_LT_I32: 7088349cc55cSDimitry Andric case AMDGPU::S_CMP_LE_I32: 7089349cc55cSDimitry Andric case AMDGPU::S_CMP_EQ_U32: 7090349cc55cSDimitry Andric case AMDGPU::S_CMP_LG_U32: 7091349cc55cSDimitry Andric case AMDGPU::S_CMP_GT_U32: 7092349cc55cSDimitry Andric case AMDGPU::S_CMP_GE_U32: 7093349cc55cSDimitry Andric case AMDGPU::S_CMP_LT_U32: 7094349cc55cSDimitry Andric case AMDGPU::S_CMP_LE_U32: 7095349cc55cSDimitry Andric case AMDGPU::S_CMP_EQ_U64: 70965f757f3fSDimitry Andric case AMDGPU::S_CMP_LG_U64: 70975f757f3fSDimitry Andric case AMDGPU::S_CMP_LT_F32: 70985f757f3fSDimitry Andric case AMDGPU::S_CMP_EQ_F32: 70995f757f3fSDimitry Andric case AMDGPU::S_CMP_LE_F32: 71005f757f3fSDimitry Andric case AMDGPU::S_CMP_GT_F32: 71015f757f3fSDimitry Andric case AMDGPU::S_CMP_LG_F32: 71025f757f3fSDimitry Andric case AMDGPU::S_CMP_GE_F32: 71035f757f3fSDimitry Andric case AMDGPU::S_CMP_O_F32: 71045f757f3fSDimitry Andric case AMDGPU::S_CMP_U_F32: 71055f757f3fSDimitry Andric case AMDGPU::S_CMP_NGE_F32: 71065f757f3fSDimitry Andric case AMDGPU::S_CMP_NLG_F32: 71075f757f3fSDimitry Andric case AMDGPU::S_CMP_NGT_F32: 71085f757f3fSDimitry Andric case AMDGPU::S_CMP_NLE_F32: 71095f757f3fSDimitry Andric case AMDGPU::S_CMP_NEQ_F32: 71105f757f3fSDimitry Andric case AMDGPU::S_CMP_NLT_F32: 71115f757f3fSDimitry Andric case AMDGPU::S_CMP_LT_F16: 71125f757f3fSDimitry Andric case AMDGPU::S_CMP_EQ_F16: 71135f757f3fSDimitry Andric case AMDGPU::S_CMP_LE_F16: 71145f757f3fSDimitry Andric case AMDGPU::S_CMP_GT_F16: 71155f757f3fSDimitry Andric case AMDGPU::S_CMP_LG_F16: 71165f757f3fSDimitry Andric case AMDGPU::S_CMP_GE_F16: 71175f757f3fSDimitry Andric case AMDGPU::S_CMP_O_F16: 71185f757f3fSDimitry Andric case AMDGPU::S_CMP_U_F16: 71195f757f3fSDimitry Andric case AMDGPU::S_CMP_NGE_F16: 71205f757f3fSDimitry Andric case AMDGPU::S_CMP_NLG_F16: 71215f757f3fSDimitry Andric case AMDGPU::S_CMP_NGT_F16: 71225f757f3fSDimitry Andric case AMDGPU::S_CMP_NLE_F16: 71235f757f3fSDimitry Andric 
case AMDGPU::S_CMP_NEQ_F16: 71245f757f3fSDimitry Andric case AMDGPU::S_CMP_NLT_F16: { 7125349cc55cSDimitry Andric Register CondReg = MRI.createVirtualRegister(RI.getWaveMaskRegClass()); 71265f757f3fSDimitry Andric auto NewInstr = 71275f757f3fSDimitry Andric BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(NewOpcode), CondReg) 71285f757f3fSDimitry Andric .setMIFlags(Inst.getFlags()); 71295f757f3fSDimitry Andric if (AMDGPU::getNamedOperandIdx(NewOpcode, 71305f757f3fSDimitry Andric AMDGPU::OpName::src0_modifiers) >= 0) { 71315f757f3fSDimitry Andric NewInstr 71325f757f3fSDimitry Andric .addImm(0) // src0_modifiers 71335f757f3fSDimitry Andric .add(Inst.getOperand(0)) // src0 71345f757f3fSDimitry Andric .addImm(0) // src1_modifiers 71355f757f3fSDimitry Andric .add(Inst.getOperand(1)) // src1 71365f757f3fSDimitry Andric .addImm(0); // clamp 71375f757f3fSDimitry Andric } else { 71385f757f3fSDimitry Andric NewInstr 7139349cc55cSDimitry Andric .add(Inst.getOperand(0)) 7140349cc55cSDimitry Andric .add(Inst.getOperand(1)); 71415f757f3fSDimitry Andric } 7142349cc55cSDimitry Andric legalizeOperands(*NewInstr, MDT); 7143349cc55cSDimitry Andric int SCCIdx = Inst.findRegisterDefOperandIdx(AMDGPU::SCC); 7144349cc55cSDimitry Andric MachineOperand SCCOp = Inst.getOperand(SCCIdx); 7145349cc55cSDimitry Andric addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg); 7146349cc55cSDimitry Andric Inst.eraseFromParent(); 714706c3fb27SDimitry Andric return; 7148349cc55cSDimitry Andric } 71495f757f3fSDimitry Andric case AMDGPU::S_CVT_HI_F32_F16: { 71505f757f3fSDimitry Andric const DebugLoc &DL = Inst.getDebugLoc(); 71515f757f3fSDimitry Andric Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 71525f757f3fSDimitry Andric Register NewDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 71535f757f3fSDimitry Andric BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHRREV_B32_e64), TmpReg) 71545f757f3fSDimitry Andric .addImm(16) 71555f757f3fSDimitry Andric .add(Inst.getOperand(1)); 71565f757f3fSDimitry Andric BuildMI(*MBB, Inst, DL, get(NewOpcode), NewDst) 71575f757f3fSDimitry Andric .addImm(0) // src0_modifiers 71585f757f3fSDimitry Andric .addReg(TmpReg) 71595f757f3fSDimitry Andric .addImm(0) // clamp 71605f757f3fSDimitry Andric .addImm(0); // omod 71615f757f3fSDimitry Andric 71625f757f3fSDimitry Andric MRI.replaceRegWith(Inst.getOperand(0).getReg(), NewDst); 71635f757f3fSDimitry Andric addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist); 71645f757f3fSDimitry Andric Inst.eraseFromParent(); 71655f757f3fSDimitry Andric return; 71665f757f3fSDimitry Andric } 71675f757f3fSDimitry Andric case AMDGPU::S_MINIMUM_F32: 71685f757f3fSDimitry Andric case AMDGPU::S_MAXIMUM_F32: 71695f757f3fSDimitry Andric case AMDGPU::S_MINIMUM_F16: 71705f757f3fSDimitry Andric case AMDGPU::S_MAXIMUM_F16: { 71715f757f3fSDimitry Andric const DebugLoc &DL = Inst.getDebugLoc(); 71725f757f3fSDimitry Andric Register NewDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 71735f757f3fSDimitry Andric MachineInstr *NewInstr = BuildMI(*MBB, Inst, DL, get(NewOpcode), NewDst) 71745f757f3fSDimitry Andric .addImm(0) // src0_modifiers 71755f757f3fSDimitry Andric .add(Inst.getOperand(1)) 71765f757f3fSDimitry Andric .addImm(0) // src1_modifiers 71775f757f3fSDimitry Andric .add(Inst.getOperand(2)) 71785f757f3fSDimitry Andric .addImm(0) // clamp 71795f757f3fSDimitry Andric .addImm(0); // omod 71805f757f3fSDimitry Andric MRI.replaceRegWith(Inst.getOperand(0).getReg(), NewDst); 71815f757f3fSDimitry Andric 71825f757f3fSDimitry Andric 
legalizeOperands(*NewInstr, MDT); 71835f757f3fSDimitry Andric addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist); 71845f757f3fSDimitry Andric Inst.eraseFromParent(); 71855f757f3fSDimitry Andric return; 71865f757f3fSDimitry Andric } 71875f757f3fSDimitry Andric } 7188349cc55cSDimitry Andric 71890b57cec5SDimitry Andric if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) { 71900b57cec5SDimitry Andric // We cannot move this instruction to the VALU, so we should try to 71910b57cec5SDimitry Andric // legalize its operands instead. 719206c3fb27SDimitry Andric legalizeOperands(Inst, MDT); 719306c3fb27SDimitry Andric return; 71940b57cec5SDimitry Andric } 7195bdd1243dSDimitry Andric // Handle converting generic instructions like COPY-to-SGPR into 7196bdd1243dSDimitry Andric // COPY-to-VGPR. 7197bdd1243dSDimitry Andric if (NewOpcode == Opcode) { 71988bcb0991SDimitry Andric Register DstReg = Inst.getOperand(0).getReg(); 71990b57cec5SDimitry Andric const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(Inst); 72000b57cec5SDimitry Andric 7201*647cbc5dSDimitry Andric // If it's a copy of a VGPR to a physical SGPR, insert a V_READFIRSTLANE and 7202*647cbc5dSDimitry Andric // hope for the best. 7203*647cbc5dSDimitry Andric if (Inst.isCopy() && DstReg.isPhysical() && 7204*647cbc5dSDimitry Andric RI.isVGPR(MRI, Inst.getOperand(1).getReg())) { 7205*647cbc5dSDimitry Andric // TODO: Only works for 32 bit registers. 7206*647cbc5dSDimitry Andric BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(), 7207*647cbc5dSDimitry Andric get(AMDGPU::V_READFIRSTLANE_B32), Inst.getOperand(0).getReg()) 7208*647cbc5dSDimitry Andric .add(Inst.getOperand(1)); 7209*647cbc5dSDimitry Andric Inst.eraseFromParent(); 7210*647cbc5dSDimitry Andric return; 7211*647cbc5dSDimitry Andric } 7212*647cbc5dSDimitry Andric 7213e8d8bef9SDimitry Andric if (Inst.isCopy() && Inst.getOperand(1).getReg().isVirtual() && 72140b57cec5SDimitry Andric NewDstRC == RI.getRegClassForReg(MRI, Inst.getOperand(1).getReg())) { 72150b57cec5SDimitry Andric // Instead of creating a copy where src and dst are the same register 72160b57cec5SDimitry Andric // class, we just replace all uses of dst with src. These kinds of 72170b57cec5SDimitry Andric // copies interfere with the heuristics MachineSink uses to decide 72180b57cec5SDimitry Andric // whether or not to split a critical edge. Since the pass assumes 72190b57cec5SDimitry Andric // that copies will end up as machine instructions and not be 72200b57cec5SDimitry Andric // eliminated. 72210b57cec5SDimitry Andric addUsersToMoveToVALUWorklist(DstReg, MRI, Worklist); 72220b57cec5SDimitry Andric MRI.replaceRegWith(DstReg, Inst.getOperand(1).getReg()); 72230b57cec5SDimitry Andric MRI.clearKillFlags(Inst.getOperand(1).getReg()); 72240b57cec5SDimitry Andric Inst.getOperand(0).setReg(DstReg); 72250b57cec5SDimitry Andric // Make sure we don't leave around a dead VGPR->SGPR copy. Normally 72260b57cec5SDimitry Andric // these are deleted later, but at -O0 it would leave a suspicious 72270b57cec5SDimitry Andric // looking illegal copy of an undef register. 
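      // The loop below strips every source operand and retags the instruction
      // as an IMPLICIT_DEF of the remaining destination, so no dead
      // VGPR->SGPR copy is left behind.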
72280b57cec5SDimitry Andric for (unsigned I = Inst.getNumOperands() - 1; I != 0; --I) 722981ad6265SDimitry Andric Inst.removeOperand(I); 72300b57cec5SDimitry Andric Inst.setDesc(get(AMDGPU::IMPLICIT_DEF)); 723106c3fb27SDimitry Andric return; 72320b57cec5SDimitry Andric } 7233bdd1243dSDimitry Andric Register NewDstReg = MRI.createVirtualRegister(NewDstRC); 7234bdd1243dSDimitry Andric MRI.replaceRegWith(DstReg, NewDstReg); 7235bdd1243dSDimitry Andric legalizeOperands(Inst, MDT); 7236bdd1243dSDimitry Andric addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist); 723706c3fb27SDimitry Andric return; 7238bdd1243dSDimitry Andric } 7239bdd1243dSDimitry Andric 7240bdd1243dSDimitry Andric // Use the new VALU Opcode. 7241bdd1243dSDimitry Andric auto NewInstr = BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(NewOpcode)) 7242bdd1243dSDimitry Andric .setMIFlags(Inst.getFlags()); 72435f757f3fSDimitry Andric if (isVOP3(NewOpcode) && !isVOP3(Opcode)) { 72445f757f3fSDimitry Andric // Intersperse VOP3 modifiers among the SALU operands. 72455f757f3fSDimitry Andric NewInstr->addOperand(Inst.getOperand(0)); 72465f757f3fSDimitry Andric if (AMDGPU::getNamedOperandIdx(NewOpcode, 72475f757f3fSDimitry Andric AMDGPU::OpName::src0_modifiers) >= 0) 72485f757f3fSDimitry Andric NewInstr.addImm(0); 72495f757f3fSDimitry Andric if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src0) >= 0) 72505f757f3fSDimitry Andric NewInstr->addOperand(Inst.getOperand(1)); 72515f757f3fSDimitry Andric 72525f757f3fSDimitry Andric if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) { 72535f757f3fSDimitry Andric // We are converting these to a BFE, so we need to add the missing 72545f757f3fSDimitry Andric // operands for the size and offset. 72555f757f3fSDimitry Andric unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16; 72565f757f3fSDimitry Andric NewInstr.addImm(0); 72575f757f3fSDimitry Andric NewInstr.addImm(Size); 72585f757f3fSDimitry Andric } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) { 72595f757f3fSDimitry Andric // The VALU version adds the second operand to the result, so insert an 72605f757f3fSDimitry Andric // extra 0 operand. 72615f757f3fSDimitry Andric NewInstr.addImm(0); 72625f757f3fSDimitry Andric } else if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) { 72635f757f3fSDimitry Andric const MachineOperand &OffsetWidthOp = Inst.getOperand(2); 72645f757f3fSDimitry Andric // If we need to move this to VGPRs, we need to unpack the second 72655f757f3fSDimitry Andric // operand back into the 2 separate ones for bit offset and width. 72665f757f3fSDimitry Andric assert(OffsetWidthOp.isImm() && 72675f757f3fSDimitry Andric "Scalar BFE is only implemented for constant width and offset"); 72685f757f3fSDimitry Andric uint32_t Imm = OffsetWidthOp.getImm(); 72695f757f3fSDimitry Andric 72705f757f3fSDimitry Andric uint32_t Offset = Imm & 0x3f; // Extract bits [5:0]. 72715f757f3fSDimitry Andric uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16]. 
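      // For example, a packed immediate of 0x00100008 unpacks to Offset = 8
      // and BitWidth = 16, which become the two separate V_BFE source
      // operands added below.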
72725f757f3fSDimitry Andric NewInstr.addImm(Offset); 72735f757f3fSDimitry Andric NewInstr.addImm(BitWidth); 72745f757f3fSDimitry Andric } else { 72755f757f3fSDimitry Andric if (AMDGPU::getNamedOperandIdx(NewOpcode, 72765f757f3fSDimitry Andric AMDGPU::OpName::src1_modifiers) >= 0) 72775f757f3fSDimitry Andric NewInstr.addImm(0); 72785f757f3fSDimitry Andric if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src1) >= 0) 72795f757f3fSDimitry Andric NewInstr->addOperand(Inst.getOperand(2)); 72805f757f3fSDimitry Andric if (AMDGPU::getNamedOperandIdx(NewOpcode, 72815f757f3fSDimitry Andric AMDGPU::OpName::src2_modifiers) >= 0) 72825f757f3fSDimitry Andric NewInstr.addImm(0); 72835f757f3fSDimitry Andric if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src2) >= 0) 72845f757f3fSDimitry Andric NewInstr->addOperand(Inst.getOperand(3)); 72855f757f3fSDimitry Andric if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::clamp) >= 0) 72865f757f3fSDimitry Andric NewInstr.addImm(0); 72875f757f3fSDimitry Andric if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::omod) >= 0) 72885f757f3fSDimitry Andric NewInstr.addImm(0); 72895f757f3fSDimitry Andric if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::op_sel) >= 0) 72905f757f3fSDimitry Andric NewInstr.addImm(0); 72915f757f3fSDimitry Andric } 72925f757f3fSDimitry Andric } else { 72935f757f3fSDimitry Andric // Just copy the SALU operands. 7294bdd1243dSDimitry Andric for (const MachineOperand &Op : Inst.explicit_operands()) 7295bdd1243dSDimitry Andric NewInstr->addOperand(Op); 72965f757f3fSDimitry Andric } 72975f757f3fSDimitry Andric 7298bdd1243dSDimitry Andric // Remove any references to SCC. Vector instructions can't read from it, and 7299bdd1243dSDimitry Andric // We're just about to add the implicit use / defs of VCC, and we don't want 7300bdd1243dSDimitry Andric // both. 7301bdd1243dSDimitry Andric for (MachineOperand &Op : Inst.implicit_operands()) { 7302bdd1243dSDimitry Andric if (Op.getReg() == AMDGPU::SCC) { 7303bdd1243dSDimitry Andric // Only propagate through live-def of SCC. 7304bdd1243dSDimitry Andric if (Op.isDef() && !Op.isDead()) 7305bdd1243dSDimitry Andric addSCCDefUsersToVALUWorklist(Op, Inst, Worklist); 7306bdd1243dSDimitry Andric if (Op.isUse()) 7307bdd1243dSDimitry Andric addSCCDefsToVALUWorklist(NewInstr, Worklist); 7308bdd1243dSDimitry Andric } 7309bdd1243dSDimitry Andric } 7310bdd1243dSDimitry Andric Inst.eraseFromParent(); 7311bdd1243dSDimitry Andric Register NewDstReg; 7312bdd1243dSDimitry Andric if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) { 7313bdd1243dSDimitry Andric Register DstReg = NewInstr->getOperand(0).getReg(); 7314bdd1243dSDimitry Andric assert(DstReg.isVirtual()); 7315bdd1243dSDimitry Andric // Update the destination register class. 
731606c3fb27SDimitry Andric const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(*NewInstr); 7317bdd1243dSDimitry Andric assert(NewDstRC); 73180b57cec5SDimitry Andric NewDstReg = MRI.createVirtualRegister(NewDstRC); 73190b57cec5SDimitry Andric MRI.replaceRegWith(DstReg, NewDstReg); 73200b57cec5SDimitry Andric } 7321bdd1243dSDimitry Andric fixImplicitOperands(*NewInstr); 73220b57cec5SDimitry Andric // Legalize the operands 732306c3fb27SDimitry Andric legalizeOperands(*NewInstr, MDT); 7324bdd1243dSDimitry Andric if (NewDstReg) 73250b57cec5SDimitry Andric addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist); 73260b57cec5SDimitry Andric } 73270b57cec5SDimitry Andric 73280b57cec5SDimitry Andric // Add/sub require special handling to deal with carry outs. 7329e8d8bef9SDimitry Andric std::pair<bool, MachineBasicBlock *> 733006c3fb27SDimitry Andric SIInstrInfo::moveScalarAddSub(SIInstrWorklist &Worklist, MachineInstr &Inst, 73310b57cec5SDimitry Andric MachineDominatorTree *MDT) const { 73320b57cec5SDimitry Andric if (ST.hasAddNoCarry()) { 73330b57cec5SDimitry Andric // Assume there is no user of scc since we don't select this in that case. 73340b57cec5SDimitry Andric // Since scc isn't used, it doesn't really matter if the i32 or u32 variant 73350b57cec5SDimitry Andric // is used. 73360b57cec5SDimitry Andric 73370b57cec5SDimitry Andric MachineBasicBlock &MBB = *Inst.getParent(); 73380b57cec5SDimitry Andric MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 73390b57cec5SDimitry Andric 73408bcb0991SDimitry Andric Register OldDstReg = Inst.getOperand(0).getReg(); 73418bcb0991SDimitry Andric Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 73420b57cec5SDimitry Andric 73430b57cec5SDimitry Andric unsigned Opc = Inst.getOpcode(); 73440b57cec5SDimitry Andric assert(Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32); 73450b57cec5SDimitry Andric 73460b57cec5SDimitry Andric unsigned NewOpc = Opc == AMDGPU::S_ADD_I32 ? 
73470b57cec5SDimitry Andric AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64; 73480b57cec5SDimitry Andric 73490b57cec5SDimitry Andric assert(Inst.getOperand(3).getReg() == AMDGPU::SCC); 735081ad6265SDimitry Andric Inst.removeOperand(3); 73510b57cec5SDimitry Andric 73520b57cec5SDimitry Andric Inst.setDesc(get(NewOpc)); 73530b57cec5SDimitry Andric Inst.addOperand(MachineOperand::CreateImm(0)); // clamp bit 73540b57cec5SDimitry Andric Inst.addImplicitDefUseOperands(*MBB.getParent()); 73550b57cec5SDimitry Andric MRI.replaceRegWith(OldDstReg, ResultReg); 7356e8d8bef9SDimitry Andric MachineBasicBlock *NewBB = legalizeOperands(Inst, MDT); 73570b57cec5SDimitry Andric 73580b57cec5SDimitry Andric addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); 7359bdd1243dSDimitry Andric return std::pair(true, NewBB); 73600b57cec5SDimitry Andric } 73610b57cec5SDimitry Andric 7362bdd1243dSDimitry Andric return std::pair(false, nullptr); 73630b57cec5SDimitry Andric } 73640b57cec5SDimitry Andric 736506c3fb27SDimitry Andric void SIInstrInfo::lowerSelect(SIInstrWorklist &Worklist, MachineInstr &Inst, 73665ffd83dbSDimitry Andric MachineDominatorTree *MDT) const { 73675ffd83dbSDimitry Andric 73685ffd83dbSDimitry Andric MachineBasicBlock &MBB = *Inst.getParent(); 73695ffd83dbSDimitry Andric MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 73705ffd83dbSDimitry Andric MachineBasicBlock::iterator MII = Inst; 73715ffd83dbSDimitry Andric DebugLoc DL = Inst.getDebugLoc(); 73725ffd83dbSDimitry Andric 73735ffd83dbSDimitry Andric MachineOperand &Dest = Inst.getOperand(0); 73745ffd83dbSDimitry Andric MachineOperand &Src0 = Inst.getOperand(1); 73755ffd83dbSDimitry Andric MachineOperand &Src1 = Inst.getOperand(2); 73765ffd83dbSDimitry Andric MachineOperand &Cond = Inst.getOperand(3); 73775ffd83dbSDimitry Andric 73785f757f3fSDimitry Andric Register CondReg = Cond.getReg(); 73795f757f3fSDimitry Andric bool IsSCC = (CondReg == AMDGPU::SCC); 7380349cc55cSDimitry Andric 7381349cc55cSDimitry Andric // If this is a trivial select where the condition is effectively not SCC 73825f757f3fSDimitry Andric // (CondReg is a source of copy to SCC), then the select is semantically 73835f757f3fSDimitry Andric // equivalent to copying CondReg. Hence, there is no need to create 7384349cc55cSDimitry Andric // V_CNDMASK, we can just use that and bail out. 
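  // Roughly: select(cond, -1, 0) where cond is already a lane-mask register
  // is just cond itself, which is what the early-out below recognizes.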
7385349cc55cSDimitry Andric if (!IsSCC && Src0.isImm() && (Src0.getImm() == -1) && Src1.isImm() && 7386349cc55cSDimitry Andric (Src1.getImm() == 0)) { 73875f757f3fSDimitry Andric MRI.replaceRegWith(Dest.getReg(), CondReg); 7388349cc55cSDimitry Andric return; 7389349cc55cSDimitry Andric } 7390349cc55cSDimitry Andric 73915f757f3fSDimitry Andric Register NewCondReg = CondReg; 73925f757f3fSDimitry Andric if (IsSCC) { 7393349cc55cSDimitry Andric const TargetRegisterClass *TC = 7394349cc55cSDimitry Andric RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID); 73955f757f3fSDimitry Andric NewCondReg = MRI.createVirtualRegister(TC); 7396349cc55cSDimitry Andric 7397349cc55cSDimitry Andric // Now look for the closest SCC def if it is a copy 73985f757f3fSDimitry Andric // replacing the CondReg with the COPY source register 7399349cc55cSDimitry Andric bool CopyFound = false; 74005ffd83dbSDimitry Andric for (MachineInstr &CandI : 74015ffd83dbSDimitry Andric make_range(std::next(MachineBasicBlock::reverse_iterator(Inst)), 74025ffd83dbSDimitry Andric Inst.getParent()->rend())) { 74035ffd83dbSDimitry Andric if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, false, false, &RI) != 74045ffd83dbSDimitry Andric -1) { 74055ffd83dbSDimitry Andric if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) { 74065f757f3fSDimitry Andric BuildMI(MBB, MII, DL, get(AMDGPU::COPY), NewCondReg) 7407349cc55cSDimitry Andric .addReg(CandI.getOperand(1).getReg()); 7408349cc55cSDimitry Andric CopyFound = true; 74095ffd83dbSDimitry Andric } 74105ffd83dbSDimitry Andric break; 74115ffd83dbSDimitry Andric } 74125ffd83dbSDimitry Andric } 7413349cc55cSDimitry Andric if (!CopyFound) { 7414349cc55cSDimitry Andric // SCC def is not a copy 74155ffd83dbSDimitry Andric // Insert a trivial select instead of creating a copy, because a copy from 74165ffd83dbSDimitry Andric // SCC would semantically mean just copying a single bit, but we may need 74175ffd83dbSDimitry Andric // the result to be a vector condition mask that needs preserving. 74185ffd83dbSDimitry Andric unsigned Opcode = (ST.getWavefrontSize() == 64) ? 
AMDGPU::S_CSELECT_B64 74195ffd83dbSDimitry Andric : AMDGPU::S_CSELECT_B32; 74205ffd83dbSDimitry Andric auto NewSelect = 74215f757f3fSDimitry Andric BuildMI(MBB, MII, DL, get(Opcode), NewCondReg).addImm(-1).addImm(0); 74225ffd83dbSDimitry Andric NewSelect->getOperand(3).setIsUndef(Cond.isUndef()); 7423349cc55cSDimitry Andric } 74245ffd83dbSDimitry Andric } 74255ffd83dbSDimitry Andric 74265f757f3fSDimitry Andric Register NewDestReg = MRI.createVirtualRegister( 74275f757f3fSDimitry Andric RI.getEquivalentVGPRClass(MRI.getRegClass(Dest.getReg()))); 74285f757f3fSDimitry Andric MachineInstr *NewInst; 74295f757f3fSDimitry Andric if (Inst.getOpcode() == AMDGPU::S_CSELECT_B32) { 74305f757f3fSDimitry Andric NewInst = BuildMI(MBB, MII, DL, get(AMDGPU::V_CNDMASK_B32_e64), NewDestReg) 74315ffd83dbSDimitry Andric .addImm(0) 74325ffd83dbSDimitry Andric .add(Src1) // False 74335ffd83dbSDimitry Andric .addImm(0) 74345ffd83dbSDimitry Andric .add(Src0) // True 74355f757f3fSDimitry Andric .addReg(NewCondReg); 74365f757f3fSDimitry Andric } else { 74375f757f3fSDimitry Andric NewInst = 74385f757f3fSDimitry Andric BuildMI(MBB, MII, DL, get(AMDGPU::V_CNDMASK_B64_PSEUDO), NewDestReg) 74395f757f3fSDimitry Andric .add(Src1) // False 74405f757f3fSDimitry Andric .add(Src0) // True 74415f757f3fSDimitry Andric .addReg(NewCondReg); 74425f757f3fSDimitry Andric } 74435f757f3fSDimitry Andric MRI.replaceRegWith(Dest.getReg(), NewDestReg); 74445f757f3fSDimitry Andric legalizeOperands(*NewInst, MDT); 74455f757f3fSDimitry Andric addUsersToMoveToVALUWorklist(NewDestReg, MRI, Worklist); 74465ffd83dbSDimitry Andric } 74475ffd83dbSDimitry Andric 744806c3fb27SDimitry Andric void SIInstrInfo::lowerScalarAbs(SIInstrWorklist &Worklist, 74490b57cec5SDimitry Andric MachineInstr &Inst) const { 74500b57cec5SDimitry Andric MachineBasicBlock &MBB = *Inst.getParent(); 74510b57cec5SDimitry Andric MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 74520b57cec5SDimitry Andric MachineBasicBlock::iterator MII = Inst; 74530b57cec5SDimitry Andric DebugLoc DL = Inst.getDebugLoc(); 74540b57cec5SDimitry Andric 74550b57cec5SDimitry Andric MachineOperand &Dest = Inst.getOperand(0); 74560b57cec5SDimitry Andric MachineOperand &Src = Inst.getOperand(1); 74578bcb0991SDimitry Andric Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 74588bcb0991SDimitry Andric Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 74590b57cec5SDimitry Andric 74600b57cec5SDimitry Andric unsigned SubOp = ST.hasAddNoCarry() ? 
7461e8d8bef9SDimitry Andric AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_CO_U32_e32; 74620b57cec5SDimitry Andric 74630b57cec5SDimitry Andric BuildMI(MBB, MII, DL, get(SubOp), TmpReg) 74640b57cec5SDimitry Andric .addImm(0) 74650b57cec5SDimitry Andric .addReg(Src.getReg()); 74660b57cec5SDimitry Andric 74670b57cec5SDimitry Andric BuildMI(MBB, MII, DL, get(AMDGPU::V_MAX_I32_e64), ResultReg) 74680b57cec5SDimitry Andric .addReg(Src.getReg()) 74690b57cec5SDimitry Andric .addReg(TmpReg); 74700b57cec5SDimitry Andric 74710b57cec5SDimitry Andric MRI.replaceRegWith(Dest.getReg(), ResultReg); 74720b57cec5SDimitry Andric addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); 74730b57cec5SDimitry Andric } 74740b57cec5SDimitry Andric 747506c3fb27SDimitry Andric void SIInstrInfo::lowerScalarXnor(SIInstrWorklist &Worklist, 74760b57cec5SDimitry Andric MachineInstr &Inst) const { 74770b57cec5SDimitry Andric MachineBasicBlock &MBB = *Inst.getParent(); 74780b57cec5SDimitry Andric MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 74790b57cec5SDimitry Andric MachineBasicBlock::iterator MII = Inst; 74800b57cec5SDimitry Andric const DebugLoc &DL = Inst.getDebugLoc(); 74810b57cec5SDimitry Andric 74820b57cec5SDimitry Andric MachineOperand &Dest = Inst.getOperand(0); 74830b57cec5SDimitry Andric MachineOperand &Src0 = Inst.getOperand(1); 74840b57cec5SDimitry Andric MachineOperand &Src1 = Inst.getOperand(2); 74850b57cec5SDimitry Andric 74860b57cec5SDimitry Andric if (ST.hasDLInsts()) { 74878bcb0991SDimitry Andric Register NewDest = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 74880b57cec5SDimitry Andric legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src0, MRI, DL); 74890b57cec5SDimitry Andric legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src1, MRI, DL); 74900b57cec5SDimitry Andric 74910b57cec5SDimitry Andric BuildMI(MBB, MII, DL, get(AMDGPU::V_XNOR_B32_e64), NewDest) 74920b57cec5SDimitry Andric .add(Src0) 74930b57cec5SDimitry Andric .add(Src1); 74940b57cec5SDimitry Andric 74950b57cec5SDimitry Andric MRI.replaceRegWith(Dest.getReg(), NewDest); 74960b57cec5SDimitry Andric addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist); 74970b57cec5SDimitry Andric } else { 74980b57cec5SDimitry Andric // Using the identity !(x ^ y) == (!x ^ y) == (x ^ !y), we can 74990b57cec5SDimitry Andric // invert either source and then perform the XOR. If either source is a 75000b57cec5SDimitry Andric // scalar register, then we can leave the inversion on the scalar unit to 750181ad6265SDimitry Andric // achieve a better distribution of scalar and vector instructions. 75020b57cec5SDimitry Andric bool Src0IsSGPR = Src0.isReg() && 75030b57cec5SDimitry Andric RI.isSGPRClass(MRI.getRegClass(Src0.getReg())); 75040b57cec5SDimitry Andric bool Src1IsSGPR = Src1.isReg() && 75050b57cec5SDimitry Andric RI.isSGPRClass(MRI.getRegClass(Src1.getReg())); 75060b57cec5SDimitry Andric MachineInstr *Xor; 75078bcb0991SDimitry Andric Register Temp = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); 75088bcb0991SDimitry Andric Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); 75090b57cec5SDimitry Andric 75100b57cec5SDimitry Andric // Build a pair of scalar instructions and add them to the work list. 75110b57cec5SDimitry Andric // The next iteration over the work list will lower these to the vector 75120b57cec5SDimitry Andric // unit as necessary. 
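    // The three branches below match the identity above: if either source is
    // an SGPR it is inverted with S_NOT_B32 before the S_XOR_B32; otherwise
    // the XOR result itself is inverted.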
75130b57cec5SDimitry Andric if (Src0IsSGPR) { 75140b57cec5SDimitry Andric BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Temp).add(Src0); 75150b57cec5SDimitry Andric Xor = BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B32), NewDest) 75160b57cec5SDimitry Andric .addReg(Temp) 75170b57cec5SDimitry Andric .add(Src1); 75180b57cec5SDimitry Andric } else if (Src1IsSGPR) { 75190b57cec5SDimitry Andric BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Temp).add(Src1); 75200b57cec5SDimitry Andric Xor = BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B32), NewDest) 75210b57cec5SDimitry Andric .add(Src0) 75220b57cec5SDimitry Andric .addReg(Temp); 75230b57cec5SDimitry Andric } else { 75240b57cec5SDimitry Andric Xor = BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B32), Temp) 75250b57cec5SDimitry Andric .add(Src0) 75260b57cec5SDimitry Andric .add(Src1); 75270b57cec5SDimitry Andric MachineInstr *Not = 75280b57cec5SDimitry Andric BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), NewDest).addReg(Temp); 75290b57cec5SDimitry Andric Worklist.insert(Not); 75300b57cec5SDimitry Andric } 75310b57cec5SDimitry Andric 75320b57cec5SDimitry Andric MRI.replaceRegWith(Dest.getReg(), NewDest); 75330b57cec5SDimitry Andric 75340b57cec5SDimitry Andric Worklist.insert(Xor); 75350b57cec5SDimitry Andric 75360b57cec5SDimitry Andric addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist); 75370b57cec5SDimitry Andric } 75380b57cec5SDimitry Andric } 75390b57cec5SDimitry Andric 754006c3fb27SDimitry Andric void SIInstrInfo::splitScalarNotBinop(SIInstrWorklist &Worklist, 75410b57cec5SDimitry Andric MachineInstr &Inst, 75420b57cec5SDimitry Andric unsigned Opcode) const { 75430b57cec5SDimitry Andric MachineBasicBlock &MBB = *Inst.getParent(); 75440b57cec5SDimitry Andric MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 75450b57cec5SDimitry Andric MachineBasicBlock::iterator MII = Inst; 75460b57cec5SDimitry Andric const DebugLoc &DL = Inst.getDebugLoc(); 75470b57cec5SDimitry Andric 75480b57cec5SDimitry Andric MachineOperand &Dest = Inst.getOperand(0); 75490b57cec5SDimitry Andric MachineOperand &Src0 = Inst.getOperand(1); 75500b57cec5SDimitry Andric MachineOperand &Src1 = Inst.getOperand(2); 75510b57cec5SDimitry Andric 75528bcb0991SDimitry Andric Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); 75538bcb0991SDimitry Andric Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); 75540b57cec5SDimitry Andric 75550b57cec5SDimitry Andric MachineInstr &Op = *BuildMI(MBB, MII, DL, get(Opcode), Interm) 75560b57cec5SDimitry Andric .add(Src0) 75570b57cec5SDimitry Andric .add(Src1); 75580b57cec5SDimitry Andric 75590b57cec5SDimitry Andric MachineInstr &Not = *BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), NewDest) 75600b57cec5SDimitry Andric .addReg(Interm); 75610b57cec5SDimitry Andric 75620b57cec5SDimitry Andric Worklist.insert(&Op); 75630b57cec5SDimitry Andric Worklist.insert(&Not); 75640b57cec5SDimitry Andric 75650b57cec5SDimitry Andric MRI.replaceRegWith(Dest.getReg(), NewDest); 75660b57cec5SDimitry Andric addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist); 75670b57cec5SDimitry Andric } 75680b57cec5SDimitry Andric 756906c3fb27SDimitry Andric void SIInstrInfo::splitScalarBinOpN2(SIInstrWorklist &Worklist, 75700b57cec5SDimitry Andric MachineInstr &Inst, 75710b57cec5SDimitry Andric unsigned Opcode) const { 75720b57cec5SDimitry Andric MachineBasicBlock &MBB = *Inst.getParent(); 75730b57cec5SDimitry Andric MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 75740b57cec5SDimitry Andric MachineBasicBlock::iterator MII = Inst; 
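  // S_ANDN2/S_ORN2 compute op(src0, ~src1); the split below materializes
  // ~src1 with S_NOT_B32 and then emits the plain AND/OR, queueing both new
  // instructions so they are moved to the VALU in turn.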
75750b57cec5SDimitry Andric const DebugLoc &DL = Inst.getDebugLoc(); 75760b57cec5SDimitry Andric 75770b57cec5SDimitry Andric MachineOperand &Dest = Inst.getOperand(0); 75780b57cec5SDimitry Andric MachineOperand &Src0 = Inst.getOperand(1); 75790b57cec5SDimitry Andric MachineOperand &Src1 = Inst.getOperand(2); 75800b57cec5SDimitry Andric 75818bcb0991SDimitry Andric Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); 75828bcb0991SDimitry Andric Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); 75830b57cec5SDimitry Andric 75840b57cec5SDimitry Andric MachineInstr &Not = *BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Interm) 75850b57cec5SDimitry Andric .add(Src1); 75860b57cec5SDimitry Andric 75870b57cec5SDimitry Andric MachineInstr &Op = *BuildMI(MBB, MII, DL, get(Opcode), NewDest) 75880b57cec5SDimitry Andric .add(Src0) 75890b57cec5SDimitry Andric .addReg(Interm); 75900b57cec5SDimitry Andric 75910b57cec5SDimitry Andric Worklist.insert(&Not); 75920b57cec5SDimitry Andric Worklist.insert(&Op); 75930b57cec5SDimitry Andric 75940b57cec5SDimitry Andric MRI.replaceRegWith(Dest.getReg(), NewDest); 75950b57cec5SDimitry Andric addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist); 75960b57cec5SDimitry Andric } 75970b57cec5SDimitry Andric 759806c3fb27SDimitry Andric void SIInstrInfo::splitScalar64BitUnaryOp(SIInstrWorklist &Worklist, 759906c3fb27SDimitry Andric MachineInstr &Inst, unsigned Opcode, 760006c3fb27SDimitry Andric bool Swap) const { 76010b57cec5SDimitry Andric MachineBasicBlock &MBB = *Inst.getParent(); 76020b57cec5SDimitry Andric MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 76030b57cec5SDimitry Andric 76040b57cec5SDimitry Andric MachineOperand &Dest = Inst.getOperand(0); 76050b57cec5SDimitry Andric MachineOperand &Src0 = Inst.getOperand(1); 76060b57cec5SDimitry Andric DebugLoc DL = Inst.getDebugLoc(); 76070b57cec5SDimitry Andric 76080b57cec5SDimitry Andric MachineBasicBlock::iterator MII = Inst; 76090b57cec5SDimitry Andric 76100b57cec5SDimitry Andric const MCInstrDesc &InstDesc = get(Opcode); 76110b57cec5SDimitry Andric const TargetRegisterClass *Src0RC = Src0.isReg() ? 
76120b57cec5SDimitry Andric MRI.getRegClass(Src0.getReg()) : 76130b57cec5SDimitry Andric &AMDGPU::SGPR_32RegClass; 76140b57cec5SDimitry Andric 7615bdd1243dSDimitry Andric const TargetRegisterClass *Src0SubRC = 7616bdd1243dSDimitry Andric RI.getSubRegisterClass(Src0RC, AMDGPU::sub0); 76170b57cec5SDimitry Andric 76180b57cec5SDimitry Andric MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, 76190b57cec5SDimitry Andric AMDGPU::sub0, Src0SubRC); 76200b57cec5SDimitry Andric 76210b57cec5SDimitry Andric const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg()); 76220b57cec5SDimitry Andric const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC); 7623bdd1243dSDimitry Andric const TargetRegisterClass *NewDestSubRC = 7624bdd1243dSDimitry Andric RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0); 76250b57cec5SDimitry Andric 76268bcb0991SDimitry Andric Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC); 76270b57cec5SDimitry Andric MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0); 76280b57cec5SDimitry Andric 76290b57cec5SDimitry Andric MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, 76300b57cec5SDimitry Andric AMDGPU::sub1, Src0SubRC); 76310b57cec5SDimitry Andric 76328bcb0991SDimitry Andric Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC); 76330b57cec5SDimitry Andric MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1).add(SrcReg0Sub1); 76340b57cec5SDimitry Andric 7635fe6060f1SDimitry Andric if (Swap) 7636fe6060f1SDimitry Andric std::swap(DestSub0, DestSub1); 7637fe6060f1SDimitry Andric 76388bcb0991SDimitry Andric Register FullDestReg = MRI.createVirtualRegister(NewDestRC); 76390b57cec5SDimitry Andric BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg) 76400b57cec5SDimitry Andric .addReg(DestSub0) 76410b57cec5SDimitry Andric .addImm(AMDGPU::sub0) 76420b57cec5SDimitry Andric .addReg(DestSub1) 76430b57cec5SDimitry Andric .addImm(AMDGPU::sub1); 76440b57cec5SDimitry Andric 76450b57cec5SDimitry Andric MRI.replaceRegWith(Dest.getReg(), FullDestReg); 76460b57cec5SDimitry Andric 76470b57cec5SDimitry Andric Worklist.insert(&LoHalf); 76480b57cec5SDimitry Andric Worklist.insert(&HiHalf); 76490b57cec5SDimitry Andric 76500b57cec5SDimitry Andric // We don't need to legalizeOperands here because for a single operand, src0 76510b57cec5SDimitry Andric // will support any kind of input. 76520b57cec5SDimitry Andric 76530b57cec5SDimitry Andric // Move all users of this moved value. 
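  // FullDestReg is now a VGPR, so any remaining SALU users of the original
  // destination have become illegal; queue them so the outer worklist loop
  // converts them as well.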
76540b57cec5SDimitry Andric addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist); 76550b57cec5SDimitry Andric } 76560b57cec5SDimitry Andric 765706c3fb27SDimitry Andric void SIInstrInfo::splitScalar64BitBinaryOp(SIInstrWorklist &Worklist, 76580b57cec5SDimitry Andric MachineInstr &Inst, unsigned Opcode, 76590b57cec5SDimitry Andric MachineDominatorTree *MDT) const { 76600b57cec5SDimitry Andric MachineBasicBlock &MBB = *Inst.getParent(); 76610b57cec5SDimitry Andric MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 76620b57cec5SDimitry Andric 76630b57cec5SDimitry Andric MachineOperand &Dest = Inst.getOperand(0); 76640b57cec5SDimitry Andric MachineOperand &Src0 = Inst.getOperand(1); 76650b57cec5SDimitry Andric MachineOperand &Src1 = Inst.getOperand(2); 76660b57cec5SDimitry Andric DebugLoc DL = Inst.getDebugLoc(); 76670b57cec5SDimitry Andric 76680b57cec5SDimitry Andric MachineBasicBlock::iterator MII = Inst; 76690b57cec5SDimitry Andric 76700b57cec5SDimitry Andric const MCInstrDesc &InstDesc = get(Opcode); 76710b57cec5SDimitry Andric const TargetRegisterClass *Src0RC = Src0.isReg() ? 76720b57cec5SDimitry Andric MRI.getRegClass(Src0.getReg()) : 76730b57cec5SDimitry Andric &AMDGPU::SGPR_32RegClass; 76740b57cec5SDimitry Andric 7675bdd1243dSDimitry Andric const TargetRegisterClass *Src0SubRC = 7676bdd1243dSDimitry Andric RI.getSubRegisterClass(Src0RC, AMDGPU::sub0); 76770b57cec5SDimitry Andric const TargetRegisterClass *Src1RC = Src1.isReg() ? 76780b57cec5SDimitry Andric MRI.getRegClass(Src1.getReg()) : 76790b57cec5SDimitry Andric &AMDGPU::SGPR_32RegClass; 76800b57cec5SDimitry Andric 7681bdd1243dSDimitry Andric const TargetRegisterClass *Src1SubRC = 7682bdd1243dSDimitry Andric RI.getSubRegisterClass(Src1RC, AMDGPU::sub0); 76830b57cec5SDimitry Andric 76840b57cec5SDimitry Andric MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, 76850b57cec5SDimitry Andric AMDGPU::sub0, Src0SubRC); 76860b57cec5SDimitry Andric MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, 76870b57cec5SDimitry Andric AMDGPU::sub0, Src1SubRC); 76880b57cec5SDimitry Andric MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, 76890b57cec5SDimitry Andric AMDGPU::sub1, Src0SubRC); 76900b57cec5SDimitry Andric MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, 76910b57cec5SDimitry Andric AMDGPU::sub1, Src1SubRC); 76920b57cec5SDimitry Andric 76930b57cec5SDimitry Andric const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg()); 76940b57cec5SDimitry Andric const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC); 7695bdd1243dSDimitry Andric const TargetRegisterClass *NewDestSubRC = 7696bdd1243dSDimitry Andric RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0); 76970b57cec5SDimitry Andric 76988bcb0991SDimitry Andric Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC); 76990b57cec5SDimitry Andric MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0) 77000b57cec5SDimitry Andric .add(SrcReg0Sub0) 77010b57cec5SDimitry Andric .add(SrcReg1Sub0); 77020b57cec5SDimitry Andric 77038bcb0991SDimitry Andric Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC); 77040b57cec5SDimitry Andric MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1) 77050b57cec5SDimitry Andric .add(SrcReg0Sub1) 77060b57cec5SDimitry Andric .add(SrcReg1Sub1); 77070b57cec5SDimitry Andric 77088bcb0991SDimitry Andric Register FullDestReg = MRI.createVirtualRegister(NewDestRC); 77090b57cec5SDimitry Andric BuildMI(MBB, 
MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg) 77100b57cec5SDimitry Andric .addReg(DestSub0) 77110b57cec5SDimitry Andric .addImm(AMDGPU::sub0) 77120b57cec5SDimitry Andric .addReg(DestSub1) 77130b57cec5SDimitry Andric .addImm(AMDGPU::sub1); 77140b57cec5SDimitry Andric 77150b57cec5SDimitry Andric MRI.replaceRegWith(Dest.getReg(), FullDestReg); 77160b57cec5SDimitry Andric 77170b57cec5SDimitry Andric Worklist.insert(&LoHalf); 77180b57cec5SDimitry Andric Worklist.insert(&HiHalf); 77190b57cec5SDimitry Andric 772081ad6265SDimitry Andric // Move all users of this moved value. 77210b57cec5SDimitry Andric addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist); 77220b57cec5SDimitry Andric } 77230b57cec5SDimitry Andric 772406c3fb27SDimitry Andric void SIInstrInfo::splitScalar64BitXnor(SIInstrWorklist &Worklist, 77250b57cec5SDimitry Andric MachineInstr &Inst, 77260b57cec5SDimitry Andric MachineDominatorTree *MDT) const { 77270b57cec5SDimitry Andric MachineBasicBlock &MBB = *Inst.getParent(); 77280b57cec5SDimitry Andric MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 77290b57cec5SDimitry Andric 77300b57cec5SDimitry Andric MachineOperand &Dest = Inst.getOperand(0); 77310b57cec5SDimitry Andric MachineOperand &Src0 = Inst.getOperand(1); 77320b57cec5SDimitry Andric MachineOperand &Src1 = Inst.getOperand(2); 77330b57cec5SDimitry Andric const DebugLoc &DL = Inst.getDebugLoc(); 77340b57cec5SDimitry Andric 77350b57cec5SDimitry Andric MachineBasicBlock::iterator MII = Inst; 77360b57cec5SDimitry Andric 77370b57cec5SDimitry Andric const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg()); 77380b57cec5SDimitry Andric 77398bcb0991SDimitry Andric Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); 77400b57cec5SDimitry Andric 77410b57cec5SDimitry Andric MachineOperand* Op0; 77420b57cec5SDimitry Andric MachineOperand* Op1; 77430b57cec5SDimitry Andric 77440b57cec5SDimitry Andric if (Src0.isReg() && RI.isSGPRReg(MRI, Src0.getReg())) { 77450b57cec5SDimitry Andric Op0 = &Src0; 77460b57cec5SDimitry Andric Op1 = &Src1; 77470b57cec5SDimitry Andric } else { 77480b57cec5SDimitry Andric Op0 = &Src1; 77490b57cec5SDimitry Andric Op1 = &Src0; 77500b57cec5SDimitry Andric } 77510b57cec5SDimitry Andric 77520b57cec5SDimitry Andric BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B64), Interm) 77530b57cec5SDimitry Andric .add(*Op0); 77540b57cec5SDimitry Andric 77558bcb0991SDimitry Andric Register NewDest = MRI.createVirtualRegister(DestRC); 77560b57cec5SDimitry Andric 77570b57cec5SDimitry Andric MachineInstr &Xor = *BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B64), NewDest) 77580b57cec5SDimitry Andric .addReg(Interm) 77590b57cec5SDimitry Andric .add(*Op1); 77600b57cec5SDimitry Andric 77610b57cec5SDimitry Andric MRI.replaceRegWith(Dest.getReg(), NewDest); 77620b57cec5SDimitry Andric 77630b57cec5SDimitry Andric Worklist.insert(&Xor); 77640b57cec5SDimitry Andric } 77650b57cec5SDimitry Andric 776606c3fb27SDimitry Andric void SIInstrInfo::splitScalar64BitBCNT(SIInstrWorklist &Worklist, 776706c3fb27SDimitry Andric MachineInstr &Inst) const { 77680b57cec5SDimitry Andric MachineBasicBlock &MBB = *Inst.getParent(); 77690b57cec5SDimitry Andric MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 77700b57cec5SDimitry Andric 77710b57cec5SDimitry Andric MachineBasicBlock::iterator MII = Inst; 77720b57cec5SDimitry Andric const DebugLoc &DL = Inst.getDebugLoc(); 77730b57cec5SDimitry Andric 77740b57cec5SDimitry Andric MachineOperand &Dest = Inst.getOperand(0); 77750b57cec5SDimitry Andric MachineOperand &Src = 
Inst.getOperand(1); 77760b57cec5SDimitry Andric 77770b57cec5SDimitry Andric const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64); 77780b57cec5SDimitry Andric const TargetRegisterClass *SrcRC = Src.isReg() ? 77790b57cec5SDimitry Andric MRI.getRegClass(Src.getReg()) : 77800b57cec5SDimitry Andric &AMDGPU::SGPR_32RegClass; 77810b57cec5SDimitry Andric 77828bcb0991SDimitry Andric Register MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 77838bcb0991SDimitry Andric Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 77840b57cec5SDimitry Andric 7785bdd1243dSDimitry Andric const TargetRegisterClass *SrcSubRC = 7786bdd1243dSDimitry Andric RI.getSubRegisterClass(SrcRC, AMDGPU::sub0); 77870b57cec5SDimitry Andric 77880b57cec5SDimitry Andric MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC, 77890b57cec5SDimitry Andric AMDGPU::sub0, SrcSubRC); 77900b57cec5SDimitry Andric MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC, 77910b57cec5SDimitry Andric AMDGPU::sub1, SrcSubRC); 77920b57cec5SDimitry Andric 77930b57cec5SDimitry Andric BuildMI(MBB, MII, DL, InstDesc, MidReg).add(SrcRegSub0).addImm(0); 77940b57cec5SDimitry Andric 77950b57cec5SDimitry Andric BuildMI(MBB, MII, DL, InstDesc, ResultReg).add(SrcRegSub1).addReg(MidReg); 77960b57cec5SDimitry Andric 77970b57cec5SDimitry Andric MRI.replaceRegWith(Dest.getReg(), ResultReg); 77980b57cec5SDimitry Andric 779981ad6265SDimitry Andric // We don't need to legalize operands here. src0 for either instruction can be 78000b57cec5SDimitry Andric // an SGPR, and the second input is unused or determined here. 78010b57cec5SDimitry Andric addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); 78020b57cec5SDimitry Andric } 78030b57cec5SDimitry Andric 780406c3fb27SDimitry Andric void SIInstrInfo::splitScalar64BitBFE(SIInstrWorklist &Worklist, 78050b57cec5SDimitry Andric MachineInstr &Inst) const { 78060b57cec5SDimitry Andric MachineBasicBlock &MBB = *Inst.getParent(); 78070b57cec5SDimitry Andric MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 78080b57cec5SDimitry Andric MachineBasicBlock::iterator MII = Inst; 78090b57cec5SDimitry Andric const DebugLoc &DL = Inst.getDebugLoc(); 78100b57cec5SDimitry Andric 78110b57cec5SDimitry Andric MachineOperand &Dest = Inst.getOperand(0); 78120b57cec5SDimitry Andric uint32_t Imm = Inst.getOperand(2).getImm(); 78130b57cec5SDimitry Andric uint32_t Offset = Imm & 0x3f; // Extract bits [5:0]. 78140b57cec5SDimitry Andric uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16]. 78150b57cec5SDimitry Andric 78160b57cec5SDimitry Andric (void) Offset; 78170b57cec5SDimitry Andric 78180b57cec5SDimitry Andric // Only sext_inreg cases handled. 
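  // That is, S_BFE_I64 only reaches here as a sign_extend_inreg: the low half
  // is sign-extended with a 32-bit V_BFE (or used as-is for width 32) and the
  // high half is rebuilt as an arithmetic shift of the low half by 31.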
78190b57cec5SDimitry Andric assert(Inst.getOpcode() == AMDGPU::S_BFE_I64 && BitWidth <= 32 && 78200b57cec5SDimitry Andric Offset == 0 && "Not implemented"); 78210b57cec5SDimitry Andric 78220b57cec5SDimitry Andric if (BitWidth < 32) { 78238bcb0991SDimitry Andric Register MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 78248bcb0991SDimitry Andric Register MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 78258bcb0991SDimitry Andric Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); 78260b57cec5SDimitry Andric 7827e8d8bef9SDimitry Andric BuildMI(MBB, MII, DL, get(AMDGPU::V_BFE_I32_e64), MidRegLo) 78280b57cec5SDimitry Andric .addReg(Inst.getOperand(1).getReg(), 0, AMDGPU::sub0) 78290b57cec5SDimitry Andric .addImm(0) 78300b57cec5SDimitry Andric .addImm(BitWidth); 78310b57cec5SDimitry Andric 78320b57cec5SDimitry Andric BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e32), MidRegHi) 78330b57cec5SDimitry Andric .addImm(31) 78340b57cec5SDimitry Andric .addReg(MidRegLo); 78350b57cec5SDimitry Andric 78360b57cec5SDimitry Andric BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg) 78370b57cec5SDimitry Andric .addReg(MidRegLo) 78380b57cec5SDimitry Andric .addImm(AMDGPU::sub0) 78390b57cec5SDimitry Andric .addReg(MidRegHi) 78400b57cec5SDimitry Andric .addImm(AMDGPU::sub1); 78410b57cec5SDimitry Andric 78420b57cec5SDimitry Andric MRI.replaceRegWith(Dest.getReg(), ResultReg); 78430b57cec5SDimitry Andric addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); 78440b57cec5SDimitry Andric return; 78450b57cec5SDimitry Andric } 78460b57cec5SDimitry Andric 78470b57cec5SDimitry Andric MachineOperand &Src = Inst.getOperand(1); 78488bcb0991SDimitry Andric Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 78498bcb0991SDimitry Andric Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); 78500b57cec5SDimitry Andric 78510b57cec5SDimitry Andric BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e64), TmpReg) 78520b57cec5SDimitry Andric .addImm(31) 78530b57cec5SDimitry Andric .addReg(Src.getReg(), 0, AMDGPU::sub0); 78540b57cec5SDimitry Andric 78550b57cec5SDimitry Andric BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg) 78560b57cec5SDimitry Andric .addReg(Src.getReg(), 0, AMDGPU::sub0) 78570b57cec5SDimitry Andric .addImm(AMDGPU::sub0) 78580b57cec5SDimitry Andric .addReg(TmpReg) 78590b57cec5SDimitry Andric .addImm(AMDGPU::sub1); 78600b57cec5SDimitry Andric 78610b57cec5SDimitry Andric MRI.replaceRegWith(Dest.getReg(), ResultReg); 78620b57cec5SDimitry Andric addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); 78630b57cec5SDimitry Andric } 78640b57cec5SDimitry Andric 7865cb14a3feSDimitry Andric void SIInstrInfo::splitScalar64BitCountOp(SIInstrWorklist &Worklist, 7866cb14a3feSDimitry Andric MachineInstr &Inst, unsigned Opcode, 7867cb14a3feSDimitry Andric MachineDominatorTree *MDT) const { 7868cb14a3feSDimitry Andric // (S_FLBIT_I32_B64 hi:lo) -> 7869cb14a3feSDimitry Andric // -> (umin (V_FFBH_U32_e32 hi), (uaddsat (V_FFBH_U32_e32 lo), 32)) 7870cb14a3feSDimitry Andric // (S_FF1_I32_B64 hi:lo) -> 7871cb14a3feSDimitry Andric // ->(umin (uaddsat (V_FFBL_B32_e32 hi), 32) (V_FFBL_B32_e32 lo)) 7872cb14a3feSDimitry Andric 7873cb14a3feSDimitry Andric MachineBasicBlock &MBB = *Inst.getParent(); 7874cb14a3feSDimitry Andric MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 7875cb14a3feSDimitry Andric MachineBasicBlock::iterator MII = Inst; 7876cb14a3feSDimitry Andric const DebugLoc &DL = Inst.getDebugLoc(); 
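  // Both halves are counted with the 32-bit opcode; the half that only
  // matters when the other half is zero gets 32 added with clamping, so a
  // "no bit found" result (0xffffffff) saturates instead of wrapping, and
  // V_MIN_U32 then picks the correct 64-bit count.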
7877cb14a3feSDimitry Andric 7878cb14a3feSDimitry Andric MachineOperand &Dest = Inst.getOperand(0); 7879cb14a3feSDimitry Andric MachineOperand &Src = Inst.getOperand(1); 7880cb14a3feSDimitry Andric 7881cb14a3feSDimitry Andric const MCInstrDesc &InstDesc = get(Opcode); 7882cb14a3feSDimitry Andric 7883cb14a3feSDimitry Andric bool IsCtlz = Opcode == AMDGPU::V_FFBH_U32_e32; 7884cb14a3feSDimitry Andric unsigned OpcodeAdd = 7885cb14a3feSDimitry Andric ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32; 7886cb14a3feSDimitry Andric 7887cb14a3feSDimitry Andric const TargetRegisterClass *SrcRC = 7888cb14a3feSDimitry Andric Src.isReg() ? MRI.getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass; 7889cb14a3feSDimitry Andric const TargetRegisterClass *SrcSubRC = 7890cb14a3feSDimitry Andric RI.getSubRegisterClass(SrcRC, AMDGPU::sub0); 7891cb14a3feSDimitry Andric 7892cb14a3feSDimitry Andric MachineOperand SrcRegSub0 = 7893cb14a3feSDimitry Andric buildExtractSubRegOrImm(MII, MRI, Src, SrcRC, AMDGPU::sub0, SrcSubRC); 7894cb14a3feSDimitry Andric MachineOperand SrcRegSub1 = 7895cb14a3feSDimitry Andric buildExtractSubRegOrImm(MII, MRI, Src, SrcRC, AMDGPU::sub1, SrcSubRC); 7896cb14a3feSDimitry Andric 7897cb14a3feSDimitry Andric Register MidReg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 7898cb14a3feSDimitry Andric Register MidReg2 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 7899cb14a3feSDimitry Andric Register MidReg3 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 7900cb14a3feSDimitry Andric Register MidReg4 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 7901cb14a3feSDimitry Andric 7902cb14a3feSDimitry Andric BuildMI(MBB, MII, DL, InstDesc, MidReg1).add(SrcRegSub0); 7903cb14a3feSDimitry Andric 7904cb14a3feSDimitry Andric BuildMI(MBB, MII, DL, InstDesc, MidReg2).add(SrcRegSub1); 7905cb14a3feSDimitry Andric 7906cb14a3feSDimitry Andric BuildMI(MBB, MII, DL, get(OpcodeAdd), MidReg3) 7907cb14a3feSDimitry Andric .addReg(IsCtlz ? MidReg1 : MidReg2) 7908cb14a3feSDimitry Andric .addImm(32) 7909cb14a3feSDimitry Andric .addImm(1); // enable clamp 7910cb14a3feSDimitry Andric 7911cb14a3feSDimitry Andric BuildMI(MBB, MII, DL, get(AMDGPU::V_MIN_U32_e64), MidReg4) 7912cb14a3feSDimitry Andric .addReg(MidReg3) 7913cb14a3feSDimitry Andric .addReg(IsCtlz ? 
MidReg2 : MidReg1); 7914cb14a3feSDimitry Andric 7915cb14a3feSDimitry Andric MRI.replaceRegWith(Dest.getReg(), MidReg4); 7916cb14a3feSDimitry Andric 7917cb14a3feSDimitry Andric addUsersToMoveToVALUWorklist(MidReg4, MRI, Worklist); 7918cb14a3feSDimitry Andric } 7919cb14a3feSDimitry Andric 79200b57cec5SDimitry Andric void SIInstrInfo::addUsersToMoveToVALUWorklist( 792106c3fb27SDimitry Andric Register DstReg, MachineRegisterInfo &MRI, 792206c3fb27SDimitry Andric SIInstrWorklist &Worklist) const { 79230b57cec5SDimitry Andric for (MachineRegisterInfo::use_iterator I = MRI.use_begin(DstReg), 79240b57cec5SDimitry Andric E = MRI.use_end(); I != E;) { 79250b57cec5SDimitry Andric MachineInstr &UseMI = *I->getParent(); 79260b57cec5SDimitry Andric 79270b57cec5SDimitry Andric unsigned OpNo = 0; 79280b57cec5SDimitry Andric 79290b57cec5SDimitry Andric switch (UseMI.getOpcode()) { 79300b57cec5SDimitry Andric case AMDGPU::COPY: 79310b57cec5SDimitry Andric case AMDGPU::WQM: 79328bcb0991SDimitry Andric case AMDGPU::SOFT_WQM: 7933fe6060f1SDimitry Andric case AMDGPU::STRICT_WWM: 7934fe6060f1SDimitry Andric case AMDGPU::STRICT_WQM: 79350b57cec5SDimitry Andric case AMDGPU::REG_SEQUENCE: 79360b57cec5SDimitry Andric case AMDGPU::PHI: 79370b57cec5SDimitry Andric case AMDGPU::INSERT_SUBREG: 79380b57cec5SDimitry Andric break; 79390b57cec5SDimitry Andric default: 79400b57cec5SDimitry Andric OpNo = I.getOperandNo(); 79410b57cec5SDimitry Andric break; 79420b57cec5SDimitry Andric } 79430b57cec5SDimitry Andric 79440b57cec5SDimitry Andric if (!RI.hasVectorRegisters(getOpRegClass(UseMI, OpNo))) { 79450b57cec5SDimitry Andric Worklist.insert(&UseMI); 79460b57cec5SDimitry Andric 79470b57cec5SDimitry Andric do { 79480b57cec5SDimitry Andric ++I; 79490b57cec5SDimitry Andric } while (I != E && I->getParent() == &UseMI); 79500b57cec5SDimitry Andric } else { 79510b57cec5SDimitry Andric ++I; 79520b57cec5SDimitry Andric } 79530b57cec5SDimitry Andric } 79540b57cec5SDimitry Andric } 79550b57cec5SDimitry Andric 795606c3fb27SDimitry Andric void SIInstrInfo::movePackToVALU(SIInstrWorklist &Worklist, 79570b57cec5SDimitry Andric MachineRegisterInfo &MRI, 79580b57cec5SDimitry Andric MachineInstr &Inst) const { 79598bcb0991SDimitry Andric Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 79600b57cec5SDimitry Andric MachineBasicBlock *MBB = Inst.getParent(); 79610b57cec5SDimitry Andric MachineOperand &Src0 = Inst.getOperand(1); 79620b57cec5SDimitry Andric MachineOperand &Src1 = Inst.getOperand(2); 79630b57cec5SDimitry Andric const DebugLoc &DL = Inst.getDebugLoc(); 79640b57cec5SDimitry Andric 79650b57cec5SDimitry Andric switch (Inst.getOpcode()) { 79660b57cec5SDimitry Andric case AMDGPU::S_PACK_LL_B32_B16: { 79678bcb0991SDimitry Andric Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 79688bcb0991SDimitry Andric Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 79690b57cec5SDimitry Andric 79700b57cec5SDimitry Andric // FIXME: Can do a lot better if we know the high bits of src0 or src1 are 79710b57cec5SDimitry Andric // 0. 
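    // The expansion below computes ResultReg = (Src1 << 16) | (Src0 & 0xffff)
    // with a V_AND_B32 followed by a V_LSHL_OR_B32.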
79720b57cec5SDimitry Andric BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg) 79730b57cec5SDimitry Andric .addImm(0xffff); 79740b57cec5SDimitry Andric 79750b57cec5SDimitry Andric BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_B32_e64), TmpReg) 79760b57cec5SDimitry Andric .addReg(ImmReg, RegState::Kill) 79770b57cec5SDimitry Andric .add(Src0); 79780b57cec5SDimitry Andric 7979e8d8bef9SDimitry Andric BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHL_OR_B32_e64), ResultReg) 79800b57cec5SDimitry Andric .add(Src1) 79810b57cec5SDimitry Andric .addImm(16) 79820b57cec5SDimitry Andric .addReg(TmpReg, RegState::Kill); 79830b57cec5SDimitry Andric break; 79840b57cec5SDimitry Andric } 79850b57cec5SDimitry Andric case AMDGPU::S_PACK_LH_B32_B16: { 79868bcb0991SDimitry Andric Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 79870b57cec5SDimitry Andric BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg) 79880b57cec5SDimitry Andric .addImm(0xffff); 7989e8d8bef9SDimitry Andric BuildMI(*MBB, Inst, DL, get(AMDGPU::V_BFI_B32_e64), ResultReg) 79900b57cec5SDimitry Andric .addReg(ImmReg, RegState::Kill) 79910b57cec5SDimitry Andric .add(Src0) 79920b57cec5SDimitry Andric .add(Src1); 79930b57cec5SDimitry Andric break; 79940b57cec5SDimitry Andric } 799581ad6265SDimitry Andric case AMDGPU::S_PACK_HL_B32_B16: { 799681ad6265SDimitry Andric Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 799781ad6265SDimitry Andric BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHRREV_B32_e64), TmpReg) 799881ad6265SDimitry Andric .addImm(16) 799981ad6265SDimitry Andric .add(Src0); 800081ad6265SDimitry Andric BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHL_OR_B32_e64), ResultReg) 800181ad6265SDimitry Andric .add(Src1) 800281ad6265SDimitry Andric .addImm(16) 800381ad6265SDimitry Andric .addReg(TmpReg, RegState::Kill); 800481ad6265SDimitry Andric break; 800581ad6265SDimitry Andric } 80060b57cec5SDimitry Andric case AMDGPU::S_PACK_HH_B32_B16: { 80078bcb0991SDimitry Andric Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 80088bcb0991SDimitry Andric Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 80090b57cec5SDimitry Andric BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHRREV_B32_e64), TmpReg) 80100b57cec5SDimitry Andric .addImm(16) 80110b57cec5SDimitry Andric .add(Src0); 80120b57cec5SDimitry Andric BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg) 80130b57cec5SDimitry Andric .addImm(0xffff0000); 8014e8d8bef9SDimitry Andric BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_OR_B32_e64), ResultReg) 80150b57cec5SDimitry Andric .add(Src1) 80160b57cec5SDimitry Andric .addReg(ImmReg, RegState::Kill) 80170b57cec5SDimitry Andric .addReg(TmpReg, RegState::Kill); 80180b57cec5SDimitry Andric break; 80190b57cec5SDimitry Andric } 80200b57cec5SDimitry Andric default: 80210b57cec5SDimitry Andric llvm_unreachable("unhandled s_pack_* instruction"); 80220b57cec5SDimitry Andric } 80230b57cec5SDimitry Andric 80240b57cec5SDimitry Andric MachineOperand &Dest = Inst.getOperand(0); 80250b57cec5SDimitry Andric MRI.replaceRegWith(Dest.getReg(), ResultReg); 80260b57cec5SDimitry Andric addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); 80270b57cec5SDimitry Andric } 80280b57cec5SDimitry Andric 80290b57cec5SDimitry Andric void SIInstrInfo::addSCCDefUsersToVALUWorklist(MachineOperand &Op, 80300b57cec5SDimitry Andric MachineInstr &SCCDefInst, 803106c3fb27SDimitry Andric SIInstrWorklist &Worklist, 8032349cc55cSDimitry Andric Register NewCond) const { 80335ffd83dbSDimitry Andric 
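  // When SCCDefInst has been rewritten to a VALU instruction, its SCC users
  // must be rewired: plain copies of SCC are redirected to NewCond and
  // deleted, and every other SCC user is queued for conversion to a VALU form.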
80340b57cec5SDimitry Andric // Ensure that def inst defines SCC, which is still live. 80350b57cec5SDimitry Andric assert(Op.isReg() && Op.getReg() == AMDGPU::SCC && Op.isDef() && 80360b57cec5SDimitry Andric !Op.isDead() && Op.getParent() == &SCCDefInst); 80375ffd83dbSDimitry Andric SmallVector<MachineInstr *, 4> CopyToDelete; 80380b57cec5SDimitry Andric // This assumes that all the users of SCC are in the same block 80390b57cec5SDimitry Andric // as the SCC def. 80400b57cec5SDimitry Andric for (MachineInstr &MI : // Skip the def inst itself. 80410b57cec5SDimitry Andric make_range(std::next(MachineBasicBlock::iterator(SCCDefInst)), 80420b57cec5SDimitry Andric SCCDefInst.getParent()->end())) { 80430b57cec5SDimitry Andric // Check if SCC is used first. 8044349cc55cSDimitry Andric int SCCIdx = MI.findRegisterUseOperandIdx(AMDGPU::SCC, false, &RI); 8045349cc55cSDimitry Andric if (SCCIdx != -1) { 80465ffd83dbSDimitry Andric if (MI.isCopy()) { 80475ffd83dbSDimitry Andric MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 8048e8d8bef9SDimitry Andric Register DestReg = MI.getOperand(0).getReg(); 80495ffd83dbSDimitry Andric 8050349cc55cSDimitry Andric MRI.replaceRegWith(DestReg, NewCond); 80515ffd83dbSDimitry Andric CopyToDelete.push_back(&MI); 80525ffd83dbSDimitry Andric } else { 8053349cc55cSDimitry Andric 8054349cc55cSDimitry Andric if (NewCond.isValid()) 8055349cc55cSDimitry Andric MI.getOperand(SCCIdx).setReg(NewCond); 80565ffd83dbSDimitry Andric 80570b57cec5SDimitry Andric Worklist.insert(&MI); 80585ffd83dbSDimitry Andric } 80595ffd83dbSDimitry Andric } 80600b57cec5SDimitry Andric // Exit if we find another SCC def. 80610b57cec5SDimitry Andric if (MI.findRegisterDefOperandIdx(AMDGPU::SCC, false, false, &RI) != -1) 80625ffd83dbSDimitry Andric break; 80635ffd83dbSDimitry Andric } 80645ffd83dbSDimitry Andric for (auto &Copy : CopyToDelete) 80655ffd83dbSDimitry Andric Copy->eraseFromParent(); 80660b57cec5SDimitry Andric } 80670b57cec5SDimitry Andric 8068fe6060f1SDimitry Andric // Instructions that use SCC may be converted to VALU instructions. When that 8069fe6060f1SDimitry Andric // happens, the SCC register is changed to VCC_LO. The instruction that defines 8070fe6060f1SDimitry Andric // SCC must be changed to an instruction that defines VCC. This function makes 8071fe6060f1SDimitry Andric // sure that the instruction that defines SCC is added to the moveToVALU 8072fe6060f1SDimitry Andric // worklist. 8073bdd1243dSDimitry Andric void SIInstrInfo::addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst, 807406c3fb27SDimitry Andric SIInstrWorklist &Worklist) const { 807581ad6265SDimitry Andric // Look for a preceding instruction that either defines VCC or SCC. If VCC 8076fe6060f1SDimitry Andric // then there is nothing to do because the defining instruction has been 8077fe6060f1SDimitry Andric // converted to a VALU already. If SCC then that instruction needs to be 8078fe6060f1SDimitry Andric // converted to a VALU. 
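  // For example, if an SCC-reading select has been converted to V_CNDMASK, the
  // S_CMP that defined SCC is queued here so it will be rewritten to a
  // VCC-defining V_CMP.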
8079fe6060f1SDimitry Andric for (MachineInstr &MI : 8080fe6060f1SDimitry Andric make_range(std::next(MachineBasicBlock::reverse_iterator(SCCUseInst)), 8081fe6060f1SDimitry Andric SCCUseInst->getParent()->rend())) { 8082fe6060f1SDimitry Andric if (MI.modifiesRegister(AMDGPU::VCC, &RI)) 8083fe6060f1SDimitry Andric break; 8084fe6060f1SDimitry Andric if (MI.definesRegister(AMDGPU::SCC, &RI)) { 8085fe6060f1SDimitry Andric Worklist.insert(&MI); 8086fe6060f1SDimitry Andric break; 8087fe6060f1SDimitry Andric } 8088fe6060f1SDimitry Andric } 8089fe6060f1SDimitry Andric } 8090fe6060f1SDimitry Andric 80910b57cec5SDimitry Andric const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass( 80920b57cec5SDimitry Andric const MachineInstr &Inst) const { 80930b57cec5SDimitry Andric const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0); 80940b57cec5SDimitry Andric 80950b57cec5SDimitry Andric switch (Inst.getOpcode()) { 80960b57cec5SDimitry Andric // For target instructions, getOpRegClass just returns the virtual register 80970b57cec5SDimitry Andric // class associated with the operand, so we need to find an equivalent VGPR 80980b57cec5SDimitry Andric // register class in order to move the instruction to the VALU. 80990b57cec5SDimitry Andric case AMDGPU::COPY: 81000b57cec5SDimitry Andric case AMDGPU::PHI: 81010b57cec5SDimitry Andric case AMDGPU::REG_SEQUENCE: 81020b57cec5SDimitry Andric case AMDGPU::INSERT_SUBREG: 81030b57cec5SDimitry Andric case AMDGPU::WQM: 81048bcb0991SDimitry Andric case AMDGPU::SOFT_WQM: 8105fe6060f1SDimitry Andric case AMDGPU::STRICT_WWM: 8106fe6060f1SDimitry Andric case AMDGPU::STRICT_WQM: { 81070b57cec5SDimitry Andric const TargetRegisterClass *SrcRC = getOpRegClass(Inst, 1); 81084824e7fdSDimitry Andric if (RI.isAGPRClass(SrcRC)) { 81094824e7fdSDimitry Andric if (RI.isAGPRClass(NewDstRC)) 81100b57cec5SDimitry Andric return nullptr; 81110b57cec5SDimitry Andric 81128bcb0991SDimitry Andric switch (Inst.getOpcode()) { 81138bcb0991SDimitry Andric case AMDGPU::PHI: 81148bcb0991SDimitry Andric case AMDGPU::REG_SEQUENCE: 81158bcb0991SDimitry Andric case AMDGPU::INSERT_SUBREG: 81160b57cec5SDimitry Andric NewDstRC = RI.getEquivalentAGPRClass(NewDstRC); 81178bcb0991SDimitry Andric break; 81188bcb0991SDimitry Andric default: 81198bcb0991SDimitry Andric NewDstRC = RI.getEquivalentVGPRClass(NewDstRC); 81208bcb0991SDimitry Andric } 81218bcb0991SDimitry Andric 81220b57cec5SDimitry Andric if (!NewDstRC) 81230b57cec5SDimitry Andric return nullptr; 81240b57cec5SDimitry Andric } else { 81254824e7fdSDimitry Andric if (RI.isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass) 81260b57cec5SDimitry Andric return nullptr; 81270b57cec5SDimitry Andric 81280b57cec5SDimitry Andric NewDstRC = RI.getEquivalentVGPRClass(NewDstRC); 81290b57cec5SDimitry Andric if (!NewDstRC) 81300b57cec5SDimitry Andric return nullptr; 81310b57cec5SDimitry Andric } 81320b57cec5SDimitry Andric 81330b57cec5SDimitry Andric return NewDstRC; 81340b57cec5SDimitry Andric } 81350b57cec5SDimitry Andric default: 81360b57cec5SDimitry Andric return NewDstRC; 81370b57cec5SDimitry Andric } 81380b57cec5SDimitry Andric } 81390b57cec5SDimitry Andric 81400b57cec5SDimitry Andric // Find the one SGPR operand we are allowed to use. 
81415ffd83dbSDimitry Andric Register SIInstrInfo::findUsedSGPR(const MachineInstr &MI, 81420b57cec5SDimitry Andric int OpIndices[3]) const { 81430b57cec5SDimitry Andric const MCInstrDesc &Desc = MI.getDesc(); 81440b57cec5SDimitry Andric 81450b57cec5SDimitry Andric // Find the one SGPR operand we are allowed to use. 81460b57cec5SDimitry Andric // 81470b57cec5SDimitry Andric // First we need to consider the instruction's operand requirements before 81480b57cec5SDimitry Andric // legalizing. Some operands are required to be SGPRs, such as implicit uses 81490b57cec5SDimitry Andric // of VCC, but we are still bound by the constant bus requirement to only use 81500b57cec5SDimitry Andric // one. 81510b57cec5SDimitry Andric // 81520b57cec5SDimitry Andric // If the operand's class is an SGPR, we can never move it. 81530b57cec5SDimitry Andric 81545ffd83dbSDimitry Andric Register SGPRReg = findImplicitSGPRRead(MI); 8155bdd1243dSDimitry Andric if (SGPRReg) 81560b57cec5SDimitry Andric return SGPRReg; 81570b57cec5SDimitry Andric 8158bdd1243dSDimitry Andric Register UsedSGPRs[3] = {Register()}; 81590b57cec5SDimitry Andric const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 81600b57cec5SDimitry Andric 81610b57cec5SDimitry Andric for (unsigned i = 0; i < 3; ++i) { 81620b57cec5SDimitry Andric int Idx = OpIndices[i]; 81630b57cec5SDimitry Andric if (Idx == -1) 81640b57cec5SDimitry Andric break; 81650b57cec5SDimitry Andric 81660b57cec5SDimitry Andric const MachineOperand &MO = MI.getOperand(Idx); 81670b57cec5SDimitry Andric if (!MO.isReg()) 81680b57cec5SDimitry Andric continue; 81690b57cec5SDimitry Andric 81700b57cec5SDimitry Andric // Is this operand statically required to be an SGPR based on the operand 81710b57cec5SDimitry Andric // constraints? 8172bdd1243dSDimitry Andric const TargetRegisterClass *OpRC = 8173bdd1243dSDimitry Andric RI.getRegClass(Desc.operands()[Idx].RegClass); 81740b57cec5SDimitry Andric bool IsRequiredSGPR = RI.isSGPRClass(OpRC); 81750b57cec5SDimitry Andric if (IsRequiredSGPR) 81760b57cec5SDimitry Andric return MO.getReg(); 81770b57cec5SDimitry Andric 81780b57cec5SDimitry Andric // If this could be a VGPR or an SGPR, Check the dynamic register class. 81798bcb0991SDimitry Andric Register Reg = MO.getReg(); 81800b57cec5SDimitry Andric const TargetRegisterClass *RegRC = MRI.getRegClass(Reg); 81810b57cec5SDimitry Andric if (RI.isSGPRClass(RegRC)) 81820b57cec5SDimitry Andric UsedSGPRs[i] = Reg; 81830b57cec5SDimitry Andric } 81840b57cec5SDimitry Andric 81850b57cec5SDimitry Andric // We don't have a required SGPR operand, so we have a bit more freedom in 81860b57cec5SDimitry Andric // selecting operands to move. 81870b57cec5SDimitry Andric 81880b57cec5SDimitry Andric // Try to select the most used SGPR. If an SGPR is equal to one of the 81890b57cec5SDimitry Andric // others, we choose that. 81900b57cec5SDimitry Andric // 81910b57cec5SDimitry Andric // e.g. 81920b57cec5SDimitry Andric // V_FMA_F32 v0, s0, s0, s0 -> No moves 81930b57cec5SDimitry Andric // V_FMA_F32 v0, s0, s1, s0 -> Move s1 81940b57cec5SDimitry Andric 81950b57cec5SDimitry Andric // TODO: If some of the operands are 64-bit SGPRs and some 32, we should 81960b57cec5SDimitry Andric // prefer those. 
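  // E.g. V_FMA_F32 v0, s0, s1, s1: UsedSGPRs[1] == UsedSGPRs[2], so s1 is kept
  // as the single allowed SGPR and s0 must be legalized by the caller.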
81970b57cec5SDimitry Andric 8198bdd1243dSDimitry Andric if (UsedSGPRs[0]) { 81990b57cec5SDimitry Andric if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2]) 82000b57cec5SDimitry Andric SGPRReg = UsedSGPRs[0]; 82010b57cec5SDimitry Andric } 82020b57cec5SDimitry Andric 8203bdd1243dSDimitry Andric if (!SGPRReg && UsedSGPRs[1]) { 82040b57cec5SDimitry Andric if (UsedSGPRs[1] == UsedSGPRs[2]) 82050b57cec5SDimitry Andric SGPRReg = UsedSGPRs[1]; 82060b57cec5SDimitry Andric } 82070b57cec5SDimitry Andric 82080b57cec5SDimitry Andric return SGPRReg; 82090b57cec5SDimitry Andric } 82100b57cec5SDimitry Andric 82110b57cec5SDimitry Andric MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI, 82120b57cec5SDimitry Andric unsigned OperandName) const { 82130b57cec5SDimitry Andric int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName); 82140b57cec5SDimitry Andric if (Idx == -1) 82150b57cec5SDimitry Andric return nullptr; 82160b57cec5SDimitry Andric 82170b57cec5SDimitry Andric return &MI.getOperand(Idx); 82180b57cec5SDimitry Andric } 82190b57cec5SDimitry Andric 82200b57cec5SDimitry Andric uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const { 82210b57cec5SDimitry Andric if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) { 8222bdd1243dSDimitry Andric int64_t Format = ST.getGeneration() >= AMDGPUSubtarget::GFX11 8223bdd1243dSDimitry Andric ? (int64_t)AMDGPU::UfmtGFX11::UFMT_32_FLOAT 8224bdd1243dSDimitry Andric : (int64_t)AMDGPU::UfmtGFX10::UFMT_32_FLOAT; 822581ad6265SDimitry Andric return (Format << 44) | 82260b57cec5SDimitry Andric (1ULL << 56) | // RESOURCE_LEVEL = 1 82270b57cec5SDimitry Andric (3ULL << 60); // OOB_SELECT = 3 82280b57cec5SDimitry Andric } 82290b57cec5SDimitry Andric 82300b57cec5SDimitry Andric uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT; 82310b57cec5SDimitry Andric if (ST.isAmdHsaOS()) { 82320b57cec5SDimitry Andric // Set ATC = 1. GFX9 doesn't have this bit. 82330b57cec5SDimitry Andric if (ST.getGeneration() <= AMDGPUSubtarget::VOLCANIC_ISLANDS) 82340b57cec5SDimitry Andric RsrcDataFormat |= (1ULL << 56); 82350b57cec5SDimitry Andric 82360b57cec5SDimitry Andric // Set MTYPE = 2 (MTYPE_UC = uncached). GFX9 doesn't have this. 82370b57cec5SDimitry Andric // BTW, it disables TC L2 and therefore decreases performance. 82380b57cec5SDimitry Andric if (ST.getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS) 82390b57cec5SDimitry Andric RsrcDataFormat |= (2ULL << 59); 82400b57cec5SDimitry Andric } 82410b57cec5SDimitry Andric 82420b57cec5SDimitry Andric return RsrcDataFormat; 82430b57cec5SDimitry Andric } 82440b57cec5SDimitry Andric 82450b57cec5SDimitry Andric uint64_t SIInstrInfo::getScratchRsrcWords23() const { 82460b57cec5SDimitry Andric uint64_t Rsrc23 = getDefaultRsrcDataFormat() | 82470b57cec5SDimitry Andric AMDGPU::RSRC_TID_ENABLE | 82480b57cec5SDimitry Andric 0xffffffff; // Size; 82490b57cec5SDimitry Andric 82500b57cec5SDimitry Andric // GFX9 doesn't have ELEMENT_SIZE. 82510b57cec5SDimitry Andric if (ST.getGeneration() <= AMDGPUSubtarget::VOLCANIC_ISLANDS) { 8252e8d8bef9SDimitry Andric uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize(true)) - 1; 82530b57cec5SDimitry Andric Rsrc23 |= EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT; 82540b57cec5SDimitry Andric } 82550b57cec5SDimitry Andric 82560b57cec5SDimitry Andric // IndexStride = 64 / 32. 82570b57cec5SDimitry Andric uint64_t IndexStride = ST.getWavefrontSize() == 64 ? 
3 : 2; 82580b57cec5SDimitry Andric Rsrc23 |= IndexStride << AMDGPU::RSRC_INDEX_STRIDE_SHIFT; 82590b57cec5SDimitry Andric 82600b57cec5SDimitry Andric // If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17]. 82610b57cec5SDimitry Andric // Clear them unless we want a huge stride. 82620b57cec5SDimitry Andric if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS && 82630b57cec5SDimitry Andric ST.getGeneration() <= AMDGPUSubtarget::GFX9) 82640b57cec5SDimitry Andric Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT; 82650b57cec5SDimitry Andric 82660b57cec5SDimitry Andric return Rsrc23; 82670b57cec5SDimitry Andric } 82680b57cec5SDimitry Andric 82690b57cec5SDimitry Andric bool SIInstrInfo::isLowLatencyInstruction(const MachineInstr &MI) const { 82700b57cec5SDimitry Andric unsigned Opc = MI.getOpcode(); 82710b57cec5SDimitry Andric 82720b57cec5SDimitry Andric return isSMRD(Opc); 82730b57cec5SDimitry Andric } 82740b57cec5SDimitry Andric 82755ffd83dbSDimitry Andric bool SIInstrInfo::isHighLatencyDef(int Opc) const { 82765ffd83dbSDimitry Andric return get(Opc).mayLoad() && 82775ffd83dbSDimitry Andric (isMUBUF(Opc) || isMTBUF(Opc) || isMIMG(Opc) || isFLAT(Opc)); 82780b57cec5SDimitry Andric } 82790b57cec5SDimitry Andric 82800b57cec5SDimitry Andric unsigned SIInstrInfo::isStackAccess(const MachineInstr &MI, 82810b57cec5SDimitry Andric int &FrameIndex) const { 82820b57cec5SDimitry Andric const MachineOperand *Addr = getNamedOperand(MI, AMDGPU::OpName::vaddr); 82830b57cec5SDimitry Andric if (!Addr || !Addr->isFI()) 8284bdd1243dSDimitry Andric return Register(); 82850b57cec5SDimitry Andric 82860b57cec5SDimitry Andric assert(!MI.memoperands_empty() && 82870b57cec5SDimitry Andric (*MI.memoperands_begin())->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS); 82880b57cec5SDimitry Andric 82890b57cec5SDimitry Andric FrameIndex = Addr->getIndex(); 82900b57cec5SDimitry Andric return getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg(); 82910b57cec5SDimitry Andric } 82920b57cec5SDimitry Andric 82930b57cec5SDimitry Andric unsigned SIInstrInfo::isSGPRStackAccess(const MachineInstr &MI, 82940b57cec5SDimitry Andric int &FrameIndex) const { 82950b57cec5SDimitry Andric const MachineOperand *Addr = getNamedOperand(MI, AMDGPU::OpName::addr); 82960b57cec5SDimitry Andric assert(Addr && Addr->isFI()); 82970b57cec5SDimitry Andric FrameIndex = Addr->getIndex(); 82980b57cec5SDimitry Andric return getNamedOperand(MI, AMDGPU::OpName::data)->getReg(); 82990b57cec5SDimitry Andric } 83000b57cec5SDimitry Andric 83010b57cec5SDimitry Andric unsigned SIInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, 83020b57cec5SDimitry Andric int &FrameIndex) const { 83030b57cec5SDimitry Andric if (!MI.mayLoad()) 8304bdd1243dSDimitry Andric return Register(); 83050b57cec5SDimitry Andric 83060b57cec5SDimitry Andric if (isMUBUF(MI) || isVGPRSpill(MI)) 83070b57cec5SDimitry Andric return isStackAccess(MI, FrameIndex); 83080b57cec5SDimitry Andric 83090b57cec5SDimitry Andric if (isSGPRSpill(MI)) 83100b57cec5SDimitry Andric return isSGPRStackAccess(MI, FrameIndex); 83110b57cec5SDimitry Andric 8312bdd1243dSDimitry Andric return Register(); 83130b57cec5SDimitry Andric } 83140b57cec5SDimitry Andric 83150b57cec5SDimitry Andric unsigned SIInstrInfo::isStoreToStackSlot(const MachineInstr &MI, 83160b57cec5SDimitry Andric int &FrameIndex) const { 83170b57cec5SDimitry Andric if (!MI.mayStore()) 8318bdd1243dSDimitry Andric return Register(); 83190b57cec5SDimitry Andric 83200b57cec5SDimitry Andric if (isMUBUF(MI) || isVGPRSpill(MI)) 83210b57cec5SDimitry Andric return 
isStackAccess(MI, FrameIndex); 83220b57cec5SDimitry Andric 83230b57cec5SDimitry Andric if (isSGPRSpill(MI)) 83240b57cec5SDimitry Andric return isSGPRStackAccess(MI, FrameIndex); 83250b57cec5SDimitry Andric 8326bdd1243dSDimitry Andric return Register(); 83270b57cec5SDimitry Andric } 83280b57cec5SDimitry Andric 83290b57cec5SDimitry Andric unsigned SIInstrInfo::getInstBundleSize(const MachineInstr &MI) const { 83300b57cec5SDimitry Andric unsigned Size = 0; 83310b57cec5SDimitry Andric MachineBasicBlock::const_instr_iterator I = MI.getIterator(); 83320b57cec5SDimitry Andric MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); 83330b57cec5SDimitry Andric while (++I != E && I->isInsideBundle()) { 83340b57cec5SDimitry Andric assert(!I->isBundle() && "No nested bundle!"); 83350b57cec5SDimitry Andric Size += getInstSizeInBytes(*I); 83360b57cec5SDimitry Andric } 83370b57cec5SDimitry Andric 83380b57cec5SDimitry Andric return Size; 83390b57cec5SDimitry Andric } 83400b57cec5SDimitry Andric 83410b57cec5SDimitry Andric unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { 83420b57cec5SDimitry Andric unsigned Opc = MI.getOpcode(); 83430b57cec5SDimitry Andric const MCInstrDesc &Desc = getMCOpcodeFromPseudo(Opc); 83440b57cec5SDimitry Andric unsigned DescSize = Desc.getSize(); 83450b57cec5SDimitry Andric 83460b57cec5SDimitry Andric // If we have a definitive size, we can use it. Otherwise we need to inspect 83470b57cec5SDimitry Andric // the operands to know the size. 8348e8d8bef9SDimitry Andric if (isFixedSize(MI)) { 8349e8d8bef9SDimitry Andric unsigned Size = DescSize; 8350e8d8bef9SDimitry Andric 8351e8d8bef9SDimitry Andric // If we hit the buggy offset, an extra nop will be inserted in MC so 8352e8d8bef9SDimitry Andric // estimate the worst case. 8353e8d8bef9SDimitry Andric if (MI.isBranch() && ST.hasOffset3fBug()) 8354e8d8bef9SDimitry Andric Size += 4; 8355e8d8bef9SDimitry Andric 8356e8d8bef9SDimitry Andric return Size; 8357e8d8bef9SDimitry Andric } 83580b57cec5SDimitry Andric 8359349cc55cSDimitry Andric // Instructions may have a 32-bit literal encoded after them. Check 8360349cc55cSDimitry Andric // operands that could ever be literals. 83610b57cec5SDimitry Andric if (isVALU(MI) || isSALU(MI)) { 8362349cc55cSDimitry Andric if (isDPP(MI)) 83630b57cec5SDimitry Andric return DescSize; 8364349cc55cSDimitry Andric bool HasLiteral = false; 8365349cc55cSDimitry Andric for (int I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) { 836681ad6265SDimitry Andric const MachineOperand &Op = MI.getOperand(I); 8367bdd1243dSDimitry Andric const MCOperandInfo &OpInfo = Desc.operands()[I]; 8368bdd1243dSDimitry Andric if (!Op.isReg() && !isInlineConstant(Op, OpInfo)) { 8369349cc55cSDimitry Andric HasLiteral = true; 8370349cc55cSDimitry Andric break; 8371349cc55cSDimitry Andric } 8372349cc55cSDimitry Andric } 8373349cc55cSDimitry Andric return HasLiteral ? DescSize + 4 : DescSize; 83740b57cec5SDimitry Andric } 83750b57cec5SDimitry Andric 83760b57cec5SDimitry Andric // Check whether we have extra NSA words. 
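  // The base MIMG encoding is 8 bytes; each extra NSA dword holds up to four
  // additional address registers, e.g. five vaddr operands take one extra
  // dword for a total of 12 bytes.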
83770b57cec5SDimitry Andric if (isMIMG(MI)) { 83780b57cec5SDimitry Andric int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 83790b57cec5SDimitry Andric if (VAddr0Idx < 0) 83800b57cec5SDimitry Andric return 8; 83810b57cec5SDimitry Andric 83820b57cec5SDimitry Andric int RSrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 83830b57cec5SDimitry Andric return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4); 83840b57cec5SDimitry Andric } 83850b57cec5SDimitry Andric 83860b57cec5SDimitry Andric switch (Opc) { 83870b57cec5SDimitry Andric case TargetOpcode::BUNDLE: 83880b57cec5SDimitry Andric return getInstBundleSize(MI); 83890b57cec5SDimitry Andric case TargetOpcode::INLINEASM: 83900b57cec5SDimitry Andric case TargetOpcode::INLINEASM_BR: { 83910b57cec5SDimitry Andric const MachineFunction *MF = MI.getParent()->getParent(); 83920b57cec5SDimitry Andric const char *AsmStr = MI.getOperand(0).getSymbolName(); 8393e8d8bef9SDimitry Andric return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo(), &ST); 83940b57cec5SDimitry Andric } 83950b57cec5SDimitry Andric default: 8396fe6060f1SDimitry Andric if (MI.isMetaInstruction()) 8397fe6060f1SDimitry Andric return 0; 83980b57cec5SDimitry Andric return DescSize; 83990b57cec5SDimitry Andric } 84000b57cec5SDimitry Andric } 84010b57cec5SDimitry Andric 84020b57cec5SDimitry Andric bool SIInstrInfo::mayAccessFlatAddressSpace(const MachineInstr &MI) const { 84030b57cec5SDimitry Andric if (!isFLAT(MI)) 84040b57cec5SDimitry Andric return false; 84050b57cec5SDimitry Andric 84060b57cec5SDimitry Andric if (MI.memoperands_empty()) 84070b57cec5SDimitry Andric return true; 84080b57cec5SDimitry Andric 84090b57cec5SDimitry Andric for (const MachineMemOperand *MMO : MI.memoperands()) { 84100b57cec5SDimitry Andric if (MMO->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS) 84110b57cec5SDimitry Andric return true; 84120b57cec5SDimitry Andric } 84130b57cec5SDimitry Andric return false; 84140b57cec5SDimitry Andric } 84150b57cec5SDimitry Andric 84160b57cec5SDimitry Andric bool SIInstrInfo::isNonUniformBranchInstr(MachineInstr &Branch) const { 84170b57cec5SDimitry Andric return Branch.getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO; 84180b57cec5SDimitry Andric } 84190b57cec5SDimitry Andric 84200b57cec5SDimitry Andric void SIInstrInfo::convertNonUniformIfRegion(MachineBasicBlock *IfEntry, 84210b57cec5SDimitry Andric MachineBasicBlock *IfEnd) const { 84220b57cec5SDimitry Andric MachineBasicBlock::iterator TI = IfEntry->getFirstTerminator(); 84230b57cec5SDimitry Andric assert(TI != IfEntry->end()); 84240b57cec5SDimitry Andric 84250b57cec5SDimitry Andric MachineInstr *Branch = &(*TI); 84260b57cec5SDimitry Andric MachineFunction *MF = IfEntry->getParent(); 84270b57cec5SDimitry Andric MachineRegisterInfo &MRI = IfEntry->getParent()->getRegInfo(); 84280b57cec5SDimitry Andric 84290b57cec5SDimitry Andric if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) { 84308bcb0991SDimitry Andric Register DstReg = MRI.createVirtualRegister(RI.getBoolRC()); 84310b57cec5SDimitry Andric MachineInstr *SIIF = 84320b57cec5SDimitry Andric BuildMI(*MF, Branch->getDebugLoc(), get(AMDGPU::SI_IF), DstReg) 84330b57cec5SDimitry Andric .add(Branch->getOperand(0)) 84340b57cec5SDimitry Andric .add(Branch->getOperand(1)); 84350b57cec5SDimitry Andric MachineInstr *SIEND = 84360b57cec5SDimitry Andric BuildMI(*MF, Branch->getDebugLoc(), get(AMDGPU::SI_END_CF)) 84370b57cec5SDimitry Andric .addReg(DstReg); 84380b57cec5SDimitry Andric 84390b57cec5SDimitry Andric IfEntry->erase(TI); 
84400b57cec5SDimitry Andric IfEntry->insert(IfEntry->end(), SIIF); 84410b57cec5SDimitry Andric IfEnd->insert(IfEnd->getFirstNonPHI(), SIEND); 84420b57cec5SDimitry Andric } 84430b57cec5SDimitry Andric } 84440b57cec5SDimitry Andric 84450b57cec5SDimitry Andric void SIInstrInfo::convertNonUniformLoopRegion( 84460b57cec5SDimitry Andric MachineBasicBlock *LoopEntry, MachineBasicBlock *LoopEnd) const { 84470b57cec5SDimitry Andric MachineBasicBlock::iterator TI = LoopEnd->getFirstTerminator(); 84480b57cec5SDimitry Andric // We expect 2 terminators, one conditional and one unconditional. 84490b57cec5SDimitry Andric assert(TI != LoopEnd->end()); 84500b57cec5SDimitry Andric 84510b57cec5SDimitry Andric MachineInstr *Branch = &(*TI); 84520b57cec5SDimitry Andric MachineFunction *MF = LoopEnd->getParent(); 84530b57cec5SDimitry Andric MachineRegisterInfo &MRI = LoopEnd->getParent()->getRegInfo(); 84540b57cec5SDimitry Andric 84550b57cec5SDimitry Andric if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) { 84560b57cec5SDimitry Andric 84578bcb0991SDimitry Andric Register DstReg = MRI.createVirtualRegister(RI.getBoolRC()); 84588bcb0991SDimitry Andric Register BackEdgeReg = MRI.createVirtualRegister(RI.getBoolRC()); 84590b57cec5SDimitry Andric MachineInstrBuilder HeaderPHIBuilder = 84600b57cec5SDimitry Andric BuildMI(*(MF), Branch->getDebugLoc(), get(TargetOpcode::PHI), DstReg); 8461349cc55cSDimitry Andric for (MachineBasicBlock *PMBB : LoopEntry->predecessors()) { 8462349cc55cSDimitry Andric if (PMBB == LoopEnd) { 84630b57cec5SDimitry Andric HeaderPHIBuilder.addReg(BackEdgeReg); 84640b57cec5SDimitry Andric } else { 84658bcb0991SDimitry Andric Register ZeroReg = MRI.createVirtualRegister(RI.getBoolRC()); 84660b57cec5SDimitry Andric materializeImmediate(*PMBB, PMBB->getFirstTerminator(), DebugLoc(), 84670b57cec5SDimitry Andric ZeroReg, 0); 84680b57cec5SDimitry Andric HeaderPHIBuilder.addReg(ZeroReg); 84690b57cec5SDimitry Andric } 8470349cc55cSDimitry Andric HeaderPHIBuilder.addMBB(PMBB); 84710b57cec5SDimitry Andric } 84720b57cec5SDimitry Andric MachineInstr *HeaderPhi = HeaderPHIBuilder; 84730b57cec5SDimitry Andric MachineInstr *SIIFBREAK = BuildMI(*(MF), Branch->getDebugLoc(), 84740b57cec5SDimitry Andric get(AMDGPU::SI_IF_BREAK), BackEdgeReg) 84750b57cec5SDimitry Andric .addReg(DstReg) 84760b57cec5SDimitry Andric .add(Branch->getOperand(0)); 84770b57cec5SDimitry Andric MachineInstr *SILOOP = 84780b57cec5SDimitry Andric BuildMI(*(MF), Branch->getDebugLoc(), get(AMDGPU::SI_LOOP)) 84790b57cec5SDimitry Andric .addReg(BackEdgeReg) 84800b57cec5SDimitry Andric .addMBB(LoopEntry); 84810b57cec5SDimitry Andric 84820b57cec5SDimitry Andric LoopEntry->insert(LoopEntry->begin(), HeaderPhi); 84830b57cec5SDimitry Andric LoopEnd->erase(TI); 84840b57cec5SDimitry Andric LoopEnd->insert(LoopEnd->end(), SIIFBREAK); 84850b57cec5SDimitry Andric LoopEnd->insert(LoopEnd->end(), SILOOP); 84860b57cec5SDimitry Andric } 84870b57cec5SDimitry Andric } 84880b57cec5SDimitry Andric 84890b57cec5SDimitry Andric ArrayRef<std::pair<int, const char *>> 84900b57cec5SDimitry Andric SIInstrInfo::getSerializableTargetIndices() const { 84910b57cec5SDimitry Andric static const std::pair<int, const char *> TargetIndices[] = { 84920b57cec5SDimitry Andric {AMDGPU::TI_CONSTDATA_START, "amdgpu-constdata-start"}, 84930b57cec5SDimitry Andric {AMDGPU::TI_SCRATCH_RSRC_DWORD0, "amdgpu-scratch-rsrc-dword0"}, 84940b57cec5SDimitry Andric {AMDGPU::TI_SCRATCH_RSRC_DWORD1, "amdgpu-scratch-rsrc-dword1"}, 84950b57cec5SDimitry Andric 
{AMDGPU::TI_SCRATCH_RSRC_DWORD2, "amdgpu-scratch-rsrc-dword2"}, 84960b57cec5SDimitry Andric {AMDGPU::TI_SCRATCH_RSRC_DWORD3, "amdgpu-scratch-rsrc-dword3"}}; 8497bdd1243dSDimitry Andric return ArrayRef(TargetIndices); 84980b57cec5SDimitry Andric } 84990b57cec5SDimitry Andric 85000b57cec5SDimitry Andric /// This is used by the post-RA scheduler (SchedulePostRAList.cpp). The 85010b57cec5SDimitry Andric /// post-RA version of misched uses CreateTargetMIHazardRecognizer. 85020b57cec5SDimitry Andric ScheduleHazardRecognizer * 85030b57cec5SDimitry Andric SIInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, 85040b57cec5SDimitry Andric const ScheduleDAG *DAG) const { 85050b57cec5SDimitry Andric return new GCNHazardRecognizer(DAG->MF); 85060b57cec5SDimitry Andric } 85070b57cec5SDimitry Andric 85080b57cec5SDimitry Andric /// This is the hazard recognizer used at -O0 by the PostRAHazardRecognizer 85090b57cec5SDimitry Andric /// pass. 85100b57cec5SDimitry Andric ScheduleHazardRecognizer * 85110b57cec5SDimitry Andric SIInstrInfo::CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const { 85120b57cec5SDimitry Andric return new GCNHazardRecognizer(MF); 85130b57cec5SDimitry Andric } 85140b57cec5SDimitry Andric 8515349cc55cSDimitry Andric // Called during: 8516349cc55cSDimitry Andric // - pre-RA scheduling and post-RA scheduling 8517349cc55cSDimitry Andric ScheduleHazardRecognizer * 8518349cc55cSDimitry Andric SIInstrInfo::CreateTargetMIHazardRecognizer(const InstrItineraryData *II, 8519349cc55cSDimitry Andric const ScheduleDAGMI *DAG) const { 8520349cc55cSDimitry Andric // Borrowed from Arm Target 8521349cc55cSDimitry Andric // We would like to restrict this hazard recognizer to only 8522349cc55cSDimitry Andric // post-RA scheduling; we can tell that we're post-RA because we don't 8523349cc55cSDimitry Andric // track VRegLiveness. 
8524349cc55cSDimitry Andric if (!DAG->hasVRegLiveness()) 8525349cc55cSDimitry Andric return new GCNHazardRecognizer(DAG->MF); 8526349cc55cSDimitry Andric return TargetInstrInfo::CreateTargetMIHazardRecognizer(II, DAG); 8527349cc55cSDimitry Andric } 8528349cc55cSDimitry Andric 85290b57cec5SDimitry Andric std::pair<unsigned, unsigned> 85300b57cec5SDimitry Andric SIInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { 8531bdd1243dSDimitry Andric return std::pair(TF & MO_MASK, TF & ~MO_MASK); 85320b57cec5SDimitry Andric } 85330b57cec5SDimitry Andric 85340b57cec5SDimitry Andric ArrayRef<std::pair<unsigned, const char *>> 85350b57cec5SDimitry Andric SIInstrInfo::getSerializableDirectMachineOperandTargetFlags() const { 85360b57cec5SDimitry Andric static const std::pair<unsigned, const char *> TargetFlags[] = { 85370b57cec5SDimitry Andric { MO_GOTPCREL, "amdgpu-gotprel" }, 85380b57cec5SDimitry Andric { MO_GOTPCREL32_LO, "amdgpu-gotprel32-lo" }, 85390b57cec5SDimitry Andric { MO_GOTPCREL32_HI, "amdgpu-gotprel32-hi" }, 85400b57cec5SDimitry Andric { MO_REL32_LO, "amdgpu-rel32-lo" }, 85410b57cec5SDimitry Andric { MO_REL32_HI, "amdgpu-rel32-hi" }, 85420b57cec5SDimitry Andric { MO_ABS32_LO, "amdgpu-abs32-lo" }, 85430b57cec5SDimitry Andric { MO_ABS32_HI, "amdgpu-abs32-hi" }, 85440b57cec5SDimitry Andric }; 85450b57cec5SDimitry Andric 8546bdd1243dSDimitry Andric return ArrayRef(TargetFlags); 85470b57cec5SDimitry Andric } 85480b57cec5SDimitry Andric 854981ad6265SDimitry Andric ArrayRef<std::pair<MachineMemOperand::Flags, const char *>> 855081ad6265SDimitry Andric SIInstrInfo::getSerializableMachineMemOperandTargetFlags() const { 855181ad6265SDimitry Andric static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] = 855281ad6265SDimitry Andric { 855381ad6265SDimitry Andric {MONoClobber, "amdgpu-noclobber"}, 855481ad6265SDimitry Andric }; 855581ad6265SDimitry Andric 8556bdd1243dSDimitry Andric return ArrayRef(TargetFlags); 855781ad6265SDimitry Andric } 855881ad6265SDimitry Andric 85595f757f3fSDimitry Andric unsigned SIInstrInfo::getLiveRangeSplitOpcode(Register SrcReg, 85605f757f3fSDimitry Andric const MachineFunction &MF) const { 85615f757f3fSDimitry Andric const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 85625f757f3fSDimitry Andric assert(SrcReg.isVirtual()); 85635f757f3fSDimitry Andric if (MFI->checkFlag(SrcReg, AMDGPU::VirtRegFlag::WWM_REG)) 85645f757f3fSDimitry Andric return AMDGPU::WWM_COPY; 85655f757f3fSDimitry Andric 85665f757f3fSDimitry Andric return AMDGPU::COPY; 85675f757f3fSDimitry Andric } 85685f757f3fSDimitry Andric 85695f757f3fSDimitry Andric bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI, 85705f757f3fSDimitry Andric Register Reg) const { 85715f757f3fSDimitry Andric // We need to handle instructions which may be inserted during register 85725f757f3fSDimitry Andric // allocation to handle the prolog. The initial prolog instruction may have 85735f757f3fSDimitry Andric // been separated from the start of the block by spills and copies inserted 85745f757f3fSDimitry Andric // needed by the prolog. However, the insertions for scalar registers can 85755f757f3fSDimitry Andric // always be placed at the BB top as they are independent of the exec mask 85765f757f3fSDimitry Andric // value. 
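  // In effect, spill opcodes and non-terminator instructions that modify EXEC
  // (other than plain COPYs) are treated as prologue, provided no register was
  // given or the register is not a scalar register.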
85775f757f3fSDimitry Andric bool IsNullOrVectorRegister = true; 85785f757f3fSDimitry Andric if (Reg) { 85795f757f3fSDimitry Andric const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 85805f757f3fSDimitry Andric IsNullOrVectorRegister = !RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg)); 85815f757f3fSDimitry Andric } 85825f757f3fSDimitry Andric 85835f757f3fSDimitry Andric uint16_t Opc = MI.getOpcode(); 85845f757f3fSDimitry Andric // FIXME: Copies inserted in the block prolog for live-range split should also 85855f757f3fSDimitry Andric // be included. 85865f757f3fSDimitry Andric return IsNullOrVectorRegister && 85875f757f3fSDimitry Andric (isSpillOpcode(Opc) || (!MI.isTerminator() && Opc != AMDGPU::COPY && 85885f757f3fSDimitry Andric MI.modifiesRegister(AMDGPU::EXEC, &RI))); 85890b57cec5SDimitry Andric } 85900b57cec5SDimitry Andric 85910b57cec5SDimitry Andric MachineInstrBuilder 85920b57cec5SDimitry Andric SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB, 85930b57cec5SDimitry Andric MachineBasicBlock::iterator I, 85940b57cec5SDimitry Andric const DebugLoc &DL, 85955ffd83dbSDimitry Andric Register DestReg) const { 85960b57cec5SDimitry Andric if (ST.hasAddNoCarry()) 85970b57cec5SDimitry Andric return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_U32_e64), DestReg); 85980b57cec5SDimitry Andric 85990b57cec5SDimitry Andric MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 86008bcb0991SDimitry Andric Register UnusedCarry = MRI.createVirtualRegister(RI.getBoolRC()); 86010b57cec5SDimitry Andric MRI.setRegAllocationHint(UnusedCarry, 0, RI.getVCC()); 86020b57cec5SDimitry Andric 8603e8d8bef9SDimitry Andric return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_CO_U32_e64), DestReg) 86040b57cec5SDimitry Andric .addReg(UnusedCarry, RegState::Define | RegState::Dead); 86050b57cec5SDimitry Andric } 86060b57cec5SDimitry Andric 86078bcb0991SDimitry Andric MachineInstrBuilder SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB, 86088bcb0991SDimitry Andric MachineBasicBlock::iterator I, 86098bcb0991SDimitry Andric const DebugLoc &DL, 86108bcb0991SDimitry Andric Register DestReg, 86118bcb0991SDimitry Andric RegScavenger &RS) const { 86128bcb0991SDimitry Andric if (ST.hasAddNoCarry()) 86138bcb0991SDimitry Andric return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_U32_e32), DestReg); 86148bcb0991SDimitry Andric 8615480093f4SDimitry Andric // If available, prefer to use vcc. 8616480093f4SDimitry Andric Register UnusedCarry = !RS.isRegUsed(AMDGPU::VCC) 8617480093f4SDimitry Andric ? Register(RI.getVCC()) 861806c3fb27SDimitry Andric : RS.scavengeRegisterBackwards( 861906c3fb27SDimitry Andric *RI.getBoolRC(), I, /* RestoreAfter */ false, 862006c3fb27SDimitry Andric 0, /* AllowSpill */ false); 8621480093f4SDimitry Andric 86228bcb0991SDimitry Andric // TODO: Users need to deal with this. 
86238bcb0991SDimitry Andric if (!UnusedCarry.isValid()) 86248bcb0991SDimitry Andric return MachineInstrBuilder(); 86258bcb0991SDimitry Andric 8626e8d8bef9SDimitry Andric return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_CO_U32_e64), DestReg) 86278bcb0991SDimitry Andric .addReg(UnusedCarry, RegState::Define | RegState::Dead); 86288bcb0991SDimitry Andric } 86298bcb0991SDimitry Andric 86300b57cec5SDimitry Andric bool SIInstrInfo::isKillTerminator(unsigned Opcode) { 86310b57cec5SDimitry Andric switch (Opcode) { 86320b57cec5SDimitry Andric case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR: 86330b57cec5SDimitry Andric case AMDGPU::SI_KILL_I1_TERMINATOR: 86340b57cec5SDimitry Andric return true; 86350b57cec5SDimitry Andric default: 86360b57cec5SDimitry Andric return false; 86370b57cec5SDimitry Andric } 86380b57cec5SDimitry Andric } 86390b57cec5SDimitry Andric 86400b57cec5SDimitry Andric const MCInstrDesc &SIInstrInfo::getKillTerminatorFromPseudo(unsigned Opcode) const { 86410b57cec5SDimitry Andric switch (Opcode) { 86420b57cec5SDimitry Andric case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO: 86430b57cec5SDimitry Andric return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR); 86440b57cec5SDimitry Andric case AMDGPU::SI_KILL_I1_PSEUDO: 86450b57cec5SDimitry Andric return get(AMDGPU::SI_KILL_I1_TERMINATOR); 86460b57cec5SDimitry Andric default: 86470b57cec5SDimitry Andric llvm_unreachable("invalid opcode, expected SI_KILL_*_PSEUDO"); 86480b57cec5SDimitry Andric } 86490b57cec5SDimitry Andric } 86500b57cec5SDimitry Andric 86515f757f3fSDimitry Andric bool SIInstrInfo::isLegalMUBUFImmOffset(unsigned Imm) const { 86525f757f3fSDimitry Andric return Imm <= getMaxMUBUFImmOffset(ST); 86535f757f3fSDimitry Andric } 86545f757f3fSDimitry Andric 86555f757f3fSDimitry Andric unsigned SIInstrInfo::getMaxMUBUFImmOffset(const GCNSubtarget &ST) { 86565f757f3fSDimitry Andric // GFX12 field is non-negative 24-bit signed byte offset. 86575f757f3fSDimitry Andric const unsigned OffsetBits = 86585f757f3fSDimitry Andric ST.getGeneration() >= AMDGPUSubtarget::GFX12 ? 23 : 12; 86595f757f3fSDimitry Andric return (1 << OffsetBits) - 1; 86605f757f3fSDimitry Andric } 866106c3fb27SDimitry Andric 86620b57cec5SDimitry Andric void SIInstrInfo::fixImplicitOperands(MachineInstr &MI) const { 86630b57cec5SDimitry Andric if (!ST.isWave32()) 86640b57cec5SDimitry Andric return; 86650b57cec5SDimitry Andric 866606c3fb27SDimitry Andric if (MI.isInlineAsm()) 866706c3fb27SDimitry Andric return; 866806c3fb27SDimitry Andric 86690b57cec5SDimitry Andric for (auto &Op : MI.implicit_operands()) { 86700b57cec5SDimitry Andric if (Op.isReg() && Op.getReg() == AMDGPU::VCC) 86710b57cec5SDimitry Andric Op.setReg(AMDGPU::VCC_LO); 86720b57cec5SDimitry Andric } 86730b57cec5SDimitry Andric } 86740b57cec5SDimitry Andric 86750b57cec5SDimitry Andric bool SIInstrInfo::isBufferSMRD(const MachineInstr &MI) const { 86760b57cec5SDimitry Andric if (!isSMRD(MI)) 86770b57cec5SDimitry Andric return false; 86780b57cec5SDimitry Andric 86790b57cec5SDimitry Andric // Check that it is using a buffer resource. 86800b57cec5SDimitry Andric int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sbase); 86810b57cec5SDimitry Andric if (Idx == -1) // e.g. 
s_memtime 86820b57cec5SDimitry Andric return false; 86830b57cec5SDimitry Andric 8684bdd1243dSDimitry Andric const auto RCID = MI.getDesc().operands()[Idx].RegClass; 86858bcb0991SDimitry Andric return RI.getRegClass(RCID)->hasSubClassEq(&AMDGPU::SGPR_128RegClass); 86868bcb0991SDimitry Andric } 86878bcb0991SDimitry Andric 868806c3fb27SDimitry Andric // Given Imm, split it into the values to put into the SOffset and ImmOffset 868906c3fb27SDimitry Andric // fields in an MUBUF instruction. Return false if it is not possible (due to a 869006c3fb27SDimitry Andric // hardware bug needing a workaround). 869106c3fb27SDimitry Andric // 869206c3fb27SDimitry Andric // The required alignment ensures that individual address components remain 869306c3fb27SDimitry Andric // aligned if they are aligned to begin with. It also ensures that additional 869406c3fb27SDimitry Andric // offsets within the given alignment can be added to the resulting ImmOffset. 869506c3fb27SDimitry Andric bool SIInstrInfo::splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, 869606c3fb27SDimitry Andric uint32_t &ImmOffset, Align Alignment) const { 86975f757f3fSDimitry Andric const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset(ST); 869806c3fb27SDimitry Andric const uint32_t MaxImm = alignDown(MaxOffset, Alignment.value()); 869906c3fb27SDimitry Andric uint32_t Overflow = 0; 870006c3fb27SDimitry Andric 870106c3fb27SDimitry Andric if (Imm > MaxImm) { 870206c3fb27SDimitry Andric if (Imm <= MaxImm + 64) { 870306c3fb27SDimitry Andric // Use an SOffset inline constant for 4..64 870406c3fb27SDimitry Andric Overflow = Imm - MaxImm; 870506c3fb27SDimitry Andric Imm = MaxImm; 870606c3fb27SDimitry Andric } else { 870706c3fb27SDimitry Andric // Try to keep the same value in SOffset for adjacent loads, so that 870806c3fb27SDimitry Andric // the corresponding register contents can be re-used. 870906c3fb27SDimitry Andric // 871006c3fb27SDimitry Andric // Load values with all low-bits (except for alignment bits) set into 871106c3fb27SDimitry Andric // SOffset, so that a larger range of values can be covered using 871206c3fb27SDimitry Andric // s_movk_i32. 871306c3fb27SDimitry Andric // 871406c3fb27SDimitry Andric // Atomic operations fail to work correctly when individual address 871506c3fb27SDimitry Andric // components are unaligned, even if their sum is aligned. 871606c3fb27SDimitry Andric uint32_t High = (Imm + Alignment.value()) & ~MaxOffset; 871706c3fb27SDimitry Andric uint32_t Low = (Imm + Alignment.value()) & MaxOffset; 871806c3fb27SDimitry Andric Imm = Low; 871906c3fb27SDimitry Andric Overflow = High - Alignment.value(); 872006c3fb27SDimitry Andric } 872106c3fb27SDimitry Andric } 872206c3fb27SDimitry Andric 87235f757f3fSDimitry Andric if (Overflow > 0) { 872406c3fb27SDimitry Andric // There is a hardware bug in SI and CI which prevents address clamping in 872506c3fb27SDimitry Andric // MUBUF instructions from working correctly with SOffsets. The immediate 872606c3fb27SDimitry Andric // offset is unaffected. 87275f757f3fSDimitry Andric if (ST.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS) 872806c3fb27SDimitry Andric return false; 872906c3fb27SDimitry Andric 87305f757f3fSDimitry Andric // It is not possible to set immediate in SOffset field on some targets. 
87315f757f3fSDimitry Andric if (ST.hasRestrictedSOffset()) 87325f757f3fSDimitry Andric return false; 87335f757f3fSDimitry Andric } 87345f757f3fSDimitry Andric 873506c3fb27SDimitry Andric ImmOffset = Imm; 873606c3fb27SDimitry Andric SOffset = Overflow; 873706c3fb27SDimitry Andric return true; 873806c3fb27SDimitry Andric } 873906c3fb27SDimitry Andric 8740fe6060f1SDimitry Andric // Depending on the used address space and instructions, some immediate offsets 8741fe6060f1SDimitry Andric // are allowed and some are not. 8742fe6060f1SDimitry Andric // In general, flat instruction offsets can only be non-negative, global and 8743fe6060f1SDimitry Andric // scratch instruction offsets can also be negative. 8744fe6060f1SDimitry Andric // 8745fe6060f1SDimitry Andric // There are several bugs related to these offsets: 8746fe6060f1SDimitry Andric // On gfx10.1, flat instructions that go into the global address space cannot 8747fe6060f1SDimitry Andric // use an offset. 8748fe6060f1SDimitry Andric // 8749fe6060f1SDimitry Andric // For scratch instructions, the address can be either an SGPR or a VGPR. 8750fe6060f1SDimitry Andric // The following offsets can be used, depending on the architecture (x means 8751fe6060f1SDimitry Andric // cannot be used): 8752fe6060f1SDimitry Andric // +----------------------------+------+------+ 8753fe6060f1SDimitry Andric // | Address-Mode | SGPR | VGPR | 8754fe6060f1SDimitry Andric // +----------------------------+------+------+ 8755fe6060f1SDimitry Andric // | gfx9 | | | 8756fe6060f1SDimitry Andric // | negative, 4-aligned offset | x | ok | 8757fe6060f1SDimitry Andric // | negative, unaligned offset | x | ok | 8758fe6060f1SDimitry Andric // +----------------------------+------+------+ 8759fe6060f1SDimitry Andric // | gfx10 | | | 8760fe6060f1SDimitry Andric // | negative, 4-aligned offset | ok | ok | 8761fe6060f1SDimitry Andric // | negative, unaligned offset | ok | x | 8762fe6060f1SDimitry Andric // +----------------------------+------+------+ 8763fe6060f1SDimitry Andric // | gfx10.3 | | | 8764fe6060f1SDimitry Andric // | negative, 4-aligned offset | ok | ok | 8765fe6060f1SDimitry Andric // | negative, unaligned offset | ok | ok | 8766fe6060f1SDimitry Andric // +----------------------------+------+------+ 8767fe6060f1SDimitry Andric // 8768fe6060f1SDimitry Andric // This function ignores the addressing mode, so if an offset cannot be used in 8769fe6060f1SDimitry Andric // one addressing mode, it is considered illegal. 87700b57cec5SDimitry Andric bool SIInstrInfo::isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, 8771fe6060f1SDimitry Andric uint64_t FlatVariant) const { 87720b57cec5SDimitry Andric // TODO: Should 0 be special cased? 
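  // E.g. with a 13-bit signed offset field, Offset = -8 is legal for the
  // global and scratch variants (which allow negative offsets) but not for a
  // plain FLAT access before GFX12.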
87730b57cec5SDimitry Andric if (!ST.hasFlatInstOffsets()) 87740b57cec5SDimitry Andric return false; 87750b57cec5SDimitry Andric 8776fe6060f1SDimitry Andric if (ST.hasFlatSegmentOffsetBug() && FlatVariant == SIInstrFlags::FLAT && 8777fe6060f1SDimitry Andric (AddrSpace == AMDGPUAS::FLAT_ADDRESS || 8778fe6060f1SDimitry Andric AddrSpace == AMDGPUAS::GLOBAL_ADDRESS)) 87790b57cec5SDimitry Andric return false; 87800b57cec5SDimitry Andric 8781fe6060f1SDimitry Andric if (ST.hasNegativeUnalignedScratchOffsetBug() && 8782fe6060f1SDimitry Andric FlatVariant == SIInstrFlags::FlatScratch && Offset < 0 && 8783fe6060f1SDimitry Andric (Offset % 4) != 0) { 8784fe6060f1SDimitry Andric return false; 8785fe6060f1SDimitry Andric } 8786fe6060f1SDimitry Andric 87875f757f3fSDimitry Andric bool AllowNegative = allowNegativeFlatOffset(FlatVariant); 8788bdd1243dSDimitry Andric unsigned N = AMDGPU::getNumFlatOffsetBits(ST); 8789bdd1243dSDimitry Andric return isIntN(N, Offset) && (AllowNegative || Offset >= 0); 87900b57cec5SDimitry Andric } 87910b57cec5SDimitry Andric 8792fe6060f1SDimitry Andric // See comment on SIInstrInfo::isLegalFLATOffset for what is legal and what not. 8793fe6060f1SDimitry Andric std::pair<int64_t, int64_t> 8794fe6060f1SDimitry Andric SIInstrInfo::splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace, 8795fe6060f1SDimitry Andric uint64_t FlatVariant) const { 8796e8d8bef9SDimitry Andric int64_t RemainderOffset = COffsetVal; 8797e8d8bef9SDimitry Andric int64_t ImmField = 0; 8798fe6060f1SDimitry Andric 87995f757f3fSDimitry Andric bool AllowNegative = allowNegativeFlatOffset(FlatVariant); 8800bdd1243dSDimitry Andric const unsigned NumBits = AMDGPU::getNumFlatOffsetBits(ST) - 1; 88015f757f3fSDimitry Andric 8802bdd1243dSDimitry Andric if (AllowNegative) { 8803e8d8bef9SDimitry Andric // Use signed division by a power of two to truncate towards 0. 
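    // E.g. assuming a 13-bit signed immediate field (NumBits == 12, D == 4096),
    // COffsetVal == -4100 splits into RemainderOffset == -4096 (materialized
    // separately) and ImmField == -4 (folded into the instruction's offset).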
8804bdd1243dSDimitry Andric int64_t D = 1LL << NumBits; 8805e8d8bef9SDimitry Andric RemainderOffset = (COffsetVal / D) * D; 8806e8d8bef9SDimitry Andric ImmField = COffsetVal - RemainderOffset; 8807fe6060f1SDimitry Andric 8808fe6060f1SDimitry Andric if (ST.hasNegativeUnalignedScratchOffsetBug() && 8809fe6060f1SDimitry Andric FlatVariant == SIInstrFlags::FlatScratch && ImmField < 0 && 8810fe6060f1SDimitry Andric (ImmField % 4) != 0) { 8811fe6060f1SDimitry Andric // Make ImmField a multiple of 4 8812fe6060f1SDimitry Andric RemainderOffset += ImmField % 4; 8813fe6060f1SDimitry Andric ImmField -= ImmField % 4; 8814fe6060f1SDimitry Andric } 8815e8d8bef9SDimitry Andric } else if (COffsetVal >= 0) { 8816e8d8bef9SDimitry Andric ImmField = COffsetVal & maskTrailingOnes<uint64_t>(NumBits); 8817e8d8bef9SDimitry Andric RemainderOffset = COffsetVal - ImmField; 88180b57cec5SDimitry Andric } 88190b57cec5SDimitry Andric 8820fe6060f1SDimitry Andric assert(isLegalFLATOffset(ImmField, AddrSpace, FlatVariant)); 8821e8d8bef9SDimitry Andric assert(RemainderOffset + ImmField == COffsetVal); 8822e8d8bef9SDimitry Andric return {ImmField, RemainderOffset}; 8823e8d8bef9SDimitry Andric } 88240b57cec5SDimitry Andric 88255f757f3fSDimitry Andric bool SIInstrInfo::allowNegativeFlatOffset(uint64_t FlatVariant) const { 88265f757f3fSDimitry Andric if (ST.hasNegativeScratchOffsetBug() && 88275f757f3fSDimitry Andric FlatVariant == SIInstrFlags::FlatScratch) 88285f757f3fSDimitry Andric return false; 88295f757f3fSDimitry Andric 88305f757f3fSDimitry Andric return FlatVariant != SIInstrFlags::FLAT || AMDGPU::isGFX12Plus(ST); 88315f757f3fSDimitry Andric } 88325f757f3fSDimitry Andric 883306c3fb27SDimitry Andric static unsigned subtargetEncodingFamily(const GCNSubtarget &ST) { 88340b57cec5SDimitry Andric switch (ST.getGeneration()) { 88350b57cec5SDimitry Andric default: 88360b57cec5SDimitry Andric break; 88370b57cec5SDimitry Andric case AMDGPUSubtarget::SOUTHERN_ISLANDS: 88380b57cec5SDimitry Andric case AMDGPUSubtarget::SEA_ISLANDS: 88390b57cec5SDimitry Andric return SIEncodingFamily::SI; 88400b57cec5SDimitry Andric case AMDGPUSubtarget::VOLCANIC_ISLANDS: 88410b57cec5SDimitry Andric case AMDGPUSubtarget::GFX9: 88420b57cec5SDimitry Andric return SIEncodingFamily::VI; 88430b57cec5SDimitry Andric case AMDGPUSubtarget::GFX10: 88440b57cec5SDimitry Andric return SIEncodingFamily::GFX10; 884581ad6265SDimitry Andric case AMDGPUSubtarget::GFX11: 884681ad6265SDimitry Andric return SIEncodingFamily::GFX11; 88475f757f3fSDimitry Andric case AMDGPUSubtarget::GFX12: 88485f757f3fSDimitry Andric return SIEncodingFamily::GFX12; 88490b57cec5SDimitry Andric } 88500b57cec5SDimitry Andric llvm_unreachable("Unknown subtarget generation!"); 88510b57cec5SDimitry Andric } 88520b57cec5SDimitry Andric 8853480093f4SDimitry Andric bool SIInstrInfo::isAsmOnlyOpcode(int MCOp) const { 8854480093f4SDimitry Andric switch(MCOp) { 8855480093f4SDimitry Andric // These opcodes use indirect register addressing so 8856480093f4SDimitry Andric // they need special handling by codegen (currently missing). 8857480093f4SDimitry Andric // Therefore it is too risky to allow these opcodes 8858480093f4SDimitry Andric // to be selected by dpp combiner or sdwa peepholer. 
8859480093f4SDimitry Andric case AMDGPU::V_MOVRELS_B32_dpp_gfx10: 8860480093f4SDimitry Andric case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 8861480093f4SDimitry Andric case AMDGPU::V_MOVRELD_B32_dpp_gfx10: 8862480093f4SDimitry Andric case AMDGPU::V_MOVRELD_B32_sdwa_gfx10: 8863480093f4SDimitry Andric case AMDGPU::V_MOVRELSD_B32_dpp_gfx10: 8864480093f4SDimitry Andric case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 8865480093f4SDimitry Andric case AMDGPU::V_MOVRELSD_2_B32_dpp_gfx10: 8866480093f4SDimitry Andric case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 8867480093f4SDimitry Andric return true; 8868480093f4SDimitry Andric default: 8869480093f4SDimitry Andric return false; 8870480093f4SDimitry Andric } 8871480093f4SDimitry Andric } 8872480093f4SDimitry Andric 88730b57cec5SDimitry Andric int SIInstrInfo::pseudoToMCOpcode(int Opcode) const { 88745f757f3fSDimitry Andric if (SIInstrInfo::isSoftWaitcnt(Opcode)) 88755f757f3fSDimitry Andric Opcode = SIInstrInfo::getNonSoftWaitcntOpcode(Opcode); 88765f757f3fSDimitry Andric 887706c3fb27SDimitry Andric unsigned Gen = subtargetEncodingFamily(ST); 88780b57cec5SDimitry Andric 88790b57cec5SDimitry Andric if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 && 88800b57cec5SDimitry Andric ST.getGeneration() == AMDGPUSubtarget::GFX9) 88810b57cec5SDimitry Andric Gen = SIEncodingFamily::GFX9; 88820b57cec5SDimitry Andric 88830b57cec5SDimitry Andric // Adjust the encoding family to GFX80 for D16 buffer instructions when the 88840b57cec5SDimitry Andric // subtarget has UnpackedD16VMem feature. 88850b57cec5SDimitry Andric // TODO: remove this when we discard GFX80 encoding. 88860b57cec5SDimitry Andric if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16Buf)) 88870b57cec5SDimitry Andric Gen = SIEncodingFamily::GFX80; 88880b57cec5SDimitry Andric 88890b57cec5SDimitry Andric if (get(Opcode).TSFlags & SIInstrFlags::SDWA) { 88900b57cec5SDimitry Andric switch (ST.getGeneration()) { 88910b57cec5SDimitry Andric default: 88920b57cec5SDimitry Andric Gen = SIEncodingFamily::SDWA; 88930b57cec5SDimitry Andric break; 88940b57cec5SDimitry Andric case AMDGPUSubtarget::GFX9: 88950b57cec5SDimitry Andric Gen = SIEncodingFamily::SDWA9; 88960b57cec5SDimitry Andric break; 88970b57cec5SDimitry Andric case AMDGPUSubtarget::GFX10: 88980b57cec5SDimitry Andric Gen = SIEncodingFamily::SDWA10; 88990b57cec5SDimitry Andric break; 89000b57cec5SDimitry Andric } 89010b57cec5SDimitry Andric } 89020b57cec5SDimitry Andric 890304eeddc0SDimitry Andric if (isMAI(Opcode)) { 890404eeddc0SDimitry Andric int MFMAOp = AMDGPU::getMFMAEarlyClobberOp(Opcode); 890504eeddc0SDimitry Andric if (MFMAOp != -1) 890604eeddc0SDimitry Andric Opcode = MFMAOp; 890704eeddc0SDimitry Andric } 890804eeddc0SDimitry Andric 89090b57cec5SDimitry Andric int MCOp = AMDGPU::getMCOpcode(Opcode, Gen); 89100b57cec5SDimitry Andric 89115f757f3fSDimitry Andric // TODO-GFX12: Remove this. 89125f757f3fSDimitry Andric // Hack to allow some GFX12 codegen tests to run before all the encodings are 89135f757f3fSDimitry Andric // implemented. 89145f757f3fSDimitry Andric if (MCOp == (uint16_t)-1 && Gen == SIEncodingFamily::GFX12) 89155f757f3fSDimitry Andric MCOp = AMDGPU::getMCOpcode(Opcode, SIEncodingFamily::GFX11); 89165f757f3fSDimitry Andric 89170b57cec5SDimitry Andric // -1 means that Opcode is already a native instruction. 
89180b57cec5SDimitry Andric if (MCOp == -1) 89190b57cec5SDimitry Andric return Opcode; 89200b57cec5SDimitry Andric 8921fe6060f1SDimitry Andric if (ST.hasGFX90AInsts()) { 8922fe6060f1SDimitry Andric uint16_t NMCOp = (uint16_t)-1; 892381ad6265SDimitry Andric if (ST.hasGFX940Insts()) 892481ad6265SDimitry Andric NMCOp = AMDGPU::getMCOpcode(Opcode, SIEncodingFamily::GFX940); 892581ad6265SDimitry Andric if (NMCOp == (uint16_t)-1) 8926fe6060f1SDimitry Andric NMCOp = AMDGPU::getMCOpcode(Opcode, SIEncodingFamily::GFX90A); 8927fe6060f1SDimitry Andric if (NMCOp == (uint16_t)-1) 8928fe6060f1SDimitry Andric NMCOp = AMDGPU::getMCOpcode(Opcode, SIEncodingFamily::GFX9); 8929fe6060f1SDimitry Andric if (NMCOp != (uint16_t)-1) 8930fe6060f1SDimitry Andric MCOp = NMCOp; 8931fe6060f1SDimitry Andric } 8932fe6060f1SDimitry Andric 89330b57cec5SDimitry Andric // (uint16_t)-1 means that Opcode is a pseudo instruction that has 89340b57cec5SDimitry Andric // no encoding in the given subtarget generation. 89350b57cec5SDimitry Andric if (MCOp == (uint16_t)-1) 89360b57cec5SDimitry Andric return -1; 89370b57cec5SDimitry Andric 8938480093f4SDimitry Andric if (isAsmOnlyOpcode(MCOp)) 8939480093f4SDimitry Andric return -1; 8940480093f4SDimitry Andric 89410b57cec5SDimitry Andric return MCOp; 89420b57cec5SDimitry Andric } 89430b57cec5SDimitry Andric 89440b57cec5SDimitry Andric static 89450b57cec5SDimitry Andric TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd) { 89460b57cec5SDimitry Andric assert(RegOpnd.isReg()); 89470b57cec5SDimitry Andric return RegOpnd.isUndef() ? TargetInstrInfo::RegSubRegPair() : 89480b57cec5SDimitry Andric getRegSubRegPair(RegOpnd); 89490b57cec5SDimitry Andric } 89500b57cec5SDimitry Andric 89510b57cec5SDimitry Andric TargetInstrInfo::RegSubRegPair 89520b57cec5SDimitry Andric llvm::getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg) { 89530b57cec5SDimitry Andric assert(MI.isRegSequence()); 89540b57cec5SDimitry Andric for (unsigned I = 0, E = (MI.getNumOperands() - 1)/ 2; I < E; ++I) 89550b57cec5SDimitry Andric if (MI.getOperand(1 + 2 * I + 1).getImm() == SubReg) { 89560b57cec5SDimitry Andric auto &RegOp = MI.getOperand(1 + 2 * I); 89570b57cec5SDimitry Andric return getRegOrUndef(RegOp); 89580b57cec5SDimitry Andric } 89590b57cec5SDimitry Andric return TargetInstrInfo::RegSubRegPair(); 89600b57cec5SDimitry Andric } 89610b57cec5SDimitry Andric 89620b57cec5SDimitry Andric // Try to find the definition of reg:subreg in subreg-manipulation pseudos 89630b57cec5SDimitry Andric // Following a subreg of reg:subreg isn't supported 89640b57cec5SDimitry Andric static bool followSubRegDef(MachineInstr &MI, 89650b57cec5SDimitry Andric TargetInstrInfo::RegSubRegPair &RSR) { 89660b57cec5SDimitry Andric if (!RSR.SubReg) 89670b57cec5SDimitry Andric return false; 89680b57cec5SDimitry Andric switch (MI.getOpcode()) { 89690b57cec5SDimitry Andric default: break; 89700b57cec5SDimitry Andric case AMDGPU::REG_SEQUENCE: 89710b57cec5SDimitry Andric RSR = getRegSequenceSubReg(MI, RSR.SubReg); 89720b57cec5SDimitry Andric return true; 89730b57cec5SDimitry Andric // EXTRACT_SUBREG isn't supported as this would follow a subreg of subreg 89740b57cec5SDimitry Andric case AMDGPU::INSERT_SUBREG: 89750b57cec5SDimitry Andric if (RSR.SubReg == (unsigned)MI.getOperand(3).getImm()) 89760b57cec5SDimitry Andric // inserted the subreg we're looking for 89770b57cec5SDimitry Andric RSR = getRegOrUndef(MI.getOperand(2)); 89780b57cec5SDimitry Andric else { // the subreg in the rest of the reg 89790b57cec5SDimitry Andric
auto R1 = getRegOrUndef(MI.getOperand(1)); 89800b57cec5SDimitry Andric if (R1.SubReg) // subreg of subreg isn't supported 89810b57cec5SDimitry Andric return false; 89820b57cec5SDimitry Andric RSR.Reg = R1.Reg; 89830b57cec5SDimitry Andric } 89840b57cec5SDimitry Andric return true; 89850b57cec5SDimitry Andric } 89860b57cec5SDimitry Andric return false; 89870b57cec5SDimitry Andric } 89880b57cec5SDimitry Andric 89890b57cec5SDimitry Andric MachineInstr *llvm::getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, 89900b57cec5SDimitry Andric MachineRegisterInfo &MRI) { 89910b57cec5SDimitry Andric assert(MRI.isSSA()); 8992e8d8bef9SDimitry Andric if (!P.Reg.isVirtual()) 89930b57cec5SDimitry Andric return nullptr; 89940b57cec5SDimitry Andric 89950b57cec5SDimitry Andric auto RSR = P; 89960b57cec5SDimitry Andric auto *DefInst = MRI.getVRegDef(RSR.Reg); 89970b57cec5SDimitry Andric while (auto *MI = DefInst) { 89980b57cec5SDimitry Andric DefInst = nullptr; 89990b57cec5SDimitry Andric switch (MI->getOpcode()) { 90000b57cec5SDimitry Andric case AMDGPU::COPY: 90010b57cec5SDimitry Andric case AMDGPU::V_MOV_B32_e32: { 90020b57cec5SDimitry Andric auto &Op1 = MI->getOperand(1); 9003e8d8bef9SDimitry Andric if (Op1.isReg() && Op1.getReg().isVirtual()) { 90040b57cec5SDimitry Andric if (Op1.isUndef()) 90050b57cec5SDimitry Andric return nullptr; 90060b57cec5SDimitry Andric RSR = getRegSubRegPair(Op1); 90070b57cec5SDimitry Andric DefInst = MRI.getVRegDef(RSR.Reg); 90080b57cec5SDimitry Andric } 90090b57cec5SDimitry Andric break; 90100b57cec5SDimitry Andric } 90110b57cec5SDimitry Andric default: 90120b57cec5SDimitry Andric if (followSubRegDef(*MI, RSR)) { 90130b57cec5SDimitry Andric if (!RSR.Reg) 90140b57cec5SDimitry Andric return nullptr; 90150b57cec5SDimitry Andric DefInst = MRI.getVRegDef(RSR.Reg); 90160b57cec5SDimitry Andric } 90170b57cec5SDimitry Andric } 90180b57cec5SDimitry Andric if (!DefInst) 90190b57cec5SDimitry Andric return MI; 90200b57cec5SDimitry Andric } 90210b57cec5SDimitry Andric return nullptr; 90220b57cec5SDimitry Andric } 90230b57cec5SDimitry Andric 90240b57cec5SDimitry Andric bool llvm::execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, 90250b57cec5SDimitry Andric Register VReg, 90260b57cec5SDimitry Andric const MachineInstr &DefMI, 90270b57cec5SDimitry Andric const MachineInstr &UseMI) { 90280b57cec5SDimitry Andric assert(MRI.isSSA() && "Must be run on SSA"); 90290b57cec5SDimitry Andric 90300b57cec5SDimitry Andric auto *TRI = MRI.getTargetRegisterInfo(); 90310b57cec5SDimitry Andric auto *DefBB = DefMI.getParent(); 90320b57cec5SDimitry Andric 90330b57cec5SDimitry Andric // Don't bother searching between blocks, although it is possible this block 90340b57cec5SDimitry Andric // doesn't modify exec. 90350b57cec5SDimitry Andric if (UseMI.getParent() != DefBB) 90360b57cec5SDimitry Andric return true; 90370b57cec5SDimitry Andric 90380b57cec5SDimitry Andric const int MaxInstScan = 20; 90390b57cec5SDimitry Andric int NumInst = 0; 90400b57cec5SDimitry Andric 90410b57cec5SDimitry Andric // Stop scan at the use. 
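// Note: the walk below is conservative by design; exceeding MaxInstScan
// instructions before reaching the use, or seeing any instruction that
// writes EXEC, makes this report that EXEC may have been modified.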
90420b57cec5SDimitry Andric auto E = UseMI.getIterator(); 90430b57cec5SDimitry Andric for (auto I = std::next(DefMI.getIterator()); I != E; ++I) { 90440b57cec5SDimitry Andric if (I->isDebugInstr()) 90450b57cec5SDimitry Andric continue; 90460b57cec5SDimitry Andric 90470b57cec5SDimitry Andric if (++NumInst > MaxInstScan) 90480b57cec5SDimitry Andric return true; 90490b57cec5SDimitry Andric 90500b57cec5SDimitry Andric if (I->modifiesRegister(AMDGPU::EXEC, TRI)) 90510b57cec5SDimitry Andric return true; 90520b57cec5SDimitry Andric } 90530b57cec5SDimitry Andric 90540b57cec5SDimitry Andric return false; 90550b57cec5SDimitry Andric } 90560b57cec5SDimitry Andric 90570b57cec5SDimitry Andric bool llvm::execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, 90580b57cec5SDimitry Andric Register VReg, 90590b57cec5SDimitry Andric const MachineInstr &DefMI) { 90600b57cec5SDimitry Andric assert(MRI.isSSA() && "Must be run on SSA"); 90610b57cec5SDimitry Andric 90620b57cec5SDimitry Andric auto *TRI = MRI.getTargetRegisterInfo(); 90630b57cec5SDimitry Andric auto *DefBB = DefMI.getParent(); 90640b57cec5SDimitry Andric 9065e8d8bef9SDimitry Andric const int MaxUseScan = 10; 9066e8d8bef9SDimitry Andric int NumUse = 0; 90670b57cec5SDimitry Andric 9068e8d8bef9SDimitry Andric for (auto &Use : MRI.use_nodbg_operands(VReg)) { 9069e8d8bef9SDimitry Andric auto &UseInst = *Use.getParent(); 90700b57cec5SDimitry Andric // Don't bother searching between blocks, although it is possible this block 90710b57cec5SDimitry Andric // doesn't modify exec. 907281ad6265SDimitry Andric if (UseInst.getParent() != DefBB || UseInst.isPHI()) 90730b57cec5SDimitry Andric return true; 90740b57cec5SDimitry Andric 9075e8d8bef9SDimitry Andric if (++NumUse > MaxUseScan) 90760b57cec5SDimitry Andric return true; 90770b57cec5SDimitry Andric } 90780b57cec5SDimitry Andric 9079e8d8bef9SDimitry Andric if (NumUse == 0) 9080e8d8bef9SDimitry Andric return false; 9081e8d8bef9SDimitry Andric 90820b57cec5SDimitry Andric const int MaxInstScan = 20; 90830b57cec5SDimitry Andric int NumInst = 0; 90840b57cec5SDimitry Andric 90850b57cec5SDimitry Andric // Stop scan when we have seen all the uses. 90860b57cec5SDimitry Andric for (auto I = std::next(DefMI.getIterator()); ; ++I) { 9087e8d8bef9SDimitry Andric assert(I != DefBB->end()); 9088e8d8bef9SDimitry Andric 90890b57cec5SDimitry Andric if (I->isDebugInstr()) 90900b57cec5SDimitry Andric continue; 90910b57cec5SDimitry Andric 90920b57cec5SDimitry Andric if (++NumInst > MaxInstScan) 90930b57cec5SDimitry Andric return true; 90940b57cec5SDimitry Andric 9095e8d8bef9SDimitry Andric for (const MachineOperand &Op : I->operands()) { 9096e8d8bef9SDimitry Andric // We don't check reg masks here as they're used only on calls: 9097e8d8bef9SDimitry Andric // 1. EXEC is only considered const within one BB 9098e8d8bef9SDimitry Andric // 2. 
Call should be a terminator instruction if present in a BB 90990b57cec5SDimitry Andric 9100e8d8bef9SDimitry Andric if (!Op.isReg()) 9101e8d8bef9SDimitry Andric continue; 9102e8d8bef9SDimitry Andric 9103e8d8bef9SDimitry Andric Register Reg = Op.getReg(); 9104e8d8bef9SDimitry Andric if (Op.isUse()) { 9105e8d8bef9SDimitry Andric if (Reg == VReg && --NumUse == 0) 9106e8d8bef9SDimitry Andric return false; 9107e8d8bef9SDimitry Andric } else if (TRI->regsOverlap(Reg, AMDGPU::EXEC)) 91080b57cec5SDimitry Andric return true; 91090b57cec5SDimitry Andric } 91100b57cec5SDimitry Andric } 9111e8d8bef9SDimitry Andric } 91128bcb0991SDimitry Andric 91138bcb0991SDimitry Andric MachineInstr *SIInstrInfo::createPHIDestinationCopy( 91148bcb0991SDimitry Andric MachineBasicBlock &MBB, MachineBasicBlock::iterator LastPHIIt, 91158bcb0991SDimitry Andric const DebugLoc &DL, Register Src, Register Dst) const { 91168bcb0991SDimitry Andric auto Cur = MBB.begin(); 91178bcb0991SDimitry Andric if (Cur != MBB.end()) 91188bcb0991SDimitry Andric do { 91198bcb0991SDimitry Andric if (!Cur->isPHI() && Cur->readsRegister(Dst)) 91208bcb0991SDimitry Andric return BuildMI(MBB, Cur, DL, get(TargetOpcode::COPY), Dst).addReg(Src); 91218bcb0991SDimitry Andric ++Cur; 91228bcb0991SDimitry Andric } while (Cur != MBB.end() && Cur != LastPHIIt); 91238bcb0991SDimitry Andric 91248bcb0991SDimitry Andric return TargetInstrInfo::createPHIDestinationCopy(MBB, LastPHIIt, DL, Src, 91258bcb0991SDimitry Andric Dst); 91268bcb0991SDimitry Andric } 91278bcb0991SDimitry Andric 91288bcb0991SDimitry Andric MachineInstr *SIInstrInfo::createPHISourceCopy( 91298bcb0991SDimitry Andric MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, 9130480093f4SDimitry Andric const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const { 91318bcb0991SDimitry Andric if (InsPt != MBB.end() && 91328bcb0991SDimitry Andric (InsPt->getOpcode() == AMDGPU::SI_IF || 91338bcb0991SDimitry Andric InsPt->getOpcode() == AMDGPU::SI_ELSE || 91348bcb0991SDimitry Andric InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) && 91358bcb0991SDimitry Andric InsPt->definesRegister(Src)) { 91368bcb0991SDimitry Andric InsPt++; 9137480093f4SDimitry Andric return BuildMI(MBB, InsPt, DL, 91388bcb0991SDimitry Andric get(ST.isWave32() ? AMDGPU::S_MOV_B32_term 91398bcb0991SDimitry Andric : AMDGPU::S_MOV_B64_term), 91408bcb0991SDimitry Andric Dst) 91418bcb0991SDimitry Andric .addReg(Src, 0, SrcSubReg) 91428bcb0991SDimitry Andric .addReg(AMDGPU::EXEC, RegState::Implicit); 91438bcb0991SDimitry Andric } 91448bcb0991SDimitry Andric return TargetInstrInfo::createPHISourceCopy(MBB, InsPt, DL, Src, SrcSubReg, 91458bcb0991SDimitry Andric Dst); 91468bcb0991SDimitry Andric } 91478bcb0991SDimitry Andric 91488bcb0991SDimitry Andric bool llvm::SIInstrInfo::isWave32() const { return ST.isWave32(); } 9149480093f4SDimitry Andric 9150480093f4SDimitry Andric MachineInstr *SIInstrInfo::foldMemoryOperandImpl( 9151480093f4SDimitry Andric MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, 9152480093f4SDimitry Andric MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS, 9153480093f4SDimitry Andric VirtRegMap *VRM) const { 9154480093f4SDimitry Andric // This is a bit of a hack (copied from AArch64). 
Consider this instruction: 9155480093f4SDimitry Andric // 9156480093f4SDimitry Andric // %0:sreg_32 = COPY $m0 9157480093f4SDimitry Andric // 9158480093f4SDimitry Andric // We explicitly chose SReg_32 for the virtual register so such a copy might 9159480093f4SDimitry Andric // be eliminated by RegisterCoalescer. However, that may not be possible, and 9160480093f4SDimitry Andric // %0 may even spill. We can't spill $m0 normally (it would require copying to 9161480093f4SDimitry Andric // a numbered SGPR anyway), and since it is in the SReg_32 register class, 9162480093f4SDimitry Andric // TargetInstrInfo::foldMemoryOperand() is going to try. 91635ffd83dbSDimitry Andric // A similar issue also exists with spilling and reloading $exec registers. 9164480093f4SDimitry Andric // 9165480093f4SDimitry Andric // To prevent that, constrain the %0 register class here. 91665f757f3fSDimitry Andric if (isFullCopyInstr(MI)) { 9167480093f4SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 9168480093f4SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 91695ffd83dbSDimitry Andric if ((DstReg.isVirtual() || SrcReg.isVirtual()) && 91705ffd83dbSDimitry Andric (DstReg.isVirtual() != SrcReg.isVirtual())) { 91715ffd83dbSDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo(); 91725ffd83dbSDimitry Andric Register VirtReg = DstReg.isVirtual() ? DstReg : SrcReg; 91735ffd83dbSDimitry Andric const TargetRegisterClass *RC = MRI.getRegClass(VirtReg); 91745ffd83dbSDimitry Andric if (RC->hasSuperClassEq(&AMDGPU::SReg_32RegClass)) { 91755ffd83dbSDimitry Andric MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_32_XM0_XEXECRegClass); 91765ffd83dbSDimitry Andric return nullptr; 91775ffd83dbSDimitry Andric } else if (RC->hasSuperClassEq(&AMDGPU::SReg_64RegClass)) { 91785ffd83dbSDimitry Andric MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_64_XEXECRegClass); 9179480093f4SDimitry Andric return nullptr; 9180480093f4SDimitry Andric } 9181480093f4SDimitry Andric } 9182480093f4SDimitry Andric } 9183480093f4SDimitry Andric 9184480093f4SDimitry Andric return nullptr; 9185480093f4SDimitry Andric } 9186480093f4SDimitry Andric 9187480093f4SDimitry Andric unsigned SIInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, 9188480093f4SDimitry Andric const MachineInstr &MI, 9189480093f4SDimitry Andric unsigned *PredCost) const { 9190480093f4SDimitry Andric if (MI.isBundle()) { 9191480093f4SDimitry Andric MachineBasicBlock::const_instr_iterator I(MI.getIterator()); 9192480093f4SDimitry Andric MachineBasicBlock::const_instr_iterator E(MI.getParent()->instr_end()); 9193480093f4SDimitry Andric unsigned Lat = 0, Count = 0; 9194480093f4SDimitry Andric for (++I; I != E && I->isBundledWithPred(); ++I) { 9195480093f4SDimitry Andric ++Count; 9196480093f4SDimitry Andric Lat = std::max(Lat, SchedModel.computeInstrLatency(&*I)); 9197480093f4SDimitry Andric } 9198480093f4SDimitry Andric return Lat + Count - 1; 9199480093f4SDimitry Andric } 9200480093f4SDimitry Andric 9201480093f4SDimitry Andric return SchedModel.computeInstrLatency(&MI); 9202480093f4SDimitry Andric } 9203e8d8bef9SDimitry Andric 9204bdd1243dSDimitry Andric InstructionUniformity 9205bdd1243dSDimitry Andric SIInstrInfo::getGenericInstructionUniformity(const MachineInstr &MI) const { 9206bdd1243dSDimitry Andric unsigned opcode = MI.getOpcode(); 92075f757f3fSDimitry Andric if (auto *GI = dyn_cast<GIntrinsic>(&MI)) { 92085f757f3fSDimitry Andric auto IID = GI->getIntrinsicID(); 920906c3fb27SDimitry Andric if (AMDGPU::isIntrinsicSourceOfDivergence(IID)) 921006c3fb27SDimitry 
Andric return InstructionUniformity::NeverUniform; 921106c3fb27SDimitry Andric if (AMDGPU::isIntrinsicAlwaysUniform(IID)) 921206c3fb27SDimitry Andric return InstructionUniformity::AlwaysUniform; 921306c3fb27SDimitry Andric 921406c3fb27SDimitry Andric switch (IID) { 921506c3fb27SDimitry Andric case Intrinsic::amdgcn_if: 921606c3fb27SDimitry Andric case Intrinsic::amdgcn_else: 921706c3fb27SDimitry Andric // FIXME: Uniform if second result 921806c3fb27SDimitry Andric break; 921906c3fb27SDimitry Andric } 922006c3fb27SDimitry Andric 922106c3fb27SDimitry Andric return InstructionUniformity::Default; 9222bdd1243dSDimitry Andric } 9223bdd1243dSDimitry Andric 9224bdd1243dSDimitry Andric // Loads from the private and flat address spaces are divergent, because 9225bdd1243dSDimitry Andric // threads can execute the load instruction with the same inputs and get 9226bdd1243dSDimitry Andric // different results. 9227bdd1243dSDimitry Andric // 9228bdd1243dSDimitry Andric // All other loads are not divergent, because if threads issue loads with the 9229bdd1243dSDimitry Andric // same arguments, they will always get the same result. 9230bdd1243dSDimitry Andric if (opcode == AMDGPU::G_LOAD) { 9231bdd1243dSDimitry Andric if (MI.memoperands_empty()) 9232bdd1243dSDimitry Andric return InstructionUniformity::NeverUniform; // conservative assumption 9233bdd1243dSDimitry Andric 9234bdd1243dSDimitry Andric if (llvm::any_of(MI.memoperands(), [](const MachineMemOperand *mmo) { 9235bdd1243dSDimitry Andric return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS || 9236bdd1243dSDimitry Andric mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS; 9237bdd1243dSDimitry Andric })) { 9238bdd1243dSDimitry Andric // At least one MMO in a non-global address space. 9239bdd1243dSDimitry Andric return InstructionUniformity::NeverUniform; 9240bdd1243dSDimitry Andric } 9241bdd1243dSDimitry Andric return InstructionUniformity::Default; 9242bdd1243dSDimitry Andric } 9243bdd1243dSDimitry Andric 9244bdd1243dSDimitry Andric if (SIInstrInfo::isGenericAtomicRMWOpcode(opcode) || 9245bdd1243dSDimitry Andric opcode == AMDGPU::G_ATOMIC_CMPXCHG || 92465f757f3fSDimitry Andric opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS || 92475f757f3fSDimitry Andric AMDGPU::isGenericAtomic(opcode)) { 9248bdd1243dSDimitry Andric return InstructionUniformity::NeverUniform; 9249bdd1243dSDimitry Andric } 9250bdd1243dSDimitry Andric return InstructionUniformity::Default; 9251bdd1243dSDimitry Andric } 9252bdd1243dSDimitry Andric 9253bdd1243dSDimitry Andric InstructionUniformity 9254bdd1243dSDimitry Andric SIInstrInfo::getInstructionUniformity(const MachineInstr &MI) const { 925506c3fb27SDimitry Andric 925606c3fb27SDimitry Andric if (isNeverUniform(MI)) 925706c3fb27SDimitry Andric return InstructionUniformity::NeverUniform; 925806c3fb27SDimitry Andric 925906c3fb27SDimitry Andric unsigned opcode = MI.getOpcode(); 92605f757f3fSDimitry Andric if (opcode == AMDGPU::V_READLANE_B32 || 92615f757f3fSDimitry Andric opcode == AMDGPU::V_READFIRSTLANE_B32 || 92625f757f3fSDimitry Andric opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR) 926306c3fb27SDimitry Andric return InstructionUniformity::AlwaysUniform; 926406c3fb27SDimitry Andric 92655f757f3fSDimitry Andric if (isCopyInstr(MI)) { 926606c3fb27SDimitry Andric const MachineOperand &srcOp = MI.getOperand(1); 926706c3fb27SDimitry Andric if (srcOp.isReg() && srcOp.getReg().isPhysical()) { 926806c3fb27SDimitry Andric const TargetRegisterClass *regClass = 926906c3fb27SDimitry Andric RI.getPhysRegBaseClass(srcOp.getReg()); 927006c3fb27SDimitry 
Andric return RI.isSGPRClass(regClass) ? InstructionUniformity::AlwaysUniform 927106c3fb27SDimitry Andric : InstructionUniformity::NeverUniform; 927206c3fb27SDimitry Andric } 927306c3fb27SDimitry Andric return InstructionUniformity::Default; 927406c3fb27SDimitry Andric } 927506c3fb27SDimitry Andric 927606c3fb27SDimitry Andric // GMIR handling 927706c3fb27SDimitry Andric if (MI.isPreISelOpcode()) 927806c3fb27SDimitry Andric return SIInstrInfo::getGenericInstructionUniformity(MI); 927906c3fb27SDimitry Andric 9280bdd1243dSDimitry Andric // Atomics are divergent because they are executed sequentially: when an 9281bdd1243dSDimitry Andric // atomic operation refers to the same address in each thread, then each 9282bdd1243dSDimitry Andric // thread after the first sees the value written by the previous thread as 9283bdd1243dSDimitry Andric // original value. 9284bdd1243dSDimitry Andric 9285bdd1243dSDimitry Andric if (isAtomic(MI)) 9286bdd1243dSDimitry Andric return InstructionUniformity::NeverUniform; 9287bdd1243dSDimitry Andric 9288bdd1243dSDimitry Andric // Loads from the private and flat address spaces are divergent, because 9289bdd1243dSDimitry Andric // threads can execute the load instruction with the same inputs and get 9290bdd1243dSDimitry Andric // different results. 9291bdd1243dSDimitry Andric if (isFLAT(MI) && MI.mayLoad()) { 9292bdd1243dSDimitry Andric if (MI.memoperands_empty()) 9293bdd1243dSDimitry Andric return InstructionUniformity::NeverUniform; // conservative assumption 9294bdd1243dSDimitry Andric 9295bdd1243dSDimitry Andric if (llvm::any_of(MI.memoperands(), [](const MachineMemOperand *mmo) { 9296bdd1243dSDimitry Andric return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS || 9297bdd1243dSDimitry Andric mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS; 9298bdd1243dSDimitry Andric })) { 9299bdd1243dSDimitry Andric // At least one MMO in a non-global address space. 9300bdd1243dSDimitry Andric return InstructionUniformity::NeverUniform; 9301bdd1243dSDimitry Andric } 9302bdd1243dSDimitry Andric 9303bdd1243dSDimitry Andric return InstructionUniformity::Default; 9304bdd1243dSDimitry Andric } 9305bdd1243dSDimitry Andric 9306bdd1243dSDimitry Andric const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 930706c3fb27SDimitry Andric const AMDGPURegisterBankInfo *RBI = ST.getRegBankInfo(); 930806c3fb27SDimitry Andric 930906c3fb27SDimitry Andric // FIXME: It's conceptually broken to report this for an instruction, and not 931006c3fb27SDimitry Andric // a specific def operand. For inline asm in particular, there could be mixed 931106c3fb27SDimitry Andric // uniform and divergent results. 931206c3fb27SDimitry Andric for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { 931306c3fb27SDimitry Andric const MachineOperand &SrcOp = MI.getOperand(I); 931406c3fb27SDimitry Andric if (!SrcOp.isReg()) 9315bdd1243dSDimitry Andric continue; 9316bdd1243dSDimitry Andric 931706c3fb27SDimitry Andric Register Reg = SrcOp.getReg(); 931806c3fb27SDimitry Andric if (!Reg || !SrcOp.readsReg()) 931906c3fb27SDimitry Andric continue; 9320bdd1243dSDimitry Andric 932106c3fb27SDimitry Andric // If RegBank is null, this is unassigned or an unallocatable special 932206c3fb27SDimitry Andric // register, which are all scalars. 
932306c3fb27SDimitry Andric const RegisterBank *RegBank = RBI->getRegBank(Reg, MRI, RI); 932406c3fb27SDimitry Andric if (RegBank && RegBank->getID() != AMDGPU::SGPRRegBankID) 9325bdd1243dSDimitry Andric return InstructionUniformity::NeverUniform; 9326bdd1243dSDimitry Andric } 9327bdd1243dSDimitry Andric 9328bdd1243dSDimitry Andric // TODO: Uniformity check conditions above can be rearranged for more 9329bdd1243dSDimitry Andric // readability 9330bdd1243dSDimitry Andric 9331bdd1243dSDimitry Andric // TODO: amdgcn.{ballot, [if]cmp} should be AlwaysUniform, but they are 9332bdd1243dSDimitry Andric // currently turned into no-op COPYs by SelectionDAG ISel and are 9333bdd1243dSDimitry Andric // therefore no longer recognizable. 9334bdd1243dSDimitry Andric 9335bdd1243dSDimitry Andric return InstructionUniformity::Default; 9336bdd1243dSDimitry Andric } 9337bdd1243dSDimitry Andric 9338e8d8bef9SDimitry Andric unsigned SIInstrInfo::getDSShaderTypeValue(const MachineFunction &MF) { 9339e8d8bef9SDimitry Andric switch (MF.getFunction().getCallingConv()) { 9340e8d8bef9SDimitry Andric case CallingConv::AMDGPU_PS: 9341e8d8bef9SDimitry Andric return 1; 9342e8d8bef9SDimitry Andric case CallingConv::AMDGPU_VS: 9343e8d8bef9SDimitry Andric return 2; 9344e8d8bef9SDimitry Andric case CallingConv::AMDGPU_GS: 9345e8d8bef9SDimitry Andric return 3; 9346e8d8bef9SDimitry Andric case CallingConv::AMDGPU_HS: 9347e8d8bef9SDimitry Andric case CallingConv::AMDGPU_LS: 9348e8d8bef9SDimitry Andric case CallingConv::AMDGPU_ES: 9349e8d8bef9SDimitry Andric report_fatal_error("ds_ordered_count unsupported for this calling conv"); 9350e8d8bef9SDimitry Andric case CallingConv::AMDGPU_CS: 9351e8d8bef9SDimitry Andric case CallingConv::AMDGPU_KERNEL: 9352e8d8bef9SDimitry Andric case CallingConv::C: 9353e8d8bef9SDimitry Andric case CallingConv::Fast: 9354e8d8bef9SDimitry Andric default: 9355e8d8bef9SDimitry Andric // Assume other calling conventions are various compute callable functions 9356e8d8bef9SDimitry Andric return 0; 9357e8d8bef9SDimitry Andric } 9358e8d8bef9SDimitry Andric } 9359349cc55cSDimitry Andric 9360349cc55cSDimitry Andric bool SIInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg, 9361349cc55cSDimitry Andric Register &SrcReg2, int64_t &CmpMask, 9362349cc55cSDimitry Andric int64_t &CmpValue) const { 9363349cc55cSDimitry Andric if (!MI.getOperand(0).isReg() || MI.getOperand(0).getSubReg()) 9364349cc55cSDimitry Andric return false; 9365349cc55cSDimitry Andric 9366349cc55cSDimitry Andric switch (MI.getOpcode()) { 9367349cc55cSDimitry Andric default: 9368349cc55cSDimitry Andric break; 9369349cc55cSDimitry Andric case AMDGPU::S_CMP_EQ_U32: 9370349cc55cSDimitry Andric case AMDGPU::S_CMP_EQ_I32: 9371349cc55cSDimitry Andric case AMDGPU::S_CMP_LG_U32: 9372349cc55cSDimitry Andric case AMDGPU::S_CMP_LG_I32: 9373349cc55cSDimitry Andric case AMDGPU::S_CMP_LT_U32: 9374349cc55cSDimitry Andric case AMDGPU::S_CMP_LT_I32: 9375349cc55cSDimitry Andric case AMDGPU::S_CMP_GT_U32: 9376349cc55cSDimitry Andric case AMDGPU::S_CMP_GT_I32: 9377349cc55cSDimitry Andric case AMDGPU::S_CMP_LE_U32: 9378349cc55cSDimitry Andric case AMDGPU::S_CMP_LE_I32: 9379349cc55cSDimitry Andric case AMDGPU::S_CMP_GE_U32: 9380349cc55cSDimitry Andric case AMDGPU::S_CMP_GE_I32: 9381349cc55cSDimitry Andric case AMDGPU::S_CMP_EQ_U64: 9382349cc55cSDimitry Andric case AMDGPU::S_CMP_LG_U64: 9383349cc55cSDimitry Andric SrcReg = MI.getOperand(0).getReg(); 9384349cc55cSDimitry Andric if (MI.getOperand(1).isReg()) { 9385349cc55cSDimitry Andric if
(MI.getOperand(1).getSubReg()) 9386349cc55cSDimitry Andric return false; 9387349cc55cSDimitry Andric SrcReg2 = MI.getOperand(1).getReg(); 9388349cc55cSDimitry Andric CmpValue = 0; 9389349cc55cSDimitry Andric } else if (MI.getOperand(1).isImm()) { 9390349cc55cSDimitry Andric SrcReg2 = Register(); 9391349cc55cSDimitry Andric CmpValue = MI.getOperand(1).getImm(); 9392349cc55cSDimitry Andric } else { 9393349cc55cSDimitry Andric return false; 9394349cc55cSDimitry Andric } 9395349cc55cSDimitry Andric CmpMask = ~0; 9396349cc55cSDimitry Andric return true; 9397349cc55cSDimitry Andric case AMDGPU::S_CMPK_EQ_U32: 9398349cc55cSDimitry Andric case AMDGPU::S_CMPK_EQ_I32: 9399349cc55cSDimitry Andric case AMDGPU::S_CMPK_LG_U32: 9400349cc55cSDimitry Andric case AMDGPU::S_CMPK_LG_I32: 9401349cc55cSDimitry Andric case AMDGPU::S_CMPK_LT_U32: 9402349cc55cSDimitry Andric case AMDGPU::S_CMPK_LT_I32: 9403349cc55cSDimitry Andric case AMDGPU::S_CMPK_GT_U32: 9404349cc55cSDimitry Andric case AMDGPU::S_CMPK_GT_I32: 9405349cc55cSDimitry Andric case AMDGPU::S_CMPK_LE_U32: 9406349cc55cSDimitry Andric case AMDGPU::S_CMPK_LE_I32: 9407349cc55cSDimitry Andric case AMDGPU::S_CMPK_GE_U32: 9408349cc55cSDimitry Andric case AMDGPU::S_CMPK_GE_I32: 9409349cc55cSDimitry Andric SrcReg = MI.getOperand(0).getReg(); 9410349cc55cSDimitry Andric SrcReg2 = Register(); 9411349cc55cSDimitry Andric CmpValue = MI.getOperand(1).getImm(); 9412349cc55cSDimitry Andric CmpMask = ~0; 9413349cc55cSDimitry Andric return true; 9414349cc55cSDimitry Andric } 9415349cc55cSDimitry Andric 9416349cc55cSDimitry Andric return false; 9417349cc55cSDimitry Andric } 9418349cc55cSDimitry Andric 9419349cc55cSDimitry Andric bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, 9420349cc55cSDimitry Andric Register SrcReg2, int64_t CmpMask, 9421349cc55cSDimitry Andric int64_t CmpValue, 9422349cc55cSDimitry Andric const MachineRegisterInfo *MRI) const { 9423349cc55cSDimitry Andric if (!SrcReg || SrcReg.isPhysical()) 9424349cc55cSDimitry Andric return false; 9425349cc55cSDimitry Andric 9426349cc55cSDimitry Andric if (SrcReg2 && !getFoldableImm(SrcReg2, *MRI, CmpValue)) 9427349cc55cSDimitry Andric return false; 9428349cc55cSDimitry Andric 9429349cc55cSDimitry Andric const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI, 9430349cc55cSDimitry Andric this](int64_t ExpectedValue, unsigned SrcSize, 943181ad6265SDimitry Andric bool IsReversible, bool IsSigned) -> bool { 9432349cc55cSDimitry Andric // s_cmp_eq_u32 (s_and_b32 $src, 1 << n), 1 << n => s_and_b32 $src, 1 << n 9433349cc55cSDimitry Andric // s_cmp_eq_i32 (s_and_b32 $src, 1 << n), 1 << n => s_and_b32 $src, 1 << n 9434349cc55cSDimitry Andric // s_cmp_ge_u32 (s_and_b32 $src, 1 << n), 1 << n => s_and_b32 $src, 1 << n 9435349cc55cSDimitry Andric // s_cmp_ge_i32 (s_and_b32 $src, 1 << n), 1 << n => s_and_b32 $src, 1 << n 9436349cc55cSDimitry Andric // s_cmp_eq_u64 (s_and_b64 $src, 1 << n), 1 << n => s_and_b64 $src, 1 << n 9437349cc55cSDimitry Andric // s_cmp_lg_u32 (s_and_b32 $src, 1 << n), 0 => s_and_b32 $src, 1 << n 9438349cc55cSDimitry Andric // s_cmp_lg_i32 (s_and_b32 $src, 1 << n), 0 => s_and_b32 $src, 1 << n 9439349cc55cSDimitry Andric // s_cmp_gt_u32 (s_and_b32 $src, 1 << n), 0 => s_and_b32 $src, 1 << n 9440349cc55cSDimitry Andric // s_cmp_gt_i32 (s_and_b32 $src, 1 << n), 0 => s_and_b32 $src, 1 << n 9441349cc55cSDimitry Andric // s_cmp_lg_u64 (s_and_b64 $src, 1 << n), 0 => s_and_b64 $src, 1 << n 9442349cc55cSDimitry Andric // 9443349cc55cSDimitry Andric // Signed ge/gt are not used 
for the sign bit. 9444349cc55cSDimitry Andric // 9445349cc55cSDimitry Andric // If result of the AND is unused except in the compare: 9446349cc55cSDimitry Andric // s_and_b(32|64) $src, 1 << n => s_bitcmp1_b(32|64) $src, n 9447349cc55cSDimitry Andric // 9448349cc55cSDimitry Andric // s_cmp_eq_u32 (s_and_b32 $src, 1 << n), 0 => s_bitcmp0_b32 $src, n 9449349cc55cSDimitry Andric // s_cmp_eq_i32 (s_and_b32 $src, 1 << n), 0 => s_bitcmp0_b32 $src, n 9450349cc55cSDimitry Andric // s_cmp_eq_u64 (s_and_b64 $src, 1 << n), 0 => s_bitcmp0_b64 $src, n 9451349cc55cSDimitry Andric // s_cmp_lg_u32 (s_and_b32 $src, 1 << n), 1 << n => s_bitcmp0_b32 $src, n 9452349cc55cSDimitry Andric // s_cmp_lg_i32 (s_and_b32 $src, 1 << n), 1 << n => s_bitcmp0_b32 $src, n 9453349cc55cSDimitry Andric // s_cmp_lg_u64 (s_and_b64 $src, 1 << n), 1 << n => s_bitcmp0_b64 $src, n 9454349cc55cSDimitry Andric 9455349cc55cSDimitry Andric MachineInstr *Def = MRI->getUniqueVRegDef(SrcReg); 9456349cc55cSDimitry Andric if (!Def || Def->getParent() != CmpInstr.getParent()) 9457349cc55cSDimitry Andric return false; 9458349cc55cSDimitry Andric 9459349cc55cSDimitry Andric if (Def->getOpcode() != AMDGPU::S_AND_B32 && 9460349cc55cSDimitry Andric Def->getOpcode() != AMDGPU::S_AND_B64) 9461349cc55cSDimitry Andric return false; 9462349cc55cSDimitry Andric 9463349cc55cSDimitry Andric int64_t Mask; 9464349cc55cSDimitry Andric const auto isMask = [&Mask, SrcSize](const MachineOperand *MO) -> bool { 9465349cc55cSDimitry Andric if (MO->isImm()) 9466349cc55cSDimitry Andric Mask = MO->getImm(); 9467349cc55cSDimitry Andric else if (!getFoldableImm(MO, Mask)) 9468349cc55cSDimitry Andric return false; 9469349cc55cSDimitry Andric Mask &= maxUIntN(SrcSize); 9470349cc55cSDimitry Andric return isPowerOf2_64(Mask); 9471349cc55cSDimitry Andric }; 9472349cc55cSDimitry Andric 9473349cc55cSDimitry Andric MachineOperand *SrcOp = &Def->getOperand(1); 9474349cc55cSDimitry Andric if (isMask(SrcOp)) 9475349cc55cSDimitry Andric SrcOp = &Def->getOperand(2); 9476349cc55cSDimitry Andric else if (isMask(&Def->getOperand(2))) 9477349cc55cSDimitry Andric SrcOp = &Def->getOperand(1); 9478349cc55cSDimitry Andric else 9479349cc55cSDimitry Andric return false; 9480349cc55cSDimitry Andric 948106c3fb27SDimitry Andric unsigned BitNo = llvm::countr_zero((uint64_t)Mask); 9482349cc55cSDimitry Andric if (IsSigned && BitNo == SrcSize - 1) 9483349cc55cSDimitry Andric return false; 9484349cc55cSDimitry Andric 9485349cc55cSDimitry Andric ExpectedValue <<= BitNo; 9486349cc55cSDimitry Andric 9487349cc55cSDimitry Andric bool IsReversedCC = false; 9488349cc55cSDimitry Andric if (CmpValue != ExpectedValue) { 948981ad6265SDimitry Andric if (!IsReversible) 9490349cc55cSDimitry Andric return false; 9491349cc55cSDimitry Andric IsReversedCC = CmpValue == (ExpectedValue ^ Mask); 9492349cc55cSDimitry Andric if (!IsReversedCC) 9493349cc55cSDimitry Andric return false; 9494349cc55cSDimitry Andric } 9495349cc55cSDimitry Andric 9496349cc55cSDimitry Andric Register DefReg = Def->getOperand(0).getReg(); 9497349cc55cSDimitry Andric if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg)) 9498349cc55cSDimitry Andric return false; 9499349cc55cSDimitry Andric 9500349cc55cSDimitry Andric for (auto I = std::next(Def->getIterator()), E = CmpInstr.getIterator(); 9501349cc55cSDimitry Andric I != E; ++I) { 9502349cc55cSDimitry Andric if (I->modifiesRegister(AMDGPU::SCC, &RI) || 9503349cc55cSDimitry Andric I->killsRegister(AMDGPU::SCC, &RI)) 9504349cc55cSDimitry Andric return false; 9505349cc55cSDimitry Andric } 
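// Nothing clobbers SCC between the S_AND and the compare, so the compare is
// redundant: keep the SCC def on the S_AND live, erase the compare, and, if
// the AND result has no other users, rewrite the S_AND into an S_BITCMP
// below.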
9506349cc55cSDimitry Andric 9507349cc55cSDimitry Andric MachineOperand *SccDef = Def->findRegisterDefOperand(AMDGPU::SCC); 9508349cc55cSDimitry Andric SccDef->setIsDead(false); 9509349cc55cSDimitry Andric CmpInstr.eraseFromParent(); 9510349cc55cSDimitry Andric 9511349cc55cSDimitry Andric if (!MRI->use_nodbg_empty(DefReg)) { 9512349cc55cSDimitry Andric assert(!IsReversedCC); 9513349cc55cSDimitry Andric return true; 9514349cc55cSDimitry Andric } 9515349cc55cSDimitry Andric 9516349cc55cSDimitry Andric // Replace AND with unused result with a S_BITCMP. 9517349cc55cSDimitry Andric MachineBasicBlock *MBB = Def->getParent(); 9518349cc55cSDimitry Andric 9519349cc55cSDimitry Andric unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32 9520349cc55cSDimitry Andric : AMDGPU::S_BITCMP1_B32 9521349cc55cSDimitry Andric : IsReversedCC ? AMDGPU::S_BITCMP0_B64 9522349cc55cSDimitry Andric : AMDGPU::S_BITCMP1_B64; 9523349cc55cSDimitry Andric 9524349cc55cSDimitry Andric BuildMI(*MBB, Def, Def->getDebugLoc(), get(NewOpc)) 9525349cc55cSDimitry Andric .add(*SrcOp) 9526349cc55cSDimitry Andric .addImm(BitNo); 9527349cc55cSDimitry Andric Def->eraseFromParent(); 9528349cc55cSDimitry Andric 9529349cc55cSDimitry Andric return true; 9530349cc55cSDimitry Andric }; 9531349cc55cSDimitry Andric 9532349cc55cSDimitry Andric switch (CmpInstr.getOpcode()) { 9533349cc55cSDimitry Andric default: 9534349cc55cSDimitry Andric break; 9535349cc55cSDimitry Andric case AMDGPU::S_CMP_EQ_U32: 9536349cc55cSDimitry Andric case AMDGPU::S_CMP_EQ_I32: 9537349cc55cSDimitry Andric case AMDGPU::S_CMPK_EQ_U32: 9538349cc55cSDimitry Andric case AMDGPU::S_CMPK_EQ_I32: 9539349cc55cSDimitry Andric return optimizeCmpAnd(1, 32, true, false); 9540349cc55cSDimitry Andric case AMDGPU::S_CMP_GE_U32: 9541349cc55cSDimitry Andric case AMDGPU::S_CMPK_GE_U32: 9542349cc55cSDimitry Andric return optimizeCmpAnd(1, 32, false, false); 9543349cc55cSDimitry Andric case AMDGPU::S_CMP_GE_I32: 9544349cc55cSDimitry Andric case AMDGPU::S_CMPK_GE_I32: 9545349cc55cSDimitry Andric return optimizeCmpAnd(1, 32, false, true); 9546349cc55cSDimitry Andric case AMDGPU::S_CMP_EQ_U64: 9547349cc55cSDimitry Andric return optimizeCmpAnd(1, 64, true, false); 9548349cc55cSDimitry Andric case AMDGPU::S_CMP_LG_U32: 9549349cc55cSDimitry Andric case AMDGPU::S_CMP_LG_I32: 9550349cc55cSDimitry Andric case AMDGPU::S_CMPK_LG_U32: 9551349cc55cSDimitry Andric case AMDGPU::S_CMPK_LG_I32: 9552349cc55cSDimitry Andric return optimizeCmpAnd(0, 32, true, false); 9553349cc55cSDimitry Andric case AMDGPU::S_CMP_GT_U32: 9554349cc55cSDimitry Andric case AMDGPU::S_CMPK_GT_U32: 9555349cc55cSDimitry Andric return optimizeCmpAnd(0, 32, false, false); 9556349cc55cSDimitry Andric case AMDGPU::S_CMP_GT_I32: 9557349cc55cSDimitry Andric case AMDGPU::S_CMPK_GT_I32: 9558349cc55cSDimitry Andric return optimizeCmpAnd(0, 32, false, true); 9559349cc55cSDimitry Andric case AMDGPU::S_CMP_LG_U64: 9560349cc55cSDimitry Andric return optimizeCmpAnd(0, 64, true, false); 9561349cc55cSDimitry Andric } 9562349cc55cSDimitry Andric 9563349cc55cSDimitry Andric return false; 9564349cc55cSDimitry Andric } 956581ad6265SDimitry Andric 956681ad6265SDimitry Andric void SIInstrInfo::enforceOperandRCAlignment(MachineInstr &MI, 956781ad6265SDimitry Andric unsigned OpName) const { 956881ad6265SDimitry Andric if (!ST.needsAlignedVGPRs()) 956981ad6265SDimitry Andric return; 957081ad6265SDimitry Andric 957181ad6265SDimitry Andric int OpNo = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName); 957281ad6265SDimitry Andric if (OpNo < 0) 
957381ad6265SDimitry Andric return; 957481ad6265SDimitry Andric MachineOperand &Op = MI.getOperand(OpNo); 957581ad6265SDimitry Andric if (getOpSize(MI, OpNo) > 4) 957681ad6265SDimitry Andric return; 957781ad6265SDimitry Andric 957881ad6265SDimitry Andric // Add implicit aligned super-reg to force alignment on the data operand. 957981ad6265SDimitry Andric const DebugLoc &DL = MI.getDebugLoc(); 958081ad6265SDimitry Andric MachineBasicBlock *BB = MI.getParent(); 958181ad6265SDimitry Andric MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 958281ad6265SDimitry Andric Register DataReg = Op.getReg(); 958381ad6265SDimitry Andric bool IsAGPR = RI.isAGPR(MRI, DataReg); 958481ad6265SDimitry Andric Register Undef = MRI.createVirtualRegister( 958581ad6265SDimitry Andric IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass); 958681ad6265SDimitry Andric BuildMI(*BB, MI, DL, get(AMDGPU::IMPLICIT_DEF), Undef); 958781ad6265SDimitry Andric Register NewVR = 958881ad6265SDimitry Andric MRI.createVirtualRegister(IsAGPR ? &AMDGPU::AReg_64_Align2RegClass 958981ad6265SDimitry Andric : &AMDGPU::VReg_64_Align2RegClass); 959081ad6265SDimitry Andric BuildMI(*BB, MI, DL, get(AMDGPU::REG_SEQUENCE), NewVR) 959181ad6265SDimitry Andric .addReg(DataReg, 0, Op.getSubReg()) 959281ad6265SDimitry Andric .addImm(AMDGPU::sub0) 959381ad6265SDimitry Andric .addReg(Undef) 959481ad6265SDimitry Andric .addImm(AMDGPU::sub1); 959581ad6265SDimitry Andric Op.setReg(NewVR); 959681ad6265SDimitry Andric Op.setSubReg(AMDGPU::sub0); 959781ad6265SDimitry Andric MI.addOperand(MachineOperand::CreateReg(NewVR, false, true)); 959881ad6265SDimitry Andric } 9599